#!/usr/bin/perl -w
use strict;
use File::Basename;
use Getopt::Std;
# Base name of this script as invoked; used in the usage text and to label
# the temporary input file.
my $PROGRAM = basename $0;
# Message printed by the terminal check in the Main section below.
my $USAGE=
"Usage: $PROGRAM
";

use DomRefine::Read;
use DomRefine::General;

### Settings ###
# The option string is empty, i.e. this script declares no command-line
# switches of its own.
my %OPT;
getopts('', \%OPT);

# Temporary file name obtained from define_tmp_file; the END block hands it
# to remove_tmp_file when the interpreter exits (including exit via die).
# NOTE(review): the helpers are defined outside this file -- presumably they
# build a unique path and unlink it; confirm in DomRefine::General.
my $TMP_INPUT = define_tmp_file("$PROGRAM.input");
END {
    remove_tmp_file($TMP_INPUT);
}

### Main ###
# -t without an operand tests STDIN: when STDIN is a terminal (nothing piped
# or redirected in) print the usage text and stop.
-t and die $USAGE;
# NOTE(review): presumably copies STDIN into the temporary file -- confirm.
save_stdin($TMP_INPUT);

# Filled by get_dclst_structure from the saved input.  %domain is accessed in
# paste_short_domains as gene -> domain number -> {cluster, begin, end};
# %cluster is accessed there as cluster -> gene.
my %domain = ();
my %cluster = ();
get_dclst_structure($TMP_INPUT, \%cluster, \%domain);

# Merge short domains into long neighbours (modifies both hashes in place).
paste_short_domains(\%domain, \%cluster);

output_clusters(\%cluster, \%domain);

################################################################################
### Functions ##################################################################
################################################################################
# Merge ("paste") very short domains into a long neighbouring domain.
#
# Arguments:
#   $r_domain  - hash ref: gene -> domain number -> { cluster, begin, end }
#   $r_cluster - hash ref: cluster -> gene -> ... (only the gene level is
#                touched here)
#
# For every domain, the neighbours numbered $domain-1 and $domain+1 of the
# same gene are examined.  A neighbour is a merge target only if it exists
# and is assigned to a different cluster than the current domain.  When
# exactly one such neighbour exists, the current domain is shorter than 10
# and the neighbour is at least 100 long (length = end - begin + 1), the
# current domain is removed and the neighbour's begin/end are widened to
# cover it.  When both neighbours qualify, the target is ambiguous and the
# domain is left untouched.
#
# Both structures are modified in place; the return value is not meaningful.
sub paste_short_domains {
    my ($r_domain, $r_cluster) = @_;

    for my $gene (keys %{$r_domain}) {
        my $r_gene = ${$r_domain}{$gene};

        # keys() is expanded once before the loop starts, so deleting the
        # current entry inside the loop is safe.
        for my $domain (keys %{$r_gene}) {
            my $cluster = $r_gene->{$domain}{cluster};
            my $begin   = $r_gene->{$domain}{begin};
            my $end     = $r_gene->{$domain}{end};
            my $len     = $end - $begin + 1;

            # Neighbours that exist and belong to another cluster.  The
            # cluster must be read from the neighbour's {cluster} field:
            # comparing the neighbour's hash reference itself against the
            # cluster name is always "not equal" and would disable the check.
            my @candidates = grep {
                $r_gene->{$_} and $r_gene->{$_}{cluster} ne $cluster
            } ($domain - 1, $domain + 1);

            # No candidate: nothing to paste into.
            # Two candidates: ambiguous, so do nothing.
            next unless @candidates == 1;
            my $adj_domain = $candidates[0];

            my $adj_begin = $r_gene->{$adj_domain}{begin};
            my $adj_end   = $r_gene->{$adj_domain}{end};
            my $adj_len   = $adj_end - $adj_begin + 1;

            # Only a very short domain is absorbed, and only by a long one.
            next unless $len < 10 and $adj_len >= 100;

            # print STDERR "paste [$cluster] $gene ($domain)$begin-$end ($adj_domain)$adj_begin-$adj_end\n";
            delete $r_gene->{$domain};
            # NOTE(review): this drops the whole gene entry of the short
            # domain's cluster.  If $r_cluster keeps one entry per domain
            # below the gene level, other domains of this gene in the same
            # cluster are dropped as well -- confirm against
            # get_dclst_structure / output_clusters.
            delete ${$r_cluster}{$cluster}{$gene};

            # Widen the neighbour so that it covers the absorbed domain.
            # NOTE(review): only $r_domain receives the new coordinates; any
            # coordinates stored in $r_cluster are left as they were.
            $r_gene->{$adj_domain}{begin} = min($begin, $adj_begin);
            $r_gene->{$adj_domain}{end}   = max($end, $adj_end);
        }
    }
}
