#!/usr/bin/perl -w
use strict;
use File::Basename;
use Getopt::Std;
# Name this script was invoked as, without any directory part.
my $PROGRAM = basename($0);

# Usage text; passed to die when the list argument or the piped input is
# missing.
my $USAGE = <<"END_USAGE";
Usage: cat DCLST_FILE | $PROGRAM LIST_TO_MERGE
-f: fast mode (skip stepwise check)
END_USAGE

use DomRefine::Read;
use DomRefine::General;
use DomRefine::Score;
use DomRefine::Refine;

### Settings ###
# Command-line switches; -f is the only one recognized.
my %OPT;
getopts('f', \%OPT);

# Scratch file holding the current state of the input. The END block hands
# it to remove_tmp_file() when the program exits, including after a die.
my $TMP_INPUT = define_tmp_file($PROGRAM . '.input');
END {
    remove_tmp_file($TMP_INPUT);
}

# The path of the merge list is a mandatory argument.
@ARGV or die $USAGE;
my $LIST_TO_MERGE = $ARGV[0];

### Main ###
# Refuse to run when STDIN is a terminal: the input has to be piped in.
die $USAGE if -t STDIN;
# presumably copies everything on STDIN into $TMP_INPUT -- confirm in DomRefine
save_stdin($TMP_INPUT);

# Merge history for this run, maintained by update_cluster_mapping():
#   %GET_NEW_CLUSTER  cluster ID that was merged away => ID that replaced it
#   %GET_OLD_CLUSTER  merged cluster ID => array ref of every ID it absorbed
my %GET_NEW_CLUSTER = ();
my %GET_OLD_CLUSTER = ();

# A merge is rejected when its normalized score change is below this value
# (only checked when -f is not given).
my $MIN_SCORE_DIFF_NORMALIZED = -0.05;

# Walk through the merge candidates in file order. Every accepted merge is
# written back to $TMP_INPUT, so later candidates are evaluated against the
# already-merged state.
# Three-argument open with a lexical handle: the file name is never
# interpreted as a mode or a command.
open(my $list_fh, '<', $LIST_TO_MERGE)
    or die "$PROGRAM: cannot open $LIST_TO_MERGE: $!\n";
while (my $line = <$list_fh>) {
    chomp $line;

    # Each line has two whitespace-separated fields; only the second one, a
    # comma-separated list of cluster IDs, is used.
    my (undef, $cluster_set) = split ' ', $line;
    defined $cluster_set
        or die "$PROGRAM: malformed line $. in $LIST_TO_MERGE\n";
    my @cluster = split(/,/, $cluster_set);

    print STDERR "\nMERGE_TEST: @cluster\n";

    # Replace IDs that earlier merges have superseded. $flg_changed tells
    # whether any ID was replaced; it is currently not used here.
    my ($flg_changed, @cluster_to_be_considered) = get_clusters_to_be_considerd(@cluster);
    if (@cluster_to_be_considered == 0) {
        die "$PROGRAM: no cluster given on line $. of $LIST_TO_MERGE\n";
    } elsif (@cluster_to_be_considered == 1) {
        # Everything listed already belongs to one cluster: nothing to merge.
        next;
    }

    my $dclst_to_be_merged = extract_dclst_with_check($TMP_INPUT, @cluster_to_be_considered);
    my ($dclst_merged, $merged_cluster_id) = merge_all_to_a_cluster($dclst_to_be_merged);
    if (!$OPT{f}) { # skip, when f option specified.
        my $score_diff_normalized = get_score_diff_normalized($dclst_to_be_merged, $dclst_merged);
        if ($score_diff_normalized < $MIN_SCORE_DIFF_NORMALIZED) {
            next;
        }
    }

    # Accept the merge: the new state is the merged cluster followed by
    # whatever extract_dclst_compl() returns for the same cluster list.
    save_contents($dclst_merged . extract_dclst_compl($TMP_INPUT, @cluster_to_be_considered), $TMP_INPUT);
    update_cluster_mapping($merged_cluster_id, @cluster_to_be_considered);
    print STDERR "MERGE: @cluster\n";
}
close($list_fh);

# Emit the final state on STDOUT. The list form of system() runs cat
# directly, so the temp file path is never parsed by a shell.
system('cat', $TMP_INPUT);

################################################################################
### Functions ##################################################################
################################################################################

# Compute how much merging changes the score, normalized by the size of the
# merged cluster.
#
# Arguments:
#   $dclst_to_be_merged - dclst data before the merge
#   $dclst_merged       - the same data after being merged into one cluster
# Returns:
#   (score after - score before) / (n_seq * n_aa), where n_seq and n_aa are
#   the first and third values returned by scores_dclst_of_a_cluster() for
#   the merged data.
# Side effects:
#   Prints the intermediate values to STDERR.
#
# Note carried over from the original code: "opt_r is needed" for the three
# scoring calls below.
# NOTE(review): Perl dies with "Illegal division by zero" if n_seq * n_aa
# is 0 -- confirm that cannot happen for a merged cluster.
sub get_score_diff_normalized {
    my ($dclst_to_be_merged, $dclst_merged) = @_;

    my $score_before = score_dclst($dclst_to_be_merged);
    my $score_after  = score_dclst($dclst_merged);
    my $delta        = $score_after - $score_before;
    print STDERR "diff = $delta = $score_after - $score_before\n";

    # The second returned value is not needed here.
    my ($seq_count, undef, $aa_count) = scores_dclst_of_a_cluster($dclst_merged);
    print STDERR "n_seq = $seq_count, n_aa = $aa_count\n";

    my $delta_normalized = $delta / ($seq_count * $aa_count);
    print STDERR "diff_norm = $delta_normalized\n";

    return $delta_normalized;
}

# Translate a list of cluster IDs into the IDs that are valid now.
#
# Any ID recorded in %GET_NEW_CLUSTER as merged away is replaced by the ID of
# the cluster that took its place (each replacement is reported on STDERR);
# all other IDs are passed through unchanged. The result is run through
# uniq() before it is returned.
#
# Arguments:
#   @cluster - cluster IDs as given in the merge list
# Returns:
#   ($flg_changed, @cluster_to_be_considered) where $flg_changed is 1 if at
#   least one ID was replaced and 0 otherwise.
sub get_clusters_to_be_considerd {
    my @cluster = @_;

    my @cluster_to_be_considered = ();
    my $flg_changed = 0;
    for my $cluster (@cluster) {
        my $new_cluster = $GET_NEW_CLUSTER{$cluster};
        # Test for a recorded, non-empty ID rather than for truth: a plain
        # truth test would treat a replacement ID of 0 as "not merged" and
        # keep the stale ID.
        if (defined $new_cluster && length $new_cluster) {
            print STDERR "[$cluster] is changed to [$new_cluster].\n";
            push @cluster_to_be_considered, $new_cluster;
            $flg_changed = 1;
        } else {
            push @cluster_to_be_considered, $cluster;
        }
    }
    @cluster_to_be_considered = uniq(@cluster_to_be_considered);

    return($flg_changed, @cluster_to_be_considered);
}

# Record an accepted merge in %GET_NEW_CLUSTER and %GET_OLD_CLUSTER.
#
# Arguments:
#   $new_cluster          - ID of the cluster produced by the merge
#   @cluster_to_be_merged - IDs of the clusters that were merged
#
# After the call, every merged ID, and every ID those clusters had absorbed
# in earlier merges, maps to $new_cluster in %GET_NEW_CLUSTER, and
# $GET_OLD_CLUSTER{$new_cluster} lists all of those IDs.
sub update_cluster_mapping {
    my ($new_cluster, @cluster_to_be_merged) = @_;

    # IDs absorbed earlier by the clusters being merged now, in input order.
    my @inherited = map { $GET_OLD_CLUSTER{$_} ? @{ $GET_OLD_CLUSTER{$_} } : () }
        @cluster_to_be_merged;
    my @absorbed = uniq(@inherited, @cluster_to_be_merged);

    # Point all of them at the new cluster in one hash-slice assignment.
    @GET_NEW_CLUSTER{@absorbed} = ($new_cluster) x @absorbed;
    @{ $GET_OLD_CLUSTER{$new_cluster} } = @absorbed;
}
