#!/usr/bin/perl -w
use strict;
use File::Basename;
use Getopt::Std;
# Script name without its directory part; interpolated into the usage text
# below and into the labels handed to define_tmp_file().
my $PROGRAM = basename($0);

# Text passed to die() when standard input is attached to a terminal.
my $USAGE =
"Usage: $PROGRAM
";

use DomRefine::Read;
use DomRefine::Refine;
use DomRefine::Tree;
use DomRefine::General;

### Settings ###
# No option letters are declared; getopts still scans @ARGV.
my %OPT;
getopts('', \%OPT);

# Scratch files: one receives the saved standard input, the other the
# per-branch report written by find_max_overlap().
my $TMP_INPUT  = define_tmp_file("$PROGRAM.input");
my $TMP_DIVIDE = define_tmp_file("$PROGRAM.divide");
END {
    # Clean up both scratch files on exit.
    for my $tmp_file ($TMP_INPUT, $TMP_DIVIDE) {
        remove_tmp_file($tmp_file);
    }
}

### Main ###
# Input must arrive on a pipe/redirect; an interactive STDIN gets the usage text.
die $USAGE if -t STDIN;
save_stdin($TMP_INPUT);

# Build the tree, choose a node, reroot at the midpoint of its branch, print.
my $TREE  = create_tree($TMP_INPUT);
my @NODES = $TREE->get_root_node->get_all_Descendents;
my $IDX   = find_min_tree($TMP_INPUT, \@NODES);
# my $IDX = find_max_overlap($TMP_INPUT);
$TREE->reroot_at_midpoint($NODES[$IDX]);
print_tree(\$TREE);

################################################################################
### Function ###################################################################
################################################################################
# Evaluate every candidate branch of the file-level @NODES and pick the one
# whose split gives the best species-overlap score.
#
# For each node with a positive branch length, get_sp_overlap() is called
# and its detail line is appended to the $TMP_DIVIDE scratch file. When all
# nodes are processed the scratch file is echoed to STDOUT with cat(1).
#
# Args:
#   $tree_file - tree file path, passed through to get_sp_overlap()
# Returns:
#   Index into @NODES of the node selected by max_i(), or whatever max_i()
#   yields for an empty list when no node has a positive branch length.
# Dies if the scratch file cannot be opened or closed.
sub find_max_overlap {
    my ($tree_file) = @_;

    my @overlap    = ();   # scores of the candidate branches
    my @node_index = ();   # $node_index[$k] is the @NODES index of $overlap[$k]

    open(my $divide_fh, '>', $TMP_DIVIDE)
        || die "Cannot open $TMP_DIVIDE for writing: $!";
    for my $i (0 .. $#NODES) {
        # Zero-length (or negative) branches are not candidates.
        next unless $NODES[$i]->branch_length > 0;

        my $node1 = $NODES[$i]->internal_id;
        my $node2 = $NODES[$i]->ancestor->internal_id;
        my ($sp_overlap, $detail) = get_sp_overlap($tree_file, $i, $node1, $node2);
        print {$divide_fh} $detail;
        push @overlap,    $sp_overlap;
        push @node_index, $i;
    }
    # Close (and flush) before cat reads the file; write errors surface here.
    close($divide_fh) || die "Cannot close $TMP_DIVIDE: $!";

    # NOTE(review): assumes max_i() returns a 0-based position within its
    # argument list -- confirm against its definition (not in this file).
    my $best_pos = max_i(@overlap);

    # system "cat $TMP_DIVIDE | sort -t '\t' -k4,4 -k5,5gr -k3,3r";
    # List form: the file name is never interpreted by a shell.
    system('cat', $TMP_DIVIDE);

    # Skipped nodes make @overlap shorter than @NODES, so translate the
    # position in @overlap back to the matching @NODES index.
    return defined $best_pos ? $node_index[$best_pos] : $best_pos;
}

# Reroot a fresh copy of the tree at the midpoint of one branch and report
# how strongly the species sets of the two resulting subtrees overlap.
#
# The tree is rebuilt from $tree_file on every call, so the rerooting is
# applied to a local tree object and not to the caller's.
#
# Args:
#   $tree_file - tree file path handed to create_tree()
#   $i         - index of the branch's node in the descendant list of the
#                rebuilt tree (presumably the same ordering as the caller's
#                node list -- verify against caller)
#   $node1     - label for the node, used only in the report line
#   $node2     - label for its ancestor, used only in the report line
# Returns:
#   ($sp_overlap, $detail): the ratio (common species / all species)
#   formatted with "%.5f" ("0.00000" when no species are found), and a
#   newline-terminated report line describing the split.
sub get_sp_overlap {
    my ($tree_file, $i, $node1, $node2) = @_;

    my $tree = create_tree($tree_file);
    my @nodes = $tree->get_root_node->get_all_Descendents;
    my $n_seq = grep { $_->is_Leaf } @nodes;
    # Read the branch length before ids are moved and the tree is rerooted.
    my $branch_length = $nodes[$i]->branch_length;
    $tree->move_id_to_bootstrap;
    my $boot = $nodes[$i]->bootstrap || "";

    # calculation
    $tree->reroot_at_midpoint($nodes[$i]);
    my $root_node = $tree->get_root_node;
    my ($sub_tree1_node, $sub_tree2_node) = $root_node->each_Descendent;
    my @leaves1 = get_sub_tree_leaves($sub_tree1_node);
    my @leaves2 = get_sub_tree_leaves($sub_tree2_node);

    my @species1 = get_species_from_leaves(@leaves1);
    my @species2 = get_species_from_leaves(@leaves2);
    my @all_species = uniq(@species1, @species2);
    my @common_species = check_redundancy(@species1, @species2);

    my ($sum_of_duplication, $sum_of_sp_disappeared) = sum_of_duplication(\$tree);

    # print
    $node1 ||= "";
    $node2 ||= "";
    my @len1 = get_sub_tree_branch_length($sub_tree1_node);
    my @len2 = get_sub_tree_branch_length($sub_tree2_node);
    my $len1 = sprintf("%.5f", mean(@len1) || 0);
    my $len2 = sprintf("%.5f", mean(@len2) || 0);

    # Branch length relative to the mean branch length of both subtrees.
    my $len12 = mean(@len1, @len2);
    my $len_relative = 0;
    if ($len12) {
        $len_relative = $branch_length / $len12;
    }
    $len_relative = sprintf("%.5f", $len_relative);

    # |log2(l1/l2)|, left blank when it is undefined.
    # $len1 and $len2 are formatted strings at this point, and the string
    # "0.00000" is TRUE in Perl, so the guard has to compare numerically;
    # otherwise a zero mean reaches the division and the script dies.
    # Requiring a positive ratio also keeps log() away from negative input.
    my $log_ratio_len = "";
    if ($len2 != 0 and $len1 / $len2 > 0) {
        $log_ratio_len = sprintf("%.5f", abs(log($len1 / $len2) / log(2)));
    }

    my $n1 = scalar(@leaves1);
    my $n2 = scalar(@leaves2);
    my $n_sp = scalar(@all_species);
    my $n_sp_common = scalar(@common_species);
    # No species at all: report zero overlap instead of dividing by zero.
    my $sp_overlap = $n_sp ? $n_sp_common / $n_sp : 0;
    $sp_overlap = sprintf("%.5f", $sp_overlap);

    # Report format is kept byte-for-byte (including the ",," after the
    # log-ratio field) so existing consumers of the output keep working.
    my $detail = "i=$i($node1,$node2)\tb=$boot,\tl= $branch_length"
        . ", l_rel= $len_relative"
        . ", l1= $len1, l2= $len2"
        . ", |log2(l1/l2)|= $log_ratio_len,"
        . ", n= $n_seq = $n1 + $n2, "
        . "o_sp=$n_sp_common/$n_sp=\t$sp_overlap\t, n_dup=$sum_of_duplication, n_dis=$sum_of_sp_disappeared\n";

    return ($sp_overlap, $detail);
}
