#!/usr/bin/perl -s
use strict;
use DirHandle;
use FileHandle;

#
use MBGD;
#$main::CMD_convPhyloPat = 'convPhyloPat.pl';
#$main::CMD_phylopat     = 'phylopat';
#$main::CMD_domclust     = 'domclust';

$| = 1;

###############################################################################
#
sub expand_entry {
    my($ent_name) = shift;
    my($names_ref) = shift;

    my($expOut) = '';
    my($comp_name) = ($ent_name =~ /(\d+)/);

    #
    my(@list_name0) = @{$names_ref->[$comp_name]};
    while (scalar(@list_name0) >= 2) {
        my(@list_name_new);
        my($n) = scalar(@list_name0);
        while (scalar(@list_name0) >= 2) {
            my($name1) = shift(@list_name0);
            my($name2) = shift(@list_name0);
            my($name_new) = "($name1:0,$name2:0)";

            push(@list_name_new, $name_new);
        }
        if (scalar(@list_name0) == 1) {
            push(@list_name_new, @list_name0);
        }
        @list_name0 = @list_name_new;
    }

    return $list_name0[0];
}

###############################################################################
#
sub printPhylopatCluster {
    my($fileDomClustOut) = shift;
    my($names_ref) = shift;
    my($pat_ref) = shift;
    my($clusterNo);
    my($id_out_ref) = {};

    my($fh) = FileHandle->new("$fileDomClustOut") || die("Can not open $fileDomClustOut($!)");
    while(my$line = $fh->getline()) {
        if ($line =~ /^Cluster\s+(\d+)/) {
            $clusterNo = $1;
            print $clusterNo, "\t";
            print 1, "\t";
        }
        elsif ($line =~ /dmy_\d+/) {
            my(@id_list) = ($line =~ /dmy_(\d+)/g);
            foreach my$id (@id_list) {
                $id_out_ref->{"$id"} = 1;
            }

            $line =~ s#(dmy_\d+)#expand_entry($1, $names_ref)#eg;
            $line =~ s#\s##g;    # $BITMW$J6uGrJ8;z$r=|5n(B
            print $line, "\n";
        }
        else {
#            print $line;
        }
    }

    # cutoff $B=hM}$K$h$j:o=|$5$l$?%/%i%9%?!<$r(B SingleTree $B$H$7$F=PNO(B
    foreach my$comp_name (keys(%{$pat_ref})) {
        next if ($comp_name =~ /^\s*$/);
        next if (exists($id_out_ref->{"$comp_name"}));

        $clusterNo++;
        print $clusterNo, "\t";
        print '0', "\t";              # domclust $B$NBP>]30(B

        my($line) = "dmy_$comp_name;";
        $line =~ s#(dmy_\d+)#expand_entry($1, $names_ref)#eg;
        $line =~ s#\s##g;    # $BITMW$J6uGrJ8;z$r=|5n(B
        print $line, "\n";

    }

    $fh->close();

    return;
}

###############################################################################
#
sub phylopatCluster {
    my($fileIn) = shift;
    my(%args) = @_;

    my($MIN_EXIST, $cutoff, $missdist_ratio, $cutoff1, $cutoff2, $distpass);
    my($phylopatOpt,$domclustOpt);
    my($fh);
    my($cmd);

    #
    my$phyloPatOrigOut = "/tmp/phylopat.origout.$$";
    my$domClustOut     = "/tmp/domclust.out.$$";

    #
    $MIN_EXIST = $args{'min_exist'};
    $MIN_EXIST = 5 if (! $MIN_EXIST);

    #
    $cutoff = $args{'cutoff'};
    $distpass = $args{'distpass'};
    $cutoff = 1 if (! $cutoff);
    if ($args{'scale'}) {
        $cutoff *= $args{'scale'};
        $distpass *= $args{'scale'};
        $phylopatOpt .= " -s$args{scale}";
    }

    #
    $missdist_ratio = $args{'missdist_ratio'};
    $missdist_ratio = 1 if ($missdist_ratio < 1);

    #
    $cutoff1 = $cutoff * $missdist_ratio;
    $cutoff2 = $cutoff;
##    $cutoff2 = $cutoff * 100;

    if ($args{'disttype'} =~ /[NDrRIP]/) {
        ## N (norm hamming), R (corr), I (mutual info), P (hypergeom prob)
        if ($args{'disttype'} eq 'r') {
            ## correlation coefficient
            $phylopatOpt .= " -dR";
        }
        else {
            $phylopatOpt .= " -d$args{disttype}";
        }
        if ($args{'disttype'} eq 'R') {
            ## correlation coefficient, absolute value
            $phylopatOpt .= " -a";
        }
    }
    if ($args{'probcut'} =~ /\d/) {
        $phylopatOpt .= " -p$args{probcut}";
    }
    if ($args{'abs'}) {
        $phylopatOpt .= " -a";
    }
    if ($distpass) {
        $phylopatOpt .= " -C$distpass";
    }

print STDERR "START phylopatClust: min_exist=$MIN_EXIST cutoff=$cutoff missdist_ratio=$missdist_ratio\n";

    #
    $cmd = "$main::CMD_convPhyloPat -MIN_EXIST=$MIN_EXIST -COMPRESS -ORIGOUT=$phyloPatOrigOut $fileIn";
    $cmd .= "| $main::CMD_phylopat -v $phylopatOpt -c$cutoff1 -";
    $cmd .= "| $main::CMD_domclust -v -o3 -c$cutoff2 -d -ne1 $domclustOpt - > $domClustOut";
    print STDERR "DBG :: CMD :: $cmd\n" if ($main::DEBUG);
    system("$cmd");
    print STDERR "DBG :: CMD :: Done.\n" if ($main::DEBUG);

    #
    my(@Names);
    my(@Pat);
    my($pat_ref) = {};
    $fh = new FileHandle("$phyloPatOrigOut") || die("Can not open $phyloPatOrigOut($!)");
    while($_ = $fh->getline()){
        chomp;
        my($comp_name, $name, $pat) = split(/ /, $_, 3);
        push(@{ $Names[$comp_name] }, $name);
        $Pat[$comp_name] = $pat;
        $pat_ref->{"$comp_name"} = $pat;
    }
    $fh->close();

    #
    printPhylopatCluster($domClustOut, \@Names, $pat_ref);

    unlink($phyloPatOrigOut);
    unlink($domClustOut);
}

###############################################################################
if ($0 eq __FILE__) {

    #
    my($fileIn)  = shift(@ARGV);
    if (!defined($fileIn) || !-e"$fileIn") {
        print STDERR "Usage :: $0 File-In\n";
        exit(1);
    }

    #
    my(%args);
    $args{'min_exist'}      = $main::MIN_EXIST;
    $args{'cutoff'}         = $main::CUTOFF;
    $args{'probcut'}        = $main::PROBCUT;
    $args{'missdist_ratio'} = $main::MISSDIST_RATIO;
    $args{'disttype'}       = $main::DISTTYPE;
    $args{'scale'}          = $main::SCALE;
    $args{'abs'}            = $main::ABS;
    $args{'distpass'}       = $main::DISTPASS;

    #
    phylopatCluster($fileIn, %args);
}
1
;
