#!/usr/bin/perl -s
#use strict;
use IO::Dir;
use IO::File;
require 'MBGD_Conf.pl';
require 'MBGD_commonUpdate.pl';
require 'libMBGDaxes.pl';

## sorted by taxonorder
# Path of the taxonomy table. It is kept in a package variable because
# update_tax_tree() reads $taxontab directly.
$taxontab = "$ENV{'MBGD_HOME'}/etc/taxon.tab";
if ($main::FILE_taxontab) {
    # NOTE(review): the assignment below is commented out, so the
    # -FILE_taxontab switch currently has no effect.
#    $taxontab = $main::FILE_taxontab;
}

# The $main::* variables tested below are command-line switches; the script
# is started with "perl -s" (see the shebang line).
if ($main::dbname) {
    $ENV{'MYSQL_DB'} = $main::dbname;
}

if (!$main::OUT) {
    $main::OUT = "$ENV{'MBGD_HOME'}/work/tax";
}

# create tax for all
my(@sp_list);
if (!$main::USE_FILE) {
    @sp_list = MBGD_GetAllGenomes();
}
else {
    @sp_list = MBGD_GetAllGenomesFile();
}
## exclude draft genomes by default
if (! $main::USE_DRAFT) {
    @sp_list = grep { ! /^gd\d{4,}/ } @sp_list;
}
# -SPEC replaces the whole list, -SPEC_ADD appends to it (comma separated).
if ($main::SPEC) {
    @sp_list = split(',', $main::SPEC);
}
if ($main::SPEC_ADD) {
    push(@sp_list, split(',', $main::SPEC_ADD));
}
# Optionally append the second column of $RECOG_HOME/etc/spid.tab.
if ($main::ADD_FILE_SPIDTAB) {
    my($file_spid) = "$ENV{'RECOG_HOME'}/etc/spid.tab";
    my($fh) = IO::File->new("$file_spid");
    if ($fh) {
        while (defined(my $line = $fh->getline())) {
            next if ($line =~ /^\s*$/);
            next if ($line =~ /^\s*#/);

            # Strip the line ending first; otherwise a spec code in the last
            # column would be stored with a trailing newline.
            $line =~ s/[\r\n]+\z//;

            my($spid, $spec) = split("\t", $line);
            next if (!defined($spec) || $spec eq '');
            push(@sp_list, $spec);
        }
        $fh->close();
    }
    else {
        # Still best-effort, but no longer silent.
        print STDERR "Warning: can not open $file_spid($!)\n";
    }
}

# update_tax_tree() prints to the currently selected filehandle, so select
# the output file for the duration of the call and restore it afterwards.
my($filename) = "$main::OUT";
open(my $out_fh, '>', $filename) || die("Can not open $filename($!)");
my $prev_fh = select($out_fh);
update_tax_tree(@sp_list);
select($prev_fh);
# Buffered write errors only surface at close; check before converting.
close($out_fh) || die("Can not close $filename($!)");

# Convert the tree file; the shell is needed for the output redirection.
my($cmd) = "$ENV{'MBGD_HOME'}/package/domclust/Script/convtax.pl $filename > $filename.map";
if (system("$cmd") != 0) {
    print STDERR "Warning: command failed (status $?): $cmd\n";
}

exit(0);

# Build the taxonomy tree for the given genomes and print it.
#
# Arguments:
#   @sp_list - species codes whose genomes are placed in the tree.
#
# Reads the package variables $taxontab (taxonomy table path), $printtab,
# $main::USE_FILE and $main::user_genome (optional tab-separated file with
# the columns: species code, organism name, strain, taxid).
#
# Output: tree rows go to the currently selected filehandle (via printTree,
# and printTab when $printtab is set); log and warning lines go to STDERR.
# Dies if the taxonomy table or the user genome file cannot be opened.
sub update_tax_tree {
    my(@sp_list) = @_;
    my(%Genomes);   # taxid => [ {spec, name, strain}, ... ]
    my(%Flag);      # taxid => 1 once that taxid was found in the taxonomy table
    my($SpCnt);     # number of genomes registered in %Genomes

    # @Ranks / %Ranks (and $taxid / $name in the table loop below) stay
    # package variables because printTab() reads them directly.
    undef(@Ranks);
    undef(%Ranks);

    # Register every requested genome under its taxid.
    foreach my $sp (@sp_list) {
        my(%Ginfo);
        if (!$main::USE_FILE) {
            %Ginfo = MBGD_GetGenomeInfo($sp);
        }
        if (!$Ginfo{'taxid'}) {
            %Ginfo = MBGD_GetGenomeInfoFile($sp);
        }
        my $sp_taxid = $Ginfo{'taxid'};

        $sp_taxid = 9 if ($sp_taxid==107806); ## Buchnera sp. APS

        push(@{$Genomes{$sp_taxid}},
             {spec=>$sp, name=>$Ginfo{'orgname'}, strain=>$Ginfo{'strain'}});
        $Flag{$sp_taxid} = 0;
        $SpCnt++;
    }
    print STDERR "LOG :: SpCnt :: $SpCnt\n";

    # Optionally add user-supplied genomes.
    if ($main::user_genome) {
        my($filename) = "$main::user_genome";
        my($fh) = IO::File->new($filename, 'r') || die("Can not open $filename($!)");
        while (defined(my $line = $fh->getline())) {
            $line =~ s#[\r\n]*$##;
            my($sp, $orgname, $strain, $ug_taxid) = split(/\t/, $line);
            next if (! $sp);
            next if (! $orgname);
#        next if (! $strain);
            next if (! $ug_taxid);

            push(@{$Genomes{$ug_taxid}},
                 {spec=>$sp, name=>$orgname, strain=>$strain});
            $Flag{$ug_taxid} = 0;
            $SpCnt++;
        }
        $fh->close();
    }
    print STDERR "LOG :: SpCnt(+ug) :: $SpCnt\n";

    # Per-call traversal state. These used to be package globals that were
    # never reset, so a second call would have started from stale counters.
    my $LCA    = 0;   # depth of the deepest node on the current path already linked by addTree()
    my $OutCnt = 0;   # genomes placed in the tree so far
    my $prevdepth;    # depth field of the previously read table line

    open(my $tax_fh, '<', $taxontab) || die("Can not open $taxontab($!)");
    while (defined(my $line = <$tax_fh>)) {
        my($lname, $rank, $class, $depth, $hier, $taxorder);
        # $taxid and $name are deliberately package variables (see above).
        ($taxid, $name, $lname, $rank, $class, $depth, $hier, $taxorder) = split(/\t/, $line);
        if ($class ne 'scientific name') {
            $prevdepth = $depth;
            next;
        }

        # Not descending: discard the path entries from this depth down to
        # the previous depth and pull $LCA back above the current node.
        if ($depth <= $prevdepth)  {
            for (my $dp = $depth; $dp <= $prevdepth; $dp++) {
                if (my $r = $Ranks[$dp]->{rank}) {
                    $Ranks{$r} = 0;
                }
                $Ranks[$dp] = {};
            }
            $LCA = $depth - 1 if ($depth <= $LCA);
            $LCA = 0 if ($LCA < 0);
        }

        $Ranks[$depth] = {rank=>$rank, taxid => $taxid, name => $name};
        if ($rank) {
            $Ranks{$rank} = $depth;
        }
        if ($Genomes{$taxid}) {
            if ($printtab) {
                printTab();
            }
            $Ranks[$depth]->{spec} = $Genomes{$taxid};

            # Link the not-yet-attached part of the current path into the tree.
            addTree($LCA, $depth, \@Ranks);
            $LCA = $depth;
            $Flag{$taxid} = 1;

            $OutCnt += @{$Genomes{$taxid}};
            # Every registered genome has been placed; skip the rest of the table.
            last if ($OutCnt == $SpCnt);
        }
        $prevdepth = $depth;
    }
    close($tax_fh);

    createSpecList($Ranks[0]);
    checkPhylum($Ranks[0], 1, 0, 1, 0);
    printTree($Ranks[0], 1, 0, 1);

    # Report taxids that never appeared in the taxonomy table.
    foreach my $missing (keys %Flag) {
        next if ($Flag{$missing});
        my $specs = join(',', map { $_->{spec} } @{$Genomes{$missing}});
        print STDERR "Warning: $missing ($specs) not found\n";
    }

    return;
}

# Print one tab-separated row for the taxon currently being processed:
# taxid, name, then the (taxid, name) pairs getRanks() returns for the
# 'genus' and 'species' ranks.
# Reads the package variables $taxid, $name, @Ranks and %Ranks; called from
# update_tax_tree() when $printtab is set.
sub printTab {
    my @rank_columns = getRanks(\@Ranks, \%Ranks, 'genus', 'species');
    my $row = join("\t", $taxid, $name, @rank_columns);
    print $row, "\n";
}

# Look up the taxid and name recorded for each requested rank.
#
# Arguments:
#   $RankArray - array ref of node hashes, indexed by depth
#   $RankHash  - hash ref mapping a rank name to its depth in $RankArray
#   @RankList  - rank names to look up
#
# Returns a flat list with two elements (taxid, name) per requested rank, in
# request order. A rank that is missing from $RankHash, or mapped to 0,
# resolves to index 0 of $RankArray.
sub getRanks {
    my($RankArray, $RankHash, @RankList) = @_;

    return map {
        my $depth = $RankHash->{$_};
        ($RankArray->[$depth]->{taxid}, $RankArray->[$depth]->{name});
    } @RankList;
}

# Link the nodes of the current path into the tree.
#
# Arguments:
#   $LCA   - depth of the first node that receives a child
#   $depth - depth of the last node on the path
#   $Ranks - array ref of node hashes, indexed by depth
#
# For every depth i with $LCA <= i < $depth, the node at depth i+1 is
# appended to the 'childs' list of the node at depth i. Nothing happens when
# $LCA >= $depth.
sub addTree {
    my($LCA, $depth, $Ranks) = @_;

    # The index is lexical: the former C-style loop used the package
    # variable $i and overwrote it on every call.
    for my $i ($LCA .. $depth - 1) {
        # push autovivifies the 'childs' array on first use
        push(@{ $Ranks->[$i]->{childs} }, $Ranks->[$i + 1]);
    }
}

# Collect the species codes found in the subtree rooted at $n.
#
# The node's own genomes ($n->{spec}) come first, followed by the codes of
# each child subtree in child order. As a side effect every visited node
# gets $n->{speclist}, the comma-joined codes of its subtree.
#
# Returns the list of species codes.
sub createSpecList {
    my($n) = @_;
    my @codes;

    if ($n->{spec}) {
        push(@codes, map { $_->{spec} } @{ $n->{spec} });
    }
    for my $kid (@{ $n->{childs} }) {
        push(@codes, createSpecList($kid));
    }

    $n->{speclist} = join(',', @codes);
    return @codes;
}

# Recursively print the subtree rooted at $n to the currently selected
# filehandle, one tab-separated row per line:
#   level, name, species codes, rank, divisor, status
#
# Arguments:
#   $n     - node hash (name, rank, speclist, childs, optional spec)
#   $lev   - level number written for this node
#   $flag  - passed unchanged to the recursive calls; not used otherwise
#   $flag2 - when true, taxon rows are printed for nodes of any rank; it is
#            cleared for everything below a 'species' node
#
# A taxon row is printed when $flag2 is set or the node's rank is 'species';
# its divisor column is the number of children and its status column is
# empty. Each genome attached to the node ($n->{spec}) gets a row at level
# $lev+1 with rank 'genome', divisor 1 and status 2.
sub printTree {
    my($n, $lev, $flag, $flag2) = @_;
    my $is_species = ($n->{rank} eq 'species');
    my $printed    = 0;

    # Row for the taxon itself.
    if ($flag2 || $is_species) {
        my $n_children = @{$n->{childs}};
        print join("\t", $lev, $n->{name}, $n->{speclist},
                   $n->{rank}, $n_children, ''), "\n";
        $printed = 1;
    }

    # One row per genome attached directly to this taxon.
    if ($n->{spec}) {
        for my $genome (@{$n->{spec}}) {
            print join("\t", $lev + 1, "$genome->{name} $genome->{strain}",
                       $genome->{spec}, 'genome', 1, 2), "\n";
        }
        $printed = 1;
    }

    # Below a species node no further taxon rows are printed, and a species
    # node does not deepen the level of its descendants.
    if ($is_species) {
        $flag2 = 0;
    }
    elsif ($printed) {
        $lev++;
    }

    for my $kid (@{$n->{childs}}) {
        printTree($kid, $lev, $flag, $flag2);
    }
}

# Walk the subtree rooted at $n and make sure genomes sit below a node of
# rank 'phylum'.
#
# Arguments:
#   $n          - node hash (rank, childs, optional spec)
#   $lev        - level counter, only passed on to the recursive calls
#   $flag       - passed unchanged to the recursive calls
#   $flag2      - when true, a node of any rank may set the phylum flag; it
#                 is cleared for everything below a 'species' node
#   $flagPhylum - true once a 'phylum' node was seen on the path from the root
#
# When a child reports genomes without a phylum above them and this node is
# not a 'species' node, this node's rank is overwritten with 'phylum', the
# remaining children are not visited, and 0 is returned.
#
# Returns 1 if this node itself carries genomes ($n->{spec}) with no phylum
# above it (and it was not promoted as described above), otherwise 0.
sub checkPhylum {
    my($n, $lev, $flag, $flag2, $flagPhylum) = @_;
    my $is_species  = ($n->{rank} eq 'species');
    my $has_genomes = $n->{spec} ? 1 : 0;
    my $considered  = ($flag2 || $is_species) ? 1 : 0;

    if ($considered && $n->{rank} =~ /^phylum$/i) {
        $flagPhylum = 1;
    }

    my $sta = ($has_genomes && !$flagPhylum) ? 1 : 0;

    # Same level / flag bookkeeping as printTree, so both walks stay in step.
    if ($is_species) {
        $flag2 = 0;
    }
    elsif ($considered || $has_genomes) {
        $lev++;
    }

    for my $kid (@{$n->{childs}}) {
        next unless checkPhylum($kid, $lev, $flag, $flag2, $flagPhylum);
        next if $is_species;

        # A descendant has genomes but no phylum: promote this node.
        $n->{rank} = 'phylum';
        $sta = 0;
        last;
    }

    return $sta;
}

