#!/usr/bin/perl -s
use strict;
use FileHandle;
require 'MBGD_Conf.pl';
require 'MBGD_commonUpdate.pl';
require 'libMBGDaxes.pl';
require "MBGD_commonPath.pl";

package TaxTree;

###############################################################################
#
sub new {
    my($class) = shift;
    my($file_taxon) = shift;
    my(@args) = @_;
    my($self) = {};

    bless($self, $class);

    ## sorted by taxonorder
    if (!$file_taxon || !-e $file_taxon) {
        $file_taxon = "$ENV{'MBGD_HOME'}/etc/taxon.tab";
    }
    $self->set_filename_taxon($file_taxon);

    #
    $self->_init();

    return $self;
}

###############################################################################
#
sub _init {
    my($self) = shift;

    #
    $self->{'SPEC_LIST'} = {};
    $self->{'FLAG_TAXID'} = {};
    $self->{'SP_COUNT'} = 0;

    return;
}

###############################################################################
#
sub set_filename_taxon {
    my($self) = shift;
    my($filename) = shift;

    $self->{'FILE_TAXON_TAB'} = $filename;

    return;
}

###############################################################################
#
sub get_filename_taxon {
    my($self) = shift;

    return $self->{'FILE_TAXON_TAB'};
}

###############################################################################
#
sub add_spec {
    my($self) = shift;
    my($spec) = shift;
    my($name) = shift;
    my($strain) = shift;
    my($taxid) = shift;

    $taxid = 9 if ($taxid == 107806); ## Buchnera sp. APS

    my($ent) = {};
    $ent->{'spec'}   = $spec;
    $ent->{'name'}   = $name;
    $ent->{'strain'} = $strain;
    $ent->{'taxid'}  = $taxid;
    if (!exists($self->{'SPEC_LIST'}->{"$taxid"})) {
        $self->{'SPEC_LIST'}->{"$taxid"} = [];
    }
    push(@{$self->{'SPEC_LIST'}->{"$taxid"}}, $ent);

    $self->set_flag($taxid, 0);

    $self->{'SP_COUNT'}++;

    return;
}

###############################################################################
#
sub get_splist_ref {
    my($self) = shift;
    my($taxid) = shift;

    return $self->{'SPEC_LIST'}->{"$taxid"};
}

###############################################################################
#
sub get_sp_count {
    my($self) = shift;

    return $self->{'SP_COUNT'};
}

###############################################################################
#
sub get_taxid_list {
    my($self) = shift;

    my(@taxid_list) = keys(%{$self->{'SPEC_LIST'}});

    return @taxid_list;
}

###############################################################################
#
sub set_flag {
    my($self) = shift;
    my($taxid) = shift;
    my($sta) = shift;

    $self->{'FLAG_TAXID'}->{"$taxid"} = $sta;

    return;
}

###############################################################################
#
sub get_flag {
    my($self) = shift;
    my($taxid) = shift;

    my($sta) = $self->{'FLAG_TAXID'}->{"$taxid"};

    return $sta;
}

###############################################################################
#
sub create_tax_tree {
    my($self) = shift;

    my($sp_count) = $self->get_sp_count();

    my($file_taxon) = $self->get_filename_taxon();
    my($fh) = FileHandle->new("$file_taxon") || die("Can not open $file_taxon($!)");

    my($LCA) = 0;
    my($out_count) = 0;
    my(@ranks_list);
    my(%ranks_hash);
    my($i) = 0;
    my($prevdepth);
    while ($_ = $fh->getline()) {
#        print STDERR "." if (++$i % 500 == 0);
        my($taxid, $name, $lname, $rank, $class, $depth, $hier, $taxorder) = split(/\t/);
        if ($class ne 'scientific name') {
            $prevdepth = $depth;
            next;
        }
        
        if ($depth <= $prevdepth)  {
            for (my$dp = $depth; $dp <= $prevdepth; $dp++) {
                if (my$r = $ranks_list[$dp]->{'rank'}){
                    $ranks_hash{"$r"} = 0;
                }
                $ranks_list[$dp] = {};
            }
            $LCA = $depth - 1 if ($depth <= $LCA);
            $LCA = 0 if ($LCA < 0);
        }

        my($ent) = $ranks_list[$depth] = {};
        $ent->{'rank'}  = $rank;
        $ent->{'taxid'} = $taxid;
        $ent->{'name'}  = $name;

        #
        if ($rank) {
            $ranks_hash{"$rank"} = $depth;
        }
        my($splist_ref) = $self->get_splist_ref($taxid);
        if ($splist_ref) {
            if ($main::printtab) {
                printTab($taxid, $name, \@ranks_list, \%ranks_hash);
            }

            $ranks_list[$depth]->{'spec'} = $splist_ref;

            &addTree($LCA, $depth, \@ranks_list);
            $LCA = $depth;
            $self->set_flag($taxid, 1);

            $out_count += scalar(@{$splist_ref});
            last if ($sp_count <= $out_count);
        }
        $prevdepth = $depth;
    }

    createSpecList($ranks_list[0]);
    checkPhylum($ranks_list[0], 1, 0, 1, 0);
    printTree($ranks_list[0], 1, 0, 1);

    #
    my(@taxid_list) = $self->get_taxid_list();
    foreach my$taxid (@taxid_list) {
        my($sta) = $self->get_flag($taxid);
        if (! $sta) {
            my($tmpflag);
            print STDERR "Warning: $taxid (";

            my($splist_ref) = $self->get_splist_ref($taxid);
            foreach my$g (@{$splist_ref}) {
                print STDERR "," if ($tmpflag++);
                print STDERR "$g->{'spec'}";
            }
            print STDERR ") not found\n";
        }
    }

    return;
}

###############################################################################
#
sub printTab {
    my($taxid) = shift;
    my($name) = shift;
    my($ranks_list_ref) = shift;
    my($ranks_hash_ref) = shift;

	print join("\t", $taxid, $name,
		&getRanks($ranks_list_ref, $ranks_hash_ref, 'genus', 'species')),"\n";
}

###############################################################################
#
sub getRanks {
	my($RankArray, $RankHash, @RankList) = @_;
	my(@O);
	foreach my$r (@RankList) {
		my $depth = $RankHash->{$r};
		push(@O, $RankArray->[$depth]->{taxid},
			$RankArray->[$depth]->{name});
	}

    return @O;
}

###############################################################################
#
sub addTree {
	my($LCA, $depth, $Ranks) = @_;
##print ">$LCA,$depth\n";
    for (my$i = $LCA; $i < $depth; $i++) {
        $Ranks->[$i]->{childs} = [] if (! $Ranks->[$i]->{childs});
##print "Add: $Ranks->[$i]->{name} $Ranks->[$i]->{taxid} $Ranks->[$i+1]->{taxid}\n";
        push(@{$Ranks->[$i]->{childs}}, $Ranks->[$i+1]);
    }
}

###############################################################################
#
sub createSpecList {
    my($n) = @_;

    my(@speclist);
    if ($n->{spec}) {
        foreach my$spec (@{$n->{spec}}) {
            push(@speclist, $spec->{spec});
        }
    }
    foreach my$child (@{$n->{childs}}) {
        push(@speclist, &createSpecList($child));
    }
    $n->{speclist} = join(',', @speclist);

    return @speclist;
}

###############################################################################
#
sub printTree {
	my($n, $lev, $flag, $flag2) = @_;

	my($outflag);
#	if ($n->{spec} || $flag) {
		my($status,$div,$rank);
		my @splist = split(/,/,$n->{speclist});
		if ($flag2 || $n->{rank} eq 'species') {
			$div = @{$n->{childs}};
			$rank = $n->{rank};
			print join("\t", $lev, $n->{name},
				$n->{speclist},
				$rank, $div, $status),"\n";
			$outflag = 1;
		}
		if ($n->{spec}) {
			$rank = 'genome';
			$div = 1;
			$status = 2;
			foreach my$spec (@{$n->{spec}}) {
				print join("\t", $lev+1,
					"$spec->{name} $spec->{strain}",
					$spec->{spec},
					$rank, $div, $status),"\n";
			}
			$outflag = 1;
		}
#	}
	$flag2 = 0 if ($n->{rank} eq 'species');
	$lev++ if ($outflag && $n->{rank} ne 'species' );

#	if (@{$n->{childs}} > 1) {
#		$flag = 1;
#	} else {
#		$flag = 0;
#	}

	foreach my$child (@{$n->{childs}}) {
		printTree($child, $lev, $flag, $flag2);
	}
}

###############################################################################
#
sub checkPhylum {
	my($n, $lev, $flag, $flag2, $flagPhylum) = @_;
	my($outflag);
    my($sta) = 0;

	my($status,$div,$rank);
	my @splist = split(/,/,$n->{speclist});
	if ($flag2 || $n->{rank} eq 'species') {
		$div = @{$n->{childs}};
		$rank = $n->{rank};
        $flagPhylum = 1 if ($rank =~ /^phylum$/i);
		$outflag = 1;
	}
	if ($n->{spec}) {
        if (! $flagPhylum) {
            $sta = 1;
        }
		$outflag = 1;
	}

	$flag2 = 0 if ($n->{rank} eq 'species');
	$lev++ if ($outflag && $n->{rank} ne 'species' );

	foreach my$child (@{$n->{childs}}) {
        my($ret) = checkPhylum($child, $lev, $flag, $flag2, $flagPhylum);
        if ($ret) {
            if ($n->{rank} ne 'species' ) {
                $n->{rank} = 'phylum';
                $sta = 0;
                last;
            }
        }
	}
    
    return $sta;
}

###############################################################################
1;
###############################################################################
