#!/usr/bin/perl

use MBGD;
require 'InfoSpec.pl';

package MBGD::Taxonomy;
use File::Basename;

$TaxFile = $main::FILE_tax;
$WeightFile = $main::FILE_udate;
#$RANK = {
#	'genome' => 0,
#	'subspecies'       =>  5, 'species'       => 10,
#	'species subgroup' => 12, 'species group' => 14,
#	'subgenus'         => 18, 'genus'         => 20,
#	'subtribe'         => 22, 'tribe'         => 24,
#	'subfamily'        => 28, 'family'        => 30, 'superfamily'  => 32,
#	'suborder'         => 38, 'order'         => 40, 'superorder'   => 42,
#	'subclass'         => 48, 'class'         => 50, 'superclass'   => 52,
#	'subphylum'        => 55, 'phylum'        => 60, 'superphylum'  => 65,
#	                          'kingdom'       => 70, 'superkingdom' => 80,
#	'' => 100,
#};

###############################################################################
#
sub new {
	my($class, $taxdir, %opt) = @_;
	if ($TAX{$taxdir}) {
		return $TAX{$taxdir};
	}
	my($this)= {};
	bless $this, $class;
	if ($taxdir)  {
#		$this->{taxfile} = "$taxdir/" . basename($TaxFile);
		$this->{taxdir} = $taxdir;
		$this->{taxfile} = "$taxdir/" . basename($main::FILE_tax);
#		$this->{weightfile} = "$taxdir/" . basename($WeightFile);
#		$this->{weightfile} = "$taxdir/" . basename($main::FILE_spweight);
	} else {
		$this->{taxdir} = $DIR_database;
		$this->{taxfile} = $main::FILE_tax;
#		$this->{weightfile} = $main::FILE_spweight;
	}
	$this->read_taxfile();
	$TAX{$taxdir} = $this;
	if ($opt{use_spweight_file} == 1) {
		$this->{use_spweight_file} = 1;
	}


	$this->{'sort_specweight'} = sub {
			$this->{specweight}->{$b}<=>$this->{specweight}->{$a}
				or
			$a cmp $b;
	};

    $this->initRank();

	return $this;
}

###############################################################################
#
sub initRank {
    my($this) = shift;
    my(@rankFmtList) = ('%sspecies',
                        'species %sgroup',
                        '%sgenus',
                        '%stribe',
                        '%sfamily',
                        '%sorder',
                        '%sclass',
                        '%sphylum',
                        '%skingdom',
    );
    my(@prefixes) = ( 'parv',
                      'infra',
                      'sub',
                      '',
                      'super',
#                      'megn',
    );

    $this->{'RANK'} = {};

    my($rankValue) = 1;
    $this->{'RANK'}->{'genome'} = $rankValue++;
    foreach my$rankFmt (@rankFmtList) {
        foreach my$pref (@prefixes) {
            my($r) = sprintf($rankFmt, $pref);
            $this->{'RANK_LO'} = $r if (! $this->{'RANK_LO'});  # $B:G2<0L$N(B RANK
            $this->{'RANK_HI'} = $r;                            # $B:G>e0L$N(B RANK
            $this->{'RANK'}->{"$r"} = $rankValue++;
        }
    }
    $this->{'RANK'}->{''} = $rankValue++;

    return;
}

###############################################################################
#
sub getRank {
    my($this) = shift;

    my($refRank) = {%{$this->{'RANK'}}};

    return $refRank;
}

###############################################################################
#
sub getRankValue {
    my($this, $r, $node) = @_;

    while ($r eq 'no rank') {
        $node = $node->{'parent'};
        if ($node) {
            $r = $node->{'rank'};
        }
        else {
            $r = $this->{'RANK_HI'};
        }
    }

    my($rank) = $this->{'RANK'}->{"$r"};
#print STDERR "DBG :: RANK :: $r => $rank\n";

    return $rank;
}

###############################################################################
#
sub read_taxfile {
	my($this, $taxfile) = @_;
	my(@prev);
	$prev[0] = {};
	$taxfile = $this->{taxfile} if (! $taxfile);
	if ($Tree{$taxfile}) {
		$this->{tree} = $Tree{$taxfile};
		$this->{'tax_weight'} = $TaxWeight{$taxfile};
		return;
	}

	my($sta) = open(F, $taxfile);
	if (!$sta) {
		print STDERR "Can not open $taxfile($!)\n";
		return;
	}
	while (<F>) {
		chomp;

		next if (/^#/);
		($lev, $name, $speclist, $rank, $div, $status, $url) = split(/\t/);
		$node = {
			lev=>$lev, name=>$name, speclist=>$speclist,
			rank=>$rank, div=>$div, status=>$status, url=>$url
		};
		if ($prev[$lev-1]) {
			# set relation
			push(@{$prev[$lev-1]->{child}}, $node);
			$node->{'parent'} = $prev[$lev-1];
		}
		for ($i = $#prev; $i > $lev; $i--){
			undef $prev[$i];
		}
		$prev[$lev] = $node;
	}
	close(F);

	$this->{tree} = $prev[0];
	$Tree{$taxfile} = $prev[0];	#cache
	$TaxWeight{$taxfile} = $this->{'tax_weight'} = $this->setTaxWeight($this->{tree}->{'child'});
}

###############################################################################
#
sub clear_cache {
	my($this, $taxfile) = @_;
	$taxfile = $this->{taxfile} if (! $taxfile);
    delete($Tree{$taxfile});
}

###############################################################################
#
sub clear_specweight {
	my($this) = @_;

    delete($this->{specweight});

    return;
}

###############################################################################
#
sub read_specweight {
	my($this) = shift;
	my($file_specweight) = shift;

       	if (!$file_specweight) {
		if ( $this->{use_spweight_file} ne '1' && -f $this->{use_spweight_file}) {
            		$file_specweight = $this->{use_spweight_file};
		} else {
            		$file_specweight = "$this->{taxdir}/spec_weight";
		}
       	}
	my($fh) = IO::File->new("$file_specweight");

       	if (!$fh) {
           	print STDERR "LOG :: Can not open $file_specweight($!)\n";
            	return;
        }
	while(my$line=$fh->getline()) {
		$line =~ s#[\r\n]*$##;
		($sp, $weight) = split(/\t+/, $line);
#		my(@d) = split(/\t/, $line);
#            my($sp)     = $d[5];
#            my($weight) = $d[13];
		$this->{specweight}->{$sp} = $weight;
	}
	$fh->close();
}

###############################################################################
# weight = 99999999 - MySQL::genome.specweight
sub load_specweight {
	my($this) = @_;
	if(! $this->{specweight}){
		my($db) = MBGD::DB->new();
		my(@listGenome) = MBGD::Genome->get($db);
		foreach my$ref (@listGenome) {
			my($sp) = $ref->{'sp'};
			$this->{'specweight'}->{"$sp"} = $ref->{'specweight'};
		}
	}
}

###############################################################################
#
sub get_all_spec {
	my($this) = @_;
	return $this->get_species();
}

###############################################################################
# obsolete
sub get_selected_spec {
	my($this) = @_;
	## select one strain per genus
	return $this->get_species({'one_strain' => 'genus'});
}

###############################################################################
# default $B@8J*<o(B
sub get_default_spec {
	my($this) = @_;

	## select one strain per genus
	return $this->get_species({'one_strain' => 'genus'});
}

###############################################################################
#
sub is_default_spec {
	my($this) = shift;
	my(@sp_list) = @_;

    my(@sp_all_list) = $this->get_default_spec();
    my($sp_all_hash_ref) = {};
    foreach my$sp (@sp_all_list) {
        $sp_all_hash_ref->{"$sp"} = 1;
    }

    #
    foreach my$sp (@sp_list) {
        if (!exists($sp_all_hash_ref->{"$sp"})) {
            return 0;
        }
    }

    return 1;
}

###############################################################################
#
sub get_species {
	my($this, $option) = @_;
	my(@splist);
	if ($this->{use_spweight_file}) {
		$this->read_specweight;    # text file
	} else {
		$this->load_specweight;    # MySQL
	}
	if ($option->{all_strains}) {
		$this->find_tree_all_strains(\@splist,
			$option->{spec}, $option->{all_strains});
	} else {
		$this->find_tree(\@splist, $option);
	}

	return @splist;
}

###############################################################################
#
sub find_tree {
	my($this, $splist, $option) = @_;
	$this->find_tree0($this->{tree}, $splist, $option);
	if ($option->{sort_by_specweight}) {
		my($f) = $this->{'sort_specweight'};
		@{$splist} = sort $f @{$splist};
	}
}

###############################################################################
#
sub find_tree0 {
	my($this, $node, $splist, $option, $parent) = @_;
	my($flag, $check_rank, $matched_node_flag);
	if ($option->{match_rank}) {
        if ($option->{match_rank} eq $node->{rank}) {
		    if (($option->{match_name} eq $node->{name}) ||
                ($option->{match_name_regexp} && ($node->{name} =~ /$option->{match_name_regexp}/i))) {
                $option->{match_flag} = 1;
                $matched_node_flag = 1;
		    } else {
                return;
		    }
		}
	}

	if ($option->{one_strain}) {
		$check_rank = $option->{one_strain};
	} elsif ($option->{list_related}) {
		$check_rank = $option->{list_related};
	}
	if ($check_rank) {
		my($cr) = $this->getRankValue($check_rank, {});
		my($nr) = $this->getRankValue($node->{rank}, $node);
		my($pr) = $this->getRankValue($parent->{rank}, $parent);
		if ($cr == $nr) {
			$flag = 1;
		} elsif ($cr > $nr && $cr < $pr){
			$flag = 1;
		}
		if ($flag) {
			$option->{tmpselect} = [] if(!defined($option->{tmpselect}));
		}
	}

	if ($option->{match_rank} && ! $option->{match_flag}) {
		## unmatched
	} else {
	    if ($option->{get_rank}) {
	        if ($node->{rank} eq $option->{get_rank}) {
	            push(@{$splist}, $node->{name});
	        }
	    } elsif ($node->{rank} eq 'genome') {
	        if ($check_rank) {
	            push(@{$option->{tmpselect}}, $node->{speclist});
	        } else {
	            push(@{$splist}, $node->{speclist});
	        }
	    }
	}

	foreach $n (@{$node->{child}}) {
		$this->find_tree0($n, $splist, $option, $node);
	}

	if ($flag) {
		if ($option->{one_strain}) {
            my(@spec_list) = @{$option->{tmpselect}};
            if ($option->{'exist_sp'}) {
                @spec_list = ();
                foreach my$spec (@{$option->{tmpselect}}) {
                    if (!$option->{'exist_sp'}->{"$spec"}) {
                        next;
                    }
                    push(@spec_list, $spec);
                }
            }
			my($f) = $this->{'sort_specweight'};
			my($maxsp) = sort $f @spec_list;
#print STDERR "DBG :: maxsp :: $maxsp :: @spec_list\n";
			push(@{$splist}, $maxsp) if ($maxsp);
		} elsif ($option->{list_related}) {
			if (@{$option->{tmpselect}}) {
				push(@{$splist}, $option->{tmpselect});
			}
		}
		delete $option->{tmpselect};
	}
	if ($matched_node_flag) {
		$option->{match_flag} = 0;
	}
}

###############################################################################
#
sub find_tree_all_strains {
	my($this, $splist, $spec, $rank) = @_;
	$this->find_tree_all_strains0($this->{tree}, $splist, $spec, $rank);
}

###############################################################################
#
sub find_tree_all_strains0 {
	my($this, $node, $splist, $spec, $rank) = @_;

	my($r) = $this->getRankValue($rank, {});
	my($nr) = $this->getRankValue($node->{rank}, $node);
	if (($rank eq $node->{rank}) || ($nr < $r)) {
		my @splist00 = split(/,/, $node->{speclist});
		my($f) = $this->{'sort_specweight'};
		my @splist0 = sort $f @splist00;
		if ($spec) {
			if ($node->{speclist} =~ qr/$spec/) {
				@{$splist} = @splist0;
				return 1;
			}
		} else {
			push(@{$splist}, \@splist0);
		}

		# do not search under level
		return 0;
	}

	foreach $n (@{$node->{child}}) {
		if ($this->find_tree_all_strains0($n, $splist, $spec, $rank)) {
			return 1;
		}
	}
	return 0;
}

###############################################################################
#
sub find_node {
	my($this, $spec, $rank) = @_;
	return $this->find_node0($this->{tree}, $spec, $rank);
}

###############################################################################
#
sub find_node0 {
	my($this, $node, $spec, $rank) = @_;

	my($r) = $this->getRankValue($rank, {});
	my($nr) = $this->getRankValue($node->{rank}, $node);
	if (($rank eq $node->{rank}) || ($nr < $r)) {
		if ($node->{speclist} =~ qr/$spec/) {
			return $node;
		}
		return undef;
	}

	foreach $n (@{$node->{child}}) {
		my $ret = $this->find_node0($n, $spec, $rank);
		if ($ret) {
			return $ret;
		}
	}
	return undef;
}

###############################################################################
#
sub find_name {
	my($this, $spec, $rank) = @_;

    my$node = $this->find_node($spec, $rank);
    my$name = $node->{'name'};

    my($rankGenus) = $this->{'RANK'}->{'genus'};
    my($rankRank)  = $this->{'RANK'}->{"$rank"};
	if ($rankGenus <= $rankRank) {
#		$name =~ s#^Candidatus\s+(\S+\s+\S+)#\1#;
		$name =~ s#^Candidatus\s+##;
		if ($name =~ /^\s*$/) {
			$name = $node->{'name'};
		}

		($name) = split(/\s+/, $name);
		$name = ucfirst($name);
	}

	return $name;
}

###############################################################################
#
sub get_upper_nodes {
	my($this, $name) = @_;
	local(@RetList);
	&get_upper_nodes_sub($this->{tree}, $name);
	@RetList;
}

###############################################################################
#
sub get_upper_nodes_sub {
	my($node, $name) = @_;
	if ($node->{speclist} =~ /$name/) {
		push(@RetList, $node);
		if ($node->{rank} eq 'genome') {
			return;
		}
	}
	foreach $n (@{$node->{child}}) {
		&get_upper_nodes_sub($n, $name);
	}
}

###############################################################################
#
sub print {
	my($this, $opt) = @_;
	if ($opt->{format} eq 'tree') {
		$this->print_taxtree();
	} elsif ($opt->{format} eq 'tab') {
		$this->print_tab();
	}
}

###############################################################################
#
sub print_taxtree {
	my($this) = @_;
	&print_tree($this->{tree}, 0);
}

###############################################################################
#
sub print_tree {
	my($node, $lev) = @_;
	print " " x $lev;
	print $node->{name}," [", $node->{rank}, "]\n";
	foreach $n (@{$node->{child}}) {
		&print_tree($n, $lev+1);
	}
}

###############################################################################
#
sub print_tab {
	my($this, $out) = @_;
	&print_tree_tab($this->{tree}, 0, 0, $out);
}

###############################################################################
#
sub print_tree_tab {
	my($node, $lev, $flag, $out) = @_;
	if ($flag || $node->{rank} eq 'genome') {
		my $status;
		$status = 2 if ($node->{speclist} =~ /,/);
		my $outstr = join("\t", $lev, $node->{name},
			$node->{speclist}, $node->{rank},
			$status, $node->{status}, $node->{url}) . "\n";
		if (ref $out eq 'ARRAY') {
			push(@{$out}, $outstr);
		} else {
			print $outstr;
		}
	}
	if (@{$node->{child}} > 1) {
		foreach $n (@{$node->{child}}) {
			&print_tree_tab($n, $lev+1, 1, $out);
		}
	} elsif (@{$node->{child}} == 1) {
		&print_tree_tab($node->{child}->[0], $lev, 0, $out);
	}
}

###############################################################################
#
sub setTaxWeight {
    my($this) = shift;
    my($refChild) = shift;

    my($file_spid_dist) = "$ENV{'MBGD_HOME'}/etc/spid.tab.dist";
    my($refTaxWeight) = {};
    my($idx) = 1;
    foreach my$node (@{$refChild}) {
        foreach my$sp (split(',', $node->{'speclist'})) {
            my($spid) = main::sp2spid($sp, $file_spid_dist);
            $refTaxWeight->{"$sp"} = $idx;
            $refTaxWeight->{"$spid"} = $idx;
            $idx++;
        }
    }

    return $refTaxWeight;
}

###############################################################################
#
sub sortByTaxonomy {
    my($this) = shift;
    my(@splist) = @_;

    return sort {
        my($wa) = $this->{'tax_weight'}->{"$a"};
        my($wb) = $this->{'tax_weight'}->{"$b"};

        if ($wa && $wb) {
            $wa <=> $wb;
        }
        elsif ($wa) {
            -1;
        }
        elsif ($wb) {
            1;
        }
        else {
            $a cmp $b;
        }
    } @splist;
}

###############################################################################
package main;
if ($0 eq __FILE__) {
	package main;
	$t = MBGD::Taxonomy->new();

	@n = $t->get_upper_nodes('eco');
	foreach $n (@n) {
		print join("\t",$n->{name},$n->{rank}),"\n";
	}

	$t->print( {format=>'tab'} );

	@spec = $t->get_default_spec();
	print "DefaultSpec: @spec\n";

	@spec = $t->get_species({ 'one_strain' => 'genus' });
	print "1 str/Genus: @spec\n";

	@spec = $t->get_species({ 'list_related' => 'genus' });
	print "list related: ";
	foreach $sp (@spec) {
		if (@{$sp} >=2) {
			print " (", join(' ', @{$sp}), ") ";
		}
	}
	print "\n";

	@spec = $t->get_species({ 'all_strains' => 'species', spec => 'eco' });
	print "AllStrains(eco): @spec\n";
}
###############################################################################
1;
###############################################################################

