#!/usr/local/bin/perl

require 'MBGD_Conf.pl';

package MBGD_taxonomy;

$TaxFile = $main::FILE_tax;
sub new {
	my($class, $taxfile) = @_;
	my($this)= {};
	bless $this, $class;
	if ($taxfile)  {
		$this->{taxfile} = $taxfile;
	} else {
		$this->{taxfile} = $TaxFile;
	}
	$this->read_taxfile();
	return $this;
}

sub read_taxfile {
	my($this, $taxfile) = @_;
	my(@prev);
	$prev[0] = {};
	$taxfile = $this->{taxfile} if (! $taxfile);
	if ($Tree{$taxfile}) {
		$this->{tree} = $Tree{$taxfile};
		return;
	}

	open(F, $taxfile);
	while (<F>) {
		chomp;
		next if (/^#/);
		($lev, $name, $speclist, $rank, $div, $status, $url) = split(/\t/);
		$node = {
			lev=>$lev, name=>$name, speclist=>$speclist,
			rank=>$rank, div=>$div, status=>$status, url=>$url
		};
		if ($prev[$lev-1]) {
			push(@{$prev[$lev-1]->{child}}, $node);
		}
		for ($i = $#prev; $i > $lev; $i--){
			undef $prev[$i];
		}
		$prev[$lev] = $node;
	}
	$this->{tree} = $prev[0];
	$Tree{$taxfile} = $prev[0];	#cache
	close(F);
}
sub get_all_spec {
	my($this) = @_;
	return $this->get_species();
}
sub get_default_spec {
	my($this) = @_;
	## select one strain per species
	return $this->get_species({'one_strain' => 'species'});
}
sub get_species {
	my($this, $option) = @_;
	my(@splist);
	if ($option->{all_strains}) {
		&find_tree_all_strains($this->{tree}, \@splist,
			$option->{spec}, $option->{all_strains});
	} else {
		&find_tree($this->{tree}, \@splist, $option);
	}
	return @splist;
}
sub find_tree {
	my($node, $splist, $option) = @_;
	my($flag);
	if ($option->{one_strain} && $option->{one_strain} eq $node->{rank}) {
		$option->{spec_count} = 0;
	}
	if ($option->{match_rank} && $option->{match_rank} eq $node->{rank}) {
		if ($option->{match_name} ne $node->{name}) {
			return;
		}
	}
	if ($node->{rank} eq 'genome') {
		if ($option->{one_strain}) {
			if (! $option->{spec_count}) {
				$option->{spec_count}++;
				$flag = 1;
			}
		} else {
			$flag = 1;
		}
		push(@{$splist}, $node->{speclist}) if ($flag);
	}
	foreach $n (@{$node->{child}}) {
		&find_tree($n, $splist, $option);
	}
}
sub find_tree_all_strains {
	my($node, $splist, $spec, $rank) = @_;
	if ($rank eq $node->{rank}) {
		if ($node->{speclist} =~ qr/$spec/) {
			@{$splist} = split(/,/, $node->{speclist});
			return 1;
		}
	}
	foreach $n (@{$node->{child}}) {
		if (&find_tree_all_strains($n, $splist, $spec, $rank)) {
			return 1;
		}
	}
	return 0;
}

sub print {
	my($this, $opt) = @_;
	if ($opt->{format} eq 'tree') {
		$this->print_taxtree();
	} elsif ($opt->{format} eq 'tab') {
		$this->print_tab();
	}
}
sub print_taxtree {
	my($this) = @_;
	&print_tree($this->{tree}, 0);
}
sub print_tree {
	my($node, $lev) = @_;
	print " " x $lev;
	print $node->{name}," [", $node->{rank}, "]\n";
	foreach $n (@{$node->{child}}) {
		&print_tree($n, $lev+1);
	}
}
sub print_tab {
	my($this, $out) = @_;
	&print_tree_tab($this->{tree}, 0, 0, $out);
}
sub print_tree_tab {
	my($node, $lev, $flag, $out) = @_;
	if ($flag || $node->{rank} eq 'genome') {
		my $status;
		$status = 2 if ($node->{speclist} =~ /,/);
		my $outstr = join("\t", $lev, $node->{name},
			$node->{speclist}, $node->{rank},
			$status, $node->{status}, $node->{url}) . "\n";
		if (ref $out eq 'ARRAY') {
			push(@{$out}, $outstr);
		} else {
			print $outstr;
		}
	}
	if (@{$node->{child}} > 1) {
		foreach $n (@{$node->{child}}) {
			&print_tree_tab($n, $lev+1, 1, $out);
		}
	} elsif (@{$node->{child}} == 1) {
		&print_tree_tab($node->{child}->[0], $lev, 0, $out);
	}
}

package main;
if ($0 eq __FILE__) {
	package main;
	$t = MBGD_taxonomy->new();
	$t->print( {format=>'tab'} );

	@spec = $t->get_default_spec();
	print "DefaultSpec: @spec\n";

	@spec = $t->get_species({ 'one_strain' => 'genus' });
	print "1 str/Genus: @spec\n";

	@spec = $t->get_species({ 'all_strains' => 'species', spec => 'eco' });
	print "AllStrains(eco): @spec\n";
}
1;
