#!/usr/bin/perl -s


use FindBin;
use lib "$FindBin::Bin/../perllib";
use GenomeData;

##
##
# -nocount_split -count_all
## Main script.
## The option variables used below ($datapath, $format, $orgs, $genomes,
## $OutGroup, $SpGrp, $funcdir, $MINCNT, $SPCNT, ...) are package globals
## filled in from "-name[=value]" command-line switches by the -s flag on
## the #! line, so they are deliberately not declared with "my".
$datapath = "../data" if (! $datapath);
$format = 'clusttab' if (! $format);
$infile = $ARGV[0];

if ($orgs) {
	@Species = split(/,/, $orgs);
	## one-letter species codes, built by the same helper that is used
	## for species lists read from the input file
	%code = &make_spcode(@Species);
	## by default a cluster must contain every requested species
	$MINCNT = @Species+0 if (! defined $MINCNT && ! defined $SPCNT);
}
if ($genomes) {
	$GenomeData = GenomeData->read($genomes, datapath=>$datapath);
	$readGenomes = 1;
} else {
	$GenomeData = GenomeData->new;
}
if ($OutGroup) {
	$GenomeData->setOutGroup($OutGroup);
}
if ($SpGrp) {
	$GenomeData->setSpGroup($SpGrp);
	$spgrp_flag = 1;
}
if ($genomes) {
	## done after setOutGroup/setSpGroup, as in the original ordering
	$GenomeData->setSpIndex;
}

if ($funcdir) {
	## NOTE(review): FuncCat is not loaded by this file; presumably it is
	## made available through GenomeData or perllib -- confirm.
	$funcData = FuncCat->new($funcdir, 'cog');
}

foreach $sp (@{ $GenomeData->{species} }) {
	$Length{$sp} = $GenomeData->{length}->{$sp};
}
$unit = 'kbp';

## The header for -orgs is printed only now: in 'postab' format it shows
## $Length{$sp}, which is empty until the loop above has run.
&print_spinfo(@Species) if ($orgs);

&read_clusttab($infile);

if ($format eq 'clustgene') {
	&print_clustgene;
}

## read_clusttree($file)
##   Reads a cluster listing made of "Cluster <id>" header lines followed
##   by member lines containing "+- sp:name" or "*- sp:name", and hands
##   each cluster to print_clusttab().
##   A "#" line carrying "SPEC=sp1,sp2,..." defines the species list when
##   -orgs was not given; it then also sets %code, the default $MINCNT and
##   prints the species header.
##   A cluster is emitted once, at the first blank line after it or at end
##   of file, so a file without a trailing blank line does not lose its
##   last cluster and repeated blank lines do not emit a cluster twice.
##   Dies if the file cannot be opened.
sub read_clusttree {
	my($file) = @_;
	my($clustid, %members, %funcs);
	my $pending = 0;	## a cluster has been read but not yet emitted
	open(my $fh, '<', $file) || die "Can't open $file: $!\n";
	while (my $line = <$fh>) {
		chomp $line;
		my $blank = 0;
		if ($line =~ /^#/) {
			if (! $orgs && $line =~ /SPEC=(\S+)/) {
				$orgs = $1;
				@Species = split(/,/, $orgs);
				%code = &make_spcode(@Species);
				$MINCNT = @Species+0 if (! defined $MINCNT && ! defined $SPCNT);
				&print_spinfo(@Species);
			}
		} elsif ($line =~ /^Cluster\s+(\S+)/) {
			$clustid = $1;
			%members = ();
			%funcs = ();
			$pending = 1;
		} elsif ($line =~ /[\+\*]\- (\S+)/) {
			my($sp, $name) = split(/:/, $1);
			$members{$sp}->{$name} = 1;
			if ($sp eq $funcsp) {
				my $funccat = $funcData->getGeneFunc("$sp:$name");
				foreach my $fn (@{$funccat}) {
					$funcs{$fn}++;
				}
			}
		} elsif ($line =~ /^$/) {
			$blank = 1;
		}
		## Keep this the last statement of the loop body and inside the
		## loop: print_clusttab() may leave through the caller's loop
		## when a cluster is filtered out.
		if ($pending && ($blank || eof($fh))) {
			$pending = 0;
			&print_clusttab($clustid, \%members, \%funcs);
		}
	}
	close($fh);
}
## read_clusttab($file)
##   Reads a tab-delimited cluster table, one cluster per line, and hands
##   each cluster to print_clusttab().  $file may be "-" for standard input.
##
##   Header lines:
##     - a first line starting with "ClusterID" marks a table saved by
##       mbgd; the column names after the first two that do not match
##       /Gene|Func/ become @Species (replacing any earlier list, so the
##       list always mirrors the table's columns), and the default $MINCNT
##       is set;
##     - a line starting with "#" gives the species list after the "#".
##   Data lines: cluster id, then (mbgd format only) one or two extra
##   columns, then one column per species holding space-separated gene
##   names.  A name written as "sp:gene" overrides the column's species
##   for that name and for the rest of that column.
##   $fmt2 ('skip1'/'skip2') is a package global; when empty it is detected
##   from the first data line by testing whether the third field is numeric.
##   Dies if the file cannot be opened.
sub read_clusttab {
	my($file) = @_;
	my $ln = 0;		## number of data lines seen so far
	my($fmt, $dmy);
	my($fh, $opened);
	if (defined $file && $file eq '-') {
		## two-argument open used to read "-" as standard input; keep that
		$fh = \*STDIN;
	} else {
		die "Can't open input: no file name given\n"
			if (! defined $file || $file eq '');
		## three-argument open: the name is never parsed for a mode or pipe
		open($fh, '<', $file) || die "Can't open $file: $!\n";
		$opened = 1;
	}
	while (my $line = <$fh>) {
		chomp $line;
		if ($ln == 0 && $line =~ /^ClusterID/) {
			## a file created by mbgd [save table]
			my($id1, $id2, @columns) = split(' ', $line);
			@Species = grep { $_ !~ /Gene|Func/ } @columns;
			%code = &make_spcode(@Species);
			if (! $orgs) {
				$orgs = join(',',@Species);
				&print_spinfo(@Species);
			}
			$MINCNT = @Species+0 if (! defined $MINCNT && ! defined $SPCNT);
			$fmt = 'mbgd';
			next;
		} elsif ($line =~ /^#/) {
			($dmy, @Species) = split(' ', $line);
			%code = &make_spcode(@Species);
			$orgs = join(',',@Species) if (! $orgs);
			next;
		}
		$ln++;
		my(@Fields) = split(/\t/, $line);
		my($clustid, @spData);
		if ($fmt eq 'mbgd') {
			if (! $fmt2) {
				$fmt2 = ($Fields[2] =~ /^\d+$/) ? 'skip2' : 'skip1';
			}
			if ($fmt2 eq 'skip1') {
				($clustid, $dmy, @spData) = @Fields;
			} else {
				($clustid, $dmy, $dmy, @spData) = @Fields;
			}
		} else {
			($clustid, @spData) = @Fields;
		}
		my(%Found, %FoundFunc);
		foreach my $i (0 .. $#Species) {
			my $sp = $Species[$i];
			foreach my $entry (split(/ /, $spData[$i])) {
				## doubled or leading spaces yield empty tokens;
				## they are not genes
				next if ($entry eq '');
				my $name = $entry;
				if ($entry =~ /:/) {
					($sp, $name) = split(/:/, $entry);
				}
				$Found{$sp}->{$name} = 1;
				if ($sp eq $funcsp) {
					my $funccat = $funcData->getGeneFunc("$sp:$name");
					foreach my $fn (@{$funccat}) {
						$FoundFunc{$fn}++;
					}
				}
			}
		}
		## must stay the last statement of the loop body: print_clusttab()
		## may leave through this loop when a cluster is filtered out
		&print_clusttab($clustid, \%Found,\%FoundFunc);
	}
	close($fh) if ($opened);
}

## make_spcode(@species)
##   Returns a hash mapping each species name to a one-letter code.
##   The code is the upper-cased character at offset 1 of the name; when
##   that letter is already taken, the upper-cased character at offset 2
##   is tried, and when that is taken too, the same character lower-cased
##   is used (without a further uniqueness check).
sub make_spcode {
	my(@names) = @_;
	my(%taken, %letter_of);
	for my $name (@names) {
		my $letter = uc(substr($name, 1, 1));
		if ($taken{$letter}) {
			$letter = uc(substr($name, 2, 1));
		}
		if ($taken{$letter}) {
			$letter = lc(substr($name, 2, 1));
		}
		$taken{$letter} = 1;
		$letter_of{$name} = $letter;
	}
	return %letter_of;
}

## print_clusttab($clustid, \%Found, \%FoundFunc)
##   Filters one cluster and writes it in the format selected by $format.
##     $Found     : { species => { gene name => 1 } }; modified in place
##                  when genome data were read (see below)
##     $FoundFunc : { function category => count }
##   Steps:
##     1. When $readGenomes is set, genes that $GenomeData->getGene() does
##        not know (a trailing "(n)" is ignored for the lookup) are removed;
##        a species left without any gene is removed as well, so it is not
##        counted as present.
##     2. Species are counted: with $count_all every present species counts,
##        otherwise only species with exactly one gene (and, with
##        $nocount_split, only if that gene has no "(n)" suffix).  With
##        $spgrp_flag the count is the number of distinct species groups
##        among the counted species.
##     3. The cluster is dropped unless the count equals $SPCNT (when
##        defined) or is at least $MINCNT (when defined).
##     4. Output for 'cluster', 'clusttab' and 'postab' goes to the
##        currently selected output handle; 'clustgene' only records the
##        cluster in %ClustInfo for print_clustgene().
##   %CountPat is incremented for the pattern of every cluster kept.
sub print_clusttab {
	my($clustid, $Found, $FoundFunc) = @_;
	my($cnt, $pat, $func);
	my(%FoundGrp, $spgrp);
	my(@foundSp);
	if ($readGenomes) {
		## check genes against the genome data
		foreach my $sp (@Species) {
			next if (! $Found->{$sp});
			foreach my $gname (keys %{ $Found->{$sp} }) {
				(my $gn = $gname) =~ s/\(\d+\)$//;
				if (! $GenomeData->getGene($sp,$gn) ) {
					delete $Found->{$sp}->{$gname};
				}
			}
			## an emptied species must not look "found" below
			delete $Found->{$sp} if (! %{ $Found->{$sp} });
		}
	}
	foreach my $sp (@Species) {
		next if (! $Found->{$sp});
		if ($spgrp_flag) {
			$spgrp = $GenomeData->getSpGroup($sp);
		}
		my @g = keys %{$Found->{$sp}};
		## count_all or count_one_by_one
		if ($count_all ||
			(@g == 1 &&
			 (! $nocount_split || $g[0] !~ /\(\d+\)/))) {
			$cnt++;
			$FoundGrp{$spgrp} = 1 if ($spgrp_flag);
		}
		push(@foundSp, $sp);
	}
	$pat = $GenomeData->getPhyloPat(\@foundSp);
	if ($spgrp_flag) {
		$cnt = scalar(keys %FoundGrp);
	}
	## "return", not "next": this code is not inside a loop of its own,
	## and "next" here is fatal unless the caller happens to be in a loop
	if (defined $SPCNT) {
		return if ($cnt != $SPCNT);
	} elsif (defined $MINCNT) {
		return if ($cnt < $MINCNT);
	}

	if (%{$FoundFunc}) {
		$func = join(' ', (keys %{$FoundFunc}) );
	}

	if ($format eq 'cluster') {
		print "Cluster $clustid\n";
		foreach my $sp (@Species) {
			foreach my $e (keys %{$Found->{$sp}}) {
				print "$sp:$e\n";
			}
		}
		print "\n";
	} elsif ($format eq 'clusttab') {
		print "$clustid";
		foreach my $sp (@Species) {
			print "\t";
			print join(' ', (keys %{$Found->{$sp}}));
		}
		print "\n";
	} elsif ($format eq 'postab') {
		print "$clustid";
		foreach my $sp (@Species) {
			print "\t";
			my @pos;
			foreach my $n (keys %{$Found->{$sp}}) {
				## look the gene up without its "(n)" suffix
				$n =~ s/\(\d+\)//;
				my $gene = $GenomeData->getGene($sp,$n);
				push(@pos,$gene->{pos});
			}
			print join(' ', @pos);
		}
		print "\n";
	} elsif ($format eq 'clustgene') {
		foreach my $sp (@Species) {
			foreach my $g (keys %{$Found->{$sp}}) {
				## the number in "(n)" becomes the third key part; default 1
				my $dom = 1;
				if ($g =~ /\((\d+)\)/) {
					$dom = $1;
					$g =~ s/\($dom\)//;
				}
				$ClustInfo{"$sp:$g:$dom"} = {
					clustid =>$clustid,
					pat => $pat,
					func => $func,
				};
			}
		}
	}
	$CountPat{$pat}++;
}


## print_clustgene()
##   Writes one tab-separated line per entry of %ClustInfo, in sorted key
##   order.  Keys have the form "species:gene:domain".  Columns are:
##   species:gene, domain, cluster id, "count/total" as returned by
##   countpat() for the entry's pattern, the pattern, and the function
##   string.
sub print_clustgene {
	foreach my $key (sort keys %ClustInfo) {
		my $info = $ClustInfo{$key};
		my($org, $gname, $domnum) = split(/:/, $key);
		my($present, $total) = &countpat($info->{pat});
		my @columns = (
			"$org:$gname",
			$domnum,
			$info->{clustid},
			"$present/$total",
			$info->{pat},
			$info->{func},
		);
		print join("\t", @columns), "\n";
	}
}

## countpat($pat)
##   Walks the characters of the pattern string $pat; position i is
##   skipped when $GenomeData->isOutGroupIndex(i) is true.
##   Returns ($cnt, $tot): the number of non-skipped positions whose
##   character is numerically equal to 1, and the number of non-skipped
##   positions.  Both are 0 (not undef) when nothing is counted.
sub countpat {
	my($pat) = @_;
	my($cnt, $tot) = (0, 0);
	## start at 0 explicitly: an undefined index on the first position
	## would be looked up as something other than index 0
	my $i = 0;

	foreach my $c (split(//,$pat)){
		if (! $GenomeData->isOutGroupIndex($i)) {
			$cnt++ if ($c == 1);
			$tot++;
		}
		$i++;
	}
	return ($cnt, $tot);
}


## pos_calc($sp, $from, $to)
##   Returns the midpoint of $from and $to as a string with one decimal,
##   expressed relative to the genome length $Length{$sp}: midpoints in
##   the second half of the genome (midpoint >= length/2) are reported as
##   negative offsets.  With $unit eq 'kbp' the value is divided by 1000;
##   for any other $unit it is a fraction of the length times 360.
sub pos_calc {
        my($sp, $from, $to) = @_;
        my $mid = ($from + $to) /2;
        my $len = $Length{$sp};
        my $second_half = ($mid >= $len / 2);
        my $value;
        if ($unit eq 'kbp') {
                $mid = $mid - $len if ($second_half);
                $value = $mid / 1000;
        } elsif ($second_half) {
                $value = ($mid / $len - 1) * 360;
        } else {
                ## $unit = 'degree';
                $value = $mid / $len * 360;
        }
        return sprintf("%.1f", $value);
}

## print_spinfo(@species)
##   Writes the species header.  In 'postab' format that is one
##   "# <species> <length>" line per species, with the length taken from
##   %Length; in every other format it is a single tab-separated line
##   starting with "#".
sub print_spinfo {
	my(@spec) = @_;
	if ($format ne 'postab') {
		print join("\t", '#', @spec), "\n";
		return;
	}
	print "# $_ $Length{$_}\n" foreach (@spec);
}
