#!/usr/local/bin/perl

###############################################################################
# MBGD::ClustTab::Fields
#  -- Adding and printing additional fields of the cluster table
###############################################################################

package MBGD::ClustTab::Fields;
use MBGD::ClustTab;

#$FIELD_FuncCat = "FuncCat";
#$FIELD_GeneName = "Gene";
#$FIELD_Motifs = "Motifs";
#$FIELD_Score = "Score";
#$FIELD_Genes = "Genes";

# The following lines specify the default column orders of the input and
# output tables.
# FuncCat and Name are already included in the input file;
#   they are added by the addinfo command.

#$DefaultReadFormat = [ID,DUMMY,DATA,FuncCat,Name];
# Default column order of the input table.
$DefaultReadFormat = [
	'ID', 'DUMMY', 'DATA', 'Name',
	'FuncCat_mbgd', 'FuncCat_cog', 'FuncCat_kegg', 'FuncCat_tigr',
];
# Default column order of the output table.
$DefaultWriteFormat = [ 'Name', 'DATA', 'Query', 'Score', 'Motifs' ];

###############################################################################
# Base class
###############################################################################
# Return the shared instance of a field class (one object per class name).
#   $class : class name.  When $name is given, the first "Fields" in $class
#            is removed and $name is appended to build the target class name.
#   $name  : optional class name suffix.
# Returns the cached blessed hash of the class, or 0 when the class is not
# derived from MBGD::ClustTab::Fields (the 0 is cached as well).
sub getInstance {
	my $class = shift;
	my $name  = shift;
	if ($name) {
		$class =~ s/Fields//;
		$class = $class . $name;
	}

	my $cached = $Instance{$class};
	return $cached if (defined $cached);

	unless (UNIVERSAL::isa($class, 'MBGD::ClustTab::Fields')) {
		$Instance{$class} = 0;
		return 0;
	}
	$Instance{$class} = {};
	return bless $Instance{$class}, $class;
}

# Method add() should be called through $ClustTab->add_field(ClassName,\%option)
#
# sub add {
#          Should be defined in each concrete class !!
# }

# Default plain-text rendering: the field value is returned unchanged.
sub print {
	my (undef, $value) = @_;
	return $value;
}
# Default HTML rendering: the field value is returned unchanged.
sub print_HTML {
	shift;			# the invocant is not needed
	my $value = shift;
	return $value;
}
# Field name = last component of the class name.
# Works for both an object and a class name string.
sub field_name {
	my ($this) = @_;
	my $class = ref($this) ? ref($this) : $this;
	# scalar assignment keeps the result a single value (undef if empty)
	my $last = (split(/::/, $class))[-1];
	return $last;
}
# Name shown for the field; by default the same as field_name().
sub display_name {
	my $this = shift;
	return $this->field_name;
}

###############################################################################
package MBGD::ClustTab::AnnotSp;
use MBGD;
# AnnotSp is a subclass of MBGD::ClustTab::Fields.
@ISA = ('MBGD::ClustTab::Fields');

# Set the 'AnnotSp' field of every cluster to the 'descr' value of the gene
# found for the species given in $opt->{annotsp}.
#   $clustTab : cluster table object (list_clusters, get_spdata_by_sp)
#   $opt      : option hash ref; $opt->{annotsp} is the species name
# Clusters that have no gene for that species are skipped (field not set).
sub add {
	my($this, $clustTab, $opt) = @_;
	my $sp = $opt->{'annotsp'};

	foreach my $cluster ($clustTab->list_clusters) {
		# Only the first gene of the species is looked up.
		my $spdata = $clustTab->get_spdata_by_sp($cluster, $sp);
		my($orfname) = $spdata ? @{$spdata} : ();
		next if (! $orfname);

		# NOTE(review): the condition string is built by interpolation;
		# confirm that MBGD::Gene->find quotes or validates its input.
		my($gene_ref) = MBGD::Gene->find("sp='$sp' and name='$orfname'");

		# No gene record found: store undef without dereferencing undef.
		my $descr = $gene_ref ? $gene_ref->{'descr'} : undef;
		$cluster->set_field('AnnotSp', $descr);
	}
}
## should be defined. May be different from FIELD_xxx

# Plain-text rendering of the AnnotSp field: the annotation is returned
# unchanged, as print_HTML of this class does.
# (Without an explicit return value the sub returned the result of the
# argument list assignment instead of the annotation.)
sub print
{
	my($this, $annot) = @_;
	return $annot;
}
# HTML rendering of the AnnotSp field: the annotation is returned unchanged.
sub print_HTML
{
	my $annot = $_[1];
	return $annot;
}

###############################################################################
package MBGD::ClustTab::Motifs;
use MBGD;
# Motifs is a subclass of MBGD::ClustTab::Fields.
@ISA = ('MBGD::ClustTab::Fields');

# Set the 'Motifs' field of every cluster to an array ref of the motif keys
# ("motlib:motid:motname") whose count from _countMotifHits is above zero.
#   $clustTab : cluster table object (tabid, list_clusters, allgenes)
#   $opt      : option hash ref; $opt->{motif_cutoff} is the cutoff passed
#               to _countMotifHits.  A false value is replaced by 50, and
#               the default is stored back into $opt as before.
sub add {
	my($this, $clustTab, $opt) = @_;
	my $tabid = $clustTab->tabid;
	my @clustids = $clustTab->list_clusters('clustid');
	my $motifHits = MBGD::ProtMotif->findMotifsForClustTab(
					$tabid, \@clustids);

	$opt->{motif_cutoff} = 50 if (! $opt->{motif_cutoff});
	my $cutoff = $opt->{motif_cutoff};

	foreach my $cluster ($clustTab->list_clusters) {
		my @entnames = $clustTab->allgenes($cluster);
		my %motifCounts = _countMotifHits($motifHits, \@entnames, $cutoff);
		# sorted so that the field content does not depend on hash order
		my @motHit = sort grep { $motifCounts{$_} > 0 } keys %motifCounts;
		$cluster->set_field('Motifs', \@motHit);
	}
}
## should be defined. May be different from FIELD_xxx

# Plain-text rendering: the motif keys joined by a single space.
sub print
{
	my $motifs = $_[1];
	return join(' ', @$motifs);
}
# HTML rendering: one link to searchGeneByMotif.pl per motif, labelled with
# the motif name.
#   $motifs : array ref of "motlib:motid:motname" keys
# Returns the concatenated links, or undef when there is no motif.
sub print_HTML
{
	my($this, $motifs) = @_;
	# Package variable used for SPEC=; nothing in this file assigns it,
	# so the parameter is empty unless it is set from outside.
	our $species;
	my $disp;
	foreach my $mot (@{$motifs}) {
		# limit 3: a ':' inside the motif name stays part of the name
		my($motlib, $motid, $motname) = split(/:/, $mot, 3);
		$disp .= " <A HREF=\"/htbin/searchGeneByMotif.pl?SPEC=$species&motid=$motid&motlib=$motlib&limit=100\"><FONT size=\"-1\">$motname</FONT></A>\n";
	}
	return $disp;
}


# Count, for each motif, the number of distinct genes having that motif.
#   $motifhits    : hash ref, gene name => array ref of motif hits; each
#                   hit is a hash ref with motlib, motid and motname
#   $entnames     : array ref of entry names; each one is converted to a
#                   gene name with MBGD::ClustTab::parse_genename
#   $motif_cutoff : percentage of the number of entries that a motif has to
#                   reach; below it the count is reset to 0
# Returns a hash (as a list): "motlib:motid:motname" => count.
# Neither @$entnames nor %$motifhits is modified.
sub _countMotifHits {
	my($motifhits, $entnames, $motif_cutoff) = @_;
	my %motif_list;
	my $entry_cnt = scalar(@{$entnames});

	foreach my $entname (@{$entnames}) {
		# The loop variable is an alias of the caller's array element,
		# so the parsed name is kept in a separate variable.
		my($name) = MBGD::ClustTab::parse_genename($entname);
		# "|| []" avoids creating empty entries in the caller's hash
		foreach my $mot (@{ $motifhits->{$name} || [] }) {
			my $motid = "$mot->{motlib}:$mot->{motid}:$mot->{motname}";
			$motif_list{$motid}->{$name} ++;
		}
	}

	my %motif_cnt;
	my $min_cnt = $entry_cnt * $motif_cutoff / 100;
	foreach my $motid (keys %motif_list) {
		my $cnt = scalar(keys %{$motif_list{$motid}});
		$motif_cnt{$motid} = ($cnt < $min_cnt) ? 0 : $cnt;
	}
	return %motif_cnt;
}

###############################################################################
package MBGD::ClustTab::Score;
# Score is a subclass of MBGD::ClustTab::Fields.
@ISA = ('MBGD::ClustTab::Fields');
# Name shown for the Score field.
sub display_name {
	return 'Avg.Score';
}
# Set the 'Score' field of every cluster to its average score.
#   $clustTab : cluster table object (list_clusters, allgenes)
#   $opt      : option hash ref with
#       GeneWeight : hash ref, gene => "score" or "score:count"
#                    (a missing or zero count is taken as 1)
#       missdist   : value added for each query not counted for a gene
#       qcount     : number of queries; a false value is replaced by 1
#                    and stored back into $opt
# For each gene, score + missdist * (qcount - count) is added to the
# cluster total; the average is that total divided by qcount * (number
# of genes), or 0 for a cluster without genes.
sub add {
	my($this, $clustTab, $opt) = @_;
	my $GeneWeight = $opt->{GeneWeight};
	my $missdist = $opt->{missdist} || 0;

	# loop invariant, so the default is applied once
	$opt->{qcount} = 1 if (! $opt->{qcount});
	my $qcount = $opt->{qcount};

	foreach my $cluster ($clustTab->list_clusters) {
		my($TotCount, $ClustScore) = (0, 0);
		foreach my $ent ($clustTab->allgenes($cluster)) {
			# a gene without weight counts as score 0, count 0
			my($score, $cnt) = (0, 0);
			if ($GeneWeight->{$ent}) {
				($score, $cnt) = split(/:/, $GeneWeight->{$ent});
				$cnt = 1 if (! $cnt);
				$ClustScore += $score;
			}
			$ClustScore += $missdist * ($qcount - $cnt);
			$TotCount += $qcount;
		}
		my $AvgScore = ($TotCount > 0) ? $ClustScore / $TotCount : 0;
		$cluster->set_field('Score', $AvgScore);
	}
}
# Plain-text rendering: the score with one digit after the decimal point.
sub print {
	my $score = $_[1];
	return sprintf('%.1f', $score);
}
# HTML rendering: same text as print().
sub print_HTML {
	my $this  = shift;
	my $score = shift;
	return $this->print($score);
}

###############################################################################
package MBGD::ClustTab::GeneName;
# GeneName is a subclass of MBGD::ClustTab::Fields.
@ISA = ('MBGD::ClustTab::Fields');

# Set the 'GeneName' field of every cluster to the most frequent value of
# the 'gene' key among the records returned by MBGD::Gene->get for the
# genes of the cluster.
#   $clustTab : cluster table object (list_clusters, allgenes)
#   $opt      : not used
# A cluster without any record gets undef.  Ties are resolved in favour of
# the name that sorts first, so the result does not depend on hash order.
sub add {
	my($this, $clustTab, $opt) = @_;
	foreach my $cluster ($clustTab->list_clusters) {
		my @entries = $clustTab->allgenes($cluster);
		my %Count;
		foreach my $g (MBGD::Gene->get(\@entries)) {
			$Count{$g->{gene}}++;
		}
		# Both variables must be local to this cluster: "my $a, $b"
		# declares only $a, which let the previous name leak through.
		my($maxCount, $maxName) = (0, undef);
		foreach my $name (sort keys %Count) {
			if ($Count{$name} > $maxCount) {
				$maxCount = $Count{$name};
				$maxName = $name;
			}
		}
		$cluster->set_field('GeneName', $maxName);
	}
}
# Plain-text rendering: the gene name is returned unchanged.
sub print {
	my $name = $_[1];
	return $name;
}
# HTML rendering: same text as print().
sub print_HTML {
	my $this = shift;
	my $name = shift;
	return $this->print($name);
}
###############################################################################
package MBGD::ClustTab::Func;
# Func is a subclass of MBGD::ClustTab::Fields.
@ISA = ('MBGD::ClustTab::Fields');

# Store, for every cluster, the most frequent value of the 'gene' key among
# the records returned by MBGD::Gene->get for the genes of the cluster.
#   $clustTab : cluster table object (list_clusters, allgenes)
#   $opt      : not used
# A cluster without any record gets undef.  Ties are resolved in favour of
# the name that sorts first, so the result does not depend on hash order.
#
# NOTE(review): the value is stored under 'GeneName', not 'Func', and the
# body is the same as MBGD::ClustTab::GeneName::add -- this looks like an
# unfinished copy; confirm which field this class is meant to fill.
sub add {
	my($this, $clustTab, $opt) = @_;
	foreach my $cluster ($clustTab->list_clusters) {
		my @entries = $clustTab->allgenes($cluster);
		my %Count;
		foreach my $g (MBGD::Gene->get(\@entries)) {
			$Count{$g->{gene}}++;
		}
		# Both variables must be local to this cluster: "my $a, $b"
		# declares only $a, which let the previous name leak through.
		my($maxCount, $maxName) = (0, undef);
		foreach my $name (sort keys %Count) {
			if ($Count{$name} > $maxCount) {
				$maxCount = $Count{$name};
				$maxName = $name;
			}
		}
		$cluster->set_field('GeneName', $maxName);
	}
}
# Plain-text rendering: the stored value is returned unchanged.
sub print {
	my (undef, $value) = @_;
	return $value;
}
# HTML rendering: same text as print().
sub print_HTML {
	my $this  = shift;
	my $value = shift;
	return $this->print($value);
}

###############################################################################
package MBGD::ClustTab::Query;
# Query is a subclass of MBGD::ClustTab::Fields.
@ISA = ('MBGD::ClustTab::Fields');

# Set the 'Query' field of every cluster to the space-separated list of
# query genes for which a homopair entry with a gene of the cluster exists.
#   $clustTab : cluster table object (list_clusters, allgenes)
#   $opt      : option hash ref with
#       Qgene    : hash ref whose keys are the query genes
#       homopair : hash ref keyed by "gene $; query" (multi-dimensional
#                  hash key, i.e. $h->{$gene,$query})
# The query genes are listed in sorted order so that the field content does
# not depend on hash order.
sub add {
	my($this, $clustTab, $opt) = @_;
	# loop invariants
	my @queries = keys %{ $opt->{Qgene} || {} };
	my $homopair = $opt->{homopair} || {};

	foreach my $cluster ($clustTab->list_clusters) {
		my %Queries;
		foreach my $e ($clustTab->allgenes($cluster)) {
			foreach my $q (@queries) {
				$Queries{$q} = 1 if (defined $homopair->{$e,$q});
			}
		}
		$cluster->set_field('Query', join(' ', sort keys %Queries));
	}
}
# Plain-text rendering: the query list is returned unchanged.
sub print {
	shift;			# the invocant is not needed
	my $queries = shift;
	return $queries;
}
# HTML rendering: the query list wrapped in a coloured <font> element.
sub print_HTML {
	my $queries = $_[1];
	return '<font color=#990000>' . $queries . '</font>';
}



###############################################################################
# for test
package main;
use MBGD;

# Self-test: executed only when the program name ($0) equals the name of
# this file, i.e. when the file is run as a script and not loaded as a
# module.
# NOTE(review): default, eco, bsu, ..., Score and desc below are barewords;
# they act as strings only while "use strict" is off and no sub of the same
# name is in scope.
if ($0 eq __FILE__) {
	# NOTE(review): $read_format is not used below; DefaultReadFormat is
	# passed to new() instead.
	my $read_format = "ID,DUMMY,DATA,FuncCat,Gene";
	my $write_format = "Gene,DATA,FuncCat,GeneName,Motifs,Score";
	# Input file: first command line argument, or the default path.
	$file = $ARGV[0];
	$file = "/dbb/project/MBGD/work/default.clusterTab" if (! $file);
	# NOTE(review): $file is always set at this point, so this die cannot
	# be reached.
	die "Usage: $0 filename\n" if (! $file);
	# NOTE(review): this object is replaced by the MBGD::ClustTab::DB
	# object assigned to $clust further down.
	$clust = MBGD::ClustTab->new(file=>$file,
			format => $MBGD::ClustTab::Fields::DefaultReadFormat,
			limit=>"1,25");

	# Query gene for the homology search.
	$gene = 'eco:B0002';
#	$gene = 'efa:EF0675';
	# Collect the score of every homology record, keyed by the name of the
	# gene on the other side of the pair.
	@hom = MBGD::Homology->select({genes => [$gene]});
	foreach $h (@hom) {
		if ($h->{spname1} eq $gene) {
			$homScores{ $h->{spname2} } = $h->{score};
		} elsif ($h->{spname2} eq $gene) {
			$homScores{ $h->{spname1} } = $h->{score};
		} else {
			# neither side of the record is the query gene
			warn "select failed????\n";
		}
	}
	@homent = keys %homScores;
#print "Hit:", 0+@homent,"\n";
#	foreach $h (@homent) {
#print "$h  $homScores{$h}\n";
#	}

	# Retrieve the clusters for the homologous genes.
	$clust = MBGD::ClustTab::DB->new(default);
	$clust->retrieve( genes => \@homent,
			minout=>1, outputnum=>500);
	
	# Build a filtered table from $clust with the listed species.
	$clust2 = MBGD::FilteredClustTab->new($clust, splist=>[eco,bsu,syn,sau,hin,hpy,afu,efa]);

	# Add the fields defined in this file; Score uses the homology scores
	# collected above as gene weights.
	$clust2->add_field('MBGD::ClustTab::GeneName');
	$clust2->add_field('MBGD::ClustTab::Motifs');
	$clust2->add_field('MBGD::ClustTab::Score', {GeneWeight=> \%homScores});

	# Sort by the Score field (order option: desc) and print the table in
	# the write format.
	$clust3 = $clust2->sort_by_field(Score,{order=>desc});
	$clust3->print(format=>$write_format);
}

###############################################################################
1;#
###############################################################################
