#!/usr/local/bin/perl

use MBGD::ClustTab::Writer;
use MBGD::ClustTab::Reader;
use MBGD::ClustTab::DB;
use MBGD::ClustTab::Fields;
use SimpleSet;;


package MBGD::ClustTab;
use Carp;

#$FIELD_FuncCat = "FuncCat";
#$FIELD_GeneName = "Gene";
#$FIELD_Motifs = "Motifs";
#
#$DefaultReadFormat = [ID,DUMMY,DATA,$FIELD_FuncCat,$FIELD_GeneName];
#$DefaultWriteFormat = [$FIELD_GeneName,DATA,$FIELD_Motifs,'AHO'];

sub new($;%) {
	my($class, %args) = @_;
	my($this);
	if ($args{cluster} && $args{splist}) {
		$this = {};
		bless $this, $class;
		$this->{splist} = $args{splist};
		$this->{cluster} = $args{cluster};

        #
        my($idx) = 0;
        foreach my$sp (@{$this->{splist}}) {
            $this->{'sp_index'}->{"$sp"} = $idx;
            $idx++;
        }

	} elsif ($args{file} || $args{list}) {
		$this = MBGD::ClustTab::Reader->read(%args);
	} else {
		confess "ClustTab: argument specification error\n";
	}
	$this->{add_field} = $args{add_field} if ($args{add_field});
	return $this;
}

sub get_cluster($;$) {
	my($this, $i) = @_;
	if (defined $i) {
		return $this->{cluster}->[$i];
	} else {
		return $this->{cluster};
	}
}
sub get_spdata($$$) {
	my($this, $clust, $spidx) = @_;
	if (ref($clust) && $clust->isa('MBGD::Cluster')) {
		## assuming $clust is cluster data
		return $clust->cluster->[$spidx];
	} elsif ($clust =~ /^\d+$/) {
		$cc = $this->get_cluster($clust);
		return $cc->cluster->[$spidx];
##		return $this->get_cluster($clust)->[$spidx];
	}
}
sub get_spdata_by_sp {
	my($this, $clust, $sp) = @_;

    my($spidx) = $this->{'sp_index'}->{"$sp"};
    my($spdata) = $this->get_spdata($clust, $spidx);

    return $spdata;
}
sub get_cluster_idx($$) {
	my($this, $q_clid) = @_;
	my($idx);
	foreach $idx (0..$#{$this->{cluster}}) {
		my $cl = $this->get_cluster($idx);
		my $clid = $cl->clustid;
		if ($clid eq $q_clid) {
			return($idx);
		}
	}
}
sub findCluster($$@) {
	# $mode = 'clid' or 'index'
	my($this, $mode, @orfids) = @_;

	my(%SearchOrfs);
	my(@spidx);
	my(@retids);
	foreach my $orf (@orfids) {
		my($sp,$orfname) = split(/:/, $orf);
		$SearchOrfs{$orf} = 1;
		$SearchSps{$sp} = 1;
	}
	for ($i = 0; $i < @{$this->{splist}}; $i++) {
		push(@spidx, $i) if ($SearchSps{$this->{splist}->[$i]});
	}
	my $i = 0;
	CLUSTER: foreach $cl (@{$this->{cluster}}) {
		my ($clid, $clust) = ($cl->clustid, $cl->cluster);
		my $j = 0;
		SPEC: foreach $spl (@{$clust}[@spidx]) {
			my $sp = $this->{splist}->[$spidx[$j]];
			GENE: foreach $g (@{$spl}) {
				my $gname = $g->[0];
				if ($SearchOrfs{"$sp:$gname"}) {
					if ($mode eq 'clid') {
						push(@retids, $clid);
					} elsif ($mode eq 'index') {
						push(@retids, $i);
					}
					$i++;
					next CLUSTER;
				}
			}
			$j++;
		}
		$i++;
	}
	return @retids;
}
sub splist($) {
	my($this) = @_;
	return $this->{splist};
}
sub filter_empty_sp($) {
	## remove empty species from @sp_list
	my($this) = @_;
	my($spnum) = $this->spnum;
	my(@found, @sp_list);
	for ($i = 0; $i < $this->clustnum; $i++) {
		for ($spn = 0; $spn < $spnum; $spn++) {
			$d = $this->get_spdata($i, $spn);
			if (@{$d}) {
				$found[$spn] = 1;
			}
		}
	}
	for ($spn = 0; $spn < $spnum; $spn++) {
		if ($found[$spn]) {
			push(@sp_list, $spn);
		}
	}
	MBGD::FilteredClustTab->new($this, sp_sel => \@sp_list);
}
sub list_clusters($;$) {
	my($this,$opt) = @_;
	my(@clustlist);
	foreach $cl (@{$this->{cluster}}) {
		if ($opt eq 'clustid') {
			push(@clustlist, $cl->clustid);
		} else {
			push(@clustlist, $cl);
		}
	}
	return @clustlist;
}
sub clustnum($) {
	my($this) = @_;
	return scalar @{$this->{cluster}};
}
sub spnum($) {
	my($this) = @_;
	return scalar @{$this->{splist}};
}
sub spidx($$) {
	my($this, $spec) = @_;
	my($i, $sp);
	$i = 0;
	foreach $sp (@{$this->{splist}}) {
		return $i if ($spec eq $sp);
		$i++;
	}
}
sub print($$) {
	my($this, %opt) = @_;
	my $clustW = MBGD::ClustTab::Writer->new($this, %opt);
	$clustW->print_table;
}

sub parse_genename($) {
	my($name) = @_;
	my($gene, $dom);
	if (ref($name) eq 'ARRAY') {
		($gene,$dom) = @{$name};
	} elsif ($name =~ /^(.*)\(([0-9]+)\)$/) {
		$gene = $1;
		$dom = $2;
	} else {
		$gene = $name;
	}
	($gene, $dom);
}
sub print_genename(\@) {
	my($gene) = @_;
	if ($gene->[1]) {
		return $gene->[0] . ":" . $gene->[1];
	} else {
		return $gene->[0];
	}
}
sub allgenes($;$)
{
	my($this, $cluster) = @_;
	if ($cluster) {
		return $cluster->allgenes($this->splist);
	} else {
		my(@allgenes);
		foreach $cl ($this->list_clusters) {
			push(@allgenes, $cl->allgenes($this->splist));
		}
		my %geneHash;
		foreach $g (@allgenes){
			$geneHash{$g}=1;
		}
		return keys %geneHash;
	}
}
sub has_field {
	my($this, $fname) = @_;
	return $this->{add_field}->{$fname};
}
sub list_field {
	my($this) = @_;
	return sort keys %{$this->{add_field}};
}
sub add_field {
	my($this, $fname, $opt) = @_;
	if ($fname =~ /^MBGD::ClustTab/) {
		$field = $fname->getInstance();
		if (! $field) {
			warn "Can't create field object $fname\n";
		}
		$field->add($this, $opt);
		$fname = $field->field_name();
		$this->{add_field}->{$fname} = $field;
	} elsif (ref $fname  eq 'HASH') {
		foreach $key (%$fname) {
			$this->add_field($key,$fname->{$key});
		}
	} else {
		$this->{add_field}->{$fname} = $opt->{value} ? $opt->{value} : 1;
	}
}
sub range {
	my($this, $begin, $outnum, $opt) = @_;
	my @filter = ($begin .. $begin+$outnum-1);
	MBGD::FilteredClustTab->new($this, clust_sel => \@filter);
}
sub sort_by_field {
	my($this, $fieldname, $opt) = @_;
	my @clust = $this->list_clusters();
	if ($fieldname eq ID) {
		$sortfunc = sub {$clust[$a]->clustid <=> $clust[$b]->clsutid};
	} else {
		$sortfunc = sub {$clust[$a]->field($fieldname) <=> $clust[$b]->field($fieldname)};
	}
	@id_list = sort $sortfunc (0..$#clust);
	if ($opt->{order} =~ /^des/) {
		@id_list = reverse @id_list;
	}
	MBGD::FilteredClustTab->new($this, clust_sel => \@id_list);
}

sub tabid { return $_[0]->{tabid}; }
sub geneinfo { return $_[0]->{geneinfo}; }
sub totalcount { return $_[0]->{totalcount}; }

sub set_tabid { $_[0]->{tabid} = $_[1]; }
sub set_geneinfo { $_[0]->{geneinfo} = $_[1]; }
sub set_totalcount { $_[0]->{totalcount} = $_[1]; }

#########################################################################
package MBGD::FilteredClustTab;
@ISA = qw( MBGD::ClustTab );

###############################################################################
#
sub new($$;%) {
	my($class, $origtab, %args) = @_;
	my($this) = {};
	bless $this, $class;

if (! $origtab) {
	Carp::confess("origtab is not specified\n");
}	
	$this->{origtab} = $origtab;
	if ($args{clust_sel}) {
		$this->set_clust_sel($args{clust_sel});
	}
	if ($args{sp_sel}) {
		$this->set_sp_sel($args{sp_sel});
	}
	if ($args{splist}) {
		$this->set_splist($args{splist});
	}

#	if ($origtab->{sp_index}) {
#        foreach my$sp (keys(%{$origtab->{'sp_index'}})) {
#            my($idx) = $origtab->{'sp_index'}->{"$sp"};
#            $this->{'sp_index'}->{"$sp"} = $idx;
#        }
#	}

	return $this;
}

###############################################################################
#
sub set_clust_sel($\@) {
	my($this, $clust_sel) = @_;
	my($clnum) = $this->{origtab}->clustnum;
	my(@new_sel);
	foreach $c (@{$clust_sel}) {
		## range check
		if ($c < $clnum) {
			push(@new_sel, $c);
		}
	}
	$this->{clust_idx} = \@new_sel;
}

###############################################################################
## sp_sel: an array of splist indices for the selected species 
sub set_sp_sel($\@) {
	my($this, $sp_sel) = @_;
	$this->{sp_idx} = $sp_sel;
}

###############################################################################
## splist: an array of names of the selected species
sub set_splist($\@) {
	my($this, $splist) = @_;
	my($sp,$i,%orig_spidx,@sp_idx);
	foreach $sp (@{ $this->splist }) {
		$orig_spidx{$sp} = $i++;
	}
	foreach $sp (@{ $splist }) {
		push(@sp_idx, $orig_spidx{$sp});
	}
	$this->set_sp_sel(\@sp_idx);
}

###############################################################################
#
sub get_cluster($;$) {
	my($this, $i) = @_;
	my($idx) = $i;
	if ($this->{clust_idx}) {
		$idx = $this->{clust_idx}->[$i];
	}
	return $this->{origtab}->get_cluster($idx);
}

###############################################################################
#
sub get_spdata($$;$) {
	my($this, $clust, $spidx) = @_;
	if (! $this->{sp_idx}) {
		return $this->{origtab}->get_spdata($clust, $spidx);
	}
	if ($clust =~ /^\d+$/) {
		$clust = $this->get_cluster($clust);
	}
	return $clust->cluster->[ $this->{sp_idx}->[$spidx] ];
}

###############################################################################
#
sub get_spdata_by_sp($$;$) {
	my($this, $clust, $sp) = @_;

	return $this->{origtab}->get_spdata_by_sp($clust, $sp);
}

###############################################################################
#
sub clustnum($) {
	my($this) = @_;
	if ($this->{clust_idx}) {
		return scalar @{$this->{clust_idx}};
	} else {
		return $this->{origtab}->clustnum;
	}
}

###############################################################################
#
sub spnum($) {
	my($this) = @_;
	if ($this->{sp_idx}) {
		return scalar @{$this->{sp_idx}};
	} else {
		return $this->{origtab}->spnum;
	}
}

###############################################################################
#
sub list_clusters($;$) {
	my($this,$opt) = @_;
	if (! $this->{clust_idx}) {
		return $this->{origtab}->list_clusters($opt);
	} else {
		my(@list);
		foreach $idx (@{$this->{clust_idx}}) {
			my $cl = $this->get_cluster($idx);
			if ($opt eq 'clustid') {
				push(@list, $cl->clustid);
			} else {
				push(@list, $cl);
			}
		}
		return @list;
	}
}

###############################################################################
#
sub splist($) {
	my($this) = @_;
	my $origlist = $this->{origtab}->splist;
	if (! $this->{sp_idx}) {
		return $origlist;
	} elsif (! $this->{splist}) {
		my(@newlist);
		foreach $i (@{$this->{sp_idx}}) {
			next if ($i eq "");
			push(@newlist, $origlist->[$i]);
		}
		$this->{splist} = \@newlist;
	}
	return $this->{splist};
}

###############################################################################
#
sub allgenes($$) {
	my($this, $cluster) = @_;
#	if (! $this->{sp_idx}) {
#		return $this->{origtab}->allgenes($cluster);
#	} elsif (! $cluster) {

	if (! $cluster) {
		my @genes;
		foreach $cl ($this->list_clusters) {
			if ($cl) {
				push(@genes, $this->allgenes($cl));
			}
		}
		my %geneHash;
		foreach $g (@genes) {
			$geneHash{$g} = 1;
		}
		return keys %geneHash;
	} else {
		my(%geneHash);
		for (my $i = 0; $i < $this->spnum; $i++) {
			$sp = $this->splist->[$i];
#			$spdata = $this->get_spdata($cluster, $i);
			$spdata = $this->get_spdata_by_sp($cluster, $sp);
			foreach $g (@{$spdata}) {
				my($gene, $dom) = &MBGD::ClustTab::parse_genename($g);
				$geneHash{"$sp:$gene"} = 1;
			}
		}
		return keys %geneHash;
	}
}
##
## WARNING:
## The methods for adding new fields (e.g. Motifs) are not transferred
## to the origtab object, meaning that these columns are added to the
## derived table itself rather than the origtab, using the methods inherited
## from the base class. When retrieving data, the fields of the derived
## table followed by those of origtab ojbect are looked up.
##
#sub add_field {
#	my($this, $fname, $opt) = @_;
#	defined $this->{origtab}->add_field($fname, $opt);
#}

###############################################################################
#
sub has_field {
	my($this, $fname) = @_;
	my($ret);
	if (! ($ret = $this->SUPER::has_field($fname)) ) {
		return $this->{origtab}->has_field($fname);
	}
	return $ret;
}

###############################################################################
#
sub list_field {
	my($this) = @_;
	my(@ret) = $this->SUPER::list_field($fname);
	foreach $r (@ret) {
		$hash{$r} = 1;
	}
	foreach $r ( $this->{origtab}->list_field ) {
		if (! $hash{$r}) {
			push(@ret, $r);
		}
	}
	@ret;
}

sub tabid { return $_[0]->{origtab}->tabid; }
sub geneinfo { return $_[0]->{origtab}->geneinfo; }
sub totalcount { return $_[0]->{origtab}->totalcount; }

sub set_totalcount { $_[0]->{origtab}->set_totalcount($_[1]); }
sub set_geneinfo { $_[0]->{origtab}->set_geneinfo($_[1]); }
sub set_tabid { $_[0]->{origtab}->set_tabid($_[1]); }

#########################################################################
package MBGD::Cluster;
sub new($;%) {
	my($class, $clustid, $cluster, $fields, %opt) = @_;
	my($this) = {};
	$this->{clustid} = $clustid;
	$this->{cluster} = $cluster;
	$this->{fields} = $fields;
    if (exists($opt{'splist'})) {
        $this->{'sp2cluster'} = {};
        my($idx) = 0;
        foreach my$sp (@{$opt{'splist'}}) {
            $this->{'sp2cluster'}->{"$sp"} = $cluster->[$idx];
            $idx++;
        }
    }

	bless $this, $class;
	$this;
}
sub clustid($) {
	$_[0]->{clustid};
}
sub cluster($) {
	$_[0]->{cluster};
}
sub cluster_idx {
    my($this) = shift;
    my($idx) = shift;
    return $this->{'cluster'}->[$idx];
}
sub cluster_sp {
    my($this) = shift;
    my($sp) = shift;
    return $this->{'sp2cluster'}->{"$sp"};
}
sub set_field($$) {
	my($this, $fieldname, $value) = @_;
	$this->{fields}->{$fieldname} = $value;
}
sub field($$) {
	my($this, $fieldname) = @_;
	$this->{fields}->{$fieldname};
}
sub allgenes($;$) {
	my($this, $splist) = @_;
	my $i = 0;
	my %geneHash;
	foreach $spdata (@{$this->{cluster}}) {
		$sp = $splist->[$i++];
		foreach $g (@{$spdata}) {
			my($gene, $dom) = &MBGD::ClustTab::parse_genename($g);
			$geneHash{"$sp:$gene"} = 1;
		}
	}
	return keys %geneHash;
}
#########################################################################
package main;
use MBGD::ClustTab::Fields;
if ($0 eq __FILE__) {
	my $read_format = "ID,DUMMY,DATA,FuncCat,Gene";
#	my $write_format = "DATA,FuncCat,Gene,Motifs";
	my $write_format = "Gene,DATA,Motifs";
	$file = $ARGV[0];
	$file = "/dbb/project/MBGD/work/default.clusterTab" if (! $file);
	die "Usage: $0 filename\n" if (! $file);
#	$clust = MBGD::ClustTab->new(file=>$file, begfld => 2, limit=>"1,200");
	$clust = MBGD::ClustTab->new(file=>$file, format => $read_format,
			limit=>"1,100");

	$newclust = MBGD::FilteredClustTab->new($clust, sp_sel => [1,4,5,10,8]);
	$newclust2 = MBGD::FilteredClustTab->new($newclust, sp_sel => [2,1,4]);

	$newclust->add_field(MBGD::ClustTab::Motifs);

	$cw = MBGD::ClustTab::Writer->new($newclust, format=>$write_format);
	$cw->print_table;

#	$newclust2->print;

#	$cw = MBGD::ClustTab::Writer_HTML->new($clust);
#	$cw->print_table;
}

#########################################################################
1;
