#!/usr/local/bin/perl

package MBGD::ClustTab::DB;
require 'MBGD_Conf.pl';
use MBGD;

#require 'libMBGDclusttab.pl';
#require 'libMBGDFuncCat.pl';
##require 'libMBGDposcluster.pl';
use MBGD::ClustTab;
use MBGD::ClustTab::ClustMap;
use SimpleSet;
use RECOG;

# This package inherits from MBGD::ClustTab.
@ISA = qw{ MBGD::ClustTab };

## use RECOG table names instead of MBGD table names;
#$TBL_CLUSTER = "cluster";
#$TBL_CLUSTTAB = "clusttab";
# Table-name prefixes used throughout this package; the subs below append
# "_<tabid>" to them.  The values are read from the main-package variables
# $::TBL_DOMCACHE and $::TBL_DOMRESULT (presumably set by the configuration
# loaded above -- TODO confirm where they are defined).
$TBL_CLUSTER = $::TBL_DOMCACHE;
$TBL_CLUSTTAB = $::TBL_DOMRESULT;

#
# package MBGD::ClustTab::DB
# Retrieve clustering data from the database and construct a ClustTab object
#
#$dbname = $main::DBNAME_MBGD;

###############################################################################
#
sub getDbname {
    # Returns the name of the database to read gene data from:
    # the per-user database when the user identified by $ENV{MBGD_UID}
    # has a user genome that is ready, otherwise $main::DBNAME_MBGD.
    my $uid = $ENV{'MBGD_UID'};

    return $main::DBNAME_MBGD
        unless (MbgdUserGenomeCommon::isContainUserGenome($uid)
                && MbgdUserGenomeCommon::isReadyUserGenome($uid));

    my $user_dbname = MbgdUserGenomeCommon::getDbname($uid);
    return $user_dbname;
}

###############################################################################
#
sub getTmpDbname {
    # Returns the name of the database that holds the clustering tables:
    # the per-user database when the user identified by $ENV{MBGD_UID}
    # has a user genome that is ready, otherwise $main::DBNAME_RECOG.
    # (Earlier revisions used $main::DBNAME_MBGD / $main::DBNAME_TMP here.)
    my $uid = $ENV{'MBGD_UID'};

    return $main::DBNAME_RECOG
        unless (MbgdUserGenomeCommon::isContainUserGenome($uid)
                && MbgdUserGenomeCommon::isReadyUserGenome($uid));

    my $user_dbname = MbgdUserGenomeCommon::getDbname($uid);
    return $user_dbname;
}

###############################################################################
#
#$tmpdbi = $main::MYSQL_DBTMP;
sub getTmpDbi {
    # Builds the DBI data-source string ("dbi:mysql:<dbname>") for the
    # database named by getTmpDbname().
    my $dbname = getTmpDbname();
    return "dbi:mysql:$dbname";
}

###############################################################################
# $tabid $B$,;XDj$5$l$?>l9g!";XDj$5$l$?%/%i%9%?!<%F!<%V%k$,(B
# mbgd_tmp $B$K$"$k$N$+!"(Bmbgd_UID $B$K$"$k$N$+$rD4$Y!"(B
# $BE,@Z$J(B database $B$rA*Br$9$k!#(B
sub new {
    # Constructor.
    #   $class : class name to bless into
    #   $tabid : optional cluster-table id; 'default' is used when false
    # The object keeps {tabid} and a database handle in {db}.
    my ($class, $tabid) = @_;

    my $this = bless {}, $class;

    if ($tabid) {
        # Try the per-user database first; getUserDbByTabid() returns
        # nothing when the table is not found there.
        $this->{tabid} = $tabid;
        $this->{db}    = $this->getUserDbByTabid($tabid);
    }
    else {
        $this->{tabid} = 'default';
    }

    # Fall back to the database named by getTmpDbi().
    unless (defined($this->{db})) {
        my ($tmpdbi) = getTmpDbi();
        $this->{db} = MBGD::DB->new($tmpdbi);
    }

    return $this;
}

sub getUserDbByTabid {
    # Returns a handle on the current user's database if that database
    # contains the cluster table for $tabid; otherwise returns nothing
    # (undef in scalar context).
    #   $self  : invocant (unused)
    #   $tabid : cluster-table id
    my ($self, $tabid) = @_;

    # The table name is built the same way as everywhere else in this
    # package ("<prefix>_<tabid>").  The previous code interpolated the
    # misspelled, always-empty ${TBL_CLSUTER} and formatted the id with %d,
    # which turned non-numeric ids such as 'default' into 0.
    my $tabname = "${TBL_CLUSTER}_$tabid";

    my ($uid)    = $ENV{'MBGD_UID'};
    my ($dbname) = MbgdUserGenomeCommon::getDbname($uid);
    my ($db)     = MBGD::DB->new($dbname);

    # A blank {conn} means the handle is not connected.
    if ($db->{'conn'} !~ /^\s*$/ && $db->exist_table($tabname)) {
        return $db;
    }

    return;
}

###############################################################################
#
sub retrieve {
    # Queries the cluster table and wraps the rows in a MBGD::ClustTab
    # (possibly filtered to a subset of species).
    #   %opt : search options, passed by reference to searchClustTableDB();
    #          this sub itself reads format, clustsp_size, no_filter,
    #          phylopat, mismatch and phylopat_neg.
    # Returns the cluster-table object with total count, gene info and
    # table id attached.
    my ($this, %opt) = @_;

    my ($clustRes, $GeneInfo, $optionOut) = $this->searchClustTableDB(\%opt);
    my $tmpsplist = $this->get_splist;

    my $format = $opt{format}
        ? $opt{format}
        : $MBGD::ClustTab::Fields::DefaultReadFormat;

    my $clusttab = MBGD::ClustTab->new(
        list   => $clustRes,
        splist => $tmpsplist,
        format => $format,
    );

    if ($this->{tabid} eq 'addspec') {
        my @addspec_species = split(/[\,\|]/, main::getSpeciesForAddspec(\%opt));
        $clusttab = MBGD::FilteredClustTab->new($clusttab, splist => \@addspec_species);
        # The addspec filtering is finished, so clear phylopat.
        delete($opt{'phylopat'}) if ($::Args{existsp} eq '');
    }

    if (0 < $opt{clustsp_size}) {
        # Keep only the species that occur in at least one retrieved cluster.
        my @all_species = split(',', $tmpsplist);
        my $seen_pat    = '0' x scalar(@all_species);
        my $n_clusters  = $clusttab->clustnum();

        my $ci = 0;
        while ($ci < $n_clusters) {
            my $cluster = $clusttab->get_cluster($ci);
            foreach my $si (0 .. $#all_species) {
                my ($members) = $cluster->cluster_idx($si);
                substr($seen_pat, $si, 1) = '1' if (scalar(@{$members}) != 0);
            }
            $ci++;
        }

        my $species_set = SimpleSet->new( list=>$tmpsplist,sep=>',' );
        my @present     = $species_set->subset($seen_pat)->list();
        $clusttab = MBGD::FilteredClustTab->new($clusttab, splist => \@present);
    }
    elsif (! $opt{no_filter} && $opt{phylopat} =~ /[^_]/) {
        # The pattern includes at least one non-ambiguous character.
        if ($opt{mismatch} || $opt{phylopat_neg}) {
            $clusttab = $clusttab->filter_empty_sp;
        }
        else {
            my $species_set = SimpleSet->new( list=>$tmpsplist,sep=>',' );
            my @selected    = $species_set->subset($opt{phylopat})->list();
            $clusttab = MBGD::FilteredClustTab->new($clusttab, splist => \@selected);
        }
    }

    $clusttab->set_totalcount($optionOut->{totalcount});
    $clusttab->set_geneinfo($GeneInfo);
    $clusttab->set_tabid($this->{tabid});

    return $clusttab;
}

###############################################################################
# The "dom" column of the cluster_TABID table stores the domain number.
# Data that is not split into domains has dom=0.
# Reason:
#   If unsplit data were also stored as dom=1, the following two cases could not be told apart:
#   - split into domains, dom=1
#   - not split into domains, dom=1
sub getDomains {
    # Returns the list of domains recorded for one gene.
    #   $gene : value matched against the spname column
    # Each element is a hash ref { from, to, clustid }, stored at index
    # (dom - 1); rows with dom=0 (gene not split into domains) are stored
    # at index 0.
    my ($this, $gene) = @_;
    my ($tmpdbi) = getTmpDbi();
    my $db    = MBGD::DB->new($tmpdbi);
    my $tabid = $this->{tabid};

    # The gene name is bound rather than interpolated into the SQL text.
    my $sql = "select * from ${TBL_CLUSTER}_$tabid where spname=?";
    my $sth = $db->prepare($sql);
    $sth->execute($gene);

    # Lexical result list: this used to be a package global that was never
    # cleared, so domains of previously queried genes leaked into later calls.
    my @Dom;
    while (my $res = $sth->fetchrow_hashref) {
        my $dom = $res->{dom};
        $dom = 1 if ($dom == 0);    # not split into domains
        $Dom[$dom - 1] = {
            from    => $res->{from1},
            to      => $res->{to1},
            clustid => $res->{clustid},
        };
    }
    return @Dom;
}
###############################################################################
sub getCluster {
    # Fetches all rows of one cluster.
    #   $clustid : cluster id to look up
    #   %opt     : asHash => true returns { sp => { name => row } },
    #              otherwise an array ref of row hash refs is returned
    my ($this, $clustid, %opt) = @_;
    my ($tmpdbi) = getTmpDbi();
    my $db    = MBGD::DB->new($tmpdbi);
    my $tabid = $this->{tabid};

    # The cluster id is bound rather than interpolated into the SQL text.
    my $sql = "select * from ${TBL_CLUSTER}_$tabid where clustid=?";
    my $sth = $db->prepare($sql);
    $sth->execute($clustid);

    my $Result = $opt{asHash} ? {} : [];
    while (my $res = $sth->fetchrow_hashref) {
        if ($opt{asHash}) {
            $Result->{$res->{sp}}->{$res->{name}} = $res;
        }
        else {
            push(@{$Result}, $res);
        }
    }
    return $Result;
}

###############################################################################
#
sub retrieveSummary {
    # Builds a MBGD::ClustTab::ClustMap from the counts returned by
    # summaryMap() for this object's table.
    #   %opt : options forwarded to summaryMap(); a reference to them is
    #          also stored in the result under {create_opt}.
    my ($this, %opt) = @_;

    my $tmpsplist = $this->get_splist;
    my ($CountPat, $CountClass, $CountTotClass, $totcnt)
        = $this->summaryMap($this->{tabid}, %opt);

    my @ctor_args = (
        $CountPat, $CountClass, $CountTotClass, $totcnt,
        $tmpsplist, $this->{'tabid'},
    );
    my $clustmap = MBGD::ClustTab::ClustMap->new(@ctor_args);
    $clustmap->{create_opt} = \%opt;

    return $clustmap;
}

###############################################################################
#
#sub get_tabid {
#	$this = shift;
#	return $this->{tabid};
#}
#sub totalcount {
#	$this = shift;
#	return $this->{totalcount};
#}
##
### all class methods are transfered to $this->{clusttab}
#sub get_cluster { $_[0]->{clusttab}->get_cluster($_[1]); }
#sub get_spdata { $_[0]->{clusttab}->get_spdata($_[1],$_[2]); }
#sub get_cluster_idx { $_[0]->{clusttab}->get_cluster_idx($_[1]); }
#sub findCluster { $_[0]->{clusttab}->get_cluster_idx(@_[1..$#_]); }
#sub splist { $_[0]->{clusttab}->splist; }
#sub clustnum { $_[0]->{clusttab}->clustnum; }
#sub list_clusters { $_[0]->{clusttab}->list_clusters($_[1]); }
#sub spnum { $_[0]->{clusttab}->spnum; }
#sub spidx { $_[0]->{clusttab}->spidx($_[1]); }
#sub print { $_[0]->{clusttab}->print; }
#sub allgenes { $_[0]->{clusttab}->allgenes($_[1]); }
#sub has_field { $_[0]->{clusttab}->has_field($_[1]); }
#sub list_field { $_[0]->{clusttab}->list_field($_[1]); }
#sub add_field { $_[0]->{clusttab}->add_field($_[1],$_[2]); }

###############################################################################
#
sub readClusterTable {
    # Reads cluster rows either from the database or from a file handle.
    #   $CLUST : "RDB:<tabid>" to query the database, otherwise a file
    #            handle whose non-comment lines are returned verbatim
    #   $opt   : hash ref of search options (RDB case); its {minout} may be
    #            rewritten when a reference position is reported
    # Returns ($clustRes, $GeneInfo, $totalcount, $clres_start).
    # Side effects kept from the original: the package variables $optionOut
    # and $no_ortholog are set in the RDB case, and lines are echoed to the
    # package file handle $WF when it is set.
    my ($CLUST, $opt) = @_;
    my ($clustRes, $GeneInfo, $totalcount, $clres_start);

    if ($CLUST =~ /^RDB:(.+)$/) {
        ### data should be retrieved by RDB query
        my $tableid = $1;

        # searchClustTableDB() is a method that reads $this->{tabid} and
        # $this->{db}; calling it as a plain function with the table id
        # (as was done before) could not work, so build an object first.
        my $dbtab = __PACKAGE__->new($tableid);
        ($clustRes, $GeneInfo, $optionOut) = $dbtab->searchClustTableDB($opt);

        $clres_start = $opt->{minout} - 1;
        $totalcount  = $optionOut->{totalcount};
        $no_ortholog = $optionOut->{no_ortholog};
        if ($optionOut->{refpos}) {
            # NOTE(review): this reads {outnum} while the rest of the file
            # uses {outputnum} -- confirm which key callers set.
            $opt->{minout} = $optionOut->{refpos} - $opt->{outnum} / 2;
        }
    }
    else {
        # A lexical line variable avoids clobbering the caller's $_.
        while (my $line = <$CLUST>) {
            next if ($line =~ /^#/);
            print $WF $line if ($WF);
            push(@{$clustRes}, $line);
        }
    }
    return ($clustRes, $GeneInfo, $totalcount, $clres_start);
}

###############################################################################
#
sub testClustTableID {
    # Returns 1 if cluster_tables contains a row with the given tabid in
    # $main::MYSQL_DBTMP or in the database named by getTmpDbi()
    # (checked in that order), otherwise 0.
    my ($id) = @_;
    my ($tmpdbi) = getTmpDbi();

    my @dbiList = ($main::MYSQL_DBTMP);
    push(@dbiList, $tmpdbi) if ($tmpdbi ne $main::MYSQL_DBTMP);

    foreach my $dbi (@dbiList) {
        my $db = MBGD::DB->new($dbi);
        # The id is bound rather than interpolated into the SQL text.
        my $sth = $db->prepare("select * from cluster_tables where tabid=?");
        $sth->execute($id);
        return 1 if ($sth->fetchrow_hashref);
    }
    return 0;
}

###############################################################################
#
sub getClustTableID {
    # Looks for an existing cluster table that was created with clustering
    # options equivalent to $command (compared with matchClustringOpt()).
    # $main::MYSQL_DBTMP is searched first, then the database named by
    # getTmpDbi() if it is different.
    # Returns the matching tabid, or '' when none is found.
    #
    # An earlier revision (removed here) created cluster_tables on demand
    # and looked the command up with an exact "where cmd=?" match.
    my ($command) = @_;

    my ($user_dbi) = getTmpDbi();
    my @dbiList = ($main::MYSQL_DBTMP);
    push(@dbiList, $user_dbi) unless ($user_dbi eq $main::MYSQL_DBTMP);

    foreach my $dbi (@dbiList) {
        my $db = MBGD::DB->new($dbi);

        # The table is repaired before every scan.
        my $repair_sth = $db->prepare("repair table  cluster_tables ");
        $repair_sth->execute();

        my $list_sth = $db->prepare("select * from cluster_tables ");
        $list_sth->execute();
        while (my $row = $list_sth->fetchrow_hashref()) {
            next unless (matchClustringOpt($command, $row->{'cmd'}));
            return $row->{'tabid'};
        }
    }
    return '';
}

###############################################################################
#
sub searchClustTableDB {
    # Builds and runs the SQL query that selects clusters from this
    # object's cluster tables.
    #   $opt : hash ref of search options.  Keys read here: genes,
    #          phylopat(2), phylopat_neg(2), mismatch(2), sortby, refspec,
    #          refchrom, ref_gene, with_totalcount, find_no_ortholog,
    #          minout, outputnum, no_limit, add_geneinfo, minorfnum,
    #          minspnum, cluster_color, function_category, clustsp_size,
    #          columns, splist, store_tmptab.  {get_clustid_list} is set
    #          when add_geneinfo is requested.
    # Returns ($result, $geneinfo, $optionOut), or nothing when
    # $opt->{store_tmptab} is set (the rows are then stored in that
    # temporary table instead).
    #
    # All query fragments are lexical.  They used to be package globals, so
    # a "limit" clause (or %found entries) left by one call leaked into the
    # next call that did not set them.
    my ($this, $opt) = @_;

    my $genes            = $opt->{genes} || [];
    my $phylopat         = $opt->{phylopat};
    my $phylopat_neg     = $opt->{phylopat_neg};
    my $sortby           = $opt->{sortby};
    my $refspec          = $opt->{refspec};
    my $refchrom         = $opt->{refchrom};
    my $with_totalcount  = $opt->{with_totalcount};
    my $find_no_ortholog = $opt->{find_no_ortholog};
    my ($minout, $outputnum) = ($opt->{minout}, $opt->{outputnum});
    my $add_geneinfo     = $opt->{add_geneinfo};
    my $minorfnum        = $opt->{minorfnum};
    my $minspnum         = $opt->{minspnum};
    my $mismatch         = $opt->{mismatch};
    my $db_func          = $opt->{cluster_color};
    $db_func = 'mbgd' if (! $db_func);
    my $functionCategory = $opt->{function_category};
    if ($functionCategory =~ /\:/) {
        # "<db>:<category>" overrides the classification column.
        ($db_func, $functionCategory) = split(':', $functionCategory);
    }

    my $phylopat2     = $opt->{phylopat2};
    my $mismatch2     = $opt->{mismatch2};
    my $phylopat_neg2 = $opt->{phylopat_neg2};
    my $clustsp_size  = $opt->{clustsp_size};

    my $geneinfo;
    my $optionOut = {};
    my $tabid     = $this->{tabid};
    my $dbname    = getDbname();

    ## ---- select list -------------------------------------------------
    my $columns;
    if ($opt->{columns}) {
        $columns = join(',', map { "c1.$_" } @{$opt->{columns}});
    }
    else {
        $columns = "c1.*";
    }

    # With a plain sort by gene, rows whose gene is blank would come first;
    # expose "gene is blank" as a column so it can be used in the ordering.
    $columns .= ", c1.gene='' as is_gene_blank";
    $columns .= ", l2w(c1.$db_func) as val_l2w";

    ## ---- from / where ------------------------------------------------
    my ($select, $from, $where, $orderby, $limit) = ('', '', '', '', '');
    my $funccat_cond = $functionCategory
        ? "and (c1.$db_func='$functionCategory' or c1.$db_func like '$functionCategory.%') "
        : '';

    if (! ($sortby eq 'geneorder' || @{$genes} || $add_geneinfo) ) {
        # Only the cluster summary table is needed.
        $select = "select $columns ";
        $from   = "from ${TBL_CLUSTTAB}_$tabid c1 ";
        $where  = "where 1 ";
        $where .= $funccat_cond;
    }
    elsif ($sortby eq 'geneorder' && $refspec) {
        # Package variable kept as before; it is not read in this file.
        $sortby_geneorder = 1;
        $columns .= ",g.sp,g.name";
        $select = "select distinct $columns ";
        ### Joining on spname works better than joining on sp and name,
        ### possibly because spname is the primary key of the cluster table.
        $from = "from $dbname.gene g " .
            "left join ${TBL_CLUSTER}_$tabid c2 " .
            "on c2.spname=concat(g.sp,':',g.name) ".
            "left join ${TBL_CLUSTTAB}_$tabid c1 " .
            "on c1.clustid=c2.clustid ";

        $where  = "where g.sp='$refspec' and c2.sp='$refspec' ";
        $where .= $funccat_cond;
        if ($opt->{ref_gene} && $opt->{outputnum} <= 100) {
            # Restrict to a window of 200000 around the reference gene.
            my ($g) = MBGD::Gene->get("$refspec:$opt->{ref_gene}");
            if ($g) {
                my $chrid    = $g->{chrid};
                my $win_from = $g->{from1} - 200000;
                my $win_to   = $g->{to1} + 200000;
                $where .= " and g.to1 >= $win_from  and g.from1 <= $win_to ";
                $where .= " and g.chrid=$chrid";
            }
        }
        elsif ($refchrom) {
print STDERR "###>>refchrom>>>>>$refchrom\n";
            # Class-method call; the former "$MBGD::Chromosome->get" invoked
            # a method on an undefined scalar and died.
            my ($chr) = MBGD::Chromosome->get("$refspec:$refchrom");
            # As in the ref_gene case, add the restriction only when the
            # lookup succeeded (otherwise the SQL would be malformed).
            $where .= " and g.chrid=$chr->{id}" if ($chr);
        }
    }
    else {
        $select = "select distinct $columns ";
        $from   = "from ${TBL_CLUSTTAB}_$tabid c1, ${TBL_CLUSTER}_$tabid c2 ";
        $where  = "where c1.clustid=c2.clustid ";
        $where .= $funccat_cond;
    }

    my $orderby_clustsp_size = '';
    if ($clustsp_size) {
        $where .= " and spnum=$clustsp_size";
        $orderby_clustsp_size = "phylopat desc, clustid,";
    }

    ### NOTE: mismatch(str1,str2,mismatch) is a user defined function in mysql
    my $phylopat_cond = sub {
        my ($pat, $neg, $mm) = @_;
        my $negop = $neg ? 'not' : '';
        return $mm
            ? " and $negop mismatch(c1.phylopat, '$pat', $mm)"
            : " and c1.phylopat $negop like '$pat'";
    };
    $where .= $phylopat_cond->($phylopat,  $phylopat_neg,  $mismatch)  if ($phylopat);
    $where .= $phylopat_cond->($phylopat2, $phylopat_neg2, $mismatch2) if ($phylopat2);

    if (@{$genes} > 0) {
        my $gene_list = "'" . join("','", @{$genes}) . "'";
        $where .= " and c2.spname in ($gene_list)";
    }
    if (! ($sortby eq 'geneorder' && $refspec)) {
        $where .= " and c1.spnum >= $minspnum"   if ($minspnum);
        $where .= " and c1.orfnum >= $minorfnum" if ($minorfnum);
    }

    ## ---- order by / limit ---------------------------------------------
    $orderby = " order by $orderby_clustsp_size";
    if ($sortby eq 'geneorder') {
        $orderby .= " g.chrid,(g.from1 + g.to1)/2";
    }
    else {
        $orderby .= " val_l2w,is_gene_blank,c1.gene";
    }
    if ($add_geneinfo) {
        $opt->{get_clustid_list}=1;
    }
    if (! $opt->{no_limit} && $outputnum) {
        $minout = $minout - 1;		## beginning with 0
        $minout = 0 if ($minout < 0);
        $limit = " limit $minout,$outputnum";
    }
    my $query = "$select $from $where $orderby $limit";
    print STDERR "DBG :: SQL :: $query\n" if ($main::DEBUG);

    if ($with_totalcount) {
        my $sql = "select count(distinct c1.clustid) $from $where";
        my $sth = $this->{db}->execute($sql);
        ($optionOut->{totalcount}) = $sth->fetchrow_array();
    }

    ## ---- run the query --------------------------------------------------
    if ($opt->{store_tmptab}) {
        queryClustTableDB_storeTmpTab($this->{db}, $query, $opt);
        return;
    }
    my ($result, $clustid, $orphans, $optout)
        = queryClustTableDB($this->{db}, $query, $opt);
    $clustid ||= [];
    $orphans ||= [];
    $optionOut->{refpos} = $optout ? $optout->{refpos} : undef;

    ## ---- optional gene information ---------------------------------------
    if ($add_geneinfo && @{$clustid}) {
        my $clidlist  = join(',', @{$clustid});
        my @ginfo_fld = split(/,/, $add_geneinfo);
        my $ginfo_fld = join(',', map { " g.$_" } @ginfo_fld);

        # Results are keyed by "sp:name".  The cluster table's spname column
        # holds that value (see the join above); queries that read the gene
        # table alone must build it, otherwise every row is filed under an
        # empty key.
        my $spname_expr = "concat(g.sp,':',g.name) as spname";
        my $store_geneinfo = sub {
            my ($sth) = @_;
            while (my $h = $sth->fetchrow_hashref) {
                foreach my $field (@ginfo_fld) {
                    $geneinfo->{$h->{spname}}->{$field} = $h->{$field};
                }
            }
        };

        my $sql;
        if (@{$clustid} < 1000) {
            $sql = "select c.spname, $ginfo_fld " .
                "from ${TBL_CLUSTER}_$tabid c, $dbname.gene g " .
                "where c.sp=g.sp and c.name=g.name " .
                "and c.clustid in ($clidlist)";
        }
        else {
            # Too many ids for an IN list: read whole species instead.
            $sql = "select $spname_expr, $ginfo_fld from $dbname.gene g ";
            if ($opt->{splist}) {
                my $splist = join("','", @{$opt->{splist}});
                $sql .= " where sp in ('$splist')";
            }
        }
        print STDERR "SQL>>$sql\n" if ($main::DEBUG);
        $store_geneinfo->($this->{db}->execute($sql));

        if ($sortby eq 'geneorder' && $refspec && @{$orphans}) {
            # Genes of the reference species that belong to no cluster.
            my $orphan_list = join("','", @{$orphans});
            $sql = "select $spname_expr, $ginfo_fld from $dbname.gene g " .
                "where sp='$refspec' and name in ('$orphan_list')";
            $store_geneinfo->($this->{db}->execute($sql));
        }
    }

    ## ---- requested genes that were not found ------------------------------
    if (@{$genes} > 0 && $find_no_ortholog) {
        my %found;
        my $sth = $this->{db}->execute("select c2.spname $from $where");
        while (my ($spname) = $sth->fetchrow_array) {
            $found{$spname} = 1;
        }
        my @no_ortholog = grep { ! $found{$_} } @{$genes};
        $optionOut->{no_ortholog} = \@no_ortholog;
    }
    return ($result, $geneinfo, $optionOut);
}

###############################################################################
#
sub get_splist {
    # Returns the "spec" column of this object's row in cluster_tables
    # (an empty list in list context when there is no such row).
    my ($this) = @_;

    # The tabid is bound rather than interpolated, as in get_cmd().
    my $sth = $this->{db}->prepare(
        "select spec,tabid from cluster_tables where tabid=?");
    $sth->execute($this->{tabid});

    return ($sth->fetchrow_array)[0];
}

###############################################################################
#
sub get_cmd {
    # Returns the "cmd" column of this object's row in cluster_tables
    # (an empty list in list context when there is no such row).
    my $this = shift;

    my $db     = $this->{db};
    my $lookup = $db->prepare("select cmd from cluster_tables where tabid=?");
    $lookup->execute($this->{tabid});

    return ($lookup->fetchrow_array)[0];
}

###############################################################################
#
sub queryClustTableDB_storeTmpTab {
    # Runs $query as "create temporary table <name> ... <query>" so that its
    # rows are stored in a temporary table.
    #   $db    : database handle providing execute()
    #   $query : select statement to store
    #   $opt   : {store_tmptab} table name, optional {store_key} index spec
    # Returns an empty list.
    my ($db, $query, $opt) = @_;

    # Plain conditional expression: "my (...) = ... if (...)" has undefined
    # behaviour in Perl and could keep the KEY clause of a previous call.
    my $idxopt = $opt->{store_key} ? "(KEY ($opt->{store_key}))" : '';

    # "TYPE=HEAP" is not used because HEAP does not support BLOB/TEXT types.
    my $typeopt = '';

    $query = "create temporary table $opt->{store_tmptab} $idxopt $typeopt $query";
    $db->execute($query);
    return ();
}

###############################################################################
#
sub queryClustTableDB {
    # Executes $query and formats the rows.
    #   $db    : database handle providing execute()
    #   $query : SQL text
    #   $opt   : {ref_gene}, {minout}, {outputnum} select a window of rows
    #            around the row whose name equals ref_gene;
    #            {get_clustid_list} additionally collects cluster ids and
    #            the names of rows without a cluster id
    # Returns (\@result, \@clustid, \@orphans, $optout) where each result is
    # "data<TAB>mbgd:..<TAB>cog:..<TAB>kegg:..<TAB>tigr:.." and
    # $optout->{refpos} is the 1-based position of ref_gene (undef if not
    # searched or not found).
    my ($db, $query, $opt) = @_;
    my (@result, @clustid, @orphans);
    my ($refpos, $minpos, $maxpos);

    if ($opt->{ref_gene}) {
        # First pass: locate the reference gene in the result order.
        my $sth = $db->execute($query);
        my $pos = 0;
        while (my $row = $sth->fetchrow_hashref) {
            $pos++;
            if ($row->{name} eq $opt->{ref_gene}) {
                $refpos = $pos;
                last;
            }
        }
        $minpos = $refpos + ($opt->{minout} - 1)
            - int($opt->{outputnum} / 2);
        $maxpos = $minpos + $opt->{outputnum} - 1;
    }

    my $cnt = 0;
    my $sth = $db->execute($query);
    while (my $row = $sth->fetchrow_hashref) {
        $cnt++;
        if ($refpos) {
            next if ($cnt < $minpos);
            last if ($cnt > $maxpos);
        }

        push(@result, join("\t", $row->{data},
                                 "mbgd:$row->{'mbgd'}",
                                 "cog:$row->{'cog'}",
                                 "kegg:$row->{'kegg'}",
                                 "tigr:$row->{'tigr'}" ));

        next unless ($opt->{get_clustid_list});
        if ($row->{clustid}) {
            push(@clustid, $row->{clustid});
        }
        elsif ($row->{name}) {
            ## an orphan gene in refspec
            push(@orphans, $row->{name});
        }
    }

    # Report the reference position.  The previous code returned a package
    # variable that was never filled in, so callers always saw no refpos.
    my $optout = { refpos => $refpos };
    return (\@result, \@clustid, \@orphans, $optout);
}

###############################################################################
#
sub getSortedClusterSimple {
    # Returns the gene rows (hash refs) of one chromosome or contig, ordered
    # by the midpoint of each gene.
    #   $tabid    : unused here (kept for signature parity with
    #               getSortedCluster)
    #   $sp       : species code
    #   $chrid    : chromosome id, or
    #   $contigid : contig id (used when $chrid is false)
    #   $opt      : {region} => "from:to" restricts the range; a range
    #               reaching below 0 or beyond the sequence length also
    #               matches the same range shifted by the sequence length
    # Returns an empty list when neither $chrid nor $contigid is given.
    my ($tabid, $sp, $chrid, $contigid, $opt) = @_;
    my $db = MBGD::DB->new();

    my $sql = "select sp,name,from1,to1,dir,funccat,type from gene where sp='$sp' ";
    $sql .= "and chrid=$chrid " if ($chrid);
    $sql .= "and contigid=$contigid " if ($contigid);

    my $chr;
    if ($chrid) {
        ($chr) = MBGD::Chromosome->fetch("$chrid");
    }
    elsif ($contigid) {
        ($chr) = MBGD::Contig->fetch("$contigid");
    }
    else {
        return ();
    }
    my ($chrlen) = $chr->length;

    if ($opt->{region}) {
        my ($from, $to) = split(/:/, $opt->{region});
        my $regsql = "to1 >= $from and from1 <= $to ";
        if ($from < 0) {
            $from += $chrlen;
            $to   += $chrlen;
            $regsql = "(($regsql) or (to1 >= $from and from1 <= $to))";
        }
        elsif ($to > $chrlen) {
            $from -= $chrlen;
            $to   -= $chrlen;
            $regsql = "(($regsql) or (to1 >= $from and from1 <= $to))";
        }
        $sql .= "and $regsql";
    }
    $sql .= "order by (from1+to1)/2\n";

    my @output;
    my $sth = $db->execute($sql);
    # Lexical row variable (this used to write to a package global).
    while (my $row = $sth->fetchrow_hashref) {
        push(@output, $row);
    }
    return @output;
}

###############################################################################
#
sub getSortedCluster {
    # Returns gene rows of one chromosome or contig ordered by gene midpoint,
    # joined with the cluster tables of $tabid.
    #   $tabid, $sp, $chrid, $contigid : as for getSortedClusterSimple()
    #   $opt : {region} => "from:to";
    #          {search} => { phylopat, mismatch, funccat_type } restricts by
    #          phylogenetic pattern and selects the classification column.
    # Without $opt->{search} the work is delegated to
    # getSortedClusterSimple().
    my ($tabid, $sp, $chrid, $contigid, $opt) = @_;
    my ($dbname) = getDbname();

    if (! $opt->{search}) {
        return getSortedClusterSimple($tabid, $sp, $chrid, $contigid, $opt);
    }
    my $search = $opt->{search};

    my ($tmpdbi) = getTmpDbi();
    my $db = MBGD::DB->new($tmpdbi);
    my ($table_clusttab, $table_cluster) =
        ("${TBL_CLUSTTAB}_$tabid", "${TBL_CLUSTER}_$tabid");

    my $class_name = "c1.mbgd";
    if (defined($search->{'funccat_type'})) {
        $class_name = "c1." . $search->{'funccat_type'};
    }

    my $sql = "select c1.clustid,c1.phylopat,$class_name,g.sp,g.name," .
            "g.from1,g.to1,g.dir " .
            "from $dbname.gene g " .
            "left join $table_cluster c2 " .
            "on c2.spname=concat(g.sp,':',g.name) ".
            "left join $table_clusttab c1 " .
            "on c1.clustid=c2.clustid ".
            "where g.sp='$sp' ";
    $sql .= "and g.chrid=$chrid " if ($chrid);
    $sql .= "and g.contigid=$contigid " if ($contigid);
    if ($opt->{region}) {
        my ($from, $to) = split(/:/, $opt->{region});
        $sql .= "and g.to1 >= $from and g.from1 <= $to ";
    }
    if ($search->{phylopat}) {
        if ($search->{mismatch}) {
            ### NOTE: mismatch(str1,str2,mismatch) is a user
            ###       defined function in mysql
            $sql .= "and mismatch(c1.phylopat, '$search->{phylopat}', $search->{mismatch}) ";
        }
        else {
            $sql .= "and c1.phylopat like '$search->{phylopat}' ";
        }
    }
    $sql .= "order by (g.from1+g.to1)/2\n";

    my @output;
    my $sth = $db->execute($sql);
    # Lexical row variable (this used to write to a package global).
    while (my $row = $sth->fetchrow_hashref) {
        push(@output, $row);
    }
    return @output;
}

###############################################################################
#
sub countClustersBySP {
    # Counts the rows of the cluster table of $tabid per species.
    # Returns a hash ref { sp => count }.
    my ($tabid) = @_;

    my ($dsn) = getTmpDbi();
    my $db    = MBGD::DB->new($dsn);
    my $sth   = $db->execute("select sp, count(name) cnt from ${TBL_CLUSTER}_$tabid group by sp");

    my %count_of;
    while (my $row = $sth->fetchrow_hashref) {
        $count_of{ $row->{sp} } = $row->{cnt};
    }
    return \%count_of;
}

###############################################################################
#
sub getMotifListForClustTab {
    # Collects protein-motif hits for the genes of the given clusters.
    #   $tabid : cluster-table id
    #   $clid  : array ref of cluster ids; {} is returned when it is empty
    #   $opt   : {motif_eval} upper bound on m.eval,
    #            {motlib} comma-separated motif libraries to keep,
    #            {splist} species list used when 1000 or more clusters are
    #            requested (the clusters are then not joined)
    # Returns a hash ref with
    #   {INFO}{SPNAME}{"sp:name"} = [ motid, ... ]
    #   {INFO}{MOTID}{motid}      = motname
    #   {INFO}{MOTLIB}{motid}     = motlib
    #   {INFO}{PROGRAM}{motid}    = program
    # or undef when the query yields no rows.
    my ($tabid, $clid, $opt) = @_;
    return {} if (! @{$clid});

    my ($dbname) = getDbname();
    my ($tmpdbi) = getTmpDbi();
    my $db = MBGD::DB->new($tmpdbi);

    # Both restrictions are kept when both are given (the library filter
    # used to overwrite the e-value filter).
    my @conds;
    if ($opt->{motif_eval}) {
        push(@conds, "m.eval <= $opt->{motif_eval}");
    }
    if ($opt->{motlib}) {
        # join() yields a list without a trailing comma; the former
        # "chomp" did not remove it and produced invalid SQL.
        my $motlib_spec = join(',', map { "'$_'" } split(/,/, $opt->{motlib}));
        push(@conds, "m.motlib in ($motlib_spec)");
    }

    # Both variants select the same six columns, in the order expected by
    # the row unpacking below (the first variant used to omit m.program,
    # which shifted motid/motname by one position).
    my $sql;
    if (@{$clid} >= 1000) {
        $sql = "select m.sp,m.name,m.motlib,m.program,m.motid,m.motname"
            . " from $dbname.protmotif m"
            . " where 1 ";
        if ($opt->{splist}) {
            my $splist = join("','", @{$opt->{splist}});
            $sql .= " and m.sp in ('$splist') ";
        }
    }
    else {
        my $clid_list = join(',', @{$clid});
        $sql = "select m.sp,m.name,m.motlib,m.program,m.motid,m.motname"
            . " from ${TBL_CLUSTER}_$tabid c, $dbname.protmotif m"
            . " where c.sp=m.sp and c.name=m.name"
            . "   and c.clustid in ($clid_list)";
    }
    $sql .= " and $_" foreach (@conds);

    my $res;
    my $sth = $db->execute($sql);
    while (my @row = $sth->fetchrow_array) {
        my ($sp, $name, $motlib, $program, $motid, $motname) = @row;
        push(@{$res->{INFO}->{SPNAME}->{"$sp:$name"}}, $motid);
        $res->{INFO}->{MOTID}->{$motid}   = $motname;
        $res->{INFO}->{MOTLIB}->{$motid}  = $motlib;
        $res->{INFO}->{PROGRAM}->{$motid} = $program;
    }
    return $res;
}

sub summaryMap {
    # Dispatcher: table ids containing "addspec" (case-insensitive) are
    # summarised by summaryMapForAddspec(), all others by summaryMap_orig().
    # The argument list is passed on unchanged.
    my ($this, $tabid, %Opt) = @_;

    return ($tabid =~ /addspec/i)
        ? summaryMapForAddspec(@_)
        : summaryMap_orig(@_);
}

###############################################################################
#
sub summaryMap_orig {
    # Summarises the clusters selected by %Opt per phylogenetic pattern and
    # per functional category.
    #   $tabid : unused; the table of $this->{tabid} is summarised
    #   %Opt   : search options for searchClustTableDB(), plus
    #            cluster_color (classification column, default 'mbgd'),
    #            summaryOutput (maximum number of patterns) and db
    #            (database handle to reuse)
    # Returns (\%CountPat, \%CountClass, \%CountTotClass, $totcnt):
    #   %CountPat      = { phylopat => count }
    #   %CountClass    = { phylopat => { funccat => count } }
    #   %CountTotClass = { funccat => count }
    #   $totcnt        = sum of the counts in %CountTotClass
    my ($this, $tabid, %Opt) = @_;
    my ($tmpdbi) = getTmpDbi();
    my $opt = \%Opt;

    # Same default as searchClustTableDB(); without it an unset
    # cluster_color produced the invalid column reference "c1.".
    my $func_cat_field = $opt->{cluster_color} || 'mbgd';

    # NOTE(review): the temporary table below is created through
    # $this->{db} but read through this handle -- this presumably relies on
    # both referring to the same connection; confirm.
    $opt->{db} = MBGD::DB->new($tmpdbi) if (! $opt->{db});
    my $db = $opt->{db};
    my $cltab_tmp = "cltab_tmp";

    my (%CountPat, %CountClass, %CountTotClass);

    # Store the selected clusters in a temporary table.
    $opt->{store_tmptab} = $cltab_tmp;
    $opt->{store_key}    = 'phylopat(255)';
    $opt->{columns}      = [ 'clustid', 'phylopat', $func_cat_field ];
    $opt->{tabid}        = $this->{tabid};
    $this->searchClustTableDB($opt);

    ## counting phylogenetic patterns
    my $sql = "create temporary table clmap_tmp "
            . "select phylopat,count(*) cnt "
            . "from $cltab_tmp "
            . "group by phylopat "
            . "order by cnt desc ";
    $sql .= "limit $opt->{summaryOutput}" if ($opt->{summaryOutput});
    my $sth = $db->prepare($sql);
    $sth->execute;

    ## per-pattern counts, broken down by functional category
    $sql = "select c.clustid, c.phylopat, c.$func_cat_field, t.cnt "
         . "from clmap_tmp t, $cltab_tmp c "
         . "where c.phylopat=t.phylopat "
         . "order by t.cnt desc";
    $sth = $db->prepare($sql);
    $sth->execute;
    while (my $row = $sth->fetchrow_hashref) {
        $CountPat{$row->{phylopat}} = $row->{cnt};
        $CountClass{$row->{phylopat}}->{$row->{$func_cat_field}}++;
    }

    ## totals per functional category
    my $totcnt = 0;
    $sql = "select $func_cat_field, count($func_cat_field) cnt "
         . "from $cltab_tmp "
         . "group by $func_cat_field";
    $sth = $db->prepare($sql);
    $sth->execute;
    while (my $row = $sth->fetchrow_hashref) {
        $CountTotClass{$row->{$func_cat_field}} = $row->{cnt};
        $totcnt += $row->{cnt};
    }

    return (\%CountPat, \%CountClass, \%CountTotClass, $totcnt);
}

###############################################################################
#
sub summaryMapForAddspec {
    # Variant of summaryMap_orig() for "addspec" tables: every phylogenetic
    # pattern is first masked with the pattern built by
    # make_phylopat_pattern_for_addspec(), and the minspnum condition is
    # re-applied to the masked pattern.
    #   $tabid : unused; the table of $this->{tabid} is summarised
    #   %Opt   : search options for searchClustTableDB(), plus
    #            cluster_color (classification column, default 'mbgd'),
    #            minspnum and db (database handle to reuse)
    # Returns (\%CountPat, \%CountClass, \%CountTotClass, $totcnt):
    #   %CountPat      = { masked phylopat => count }
    #   %CountClass    = { masked phylopat => { funccat => count } }
    #   %CountTotClass = { funccat => count }
    #   $totcnt        = sum of the counts in %CountTotClass
    my ($this, $tabid, %Opt) = @_;
    my ($tmpdbi) = getTmpDbi();
    my $opt = \%Opt;

    my $phylopat_pattern = make_phylopat_pattern_for_addspec($opt);

    # Same default as searchClustTableDB(); without it an unset
    # cluster_color produced the invalid column reference "c1.".
    my $func_cat_field = $opt->{cluster_color} || 'mbgd';

    # NOTE(review): the temporary table below is created through
    # $this->{db} but read through this handle -- this presumably relies on
    # both referring to the same connection; confirm.
    $opt->{db} = MBGD::DB->new($tmpdbi) if (! $opt->{db});
    my $db = $opt->{db};
    my $cltab_tmp = "cltab_tmp";

    my (%CountPat, %CountClass, %CountTotClass);

    $opt->{store_tmptab} = $cltab_tmp;
    $opt->{store_key}    = 'phylopat(255)';
    $opt->{columns}      = [ 'clustid', 'phylopat', $func_cat_field ];
    $opt->{tabid}        = $this->{tabid};

    # For addspec the minimum-species condition applied by the search is not
    # sufficient, so it is checked again on the masked pattern below.
    $this->searchClustTableDB($opt);

    # Masks a pattern so that species other than the addspec ones count as
    # absent; returns undef when fewer than minspnum species remain.
    my $masked_or_skip = sub {
        my ($phylopat) = @_;
        my $masked = mask_phylopat_for_addspec($phylopat, $phylopat_pattern);
        if (0 < $opt->{'minspnum'}) {
            my $n_present = ($masked =~ tr/1//);
            return undef if ($n_present < $opt->{'minspnum'});
        }
        return $masked;
    };

    ## counting phylogenetic patterns
    my $sql = "select phylopat,count(*) cnt "
            . "from $cltab_tmp "
            . "group by phylopat ";
    my $sth = $db->prepare($sql);
    $sth->execute;
    while (my $row = $sth->fetchrow_hashref) {
        my $masked = $masked_or_skip->($row->{'phylopat'});
        next if (! defined($masked));
        $CountPat{"$masked"} += $row->{cnt};
    }

    ## per-pattern counts, broken down by functional category
    $sql = "select c.clustid, c.phylopat, c.$func_cat_field "
         . "from $cltab_tmp c ";
    $sth = $db->prepare($sql);
    $sth->execute;
    while (my $row = $sth->fetchrow_hashref) {
        my $masked = $masked_or_skip->($row->{'phylopat'});
        next if (! defined($masked));
        $CountClass{"$masked"}->{$row->{$func_cat_field}}++;
    }

    ## totals per functional category
    my $totcnt = 0;
    $sql = "select phylopat, $func_cat_field, count($func_cat_field) cnt "
         . "from $cltab_tmp "
         . "group by phylopat,$func_cat_field";
    $sth = $db->prepare($sql);
    $sth->execute;
    while (my $row = $sth->fetchrow_hashref) {
        my $masked = $masked_or_skip->($row->{'phylopat'});
        next if (! defined($masked));
        # Rows are grouped by pattern AND category, so one category occurs
        # in several rows: accumulate instead of overwriting.
        $CountTotClass{$row->{$func_cat_field}} += $row->{cnt};
        $totcnt += $row->{cnt};
    }

    return (\%CountPat, \%CountClass, \%CountTotClass, $totcnt);
}

sub make_phylopat_pattern_for_addspec {
    # Builds a mask with one character per species returned by
    # main::MBGD_SpecTableGetAllSpec(): '1' at the positions of the species
    # returned by main::getSpeciesForAddspec($opt) (separated by ',' or '|'),
    # '_' everywhere else.
    my ($opt) = @_;

    # Position of every known species in the master list.
    my @all_spec_list = main::MBGD_SpecTableGetAllSpec();
    my %all_spec_idx;
    @all_spec_idx{@all_spec_list} = (0 .. $#all_spec_list);

    my $species_addspec = main::getSpeciesForAddspec($opt);
    my $pattern = '_' x scalar(@all_spec_list);
    foreach my $sp (split(/[\,\|]/, $species_addspec)) {
        # A species missing from the master list has no position; skipping
        # it avoids wrongly marking position 0.
        next if (! exists($all_spec_idx{"$sp"}));
        substr($pattern, $all_spec_idx{"$sp"}, 1) = '1';
    }

    return $pattern;
}

#
sub mask_phylopat_for_addspec {
    # Returns a copy of $phylopat in which every position whose character
    # in $pattern is not '1' has been replaced by '_'.
    my ($phylopat, $pattern) = @_;

    for my $pos (0 .. length($pattern) - 1) {
        next if (substr($pattern, $pos, 1) eq '1');
        substr($phylopat, $pos, 1) = '_';
    }

    return $phylopat;
}

###############################################################################
#
$main::CACHE_ClustOpt = {};
sub parseClustringOpt {
    # Parses a clustering command line into a hash ref { option => value }.
    # Options are tokens of the form " -key" or " -key=value" (ended by
    # whitespace or '|').  For a repeated key the first value wins, one
    # level of surrounding single and/or double quotes is stripped from the
    # value, and the SPEC value is re-joined with its elements sorted.
    # Results are cached in $main::CACHE_ClustOpt, keyed by the command text.
    my ($opt) = @_;

    my $cache_key = "$opt";
    if (exists($main::CACHE_ClustOpt->{$cache_key})) {
        return $main::CACHE_ClustOpt->{$cache_key};
    }

    # Options that do not influence the clustering result.
    my %skipKey = map { $_ => 1 } ('', 'dbname', 'DIR', 'tabout');

    my $parsed = {};
    while ($opt =~ m/\s\-([^\s\|]+)/g) {
        my $token = $1;
        my ($k, $v) = split('=', $token);

        next if ($skipKey{"$k"});
        next if (exists($parsed->{"$k"}));

        # Strip the value if it is quoted with ' and then if it is quoted with ".
        foreach my $quote ("'", '"') {
            if ($v =~ /^\Q$quote\E.*\Q$quote\E$/) {
                $v =~ s/^\Q$quote\E//;
                $v =~ s/\Q$quote\E$//;
            }
        }
        $parsed->{"$k"} = $v;
    }

    # The species list is kept sorted.
    if (exists($parsed->{'SPEC'})) {
        $parsed->{'SPEC'} = join(',', sort(split(',', $parsed->{'SPEC'})));
    }

    # Save the parsed result.
    $main::CACHE_ClustOpt->{$cache_key} = $parsed;

    return $parsed;
}

###############################################################################
#
sub matchClustringOpt {
    # Compares two clustering command lines after parsing them with
    # parseClustringOpt().  Returns 1 when both have the same set of options
    # with equal values, 0 otherwise.  Diagnostics are written to STDERR
    # when $main::TESTMODE is true.
    my ($opt1, $opt2) = @_;

    my $parsed1  = parseClustringOpt($opt1);
    my @keyList1 = sort keys(%{$parsed1});
    my $parsed2  = parseClustringOpt($opt2);
    my @keyList2 = sort keys(%{$parsed2});

    print STDERR "DBG :: CHECK clustering options\n" if ($main::TESTMODE);

    # The number of keys differs.
    unless (scalar(@keyList1) == scalar(@keyList2)) {
        if ($main::TESTMODE) {
            print STDERR "DBG :: Not match :: # of options\n";
            print STDERR "DBG :: KEY1 :: @keyList1\n";
            print STDERR "DBG :: KEY2 :: @keyList2\n";
        }
        return 0;
    }

    foreach my $key (@keyList1) {
        # The key does not exist on the other side.
        unless (exists($parsed2->{"$key"})) {
            print STDERR "DBG :: Not found key :: $key\n" if ($main::TESTMODE);
            return 0;
        }

        # The values differ.
        unless ($parsed1->{"$key"} eq $parsed2->{"$key"}) {
            print STDERR "DBG :: Not same value :: $key\n" if ($main::TESTMODE);
            return 0;
        }
    }

    return 1;
}

###############################################################################
if ($0 eq __FILE__) {
    # Self-test, executed only when this file is run directly: builds the
    # summary of the 'default' table and prints it.
    #
    # Earlier experiments, kept for reference:
    #	summaryMap(2087162,30,{spnum=>5});
    #	MBGD::ClustTab::DB::readClusterTable('RDB:default', {
    #		minout=>1, outputnum=>100, with_totalcount=>1,
    #	});
    my @summary_opt = (minout=>1, outputnum=>100, with_totalcount=>1);

    $dbtab    = MBGD::ClustTab::DB->new('default');
    $clusttab = $dbtab->retrieveSummary(@summary_opt);
    $clusttab->show_summary;
}

###############################################################################
1;
