#!/usr/local/bin/perl -s

use MBGD;
use MbgdUserGenomeCommon;
require "MBGD_Conf.pl";

##($dbname) = ($ENV{'MYSQL_DB'} =~ /^dbi:mysql:(.+)$/);
##$tmpdbi = $ENV{'MYSQL_DBTMP'};

#$dbname = $main::DBNAME_MBGD;
# Return the name of the MBGD database to use for the current request.
#
# The user id is read from $ENV{'MBGD_UID'}.  When both
# MbgdUserGenomeCommon::isContainUserGenome() and
# MbgdUserGenomeCommon::isReadyUserGenome() are true for that id, the name
# given by MbgdUserGenomeCommon::getDbname() is returned; otherwise the
# default $main::DBNAME_MBGD is returned.
sub getDbname {
    my $uid = $ENV{'MBGD_UID'};

    my $use_user_db = MbgdUserGenomeCommon::isContainUserGenome($uid)
                   && MbgdUserGenomeCommon::isReadyUserGenome($uid);
    unless ($use_user_db) {
        return $main::DBNAME_MBGD;
    }

    my $user_dbname = MbgdUserGenomeCommon::getDbname($uid);
    return $user_dbname;
}
# Return the name of the database that holds temporary/cluster tables.
#
# Defaults to $main::DBNAME_TMP.  When the user identified by
# $ENV{'MBGD_UID'} passes both isContainUserGenome() and
# isReadyUserGenome(), the name from MbgdUserGenomeCommon::getDbname() is
# used instead (the same name getDbname() returns in that case).
sub getTmpDbname {
    my $uid = $ENV{'MBGD_UID'};
    my $name = $main::DBNAME_TMP;

    my $is_ready = MbgdUserGenomeCommon::isContainUserGenome($uid)
                && MbgdUserGenomeCommon::isReadyUserGenome($uid);
    $name = MbgdUserGenomeCommon::getDbname($uid) if ($is_ready);

    return $name;
}

#$tmpdbi = $main::MYSQL_DBTMP;
# Return the "dbi:mysql:<name>" data-source string for the database named
# by getTmpDbname().
sub getTmpDbi {
    my $name = getTmpDbname();
    return "dbi:mysql:$name";
}

#print STDERR "TMPDB : $tmpdbi\n";

# Check whether a cluster table id is registered in "cluster_tables".
#
# Arguments:
#   $id - the tabid value to look up
# Returns:
#   1 if a row with that tabid exists in any searched database, 0 otherwise.
#
# The user's database (MbgdUserGenomeCommon::getDbname for
# $ENV{'MBGD_UID'}) is searched first, but only when its name matches
# "<DBNAME_MBGD>_<digits>"; $main::DBNAME_TMP is always searched.
# Databases whose connection could not be opened are skipped.
sub testClustTableID {
    my($id) = @_;
    my(@dbnameList);

    my($uid) = $ENV{'MBGD_UID'};
    my $dbname = MbgdUserGenomeCommon::getDbname($uid);
    if ($dbname =~ /$main::DBNAME_MBGD\_\d+/) {
        push(@dbnameList, $dbname);
    }
    push(@dbnameList, $main::DBNAME_TMP);

    # Lexical loop variable: the original iterated with a package global.
    foreach my $name (@dbnameList) {
        my $db = MBGD::DB->new($name);
        if ($db->{'conn'} =~ /^\s*$/) {
            # connection failed
            next;
        }

        # Bind the id as a placeholder instead of interpolating it, so a
        # quote character in $id cannot alter the statement.
        my $sth = $db->prepare("select * from cluster_tables where tabid=?");
        $sth->execute($id);
        return 1 if ($sth->fetchrow_hashref);
    }

    return 0;
}
# Look up the id of an existing cluster table built with an equivalent
# command line.
#
# Arguments:
#   $command - clustering command line to look for
# Returns:
#   the tabid of the first row of "cluster_tables" whose cmd column matches
#   $command according to matchClustringOpt(), or '' when none matches.
#
# $main::DBNAME_TMP is always searched; the database selected by
# getTmpDbname() is searched as well when it is a different one.  The
# "cluster_tables" table is created in each searched database if missing.
sub getClustTableID {
    my($command) = @_;

    my(@dbNameList) = ($main::DBNAME_TMP);
    # Compare bare database names.  The original compared the
    # "dbi:mysql:..." string from getTmpDbi() with the bare name, which is
    # never equal, so the default database was opened and scanned twice.
    if (getTmpDbname() ne $main::DBNAME_TMP) {
        push(@dbNameList, getTmpDbi());
    }

    foreach my $target (@dbNameList) {
        my $db = MBGD::DB->new($target);
        $db->execute("create table if not exists cluster_tables " .
                     "(spec text, cmd text,
		tabid char(10) not null,
		cdate timestamp,
		primary key(tabid))");

        # Every row is compared through matchClustringOpt() rather than by
        # an exact match on the cmd column.
        my $sth = $db->prepare("select * from cluster_tables ");
        $sth->execute();
        while (my $row = $sth->fetchrow_hashref()) {
            if (matchClustringOpt($command, $row->{'cmd'})) {
                return $row->{'tabid'};
            }
        }
    }
    return '';
}
# Search the cluster table "clusttab_<pid>" (joined with "cluster_<pid>"
# and "<dbname>.gene" when required) and return the matching rows.
#
# Arguments:
#   $pid - cluster table id, used to build the table names
#   $opt - hash ref of options.  Keys read here: genes, phylopat, mismatch,
#          sortby, refspec, ref_gene, with_totalcount, find_no_ortholog,
#          minout, outputnum, no_limit, add_geneinfo, minorfnum, minspnum,
#          columns, splist, store_tmptab, db.
#          $opt->{db} is filled in with a new connection when empty, and
#          $opt->{get_clustid_list} is set when add_geneinfo is given.
# Returns:
#   nothing when $opt->{store_tmptab} is set (the result is stored in that
#   temporary table); otherwise ($result, $geneinfo, $optionOut), where
#   $result is the row list from queryClustTableDB(), $geneinfo maps
#   spname => { field => value } for the add_geneinfo fields, and
#   $optionOut may carry totalcount, refpos and no_ortholog.
#
# All working variables are lexical.  In the original several of them were
# package globals, so a LIMIT clause or the "found" set of one call could
# leak into the next call.
sub searchClustTableDB {
    my($pid, $opt) = @_;
    our $mismatch;      # legacy package global, kept as a fallback below
    my $genes = $opt->{genes};
    my @genes = $genes ? @{$genes} : ();
    my $phylopat = $opt->{phylopat};
    # Prefer the per-call option; fall back to the package global that the
    # original code read, so callers that set it keep working.
    my $max_mismatch = defined($opt->{mismatch}) ? $opt->{mismatch} : $mismatch;
    my $sortby = $opt->{sortby};
    my $refspec = $opt->{refspec};
    my $with_totalcount = $opt->{with_totalcount};
    my $find_no_ortholog = $opt->{find_no_ortholog};
    my($minout, $outputnum) = ($opt->{minout}, $opt->{outputnum});
    my $add_geneinfo = $opt->{add_geneinfo};
    my $minorfnum = $opt->{minorfnum};
    my $minspnum = $opt->{minspnum};
    my $geneinfo;
    my $optionOut = {};
    my $columns;
    my($select, $from, $where, $orderby);
    my $limit = '';
    my($result, $clustid, $orphans);
    my($dbname) = getDbname();
    my($tmpdbi) = getTmpDbi();

    $opt->{db} = MBGD::DB->new($tmpdbi) if (! $opt->{db});

    if ($opt->{columns}) {
        $columns = join(",", map { "c1.$_" } @{$opt->{columns}});
    } else {
        $columns = "c1.*";
    }

    if (! ($sortby eq 'geneorder' || @genes || $add_geneinfo)) {
        # Plain search on the cluster table only.
        $select = "select $columns ";
        $from = "from clusttab_$pid c1 ";
        $where = 'where 1 ';
    } elsif ($sortby eq 'geneorder' && $refspec) {
        # NOTE(review): this package global is only written here and never
        # read in this file; kept in case external code consults it.
        $sortby_geneorder = 1;
        $columns .= ",g.sp,g.name";
        $select = "select distinct $columns ";
        # Joining on spname works better than joining on sp and name,
        # possibly because spname is the primary key of the cluster table.
        $from = "from $dbname.gene g " .
            "left join cluster_$pid c2 " .
            "on c2.spname=concat(g.sp,':',g.name) ".
            "left join clusttab_$pid c1 " .
            "on c1.clustid=c2.clustid ";
        $where = "where g.sp='$refspec' ";
        if ($opt->{ref_gene} && $opt->{outputnum} <= 100) {
            # Restrict the scan to +/-100000 positions around the
            # reference gene.
            my($gene) = "$refspec:$opt->{ref_gene}";
            my($db) = MBGD::DB->new;
            my($g) = MBGD::Gene->get($db, [$gene]);
            if ($g) {
                my $range_from = $g->{from1} - 100000;
                my $range_to = $g->{to1} + 100000;
                $where .= " and g.to1 >= $range_from  and g.from1 <= $range_to ";
            }
        }
    } else {
        $select = "select distinct $columns ";
        $from = "from clusttab_$pid c1, cluster_$pid c2 ";
        $where = "where c1.clustid=c2.clustid";
    }

    if ($phylopat) {
        if ($max_mismatch) {
            ### NOTE: mismatch(str1,str2,mismatch) is a user
            ###       defined function in mysql
            $where .= " and mismatch(c1.phylopat, '$phylopat', $max_mismatch)";
        } else {
            $where .= " and c1.phylopat like '$phylopat'";
        }
    }
    if (@genes > 0) {
        my $gene_list = "'" . join("','", @genes) . "'";
        $where .= " and c2.spname in ($gene_list)";
    }
    # The size filters are not applied in gene-order mode with a reference
    # species.
    if (! ($sortby eq 'geneorder' && $refspec)) {
        if ($minspnum) {
            $where .= " and c1.spnum >= $minspnum";
        }
        if ($minorfnum) {
            $where .= " and c1.orfnum >= $minorfnum";
        }
    }
    if ($sortby eq 'geneorder') {
        $orderby = " order by g.chrid,(g.from1 + g.to1)/2";
    } else {
        $orderby = " order by c1.class,c1.gene";
    }
    if ($add_geneinfo) {
        $opt->{get_clustid_list}=1;
    }
    if (! $opt->{no_limit} && $outputnum) {
        # minout is 1-based; the SQL offset begins with 0.  A missing or
        # zero minout is clamped so the offset never becomes negative.
        my $offset = ($minout > 0) ? $minout - 1 : 0;
        $limit = " limit $offset,$outputnum";
    }
    my $query = "$select $from $where $orderby $limit";

    if ($with_totalcount) {
        my $sth = $opt->{db}->execute("select count(distinct c1.clustid) $from $where");
        ($optionOut->{totalcount}) = $sth->fetchrow_array;
    }

    if ($opt->{store_tmptab}) {
        queryClustTableDB_storeTmpTab($opt->{db}, $query, $opt);
        return;
    }

    my $optout;
    ($result, $clustid, $orphans, $optout)
        = queryClustTableDB($opt->{db}, $query, $opt, {});
    $optionOut->{refpos} = $optout ? $optout->{refpos} : undef;

    if ($add_geneinfo && @{$clustid}) {
        my($clidlist) = join(',', @{$clustid});
        my(@ginfo_fld) = split(/,/, $add_geneinfo);
        my $ginfo_fld = join(",", map { " g.$_" } @ginfo_fld);

        my $sql;
        if (@{$clustid} < 1000) {
            $sql = "select c.spname, $ginfo_fld " .
                "from cluster_$pid c, $dbname.gene g " .
                "where c.sp=g.sp and c.name=g.name " .
                "and c.clustid in ($clidlist)";
        } else {
            # NOTE(review): this query (and the orphan query below) does
            # not select spname, so $h->{spname} is only defined when
            # spname is one of the requested fields -- confirm intent.
            $sql = "select $ginfo_fld from $dbname.gene g ";
            if ($opt->{splist}) {
                my $splist = join("','", @{$opt->{splist}});
                $sql .= " where sp in ('$splist')";
            }
        }
        my $sth = $opt->{db}->execute($sql);
        while (my $h = $sth->fetchrow_hashref) {
            foreach my $field (@ginfo_fld) {
                $geneinfo->{$h->{spname}}->{$field} = $h->{$field};
            }
        }

        if ($sortby eq 'geneorder' && $refspec && @{$orphans}) {
            # Genes of the reference species that belong to no cluster.
            my $orphan_list = join("','", @{$orphans});
            $sql = "select $ginfo_fld from $dbname.gene g " .
                "where sp='$refspec' and name in ('$orphan_list')";
            my $sth2 = $opt->{db}->execute($sql);
            while (my $h = $sth2->fetchrow_hashref) {
                foreach my $field (@ginfo_fld) {
                    $geneinfo->{$h->{spname}}->{$field} = $h->{$field};
                }
            }
        }
    }

    if (@genes > 0 && $find_no_ortholog) {
        # Report the requested genes that did not appear in any cluster.
        # %found must be lexical: the original "my $a, %found;" left it a
        # package global that remembered genes from earlier calls.
        my %found;
        my $sth = $opt->{db}->execute("select c2.spname $from $where");
        while (my($spname) = $sth->fetchrow_array) {
            $found{$spname} = 1;
        }
        my @no_ortholog = grep { ! $found{$_} } @genes;
        $optionOut->{no_ortholog} = \@no_ortholog;
    }
    return ($result, $geneinfo, $optionOut);
}
# Run $query and store its result in a temporary table.
#
# Arguments:
#   $db    - database handle object providing execute($sql)
#   $query - the select statement whose rows are stored
#   $opt   - hash ref; store_tmptab is the temporary table name and the
#            optional store_key names a column to index
# Returns:
#   an empty list.
#
# NOTE(review): "TYPE=HEAP" is the old spelling of "ENGINE=MEMORY"; recent
# MySQL servers no longer accept the TYPE option -- confirm the target
# server version before changing it.
sub queryClustTableDB_storeTmpTab {
    my($db, $query, $opt) = @_;
    # Plain conditional instead of "my ... if ...", whose behaviour is
    # documented as undefined.
    my $idxopt = $opt->{store_key} ? "(KEY ($opt->{store_key}))" : '';
    my $typeopt = "TYPE=HEAP";
    my $create = "create temporary table $opt->{store_tmptab} $idxopt $typeopt $query";
    $db->execute($create);
    return();
}
# Execute $query and collect its rows.
#
# Arguments:
#   $db     - database handle object providing execute($sql)
#   $query  - the select statement to run
#   $opt    - hash ref; keys read: ref_gene, minout, outputnum,
#             get_clustid_list
#   $optout - optional hash ref that receives refpos; a fresh hash is used
#             when it is omitted
# Returns:
#   (\@result, \@clustid, \@orphans, $optout), where @result holds the
#   "data" column of each returned row.  With get_clustid_list, @clustid
#   holds the clustid of rows that have one and @orphans the name of rows
#   that have a name but no clustid.
#
# When ref_gene is given, the query is run once to find the 1-based
# position of the row whose name equals ref_gene (stored as refpos), and
# only rows inside a window of outputnum rows around that position are
# returned.
sub queryClustTableDB {
    my($db, $query, $opt, $optout) = @_;
    my(@result, @clustid, @orphans);
    my($cnt, $refpos, $minpos, $maxpos);

    # The original ignored the fourth argument and wrote to a package
    # global, so refpos from an earlier call leaked into later results.
    $optout = {} if (! $optout);

    if ($opt->{ref_gene}) {
        $cnt = 0;
        my $sth = $db->execute($query);
        while (my $row = $sth->fetchrow_hashref) {
            $cnt++;
            if ($row->{name} eq $opt->{ref_gene}) {
                $refpos = $cnt;
                last;
            }
        }
        $minpos = $refpos + ($opt->{minout} - 1)
            - int($opt->{outputnum} / 2);
        $maxpos = $minpos + $opt->{outputnum} - 1;
        $optout->{refpos} = $refpos;
    }

    $cnt = 0;
    my $sth = $db->execute($query);
    while (my $row = $sth->fetchrow_hashref) {
        $cnt++;
        if ($refpos) {
            # Windowing applies only when the reference gene was found.
            next if ($cnt < $minpos);
            last if ($cnt > $maxpos);
        }
        push(@result, $row->{data});
        if ($opt->{get_clustid_list}) {
            if ($row->{clustid}) {
                push(@clustid, $row->{clustid});
            } elsif ($row->{name}) {
                ## an orphan gene in refspec
                push(@orphans, $row->{name});
            }
        }
    }
    return (\@result, \@clustid, \@orphans, $optout);
}
# NOTE(review): this looks like an unfinished stub.  It accepts a table id
# and a gene list but uses neither; it only opens a connection to the
# database given by getTmpDbi() and returns that connection object.
sub findNoOrtholog {
    my($tabid, $genes) = @_;

    my $handle = MBGD::DB->new(getTmpDbi());
    return $handle;
}
# List the genes of one species in positional order together with the
# cluster each gene belongs to.
#
# Arguments:
#   $tabid - cluster table id; selects "clusttab_<tabid>" and
#            "cluster_<tabid>"
#   $seq   - "<sp>" or "<sp>:<chrid>"; the optional second part restricts
#            the result to rows with that g.chrid
#   $opt   - hash ref; keys read: region ("<from>:<to>") and
#            search->{phylopat} / search->{mismatch}
# Returns:
#   a list of hash refs with the columns clustid, phylopat, class, sp,
#   name, from1, to1 and dir, ordered by (from1+to1)/2.  Genes without a
#   cluster are included because the cluster tables are left-joined.
sub getSortedCluster {
    my($tabid, $seq, $opt) = @_;
    my($dbname) = getDbname();
    my($tmpdbi) = getTmpDbi();

    my $db = MBGD::DB->new($tmpdbi);
    my($sp, $seqid) = split(/:/, $seq);
    my(@output);
    my($table_clusttab, $table_cluster) =
        ("clusttab_$tabid", "cluster_$tabid");
    # Read the search options through a local reference so that looking
    # them up does not create a "search" entry in the caller's hash.
    my $search = $opt->{search} || {};

    # The join uses c2.spname=concat(g.sp,':',g.name) rather than separate
    # sp and name comparisons.
    my $sql = "select c1.clustid,c1.phylopat,c1.class,g.sp,g.name," .
            "g.from1,g.to1,g.dir " .
            "from $dbname.gene g " .
            "left join $table_cluster c2 " .
            "on c2.spname=concat(g.sp,':',g.name) ".
            "left join $table_clusttab c1 " .
            "on c1.clustid=c2.clustid ".
            "where g.sp='$sp' ";
    $sql .= "and g.chrid=$seqid " if ($seqid);
    if ($opt->{region}) {
        my($from,$to) = split(/:/,$opt->{region});
        $sql .= "and g.to1 >= $from and g.from1 <= $to ";
    }
    if ($search->{phylopat}) {
        if ($search->{mismatch}) {
            ### NOTE: mismatch(str1,str2,mismatch) is a user
            ###       defined function in mysql
            $sql .= "and mismatch(c1.phylopat, '$search->{phylopat}', $search->{mismatch}) ";
        } else {
            $sql .= "and c1.phylopat like '$search->{phylopat}' ";
        }
    }
    $sql .= "order by (g.from1+g.to1)/2\n";

    my $sth = $db->execute($sql);
    # Lexical row variable: the original assigned to a package global.
    while (my $row = $sth->fetchrow_hashref) {
        push(@output, $row);
    }
    return @output;
}
# Count the entries of "cluster_<tabid>" per species.
#
# Arguments:
#   $tabid - cluster table id
# Returns:
#   a hash ref mapping each sp value to count(name) for that sp.
#
# The table is read from the database given by getTmpDbi().  The original
# also looked up getDbname() but never used the result; that lookup is
# dropped here.
sub countClustersBySP {
    my($tabid) = @_;
    my %count_of;

    my $db = MBGD::DB->new(getTmpDbi());
    my $sth = $db->execute("select sp, count(name) cnt from cluster_$tabid group by sp");
    while (my $row = $sth->fetchrow_hashref) {
        $count_of{$row->{sp}} = $row->{cnt};
    }
    return \%count_of;
}

# Collect the protein motifs of the genes in the given clusters.
#
# Arguments:
#   $tabid - cluster table id; selects "cluster_<tabid>"
#   $clid  - array ref of cluster ids
#   $opt   - hash ref; keys read: motif_eval (upper bound for m.eval) and
#            splist (species filter, used only for large cluster lists)
# Returns:
#   {} when $clid is empty; otherwise a hash ref (undef when the query
#   returns no rows) with
#     INFO->SPNAME->{"sp:name"} = [motid, ...]
#     INFO->MOTID->{motid}      = motname
#     INFO->MOTLIB->{motid}     = motlib
#     INFO->PROGRAM->{motid}    = program
#
# With 1000 or more cluster ids the protmotif table is read without
# joining the cluster table (optionally filtered by splist), so the result
# may then cover genes outside the requested clusters.
sub getMotifListForClustTab {
    my($tabid, $clid, $opt) = @_;

    # Nothing to look up: answer before opening any connection.
    return {} if (! @{$clid});

    # The original never set $dbname in this sub, which produced
    # "from .protmotif" in the statements below.
    my($dbname) = getDbname();
    my($tmpdbi) = getTmpDbi();

    my($db) = MBGD::DB->new($tmpdbi);
    my($clid_list) = join(',', @{$clid});
    my($res, $sql);
    my($where_opt);
    if ($opt->{motif_eval}) {
        $where_opt = "m.eval <= $opt->{motif_eval}";
    }
    if (@{$clid} >= 1000) {
        # m.program is selected here as well, so that both branches return
        # the six columns unpacked in the loop below.
        $sql = "select m.sp,m.name,m.motlib,m.program,m.motid,m.motname"
            . " from $dbname.protmotif m";
        $sql .= " where 1 ";
        if ($opt->{splist}) {
            my $splist = join("','", @{$opt->{splist}});
            $sql .= " and m.sp in ('$splist') ";
        }
    } else {
        $sql= "select m.sp,m.name,m.motlib,m.program,m.motid,m.motname"
            . " from cluster_$tabid c, $dbname.protmotif m"
            . " where c.sp=m.sp and c.name=m.name"
            . "   and c.clustid in ($clid_list)";
    }
    $sql .= " and $where_opt" if ($where_opt);

    my $sth = $db->execute($sql);
    while (my @row = $sth->fetchrow_array) {
        my($sp, $name, $motlib, $program, $motid, $motname) = @row;
        push(@{$res->{INFO}->{SPNAME}->{"$sp:$name"}}, $motid);
        $res->{INFO}->{MOTID}->{$motid} = $motname;
        $res->{INFO}->{MOTLIB}->{$motid} = $motlib;
        $res->{INFO}->{PROGRAM}->{$motid} = $program;
    }
    return $res;
}

# Summarise the most frequent phylogenetic patterns of a cluster table.
#
# Arguments:
#   $tabid - cluster table id, passed on to searchClustTableDB()
#   $limit - number of most frequent patterns to keep
#   $opt   - hash ref of search options for searchClustTableDB().  It is
#            modified: db is filled in when empty, and store_tmptab,
#            store_key and columns are overwritten.
# Returns:
#   (\%CountPat, \%CountClass, \%CountTotClass, $totcnt)
#     CountPat{phylopat}          - clusters with that pattern (top $limit)
#     CountClass{phylopat}{class} - the same, broken down by class
#     CountTotClass{class}        - clusters per class over the whole search
#     $totcnt                     - sum of the CountTotClass values
#
# The search result is stored in the temporary table "cltab_tmp" and the
# top patterns in "clmap_tmp".
# NOTE(review): both temporary tables have fixed names and are created
# without "if not exists"; a second call on the same connection may
# therefore fail -- confirm how MBGD::DB reports that.
sub summaryMap {
    my($tabid, $limit, $opt) = @_;
    my $sql;
    my($tmpdbi) = getTmpDbi();

    $opt->{db} = MBGD::DB->new($tmpdbi) if (! $opt->{db});
    my $db = $opt->{db};
    my $cltab_tmp = "cltab_tmp";
    # Lexical accumulators: the original used package globals that were
    # never reset, so counts from earlier calls were added to later ones.
    my(%CountPat, %CountClass, %CountTotClass);
    my $totcnt = 0;

    $opt->{store_tmptab} = $cltab_tmp;
    $opt->{store_key} = 'phylopat';
    $opt->{columns} = ['clustid', 'phylopat', 'class'];
    searchClustTableDB($tabid, $opt);

    $sql = "create temporary table clmap_tmp " .
        "select phylopat,count(*) cnt from $cltab_tmp " .
        "group by phylopat " .
        "order by cnt desc limit $limit";
    my $sth = $db->prepare($sql);
    $sth->execute;

    $sql = "select c.clustid,c.phylopat,c.class,t.cnt from clmap_tmp t, $cltab_tmp c where c.phylopat=t.phylopat order by t.cnt desc";
    $sth = $db->prepare($sql);
    $sth->execute;
    while (my $row = $sth->fetchrow_hashref) {
        $CountClass{$row->{phylopat}}->{$row->{class}}++;
        $CountPat{$row->{phylopat}} = $row->{cnt};
    }

    $sql = "select class, count(class) cnt from $cltab_tmp group by class";
    $sth = $db->prepare($sql);
    $sth->execute;
    while (my $row = $sth->fetchrow_hashref) {
        $CountTotClass{$row->{class}} = $row->{cnt};
        $totcnt += $row->{cnt};
    }
    return (\%CountPat, \%CountClass, \%CountTotClass, $totcnt);
}

# Cache of parsed command lines: command string => option hash ref.
$main::CACHE_ClustOpt = {};

# Parse the options of a clustering command line into a hash.
#
# Arguments:
#   $opt - command line string; every " -KEY=VALUE" or " -KEY" token
#          (a '-' preceded by whitespace, running up to the next
#          whitespace or '|') is treated as an option
# Returns:
#   a hash ref KEY => VALUE.  The same hash ref is returned for repeated
#   calls with the same string (results are cached in
#   $main::CACHE_ClustOpt).
#
# Rules:
#   - the keys '' and 'tabout' are skipped;
#   - when a key occurs more than once, the first occurrence wins;
#   - one pair of enclosing single quotes, then one pair of enclosing
#     double quotes, is removed from the value;
#   - the comma-separated SPEC list is sorted.
sub parseClustringOpt {
    my($opt) = @_;

    if (exists($main::CACHE_ClustOpt->{"$opt"})) {
        return $main::CACHE_ClustOpt->{"$opt"};
    }

    # Options that do not affect the clustering result.
    my %skipKey = ('' => 1,
                   'tabout' => 1);
    my $refOptHash = {};

    while ($opt =~ m/\s\-([^\s\|]+)/g) {
        # Split on the first '=' only, so that a value which itself
        # contains '=' is kept whole instead of being truncated.
        my($k, $v) = split(/=/, $1, 2);

        next if ($skipKey{"$k"});
        next if (exists($refOptHash->{"$k"}));

        $v =~ s#^\'(.*)\'$#$1#;     # quoted with '
        $v =~ s#^\"(.*)\"$#$1#;     # quoted with "
        $refOptHash->{"$k"} = $v;
    }

    if (exists($refOptHash->{'SPEC'})) {
        # Keep the species list sorted.
        my(@splist) = split(',', $refOptHash->{'SPEC'});
        $refOptHash->{'SPEC'} = join(',', sort(@splist));
    }

    # Remember the parsed result.
    $main::CACHE_ClustOpt->{"$opt"} = $refOptHash;

    return $refOptHash;
}

# Tell whether two clustering command lines are equivalent.
#
# Both strings are parsed with parseClustringOpt(); they match when the
# parsed hashes have the same number of keys, every key of the first is
# present in the second, and the values compare equal as strings.
#
# Returns 1 on a match and 0 otherwise.
sub matchClustringOpt {
    my($opt1, $opt2) = @_;
    my $lhs = parseClustringOpt($opt1);
    my $rhs = parseClustringOpt($opt2);

    # Different number of keys.
    return 0 unless (keys(%{$lhs}) == keys(%{$rhs}));

    for my $name (keys(%{$lhs})) {
        # Key missing on the other side.
        return 0 unless (exists($rhs->{$name}));
        # Values differ.
        return 0 if ($lhs->{$name} ne $rhs->{$name});
    }

    return 1;
}

# Choose the cluster table id to use for a species.
#
# Arguments:
#   $tabid - requested table id
#   $spec  - species code
# Returns:
#   'addspec' when $tabid is "default" (case-insensitive) and $spec is not
#   in the list returned by main::MBGD_SpecTableGetDefaultSpecies();
#   otherwise $tabid unchanged.
sub main::check_addspec_tab {
    my($tabid, $spec) = @_;

    return $tabid unless ($tabid =~ /^default$/i);

    my %is_default = map { ("$_", 1) } main::MBGD_SpecTableGetDefaultSpecies();
    return exists($is_default{"$spec"}) ? $tabid : 'addspec';
}

# Ad-hoc run when this file is executed directly rather than loaded by
# another script: call summaryMap() on a hard-coded table id.
# (Another table id tried previously: 2324380.)
summaryMap(2087162,30,{spnum=>5}) if ($0 eq __FILE__);

1;
