#!/usr/bin/perl -s

require "MBGD_Conf.pl";
use MBGD;
use MBGD::DB;

##require 'libMBGDUserInfo.pl';
##require 'libCGI.pl';

use Property::HomolParam;

# Species patterns shorter than this many characters are filtered with the
# external egrep command in print_geneinfo_all(); longer ones are filtered
# line by line in Perl.
$MAXSPECLEN = 300;

# All command-line switches arrive as package variables because of the -s
# flag on the shebang line (e.g. -GetParam=FILE sets $GetParam).
if ($GetParam) {

    # Stored homology parameters, read from the file named by -GetParam.
    %param = Property::HomolParam->new(datafile=>$GetParam)->asHash;

    # NOTE(review): the stored besthit/ratiocut values replace the
    # command-line ones only when -BESTHIT WAS given. Confirm this is the
    # intent and that the test was not meant to be "! $BESTHIT".
    if ($BESTHIT) {
        $BESTHIT   = $param{'besthit'};
        $RATIOCUT   = $param{'ratiocut'};
    }
    # Fall back to the stored species list, converting its '|' separators
    # to the comma-separated form used throughout this script.
    if (! $SPEC) {
        $SPEC = (defined($param{'species'}) ? $param{'species'} : '');
        $SPEC =~ s/\|/,/g;
    }
}
# Append the contents of -SPECFILE to the species list.
# NOTE(review): lines are concatenated without any separator, so a file with
# one species per line would yield a single run-together name; presumably
# the file holds one comma-separated line -- confirm.
if (-f $SPECFILE) {
	open(F, $SPECFILE) || die "Can't open $SPECFILE\n";
	while(<F>){
		chomp;
		$SPEC .= $_;
	}
	close(F);
}

# Thresholds given on the command line override the stored parameters.
$param{'eval'} = $EVAL if ($EVAL);
$param{'score'} = $SCORE if ($SCORE);
$param{'pam'} = $PAM if ($PAM);
$param{'ident'} = $IDENT if ($IDENT);

# Unbuffered STDOUT.
$| = 1;
# A best-hit mode starting with "non" means best-hit filtering is off.
$BESTHIT = '' if ($BESTHIT =~ /^non/);
# Coverage filtering (-COVER) and original-format output (-ORIGOUT) both
# need the gene lengths.
$flagGetGeneLen = 1 if ($COVER || $ORIGOUT);

# Work out the database name: an explicit -dbname wins (and is exported via
# MYSQL_DB and added to the option string $OPT); otherwise it is taken from
# the MYSQL_DB environment variable, stripping a leading "dbi:mysql:" DSN
# prefix when present.
if ($main::dbname) {
	$ENV{'MYSQL_DB'} = $MBGD_dbname = $main::dbname;
    $OPT .= " -dbname='$main::dbname'";
}
elsif ($ENV{'MYSQL_DB'} =~ /dbi:mysql:(.+)$/) {
	$MBGD_dbname = $1;
}
else {
	$MBGD_dbname = $ENV{'MYSQL_DB'};
}

# When specific genes are requested and best-hit filtering is off, answer the
# query from the database if it has a `homology` table ($UseDB).
if ( ($GENES || $GENES2) && ! $BESTHIT ) {
    my($db) = MBGD::DB->new($MBGD_dbname);
    my($sta) = $db->exist_table('homology');
    if ($sta) {
        print STDERR "DBG :: found '$MBGD_dbname.homology' table\n";
        $UseDB = 1;
    }
}
# Default data directory, used unless -DIR was given.
$DIR = "$DIR_database/bldp" if (! $DIR);
if ($defaulttab) {
    # Build the default cluster table: force the ".work" data directory and,
    # when a gene table was requested, the ".work" gene table file.
    $DIR = "$DIR_database.work/bldp";
    if ($tabout) {
	$tabout = "$DIR_database.work/taball.domclust";
    }
}

# -tabout: print the gene table ahead of the homology data. Every variant
# ends the table with a "//" line.
if ($tabout) {
    if ($GENES && $UseDB) {
        # Only the requested genes, looked up in the database.
        $| = 1;
        print_geneinfo(split(/,/, $GENES));
        print "//\n";
    } elsif ($main::QSPEC && $main::mergetree) {
        # Merge-tree mode: two tables separated by a "--" line.
        # The first covers the species in $SPEC that are not in $QSPEC.
        my(%spec_hash_cluster);
        foreach my$spec (split(/,/, $SPEC)) {
            $spec_hash_cluster{"$spec"} = 1;
        }
        foreach my$spec (split(/,/, $QSPEC)) {
            delete($spec_hash_cluster{"$spec"});
        }
        my(@spec_list_base) = keys(%spec_hash_cluster);

        # First table: base species, read from $MBGD_HOME/etc/taball.dist.
        # NOTE(review): if no base species remain, $spec_base is empty and
        # print_geneinfo_all() falls back to the whole of $SPEC.
        my($spec_base) = join(',', @spec_list_base);
        my($file_taball_dist) = "$ENV{'MBGD_HOME'}/etc/taball.dist";
        print_geneinfo_all($file_taball_dist, $spec_base);
        print "--\n";

        # Second table: the query species, read from the -tabout file.
        print_geneinfo_all($tabout, $main::QSPEC);
		print "//\n";
    } else {
        # All species in $SPEC, read from the -tabout file.
        print_geneinfo_all($tabout);
		print "//\n";
    }
}

# Database mode prints the hits itself and ends the script here; everything
# below this point only runs in file mode.
if ( $UseDB ) {
	&query_mysql;
	exit(0);
}

# Load the gene lengths when they are needed and no earlier step (such as the
# -tabout gene table) has already filled %Length.
# The hash is tested for emptiness directly: "defined %hash" is a fatal
# compile-time error from Perl 5.22 on.
if ($flagGetGeneLen && ! %Length) {
	get_seqlen();
}

# Extra data directories are appended to $DIR as a colon-separated list.
$DIR .= ":$ADDDIR" if ($ADDDIR);
# Result files are matched as names starting with $NAME and ending in $TAIL
# (see the readdir filter used when the species list is inferred).
$NAME = 'blastdpres';
#$TAIL = 'score.sort.pack';
$TAIL = 'pack';
# Directory of this script, where the `select` program is looked up; chop
# removes the newline that ends the dirname output.
$PATH = `dirname $0`; chop $PATH;
if ($DIR =~ /:/) {
    @DIR = split(/:/,$DIR);
} else {
    @DIR = ($DIR);
}


# Forward the thresholds to `select` as -P (pam), -S (score), -E (eval) and
# -I (ident).
if ($param{'pam'}) {
    $OPT .= " -P$param{'pam'}";
}
if ($param{'score'}) {
    $OPT .= " -S$param{'score'}";
}
if ($param{'eval'}) {
    $OPT .= " -E$param{'eval'}";
}
if ($param{'ident'}) {
    $OPT .= " -I$param{'ident'}";
}
# Gene lists are "sp:name" pairs separated by commas. -GENES is passed as -G
# and -GENES2 as -g; the species of every listed gene is recorded in %GeneSP.
# $BOTH and $ONE are set here but are read only by the commented-out legacy
# code further down in this file.
if ($GENES) {
    $OPT .= " -G $GENES";
    foreach $g (split(/,/, $GENES)) {
        ($sp,$name) = split(/:/, $g);
        $GeneSP{$sp} = 1;
    }
    $BOTH = 1;
} elsif ($GENES2) {
    $OPT .= " -g $GENES2";
    foreach $g (split(/,/, $GENES2)) {
        ($sp,$name) = split(/:/, $g);
        $GeneSP{$sp} = 1;
    }
    $ONE = 1;
}

# Best-hit filtering: -o2 for modes starting with "uni" or "asym", -o1 for
# anything else, plus the ratio cutoff (default 90) as -n.
if ($BESTHIT) {
    if ($BESTHIT =~ /^(uni|asym)/) {
        $Oflag = 2;
    } else {
	# bidirectional
        $Oflag = 1;
    }
    $RATIOCUT = 90 if (! $RATIOCUT);
    $OPT .= " -o$Oflag -n$RATIOCUT";
    # Unless -paracheck was given, $noparacheck is set, which later adds -p
    # to the `select` options.
    $noparacheck = 1 if(! $paracheck);
}
# Ranking: -rd when -RANK=dist, otherwise -rs.
if ($RANK eq 'dist') {
	$OPT .= " -rd";
} else {
	$OPT .= " -rs";
}
# Output format: -a for -FULLOUT, else -b for -BINOUT.
if ($FULLOUT) {
	$OPT .= " -a";
} elsif ($BINOUT) {
	$OPT .= " -b";
}

if ($plasmid) {
    $OPT .= " -m";
}

# Resolve the species list into @SPEC / $SPEC. With -SPEC it is used as
# given; otherwise it is inferred from the ".<sp1>-<sp2>" part of the result
# file names ($NAME...$TAIL) found in the data directories. Names containing
# "_p" are left out of the inferred list.
if ($SPEC) {
    @SPEC = split(/,/, $SPEC);
} else {
    my %tmpsp;
    my @tmp_files;
    foreach my $dir (@DIR) {
        # An unreadable directory is reported and skipped instead of being
        # silently treated as empty.
        if (! opendir(D, $dir)) {
            warn "Can't open directory $dir: $!\n";
            next;
        }
        while (defined(my $f = readdir(D))) {
            next if ($f !~ /^$NAME.*$TAIL$/);
            push(@tmp_files, $f);
        }
        closedir(D);
    }
    foreach my $f (@tmp_files) {
        my($sp1,$sp2) = ($f =~ /\.([^\-\.]+)\-([^\-\.]+)/);
        # A file name without a species pair must not add an empty species.
        next if (! defined($sp1));
        $tmpsp{$sp1} = 1 if ($sp1 !~ /_p/);
        $tmpsp{$sp2} = 1 if ($sp2 !~ /_p/);
    }
    @SPEC = keys %tmpsp;
    $SPEC=join(",",@SPEC);
}
# Query species: kept as a list unless it is the keyword 'all'.
if ($QSPEC && $QSPEC ne 'all') {
    @QSPEC = split(/,/, $QSPEC);
}

# Complete the `select` command line: data directories (-D), species (-s),
# query species (-q) and -p when $noparacheck is set.
$OPT .= " -D$DIR -s$SPEC";
$OPT .= " -q$QSPEC" if ($QSPEC);
$OPT .= " -p" if ($noparacheck);
$cmd = "$PATH/select $OPT";
print STDERR "CMD :: $cmd\n";
if (! $COVER) {
	# No post-filtering needed: replace this process with `select`.
	# exec only returns on failure, which must not end in exit(0).
	exec($cmd) || die "Can't exec $cmd: $!\n";
} else {
	# Coverage filtering: read the hits from `select` and print only those
	# whose coverage reaches $COVER. %Length is keyed by the "sp:name"
	# identifiers found in the first two columns.
	open(O, "$cmd|") || die "Can't run $cmd: $!\n";
	while(<O>){
		my ($spname1,$spname2,$from1,$to1,$from2,$to2) = split;
		my $coverage = coverage(
			$spname1,$from1,$to1,$spname2,$from2,$to2,$Length{$spname1},$Length{$spname2});
		next if ($coverage < $COVER);
		print;
	}
	# close() on a pipe reports the exit status of the command.
	close(O) || warn "$PATH/select did not finish cleanly (status $?)\n";
}
exit(0);
#
#foreach $sp1 (@SPEC) {
#    next if ($BOTH && ! $GeneSP{$sp1});
#    foreach $sp2 (@SPEC) {
#
#        next if ($BOTH && ! $GeneSP{$sp2});
#        next if ($ONE && ! $GeneSP{$sp1} && ! $GeneSP{$sp2});
#        next if ($BESTHIT && ! $paracheck && $sp1 eq $sp2);
#        next if ($noparacheck && $sp1 eq $sp2);
#	next if ($Found{join(',', sort ($sp1,$sp2))});
#
#        foreach $DIR (@DIR) {
#            if (-f "$DIR/$NAME.$sp1-$sp2.$TAIL") {
#                push(@files, "$DIR/$NAME.$sp1-$sp2.$TAIL");
#		$Found{join(',', sort ($sp1,$sp2))} = 1;
#            }
#            if ($plasmid) {
#                if (-f "$DIR/$NAME.${sp1}_p-$sp2.$TAIL") {
#                    push(@files, "$DIR/$NAME.${sp1}_p-$sp2.$TAIL");
#                }
#                if (-f "$DIR/$NAME.$sp1-${sp2}_p.$TAIL") {
#                    push(@files, "$DIR/$NAME.$sp1-${sp2}_p.$TAIL");
#                }
#                if (-f "$DIR/$NAME.${sp1}_p-${sp2}_p.$TAIL") {
#                    push(@files, "$DIR/$NAME.${sp1}_p-${sp2}_p.$TAIL");
#                }
#            }
#        }
#    }
#}
#
#if (! $COVER) {
#	$OPT .= " -D$DIR -s$SPEC";
#	$OPT .= " -p" if ($noparacheck);
#	system("$PATH/select $OPT");
#} else {
#
#    die "No file selected\n" if (! @files);
#
#    $files = join (' ', @files);
#
#    foreach $f (@files) {
#	if ($COVER) {
#		open(O, "$PATH/select $OPT $f |");
#		while(<O>){
#        		my ($spname1,$spname2,$from1,$to1,$from2,$to2,$pam,$score)
#				 = split;
#			my $coverage = &coverage(
#				$spname1,$from1,$to1,$spname2,$from2,$to2);
#			next if ($coverage < $COVER);
#			print;
#		}
#		close(O);
#	} else {
#		system("$PATH/select $OPT $f");
#	}
#    }
#}
# Return the larger of two numbers; on a tie the first argument is returned.
sub max {
	my($first, $second) = @_;
	return $second if ($first < $second);
	return $first;
}

###############################################################################
#
# Load amino-acid lengths into the global %Length, keyed by "sp:name".
#
# Reads the globals $SPEC (comma-separated species), $GENES (comma-separated
# "sp:name" pairs), $CMD_mysql and $MBGD_dbname. The lengths come from the
# `aalen` column of the `gene` table, fetched through the mysql command-line
# client. Both filters are optional: with neither set, the lengths of all
# genes are loaded (previously this produced an invalid "where" clause).
#
# Dies if the mysql client cannot be started.
#
# NOTE(review): $SPEC and $GENES are interpolated unquoted into the SQL text
# and into a shell command line; they must come from a trusted source.
sub get_seqlen {
	my(@conds);
	if ($SPEC) {
		# a,b,c  ->  'a','b','c'
		my($Q_spec) = $SPEC;
		$Q_spec =~ s/,/','/g;
		push(@conds, "g.sp in ('$Q_spec')");
	}
	if ($GENES) {
		# Lexical list, so repeated calls cannot accumulate conditions.
		my(@q_genes);
		foreach my $g (split(/,/, $GENES)) {
			my($sp,$name) = split(/:/,$g);
			push(@q_genes, "g.sp = '$sp' and g.name = '$name'");
		}
		push(@conds, "( " . join(" or ", @q_genes) . " )");
	}

	my $sql = "select g.sp,g.name,g.aalen from gene g";
	$sql .= " where " . join(" and ", @conds) if (@conds);

	# -q: do not cache the result, -N: no column-name header line.
	my $cmd = "$CMD_mysql -q -N -e \"$sql\" $MBGD_dbname";

	open(C, "$cmd |") || die "Can't run $CMD_mysql: $!\n";
	while (<C>) {
		my($sp,$name,$len) = split;
		$Length{"$sp:$name"} = $len;
	}
	close(C);
}

# Answer the homology query from the `homology` table and print one
# space-separated hit per line on STDOUT.
#
# Reads the globals $SPEC, $GENES, $GENES2, $ORIGOUT, $COVER, $LIMIT,
# $flagGetGeneLen, %param and $ENV{MYSQL_DB}.
#
# Output columns:
#   default   spname1 spname2 from1 to1 from2 to2 pam score
#   -COVER    the same, followed by the two gene lengths; hits whose
#             coverage is below $COVER are dropped
#   -ORIGOUT  spname1 spname2 from1 to1 from2 to2 ident eval pam score
#             coverage (no coverage filtering)
#
# The WHERE clause is built in a lexical, so the global $OPT (the `select`
# command-line options) is no longer overwritten.
#
# NOTE(review): gene names, species and thresholds are interpolated into the
# SQL text without quoting; they must come from a trusted source.
sub query_mysql {
    my($Q_SPEC, $SPEC_FILT);
    if ($SPEC) {
        my(@specs) = split(/,/, $SPEC);
        if (@specs <= 20) {
            # Short list: filter in SQL with  sp in ('a','b',...).
            $Q_SPEC = $SPEC;
            $Q_SPEC =~ s/,/','/g;
            $Q_SPEC = "'$Q_SPEC'";
        } else {
            # Long list: filter the fetched rows with a regex instead.
            $SPEC_FILT = $SPEC;
            $SPEC_FILT =~ s/,/\|/g;
        }
    }

    # Gene restriction: -GENES2 constrains one side of the hit, -GENES both
    # sides (each pair reported once through spname1 <= spname2).
    my($where) = '';
    if ($GENES || $GENES2) {
        my($Qgenes) = ($GENES2 ? $GENES2 : $GENES);
        $Qgenes =~ s/,/','/g;
        $Qgenes = "'$Qgenes'";
        if ($GENES2) {
            $where = "h.spname1 in ($Qgenes) ";
        } else {
            $where = "h.spname1 in ($Qgenes) and h.spname2 in ($Qgenes) ";
            $where .= "and h.spname1 <= h.spname2 ";
        }
    }

    my($outfield);
    if ($ORIGOUT) {
        $outfield = 'h.spname1,h.spname2,h.from1,h.to1,h.from2,h.to2,h.ident,h.eval,h.pam,h.score';
    } else {
        $outfield = 'h.spname1,h.spname2,h.from1,h.to1,h.from2,h.to2,h.pam,h.score';
    }

    # "1" keeps the clause valid when no gene restriction precedes the "and"s.
    $where = "1 " if (! $where);
    $where .= "and h.sp1 in ($Q_SPEC) and h.sp2 in ($Q_SPEC) " if ($Q_SPEC);
    $where .= "and h.pam <= $param{'pam'} " if ($param{'pam'});
    $where .= "and h.eval <= $param{'eval'} " if ($param{'eval'});
    $where .= "and h.score >= $param{'score'} " if ($param{'score'});
    $where .= "and h.ident >= $param{'ident'} " if ($param{'ident'});

    my($tables) = "homology h";
    if ($flagGetGeneLen) {
        # Join the gene table twice to get both sequence lengths.
        $outfield .= ',g1.aalen,g2.aalen';
        $tables .= ", gene g1, gene g2";
        $where .= "and g1.sp=h.sp1 and g1.name=h.name1 " .
                  "and g2.sp=h.sp2 and g2.name=h.name2 ";
    }
    # LIMIT has to be the last clause, after every condition.
    $where .= "limit $LIMIT" if ($LIMIT);

    my $sql = "select $outfield from $tables where $where";

    my $dbname = $ENV{'MYSQL_DB'};
    my $optfile = "$ENV{'HOME'}/.my.cnf";
    my $optgroup = "select_hom";
    my $dsn = "$dbname;mysql_read_default_file=$optfile;"
        . "mysql_read_default_group=$optgroup";
    my $db = MBGD::DB->new($dsn);
    my $sth = $db->execute($sql);

    $db->{conn}->{RowCacheSize}=1;

    while (my @hdata = $sth->fetchrow_array) {
        my($spname1,$spname2,$from1,$to1,$from2,$to2) = @hdata;
        if ($SPEC_FILT) {
            next if ($spname1 !~ /$SPEC_FILT/ || $spname2 !~ /$SPEC_FILT/);
        }
        if ($flagGetGeneLen) {
            # The two lengths are always the last columns; the number of
            # columns before them depends on $ORIGOUT.
            my($len1,$len2) = @hdata[-2,-1];
            my $coverage = coverage($spname1,$from1,$to1,
                    $spname2,$from2,$to2,$len1,$len2);
            if ($ORIGOUT) {
                my($ident,$eval,$pam,$score) = @hdata[6..9];
                print join(' ',
                    $spname1, $spname2, $from1, $to1,
                    $from2, $to2, $ident, $eval, $pam,
                    $score, $coverage), "\n";
                next;
            }
            next if ($coverage < $COVER);
        }
        # NOTE(review): with -COVER this line also carries the two lengths,
        # unlike the `select` output of file mode; confirm consumers cope.
        print join(' ', @hdata),"\n";
    }
}

# Return the alignment coverage of a hit as a percentage with one decimal.
#
# Arguments: name1, from1, to1, name2, from2, to2, len1, len2. The names are
# accepted for the callers' convenience and not used in the computation.
# The coverage of each sequence is its aligned span (to - from + 1) relative
# to its length; the larger of the two values is returned.
#
# A sequence whose length is missing, zero or negative contributes a coverage
# of 0 instead of aborting the script with a division by zero.
sub coverage {
	my($name1,$from1,$to1,$name2,$from2,$to2,$len1,$len2) = @_;
	my($cov1) = ($len1 > 0) ? ($to1 - $from1 + 1) * 100 / $len1 : 0;
	my($cov2) = ($len2 > 0) ? ($to2 - $from2 + 1) * 100 / $len2 : 0;
	my($best) = ($cov1 < $cov2) ? $cov2 : $cov1;
	return sprintf("%.1f", $best);
}

###############################################################################
#
# Print "sp NAME length from dir" for each of the given genes, ordered by
# species and start position, with the gene name in upper case.
#
# Arguments: a list of "sp:name" gene identifiers.
# When $flagGetGeneLen is set, the lengths are also stored in the global
# %Length under the key "sp:NAME" (upper-cased name).
#
# The genes are loaded into a temporary table and joined against `gene` and
# `proteinseq`. All loop variables are lexical so the package globals of the
# same names used by the main script are not overwritten.
#
# NOTE(review): gene identifiers are interpolated unquoted into the INSERT
# statement. Also, "type=heap" is the old spelling of the table option; newer
# MySQL servers only accept "engine=" -- confirm against the server version.
sub print_geneinfo {
    my(@genes) = @_;

    my $dbname = $ENV{'MYSQL_DB'};
    my $optfile = "$ENV{'HOME'}/.my.cnf";
    my $optgroup = "select_hom";
    my $dsn = "$dbname;mysql_read_default_file=$optfile;"
		. "mysql_read_default_group=$optgroup";
    my $db = MBGD::DB->new($dsn);
    $db->do(qq{
        create temporary table tmp_glist (
            sp char(5), name char(30)
        ) type=heap
    });
    foreach my $g (@genes) {
        my($sp,$name) = split(/:/,$g);
        $db->do(qq{insert into tmp_glist values('$sp','$name')});
    }

    my $sth = $db->prepare(qq{
        select g.sp,g.name,p.length,g.from1,g.dir
        from gene g, tmp_glist gl, proteinseq p
        where gl.sp=g.sp and gl.name=g.name and g.aaseq=p.id
        order by g.sp,g.from1
    });
    $sth->execute;
    while (my($sp,$name,$length,$from,$dir) = $sth->fetchrow_array) {
        $name =~ tr/a-z/A-Z/;
        print "$sp $name $length $from $dir\n";
        $Length{"$sp:$name"} = $length if ($flagGetGeneLen);
    }
}
# Print the lines of a gene table that belong to the given species.
#
# Arguments:
#   $file  gene table to read; if it is not a plain file, $FILE_taball is
#          used instead
#   $spec  comma-separated species list; defaults to the global $SPEC
#
# A line is kept when its first whitespace-separated field starts with one of
# the species names. When $flagGetGeneLen is set, the third field of every
# kept line is stored in the global %Length under "sp:name" (first and second
# field). Species patterns shorter than $MAXSPECLEN are pre-filtered with the
# external $CMD_egrep; longer ones are filtered in Perl only.
#
# Dies if the table cannot be opened.
#
# NOTE(review): the species match is a prefix match, so "ec" also selects
# "eco"; confirm that this is intended.
sub print_geneinfo_all {
	my($file) = shift;
	my($spec) = shift;

	if (! -f $file) {
		$file = "$FILE_taball";
	}
	if (! $spec) {
		$spec = $SPEC;
	}
	$spec =~ s/,/\|/g;
	my $use_egrep = (length($spec) < $MAXSPECLEN);

	# Nothing to record and a short pattern: egrep writes straight to STDOUT.
	if (! $flagGetGeneLen && $use_egrep) {
		system("$CMD_egrep '^($spec)' $file");
		return;
	}

	if ($flagGetGeneLen && $use_egrep) {
		open(G, "$CMD_egrep '^($spec)' $file|")
			|| die "Can't run $CMD_egrep on $file: $!\n";
	} else {
		open(G, $file) || die "Can't open $file: $!\n";
	}
	# Compile the species pattern once instead of once per line.
	my $spec_re = qr/^($spec)/;
	while (<G>) {
		my($sp,$name,$length) = split;
		next if ($sp !~ $spec_re);
		$Length{"$sp:$name"} = $length if ($flagGetGeneLen);
		print;
	}
	close(G);
}
