#!/usr/bin/perl

$ENV{'BLASTDB'} = "/bio/db/blast/db";
$ENV{'BLASTMAT'} = "/bio/db/blast/matrix";

use MBGD;
use MBGD::WWW;
use MBGD::SeqRegion;
use RECOG::RecogCommon;
require "libMBGDDBM.pl";
require "libMBGDaxes.pl";
require "htblast.pl";
require "MBGD_commonPath.pl";


$WWWROOT   = $ENV{'WWWROOT'};
$BLASTALL  = "$CMD_blastall -p blastp";
$READBLAST = "$WWWROOT/bin/read_blast";
$BLASTDP   = "$WWWROOT/bin/blastdp2";
$BLDB      = "$WWWROOT/bldb";

$AliButtonColor = '#a0ffff';

$LIMIT = 2000;        # maximal residues allowed in the query.

$MAXOUT = 250;       # maximal number of hits to be shown.
$BEGIN = 0;

$ENV{'PAMFILE'} = $FILE_pamfile;

$| = 1;

$WWW = MBGD::WWW->new( title => "Homology Search" , type => );
%Args = $WWW->cgiGetArgs;

$uInfo = $WWW->uInfo;
%Param = $uInfo->getHomolParamHash;

if ($Args{'showall'}) {
    $EVALCUTOFF = $Args{'evalcut'};
    $EVALCUTOFF = .05 if (! $EVALCUTOFF);
} else {
    $EVALCUTOFF = $Param{'eval'};
}
if ($Args{'begin'}) {
    $BEGIN = $Args{'begin'};
}
if (defined $Args{'maxout'}) {
    $MAXOUT = $Args{'maxout'};
}

$species = join(',', split(/\|/, $Param{'species'}));

$WWW->start_html;

if ($Args{'sequence'} !~ /[A-Z]/ && $Args{'sequencef'}) {
    my($seqfile, $seq);
    $seqfile = $Args{'sequencef'};
    while (<$seqfile>){
        $seq .= $_;
    }
    $Args{'sequence'} = $seq;
}

#############################################################################
# Query Sequence Input Form
if ($Args{'sequence'} !~ /[A-Z]/ && ! $Args{'orfname'}) {

    &seq_input_form();
}
#############################################################################
# Print the Search Result
else {
    print "<H2> Homology Search Results </H2>\n";
    if ($Args{'sequence'}) {
        &doSimSearch();
        @output = &readSeqSimFile();
    }
    else {
        selectSimSearchRes($Args{'tabid'});
    }
    print qq{<A HREF="RECOG_seqreg_html.pl?name=$Args{orfname}&displayMode=Homology&tabid=$Args{'tabid'}">[Graphical Display]</A><b>[Text Display]</b><br>};
    print "<hr>\n";
    $CreateForm = 1;
    if ($CreateForm) {
        print "<FORM METHOD=\"POST\" ACTION=\"/htbin/cluster\">\n";
        print "<TABLE>\n";
        print "<TR><TD></TD>";
        print "<TH>Qname</TH>" if ($numquery > 1);
        print "<TH>Name</TH><TH>",join("</TH><TH>", ('Ident','E-value','Score','PAM','MatchReg','Cover','Product')), "</TH>\n";

#        print "<TH>Name</TH><TH>",join("</TH><TH>", ('Ident','E-value','Score', 'PAM',
###		'from1','to1','from2','to2',
#		'Product')), "</TH>\n";
        if (! $queryseq) {
            foreach $orfname (@orfnames) {
                print "<INPUT TYPE=\"hidden\" NAME=\"genes\" VALUE=\"$orfname\">";
            }
        }
        $TOTALCOUNT = 0+@output;
        if ($Args{sortby} =~ /score|pam|ident|eval/) {
            $sortby = $Args{sortby};
        }
        elsif ($Param{sim_measure}) {
            $sortby = $Param{sim_measure};
        }
        else {
            $sortby = 'score';
        }
        $by = "by_$sortby";
        @output_sorted = sort $by @output;
        if ($MAXOUT) {
            my $END;
            if ($BEGIN + $MAXOUT - 1 < $#output_sorted) {
                $END = $BEGIN + $MAXOUT - 1;
                $NEXT = $BEGIN+$MAXOUT;
            }
            else {
                $END = $#output_sorted;
                $NEXT = 0;
            }
            @output_sorted = @output_sorted[$BEGIN..$END];
        }
        foreach $l (@output_sorted) {
            push(@hit_orfnames, $l->{name});
        }

        %gene_title = MBGD_GetGeneTitles(@orfnames, @hit_orfnames);

        $rank = $BEGIN;
        foreach $l (@output_sorted) {
            ($sp,$orf) = split(/:/,$l->{'name'});
            print "<TR>\n";
            if ((!$Param{'ident'} || $l->{'ident'} >= $Param{'ident'}) &&
                (!$Param{'eval'} || $l->{'eval'} <= $Param{'eval'}) &&
                (!$Param{'score'} || $l->{'score'} >= $Param{'score'}) &&
                (!$Param{'pam'} || $l->{'pam'} <= $Param{'pam'}) &&
##                (! $Args{'ovlpcheck'} ||
                       $l->{'coverage'} >= $Param{'coverage'}
#)
                ) {
                    $CHECKED = 'CHECKED';
            }
            else {
                    $CHECKED = '';
                    next if (! $Args{'showall'});
            }
            if ($queryseq) {
#                print "<TD><INPUT TYPE=\"checkbox\" NAME=\"homopair\" VALUE=\"$l->{'name'},$l->{'qname'},$l->{'pam'},$l->{'score'}\" $CHECKED></TD>\n";
            }
            else {
                next if ($Found{$l->{'name'}});
                $Found{$l->{'name'}} = 1;
#                print "<TD><INPUT TYPE=\"checkbox\" NAME=\"hgenes\" VALUE=\"$l->{'name'},$l->{'pam'},$l->{'score'}\" $CHECKED></TD>\n";
            }
            print "<TH>", ++$rank,"</TH>";
            print "<TH>$l->{'qname'}</TH>" if ($numquery > 1);

            my $genelen;
            if ($queryseq) {
                $genelen  = $seqLen{$l->{qname}};
            }
            else {
                my $gene = MBGD::Gene->get($l->{qname});
                $genelen = $gene->length;
            }

            my $regstr = &MBGD::SeqRegion::getRegionString($l->{from1}, $l->{to1}, $genelen);
            $title = '';
            if ($l->{'name'} !~ /^qry:/){
#            ($name,$gene,@title) =
#                split(/\s/,&get_title($l->{'name'}));
#            ($name,$orf,$gene,@title) =
#                split(/\s/,`$CMD_mbget $l->{'name'}`);
#            $title = join(' ', @title);
                $title = $gene_title{$l->{'name'}};
            }
            print "<TD><TABLE><TH><A HREF=\"RECOG_gene_info_frame.pl?&name=$sp:$orf&tabid=$Args{'tabid'}\">$l->{'name'}</TH>";
            print "<TD bgcolor=\"$AliButtonColor\">";
            print "<A HREF=\"align?seq1=$l->{'qname'}\&seq2=$l->{'name'}\"><b>A</b></A>";
            print "</TD></TABLE></TD>";
            print "<TD align=\"right\">\n";
            print join("</TD><TD align=\"right\">",
                $l->{'ident'},sprintf("%.2g",$l->{'eval'}),
#                $l->{'ident'},$l->{'eval'},
                $l->{'score'},$l->{'pam'},
                "<PRE>$regstr</PRE>",
                $l->{'coverage'}
#               $l->{from1}, $l->{to1},
#               $l->{from2}, $l->{to2},
                ),
                "</TD><TD> $title </TD>\n";
                print "</TR>\n";
        }   ## foreach @output_sorted

        if ($Param{clustmode} eq 'homology') {
            $checked{'all'}  = 'CHECKED';
        }
        else {
            $checked{'nearest'}  = 'CHECKED';
        }
        print "</TABLE>\n";
        if ($MAXOUT && $NEXT) {
            my $cgi = $WWW->cgi;
            $cgi->param(-name=>'begin', -value=>$NEXT);
            my $qstring = $cgi->self_url;
            print "[<A HREF=$qstring>Next $MAXOUT</A>]\n";
            $cgi->param(-name=>'begin', -value=>0);
            $cgi->param(-name=>'maxout', -value=>0);
            my $qstring = $cgi->self_url;
            print "[<A HREF=$qstring>Display all ($TOTALCOUNT)</A>]<br>\n";
        }
    }
    else {
    }
}
#############################################################################

#$WWW->page_footer;
#$WWW->end_html;

#############################################################################
sub by_eval {
    if ($a->{eval} == $b->{eval}) {
    	$b->{score} <=> $a->{score};
    } else {
    	$a->{'eval'} <=> $b->{'eval'};
    }
}

#############################################################################
sub by_score {
    $b->{score} <=> $a->{score};
}

#############################################################################
sub by_pam {
    if ($a->{pam} == $b->{pam}) {
	$b->{score} <=> $a->{score};
    } else {
	$a->{pam} <=> $b->{pam};
    }
}

#############################################################################
sub by_ident {
    if ($a->{ident} == $b->{ident}) {
	$b->{score} <=> $a->{score};
    } else {
	$b->{ident} <=> $a->{ident};
    }
}


#############################################################################
sub seq_input_form {

	print <<EOF;
<H2> Homology Search </H2>
Sequence similarities are calculated for your sequences
by the same way <A HREF="/htbin/SetParamScreen.pl?mode=help"> as for those stored in MBGD </A>
<i>i. e.</i>
BLAST search followed by dynamic programming global alignment.
You can later overlay your own sequences on the MBGD gene cluster table.
<HR>
EOF

	print <<EOF;
<FORM METHOD=POST ACTION=/htbin/SeqSearch.pl ENCTYPE="multipart/form-data">
<B>Enter your query (in FASTA format).<br>
Multiple sequences (within $LIMIT residues in total) can be accepted.</B><br>
<TEXTAREA rows=\"10\" cols=\"60\" NAME=\"sequence\">$Args{'sequence'}</TEXTAREA>
<br>
or specify a sequence file <INPUT TYPE=\"file\" NAME=\"sequencef\"><br>
EOF

#    print "<INPUT TYPE=\"checkbox\" NAME=\"showall\" CHECKED> Show all results with p-value <= <INPUT SIZE=\"8\" NAME=\"evalcut\" VALUE=\"0.05\"><br>\n";
    print "<INPUT TYPE=\"submit\" VALUE=\"Submit Query\">\n";
    print "<INPUT TYPE=\"reset\"  VALUE=\"Reset\">\n";
    print "</FORM>\n";
    &Property::HomolParam::ChangeHomolParamButton();
}

#############################################################################
sub get_coverage {
    my($cov1, $cov2) = @_;
    ## take larger value (i.e. coverage against the shorter sequence)
    sprintf("%.1f", $cov1 < $cov2 ? $cov2 : $cov1);
}

#############################################################################
sub main::MBGD_GetGeneTitles {
    my(@entnames) = @_;

    my %titles;
    foreach my $a (MBGD::Gene->get(\@entnames)) {
            $a->{name} =~ tr/a-z/A-Z/;
            my $entname = "$a->{sp}:$a->{name}";
            $titles{"$entname"} = $a->{descr};
    }
    %titles;
}

#############################################################################
sub doSimSearch {
        $queryseq = 1;    ## Homology Search mode

        $Args{'sequence'} =~ tr/\r/\n/;     # '\r'  '\n' Ѵwin  mac ǤϤб
        $Args{'sequence'} =~ s/\n+/\n/g;    # Ϣ³Ƥפʲԥ('\n')
        $Args{'sequence'} =~ s/^\n+//;      # Ƭβԥ('\n')
        if (! ($Args{'sequence'} =~ /^>/)) {

            
            $Args{'sequence'} = ">Query\n" . $Args{'sequence'};
        }

        $Args{'sequence'} =~ s/> */>/g;         # '>' ľ ' ' 
        $Args{'sequence'} =~ s/>[^: ]+:/>/g;    #
        $Args{'sequence'} =~ s/>/>qry:/g;       #

        #
        @seqList = split("\n", $Args{'sequence'});

	foreach $seq (@seqList) {
		if ($seq =~ /^>(\S+)\s/) {
			$qryName{$1} = 1;
	    	}
	}

        # ȥ̤ϤФ
        $seqNo = 1;
        foreach $seq (@seqList) {
            if ($seq =~ /^>qry:$/) {
                my($newName);
                for(;;) {
                    $newName = sprintf("QRY%03d", $seqNo++);
                    if ($qryName{$newName} == 1) {
                        # ̤Ѥ ORF ̾Ƥ
                        $qryName{$newName} = 1;
                        last;
                    }
                }
                $seq .= $newName;
            }
        }

        # domclust taball κ
        $seqNo = 1;
        $orfName  = '';
        $sequence = '';

        $filename = "$DIR_MbgdUser/$uid.taball";
        open(FH, ">$filename") || die "Can not open $filename($!)";
        foreach $seq (@seqList) {
            if ($seq =~ /^>(\S+)/) {
                $newOrfName = $1;
                if ($sequence ne '') {
                    my($spec, $orf) = split(':', $orfName);
                    $sequence =~ s/[\r\n]+//g;
		    $seqLen{$orfName} = length($sequence);
                    print FH $spec, " ";
                    print FH $orf, " ";
                    print FH length($sequence) * 3, " ";
                    print FH $seqNo++, " ";
                    print FH "1", "\n";       # ϡȷǤ
                }

                $orfName = $newOrfName;
                $sequence = '';
            }
            else {
                $sequence .= $seq;
            }
        }
        if ($sequence ne '') {
            my($spec, $orf) = split(':', $orfName);
            $sequence =~ s/[\r\n]+//g;
	    $seqLen{$orfName} = length($sequence);
            print FH $spec, " ";
            print FH $orf, " ";
            print FH length($sequence) * 3, " ";
            print FH $seqNo++, " ";
            print FH "1", "\n";       # ϡȷǤ
        }
        close(FH);

        $Args{'sequence'} = join("\n", @seqList);

        @tmparry = ($Args{'sequence'} =~ /(^>|\n>)/g);
        $numquery = @tmparray;

        $uInfo->saveQuerySequence($Args{'sequence'});
	$SeqFileName = $uInfo->{seqfile};
	$SeqSimFile = $uInfo->{seqsimfile};

#        &saveQuerySequence($Cookies{'uid'}, $Args{'sequence'});
#        $SeqFileName = &getSeqFileName($Cookies{'uid'});
#        $filename = "$DIR_MbgdUser/$uid.blastres";

        unlink("$SeqSimFile");

        my($dir);
        $dir = "$DIR_database/bldb";
        foreach $sp (split('\|', $Param{'species'})) {
            push(@dbList, "$dir/$sp");
        }

        $pipeCmd = '';
        $pipeCmd .= "$READBLAST -addDbName";
#        $pipeCmd .= "| $BLASTDP -Qfile=$SeqFileName -qdb=qry -sp2=$sp -PVAL_CUTOFF=$EVALCUTOFF";
        $pipeCmd .= "| $BLASTDP -keepSpName -Qfile=$SeqFileName -qdb=qry -PVAL_CUTOFF=$EVALCUTOFF";
###        $pipeCmd .= "| $CMD_convupper";

	### Execute BLAST

        undef($main::BLAST_HOST);    # local blast

##        my($fileRule) = &makeHtblastRule("blastp", "@dbList", $BlastOpt);
##       &execHtblast($fileRule, $SeqFileName,
##                               $pipeCmd,
##                               $filename);
#        unlink("$fileRule");

	$tmpdb = "$main::DIR_work/dbtmp_$$";
	open(F,">$tmpdb.pal") || die;
	print F "DBLIST " . join(' ', @dbList) . "\n";
	close(F);

        system("$main::CMD_blastall -p blastp -d $tmpdb -i $SeqFileName -a 6 $BlastOpt | $pipeCmd > $SeqSimFile");

        # BLAST ̥ե sort + pack
        $cmd = "$CMD_create_sortres -PAM   $SeqSimFile >${SeqSimFile}.pam.sort";
        system("$cmd");
        $cmd = "$CMD_create_sortres -SCORE $SeqSimFile >${SeqSimFile}.score.sort";
        system("$cmd");
        $cmd = "$CMD_create_sortres -PAM   -PACK $SeqSimFile >${SeqSimFile}.pam.sort.pack";
        system("$cmd");
        $cmd = "$CMD_create_sortres -SCORE -PACK $SeqSimFile >${SeqSimFile}.score.sort.pack";
        system("$cmd");
}

#############################################################################
sub readSeqSimFile {
	my $SeqSimFile = $uInfo->{seqsimfile};
	my @output;
        open(FH, "$SeqSimFile") || die("Can not open $SeqSimFile");
        while(<FH>) {
            chomp();

            my($name1, $name2,
                $from1,$to1,$len1,$from2,$to2,$len2,
                $matchlen,$matchnum,$ident,$bscore,$eval,
                $pam,$exp,$sd,$score) = split;
            my $cov1 = ($to1 - $from1 + 1) * 100 / $len1;
            my $cov2 = ($to2 - $from2 + 1) * 100 / $len2;
            my $coverage = &get_coverage($cov1,$cov2);

            $wOutput = {'qname' => $name1,
                        'name'  => $name2,
                        'from1'=>$from1,
                        'to1'=>$to1,
                        'from2'=>$from2,
                        'to2'=>$to2,
                        'ident' => $ident,
                        'eval'  => $eval,
                        'score' => $score,
                        'pam'   => $pam,
			'coverage' => $coverage,
            };
            push(@output, $wOutput);
        }
        close(FH);
	@output;
}


#############################################################################
####sub getRegionString {
####	my($from,$to,$len) = @_;
####	my($STRLEN)= 10;
####	my($str);
####	$from = $from * $STRLEN / $len;
####	$to = $to * $STRLEN / $len;
####	for ($i = 1; $i <= $STRLEN; $i++) {
####		if ($from < $i && $i - 1 <= $to) {
####			$str .= "=";
####		} else {
####			$str .= " ";
####		}
####	}
####	"<PRE>$str</PRE>";
####}

#############################################################################
sub selectSimSearchRes {
    my($tabid) = shift;

    @orfnames = split(/,/, $Args{'orfname'});
    foreach $on (@orfnames) {
        $orfnames{MBGD::Gene::conv_upper($on)} = 1;
    }
    $numquery = @orfnames;

    my($species) = RECOG::RecogCommon::getSpeciesByTabid($tabid);
#    my($cmd) = "$CMD_select -SPEC=$species -GENES2='$Args{orfname}' -ORIGOUT -FULLOUT -FILTER";
    my($cmd) = "$CMD_select_homlist -SPEC=$species -ORIGOUT $Args{orfname} ";
print STDERR "cmd>>$cmd |\n";
    open(BLAST, "$cmd |") || die;

    while (<BLAST>) {
        my($name1, $name2, $from1, $to1, $from2, $to2, $ident, $eval, $pam, $score, $coverage) = split;
        next if ($name1 eq $name2);
        $name1 = MBGD::Gene::conv_upper($name1);
        $name2 = MBGD::Gene::conv_upper($name2);
        if ($orfnames{$name1}) {
        }
        elsif ($orfnames{$name2}) {
            my($tmp);
            $tmp = $name1; $name1 = $name2; $name2 = $tmp;
            $tmp = $from1; $from1 = $from2; $from2 = $tmp;
            $tmp = $to1; $to1 = $to2; $to2 = $tmp;
        } else {
            next;
        }
        push(@output, { 'qname' => $name1,
                        'name'  => $name2,
                        'from1' => $from1, 'to1' => $to1,
                        'from2' => $from2, 'to2' => $to2,
                		'ident' => $ident,
                        'eval'  => $eval,
                        'score' => $score,
                        'pam'   => $pam,
                        'coverage' => $coverage});
    }
    close(BLAST);

    return;
}

#############################################################################
1;
#############################################################################
