#!/usr/bin/perl -s
use DirHandle;
use File::Path;
use FileHandle;
use MBGD;
use MbgdUpdate;
use RECOG::RecogProject;
require "MBGD_common.pl";
require "libUpdate.pl";

# Upper bound on the number of pairs per dp2 job. The value is passed as
# "-MX" to CMD_execdp2 and is also used by execdp2() to size the chunks the
# pair list is split into.
$main::MAX_Dp2Pairs = 10000;

# This script is started with "perl -s", so globals such as $TMPDIR, $PID,
# $OUTNAME and $DEBUG may already have been set from command-line switches
# (e.g. -TMPDIR=/some/dir) before the first statement runs.
# NOTE(review): $DIR_work is presumably defined by MBGD_common.pl / MBGD.pm --
# confirm.
if (! defined $TMPDIR) {
    $TMPDIR = "$DIR_work";
}

# The PID is only used as a suffix for the names of the intermediate files.
# It is stored in a PID file so that a re-run after an abnormal termination
# reuses the same suffix (the PID file is removed only when the script
# reaches its normal end, see below).
$PID_FILE = "$TMPDIR/tmphom.pid";
if (! $PID) {
    if (-f $PID_FILE) {
        open(my $fh_pid, '<', $PID_FILE) || die "Can't open $PID_FILE\n";
        $PID = <$fh_pid>; chomp $PID; close $fh_pid;
        print "DBG :: PID = $PID\n" if ($main::DEBUG);
    } else {
        $PID = $$;
        # Failing to record the PID is not fatal ($TMPDIR may only be created
        # later by execHomSearch()), but it must not go unnoticed: without the
        # PID file a re-run cannot find the intermediate files of this run.
        if (open(my $fh_pid, '>', $PID_FILE)) {
            print $fh_pid "$PID\n";
            close $fh_pid;
        } else {
            print STDERR "WARNING :: Can't write $PID_FILE ($!)\n";
        }
    }
}
if (! $OUTNAME) {
    $OUTNAME = "tmphom_$PID";
    if ($TMPDIR) {
        $OUTNAME = "$TMPDIR/$OUTNAME";
    }
}
$MBGD_HOME = $ENV{'MBGD_HOME'};

# Positional arguments. They are stored in package globals; none of the subs
# visible in this file reads them (each sub uses its own lexical of the same
# name).
$seqdbfile=shift @ARGV;
$qseqfile=shift @ARGV;
$resultfile=shift @ARGV if (@ARGV);

# Date (YYYYMMDD) taken from the mtime of the end marker of the
# "m0400NewProtseq" step. If the marker does not exist, stat() returns an
# empty list and the date falls back to the epoch.
# NOTE(review): presumably the registration date of the new species data;
# the value is not used by any code visible in this file.
my($procname) = "m0400NewProtseq";
my(@fStat) = stat("$DIR_work/.$procname.end");
my($sec, $min, $hour, $day, $mon, $year)  = localtime($fStat[9]);
$year += 1900;
$mon++;
my($date_new_spid) = sprintf("%04d%02d%02d", $year, $mon, $day);

#
my($spid_set_mode) = 'HomSearch';

# With at most one project the search is run once over the whole data set;
# otherwise it is run once per project.
my($recog_proj_ref) = RECOG::RecogProject->new();
my($n_project) = $recog_proj_ref->count_project();
print STDERR "DBG :: Found $n_project project(s)\n";
if ($n_project <= 1) {
    execHomSearch_all();
}
else {
    execHomSearch_each($recog_proj_ref);
}

# Normal end: the PID file is no longer needed.
unlink($PID_FILE) if (-f $PID_FILE);

exit(0);

###############################################################################
#
# Run the homology search separately for every project and concatenate the
# per-project result files into
# "$ENV{MBGD_HOME}/work/homology.blastdpres.out".
#
# Parameters:
#   $recog_proj_ref - object providing get_project_id_list()
#
# Project IDs <= 1 are skipped. A project for which execHomSearch_project()
# returns no file name (no new protein sequences) contributes nothing.
# Dies when the merged output file cannot be created; a failing "cat" of a
# single per-project file is reported on STDERR but does not abort the run.
sub execHomSearch_each {
    my($recog_proj_ref) = shift;

    my(@proj_id_list) = $recog_proj_ref->get_project_id_list();

    # Run the search project by project, in ascending order of project ID.
    my(@file_out_list);
    foreach my $proj_id (sort {$a <=> $b} @proj_id_list) {
        next if ($proj_id <= 1);

        my($file_blastdb) = "$DIR_work/blastdb.projectid_$proj_id";
        my($file_newprot) = "$DIR_work/newprotseq.projectid_$proj_id";

        print STDERR "LOG :: Start ID=$proj_id\n";
        my($file_out) = execHomSearch_project($proj_id, $file_blastdb, $file_newprot);
        next if (! $file_out); # skip if file is not created
        push(@file_out_list, $file_out);
    }

    # Start from an empty merged file: the per-project results are appended
    # below, so leftovers from a previous run must not survive.
    my($file_blastdpres_out) = "$ENV{'MBGD_HOME'}/work/homology.blastdpres.out";
    my $fh_clear = IO::File->new($file_blastdpres_out, "w");
    if (!$fh_clear) {
        print STDERR "ERROR :: Can not open $file_blastdpres_out($!)\n";
        check_status_and_die(1, "execHomSearch_each($file_blastdpres_out)");
    }
    $fh_clear->close();

    foreach my $file_out (@file_out_list) {
        my($cmd) = "$main::CMD_cat $file_out >> $file_blastdpres_out";
        if (system("$cmd") != 0) {
            print STDERR "WARNING :: Failed to append $file_out to $file_blastdpres_out (status=$?)\n";
        }
    }

    return;
}

###############################################################################
#
# Run the homology search once over the whole data set.
#
# Query sequences are read from "$ENV{MBGD_HOME}/work/newprotseq"; when that
# file is missing or empty there is nothing to do and the sub returns
# immediately. Otherwise the sequence database is (re)written to
# "$ENV{MBGD_HOME}/work/blastdb" by extract_db() and the pipeline is run with
# intermediate/result files named "$OUTNAME.<suffix>".
sub execHomSearch_all {
    my $work_dir = "$ENV{'MBGD_HOME'}/work";

    # Nothing to search for without new protein sequences.
    my $qseqfile = "$work_dir/newprotseq";
    return unless (-e $qseqfile && !-z $qseqfile);

    my $seqdbfile = "$work_dir/blastdb";
    extract_db($extractdb, $seqdbfile);

    # mqblast output, dp2 input list, dp2 output, dp2 rest list, final result
    my ($blastout, $listfile, $outfile, $restfile, $resultfile) =
        map { "$OUTNAME.$_" } qw(mqblast mdpin mdpout mdprest out);

    execHomSearch($seqdbfile, $qseqfile,
                  $blastout, $listfile, $outfile, $restfile, $resultfile);

    return;
}

###############################################################################
#
# Run the homology search for a single project.
#
# Parameters:
#   $proj_id      - project ID, used as suffix of all file names
#   $file_blastdb - sequence database of the project
#   $file_newprot - new protein sequences (query) of the project
#
# Returns the name of the result file ("$OUTNAME.out.<proj_id>"), or nothing
# when the query file is missing or empty. An existing non-empty result file
# is reused when cmprFileDate() reports a negative value for both inputs
# against it.
sub execHomSearch_project {
    my ($proj_id, $file_blastdb, $file_newprot) = @_;

    unless (-e $file_newprot && !-z $file_newprot) {
        print STDERR "LOG :: NO protseq :: $file_newprot\n";
        return;
    }

    # mqblast output, dp2 input list, dp2 output, dp2 rest list, final result
    my ($blastout, $listfile, $outfile, $restfile, $resultfile) =
        map { "$OUTNAME.$_.$proj_id" } qw(mqblast mdpin mdpout mdprest out);

    my $has_result = (-e $resultfile && -s $resultfile);
    if ($has_result
        && cmprFileDate($file_newprot, $resultfile) < 0
        && cmprFileDate($file_blastdb, $resultfile) < 0) {
        ## resultfile is newer than input
        print STDERR "Found result (skip):: $resultfile\n";
        return $resultfile;
    }

    execHomSearch($file_blastdb, $file_newprot,
                  $blastout, $listfile, $outfile, $restfile, $resultfile);

    return $resultfile;
}

###############################################################################
#
# Run the homology-search pipeline for one database/query pair:
#   1. mqblast      - run CMD_mqblast, writing $blastout
#   2. create_list  - run CMD_createList on $blastout, writing $listfile
#   3. exec_mdp     - call execdp2(), handing it $outfile and $resultfile
# A step is skipped when its output file already exists and is non-empty, so
# a re-run continues after the last step that produced output.
#
# Parameters:
#   $seqdbfile  - sequence database file (must exist before mqblast runs)
#   $qseqfile   - query sequence file
#   $blastout   - mqblast output file
#   $listfile   - list file produced by create_list
#   $outfile    - dp2 output file
#   $restfile   - only removed during cleanup; never written in this sub
#   $resultfile - result file handed to execdp2()
#
# Reads the package globals $deleteTmp, $NCPUS, $NAMELEN, $STEP,
# $RETRY_MQBLAST, $MQTMPDIR, $MQBLAST_OTHEROPT and $TMPDIR and sets
# $retainTmp, $RETAIN_TMP, $NCPUS, $NAMELEN and $cmd as side effects.
# NOTE(review): these globals are presumably set through "perl -s" switches --
# confirm against the callers of this script.
# Dies when the database file is missing or a step exits with a non-zero
# status.
sub execHomSearch {
    my($seqdbfile) = shift;
    my($qseqfile)  = shift;
    my $blastout   = shift;
    my $listfile   = shift;
    my $outfile    = shift;
    my $restfile   = shift;
    my $resultfile = shift;

    # Intermediate files are kept unless $deleteTmp is set.
    $retainTmp = 1;	## temporary
    $retainTmp = 0 if ($deleteTmp);

    # Number of parallel jobs defaults to the configured CPU count.
    $NCPUS = $main::NUM_cpu if(! $NCPUS);

    # Interpolated into the mqblast command line below and into the execdp2
    # command line built in execDp2AndCheck().
    $RETAIN_TMP='-retainTmp' if ($retainTmp);

    if (! defined $NAMELEN) {
        $NAMELEN = 22;	## length of MD5 value; for check
    }

    die "Usage: $0 dbfile [qseqfile]\n" if (! $seqdbfile);

    # Jump directly to one of the labels below (mqblast, create_list,
    # exec_mdp). Everything between here and the chosen label is skipped.
    if ($STEP) {
    	goto $STEP;
    }

    if ($RETRY_MQBLAST) {
    	die "You must specify MQTMPDIR\n" if (! $MQTMPDIR);
    	$MQBLAST_OTHEROPT .= " -ADD -TMPDIR=$MQTMPDIR ";
    }
    if ($TMPDIR && ! -d $TMPDIR) {
    	mkpath($TMPDIR);
    }
#    if ($extractdb) {
#    	&extract_db($extractdb, $seqdbfile, $extract_spec);
#    	exit(0) if ($extractdb_only);
#    }
    # With $main::usergenome set, the query sequences are appended to the
    # database file before searching.
    if ($main::usergenome) {
        system("$main::CMD_cat $qseqfile >> $seqdbfile");
    }

    die "$0: $seqdbfile file not found" if (! -f $seqdbfile);

    # Step 1: run mqblast unless a non-empty $blastout already exists.
    mqblast:
    if (! -f $blastout || -z $blastout) {
        # Use qsub mode only when a qsub command is configured and executable.
        my($modeQsub) = '';
        if ($main::CMD_qsub && -x $main::CMD_qsub) {
            $modeQsub = ' -mode=qsub -CopyAll';
        }
        print STDERR "mqblast\n";
        $cmd = "$main::CMD_mqblast -OUTFILE='$blastout' "
    		 . "$modeQsub "
             . "-PROCNUM=$NCPUS "
    		 . "-CMD_qsub=$main::CMD_qsub "
    		 . "-CMD_qstat=$main::CMD_qstat "
    		 . "-CMD_qmod=$main::CMD_qmod "
    		 . "-DIR_nqs_pub='$main::DIR_nqs_pub' "
    		 . "-QUEUE_nqs='$main::NQS_queue' "
#    		 . "-CMD_blastall='$main::CMD_blastall' "
#    		 . "-CMD_formatdb='$main::CMD_formatdb' "
    		 . "-PID=$main::PID "
    		 . "$RETAIN_TMP "
    		 . "-CHECK -CHECK_NAMELEN=$NAMELEN ";

	if ($main::CMD_legacy_blast) {
		$cmd .= " -CMD_legacy_blast=$::CMD_legacy_blast";
	}
  	$cmd .=   " $MQBLAST_OTHEROPT "
		. " $seqdbfile $qseqfile";

print STDERR ">>>$cmd\n";
    	system("$cmd");
        # Abort the whole run when mqblast reports a non-zero status.
        &check_status_and_die($?, "mqblast");
    } else {
        print STDERR "SKIP :: mqblast\n";
    }

    # Step 2: build the list file from the mqblast output unless a non-empty
    # $listfile already exists.
    create_list:
    if (! -f $listfile || -z $listfile) {
        print STDERR "create_list: $listfile\n";
        system("$main::CMD_createList $blastout > $listfile");
        &check_status_and_die($?, "create_list");
    } else {
        print STDERR "SKIP :: create_list\n";
    }


    # Step 3: run dp2 via execdp2() unless a non-empty $outfile already
    # exists.
    exec_mdp:
    if (! -f $outfile || -z $outfile) {
        execdp2("$listfile", "$seqdbfile", "$outfile", "$resultfile");
    } else {
        print STDERR "SKIP :: execdp2\n";
    }


    # Remove the intermediate files; $resultfile is always kept.
    if (! $retainTmp) {
    	unlink($blastout) if (-f $blastout);
    	unlink($outfile) if (-f $outfile);
    	unlink($listfile) if (-f $listfile);
    	unlink($restfile) if (-f $restfile);
    }

    return;
}

###############################################################################
#
# Dump protein sequences from the database into a FASTA file.
#
# Parameters:
#   $extractdb    - data source; used as-is when it contains "dbi:",
#                   otherwise $MYSQL_DB (with $extract_spec) or
#                   $MYSQL_DBACCUM (without) is used
#   $seqdbfile    - FASTA file to write (overwritten)
#   $extract_spec - optional comma-separated list of species; when given,
#                   only sequences of genes of these species are written and
#                   entries are named "<sp>:<name>", otherwise all rows of
#                   proteinseq are written and named by their md5sum
#
# Sequences are wrapped at 60 characters per line.
# Dies when the output file cannot be opened or closed.
sub extract_db {
    my($extractdb, $seqdbfile, $extract_spec) = @_;
    my($SQL, $NAME, $db);
    if (defined $extractdb && $extractdb =~ /dbi:/) {
        $db = MBGD::DB->new($extractdb);
    } elsif ($extract_spec) {
        $db = MBGD::DB->new($MYSQL_DB);
    } else {
        $db = MBGD::DB->new($MYSQL_DBACCUM);
    }
    if ($extract_spec) {
        # NOTE(review): the species names are interpolated into the SQL text
        # unescaped; they must come from a trusted source.
        my $splist = join(',', map { "'$_'" } split(/,/, $extract_spec));
        $SQL = "select * from proteinseq p, gene g " .
            "where g.sp in ($splist) and p.id=g.aaseq";
        $NAME = "spname";
    } else {
        $SQL = "select * from proteinseq";
        $NAME = "chksum";
    }
    open(my $fh_out, '>', $seqdbfile)
        || die "Can't open $seqdbfile for writing: $!\n";
    my $sth = $db->execute($SQL);
    while (my $h = $sth->fetchrow_hashref) {
        my $seq = $h->{seq};
        my $name;
        # Break after every 60 characters, but only when more sequence
        # follows; otherwise a sequence whose length is a multiple of 60
        # would be followed by an empty line.
        $seq =~ s/(.{60})(?=.)/$1\n/g;
        if ($NAME eq 'spname') {
            $name = "$h->{sp}:$h->{name}";
        } else {
            $name = "$h->{md5sum}";
        }
        print $fh_out ">$name\n$seq\n";
    }
    # Buffered write errors (e.g. disk full) only show up at close time.
    close($fh_out) || die "Can't close $seqdbfile: $!\n";
}

###############################################################################
#
# Abort the update when a step reported a failure.
#
# Parameters:
#   $status - status of the step; any true value counts as failure
#   $step   - name of the step, included in the error message
sub check_status_and_die {
    my $status = shift;
    my $step   = shift;

    $status and die "Update process was terminated abnormally. step=$step\n";
}

###############################################################################
#
# Run dp2 on one pair list and collect the results, re-running dp2 on the
# pairs that are reported as unfinished ("rest") until none are left.
#
# Parameters:
#   $fileList   - pair list to process
#   $fileSeqDb  - sequence database file
#   $fileOut    - dp2 output file (handed to CMD_execdp2 on every pass)
#   $fileResult - result file; cleared first, then the results of every pass
#                 are appended
#   $modeQsub   - extra options for CMD_execdp2 ('' or the qsub options)
#
# Per pass: CMD_execdp2 is run, then CMD_updateList, whose standard output
# becomes "<original $fileList>.rest" and whose "-resultout" file is appended
# to $fileResult. An empty rest file ends the loop; otherwise the rest file
# becomes the list of the next pass.
# Dies when a command keeps failing or when too many passes are needed.
sub execDp2AndCheck {
    my($fileList, $fileSeqDb, $fileOut, $fileResult, $modeQsub) = @_;
    my($max_retry) = 5;

    # Results are appended pass by pass below, so the result file has to be
    # cleared first; otherwise results of an earlier run would be kept.
    my $fhClear = FileHandle->new($fileResult, "w");
    if (!$fhClear) {
        print STDERR "ERROR :: Can not open $fileResult($!)\n";
        check_status_and_die(1, "execdp2($fileResult)");
    }
    $fhClear->close();

    # The rest file name is derived from the original list name and stays the
    # same for all passes.
    my($fileRest) = "$fileList.rest";
    for (my $nTry = 0;; $nTry++) {
        # Run dp2 on the current list.
        my $cmd = "$main::CMD_execdp2 "
                . "-CMD_qsub=$main::CMD_qsub "
                . "-CMD_qstat=$main::CMD_qstat "
                . "-CMD_qmod=$main::CMD_qmod "
                . "-DIR_nqs_pub=$main::DIR_nqs_pub "
                . "-QUEUE_nqs='$main::NQS_queue' "
                . "-MXJOB=$NCPUS "
                . "-MX=$main::MAX_Dp2Pairs "
                . "$modeQsub "
                . "-CopyAll "
                . "$RETAIN_TMP "
                . "$fileList $fileSeqDb $fileOut";
        print STDERR "EXEC :: $cmd\n";
        _execDp2_runWithRetry($cmd, "execdp2", "dp2", $max_retry);

        # Split the dp2 output into results (-resultout) and the list of
        # pairs still to be done (standard output).
        $cmd = "$main::CMD_updateList -listfile=$fileList -outfiles=$fileOut "
             . "-CHECK_NAMELEN=$NAMELEN "
             . "-resultout=$fileResult.0";
        print STDERR "EXEC :: $cmd\n";
        _execDp2_runWithRetry("$cmd > $fileRest", "updateList", "updateList", $max_retry);

        # Append the results of this pass. A failure is only reported:
        # SOURCE gives no guarantee that "$fileResult.0" exists for a pass
        # without results.
        if (system("$main::CMD_cat $fileResult.0 >> $fileResult") != 0) {
            print STDERR "WARNING :: Failed to append $fileResult.0 to $fileResult (status=$?)\n";
        }
        unlink("$fileResult.0");

        # Nothing left to do: finished.
        if (-z $fileRest) {
            unlink($fileRest);
            last;
        }

        print STDERR "Found REST data.\n";
        if ($max_retry < $nTry) {
            print STDERR "ERROR :: Too many errors.\n";
            check_status_and_die(1, "execdp2(retry)");
        }

        # Prepare the next pass: the remaining pairs become the new list.
        print STDERR "RETRY\n";
        rename("$fileRest", "$fileRest.0");
        $fileList = "$fileRest.0";
    }

    return;
}

# Run a shell command, repeating it until it succeeds. Once the number of
# failed attempts exceeds $max_retry the update is aborted through
# check_status_and_die() with the status of the last attempt.
#   $cmd       - command line handed to system()
#   $step      - step name for the abort message
#   $label     - name used in the "Retry" warning
#   $max_retry - retry limit
sub _execDp2_runWithRetry {
    my($cmd, $step, $label, $max_retry) = @_;

    for (my $nAttempt = 0;; $nAttempt++) {
        # Any non-zero status is a failure: a non-zero exit code, death by a
        # signal, or -1 when the command could not be started.
        my $ret = system($cmd);
        return if ($ret == 0);

        if ($max_retry < $nAttempt) {
            print STDERR "ERROR :: Too many errors.\n";
            check_status_and_die($ret, $step);
        }
        print STDERR "WARNING :: Retry $label\n";
    }
}

###############################################################################
#
# Run dp2 for a (possibly very long) pair list.
#
# The list is split into chunks of $main::MAX_Dp2Pairs * $NCPUS * 5 lines.
# Chunk number i is written to "$fileList.i" and processed by
# execDp2AndCheck(), which is handed "$fileOut.i" and "$fileResult.i".
# Afterwards the per-chunk files are concatenated into $fileOut and
# $fileResult and removed.
#
# Parameters:
#   $fileList   - pair list
#   $fileSeqDb  - sequence database file
#   $fileOut    - dp2 output file to create
#   $fileResult - result file to create
#
# An empty list yields empty $fileOut and $fileResult files.
# Dies when a file cannot be opened or written.
sub execdp2 {
    my($fileList, $fileSeqDb, $fileOut, $fileResult) = @_;

    # Use qsub mode only when a qsub command is configured and executable.
    my($modeQsub) = '';
    if ($main::CMD_qsub && -x $main::CMD_qsub) {
        $modeQsub = "-mode=qsub "
                  . "-qsub=$main::CMD_qsub "
                  . "-qstat=$main::CMD_qstat "
                  . "-qmod=$main::CMD_qmod ";
    }

    my $chunkSize = $main::MAX_Dp2Pairs * $NCPUS * 5;

    # Split the list into chunks and process every full chunk right away.
    my($ext) = 0;
    my($n) = 0;
    my $fhList = FileHandle->new($fileList, "r");
    if (!$fhList) {
        print STDERR "ERROR :: Can not open $fileList($!)\n";
        check_status_and_die(1, "execdp2($fileList)");
    }
    my $fhChunk = _execdp2_openWrite("$fileList.$ext");

    # "defined" keeps a final line consisting of "0" from ending the loop.
    while (defined(my $line = $fhList->getline())) {
        $fhChunk->print($line);
        $n++;
        if ($chunkSize <= $n) {
            $fhChunk->close();
            execDp2AndCheck("$fileList.$ext", $fileSeqDb, "$fileOut.$ext", "$fileResult.$ext", $modeQsub);
            unlink("$fileList.$ext");

            $ext++;
            $n = 0;
            $fhChunk = _execdp2_openWrite("$fileList.$ext");
        }
    }
    $fhList->close();
    $fhChunk->close();

    # Last (partial) chunk. When it is empty -- the list was empty or its
    # length was a multiple of the chunk size -- dp2 is not run; empty
    # per-chunk files are created instead so that the merge below finds both
    # of them.
    if (-z "$fileList.$ext") {
        foreach my $fileEmpty ("$fileOut.$ext", "$fileResult.$ext") {
            _execdp2_openWrite($fileEmpty)->close();
        }
    } else {
        execDp2AndCheck("$fileList.$ext", $fileSeqDb, "$fileOut.$ext", "$fileResult.$ext", $modeQsub);
    }
    unlink("$fileList.$ext");

    # Concatenate the per-chunk files in chunk order.
    my $fhOut    = _execdp2_openWrite($fileOut);
    my $fhResult = _execdp2_openWrite($fileResult);
    my(@unlink_file_list);
    foreach my $i (0 .. $ext) {
        _execdp2_appendFile("$fileOut.$i", $fhOut);
        _execdp2_appendFile("$fileResult.$i", $fhResult);
        push(@unlink_file_list, "$fileOut.$i", "$fileResult.$i");
    }
    # Buffered write errors only show up at close time.
    if (!$fhOut->close()) {
        print STDERR "ERROR :: Can not write $fileOut($!)\n";
        check_status_and_die(1, "execdp2($fileOut)");
    }
    if (!$fhResult->close()) {
        print STDERR "ERROR :: Can not write $fileResult($!)\n";
        check_status_and_die(1, "execdp2($fileResult)");
    }

    # The per-chunk files are removed only after the merge is complete.
    foreach my $unlink_file (@unlink_file_list) {
        unlink($unlink_file);
    }

    return;
}

# Open $file for writing (truncating it) and return the handle; aborts the
# update when the file cannot be opened.
sub _execdp2_openWrite {
    my($file) = @_;

    my $fh = FileHandle->new($file, "w");
    if (!$fh) {
        print STDERR "ERROR :: Can not open $file($!)\n";
        check_status_and_die(1, "execdp2($file)");
    }
    return $fh;
}

# Copy the content of $file to the already opened handle $fhDst; aborts the
# update when $file cannot be opened.
sub _execdp2_appendFile {
    my($file, $fhDst) = @_;

    my $fhSrc = FileHandle->new($file, "r");
    if (!$fhSrc) {
        print STDERR "ERROR :: Can not open $file($!)\n";
        check_status_and_die(1, "execdp2($file)");
    }
    while (defined(my $line = $fhSrc->getline())) {
        $fhDst->print($line);
    }
    $fhSrc->close();
    return;
}
