#!/usr/bin/perl -s

use File::Path;
use MBGD;

## ---------------------------------------------------------------------
## Configuration.
## The script runs under "perl -s", so every upper/lower-case variable
## tested below may already have been set by a -NAME or -NAME=value switch
## on the command line; only missing (or false) values get a default here.
## ---------------------------------------------------------------------

## Directory for intermediate files.
## NOTE(review): $DIR_tmp is not set in this file - presumably exported by
## the MBGD module; confirm.
$TMPDIR = "$DIR_tmp" unless defined $TMPDIR;

## Process id used to make the default output name unique.
$PID ||= $$;

## Common prefix of all intermediate files: "<TMPDIR>/tmphom_<PID>", or
## just "tmphom_<PID>" when TMPDIR is empty.
unless ($OUTNAME) {
	my $prefix = $TMPDIR ? "$TMPDIR/" : '';
	$OUTNAME = "${prefix}tmphom_$PID";
}
$MBGD_HOME = $ENV{'MBGD_HOME'};

## Intermediate / result file names, all derived from $OUTNAME.
($blastout, $listfile, $outfile, $restfile, $resultfile) =
	map { "$OUTNAME.$_" } qw(mqblast mdpin mdpout mdprest out);

## Positional arguments: dbfile [qseqfile [resultfile]].
## A third argument, when given, overrides the default result file name.
$seqdbfile = shift @ARGV;
$qseqfile = shift @ARGV;
$resultfile = shift @ARGV if (@ARGV);

## Intermediate files are kept unless -deleteTmp is given (## temporary).
$retainTmp = $deleteTmp ? 0 : 1;

## Process counts handed to mqblast (-PROCNUM) and execmdp.pl (-NCPUS).
$NCPUS ||= 16;
$NCPUS_MPI ||= 8;

## Option forwarded to the external commands when files are retained.
$RETAIN_TMP = '-retainTmp' if $retainTmp;

## length of MD5 value; for check
$NAMELEN = 22 unless defined $NAMELEN;

$seqdbfile or die "Usage: $0 dbfile [qseqfile]\n";

## Make the mqblast package reachable for the system() calls.
$ENV{'PATH'} = join(':', $ENV{'PATH'}, '/dbb/project/MBGD/package/mqblast');

## -STEP=<label> resumes the pipeline at that label.  Note that the jump
## also skips everything that follows in this setup section (retry
## options, TMPDIR creation, database extraction, input file check).
goto $STEP if $STEP;

## -RETRY_MQBLAST re-runs mqblast against an existing mqblast temporary
## directory, which therefore has to be named with -MQTMPDIR.
if ($RETRY_MQBLAST) {
	$MQTMPDIR or die "You must specify MQTMPDIR\n";
	$MQBLAST_OTHEROPT .= " -ADD -TMPDIR=$MQTMPDIR ";
}

## Create the temporary directory on demand.
mkpath($TMPDIR) if $TMPDIR && ! -d $TMPDIR;

## -extractdb: (re)build the database FASTA file before searching.
extract_db($extractdb, $seqdbfile, $extract_spec) if $extractdb;

-f $seqdbfile or die "$0: $seqdbfile file not found\n";

mqblast:
## Step "mqblast": run the external mqblast command on the database file
## (and the optional query file); its output is written to $blastout.
print STDERR "mqblast\n";
system(join(' ',
	"mqblast",
	"-OUTFILE='$blastout'",
	"-mode=qsub",
	"-CopyAll",
	"-PROCNUM=$NCPUS",
	$RETAIN_TMP,
	"-CHECK",
	"-CHECK_NAMELEN=$NAMELEN",
	$MQBLAST_OTHEROPT,
	$seqdbfile,
	$qseqfile,
));
check_status_and_die($?, 'mqblast');

create_list:
## Step "create_list": turn the mqblast output into the list file that
## drives execmdp.pl.
print STDERR "create_list\n";
system(join(' ', 'create_list', $blastout, '>', $listfile));
check_status_and_die($?, 'create_list');

exec_mdp:
## Step "exec_mdp": run execmdp.pl over the list file; results go to
## $outfile.
print STDERR "exec_mdp\n";
system(join(' ',
	'execmdp.pl',
	'-CopyAll',
	$RETAIN_TMP,
	"-NCPUS=$NCPUS_MPI",
	$listfile,
	$seqdbfile,
	$outfile,
));
check_status_and_die($?, 'execmdp');

update_list:
## Step "update_list": combine list file and execmdp output.  The final
## result is written to $resultfile (-resultout); whatever update_list.pl
## prints on stdout is captured in $restfile and examined by the retry
## step.
system(join(' ',
	'update_list.pl',
	"-listfile=$listfile",
	"-outfiles=$outfile",
	"-CHECK_NAMELEN=$NAMELEN",
	"-resultout=$resultfile",
	'>',
	$restfile,
));
check_status_and_die($?, 'update_list');

retry:
## Step "retry": a non-empty $restfile after update_list triggers one more
## execmdp.pl / update_list.pl pass over its contents.
## NOTE(review): $restfile presumably lists the entries update_list.pl
## could not resolve in the first pass - confirm against update_list.pl.
if (! -z $restfile) {
	## retry
	my $newlistfile = "$restfile.1";
	my $newresultfile = "$resultfile.1";

	## The rest file becomes the input list of the second pass; without it
	## there is nothing meaningful to run, so a failed rename is fatal.
	rename($restfile, $newlistfile) ||
		die "$0: cannot rename $restfile to $newlistfile: $!\n";

	system("execmdp.pl -CopyAll $RETAIN_TMP -NCPUS=$NCPUS_MPI " .
		"$newlistfile $seqdbfile $outfile.2");
	## Same check as in the first pass: do not feed the output of a failed
	## execmdp.pl run to update_list.pl.
	&check_status_and_die($?, "retry execmdp");

	system("update_list.pl -listfile=$newlistfile -outfiles=$outfile.2 " .
		"-CHECK_NAMELEN=$NAMELEN " .
		"-resultout=$newresultfile -nonuniq > $restfile");
	&check_status_and_die($?, "retry");

	## Append the second-pass results to the main result file.  -s is true
	## only for an existing, non-empty file, so cat is never run on a
	## missing file; a failing append would silently lose results and is
	## therefore fatal.
	if (-s $newresultfile) {
		system("cat $newresultfile >> $resultfile");
		&check_status_and_die($?, "retry cat");
	}
}

## Anything still left in $restfile at this point is treated as a failure.
&check_status_and_die( (! -z $restfile), "restfile" );

## Remove the intermediate files unless they are to be retained.
## NOTE(review): the files created by the retry pass ("$restfile.1",
## "$outfile.2", "$resultfile.1") are not removed here.
if (! $retainTmp) {
	unlink($blastout);
	unlink($outfile);
	unlink($listfile);
	unlink($restfile);
}

## extract_db($extractdb, $seqdbfile, $extract_spec)
##
## Dump protein sequences from an MBGD database into a FASTA file.
##
##   $extractdb    - value of the -extractdb switch; when it contains
##                   "dbi:" it is handed to MBGD::DB->new as the database
##                   to open, otherwise a default database is used
##                   ($MYSQL_DB with a species list, $MYSQL_DBACCUM without).
##                   NOTE(review): both defaults are not set in this file -
##                   presumably exported by the MBGD module; confirm.
##   $seqdbfile    - path of the FASTA file to create (overwritten)
##   $extract_spec - optional comma-separated species list; when given,
##                   only genes of these species are written and entries
##                   are named "<sp>:<name>", otherwise every row of
##                   proteinseq is written and named by its chksum
##
## Sequences are wrapped at 60 characters per line.
## Dies when the output file cannot be opened or closed.
sub extract_db {
	my($extractdb, $seqdbfile, $extract_spec) = @_;
	my($SQL, $NAME, $db);
	if ($extractdb =~ /dbi:/) {
		$db = MBGD::DB->new($extractdb);
	} elsif ($extract_spec) {
		$db = MBGD::DB->new($MYSQL_DB);
	} else {
		$db = MBGD::DB->new($MYSQL_DBACCUM);
	}
	if ($extract_spec) {
		## NOTE(review): the species codes are interpolated into the SQL
		## text as quoted literals without escaping; this is only safe as
		## long as -extract_spec comes from a trusted operator.
		my $splist = join(',', map { "'$_'" } split(/,/, $extract_spec));
		$SQL = "select * from proteinseq p, gene g " .
			"where g.sp in ($splist) and p.id=g.aaseq";
		$NAME = "spname";
	} else {
		$SQL = "select * from proteinseq";
		$NAME = "chksum";
	}
	## Three-argument open with a lexical handle: the file name can never
	## be mistaken for an open mode, and the handle does not leak globally.
	open(my $out, '>', $seqdbfile) ||
		die "$0: cannot open $seqdbfile for writing: $!\n";
	my $sth = $db->execute($SQL);
	while (my $h = $sth->fetchrow_hashref) {
		my $seq = $h->{seq};
		my $name;
		## Break the sequence after every 60 characters, but only where
		## more sequence follows; otherwise a sequence whose length is an
		## exact multiple of 60 would be followed by an empty line.
		$seq =~ s/(.{60})(?=.)/$1\n/g;
		if ($NAME eq 'spname') {
			$name = "$h->{sp}:$h->{name}";
		} else {
			$name = "$h->{chksum}";
		}
		print $out ">$name\n$seq\n";
	}
	## Buffered write errors (e.g. disk full) only surface at close time.
	close($out) || die "$0: cannot close $seqdbfile: $!\n";
}

## check_status_and_die($status, $step)
##
## Abort the whole run when a pipeline step reported failure.
##   $status - any true value counts as failure (callers pass $? after
##             system(), or the result of a file test)
##   $step   - step name included in the error message
## Returns the (false) status unchanged when nothing went wrong.
sub check_status_and_die {
	my $status = shift;
	my $step = shift;
	if (! $status) {
		return $status;
	}
	die "Update process was terminated abnormally. step=$step\n";
}
