#!/usr/bin/perl -s

use File::Copy;
use File::Basename;
use File::Path;
use FileHandle;
use POSIX;

# Tunables and argument handling.
#
# The script is started with "perl -s" (see the #! line), so every global
# tested below can be preset on the command line as -NAME=value before the
# positional arguments (seqfile, optional outputfile).

# Upper limit on the number of sequence entries ('>' header lines) processed
# by a single hmmer run; unset values and values below 10 become 10.
$MX = 10 if (! $MX || $MX < 10);
# Upper limit on the number of jobs submitted to PBS in parallel.
$MXJOB = 10  if (! $MXJOB || $MXJOB < 1);

$UserName = getpwuid($<);
$HOME=($ENV{HOME} ? $ENV{HOME} : "/home/$UserName");
$MBGDHOME = ($ENV{MBGD_HOME} ? $ENV{MBGD_HOME} : "/db5/project/MBGD");
# Per-process scratch directory ($$ is the PID) unless -EXECDIR was given.
$EXECDIR="/scratch/$UserName/hmmertmp.$$" if (! $EXECDIR);

$CMDPATH='/bio/bin';
$DB="$MBGDHOME/package/tigrfams/6.0/TIGRFAMs_6.0_HMM.LIB" if(!$DB);
$CMD="hmmpfam" if(!$CMD);
# Keep a -OPT='...' value supplied on the command line, as the usage text
# promises; an unconditional reset here would silently discard it.
$OPT="" if (! defined($OPT));
$CMD_hmmer = $CMDPATH . "/" . $CMD;

$QSUB_CMD = "/usr/pbs/bin/qsub";
$QSTAT_CMD = "/usr/pbs/bin/qstat";

# Base name of the per-chunk job scripts written into the scratch directory.
$TMPCMD="exechmmer";
# Directory the script was started from; relative input/output paths are
# resolved against it.  $ENV{PWD} is preferred (it is what the invoking shell
# reported); getcwd() covers environments where PWD is not exported.
$CURRDIR=($ENV{PWD} ? $ENV{PWD} : getcwd());

if (@ARGV < 1) {
	die "Usage: $0 [-DB=database -CMD=hmmer_command -OPT='option' (option)] seqfile [outputfile (option)] \n";
}
$INPUTLIST=$ARGV[0];
$OUTPUTFILE=$ARGV[1];
if (! $OUTPUTFILE) {
	# Default result name is derived from the hmmer command, e.g. hmmpfam.out
	$OUTPUTFILE= $CMD . '.out';
}

$INPUTLIST_BASE = basename($INPUTLIST);
$OUTPUTFILE_BASE = basename($OUTPUTFILE);
$DB_BASE = basename($DB);

# Create the private scratch directory.  The list form of system() keeps
# $EXECDIR away from the shell, and a failure stops the run here instead of
# surfacing later as a confusing copy/symlink error.
system("mkdir", "-p", $EXECDIR);
die("Can not create $EXECDIR") if ($? != 0 || ! -d $EXECDIR);

# Stage the input (and, with -CopyAll, the HMM database) into the scratch
# directory: real copies with -CopyAll, otherwise a symlink to the input.
if ($CopyAll) {
	copy($INPUTLIST, "$EXECDIR/$INPUTLIST_BASE") || die("Can not copy $INPUTLIST to $EXECDIR/$INPUTLIST_BASE");
	copy($DB, "$EXECDIR/$DB_BASE") || die("Can not copy $DB to $EXECDIR/$DB_BASE");
	$DB = "$EXECDIR/$DB_BASE";
} else {
	if ($INPUTLIST =~ /^\//) {
		symlink($INPUTLIST, "$EXECDIR/$INPUTLIST_BASE") || die("Can not symlink $INPUTLIST $EXECDIR/$INPUTLIST_BASE");
	} else {
		symlink("$CURRDIR/$INPUTLIST", "$EXECDIR/$INPUTLIST_BASE") || die("Can not symlink $CURRDIR/$INPUTLIST $EXECDIR/$INPUTLIST_BASE");
	}
}
$INPUTLIST = $INPUTLIST_BASE;

# The job scripts are written with relative names, so the rest of the run
# must really happen inside the scratch directory.
chdir($EXECDIR) || die("Can not chdir to $EXECDIR($!)");
print STDERR "CWD :: $EXECDIR\n";

# $PAMFILE is not assigned anywhere in the visible code; it can only arrive
# as a -PAMFILE=... switch, so the link is made only when it was given.
# NOTE(review): nothing visible here reads "allpamout" -- confirm it is still needed.
symlink($PAMFILE, "$EXECDIR/allpamout") if (defined($PAMFILE) && length($PAMFILE));

# Execute: split the input, run the jobs, merge the results.
execHmmer("$EXECDIR/$INPUTLIST_BASE", "$OUTPUTFILE_BASE");

# Deliver the merged result to the requested location.  An absolute
# $OUTPUTFILE is used as is; a relative one is resolved against the
# directory the script was started from.
my $exitStatus = 0;
if (-f $OUTPUTFILE_BASE) {
	my $destination = ($OUTPUTFILE =~ /^\//) ? "$OUTPUTFILE" : "$CURRDIR/$OUTPUTFILE";
	if (! copy($OUTPUTFILE_BASE, $destination)) {
		# Do not delete the only copy of the results: keep the scratch
		# directory and report the failure through the exit status.
		print STDERR "Can not copy $EXECDIR/$OUTPUTFILE_BASE to $destination($!); results are kept in $EXECDIR\n";
		$retainTmp = 1;
		$exitStatus = 1;
	}
}
if (! $retainTmp) {
	# Leave the scratch directory before removing it.
	chdir "/";
	sleep(1);
	rmtree($EXECDIR);
}
exit($exitStatus);

###############################################################################
# Build the path of one chunk of the split input list.
#   $idx : chunk number (callers in this file count from 1)
# Returns "$main::EXECDIR/$main::INPUTLIST_BASE.<idx>".
# The directory and base name are handed to sprintf() as %s arguments rather
# than interpolated into the format string, so a '%' in either of them is
# kept literally instead of being read as a conversion.
sub getFilenameLst {
    my($idx) = shift;
    return sprintf("%s/%s.%d", $main::EXECDIR, $main::INPUTLIST_BASE, $idx);
}

# Build the path of the hmmer output belonging to one chunk.
#   $idx : chunk number (callers in this file count from 1)
# Returns "$main::EXECDIR/$main::OUTPUTFILE_BASE.<idx>".
# The directory and base name are handed to sprintf() as %s arguments rather
# than interpolated into the format string, so a '%' in either of them is
# kept literally instead of being read as a conversion.
sub getFilenameOut {
    my($idx) = shift;
    return sprintf("%s/%s.%d", $main::EXECDIR, $main::OUTPUTFILE_BASE, $idx);
}

# Block until at most $maxJob of the tracked jobs are still known to qstat.
#   $maxJob       : number of jobs allowed to remain when this returns
#   $refHashJobId : hash ref, job id => job script file; modified in place
# A job is taken as finished when "$main::QSTAT_CMD <id>" exits non-zero; its
# job script is then unlinked and its entry removed from the hash.  Jobs are
# polled once per second for as long as too many remain.
sub waitJobs {
    my($maxJob, $refHashJobId) = @_;

    my @pending = keys(%{$refHashJobId});
    while (1) {
        foreach my $id (@pending) {
            system("$main::QSTAT_CMD $id >/dev/null 2>&1");
            next if (! $?);

            # qstat no longer reports the job: forget it and its script.
            unlink($refHashJobId->{"$id"});
            delete($refHashJobId->{"$id"});
        }

        @pending = keys(%{$refHashJobId});
        # At or below the parallel-execution limit: done waiting.
        last if (scalar(@pending) <= $maxJob);

        sleep 1;
    }
}

# Split a sequence file into numbered chunk files.
#   $fileList : path of the file to split
#   $maxList  : maximum number of entries per chunk
# An entry starts at a line matching /^>\s*\S+/; all following lines up to the
# next such line belong to it.  Chunks are written to getFilenameLst(1),
# getFilenameLst(2), ...
# Returns the number of chunk files that were written completely.  This is 0
# for an input without any entry.  If a chunk file can not be created, the
# number of chunks finished before it is returned.
# Dies when $fileList itself can not be opened.
sub divideListFile {
    my($fileList) = shift;
    my($maxList) = shift;

    my($fhr) = FileHandle->new($fileList, "r") || die("Can not open $fileList($!)");
    my($fhw) = undef();
    my($nDiv) = 0;            # number of chunk files started so far
    my($nList) = 0;           # number of entries written to the current chunk

    # defined() keeps a final line consisting of just "0" from ending the loop.
    while (defined(my $line = $fhr->getline())) {
        if ($line =~ /^>\s*\S+/) {
            if (($maxList <= $nList) ||    # current chunk has reached its limit
                !defined($fhw)) {          # no chunk file opened yet
                $fhw->close() if (defined($fhw));
                $nDiv++;
                $nList = 0;
                my($fileDiv) = getFilenameLst($nDiv);
                $fhw = FileHandle->new($fileDiv, "w");
                if (! $fhw) {
                    print STDERR "Can not open $fileDiv($!)\n";
                    $fhr->close();

                    # Report only the chunks that were actually produced.
                    return ($nDiv - 1);
                }
            }
            $nList++;
        }
        elsif (!defined($fhw)) {
            # Text before the first header belongs to no entry; there is no
            # chunk to put it in, so it is skipped.
            next;
        }
        $fhw->print($line);
    }

    # With an input that contains no entry, no chunk was ever opened.
    $fhw->close() if (defined($fhw));
    $fhr->close();

    return $nDiv;
}

# Merge the per-chunk hmmer outputs into one tab-separated result file.
#   $fileOut : path of the merged file to create
#   $maxJob  : highest chunk number; chunks 1..$maxJob are merged in order
# Each chunk output, getFilenameOut($idx), is parsed and printed by
# printHmmer(), which writes to the currently selected handle; the output
# file is therefore select()ed for the duration of the merge and the previous
# handle is restored afterwards.  The chunk files themselves are left in
# place.
# Dies when $fileOut can not be created.
sub mergeResultFiles {
    my($fileOut) = shift;
    my($maxJob) = shift;

    # Fail with a clear message here rather than with a method call on an
    # undefined handle after all chunks have been parsed.
    my($fh) = FileHandle->new($fileOut, "w") || die("Can not open $fileOut($!)");
    my($fhBack) = select($fh);

    for (my $idx = 1; $idx <= $maxJob; $idx++) {
        printHmmer(getFilenameOut($idx));
    }

    select($fhBack);
    # Buffered write errors only become visible when the handle is closed.
    $fh->close() || print STDERR "Can not close $fileOut($!)\n";
}

# Run hmmer over the whole input: split it into chunks, submit one PBS job
# per chunk, wait for the jobs, and merge the parsed results.
#   $fileList : path of the sequence file to process
#   $fileOut  : path of the merged result file to create
# At most $main::MXJOB jobs are kept in flight at any time.
#
# Failure handling: when a job script can not be written, or no job id can be
# obtained from qsub, the same chunk is retried after $nRetry minutes for as
# long as $nRetry does not exceed $main::maxRetry.  That variable is not
# assigned in the visible code, so unless -maxRetry=N is passed the first
# failure already ends submission.  Submission is then abandoned without
# dying, so that the results of the jobs that did run are still merged.
sub execHmmer {
    my($fileList) = shift;
    my($fileOut) = shift;

    # Split the input into chunks of at most $main::MX sequence entries.
    my($maxJobFile) = divideListFile($fileList, $main::MX);
    if ($maxJobFile <= 0) {
        # Nothing was split off: create an empty result file and stop.
        FileHandle->new($fileOut, "w");
        return;
    }

    my($maxRetry) = ($main::maxRetry ? $main::maxRetry : 0);
    my($nRetry) = 0;
    # Shared retry policy for every failure below: count the attempt, and
    # either wait a while and report "try again" (true) or report "give up".
    my($retryAllowed) = sub {
        $nRetry++;
        return 0 if ($maxRetry < $nRetry);
        sleep($nRetry * 60);
        return 1;
    };

    my($refHashJobId) = {};    # job id => job script, for jobs still running
    JOB: for (my $idx = 1; $idx <= $maxJobFile; $idx++) {
        my($fileDivList) = getFilenameLst($idx);
        my($fileDivOut)  = getFilenameOut($idx);

        # Options go in front of the positional arguments only; repeating them
        # after the sequence file would hand the program extra arguments.
        my($cmd) = "$main::CMD_hmmer $main::OPT $main::DB $fileDivList > $fileDivOut";
        print STDERR "$cmd\n";

        # Write the job script for this chunk (relative to the current
        # directory, which the caller has set to the scratch directory).
        my($fileJob) = sprintf("%s.%d", $main::TMPCMD, $idx);
        my($fh) = FileHandle->new($fileJob, "w");
        if (! $fh) {
            print STDERR "Can not open $fileJob($!)\n";
            redo JOB if ($retryAllowed->());
            last JOB;
        }
        $fh->print("cd $main::EXECDIR", "\n");
        $fh->print("$cmd", "\n");
        $fh->close();
        chmod(0755, $fileJob);

        # Submit the job; qsub's output (the job id) is captured in a file
        # that is named after the chunk number and this process id.
        print STDERR "system :: $fileJob\n";
        my($fileJobId) = sprintf("%s/jobid%d.%d", $main::EXECDIR, $idx, $$);
        system("$main::QSUB_CMD $fileJob > $fileJobId");
        sleep(1);
        $fh = FileHandle->new($fileJobId, "r");
        if (! $fh) {
            print STDERR "Can not open $fileJobId($!)\n";
            redo JOB if ($retryAllowed->());
            last JOB;
        }
        my($reply) = $fh->getline();
        $fh->close();

        # The job id is the first run of digits in qsub's reply.
        my($jobid) = ((defined($reply) ? $reply : "") =~ /(\d+)/);
        if (! $jobid) {
            print STDERR "Can not get JOB-ID(PBS)\n";
            redo JOB if ($retryAllowed->());
            last JOB;
        }
        print STDERR "JobID=$jobid\n";
        $refHashJobId->{"$jobid"} = $fileJob;         # remember the submitted job

        waitJobs($main::MXJOB - 1, $refHashJobId);    # wait until another job may be submitted

        $nRetry = 0;
    }

    # Wait for every submitted job to finish.
    waitJobs(0, $refHashJobId);

    # Merge the per-chunk results into a single file.
    mergeResultFiles($fileOut, $maxJobFile);
}

#
# Consume the "Model Description" table of an hmmer report without storing
# anything from it.
#   $fh  : handle positioned just after the table's heading line
#   $ent : current query entry (not used here)
# Reading stops after the first blank line, after a line containing
# "no hits above thresholds", or when getline() yields nothing more; lines
# starting with '-' never end the table.
sub parseHmmerDesctiption {
    my($fh, $ent) = @_;

    for (;;) {
        my $row = $fh->getline();
        return if (! $row);
        next   if ($row =~ /^\-+/);
        return if ($row =~ /no hits above thresholds/);
        return if ($row =~ /^\s*$/);
    }
}

# Read the "Model Domain" table of an hmmer report into $ent->{'MODELS'}.
#   $fh  : handle positioned just after the table's heading line
#   $ent : current query entry; one hash ref per table row is appended to
#          the array behind its 'MODELS' key
# Every row is split on whitespace.  Fields 1-4 become 'model', 'domain',
# 'seq-f' and 'seq-t'; fields 6-7 become 'hmm-f' and 'hmm-t'; fields 9-10
# become 'score' and 'e-value'.  Fields 5 and 8 are dropped.
# A line containing "no hits above thresholds" is recorded as a single
# { model => 'NO_HITS' } record and ends the table, as does a blank line.
# Lines starting with '-' are skipped.
sub parseHmmerDomain {
    my($fh, $ent) = @_;

    # Key for each field position; undef marks a field that is not kept.
    my @columnKey = ('model', 'domain', 'seq-f', 'seq-t', undef,
                     'hmm-f', 'hmm-t', undef, 'score', 'e-value');

    while (my $row = $fh->getline()) {
        next if ($row =~ /^\-+/);
        last if ($row =~ /^\s*$/);
        if ($row =~ /no hits above thresholds/) {
            push(@{$ent->{'MODELS'}}, { 'model' => 'NO_HITS' });
            last;
        }
        $row =~ s#[\r\n]*$##;

        my @field = split(/\s+/, $row);
        my %hit;
        foreach my $pos (0 .. $#columnKey) {
            next if (! defined($columnKey[$pos]));
            $hit{$columnKey[$pos]} = $field[$pos];
        }

        push(@{$ent->{'MODELS'}}, \%hit);
    }

    return;
}

# Parse one hmmer report file and hand every finished query entry to a
# callback.
#   $filename : path of the report to read
#   $func     : code ref, called as $func->($ent) at every "//" line
# An entry is started by a "Query sequence: NAME" line and is a hash ref with
#   NAME_QUERY : NAME as found in the report
#   sp, name   : the first two ':'-separated parts of NAME
#   MODELS     : array ref filled by parseHmmerDomain()
# Returns nothing.  A report that can not be opened is skipped silently, so
# that a chunk whose job produced no output does not abort the merge of the
# other chunks.
sub parseHmmer {
    my($filename) = shift;
    my($func) = shift;

    # The open result is tested in its own statement: combining a list
    # assignment with "or return" tests the number of assigned elements,
    # which is 1 even when the open failed.
    my($fh) = FileHandle->new($filename, "r");
    return if (! $fh);

    my($ent);
    while (defined(my $line = $fh->getline())) {
        $line =~ s#[\r\n]*$##;

        if ($line =~ /^Query sequence:\s+(\S+)/) {
            $ent = {};
            $ent->{'NAME_QUERY'} = $1;
            ($ent->{'sp'}, $ent->{'name'}) = split(/:/, $ent->{'NAME_QUERY'});
            $ent->{'MODELS'} = [];
        }
        elsif ($line =~ /^\/\//) {
            # End of the current query's report.
            $func->($ent);
        }
        elsif ($line =~ /^Model\s+Description/) {
            parseHmmerDesctiption($fh, $ent);
        }
        elsif ($line =~ /^Model\s+Domain/) {
            parseHmmerDomain($fh, $ent);
        }
    }
    $fh->close();

    return;
}

# Print the hits found in one or more hmmer report files as tab-separated
# lines on the currently selected output handle.
#   @fileList : paths of the report files, processed in the given order
# Each record in an entry's MODELS list yields one line:
#   sp, name, model, domain, seq-f, seq-t, hmm-f, hmm-t, score, e-value
sub printHmmer {
    my(@fileList) = @_;

    my @column = ('model', 'domain', 'seq-f', 'seq-t', 'hmm-f', 'hmm-t', 'score', 'e-value');

    # Callback for parseHmmer(): write one line per record of the entry.
    my $writeEntry = sub {
        my($ent) = @_;

        for my $hit (@{$ent->{'MODELS'}}) {
            my @cell = ($ent->{'sp'}, $ent->{'name'}, map { $hit->{"$_"} } @column);
            print join("\t", @cell), "\n";
        }
    };

    for my $report (@fileList) {
        parseHmmer($report, $writeEntry);
    }
}
