#!/usr/bin/perl -s
use strict;
use DirHandle;
use FileHandle;
use File::Basename;
use File::Path;
use POSIX "sys_wait_h";
require "MBGD_Conf.pl";

# pack() template used by build_bldp() for one binary output record:
# two 38-byte null-padded strings ("species:name" for each side), four
# unsigned shorts (from1, to1, from2, to2) and four single-precision floats
# (ident, eval, score, pam).
$main::PACK_TEMPL38 = "a38 a38 S S S S f f f f";
# The five settings below are written by create_dirinfo() into the "dirinfo"
# file as the entries prefix=, suffix=, idxfile=, delim= and uniqcheck=.
$main::Prefix = "blastdpres";
$main::Suffix = "pack";
# Also used by the main script to build the path "$dir/spindex".
$main::FileIndex = "spindex";
$main::Delim = ":";
$main::Uniqcheck='0';

###############################################################################
#
# Write the command-line synopsis to STDERR.
#
# Takes no arguments and returns nothing (empty list / undef).
sub print_usage {
    # One entry per output line; the two empty entries are the trailing
    # blank lines of the help text.
    my @usage_lines = (
        "Usage :: $0 [-UPD] [-PROC=n] [-DIR=dir] [-FILE_IN=file_in] [blastdpres.spec1 ]",
        "    dir :: target bldp directory",
        "            (default = \$MBGD_HOME/database.work/bldp)",
        "",
        "",
    );

    foreach my $text (@usage_lines) {
        print STDERR $text . "\n";
    }

    return;
}

###############################################################################
#
# Split a tab-separated homology table into one temporary text file per
# species.
#
# Every input line is expected to hold 14 tab-separated fields:
#   sp1 name1 spname1 from1 to1  sp2 name2 spname2 from2 to2  ident eval pam score
# The two sides of a record are exchanged when sp1 sorts (as a string) after
# sp2, so the record is always filed under the smaller species code.  The
# record is then appended to "$dir/$tmp_prefix.$sp1" as a space-separated line
#   sp2 name1 name2 from1 to1 from2 to2 ident eval score pam
#
# Arguments:
#   $file_in    - path of the tab-separated input table
#   $dir        - directory receiving the per-species files
#   $tmp_prefix - file-name prefix of the per-species files
# Returns:
#   list of the species codes for which a file was written (hash order).
# Dies:
#   when the input cannot be opened, or an output file cannot be opened or
#   closed (a failing close is how buffered write errors surface).
sub convert_bldp_sp {
    my($file_in) = shift;
    my($dir) = shift;
    my($tmp_prefix) = shift;

    # species code => open output handle
    my($handle_hash_ref) = {};
    my($n_total) = 0;

    # Explicit mode arguments keep odd characters in a file name from being
    # read as part of the open mode.
    my($fhi) = FileHandle->new($file_in, "r") || die("Can't open $file_in($!)");

    # defined(): a method call gets no implicit defined() test in while(),
    # so a last line consisting of "0" would otherwise end the loop early.
    while (defined(my $line = $fhi->getline())) {
        $n_total++;
        if ($n_total % 100000 == 0) {
            print STDERR "LOG :: Pass :: $n_total\n";
        }

        $line =~ s#[\r\n]*$##;
        # A blank line carries no record; without this guard it would create
        # a bogus "$dir/$tmp_prefix." file and an empty species code.
        next if ($line eq '');

        my($sp1, $name1, $spname1, $from1, $to1,
           $sp2, $name2, $spname2, $from2, $to2,
           $ident, $eval, $pam, $score) = split(/\t/, $line);

        # Normalise the orientation: the smaller species code goes first.
        if (($sp1 cmp $sp2) > 0) {
            ($sp1,     $sp2)     = ($sp2,     $sp1);
            ($name1,   $name2)   = ($name2,   $name1);
            ($spname1, $spname2) = ($spname2, $spname1);
            ($from1,   $from2)   = ($from2,   $from1);
            ($to1,     $to2)     = ($to2,     $to1);
        }

        # Open the per-species file on first use and keep it open.
        my($fh) = $handle_hash_ref->{$sp1};
        if (! $fh) {
            my($fname) = "$dir/$tmp_prefix.$sp1";
            $fh = FileHandle->new($fname, "w") || die("Can not open $fname($!)");
            $handle_hash_ref->{$sp1} = $fh;
        }

        # Note the output order: score precedes pam, unlike the input.
        $fh->print(join(" ", $sp2, $name1, $name2, $from1, $to1,
                             $from2, $to2, $ident, $eval, $score, $pam),
                   "\n");
    }
    $fhi->close();
    print STDERR "LOG :: Done :: $n_total\n";

    my(@species) = keys(%{$handle_hash_ref});
    foreach my $sp1 (@species) {
        $handle_hash_ref->{$sp1}->close()
            || die("Can not close $dir/$tmp_prefix.$sp1($!)");
    }

    return @species;
}

###############################################################################
#
# List the species codes for which a "<prefix>.<species>" file exists in a
# directory.
#
# Only entries named exactly "<prefix>.<species>" are reported, where
# <species> contains neither whitespace nor a further "." (so derived files
# such as "<prefix>.<species>.sort" are ignored).
#
# Arguments:
#   $dir    - directory to scan
#   $prefix - literal file-name prefix (not a pattern)
# Returns:
#   the species codes, sorted as strings.
# Dies:
#   when the directory cannot be opened.
sub find_bldp_sp {
    my($dir) = shift;
    my($prefix) = shift;

    my(@species);
    my($dh) = DirHandle->new($dir) || die("Can not open $dir($!)");

    # defined(): a method call gets no implicit defined() test in while(),
    # so an entry named "0" would otherwise end the scan early.
    while (defined(my $file = $dh->read())) {
        # \Q..\E makes the prefix match literally.  Entries that merely start
        # with the prefix but lack the ".<species>" part do not match and are
        # skipped instead of contributing an undefined species code.
        my($sp1) = ($file =~ /^\Q$prefix\E\.(\S+)$/);
        next if (!defined($sp1));
        next if ($sp1 =~ /\./);
        push(@species, $sp1);
    }
    $dh->close();

    return sort(@species);
}

###############################################################################
#
# Build the final bldp output for one species from its temporary text file.
#
# "$file_in.$sp1" is piped through $main::CMD_sort and $main::CMD_uniq into
# "$file_in.$sp1.sort".  Each remaining line
#   sp2 name1 name2 from1 to1 from2 to2 ident eval score pam
# is then written either
#   - packed with $main::PACK_TEMPL38 into "$file_out.$sp1"  (default), or
#   - as a text line into "$file_out.$sp1-$sp2"              ($main::pairout).
# A pair already seen for the same sp2, in either orientation, is reported on
# STDERR and skipped.
#
# Arguments:
#   $sp1      - species code whose temporary file is processed
#   $file_in  - path prefix of the temporary input files
#   $file_out - path prefix of the output files
# Returns:
#   nothing.
# Dies:
#   when the sort pipeline reports failure, or a file cannot be opened or
#   closed.
sub build_bldp {
    my($sp1) = shift;
    my($file_in) = shift;
    my($file_out) = shift;

    print STDERR "Start :: $file_in.$sp1\n";

    # Sorting groups the lines by their first field (sp2); uniq drops exact
    # repeats.  NOTE(review): the status checked here is that of the shell
    # pipeline, which normally reflects only its last command.
    my($file_sorted)  = "$file_in.$sp1.sort";
    my($cmd) = "$main::CMD_sort $file_in.$sp1 | $main::CMD_uniq > $file_sorted";
    my($rc) = system("$cmd");
    if ($rc != 0) {
        die("Failed(sort).");
    }

    my($fhi) = FileHandle->new($file_sorted, "r") || die "Can't open $file_sorted";
    my($fho);
    my($filename_out);
    if (! $main::pairout) {
        # Single packed file for all partner species.
        $filename_out = "$file_out.$sp1";
        $fho = FileHandle->new($filename_out, "w") || die "Can't open $filename_out($!)\n";
    }

    my($prevsp2);
    my(%hompair_hash);
    while (defined(my $line = $fhi->getline())) {
        $line =~ s#^\s*##;
        $line =~ s#[\r\n]*$##;
        next if ($line eq '');
        my($sp2, $name1, $name2, $from1, $to1, $from2, $to2,
           $ident, $eval, $score, $pam) = split(/\s+/, $line);

        # Start of a new sp2 group.  This must run BEFORE the duplicate check
        # so that the first record of the group is registered in the freshly
        # emptied hash, and $prevsp2 must be updated in BOTH output modes;
        # otherwise pairout mode would re-open (and truncate) the pair file
        # for every single line.
        if (!defined($prevsp2) || $prevsp2 ne $sp2) {
            # Keys contain sp2, so older entries can never match again.
            %hompair_hash = ();
            if ($main::pairout) {
                if ($fho) {
                    $fho->close() || die "Can't close $filename_out($!)\n";
                }
                $filename_out = "$file_out.$sp1-$sp2";
                $fho = FileHandle->new($filename_out, "w") || die "Can't open $filename_out($!)\n";
            }
            $prevsp2 = $sp2;
        }

        # Skip a pair that was already written in either orientation.
        my($key1) = join(":", $sp1, $name1, $from1, $to1);
        my($key2) = join(":", $sp2, $name2, $from2, $to2);
        if (exists($hompair_hash{"$key1:$key2"})
         || exists($hompair_hash{"$key2:$key1"})) {
            print STDERR "Found :: duplicated pair :: $key1 - $key2\n";
            next;
        }
        $hompair_hash{"$key1:$key2"} = 1;

        if ($main::pairout) {
            $fho->print(join(" ", "$sp1:$name1", "$sp2:$name2", $from1, $to1,
                              $from2, $to2, $ident, $eval, $score, $pam) . "\n");
        }
        else {
            $fho->print(pack($main::PACK_TEMPL38, "$sp1:$name1", "$sp2:$name2", $from1, $to1,
                                      $from2, $to2, $ident, $eval, $score, $pam));
        }
    }
    $fhi->close();

    # In pairout mode no output file exists when the input was empty.
    if ($fho) {
        $fho->close() || die "Can't close $filename_out($!)\n";
    }

    return;
}

###############################################################################
#
# Write the "dirinfo" description file for a bldp directory.
#
# The file holds one "key=value" line for each of the package settings
# $main::Uniqcheck, $main::FileIndex, $main::Prefix, $main::Suffix and
# $main::Delim, in that order.
#
# Arguments:
#   $filename - path of the file to (over)write
# Returns:
#   nothing.
# Dies:
#   when the file cannot be opened for writing.
sub create_dirinfo {
    my($filename) = shift;

    my($fho) = FileHandle->new(">$filename") || die("Can not open $filename($!)");

    # [ key written to the file, current value of the setting ]
    my(@entries) = (
        [ "uniqcheck", $main::Uniqcheck ],
        [ "idxfile",   $main::FileIndex ],
        [ "prefix",    $main::Prefix    ],
        [ "suffix",    $main::Suffix    ],
        [ "delim",     $main::Delim     ],
    );
    foreach my $entry (@entries) {
        my($key, $value) = @{$entry};
        $fho->print($key . "=" . $value . "\n");
    }
    $fho->close();

    return;
}

###############################################################################
#
# Exchange the values of two scalars in place.
#
# Arguments: two scalar references.
# The referenced variables are modified; callers ignore the return value.
sub swap {
    my($left_ref, $right_ref) = @_;

    my($held) = ${$left_ref};
    ${$left_ref}  = ${$right_ref};
    ${$right_ref} = $held;
}

###############################################################################
# Script entry point; skipped when this file is loaded from another program.
#
# Switches are parsed by perl's -s option (see the #! line) into package
# variables:
#   -PROC=n        maximum number of concurrent build_bldp() processes
#                  (default 16)
#   -DIR=dir       target bldp directory
#                  (default $MBGD_HOME/database.work/bldp)
#   -FILE_IN=file  tab-separated table to split with convert_bldp_sp()
#
# The species to process come from, in order of preference: the species
# found while splitting -FILE_IN, the "blastdpres.<spec>" names given as
# arguments, or the temporary files already present in the directory.
# Each species is built in its own child process; afterwards the external
# "spindex" program is run twice (build, then merge with -m) and the
# "dirinfo" file is written.
if ($0 eq __FILE__) {
    my($n_proc) = $main::PROC || 0;
    if ($n_proc < 1) {
        $n_proc = 16;
    }

    my($prefix)    = 'blastdpres';
    my($tmp_prefix) = "tmp_$prefix";
    my(@species);
    my($dir) = "$ENV{'MBGD_HOME'}/database.work/bldp";
    if ($main::DIR) {
        $dir = $main::DIR;
    }
    mkpath($dir, 0, 0750);
    if ($main::FILE_IN && -e "$main::FILE_IN") {
        @species = convert_bldp_sp($main::FILE_IN, $dir, $tmp_prefix);
    }
    elsif (@ARGV) {
        foreach my $file (@ARGV) {
            my($spec) = ($file =~ /blastdpres\.(\S+)$/);
            # An argument of another form would otherwise add an undefined
            # species and start a build for a file that cannot exist.
            if (!defined($spec)) {
                print STDERR "WARNING :: skipped (not a blastdpres.<spec> name) :: $file\n";
                next;
            }
            push(@species, $spec);
        }
    }
    else {
        @species = find_bldp_sp($dir, $tmp_prefix);
    }

    #
    my($file_bldp_in)  = "$dir/$tmp_prefix";
    my($file_bldp_out) = "$dir/$prefix";

    my(%pid_hash);          # pid => 1 for every child still believed running
    my($n_failed) = 0;

    # Poll every tracked child once without blocking, forget those that are
    # gone, and return the number of children still running.
    my($reap_children) = sub {
        foreach my $pid (keys(%pid_hash)) {
            my($ret) = waitpid($pid, POSIX::WNOHANG);
            next if ($ret == 0);        # still running
            # $ret == $pid: the child has ended and $? holds its status.
            # $ret == -1:   no such child; keeping the pid would make the
            #               wait loops below spin forever.
            if ($ret == $pid && $? != 0) {
                $n_failed++;
                print STDERR "WARNING :: build_bldp process $pid failed (status=$?)\n";
            }
            delete($pid_hash{$pid});
        }
        return scalar(keys(%pid_hash));
    };

    foreach my $sp1 (@species) {
        # Wait for a free slot: never more than $n_proc children at once.
        while ($reap_children->() >= $n_proc) {
            sleep(5);
        }

        my($pid) = fork();
        if (!defined($pid)) {
            die("Can not fork for $sp1($!)");
        }
        if ($pid == 0) {
            # child
            build_bldp($sp1,
                       $file_bldp_in,
                       $file_bldp_out);
            exit(0);
        }
        $pid_hash{$pid} = 1;
    }

    # Wait for the remaining children.
    while ($reap_children->() > 0) {
        sleep(5);
    }
    if ($n_failed > 0) {
        print STDERR "WARNING :: $n_failed build_bldp process(es) failed\n";
    }

    # create spindex, then merge spindex (-m)
    $ENV{'DIR_SPINDEX'} = $dir;
    my($cmd_spindex) = "$ENV{'MBGD_HOME'}/binaries/spindex";
    foreach my $options ([], ["-m"]) {
        # List form: the arguments reach the program without shell parsing.
        my(@cmd) = ($cmd_spindex, @{$options}, @species);
        if (system(@cmd) != 0) {
            print STDERR "WARNING :: command failed (status=$?) :: @cmd\n";
        }
    }

    my($file_dirinfo) = "$dir/dirinfo";
    create_dirinfo($file_dirinfo);
}

###############################################################################
1;#
###############################################################################
