#!/usr/bin/perl -s
use strict;
use File::Path;
use IO::Dir;
use MBGD::DB;
require "MBGD_common.pl";

###############################################################################
#
# Print the command-line usage summary to STDERR.
#
# Takes no arguments and returns nothing.  Every line goes to STDERR; the
# option detail lines used to be printed to a misspelled, never-opened
# handle and were silently lost.
# NOTE(review): the DBNAME default shown here is $main::DBNAME_TMP while the
# main section of this file falls back to $main::DBNAME_RECOG, and the DIR
# default shown differs from the path built there -- confirm which is meant.
sub print_usage {
    print STDERR "Usage :: $0 [-QSUB] [-DIR=output_dir] [-DBNAME=dbname] -TABID=cluster_table_id [ cluster_id1 .. cluster_idN ]\n";
    print STDERR "    DBNAME :: $main::DBNAME_TMP (default)\n";
    print STDERR "    DIR    :: cluster_align (default)\n";
    print STDERR "\n";

    return;
}

###############################################################################
#
# Return the path of the directory that holds the alignment method
# programs: "WWW/bin/align.d" below the directory named by $ENV{MBGD_HOME}.
sub get_dir_method {
    return "$ENV{'MBGD_HOME'}/WWW/bin/align.d";
}

###############################################################################
#
# Collect the alignment method programs available in the method directory.
#
# Reads the directory returned by get_dir_method() and keeps, in sorted
# order, every entry that is a plain, executable file whose name neither
# starts with "." nor ends with ".sample".  Dies when the directory cannot
# be opened.  Returns a reference to the array of file names (without path).
sub get_method_align {
    my $method_dir = get_dir_method();
    my $dir_handle = IO::Dir->new($method_dir)
        || die("Can not open $method_dir($!)");

    my @methods = grep {
           $_ !~ /^\./
        && $_ !~ /\.sample$/
        && -f "$method_dir/$_"
        && -x "$method_dir/$_"
    } sort($dir_handle->read());

    return \@methods;
}

###############################################################################
#
# Fetch every cluster id stored in the cluster table of one table id.
#
#   $db    - database object; execute($sql) is called on it and the
#            returned statement handle is read with fetchrow_hashref()
#   $tabid - suffix selecting the table "cluster_func_clust2sql_<tabid>"
#
# Returns the list of "clustid" values in the order the rows are fetched.
sub get_clustid_list {
    my ($db, $tabid) = @_;

    my $table = "cluster_func_clust2sql_$tabid";
    my $query = "select clustid from $table";
    my $sth   = $db->execute($query);
#print STDERR "SQL :: $query\n";

    my @ids;
    while (my $row = $sth->fetchrow_hashref()) {
        push(@ids, $row->{'clustid'});
    }

    return @ids;
}

###############################################################################
#
# Write the sequences of clusters into one FASTA file per cluster.
#
#   $db           - database object; execute($sql) is called on it and the
#                   returned statement handle is read with fetchrow_hashref()
#   $dir_out      - output directory (created when missing); rows of cluster
#                   N are written to "<dir_out>/N.fas"
#   $tabid        - suffix selecting the table "cluster_domclust_cache_<tabid>"
#   @list_clustid - cluster ids to export; entries that are not plain digit
#                   strings are ignored; an empty list puts no restriction
#                   on the cluster id
#
# Each record has the header ">sp#name#dom#from1#to1".  When "dom" is true
# only the region from1..to1 (1-based, inclusive) of the sequence is written.
# "U"/"u" are replaced by "T"/"t" and the sequence is wrapped at 60
# characters per line.
#
# Dies when an output file cannot be opened or closed.  Returns nothing.
sub get_sequence {
    my($db) = shift;
    my($dir_out) = shift;
    my($tabid) = shift;
    my(@list_clustid) = @_;

    mkpath($dir_out, 0, 0750);

    my($cols) = "c.clustid, c.sp, c.name, c.dom, c.from1, c.to1, ps.seq";
    my($tabs) = "$main::DBNAME_MBGD.gene g, $main::DBNAME_MBGD.proteinseq ps, cluster_domclust_cache_$tabid c";
    my($where) = "g.sp=c.sp and g.name=c.name and g.aaseq=ps.id";
    if (scalar(@list_clustid) != 0) {
        # The leading -1 keeps the IN list valid even when every id is
        # rejected by the digits-only filter.
        my(@ids) = grep { /^\d+$/ } @list_clustid;
        $where .= " and c.clustid in(" . join(',', -1, @ids) . ")";
    }

    # The writer below starts (and truncates) a file whenever the cluster id
    # changes, so all rows of one cluster must arrive together; without an
    # explicit ordering the row order is up to the database.
    my($sql) = "select $cols from $tabs where $where order by c.clustid";
#print STDERR "SQL :: $sql\n";
    my($sth) = $db->execute($sql);
    my($fh);
    my($filename);
    my($prev_clustid) = -1;
    while (my$ref=$sth->fetchrow_hashref()) {
        my($clustid) = $ref->{'clustid'};
        if ($clustid != $prev_clustid) {
            # Buffered write errors only show up at close time.
            if ($fh) {
                $fh->close() || die("Can not close $filename($!)");
            }

            $filename = "$dir_out/$clustid.fas";
            # Explicit mode argument: the name is never parsed for a mode.
            $fh = IO::File->new($filename, 'w') || die("Can not open $filename($!)");
        }
        my($sp)     = $ref->{'sp'};
        my($name)   = $ref->{'name'};
        my($dom)    = $ref->{'dom'};
        my($from1)  = $ref->{'from1'};
        my($to1)    = $ref->{'to1'};
        my($seq)    = $ref->{'seq'};
        if ($dom) {
            # from1/to1 are 1-based inclusive coordinates.
            $seq = substr($seq, $from1 - 1, $to1 - $from1 + 1);
        }
        $seq =~ y/Uu/Tt/;
        $seq =~ s/(.{1,60})/$1\n/g;

        $fh->print(">", join('#', $sp, $name, $dom, $from1, $to1), "\n");
        $fh->print($seq);

        $prev_clustid = $clustid;
    }
    if ($fh) {
        $fh->close() || die("Can not close $filename($!)");
    }

    return;
}

###############################################################################
#
# Run every available alignment method on the sequences of the clusters.
#
#   $dir_out      - base output directory; sequences go to
#                   "<dir_out>/sequence", the results of each method to
#                   "<dir_out>/<method name>"
#   $dbname       - database name handed to MBGD::DB->new()
#   $tabid        - cluster table id handed to get_clustid_list() and
#                   get_sequence()
#   @list_clustid - cluster ids to process; when empty, all ids returned by
#                   get_clustid_list() are processed
#
# The clusters are handled in batches of $main::NUM_cpu ids.  For each batch
# the FASTA files are written with get_sequence() and every program listed
# by get_method_align() is run once with "-DIR=...", "-QSUB=..." and the
# batch's FASTA file names as arguments.  Returns nothing.
sub cluster_align {
    my($dir_out) = shift;
    my($dbname) = shift;
    my($tabid) = shift;
    my(@list_clustid) = @_;

    #
    my($db) = MBGD::DB->new($dbname);
    if (scalar(@list_clustid) == 0) {
        @list_clustid = get_clustid_list($db, $tabid);
    }

    #
    my($dir_seq) = "$dir_out/sequence";
    my($list_method_align_ref) = get_method_align();
    my($dir_method) = get_dir_method();

    # An unset, zero or negative batch size would make splice() below remove
    # nothing, ending the loop with no cluster processed; use 1 instead.
    my($n_batch) = int($main::NUM_cpu || 0);
    if ($n_batch < 1) {
        $n_batch = 1;
    }

    #
    for (;;) {
        my($n) = scalar(@list_clustid);
        print STDERR "Left $n cluster(s).\n";

        my(@cid_list) = splice(@list_clustid, 0, $n_batch);
        if (scalar(@cid_list) == 0) {
            last;
        }

        my(@file_list);
        get_sequence($db, $dir_seq, $tabid, @cid_list);
        foreach my$clustid (@cid_list) {
            push(@file_list, "$dir_seq/$clustid.fas");
        }

        foreach my$method (@{$list_method_align_ref}) {
            # Drop a leading run of digits, "_" and "." from the program
            # name to get the name of its output directory.
            my($name_method) = ($method =~ /^[0-9\_\.]*(.+)$/);
            if (!$name_method) {
                $name_method = $method;
            }
            my($dir) = sprintf("%s/%s", $dir_out, $name_method);
            mkpath($dir, 0, 0750);

            #
            my($cmd_align) = sprintf("%s/%s", $dir_method, $method);
            my($cmd) = "$cmd_align -DIR=$dir -QSUB='$main::QSUB' @file_list";
            print STDERR "CMD :: $cmd\n" if ($main::DEBUG);

            # List form: the arguments reach the program as given, without a
            # shell re-parsing spaces or metacharacters in paths and options.
            my($status) = system($cmd_align, "-DIR=$dir", "-QSUB=$main::QSUB", @file_list);
            if ($status != 0) {
                # Keep going with the remaining methods, but report it.
                print STDERR "WARNING :: $cmd_align failed (status=$status)\n";
            }
        }
#last; # for TEST
    }

    return;
}

###############################################################################
# Command-line entry point: runs only when $0 names this very file.
# The option variables ($main::h, $main::TABID, ...) are read as package
# globals of main.
if ($0 eq __FILE__) {
    # -h : show the usage and finish successfully.
    if ($main::h) {
        print_usage();
        exit(0);
    }

    # -TABID is mandatory.
    my $tabid = $main::TABID;
    unless ($tabid) {
        print_usage();
        exit(-1);
    }

    # -DBNAME and -DIR override the defaults when given.
    my $dbname = $main::DBNAME ? $main::DBNAME : $main::DBNAME_RECOG;
    my $dir    = $main::DIR
               ? $main::DIR
               : "$ENV{'MBGD_HOME'}/database/clusters/$tabid";
    mkpath($dir, 0, 0750);

    # Any remaining arguments are the cluster ids to process.
    cluster_align($dir, $dbname, $tabid, @ARGV);
}

###############################################################################
1;#
###############################################################################
