#!/usr/bin/perl -s

###############################################################################
# Output the protein sequences (Protein-Seq) that are targets of a DB update
#
# Usage : $0 [-SPEC=spec1,spec2, ... ,specN] [-DATE=YYYYMMDD]
#            [-dbname=DBNAME] [-PROJECT]
#
###############################################################################
use File::Path;
use FileHandle;
use RECOG::RecogProject;
require "MBGD_Conf.pl";
require "libMBGDaxes.pl";
require "MBGD_commonUpdate.pl";
$main::DEBUG=1;

###############################################################################
#
# selectUpdSeq($dbname, $updDate, $file_updseq, @spid_list)
#
# Select distinct protein sequences from the database and write them to
# $file_updseq in FASTA form: a ">md5sum" header line followed by the
# sequence wrapped at 60 characters per line.
#
#   $dbname      : database name handed to MBGD::DB->new.
#   $updDate     : when true, only rows with ps.udate >= $updDate are
#                  selected.  Must consist of word characters, '-', '/',
#                  ':', '.' and blanks, because it is embedded in the SQL.
#   $file_updseq : output file (created or truncated).
#   @spid_list   : IDs used to restrict the query via project.spid.  Entries
#                  that do not start with g[mu]<digits> are ignored; entries
#                  that start that way but contain non-word characters are
#                  skipped with a warning so they can never alter the SQL.
#                  When no usable ID remains, no spid restriction is applied.
#
# Dies when the date is malformed or the output file cannot be opened or
# closed.  Returns nothing.
sub selectUpdSeq {
    my($dbname, $updDate, $file_updseq, @spid_list) = @_;

    # The date is interpolated into the WHERE clause, so refuse anything that
    # could terminate the quoted literal.  Checked before the output file is
    # truncated.
    if ($updDate && $updDate !~ /\A[\w\-\/:. ]+\z/) {
        die("Invalid update date '$updDate'");
    }

    # Build the quoted, comma-separated ID list for "p.spid in(...)".
    my(@quoted_spid);
    foreach my $spid (@spid_list) {
        next if (!defined($spid) || $spid !~ /^g[mu]\d+/);
        if ($spid !~ /\A\w+\z/) {
            print STDERR "WARNING :: skip malformed spid\n";
            next;
        }
        push(@quoted_spid, "'$spid'");
    }
    my($where_spid) = join(',', @quoted_spid);

    # Explicit mode argument: the file name is never parsed for a mode.
    my($fh) = FileHandle->new($file_updseq, 'w')
        || die("Can not open $file_updseq($!)");

    print STDERR "select new protein sequence\n";
    print STDERR "dbname : $dbname\n";
    my($db) = MBGD::DB->new($dbname);

    # Table list and WHERE conditions; the join tables are only needed when
    # the query is restricted by spid.
    my($tab) = "proteinseq ps";
    my(@cond);
    if ($where_spid ne '') {
        $tab .= ", project p, geneset gs, geneset_gene gsg, gene g, transcript t";
        push(@cond, "p.spid in($where_spid)");
        push(@cond, "p.id=gs.project_id and gs.id=gsg.geneset_id and gsg.gene_id=g.id and g.id=t.gene_id and t.proteinseq_id=ps.id");
    }
    if ($updDate) {
        push(@cond, "ps.udate >= '$updDate'");
    }
    my($opt) = {
        'columns' => "distinct ps.md5sum as md5 , ps.seq as seq",
        'where'   => join(' and ', @cond),
    };
    print STDERR "SQL where :: $opt->{'where'}\n" if ($main::DEBUG);

    my($res) = $db->select_fetch($tab, $opt);

    # Treat a missing result set as "no rows" instead of dereferencing undef.
    my($rows) = (ref($res) && $res->{'INFO'}) ? $res->{'INFO'} : [];

    print STDERR "LOG :: Found " . scalar(@{$rows}) . "\n";
    foreach my $ent (@{$rows}) {
        my($seq) = $ent->{'seq'};
        $seq = '' if (!defined($seq));
        # Wrap the sequence at 60 characters per line.
        $seq =~ s#(.{1,60})#$1\n#g;

        $fh->print(">", $ent->{'md5'}, "\n");
        $fh->print($seq);
    }

    # Buffered write errors only surface at close time.
    $fh->close() || die("Can not close $file_updseq($!)");

    return;
}

###############################################################################
# Script entry point: runs only when this file is executed directly (the
# program name equals this file's name), not when it is loaded by another
# script.  $log, $DIR_work, $DATE and the $main::... values read here are set
# outside this block (command-line switches via "perl -s" and/or the required
# configuration files).
if ($0 eq __FILE__) {
    my($user) = $ENV{'USER'};

    # NOTE(review): presumably redirects STDERR output to the log file;
    # openLogfile is defined elsewhere -- confirm.
    openLogfile($log);

    mkpath($DIR_work, 0, 0750);

    # An explicit dbname overrides the default accumulation database.
    my($dbname) = $main::dbname ? $main::dbname : $main::DBNAME_ACCUM;

    # Combined output file for the selected sequences.
    my($file_newprotseq) = "$DIR_work/newprotseq";
    if ($main::PROJECT) {
        # Create/truncate the combined file up front so a failure is reported
        # before any per-project work is done.
        my($fh_trunc) = FileHandle->new($file_newprotseq, 'w')
            || die("Can not open $file_newprotseq($!)");
        $fh_trunc->close();

        my($recog_proj_ref) = RECOG::RecogProject->new();
        my(@proj_id_list) = $recog_proj_ref->get_project_id_list();
        my($n_project) = scalar(@proj_id_list);
        print STDERR "DBG :: Found $n_project project(s)\n";

        foreach my $proj_id (sort { $a <=> $b } @proj_id_list) {
            # Project IDs of 1 and below are not processed.
            next if ($proj_id <= 1);

            my(@spid_list)      = sort $recog_proj_ref->get_spid_list($user, $proj_id);
            my(@spid_list_base) = sort $recog_proj_ref->get_spid_list_base_cluster($user, $proj_id);

            print STDERR "LOG :: Create newprotseq for Project $proj_id : @spid_list\n";
            print STDERR "LOG :: base : @spid_list_base\n";

            ## database should include both spid_list and spid_list_base
            my($file_blastdb) = "$DIR_work/blastdb.projectid_$proj_id";
            selectUpdSeq($dbname, '', $file_blastdb, @spid_list, @spid_list_base);

            ## newprotseq should include only spid_list (does not include species in the base cluster)
            my($file_proj_newseq) = "$DIR_work/newprotseq.projectid_$proj_id";
            selectUpdSeq($dbname, $DATE, $file_proj_newseq, @spid_list);

            # The per-project files are not concatenated into
            # $file_newprotseq; that file is written by the query below.
        }
    }

    # Sequences for the species given with -SPEC (no species restriction when
    # -SPEC is absent), limited to entries updated on or after $DATE.
    my(@spid_list) = defined($main::SPEC) ? split(/,/, $main::SPEC) : ();
    selectUpdSeq($dbname, $DATE, $file_newprotseq, @spid_list);

    exit;
}

###############################################################################
1;#
###############################################################################
