#!/usr/local/bin/perl -s
package MbgdUpdate;
use strict;
use DirHandle;
use FileHandle;
use MBGD;
use MBGD::ProteinSeq;

###############################################################################
#
sub getReleaseDateByGbk {
    # Extract the release date from the first line of a GenBank-style file.
    #
    # The first line (the LOCUS line) is expected to end with a date written
    # as DD-MON-YYYY, e.g. "06-JUL-2004".
    #
    # Arguments:
    #   $filename - path of the file to read
    # Returns:
    #   the date as a 'YYYYMMDD' string, or '' when the file cannot be
    #   opened, is empty, or its first line does not end with a
    #   recognizable date.
    my $filename = shift;
    my $date = '';
    my %monHash = (
        'JAN' =>  1, 'FEB' =>  2, 'MAR' =>  3, 'APR' =>  4,
        'MAY' =>  5, 'JUN' =>  6, 'JUL' =>  7, 'AUG' =>  8,
        'SEP' =>  9, 'OCT' => 10, 'NOV' => 11, 'DEC' => 12,
    );

    my $fh = FileHandle->new($filename) or return $date;

    # Only the first line is needed: the date is at the end of the LOCUS line.
    my $line = $fh->getline();
    $fh->close();
    return $date if !defined $line;

    # Drop the line terminator and trailing blanks so that the end-of-line
    # anchor below also works for CRLF files.
    $line =~ s/\s+$//;

    # Match explicitly against $line (not the unrelated global $_).
    my ($mday, $mon, $year) = ($line =~ /(\d+)-([A-Z]+)-(\d{4})$/i);
    return $date if !defined $year;

    # The pattern is case-insensitive, so normalize before the table lookup.
    my $monNum = $monHash{ uc $mon };
    return $date if !defined $monNum;

    $date = sprintf("%04d%02d%02d", $year, $monNum, $mday);

    return $date;
}

###############################################################################
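# Check whether the protein sequences of the species given by $sp are identical in
#     $main::MYSQL_DBACCUM and $MBGD_HOME/species/$sp/MBGD/data/*.protseq
#     (note: the default directory used by the code below is .../species/$spid/gm/data)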
sub matchSpecSequence {
    # Compare the protein sequences registered in the database for one
    # species with the sequences stored in the *.protseq files of a directory.
    #
    # Arguments:
    #   $db   - database handle; must provide execute($sql) returning a
    #           statement handle with fetchrow_hashref()
    #   $spid - species id used in the query (interpolated into the SQL text
    #           as-is, so it must come from a trusted source)
    #   $dir  - directory holding the *.protseq files; defaults to
    #           $ENV{MBGD_HOME}/species/$spid/gm/data
    # Returns:
    #   1 when every sequence found in the files is registered with identical
    #   content and both sides hold the same number of distinct sequences;
    #   0 otherwise (including when the directory cannot be opened).
    my $db = shift;
    my $spid = shift;
    my $dir = shift;
    if (!$dir) {
        $dir = "$ENV{'MBGD_HOME'}/species/$spid/gm/data";
    }

    # Load the registered protein sequences of $spid, keyed by
    # (md5sum, seq_length).
    my $refMysql = {};
    my $tab = "project p, geneset gs, geneset_gene gsg, gene g, transcript t, proteinseq ps";
    my $where = join(" and ",
        "p.spid='$spid'",
        "p.id=gs.project_id",
        "gs.selected is not null",
        "gs.id=gsg.geneset_id",
        "gsg.gene_id=g.id",
        "g.id=t.gene_id",
        "t.type='CDS'",
        "t.seqno=1",
        "t.proteinseq_id=ps.id",
    );
    my $sql = "select md5sum, seq_length, seq from $tab where $where";
    my $sth = $db->execute($sql);
    if ($sth) {
        while (my $ref = $sth->fetchrow_hashref) {
            my $chksum = $ref->{'md5sum'};
            my $length = $ref->{'seq_length'};

            $refMysql->{$chksum,$length} = $ref->{'seq'};
        }
    }
    my $nMysql = scalar(keys(%{$refMysql}));
    print STDERR "Registerd proteinseq for $spid :: $nMysql\n";

    # Sequences seen in the protseq files, keyed like $refMysql.
    my $refProtseq = {};

    # Record one sequence read from a file and tell whether the very same
    # sequence is registered in the database. Shared by the "next header
    # reached" and "end of file reached" cases.
    my $isRegistered = sub {
        my ($seq) = @_;
        my $length = length($seq);
        my $objSeq = MBGD::ProteinSeq->new({ 'seq' => $seq });
        my ($chksum) = $objSeq->getValues('chksum');
        my ($fileSeq) = $objSeq->getValues('seq');
        $refProtseq->{$chksum,$length} = $fileSeq;

        my $dbSeq = $refMysql->{$chksum,$length};
        # Not registered at all (or no usable sequence on either side).
        return 0 if !defined $dbSeq || !defined $fileSeq;
        return ($dbSeq eq $fileSeq) ? 1 : 0;
    };

    # Collect the file names first so the directory handle can be released
    # before any file is processed.
    my $dh = DirHandle->new($dir) or return 0;
    my @protseqFiles = grep { /\.protseq$/ } $dh->read();
    $dh->close();

    foreach my $file (@protseqFiles) {
        # Unreadable files are skipped; the final count comparison still
        # detects sequences that are missing on the file side.
        my $fh = FileHandle->new("$dir/$file") or next;
        my $seq = '';
        my $matched = 1;
        # defined() keeps a final line consisting of "0" from ending the loop.
        while (defined(my $line = $fh->getline())) {
            $line =~ s#[\r\n]+$##;

            if ($line =~ /^>/) {
                # A header line terminates the previous record, if any.
                if ($seq ne '' && !$isRegistered->($seq)) {
                    $matched = 0;
                    last;
                }
                $seq = '';
            }
            else {
                $seq .= $line;
            }
        }
        # The last record of a file is not followed by a header line.
        if ($matched && $seq ne '' && !$isRegistered->($seq)) {
            $matched = 0;
        }
        $fh->close();

        # Not registered in the database, or registered with other content.
        return 0 if !$matched;
    }

    # At this point every sequence in the protseq files is registered.
    my $nProtseq = scalar(keys(%{$refProtseq}));
    print STDERR "count(protseq) :: $nProtseq\n";

    # The database must not hold sequences that are absent from the files.
    if ($nMysql != $nProtseq) {
        return 0;
    }

    return 1;
}

###############################################################################
# Create the index files for glimpse
sub makeSearchIndex {
    # Build the glimpse search index for each indexed sub-directory of
    # $baseDir (currently only "sptit").
    #
    # For every sub-directory the index directory "<baseDir>/<dir>/.index" is
    # created (mode 0750) and $main::CMD_glimpseindex is run on the
    # sub-directory. When $main::DEBUG is true the command is only printed to
    # STDERR, not executed; the index directory is still created.
    #
    # Arguments:
    #   $baseDir - directory containing the sub-directories to index
    # Returns:
    #   nothing useful.
    my($baseDir) = @_;
    my(@dirList) = ("sptit");

    # No `use` line visible in this file imports mkpath(), so load File::Path
    # here and call it by its full name.
    require File::Path;

    foreach my $dir (@dirList) {
        my $dirIndex = "$baseDir/$dir/.index";

        # Make sure the index directory exists.
        File::Path::mkpath($dirIndex, 0, 0750);

        # The command is run through the shell, so $main::CMD_glimpseindex
        # may carry extra options.
        my $cmd = "$main::CMD_glimpseindex -H $dirIndex $baseDir/$dir";
        print STDERR "make index [$baseDir/$dir]\n";
        if ($main::DEBUG) {
            print STDERR "CMD : $cmd\n";
            next;
        }

        # Report a failing indexer instead of silently ignoring it; keep
        # going so the remaining directories are still processed.
        if (system($cmd) != 0) {
            print STDERR "make index failed [$baseDir/$dir] :: status=$?\n";
        }
    }

    return;
}

###############################################################################
# Update the count_genes field of the chromosome table
sub updateChromosomeCountGenes {
    # Refresh chromosome.count_genes from the number of rows in the gene
    # table.
    #
    # Arguments:
    #   $db - database handle; must provide do($sql)
    # Returns:
    #   whatever the second do() call returns.
    #
    # NOTE(review): the temporary table is not dropped here, so a second call
    # on the same connection presumably fails at the create step - confirm.
    # NOTE(review): the join uses chromosome.seq, whereas
    # updateChromosomeSeqLength joins on chromosome.id - confirm that
    # chromosome.seq really holds the value stored in gene.chrid.
    my ($db) = @_;

    # Step 1: count the genes per chromosome id into a temporary table.
    my $countSql = join(' ',
        'create temporary table tmpCountGenes',
        'select chrid, count(*) as count_genes',
        'from gene',
        'group by chrid',
    );
    my $status = $db->do($countSql);

    # Step 2: copy the counts into the chromosome table.
    my $copySql = join(' ',
        'update chromosome, tmpCountGenes',
        'set chromosome.count_genes=tmpCountGenes.count_genes',
        'where chromosome.seq=tmpCountGenes.chrid',
    );
    $status = $db->do($copySql);

    return $status;
}

###############################################################################
# Update the seq_length field of the chromosome table
sub updateChromosomeSeqLength {
    # Copy dnaseq.length into chromosome.seq_length for every chromosome row
    # whose id equals a dnaseq id.
    #
    # Arguments:
    #   $db - database handle; must provide do($sql)
    # Returns:
    #   whatever do() returns for the update statement.
    my ($db) = @_;

    my $updateSql = join(' ',
        'update chromosome, dnaseq',
        'set chromosome.seq_length=dnaseq.length',
        'where chromosome.id=dnaseq.id',
    );
    my $status = $db->do($updateSql);

    return $status;
}

###############################################################################
#
sub get_proteinseq_accum {
    # Fetch locus tag, md5sum and sequence of the proteins of one species
    # from the database named by $main::MYSQL_DBACCUM.
    #
    # Only the first CDS transcript (seqno=1) of genes belonging to a selected
    # gene set is considered.
    #
    # Arguments:
    #   $spid    - species id (interpolated into the SQL text as-is)
    #   $sel_opt - optional hash reference; when its 'udate' entry is true,
    #              only sequences with ps.udate >= that value are returned
    # Returns:
    #   the first value returned by select_fetch().
    my ($spid, $sel_opt) = @_;

    my ($db) = MBGD::DB->new($main::MYSQL_DBACCUM);

    my $tables = "project p, geneset gs, geneset_gene gsg, gene g, transcript t, proteinseq ps";

    # All conditions are combined with "and" below.
    my @conditions = (
        "p.spid='$spid'",
        "p.id=gs.project_id",
        "gs.selected is not null",
        "gs.id=gsg.geneset_id",
        "gsg.gene_id=g.id",
        "g.id=t.gene_id",
        "t.type='CDS'",
        "t.seqno=1",
        "t.proteinseq_id=ps.id",
    );
    if ($sel_opt->{'udate'}) {
        # Optional lower bound on the update date of the sequence.
        my $since = $sel_opt->{'udate'};
        push @conditions, "ps.udate >= '$since'";
    }

    my %query = (
        'columns' => "g.locus_tag locus_tag, ps.md5sum md5sum, ps.seq seq",
        'where'   => join(" and ", @conditions),
    );
    my ($res) = $db->select_fetch($tables, \%query);

    return $res;
}

###############################################################################
# Script mode: when this file is executed directly (rather than loaded as a
# module), check every species id given on the command line and print the
# result of matchSpecSequence() for it.
# $main::DIR is passed as the protseq directory; presumably it is supplied
# through the -s switch of the shebang line (-DIR=...) - confirm.
if (__FILE__ eq $0) {
    my $dbh = MBGD::DB->new();
    for my $species (@ARGV) {
        my $status = matchSpecSequence($dbh, $species, $main::DIR);
        print "STA[$species] :: $status\n";
    }
}

###############################################################################
1;#
###############################################################################
