#!/usr/bin/perl -s

use strict;
use DirHandle;
use FileHandle;
use Digest::MD5;
use MBGD::DB;

use lib qw(./lib);
use MBGD_ACCUM::Schema;
require "MBGD_Conf.pl";
require "MBGD_common.pl";

###############################################################################
#
# Make sure every project ends up with exactly one selected geneset.
#
# Walks all Project rows (ordered by spid) and looks at their genesets.
# A geneset counts as selected when its 'selected' value is greater than 0.
#   - No selected geneset: the geneset with the highest 'ver' is selected.
#   - Several selected genesets: the geneset with the highest 'ver' is kept
#     (and marked selected if it was not already); every other geneset of
#     the project gets selected = undef.
#   - Project without any geneset: a warning is printed and it is skipped.
# Every change is written back with update() and reported on STDERR.
#
# Arguments:
#   $db     - schema object; only resultset('Project')->search() is used.
#   $src_db - not used here; accepted so existing callers keep working.
# Returns nothing.
#
# NOTE(review): the conversion SQL in this file tests "gs.selected is not
# null", while this check tests "0 < selected". A project whose only
# "selected" geneset has selected = 0 is treated differently by the two --
# confirm that 0 is never stored.
sub check_selected_geneset {
    my($db) = shift;
    my($src_db) = shift;

    # Highest version first.
    my($func_sort) = sub {
        $b->ver <=> $a->ver;
    };

    #
    my($search_cond_ref) = {
        'order_by' => 'spid',
    };
    my(@proj_list) = $db->resultset('Project')->search(undef,
                                                       $search_cond_ref);
    foreach my $proj_ref (@proj_list) {
        my(@geneset_list) = $proj_ref->genesets;
        my(@selected_list) = grep { 0 < ($_->selected || 0) } @geneset_list;
        my($n_selected) = scalar(@selected_list);
        my($msg);

        #
        if ($n_selected == 0) {
            $msg = sprintf("Found no selected geneset. (%s)", $proj_ref->spid);
            print STDERR "WARNING :: $msg\n";

            my($ref) = sort $func_sort @geneset_list;
            if (!defined($ref)) {
                # Nothing to choose from; calling ->ver on undef would die.
                $msg = sprintf("Found no geneset to select. (%s)", $proj_ref->spid);
                print STDERR "WARNING :: $msg\n";
                next;
            }

            $msg = sprintf("Select a geneset. (%s:%s)", $proj_ref->spid, $ref->ver);
            print STDERR "LOG :: $msg\n";
            $ref->selected(1);
            $ref->update;
        }
        elsif (1 < $n_selected) {
            $msg = sprintf("Found multiple selected genesets. (%s)", $proj_ref->spid);
            print STDERR "WARNING :: $msg\n";

            my($ref, @gset_list) = sort $func_sort @geneset_list;
            $msg = sprintf("Use this geneset. (%s:%s)", $proj_ref->spid, $ref->ver);
            print STDERR "LOG :: $msg\n";

            # The newest geneset is taken from ALL genesets, so it is not
            # necessarily one of the selected ones. Without this the loop
            # below could leave the project with no selected geneset at all.
            if (!(0 < ($ref->selected || 0))) {
                $ref->selected(1);
                $ref->update;
            }

            foreach my $other_ref (@gset_list) {
                $msg = sprintf("Unelect a geneset. (%s:%s)", $proj_ref->spid, $other_ref->ver);
                print STDERR "LOG :: $msg\n";
                $other_ref->selected(undef);
                $other_ref->update;
            }
        }
    }

    return;
}

###############################################################################
#
# Rebuild the destination 'genome' table from the source database.
#
# Empties $dst_db.genome and refills it with a single INSERT ... SELECT that
# joins project, organism and institution in $src_db. Progress messages, the
# generated SQL and the elapsed seconds go to STDERR.
#
#   $dbi    - database handle (prepare/execute are used)
#   $src_db - name of the source database
#   $dst_db - name of the destination database
# Returns nothing.
sub convert_genome {
    my ($dbi, $src_db, $dst_db) = @_;

    my $table   = 'genome';
    my $started = time();
    print STDERR "DBG :: Converting...($table)\n";

    # Start from an empty table.
    $dbi->prepare("truncate table $dst_db.$table")->execute();

    # Destination column => source expression, in insert order.
    my @column_map = (
        [ 'id'           => 'p.id' ],
        [ 'spid'         => 'p.spid' ],
        [ 'sp'           => 'p.sp' ],
        [ 'abbrev'       => 'o.abbrev' ],
        [ 'orgname'      => 'o.orgname' ],
        [ 'strain'       => 'o.strain' ],
        [ 'taxid'        => 'o.taxid' ],
        [ 'specweight'   => 'o.specweight' ],
        [ 'source'       => 'p.source' ],
        [ 'institution'  => 'i.name' ],
        [ 'wwwlink'      => 'i.wwwlink' ],
        # 'medid' and 'journal' are deliberately left out (disabled columns).
        [ 'date_release' => 'p.date_release' ],
        [ 'date_modify'  => 'p.date_modify' ],
        [ 'mbgd_update'  => 'p.mbgd_update' ],
    );
    my $insert_cols = join(',', map { $_->[0] } @column_map);
    my $select_cols = join(',', map { $_->[1] } @column_map);

    my $insert_sql = join(' ',
        "insert $dst_db.$table ($insert_cols)",
        "select $select_cols",
        "from $src_db.project p,",
        "$src_db.organism o,",
        "$src_db.institution i",
        "where p.organism_id=o.id and p.institution_id=i.id",
        "order by p.spid",
    );
    my $insert_sth = $dbi->prepare($insert_sql);
    print STDERR "SQL :: $insert_sql\n";
    $insert_sth->execute();

    my $elapsed = time() - $started;
    print STDERR "DBG :: Done.($table) $elapsed [s]\n";

    return;
}

###############################################################################
#
# Rebuild the destination 'chromosome' table from the source database.
#
# Empties $dst_db.chromosome, copies every non-contig sequence region that
# belongs to a geneset whose 'selected' column is not NULL, and finally fills
# chromosome.genome with the id of the genome row having the same spid.
# Progress messages, the insert SQL and the elapsed seconds go to STDERR.
#
#   $dbi    - database handle (prepare/execute are used)
#   $src_db - name of the source database
#   $dst_db - name of the destination database
# Returns nothing.
sub convert_chromosome {
    my ($dbi, $src_db, $dst_db) = @_;

    my $table   = 'chromosome';
    my $started = time();
    print STDERR "DBG :: Converting...($table)\n";

    # Start from an empty table.
    $dbi->prepare("truncate table $dst_db.$table")->execute();

    # Destination column => source expression, in insert order.
    my @column_map = (
        [ 'id'         => 'c.id' ],
        [ 'spid'       => 'p.spid' ],
        [ 'sp'         => 'p.sp' ],
        [ 'name'       => 'c.name' ],
        [ 'seqno'      => 'c.seqno' ],
        [ 'type'       => 'c.type' ],
        [ 'shape'      => 'c.shape' ],
        [ 'status'     => 'c.status' ],
        [ 'accession'  => 'c.seqno' ],      # disabled alternative: 'sr.source_id'
        [ 'seq'        => 'ds.id' ],
        [ 'seq_length' => 'ds.seq_length' ],
    );
    my $insert_cols = join(',', map { $_->[0] } @column_map);
    my $select_cols = join(',', map { $_->[1] } @column_map);

    my @source_tables = map { "$src_db.$_" } (
        'geneset gs',
        'project p',
        'geneset_chromosome gsc',
        'chromosome c',
        'seqreg sr',
        'dnaseq ds',
    );
    my @conditions = (
        'gs.selected is not null',
        'p.id=gs.project_id',
        'gs.id=gsc.geneset_id',
        'gsc.chromosome_id=c.id',
        'c.seqreg_id=sr.id',
        "sr.type!='contig'",
        'sr.dnaseq_id=ds.id',
    );

    my $insert_sql = "insert $dst_db.$table ($insert_cols)"
                   . " select $select_cols"
                   . " from "  . join(', ', @source_tables)
                   . " where " . join(' and ', @conditions)
                   . " order by p.spid, c.seqno";
    my $insert_sth = $dbi->prepare($insert_sql);
    print STDERR "SQL :: $insert_sql\n";
    $insert_sth->execute();

    # Link each chromosome row to its genome row through the shared spid.
    my $link_sql = "update $dst_db.$table c, $dst_db.genome g set c.genome=g.id where c.spid=g.spid ";
    $dbi->prepare($link_sql)->execute();

    my $elapsed = time() - $started;
    print STDERR "DBG :: Done.($table) $elapsed [s]\n";

    return;
}

###############################################################################
#
# Rebuild the destination 'contig' table from the source database.
#
# Steps:
#   1. empty $dst_db.contig;
#   2. insert one row per dnaseq that is reached through a seqreg of type
#      'contig', together with the number of joined rows (count_genes);
#   3. fill contig.genome with the id of the genome row having the same spid;
#   4. number the contigs of every spid 1..N in id order (seqno).
# Progress messages, the insert SQL and the elapsed seconds go to STDERR.
#
#   $dbi    - database handle (prepare/execute are used)
#   $src_db - name of the source database
#   $dst_db - name of the destination database
# Returns nothing.
#
# NOTE(review): unlike the chromosome and gene queries in this file, the
# insert below does not filter on "gs.selected is not null" -- confirm that
# counting genes over all genesets is intended.
sub convert_contig {
    my($dbi) = shift;
    my($src_db) = shift;
    my($dst_db) = shift;

    my($tab_name) = 'contig';

    my($t1) = time();
    print STDERR "DBG :: Converting...($tab_name)\n";

    #
    my($sql);
    my($sth);

    # Start from an empty table.
    $sql = "truncate table $dst_db.$tab_name";
    $sth = $dbi->prepare($sql);
    $sth->execute();

    ####################################
    # Destination columns ...
    my(@col_ins_list) = ('sp', 'spid',
                         'type',
                         'shape',
                         'accession',
                         'seq',
                         'seq_length',
                         'count_genes',
                         );
    my($col_ins) = join(',', @col_ins_list);

    # ... and the source expressions feeding them, in the same order.
    my(@col_sel_list) = ('p.sp',
                         'p.spid',
                         'sr2.type',
                         "'linear'",
                         "concat(p.sp,'-',ds.id)",
                         'ds.id',
                         'ds.seq_length',
                         'count(*)',
                         );
    my($col_sel) = join(',', @col_sel_list);

    #
    $sql = "insert $dst_db.$tab_name ($col_ins) "
         . "select $col_sel "
         . "from $src_db.project p, "
         .      "$src_db.geneset gs, "
         .      "$src_db.geneset_gene gsg, "
         .      "$src_db.gene g, "
         .      "$src_db.transcript t, "
         .      "$src_db.seqreg sr1, "
         .      "$src_db.location l, "
         .      "$src_db.seqreg sr2, "
         .      "$src_db.dnaseq ds "
         . "where p.id=gs.project_id "
           . "and gs.id=gsg.geneset_id "
           . "and gsg.gene_id=g.id "
           . "and g.id=t.gene_id "
           . "and t.seqreg_id=sr1.id "
           . "and sr1.id=l.seqreg_id "
           . "and l.seqreg_id2=sr2.id "
           . "and sr2.type='contig' "
           . "and sr2.dnaseq_id=ds.id "
         . "group by ds.id";
    $sth = $dbi->prepare($sql);
    print STDERR "SQL :: $sql\n";
    $sth->execute();

    # Link each contig row to its genome row through the shared spid.
    $sql = "update $dst_db.$tab_name c, $dst_db.genome g "
         . "set c.genome=g.id "
         . "where c.spid=g.spid ";
    $sth = $dbi->prepare($sql);
    $sth->execute();

    # update seqno: collect the spids first, then renumber each of them.
    my(@spid_list);
    $sql = "select spid from $dst_db.$tab_name group by spid order by spid";
    $sth = $dbi->prepare($sql);
    $sth->execute();
    while (my $ref = $sth->fetchrow_hashref()) {
        push(@spid_list, $ref->{'spid'});
    }

    if (@spid_list) {
        # Both statements are loop-invariant, so prepare them once. The spid
        # is bound as a placeholder instead of being pasted into the SQL, so
        # a value containing a quote cannot break the statement.
        my($sth_reset)  = $dbi->prepare("set \@i := 0;");
        my($sth_number) = $dbi->prepare("update $dst_db.$tab_name set seqno=(\@i := \@i + 1) where spid=? order by id;");
        foreach my $spid (@spid_list) {
            $sth_reset->execute();
            $sth_number->execute($spid);
        }
    }

    # Report the elapsed time like the other converters do.
    my($t2) = time() - $t1;
    print STDERR "DBG :: Done.($tab_name) $t2 [s]\n";

    return;
}

###############################################################################
#
# Insert into $dst_db.gene the genes whose first transcript (t.seqno=1) has a
# sequence region with location data.
#
# Only genes of genesets whose 'selected' column is not NULL are copied; the
# SELECT is grouped by gene id. The table is NOT emptied here.
#
#   $dbi    - database handle (prepare/execute are used)
#   $src_db - name of the source database
#   $dst_db - name of the destination database
# Returns nothing.
sub convert_gene_with_geneseq {
    my ($dbi, $src_db, $dst_db) = @_;

    my $table = 'gene';

    # Destination column => source expression, in insert order.
    my @column_map = (
        [ 'id'       => 'g.id' ],
        [ 'sp'       => 'p.sp' ],
        [ 'name'     => 'g.locus_tag' ],
        [ 'gene'     => 'g.name' ],
        [ 'from1'    => 'l.from1' ],
        [ 'to1'      => 'l.to1' ],
        [ 'dir'      => 'l.dir1' ],
        [ 'location' => 'li.location_text' ],
        [ 'type'     => 't.type' ],
        [ 'protid'   => 't.protid' ],
        [ 'gi'       => 't.gi' ],
        [ 'geneid'   => 'g.geneid' ],
        [ 'ntseq'    => 'sr.dnaseq_id' ],
        [ 'aaseq'    => 't.proteinseq_id' ],
        [ 'descr'    => 'g.descr' ],
    );
    my $insert_cols = join(',', map { $_->[0] } @column_map);
    my $select_cols = join(',', map { $_->[1] } @column_map);

    my @source_tables = map { "$src_db.$_" } (
        'project p',
        'geneset gs',
        'geneset_gene gsg',
        'gene g',
        'transcript t',
        'seqreg sr',
        'location l',
        'location_info li',
    );
    my @conditions = (
        'p.id=gs.project_id',
        'gs.selected is not null',
        'gs.id=gsg.geneset_id',
        'gsg.gene_id=g.id',
        'g.id=t.gene_id',
        't.seqno=1',
        't.seqreg_id=sr.id',
        'sr.id=l.seqreg_id',
        'l.id=li.location_id',
    );

    # The trailing blank is part of the statement text as it has always been sent.
    my $insert_sql = "insert $dst_db.$table ($insert_cols)"
                   . " select $select_cols"
                   . " from "  . join(', ', @source_tables)
                   . " where " . join(' and ', @conditions)
                   . " group by g.id"
                   . " order by p.spid ";
    $dbi->prepare($insert_sql)->execute();

    return;
}

###############################################################################
#
# Insert into $dst_db.gene the genes whose first transcript (t.seqno=1) has
# no sequence region (t.seqreg_id=0).
#
# Position, location and nucleotide sequence columns are filled with empty
# strings and 'dir' with 1. Only genes of genesets whose 'selected' column is
# not NULL are copied. The table is NOT emptied here.
#
#   $dbi    - database handle (prepare/execute are used)
#   $src_db - name of the source database
#   $dst_db - name of the destination database
# Returns nothing.
sub convert_gene_without_geneseq {
    my ($dbi, $src_db, $dst_db) = @_;

    my $table = 'gene';
    my $empty = "''";       # SQL literal for an empty string

    # Destination column => source expression, in insert order.
    my @column_map = (
        [ 'id'       => 'g.id' ],
        [ 'sp'       => 'p.sp' ],
        [ 'name'     => 'g.locus_tag' ],
        [ 'gene'     => 'g.name' ],
        [ 'from1'    => $empty ],
        [ 'to1'      => $empty ],
        [ 'dir'      => '1' ],
        [ 'location' => $empty ],
        [ 'type'     => 't.type' ],
        [ 'protid'   => 't.protid' ],
        [ 'gi'       => 't.gi' ],
        [ 'geneid'   => 'g.geneid' ],
        [ 'ntseq'    => $empty ],
        [ 'aaseq'    => 't.proteinseq_id' ],
        [ 'descr'    => 'g.descr' ],
    );
    my $insert_cols = join(',', map { $_->[0] } @column_map);
    my $select_cols = join(',', map { $_->[1] } @column_map);

    my @source_tables = map { "$src_db.$_" } (
        'project p',
        'geneset gs',
        'geneset_gene gsg',
        'gene g',
        'transcript t',
    );
    my @conditions = (
        'p.id=gs.project_id',
        'gs.selected is not null',
        'gs.id=gsg.geneset_id',
        'gsg.gene_id=g.id',
        'g.id=t.gene_id',
        't.seqno=1',
        't.seqreg_id=0',
    );

    # The trailing blank is part of the statement text as it has always been sent.
    my $insert_sql = "insert $dst_db.$table ($insert_cols)"
                   . " select $select_cols"
                   . " from "  . join(', ', @source_tables)
                   . " where " . join(' and ', @conditions)
                   . " group by g.id"
                   . " order by p.spid ";
    $dbi->prepare($insert_sql)->execute();

    return;
}

###############################################################################
#
# Rebuild the destination 'gene' table from the source database.
#
# Steps:
#   1. empty $dst_db.gene;
#   2. insert the genes (with and without sequence region);
#   3. set gene.aalen from the protein sequence length;
#   4. set gene.chrid for genes located on a non-contig sequence region;
#   5. set gene.contigid for genes located on a contig (matched against the
#      already converted $dst_db.contig table via contig.seq).
# Progress messages and the elapsed seconds go to STDERR.
#
#   $dbi    - database handle (prepare/execute are used)
#   $src_db - name of the source database
#   $dst_db - name of the destination database
# Returns nothing.
sub convert_gene {
    my ($dbi, $src_db, $dst_db) = @_;

    my $table   = 'gene';
    my $started = time();
    print STDERR "DBG :: Converting...($table)\n";

    # Start from an empty table, then load the gene rows.
    $dbi->prepare("truncate table $dst_db.$table")->execute();

    convert_gene_with_geneseq($dbi, $src_db, $dst_db);
    convert_gene_without_geneseq($dbi, $src_db, $dst_db);

    # Join path from a gene row to the sequence region it is located on;
    # shared by the chromosome and the contig update.
    my @locus_tables = (
        "$src_db.transcript t",
        "$src_db.seqreg sr1",
        "$src_db.location l",
        "$src_db.seqreg sr2",
    );
    my @locus_conds = (
        'g.id=t.gene_id',
        't.seqreg_id=sr1.id',
        'sr1.id=l.seqreg_id',
        'l.seqreg_id2=sr2.id',
    );

    # The follow-up updates, executed in this order.
    # NOTE(review): the aalen update joins every transcript of a gene, not
    # only t.seqno=1 as the inserts do -- confirm this is intended.
    my @updates = (
        {
            'tables' => [ "$src_db.transcript t", "$src_db.proteinseq ps" ],
            'set'    => 'g.aalen=ps.seq_length',
            'where'  => [ 'g.id=t.gene_id', 't.proteinseq_id=ps.id' ],
        },
        {
            'tables' => [ @locus_tables, "$src_db.chromosome c" ],
            'set'    => 'g.chrid=c.id',
            'where'  => [ @locus_conds, "sr2.type!='contig'", 'sr2.id=c.seqreg_id' ],
        },
        {
            'tables' => [ @locus_tables, "$dst_db.contig c" ],
            'set'    => 'g.contigid=c.id',
            'where'  => [ @locus_conds, "sr2.type='contig'", 'sr2.dnaseq_id=c.seq' ],
        },
    );
    foreach my $upd (@updates) {
        # The trailing blank is part of the statement text as it has always been sent.
        my $update_sql = "update " . join(', ', "$dst_db.$table g", @{ $upd->{'tables'} })
                       . " set "   . $upd->{'set'}
                       . " where " . join(' and ', @{ $upd->{'where'} })
                       . " ";
        $dbi->prepare($update_sql)->execute();
    }

    my $elapsed = time() - $started;
    print STDERR "DBG :: Done.($table) $elapsed [s]\n";

    return;
}

###############################################################################
#
# Rebuild a destination sequence table from the source 'dnaseq' table.
#
# Empties $dst_db.$tab_name and copies id, md5sum, length and sequence of
# every dnaseq row referenced by a seqreg whose type is one of @type_list.
# The SELECT is grouped by md5sum. Progress messages, the insert SQL and the
# elapsed seconds go to STDERR.
#
#   $dbi       - database handle (prepare/execute are used)
#   $src_db    - name of the source database
#   $dst_db    - name of the destination database
#   $tab_name  - destination table to rebuild
#   @type_list - seqreg types to include; placed into the SQL as quoted
#                literals, so they must not contain quotes
# Returns nothing.
sub convert_seq {
    my ($dbi, $src_db, $dst_db, $tab_name, @type_list) = @_;

    my $started = time();
    print STDERR "DBG :: Converting...($tab_name)\n";

    # 'a','b',... for the IN (...) list.
    my $quoted_types = join(',', map { "'$_'" } @type_list);

    # Start from an empty table.
    $dbi->prepare("truncate table $dst_db.$tab_name")->execute();

    # Destination column => source expression, in insert order.
    my @column_map = (
        [ 'id'     => 'ds.id' ],
        [ 'chksum' => 'ds.md5sum' ],
        [ 'length' => 'ds.seq_length' ],
        [ 'seq'    => 'ds.seq' ],
    );
    my $insert_cols = join(',', map { $_->[0] } @column_map);
    my $select_cols = join(',', map { $_->[1] } @column_map);

    # The trailing blank is part of the statement text as it has always been sent.
    my $insert_sql = "insert $dst_db.$tab_name ($insert_cols)"
                   . " select $select_cols"
                   . " from $src_db.dnaseq ds, $src_db.seqreg sr"
                   . " where ds.id=sr.dnaseq_id and sr.type in($quoted_types)"
                   . " group by md5sum ";
    my $insert_sth = $dbi->prepare($insert_sql);
    print STDERR "SQL :: $insert_sql\n";
    $insert_sth->execute();

    my $elapsed = time() - $started;
    print STDERR "DBG :: Done.($tab_name) $elapsed [s]\n";

    return;
}

###############################################################################
#
# Rebuild $dst_db.dnaseq from the source sequences that belong to seqregs of
# type 'chromosome', 'plasmid' or 'contig'. Thin wrapper around convert_seq().
#
#   $dbi    - database handle
#   $src_db - name of the source database
#   $dst_db - name of the destination database
# Returns nothing.
sub convert_dnaseq {
    my ($dbi, $src_db, $dst_db) = @_;

    my @seqreg_types = ('chromosome', 'plasmid', 'contig');
    convert_seq($dbi, $src_db, $dst_db, 'dnaseq', @seqreg_types);

    return;
}

###############################################################################
#
# Rebuild $dst_db.geneseq from the source sequences that belong to seqregs of
# type 'transcript'. Thin wrapper around convert_seq().
#
#   $dbi    - database handle
#   $src_db - name of the source database
#   $dst_db - name of the destination database
# Returns nothing.
sub convert_geneseq {
    my ($dbi, $src_db, $dst_db) = @_;

    my @seqreg_types = ('transcript');
    convert_seq($dbi, $src_db, $dst_db, 'geneseq', @seqreg_types);

    return;
}

###############################################################################
#
# Rebuild the destination 'proteinseq' table from the source database.
#
# Empties $dst_db.proteinseq and copies id, md5sum, length and sequence of
# every row of $src_db.proteinseq. Progress messages, the insert SQL and the
# elapsed seconds go to STDERR.
#
#   $dbi    - database handle (prepare/execute are used)
#   $src_db - name of the source database
#   $dst_db - name of the destination database
# Returns nothing.
sub convert_proteinseq {
    my ($dbi, $src_db, $dst_db) = @_;

    my $table   = 'proteinseq';
    my $started = time();
    print STDERR "DBG :: Converting...($table)\n";

    # Start from an empty table.
    $dbi->prepare("truncate table $dst_db.$table")->execute();

    # Destination column => source expression, in insert order.
    my @column_map = (
        [ 'id'     => 'ps.id' ],
        [ 'chksum' => 'ps.md5sum' ],
        [ 'length' => 'ps.seq_length' ],
        [ 'seq'    => 'ps.seq' ],
    );
    my $insert_cols = join(',', map { $_->[0] } @column_map);
    my $select_cols = join(',', map { $_->[1] } @column_map);

    # The trailing blank is part of the statement text as it has always been sent.
    my $insert_sql = "insert $dst_db.$table ($insert_cols)"
                   . " select $select_cols"
                   . " from $src_db.proteinseq ps ";
    my $insert_sth = $dbi->prepare($insert_sql);
    print STDERR "SQL :: $insert_sql\n";
    $insert_sth->execute();

    my $elapsed = time() - $started;
    print STDERR "DBG :: Done.($table) $elapsed [s]\n";

    return;
}

###############################################################################
#
# Return the 'sp' values of all projects of a given type.
#
#   $dbi    - database handle (prepare/execute/fetchrow_hashref are used)
#   $src_db - name of the database holding the 'project' table
#   $type   - value compared against project.type (bound as a placeholder)
# Returns the list of sp values in the order the database delivers them
# (possibly empty).
sub get_splist_genome_type {
    my ($dbi, $src_db, $type) = @_;

    my $table     = 'project';
    my $query_sth = $dbi->prepare("select sp from $src_db.$table where type=?");
    $query_sth->execute($type);

    my @species;
    while (my $row = $query_sth->fetchrow_hashref()) {
        push(@species, $row->{'sp'});
    }

    return @species;
}

###############################################################################
#
# Refresh the per-genome-type species lists in $dst_db.attribute.
#
# For each of the project types 'metagenome', 'incomplete' and 'partial' the
# attribute row (category 'genome', name 'species_<type>') is deleted and
# re-inserted with the comma-separated list of matching species codes taken
# from $src_db.project.
#
#   $dbi    - database handle (prepare/execute are used)
#   $src_db - name of the source database
#   $dst_db - name of the destination database
# Returns nothing.
sub update_attribute {
    my ($dbi, $src_db, $dst_db) = @_;

    my $table      = 'attribute';
    my $category   = 'genome';
    my $delete_sql = "delete from $dst_db.$table where category=? and name=?";
    my $insert_sql = "insert $dst_db.$table (category, name, value) values (?, ?, ?)";

    foreach my $genome_type ('metagenome', 'incomplete', 'partial') {
        my @species   = get_splist_genome_type($dbi, $src_db, $genome_type);
        my $attr_name = 'species_' . $genome_type;

        # Replace the row: remove any old value, then store the fresh list.
        $dbi->prepare($delete_sql)->execute($category, $attr_name);
        $dbi->prepare($insert_sql)->execute($category, $attr_name, join(',', @species));
    }

    return;
}

###############################################################################
#
# (Re)create $dst_db.prot_chksum, a lookup of protein checksums per gene.
#
# Runs three statements in order: drop the table if it exists, create it
# (column sizes come from $main::SIZE_SP and $main::SIZE_NAME), and fill it
# from $dst_db.gene joined with $dst_db.proteinseq on gene.aaseq.
#
#   $dbi    - database handle (prepare/execute are used)
#   $dst_db - name of the destination database
# Returns undef as soon as a statement handle reports an error (after
# handing it to MBGD_DbAccessError); returns nothing on success.
sub create_prot_chksum {
    my ($dbi, $dst_db) = @_;

    my $tabname = "$dst_db.prot_chksum";

    # The statements are independent of each other's results, so they are
    # built up front and executed in order below.
    my @statements = (
        "drop table if exists $tabname",

        "CREATE TABLE $tabname ("
            . "sp     varchar($main::SIZE_SP) NOT NULL DEFAULT '',"
            . "name   varchar($main::SIZE_NAME) NOT NULL DEFAULT '',"
            . "chksum varchar(22) NOT NULL DEFAULT '',"
            . "KEY idx_protchksum_name (sp, name),"
            . "KEY idx_protchksum_chksum (chksum)"
            . ")",

        "insert $tabname select g.sp,g.name,ps.chksum "
            . "from $dst_db.gene g, $dst_db.proteinseq ps "
            . "where g.aaseq=ps.id",
    );

    foreach my $sql (@statements) {
        my $sth = $dbi->prepare("$sql");
        $sth->execute();
        if ($sth && $sth->err) {
            # Report the failure and stop; later statements depend on this one.
            &MBGD_DbAccessError($sth, $sql);
            return undef;
        }
    }

    return;
}

###############################################################################
#
# Run the complete conversion from the source into the destination database.
#
# Order matters: the geneset selection is repaired first, then the tables are
# rebuilt (genome, chromosome, contig, gene, dnaseq, geneseq, proteinseq),
# then prot_chksum is created from the converted tables, and finally the
# genome-type attributes are refreshed.
#
#   $db     - schema object; used for the geneset check and as the source of
#             the raw database handle ($db->storage->dbh)
#   $src_db - name of the source database
#   $dst_db - name of the destination database
# Returns nothing.
sub convert_mbgd_data {
    my ($db, $src_db, $dst_db) = @_;

#$db->storage->debug(1);

    # Every project needs exactly one selected geneset before converting.
    check_selected_geneset($db, $src_db);

    # The converters below work on the plain database handle.
    my $dbi = $db->storage->dbh;

    my @table_converters = (
        \&convert_genome,
        \&convert_chromosome,
        \&convert_contig,       # must precede convert_gene (gene.contigid)
        \&convert_gene,
        \&convert_dnaseq,
        \&convert_geneseq,
        \&convert_proteinseq,
    );
    foreach my $converter (@table_converters) {
        $converter->($dbi, $src_db, $dst_db);
    }

    create_prot_chksum($dbi, $dst_db);

    update_attribute($dbi, $src_db, $dst_db);

    return;
}

###############################################################################
# Script entry point: runs only when this file is executed directly, not when
# it is loaded from another program.
if ($0 eq __FILE__) {
    # Source is the accumulation database. The destination defaults to the
    # work database unless $main::DBNAME is set (the -s switch on the #! line
    # turns a -DBNAME=... command line switch into that variable).
    my $src_db = $main::DBNAME_ACCUM;
    my $dst_db = defined($main::DBNAME) ? $main::DBNAME : $main::DBNAME_WORK;

    # Use the site-specific MySQL option file when it exists.
    # NOTE(review): with RECOG_HOME unset this path is "/etc/my.cnf" --
    # confirm that falling back to the system file is acceptable.
    my $file_mycnf = "$ENV{'RECOG_HOME'}/etc/my.cnf";
    my $dbs        = 'dbi:mysql:' . $src_db;
    $dbs .= ";mysql_read_default_file=$file_mycnf" if (-e $file_mycnf);

    my $db = MBGD_ACCUM::Schema->connect($dbs, $main::USER, $main::PASS);

    convert_mbgd_data($db, $src_db, $dst_db);
}

###############################################################################
1;#
###############################################################################
