#!/usr/bin/perl -s

use strict;
use DirHandle;
use FileHandle;
use IO::File;
use Digest::MD5;

use lib qw(./lib);
use MBGD_ACCUM::Schema;
require 'MBGD_Conf.pl';

###############################################################################
#
# Return the root directory of one species.
#
#   $spid : species identifier, used as the directory name
#
# Returns the string "$ENV{MBGD_HOME}/species/$spid".
sub get_path_spid {
    my($spid) = shift;

    # MBGD_HOME is passed as a sprintf argument instead of being interpolated
    # into the format string, so a '%' in the environment value is copied
    # verbatim and never interpreted as a conversion specifier.
    my($dir) = sprintf("%s/species/%s", $ENV{'MBGD_HOME'}, $spid);

    return $dir;
}

###############################################################################
#
# Return the "gm" directory of one species, i.e. the path produced by
# get_path_spid() for $spid with "/gm" appended.
#
#   $spid : species identifier
sub get_path_spid_gm {
    my($spid) = @_;

    return join('/', get_path_spid($spid), 'gm');
}

###############################################################################
#
# Return the "data" directory of one species, i.e. the path produced by
# get_path_spid_gm() for $spid with "/data" appended.
#
#   $spid : species identifier
sub get_path_spid_data {
    my($spid) = @_;

    return join('/', get_path_spid_gm($spid), 'data');
}

###############################################################################
#
# Look up a sequence row by content, creating it when it does not exist.
#
#   $db       : schema object; resultset($tab_name) is called on it
#   $tab_name : name of the result set holding sequences
#   $seq      : sequence string
#
# The row is searched for / created with three columns: 'md5sum' (base64
# MD5 digest of $seq, without padding), 'seq_length' and 'seq'.
#
# Returns whatever find_or_create() returns for that row.
sub find_or_create_seq {
    my($db, $tab_name, $seq) = @_;

    my $checksum = Digest::MD5::md5_base64($seq);
    my $row_spec = {
        'md5sum'     => $checksum,
        'seq_length' => length($seq),
        'seq'        => $seq,
    };

    my($row) = $db->resultset($tab_name)->find_or_create($row_spec);

    return $row;
}

###############################################################################
#
# Read one chromosome sequence file and register it in the database.
#
#   $db        : schema object; resultset() is called on it
#   $spid      : species identifier (selects the data directory)
#   $name      : base name; the file "<data dir>/$name.chrseq" is read
#   $source_db : value stored in the Seqreg 'source_db' column
#   $source_id : value stored in the Seqreg 'source_id' column
#
# Lines whose first non-blank character is '>' are skipped.  All other
# lines are concatenated, lower-cased and stripped of everything that is
# not a-z.
#
# Returns the Seqreg row, or nothing when the file can not be opened or
# contains no sequence.
sub load_chromosome_seq {
    my($db) = shift;
    my($spid) = shift;
    my($name) = shift;
    my($source_db) = shift;
    my($source_id) = shift;

    #
    my($dir) = get_path_spid_data($spid);
    my($filename) = "$dir/$name.chrseq";

    # Explicit read mode: the file name is never parsed for mode characters
    # or surrounding whitespace the way a single-argument open would.
    my($fh) = FileHandle->new($filename, 'r') || return;

    my($seq) = '';
    while (defined(my $line = $fh->getline())) {
        next if ($line =~ /^\s*>/);
        $seq .= $line;
    }
    $fh->close();

    $seq = lc($seq);
    $seq =~ s#[^a-z]+##g;
    my($seqlen) = length($seq);

    return if ($seqlen == 0); ## no sequence

    my($dnaseq_ref) = find_or_create_seq($db, 'Dnaseq', $seq);

    #
    my(%seqreg_hash);
    $seqreg_hash{'dnaseq'} = $dnaseq_ref;
    $seqreg_hash{'source_db'} = $source_db;
    $seqreg_hash{'source_id'} = $source_id;
    my($seqreg_ref) = $db->resultset('Seqreg')->find_or_create(\%seqreg_hash);

    return $seqreg_ref;
}

###############################################################################
#
###############################################################################
#
# Store one gene (DNA) sequence record and index it in $seq_ref.
#
# The sequence is lower-cased, stripped of everything that is not a-z and
# registered through find_or_create_seq() in 'Dnaseq'.  The resulting row is
# stored in $seq_ref under both "$spid:$name" and
# "$spid:$name:$type:$seqno".
sub _set_gene_seq {
    my($db, $spid, $name, $type, $seqno, $seq, $seq_ref) = @_;

    $seq = lc($seq);
    $seq =~ s#[^a-z]+##g;
    my($geneseq) = find_or_create_seq($db, 'Dnaseq', $seq);
    $seq_ref->{"$spid:$name"}              = $geneseq;
    $seq_ref->{"$spid:$name:$type:$seqno"} = $geneseq;

    return;
}

# Load every "*.geneseq" file of a species into the database.
#
#   $db   : schema object; resultset() is called on it
#   $spid : species identifier (selects the data directory and prefixes
#           every key of the returned hash)
#
# Each record starts with a header line ">spid:name:type:seqno".  Missing
# header fields default to: name = first field, type = 'CDS', seqno = 1.
# The lines up to the next header form the sequence of that record.
#
# Returns a hash reference mapping "$spid:$name" and
# "$spid:$name:$type:$seqno" to the Dnaseq row of the record.
# Dies when the data directory or one of the files can not be opened.
sub load_gene_seq {
    my($db) = shift;
    my($spid) = shift;

    my($seq_ref) = {};

    #
    my($dir) = get_path_spid_data($spid);
    my($dh) = DirHandle->new("$dir") || die("Can not open $dir($!)");

    # defined() keeps the scan going past a directory entry named "0".
    while (defined(my $file = $dh->read())) {
        next if ($file !~ /\.geneseq$/);

        #
        my($name, $type, $seqno);
        my($seq) = '';
        my($filename) = "$dir/$file";
        my($fh) = FileHandle->new($filename, 'r') || die("Can not open $filename($!)");
        print STDERR "DBG :: Loading geneseq for $spid [$file]\n";
        while (defined(my $line = $fh->getline())) {
            if ($line =~ /^>\s*(\S+)/) {
                my($spid2, $name2, $type2, $seqno2) = split(/\:/, $1);
                $name2  = $spid2 if (!$name2);
                $type2  = 'CDS'  if (!$type2);
                $seqno2 = 1      if (!$seqno2);

                # A new header closes the record collected so far.
                if ($seq ne '') {
                    _set_gene_seq($db, $spid, $name, $type, $seqno, $seq, $seq_ref);
                }
                $name  = $name2;
                $type  = $type2;
                $seqno = $seqno2;
                $seq = '';
            }
            else {
                $seq .= $line;
            }
        }
        $fh->close();

        # The last record of a file has no following header to close it.
        if ($seq ne '') {
            _set_gene_seq($db, $spid, $name, $type, $seqno, $seq, $seq_ref);
        }
    }

    return $seq_ref;
}

###############################################################################
#
# Store one protein sequence record and index it in $seq_ref.
#
#   $db      : schema object, handed on to find_or_create_seq()
#   $spid, $name, $type, $seqno : parts of the index keys
#   $seq     : raw sequence text
#   $seq_ref : hash reference that receives the row
#
# The sequence is upper-cased and every run of characters not matched by
# the case-insensitive class A-Z is removed before it is registered in
# 'Proteinseq'.  The row is stored under "$spid:$name" and
# "$spid:$name:$type:$seqno".  Returns nothing.
sub set_protein_seq {
    my($db, $spid, $name, $type, $seqno, $seq, $seq_ref) = @_;

    my $residues = uc($seq);
    $residues =~ s#[^A-Z]+##ig;

    my($row) = find_or_create_seq($db, 'Proteinseq', $residues);
    foreach my $key ("$spid:$name", "$spid:$name:$type:$seqno") {
        $seq_ref->{$key} = $row;
    }

    return;
}

###############################################################################
#
# Load every "*.protseq" file of a species into the database.
#
#   $db   : schema object, handed on to set_protein_seq()
#   $spid : species identifier (selects the data directory and prefixes
#           every key of the returned hash)
#
# Each record starts with a header line ">spid:name:type:seqno".  Missing
# header fields default to: name = first field, type = 'CDS', seqno = 1.
# The lines up to the next header form the sequence of that record.
#
# Returns a hash reference filled by set_protein_seq().
# Dies when the data directory or one of the files can not be opened.
sub load_protein_seq {
    my($db) = shift;
    my($spid) = shift;

    my($seq_ref) = {};

    #
    my($dir) = get_path_spid_data($spid);
    my($dh) = DirHandle->new("$dir") || die("Can not open $dir($!)");

    # defined() keeps the scan going past a directory entry named "0".
    while (defined(my $file = $dh->read())) {
        next if ($file !~ /\.protseq$/);

        #
        my($name, $type, $seqno);
        my($seq) = '';
        my($filename) = "$dir/$file";
        my($fh) = FileHandle->new($filename, 'r') || die("Can not open $filename($!)");
        print STDERR "DBG :: Loading proteinseq for $spid [$file]\n";
        while (defined(my $line = $fh->getline())) {
            if ($line =~ /^>\s*(\S+)/) {
                my($spid2, $name2, $type2, $seqno2) = split(/\:/, $1);
                $name2  = $spid2 if (!$name2);
                $type2  = 'CDS'  if (!$type2);
                $seqno2 = 1      if (!$seqno2);

                # A new header closes the record collected so far.
                if ($seq ne '') {
                    set_protein_seq($db, $spid, $name, $type, $seqno, $seq, $seq_ref);
                }
                $name  = $name2;
                $type  = $type2;
                $seqno = $seqno2;
                $seq = '';
            }
            else {
                $seq .= $line;
            }
        }
        $fh->close();

        # The last record of a file has no following header to close it.
        if ($seq ne '') {
            set_protein_seq($db, $spid, $name, $type, $seqno, $seq, $seq_ref);
        }
    }

    return $seq_ref;
}

###############################################################################
#
# Load every "*.gene" table of a species and link the genes to a geneset.
#
#   $db                  : schema object; resultset() is called on it
#   $spid                : species identifier
#   $geneset_ref         : geneset row; its id selects the GenesetGene links
#   $chromosome_hash_ref : hash keyed by file base name; the 'seqreg' entry
#                          of the matching key is used as the chromosome
#                          sequence region of all genes in that file
#
# Input rows are tab separated.  Three layouts are understood:
#   * header driven : the first comment line ("#...") with text after the
#                     '#' seen during this call is split on whitespace and
#                     used as the column list for all following rows, in
#                     every file; all other comment lines are skipped
#   * 7 columns or fewer : locus_tag, name, from, to, dir, type, descr
#   * otherwise     : spid, locus_tag, name, chr_acc, from, to, dir,
#                     location, type, transl_table, codon_start, protid,
#                     gi, geneid, descr
#
# For every row a Gene, a Seqreg, (when from/to are defined) a Location with
# its LocationInfo, and a Transcript are found or created.  Finally all
# GenesetGene rows of the geneset are deleted and re-created from the genes
# read during this call.
#
# Progress is reported on STDERR: '.' every 100 rows, '+' every 1000 rows
# and "#<count>" every 10000 rows.
#
# Returns nothing.  Dies when the data directory or a file can not be opened.
sub load_gene {
    my($db) = shift;
    my($spid) = shift;
    my($geneset_ref) = shift;
    my($chromosome_hash_ref) = shift;

    #
    my($geneseq_ref) = load_gene_seq($db, $spid);
    my($protseq_ref) = load_protein_seq($db, $spid);

    my(@gene_ref_list);

    #
    my($sta_read_header) = 0;
    my(@col_list);

    # Number of rows processed so far.  It lives outside the loops so the
    # progress indicator below really advances.
    my($count) = 0;

    my($dir_spid) = get_path_spid_data($spid);
    my($dh) = DirHandle->new("$dir_spid") || die("Can not open $dir_spid($!)");

    # defined() keeps the scan going past a directory entry named "0".
    while (defined(my $file = $dh->read())) {
        next if ($file !~ /\.gene$/);

        my($fname) = ($file =~ /^(.+)\.\w+$/);
        my($seqreg_chr_ref) = $chromosome_hash_ref->{"$fname"}->{'seqreg'};

        my($filename) = "$dir_spid/$file";
        print STDERR "DBG :: Loading gene for $spid [$file]\n";
        my($fh) = FileHandle->new($filename, 'r') || die("Can not open $filename($!)");
        while (defined(my $line = $fh->getline())) {
            next if ($line =~ /^\s*$/);

            $line =~ s#[\r\n]*$##;

            # Comment line: the first one carrying text defines the columns.
            if ($line =~ /^\s*#(.*)/) {
                my($line_head) = $1;
                if (!$sta_read_header && $line_head ne '') {
                    @col_list = split(/\s/, $line_head);

                    $sta_read_header = 1;
                }
                next;
            }

            # Fresh fields for every row, so nothing leaks from a previous
            # row (or file) into a row that has fewer columns.
            my($spid2, $locus_tag, $name, $chr_acc,
               $from, $to, $dir, $location,
               $type, $transl_table, $codon_start,
               $protid, $gi, $geneid,
               $descr);

            my(@d) = split(/\t/, $line);
            if ($sta_read_header) {
                # Column name (lower case) -> field that receives the value.
                my(%field_for) = (
                    'spid'         => \$spid2,
                    'locus_tag'    => \$locus_tag,
                    'name'         => \$name,
                    'chr_acc'      => \$chr_acc,
                    'from'         => \$from,
                    'to'           => \$to,
                    'dir'          => \$dir,
                    'location'     => \$location,
                    'type'         => \$type,
                    'transl_table' => \$transl_table,
                    'codon_start'  => \$codon_start,
                    'protid'       => \$protid,
                    'gi'           => \$gi,
                    'geneid'       => \$geneid,
                    'descr'        => \$descr,
                );
                foreach my $col (@col_list) {
                    my($field_ref) = $field_for{lc($col)};
                    if ($field_ref) {
                        ${$field_ref} = shift(@d);
                    }
                    else {
                        # An unknown column name does not consume a value.
                        print STDERR "WARNING :: Unknown col_name :: $col\n";
                    }
                }
            }
            elsif (scalar(@d) <= 7) {
                ($locus_tag, $name, $from, $to, $dir, $type, $descr) = @d;
            }
            else {
                ($spid2, $locus_tag, $name, $chr_acc,
                 $from, $to, $dir, $location,
                 $type, $transl_table, $codon_start,
                 $protid, $gi, $geneid,
                 $descr) = @d;
            }

            # set default
            $codon_start = 0 if ($codon_start <= 0);
            $gi          = 0 if ($gi <= 0);

            #
            my($seqno) = 1;

            # keys into the sequence hashes built by the seq loaders
            my($key_pseq1) = "$spid:$locus_tag";
            my($key_pseq2) = "$spid:$locus_tag:$type:$seqno";

            #
            my(%gene_hash);
            $gene_hash{'spid'}       = $spid;
            $gene_hash{'locus_tag'}  = $locus_tag;
            my($gene_ref) = $db->resultset('Gene')->find_or_create(\%gene_hash);
            push(@gene_ref_list, $gene_ref);

            $gene_hash{'name'}       = $name        if ($name);
            $gene_hash{'geneid'}     = $geneid      if ($geneid);
            $gene_hash{'descr'}      = $descr       if ($descr);
            $gene_ref->update(\%gene_hash);

            # search transcript
            my($cond_transcript_ref) = { 'gene_id' => $gene_ref->id,
                                         'type'    => $type,
                                         'seqno'   => $seqno,
                                       };
            my($transcript_ref) = $db->resultset('Transcript')->find($cond_transcript_ref);

            # gene sequence: most specific key first, then the short key,
            # finally a slice of the chromosome sequence
            my($seqreg_ref);
            my(%seqreg_hash);
            $seqreg_hash{'dnaseq'} = $geneseq_ref->{"$key_pseq2"};
            $seqreg_hash{'dnaseq'} = $geneseq_ref->{"$key_pseq1"} if (!$seqreg_hash{'dnaseq'});
            if (!$seqreg_hash{'dnaseq'} && $seqreg_chr_ref) {
                my($dnaseq_ref) = $seqreg_chr_ref->dnaseq();
                my($dseq) = $dnaseq_ref->seq();
                # NOTE(review): substr() offsets are 0-based and $dir is not
                # taken into account here.  If from/to are 1-based and/or
                # genes can lie on the reverse strand, this slice is shifted
                # or not reverse-complemented -- confirm against the data.
                my($gseq) = substr($dseq, $from, $to - $from + 1);
                my($geneseq) = find_or_create_seq($db, 'Dnaseq', $gseq);
                $seqreg_hash{'dnaseq'} = $geneseq_ref->{"$key_pseq2"}
                                       = $geneseq_ref->{"$key_pseq1"}
                                       = $geneseq;
            }
            delete $seqreg_hash{'dnaseq'} if (! $seqreg_hash{'dnaseq'});

            $seqreg_hash{'source_db'} = '';
            $seqreg_hash{'source_id'} = $locus_tag;
            $seqreg_hash{'type'}      = 'transcript';
            $seqreg_ref = $db->resultset('Seqreg')->find_or_create(\%seqreg_hash);

            #
            if (defined($from) && defined($to)) {
                my(%location_hash);
                $location_hash{'seqreg_id'}  = undef;
                if ($seqreg_ref) {
                    $location_hash{'seqreg_id'} = $seqreg_ref->id;
                }
                $location_hash{'from1'}   = $from;
                $location_hash{'to1'}     = $to;
                $location_hash{'dir1'}    = $dir;
                $location_hash{'seqreg_id2'} = undef;
                if ($seqreg_chr_ref) {
                    $location_hash{'seqreg_id2'} = $seqreg_chr_ref->id;
                }
                my($location_ref) = $db->resultset('Location')->find_or_create(\%location_hash);

                #
                my(%location_info_hash);
                $location_info_hash{'location'} = $location_ref;
                $location_info_hash{'location_text'} = $location;
                $db->resultset('LocationInfo')->find_or_create(\%location_info_hash);
            }

            my(%transcript_hash);
            $transcript_hash{'gene'}        = $gene_ref;
            if ($seqreg_ref) {
                $transcript_hash{'seqreg'}      = $seqreg_ref;
            }
            if (exists($protseq_ref->{"$key_pseq2"})) {
                $transcript_hash{'proteinseq'}  = $protseq_ref->{"$key_pseq2"};
            }
            elsif (exists($protseq_ref->{"$key_pseq1"})) {
                $transcript_hash{'proteinseq'}  = $protseq_ref->{"$key_pseq1"};
            }
            elsif ($type =~ /^cds$/i) {
                print STDERR "WARNING :: Can not found proteinseq for $spid:$locus_tag\n";
            }
            $transcript_hash{'type'}        = $type;
            $transcript_hash{'seqno'}       = $seqno;
            $transcript_hash{'protid'}      = $protid;
            $transcript_hash{'gi'}          = $gi;
            $transcript_hash{'codon_start'} = $codon_start;
            $transcript_hash{'product'}     = $descr;

            if (!$transcript_ref) {
                ($transcript_ref) = $db->resultset('Transcript')->find_or_create(\%transcript_hash);
            }
            else {
                $transcript_ref->update(\%transcript_hash);
            }

            # progress indicator: one increment per row
            $count++;
            if ($count % 10000 == 0) {
                print STDERR "#$count\n";
            }
            elsif ($count % 1000 == 0) {
                print STDERR "+";
            }
            elsif ($count % 100 == 0) {
                print STDERR ".";
            }
        }
        $fh->close();
    }

    # update geneset-gene: drop the old links, then link every gene read
    my($rs_geneset_gene) = $db->resultset('GenesetGene');
    my($cond_ref) = { 'geneset_id' => $geneset_ref->id,
                    };
    my $gene_list = $rs_geneset_gene->search($cond_ref);
    while (my $ref = $gene_list->next) {
        $ref->delete;
    }

    foreach my $gene_ref (@gene_ref_list) {
        my(%geneset_gene_hash) = ();
        $geneset_gene_hash{'geneset'} = $geneset_ref;
        $geneset_gene_hash{'gene'}    = $gene_ref;

        $db->resultset('GenesetGene')->find_or_create(\%geneset_gene_hash);
    }

    return;
}

###############################################################################
#
# Read a tab separated key/value file.
#
#   $filename : path of the file
#
# Every line is split on tabs; the first field is the key and the second
# field the value (further fields are ignored).  Trailing CR/LF characters
# are removed.  Lines that yield no key at all (blank lines) are skipped.
#
# Returns a hash reference; it is empty when the file can not be opened.
sub read_file_keyval {
    my($filename) = shift;
    my($keyval_ref) = {};

    # Explicit read mode: the file name is never parsed for mode characters.
    my($fh) = FileHandle->new($filename, 'r');
    if (!$fh) {
        return $keyval_ref;
    }

    # defined() so that a final, unterminated line "0" is not taken for EOF.
    while (defined(my $line = $fh->getline())) {
        $line =~ s#[\r\n]*$##;
        my($k, $v) = split(/\t/, $line);

        # split() returns nothing for an empty line; do not store a '' key.
        next if (!defined($k));

        $keyval_ref->{"$k"} = $v;
    }
    $fh->close();

    return $keyval_ref;
}

###############################################################################
#
# Renumber the chromosomes of a species.
#
#   $chromosome_hash_ref : hash keyed by name; each value is a hash whose
#                          'chromosome' entry (when present) is an object
#                          providing type(), seqreg(), seqno() and update()
#
# Chromosomes are ordered by
#   1. type      : chromosome, plasmid, mitochondrion, anything else
#   2. sequence length (seqreg->dnaseq->seq_length), longest first
#   3. name, so that ties do not depend on hash ordering
# and receive seqno 1, 2, 3, ... in that order; update() is called on each.
#
# Entries without a 'chromosome' object are left untouched.  A chromosome
# without a sequence is treated as having length 0.
#
# Returns $chromosome_hash_ref.
sub update_chromosome_seqno {
    my($chromosome_hash_ref) = shift;

    # update chromosome.seqno
    #     KEY1 : type2(chromosome/plasmid/mitochondrion/others)
    #     KEY2 : seq_length
    my(%chr_type) = ('chromosome'    => 1,
                     'plasmid'       => 2,
                     'mitochondrion' => 3,
                     );

    # Sort keys are computed once per chromosome rather than once per
    # comparison; every accessor call may hit the database.
    my(%sort_key);
    foreach my $name (keys(%{$chromosome_hash_ref})) {
        my $chr = $chromosome_hash_ref->{"$name"}->{'chromosome'};
        next if (!$chr);

        my $type   = $chr->type;
        my $weight = (defined($type) && $chr_type{"$type"}) || 99;

        my $seqreg = $chr->seqreg;
        my $dnaseq = $seqreg ? $seqreg->dnaseq : undef;
        my $length = ($dnaseq ? $dnaseq->seq_length : 0) || 0;

        $sort_key{"$name"} = [ $weight, $length ];
    }

    my(@chr_name_list) = sort {
        $sort_key{$a}->[0] <=> $sort_key{$b}->[0]
            or
            $sort_key{$b}->[1] <=> $sort_key{$a}->[1]
            or
            $a cmp $b;
    } keys(%sort_key);

    my($seqno) = 1;
    foreach my $name (@chr_name_list) {
        my $chr = $chromosome_hash_ref->{"$name"}->{'chromosome'};
        $chr->seqno($seqno);
        $chr->update;

        $seqno++;
    }

    return $chromosome_hash_ref;
}

###############################################################################
#
# Load every "*.chrseq" file of a species and link the chromosomes to a
# geneset.
#
#   $db          : schema object; resultset() is called on it
#   $spid        : species identifier (selects the data directory)
#   $geneset_ref : geneset row; its id selects the GenesetChromosome links
#
# For each "<name>.chrseq" the sequence is registered through
# load_chromosome_seq().  When "<name>.chromosome.txt" exists, a Chromosome
# row is updated or created from its key/value pairs and the sequence
# region gets the chromosome's type; otherwise the sequence region is typed
# 'contig'.  A 'seqno' from the file is used only when it is positive and
# not already taken by another file; otherwise all chromosomes are
# renumbered through update_chromosome_seqno().
#
# When at least one chromosome description was found, all
# GenesetChromosome rows of the geneset are deleted and re-created for the
# chromosomes loaded during this call.
#
# Returns a hash reference keyed by file base name with the entries
# 'seqreg' (always set, may be undef) and 'chromosome' (only when a
# description file exists).  Dies when the data directory can not be opened.
sub load_chromosome {
    my($db) = shift;
    my($spid) = shift;
    my($geneset_ref) = shift;

    my($chromosome_hash_ref) = {};

    #
    my($update_seqno) = 0;
    my($found_chromosome) = 0;
    my(%seqno_hash);
    my($dir) = get_path_spid_data($spid);
    my($dh) = DirHandle->new("$dir") || die("Can not open $dir($!)");

    # defined() keeps the scan going past a directory entry named "0".
    while (defined(my $file = $dh->read())) {
        next if ($file !~ /\.chrseq$/);

        #
        my($fname) = ($file =~ /^(.+)\.\w+$/);
        my($filename) = "$dir/$fname.chromosome.txt";
        my($chromosome_info) = read_file_keyval($filename);

        #
        my($ac) = $chromosome_info->{'accession'};
        print STDERR "LOG :: Loading chromosome for $spid [$fname]\n";
        my($chr_seqreg_ref) = load_chromosome_seq($db,
                                                  $spid,
                                                  $fname,
                                                  undef(),
                                                  $ac);
        $chromosome_hash_ref->{"$fname"}->{'seqreg'} = $chr_seqreg_ref;

        #
        if (-e $filename) {
            $found_chromosome++;

            #
            my(%chromosome_hash) = ();
            if ($chr_seqreg_ref) {
                $chromosome_hash{'seqreg'}   = $chr_seqreg_ref;
            }
            $chromosome_hash{'source'}   = $chromosome_info->{'accession'};
            $chromosome_hash{'name'}     = $chromosome_info->{'name'};
            my($seqno) = $chromosome_info->{'seqno'};
            if ((0 < $seqno) && !exists($seqno_hash{"$seqno"})) {
                $chromosome_hash{'seqno'} = $seqno;
                $seqno_hash{"$seqno"} = 1;
            }
            else {
                # missing or duplicated seqno: renumber everything later
                $update_seqno = 1;
            }
            $chromosome_hash{'type'}   = $chromosome_info->{'type'};
            $chromosome_hash{'shape'}  = $chromosome_info->{'shape'};
            $chromosome_hash{'status'} = $chromosome_info->{'status'} || 'complete';
            my($chromosome_row) = $db->resultset('Chromosome')->update_or_create(\%chromosome_hash);
            $chromosome_hash_ref->{"$fname"}->{'chromosome'} = $chromosome_row;

            # set seqreg.type
            if ($chr_seqreg_ref) {
                my($upd_ref) = { 'type' => $chromosome_hash{'type'} };
                $chr_seqreg_ref->update($upd_ref);
            }
        }
        else {
            if (defined($chr_seqreg_ref)) {
                # set seqreg.type
                my($upd_ref) = { 'type' => 'contig' };
                $chr_seqreg_ref->update($upd_ref);
            }
        }
    }

    # Only entries that own a Chromosome row can be renumbered or linked;
    # contig entries carry nothing but a 'seqreg'.
    my(@chr_fname_list) = grep { $chromosome_hash_ref->{"$_"}->{'chromosome'} }
                          keys(%{$chromosome_hash_ref});

    if ($update_seqno) {
        my(%registered) = map { $_ => $chromosome_hash_ref->{"$_"} } @chr_fname_list;
        update_chromosome_seqno(\%registered);
    }

    #
    if ($found_chromosome) {
        # update geneset-chromosome
        my($cond_ref) = { 'geneset_id' => $geneset_ref->id,
                      };
        my($rs_geneset_chromosome) = $db->resultset('GenesetChromosome');
        my $chr_list = $rs_geneset_chromosome->search($cond_ref);
        while (my $ref = $chr_list->next) {
            $ref->delete;
        }

        foreach my $fname (@chr_fname_list) {
            my(%geneset_chromosome_hash) = ();
            $geneset_chromosome_hash{'geneset'} = $geneset_ref;
            $geneset_chromosome_hash{'chromosome'} = $chromosome_hash_ref->{"$fname"}->{'chromosome'};

            $db->resultset('GenesetChromosome')->find_or_create(\%geneset_chromosome_hash);
        }
    }

    return $chromosome_hash_ref;
}

###############################################################################
#
# Find or create the geneset of a project.
#
#   $db          : schema object; resultset('Geneset') is called on it
#   $spid        : species identifier (selects the "gm" directory)
#   $project_ref : project row the geneset belongs to
#
# The version is the 'ver' entry of "<gm dir>/genome.txt", or 1 when that
# entry is missing or false.
#
# Returns the Geneset row.
sub load_geneset {
    my($db, $spid, $project_ref) = @_;

    my $genome_file = get_path_spid_gm($spid) . "/genome.txt";
    my($genome_ref) = read_file_keyval($genome_file);

    my $geneset_spec = {
        'project' => $project_ref,
        'ver'     => $genome_ref->{'ver'} || 1,
    };
    my($geneset_ref) = $db->resultset('Geneset')->find_or_create($geneset_spec);

    return $geneset_ref;
}

###############################################################################
#
# Register the project of a species from "<gm dir>/genome.txt".
#
#   $db   : schema object; resultset() is called on it
#   $spid : species identifier
#
# The key/value pairs of genome.txt are validated first:
#   * 'spid'        blank or different from $spid : warning, $spid is used
#   * 'institution' blank                         : warning, 'Unknown' is used
#   * 'sp'          blank                         : error
#   * 'taxid'       false                         : warning, 32644 is used
# After an error nothing is written and nothing is returned.
#
# Otherwise the Environment, Organism, Institution and Project rows are
# found or created and brought up to date.
#
# Returns the Project row.
sub load_project {
    my($db, $spid) = @_;

    my $filename = get_path_spid_gm($spid) . "/genome.txt";
    my($genome) = read_file_keyval($filename);
    my $n_err = 0;

    # --- validation -------------------------------------------------------
    if ($genome->{'spid'} =~ /^\s*$/) {
        print STDERR "WARNING :: Not defined 'spid' in $filename. Instead, use '$spid'.\n";
        $genome->{'spid'} = $spid;
    }
    elsif ($genome->{'spid'} ne $spid) {
        print STDERR "WARNING :: Mismatch 'spid' in $filename. Instead, use '$spid'.\n";
        $genome->{'spid'} = $spid;
    }

    if ($genome->{'institution'} =~ /^\s*$/) {
        print STDERR "WARNING :: Not defined 'institution' in $filename\n";
        $genome->{'institution'} = 'Unknown';
    }

    if ($genome->{'sp'} =~ /^\s*$/) {
        print STDERR "ERROR :: Not found 'sp' in $filename\n";
        $n_err++;
    }

    unless ($genome->{'taxid'}) {
        print STDERR "WARNING :: This 'taxid' was blank. Set 32644(=Unknown).\n";
        $genome->{'taxid'} = 32644;
    }

    if ($n_err > 0) {
        print STDERR "ERROR :: \n";
        return;
    }

    # --- environment ------------------------------------------------------
    my($environment) = $db->resultset('Environment')->find_or_create({ 'environ' => '' });

    # --- organism ---------------------------------------------------------
    my $organism_spec = {
        'orgname' => $genome->{'orgname'},
        'abbrev'  => $genome->{'abbrev'},
        'strain'  => $genome->{'strain'},
    };
    my($organism) = $db->resultset('Organism')->find_or_create($organism_spec);

    # Each numeric attribute is written back on its own when it differs.
    foreach my $column ('taxid', 'specweight') {
        if ($organism->$column != $genome->{$column}) {
            $organism->$column($genome->{$column});
            $organism->update();
        }
    }

    # --- institution ------------------------------------------------------
    my $institution;
    if ($genome->{'institution'}) {
        my $institution_spec = { 'name' => $genome->{'institution'} };
        $institution = $db->resultset('Institution')->find_or_create($institution_spec);
        if ($institution->wwwlink ne $genome->{'wwwlink'}) {
            $institution->wwwlink($genome->{'wwwlink'});
            $institution->update();
        }
    }

    # --- project ----------------------------------------------------------
    # The project is identified by sp / spid / source; the remaining
    # columns are written in a separate update.
    my %project_key = (
        'sp'     => $genome->{'sp'},
        'spid'   => $genome->{'spid'},
        'source' => $genome->{'source'} || 'refseq',
    );
    my($project) = $db->resultset('Project')->find_or_create(\%project_key);

    my %project_columns = (
        %project_key,
        'type'         => $genome->{'type'} || 'complete',
        'wwwlink'      => $genome->{'wwwlink'},
        'date_release' => $genome->{'date_release'},
        'date_modify'  => $genome->{'date_modify'},
        'institution'  => $institution,
        'organism'     => $organism,
        'environment'  => $environment,
    );
    $project->update(\%project_columns);

    return $project;
}

###############################################################################
#
# Load all data of one species: project, geneset, chromosomes and genes.
#
#   $db   : schema object, handed on to the individual loaders
#   $spid : species identifier
#
# When load_project() yields no project (it returns nothing after a
# validation error) the species is skipped: no geneset, chromosome or gene
# is loaded for a project that does not exist.
#
# The elapsed time in seconds is reported on STDERR.  Returns nothing.
sub load_mbgd_data {
    my($db) = shift;
    my($spid) = shift;

    my($t) = time();

    #
    my($project_ref) = load_project($db, $spid);
    if (!$project_ref) {
        print STDERR "ERROR :: Can not load project for $spid. Skipped.\n";
        return;
    }

    my($geneset_ref) = load_geneset($db, $spid, $project_ref);
    my($chromosome_hash_ref) = load_chromosome($db, $spid, $geneset_ref);
    load_gene($db, $spid, $geneset_ref, $chromosome_hash_ref);

    print STDERR "DBG :: " . (time() - $t) . "[s]\n";

    return;
}

###############################################################################
# Script entry point; runs only when this file is executed directly.
#
# The command line arguments are species identifiers.  Because the script
# runs under "perl -s", switches such as -DBNAME=..., -USER=..., -PASS=...
# and -DEBUG set the corresponding $main:: variables.
# NOTE(review): $main::DBNAME_ACCUM and $main::DIR_species are presumably
# defined by MBGD_Conf.pl -- confirm.
if ($0 eq __FILE__) {
    my($dbname) = $main::DBNAME_ACCUM;
    if (defined($main::DBNAME)) {
        $dbname = $main::DBNAME;
    }

    my($dbs) = 'dbi:mysql:' . $dbname;
    my($file_mycnf) = "$ENV{'RECOG_HOME'}/etc/my.cnf";
    if (-e $file_mycnf) {
        $dbs .= ";mysql_read_default_file=$file_mycnf";
    }
    my($db) = MBGD_ACCUM::Schema->connect($dbs, $main::USER, $main::PASS);
    $db->storage->debug($main::DEBUG);

    #
    my(@spid_list) = @ARGV;
    foreach my $spid (@spid_list) {
        load_mbgd_data($db, $spid);

        # Create (or truncate) the marker file "updated" for this species.
        # A failure is reported instead of being silently ignored.
        my($file_updated) = "$main::DIR_species/$spid/gm/updated";
        my($fh_updated) = IO::File->new($file_updated, 'w');
        if ($fh_updated) {
            $fh_updated->close();
        }
        else {
            print STDERR "WARNING :: Can not create $file_updated($!)\n";
        }
    }
}

###############################################################################
1;#
###############################################################################
