#!/usr/bin/perl -s

################################################################################
# Read GenBank data files and
# convert them into the data file format used by MBGD.
#
################################################################################
#use strict;
use File::Path;
use FileHandle;
use DirHandle;
use GenBank;
use MBGD;
use ParseKEGGcomplete;
use ParseNCBIcomplete;
use ParseGOLDcomplete;
require "MBGD_Conf.pl";
require "Complement.pl";
require "InfoSpec.pl";
require "na2aa.pl";
require "MBGD_commonUpdate.pl";

###############################################################################
#
# getTaxonomyNames($taxid)
#
# Returns a hash reference that may hold the scientific names of the
# 'species', 'genus' and 'family' ranks for the given NCBI taxonomy id.
#
# NOTE: the lookup is currently switched off.  The function returns an empty
# hash reference immediately, so callers always get undefined names.  The
# code after the early return is kept so that the lookup can be re-enabled
# by deleting that single statement.
sub getTaxonomyNames {
    my($taxid) = @_;
    my($refTaxName) = {};

    # Taxonomy lookup is disabled: always report "no names known".
    return $refTaxName;

    my($tab)    = "taxonomy";
    my($dbname) = "taxonomy";

    # The constructor may die; treat that like a failed connection instead
    # of calling a method on an undefined value.
    my($db) = eval { MBGD::DB->new($dbname) };
    if (!$db || !$db->sta_connect()) {
        return $refTaxName;
    }

    # Fetch the hierarchy path (dot separated) of the requested taxon.
    my($refRes) = $db->select_fetch($tab,
            { 'where' => "taxid=$taxid and class='scientific name'" });
    if ($refRes->{'ROWS'} == 0) {
        return $refTaxName;
    }

    my(@h);
    my($first) = @{$refRes->{'INFO'} || []};
    if ($first) {
        @h = split(/\./, $first->{'hierarchy'});
    }

    # Walk from the taxon itself up to the root, one level per iteration.
    while (scalar(@h) != 0) {
        my($hierarchy) = join('.', @h);

        # Shorten the path before any 'next' so that every iteration makes
        # progress (skipping without popping would loop forever).
        pop(@h);

        my($refLevel) = $db->select_fetch($tab,
                { 'where' => "hierarchy='$hierarchy' and class='scientific name'" });
        next if ($refLevel->{'ROWS'} == 0);

        my($ref) = @{$refLevel->{'INFO'}};
        if ($ref->{'rank'} =~ /^(species|genus|family)$/i) {
            $refTaxName->{lc($1)} = $ref->{'name'};
        }
    }

    return $refTaxName;
}

###############################################################################
#
# GBK_GetXrefInfo(@db_xref_lists)
#
# Each argument is a reference to a list of db_xref strings.  Returns the
# two-element list ($gi, $geneid) holding the digits of the first "GI:nnn"
# and the first "GeneID:nnn" cross reference found (matched without regard
# to case).  $gi is '' and $geneid is undef when nothing was found.
sub GBK_GetXrefInfo {
    my(@record_list) = @_;
    my($gi) = '';
    my($geneid);

    for my $xrefList (@record_list) {
        for my $entry (@{$xrefList}) {
            # An entry that looks like a GI is never inspected for a GeneID.
            if ($entry =~ /gi\:(\d+)/i) {
                $gi ||= $1;
                next;
            }
            if ($entry =~ /geneid\:(\d+)/i) {
                $geneid ||= $1;
            }
        }
    }

    return ($gi, $geneid);
}

################################################################################
#
# printGenome($filename, $spid, $sp, $genbank, $refKegg, $refNcbi, $refGold)
#
# Writes the genome-level attributes of the current GenBank entry as
# "key<TAB>value" lines to "$filename.txt".
#
#   $filename  output path without the ".txt" suffix
#   $spid      species id (not used for the output itself)
#   $sp        species code; written as 'sp' and used as key into $refKegg
#   $genbank   GenBank parser object positioned on an entry
#   $refKegg   parsed KEGG organism list (->{'sym'}->{$sp}->{'accession'})
#   $refNcbi   parsed NCBI genome list   (->{'accession'}->{$acc}->[0])
#   $refGold   parsed GOLD table; getByTaxonId() / getByStrain() are called
#
# When the -TEST switch is set, output goes to STDOUT if its value is
# "genome"; for any other value nothing is written.
# Dies when the output cannot be opened.
sub printGenome {
    my($filename, $spid, $sp, $genbank, $refKegg, $refNcbi, $refGold) = @_;
    my($spName, $abbrev);
    my($strain)       = "";
    my($taxid)        = 0;
    my($specweight)   = 2030000000;
    my($source)       = "";
    my($institution)  = "";
    my($wwwlink)      = "";
    my($medline)      = "";
    my($journal)      = "";
    my($dateRelease)  = "";
    my($dateModify)   = "";

    # Decide where to write.  In test mode the target must be exactly "-"
    # (">-" is STDOUT); appending ".txt" to it would create a file "-.txt".
    my($outfile) = "$filename.txt";
    if ($main::TEST) {
        if ($main::TEST =~ /^genome$/i) {
            $outfile = "-";
        }
        else {
            return;
        }
    }

    # Values taken from the *.gbk entry itself
    if ($genbank->{'data'}->{'date_YYYYMMDD'}) {
        my($y, $m, $d) = ($genbank->{'data'}->{'date_YYYYMMDD'} =~ /(\d{4})(\d{2})(\d{2})/);
        $dateModify  = sprintf("%04d-%02d-%02d", $y, $m, $d);
    }
    $medline = $genbank->{'data'}->{'medline'};
    $journal = ${$genbank->{'data'}->{'journal'}}[0];

    # Only the first 'source' feature is examined.
    foreach my $ent (@{$genbank->{'data'}->{'features'}}) {
        my($keyname) = $ent->{'keyname'};
        next if ($keyname !~ /^source$/i);

        # organism name, without the surrounding quotes
        $spName = $ent->{'attr'}->{'organism'};
        $spName =~ s/^"//;
        $spName =~ s/"$//;

        # abbreviation: "Genus sp." is kept as it is, everything else
        # becomes "<first letter of genus>.<second word>"
        my($abbrev1, $abbrev2, $abbrev3) = ($spName =~ /^(\S)(\S+)\s+(\S+)/);
        if ($abbrev3 =~ /^sp\.*$/) {
            $abbrev = "$abbrev1$abbrev2 $abbrev3";
        }
        else {
            $abbrev = "$abbrev1.$abbrev3";
        }

        # strain, without the surrounding quotes
        $strain = $ent->{'attr'}->{'strain'};
        $strain =~ s/^"//;
        $strain =~ s/"$//;

        # taxonomy id from the first "taxon:nnn" cross reference
        foreach my $xref (@{$ent->{'attr'}->{'db_xref'}}) {
            if ($xref =~ /taxon:(\d+)/) {
                $taxid = $1;
                last;
            }
        }

        $source = 'refseq';
        last;
    }

    # Values from the NCBI list (found through the first KEGG accession)
    # replace the ones read from the entry.
    my($kegg) = $refKegg->{'sym'}->{"$sp"};
    my($acc) = $kegg->{'accession'}->[0];
    my($ncbi) = $refNcbi->{'accession'}->{"$acc"}->[0];
    if ($ncbi) {
        $taxid       = $ncbi->{'taxid'};
        $dateRelease = $ncbi->{'date_released'};
        $dateModify  = $ncbi->{'date_modified'};
#        $mbgdUpdate = '\N';
    }

    # A GOLD record (by taxonomy id, else by strain) overrides the release date.
    my($gold) = $refGold->getByTaxonId($taxid);
    $gold = $refGold->getByStrain($strain) if (! $gold);
    if ($gold) {
        $dateRelease    = $gold->[0]->{'DATE'};
    }
    if ($dateRelease) {
        # The three numbers of the date are packed in the order they appear,
        # so an earlier date gives a larger weight.
        my($yyyymmdd) = sprintf("%04d%02d%02d", ($dateRelease =~ m#(\d+)[\/\-](\d+)[\/\-](\d+)#));
        $specweight = (20300000 - $yyyymmdd) * 100;
    }

    # Taxonomy names
    my($refTaxName) = getTaxonomyNames($taxid);
    my($family)  = $refTaxName->{'family'};
    my($genus)   = $refTaxName->{'genus'};
    my($species) = $refTaxName->{'species'};

    # Values already stored in the database would take precedence, but the
    # query is commented out, so @genomeList is always empty here.
    my($db) = MBGD::DB->new($main::MYSQL_DBACCUM);
    my($opt) = {};
    my(@genomeList);
#    @genomeList = MBGD::Genome->get($db, [$sp], $opt);
    if (scalar(@genomeList) != 0) {
        my($ent) = shift(@genomeList);
        $abbrev      = $ent->{"abbrev"};
        $spName      = $ent->{"orgname"};
        $strain      = $ent->{"strain"};
#        $taxid       = $ent->{"taxid"};
        $specweight  = $ent->{"specweight"};
        $source      = $ent->{"source"};
#        $version     = $ent->{"version"};
        $institution = $ent->{"institution"};
        $wwwlink     = $ent->{"wwwlink"};
        $medline     = $ent->{"medid"};
        $journal     = $ent->{"journal"};
        $dateRelease = $ent->{"date_release"};
#        $dateModify  = $ent->{"date_modify"};
    }

    # Write the file
    my($fh) = FileHandle->new(">$outfile") || die("Can not open $outfile($!)");

    $fh->print('sp',          "\t", $sp,          "\n");     # sp
    $fh->print('abbrev',      "\t", $abbrev,      "\n");     # abbrev
    $fh->print('orgname',     "\t", $spName,      "\n");     # orgname
    $fh->print('strain',      "\t", $strain,      "\n");     # strain
    $fh->print('taxid',       "\t", $taxid,       "\n");     # taxid
    $fh->print('tax_family',  "\t", $family,      "\n");     # family
    $fh->print('tax_genus',   "\t", $genus,       "\n");     # genus
    $fh->print('tax_species', "\t", $species,     "\n");     # species
    $fh->print('specweight',  "\t", $specweight,  "\n");     # specweight
    $fh->print('source',      "\t", $source,      "\n");     # source
    $fh->print('institution', "\t", $institution, "\n");     # institution
    $fh->print('wwwlink',     "\t", $wwwlink,     "\n");     # wwwlink
    $fh->print('medid',       "\t", $medline,     "\n");     # medid
    $fh->print('journal',     "\t", $journal,     "\n");     # journal
    $fh->print('date_release',"\t", $dateRelease, "\n");     # date_release
    $fh->print('date_modify', "\t", $dateModify,  "\n");     # date_modify
    $fh->close();

    return;
}

################################################################################
#
# printChromosome($file, $spid, $genbank, $optRoman)
#
# Writes the description of one replicon (chromosome, plasmid, contig or
# mitochondrion) as "key<TAB>value" lines to "$file.chromosome.txt" and
# returns its accession.
#
#   $file      output path without the ".chromosome.txt" suffix
#   $spid      species id, written as 'spid'
#   $genbank   GenBank parser object positioned on an entry
#   $optRoman  hash reference; {'roman'} is set to 1 when the replicon is
#              named "I", which later selects roman-numeral ordering
#
# When the -TEST switch is set, output goes to STDOUT if its value is
# "chromosome"; for any other value nothing is written and nothing is
# returned.  Dies when the output cannot be opened.
sub printChromosome {
    my($file, $spid, $genbank, $optRoman) = @_;
    my($name, $seqno, $type, $shape, $accession, $gi);

    # Decide where to write.  In test mode the target must be exactly "-"
    # (">-" is STDOUT); appending ".txt" to it would create a file "-.txt".
    my($outfile) = "$file.chromosome.txt";
    if ($main::TEST) {
        if ($main::TEST =~ /^chromosome$/i) {
            $outfile = "-";
        }
        else {
            return;
        }
    }

    # seqno is left blank here; setChrSeqno() assigns it afterwards.
    $seqno = '';

    # TYPE(chromosome/plasmid/contig)
    $type = "chromosome";
    if ($main::contig) {
        $type = "contig";
    }
    elsif ($genbank->{'definition'} =~ /plasmid/i) {
        $type = "plasmid";
    }
    elsif ($genbank->{'definition'} =~ /extrachromosomal/i) {
        # mja: an extrachromosomal element is handled as a plasmid
        $type = "plasmid";
    }
    elsif ($genbank->{'definition'} =~ /mitochondrion/i) {
        # sce: mitochondrion
        $type = "mitochondrion";
    }

    # NAME: taken from the first 'source' feature
    foreach my $ent (@{$genbank->{'data'}->{'features'}}) {
        my($keyname) = $ent->{'keyname'};
        next if ($keyname !~ /^source$/i);

#        $name = $type;
        $name = '';
        if ($type =~ /chromosome/) {
            if ($ent->{'attr'}->{'chromosome'}) {
                if ($ent->{'attr'}->{'chromosome'} =~ /^$type$/i) {
                    # the qualifier only repeats the type: keep the name blank
#                    $name = 'chromosome ' . $ent->{'attr'}->{'chromosome'};
                } else {
                    $name = $ent->{'attr'}->{'chromosome'};
                }
            }
        }
        elsif ($type =~ /plasmid/) {
            if ($ent->{'attr'}->{'plasmid'}) {
                if ($ent->{'attr'}->{'plasmid'} =~ /^$type$/i) {
                    # the qualifier only repeats the type: keep the name blank
#                    $name = 'plasmid ' . $ent->{'attr'}->{'plasmid'};
                } else {
                    $name = $ent->{'attr'}->{'plasmid'};
                    $name =~ s#symbiotic plasmid\s+##; # for gm00302
                }
            }
            elsif ($genbank->{'definition'} =~ /$type\s+(\S+)/) {
                $name = $1;
                $name =~ s#[\.\,]+$##;       # drop trailing ',' and '.'
            }
            elsif ($ent->{'attr'}->{'note'} =~ /extrachromosomal\s+(\S+)/) {   # for gm00004(mja)
                $name = $1;
            }
        }
        elsif ($type =~ /mitochondrion/i) {
            # no name for a mitochondrion
        }
        last;
    }
    $name =~ s#"##g;

    # SHAPE: a blank or missing shape is reported as 'linear'
    if ($genbank->{'shape'} =~ /^\s*$/) {
        $shape = "linear";
    }
    else {
        $shape = $genbank->{'shape'};
    }

    # The VERSION line supplies the accession and the gi.
    ($accession, $gi) = split(/\s+/, $genbank->{'version'});
    if ($accession =~ /^\s*$/) {
        # no accession in VERSION: use the ACCESSION line
        $accession = $genbank->{'accession'};
    }
    if ($accession =~ /^\s*$/) {
        # no accession in ACCESSION either: use the locus name
        $accession = $genbank->{'locus_name'};
    }

    # Values already stored in the database would take precedence, but the
    # query is commented out, so @chrList is always empty here.
    my($db) = MBGD::DB->new($main::MYSQL_DBACCUM);
    my($opt) = {};
    $opt->{'keys'} = "sp:accession";

    my(@chrList);
#    @chrList = MBGD::Chromosome->get($db, ["$spid:$accession"], $opt);
    # use data in the database if exist.
    if (scalar(@chrList) != 0) {
        my($ent) = shift(@chrList);
        $name  = $ent->{'name'};
        $seqno = $ent->{'seqno'};
        $type  = $ent->{'type'};
        $shape = $ent->{'shape'};
        $gi    = $ent->{'gi'};
    }
    if ($name =~ /^I$/) {
        $optRoman->{'roman'} = 1;
    }

    # Write the file
    my($fh) = FileHandle->new(">$outfile") || die("Can not open $outfile($!)");
    $fh->print('spid',      "\t", $spid,      "\n");
    $fh->print('seqno',     "\t", $seqno,     "\n");
    $fh->print('name',      "\t", $name,      "\n");
    $fh->print('type',      "\t", $type,      "\n");
    $fh->print('shape',     "\t", $shape,     "\n");
    $fh->print('accession', "\t", $accession, "\n");
    $fh->print('gi',        "\t", $gi,        "\n");
    $fh->close();

    return $accession;
}

################################################################################
#
# printChrSeq($file, $genbank)
#
# Writes the nucleotide sequence of the current GenBank entry, followed by a
# newline, to "$file.chrseq".  When the -TEST switch is set, the sequence is
# written to STDOUT if its value is "chrseq"; for any other value nothing is
# written.  Dies when the output cannot be opened.
sub printChrSeq {
    my($file, $genbank) = @_;

    my($target) = "$file.chrseq";
    if ($main::TEST) {
        return if ($main::TEST !~ /^chrseq$/i);
        $target = "-";
    }

    my($out) = FileHandle->new(">$target") || die("Can not open $target($!)");
    my($sequence) = $genbank->{'data'}->{'seq'};
    $out->print($sequence, "\n");
    $out->close();

    return;
}

################################################################################
#
# printGene($outfile, $spid, $sp, $genbank, $chrAcc, $orfnameHash)
#
# Walks over the features of the current GenBank entry and APPENDS one
# record per CDS / *RNA feature to three files:
#   "$outfile.gene"     tab-delimited gene table
#   "$outfile.geneseq"  nucleotide sequences (FASTA, via print_fasta)
#   "$outfile.protseq"  protein sequences (FASTA, CDS only)
#
#   $spid         species id; first column and prefix of the FASTA ids
#   $sp           species code; used for generated names and messages
#   $genbank      GenBank parser object positioned on an entry
#   $chrAcc       accession of the replicon, written to the gene table
#   $orfnameHash  hash reference of names already used; updated here so
#                 that names stay unique across calls
#
# When the -TEST switch is set, only the file named by its value (gene,
# geneseq or protseq) goes to STDOUT, the others go to /dev/null; for any
# other value the function returns without doing anything.
#
# GBK_Translate, GBK_RNA and postcheck_GBK_Translate are not defined in this
# file -- presumably they come from the GB/*.pl scripts loaded by
# ConvGenBank2Mbgd (TODO confirm).
sub printGene {
    my($outfile, $spid, $sp, $genbank, $chrAcc,$orfnameHash) = @_;
    my($fileGene);
    my($fileGeneSeq);
    my($fileProtSeq);
    my($seq);
    my($keyname);
    my($key);
    my($name, $gene, $desc, $protid, $gi, $geneid);
    my($from, $to, $dir, $location, $comp);
    my($type);
    my($start, $len);
    my($geneSeq);
    my($protSeq);
    my($infoOrfname);
    my($infoGbk);
    my($orfname);
    my($fh_gene);
    my($fh_geneseq);
    my($fh_protseq);
    my(%count);
    # sequences outside this length range (in bases) only trigger a warning
    my($MIN_SEQLEN, $MAX_SEQLEN) = (48, 60000);

    ## sequences taken from FASTA file
    ## global variable which is initialized for each sequence
    ## (getFastaSeq() fills it on first use; 'local' restores it on return)
    local($FastaAASeq) = ();

    $fileGene     = "$outfile.gene";
    $fileGeneSeq  = "$outfile.geneseq";
    $fileProtSeq  = "$outfile.protseq";
    if ($main::TEST) {
        # test mode: discard everything except the one requested output
        $fileGene     = "/dev/null";
        $fileGeneSeq  = "/dev/null";
        $fileProtSeq  = "/dev/null";
        if ($main::TEST =~ /^gene$/i) {
            $fileGene = "-";
        }
        elsif ($main::TEST =~ /^geneseq$/i) {
            $fileGeneSeq = "-";
        }
        elsif ($main::TEST =~ /^protseq$/i) {
            $fileProtSeq = "-";
        }
        else {
            return;
        }
    }

    # opened in append mode: a file may hold several GenBank entries
    $fh_gene     = new FileHandle(">>$fileGene")     || die("Can not open $fileGene($!)");
    $fh_geneseq  = new FileHandle(">>$fileGeneSeq")  || die("Can not open $fileGeneSeq($!)");
    $fh_protseq  = new FileHandle(">>$fileProtSeq")  || die("Can not open $fileProtSeq($!)");

    $seq =  $genbank->{'data'}->{'seq'};

    my($ent);
    foreach $ent (@{$genbank->{'data'}->{'features'}}) {
        $keyname = $ent->{'keyname'};
        ($from,$to,$dir) = $genbank->get_region($ent->{'location'});
        # keep only the first run of digits of each coordinate
        ($from)  = ($from =~ /(\d+)/);
        ($to)    = ($to  =~ /(\d+)/);
        $location = $ent->{'location_string'};

        if ($keyname =~ /^gene$/i) {
            # a 'gene' feature starts a new record: remember its qualifiers
            # (without surrounding quotes) for the CDS / RNA that follows
            $infoOrfname = {};

            $infoOrfname->{'gene'} = {};
            foreach $key (keys(%{$ent->{'attr'}})) {
                $infoOrfname->{'gene'}->{"$key"} = $ent->{'attr'}->{"$key"};
                $infoOrfname->{'gene'}->{"$key"} =~ s#^"##;
                $infoOrfname->{'gene'}->{"$key"} =~ s#"$##;
            }
        } elsif ($keyname =~ /^mrna$/i) {
            ### skip mRNA
        } elsif (($keyname =~ /^(cds)$/i) ||
                 ($keyname =~ /^(.*rna)$/i)) {
            $type = $1;      # CDS or *RNA (spelled as in the feature key)


            # qualifiers of this feature, without surrounding quotes
            $infoOrfname->{"$type"}  = {};
            foreach $key (keys(%{$ent->{'attr'}})) {
                $infoOrfname->{"$type"}->{"$key"} = $ent->{'attr'}->{"$key"};
                $infoOrfname->{"$type"}->{"$key"} =~ s#^"##;
                $infoOrfname->{"$type"}->{"$key"} =~ s#"$##;
            }

            if (exists($ent->{'attr'}->{"pseudo"})) {
                # pseudo gene ---> SKIP
                $infoOrfname = {};    # clear the data collected so far
                next;
            }

            # derive orfname / gene / product through the external helpers
            my($wtype);
            if ($type =~ /cds/i) {
                $infoGbk = &GBK_Translate($infoOrfname);
                $count{cds}++;
                $wtype = 'cds';
            }
            else {
                $infoGbk = &GBK_RNA($infoOrfname, $type);
                $count{rna}++;
                $wtype = 'rna';
            }
            &postcheck_GBK_Translate($infoGbk);
            if (defined($infoGbk->{"pseudo"})) {
                # the helpers marked it as a pseudo gene ---> SKIP
                $infoOrfname = {};    # clear the data collected so far
                next;
            }

            $name    = $infoGbk->{'orfname'};   $name =~ tr/a-z/A-Z/;
            $gene    = $infoGbk->{'gene'};
            $desc    = $infoGbk->{'product'};
            $protid  = $infoOrfname->{"$type"}->{'protein_id'};
            ($gi,$geneid) = &GBK_GetXrefInfo($infoOrfname->{"$type"}->{'db_xref'}, $infoOrfname->{'gene'}->{'db_xref'});
            if (defined($infoGbk->{'translation'})) {
                $ent->{'attr'}->{'translation'} = $infoGbk->{'translation'};
            }
            if ($keyname eq 'CDS' && ! $ent->{'attr'}->{'translation'}) {
                ## no translation field
                $ent->{'attr'}->{'translation'} = &getFastaSeq($sp, $gi);
            }
            # NOTE: these two read the literal key 'CDS', not "$type"
            my$transl_table = $ent->{'attr'}->{'transl_table'} = $infoOrfname->{'CDS'}->{'transl_table'};
            my$codon_start  = $ent->{'attr'}->{'codon_start'}  = $infoOrfname->{'CDS'}->{'codon_start'};

            if ($name =~ /^\s*$/) {
                # no name: build one from the gi, else from a running counter
                if ($gi ne '') {
                    $name = "${sp}_" . $gi;
                } elsif ($keyname ne 'CDS') {
                    $main::SpecIndexKey{$keyname}++;
                    $name = "${sp}_${keyname}_" . $main::SpecIndexKey{$keyname};
                } else {
                    $name = "${sp}_" . $main::SpecIndex;
                    $main::SpecIndex++;
                }
                $name =~ tr/a-z/A-Z/;
                &warning_out("$keyname Name is blank. ---> $name: ($from,$to)", $wtype,\%count);
            } elsif ($orfnameHash->{"$name"}) {
                # name already used: append the first free "-N" suffix (N >= 2)
                my($ext);
                my($prevname) = $name;
                for($ext = 2; ; $ext++) {
                    if (! exists($orfnameHash->{"$name-$ext"})) {
                        $name = "$name-$ext";
                        last;
                    }
                }
                $name =~ tr/a-z/A-Z/;
                &warning_out("already used $keyname Name:$prevname. ---> $name", $wtype, \%count);
            } elsif ($name =~ /\s/) {
                &warning_out("The orfname contains a space character: $name", $wtype, \%count);
            }
            $orfnameHash->{"$name"} = 1;

            if ($keyname eq 'CDS' && ! $desc) {
                # no product: fall back to the description files
                $desc = &getDescription($sp, $gi);
                if (! $desc) {
                    &warning_out("No product name: $name",$wtype,\%count);
                }
                if ($desc eq '-') {
                    $desc = '';
                }
            }

            # span between the two coordinates; out-of-range only warns
            $len = $to - $from + 1;
            if ($len < $MIN_SEQLEN || $len > $MAX_SEQLEN) {
                &warning_out("sequence length is too long or short: $name: $len", $wtype, \%count);
            }

            if ($main::NO_LOCATION) {
                $from = $to = $dir = $location = '';
            }

            # one row of the gene table (15 tab-separated columns)
            $fh_gene->print($spid,    "\t");
            $fh_gene->print($name,    "\t");
            $fh_gene->print($gene,    "\t");
            $fh_gene->print($chrAcc,  "\t");
            $fh_gene->print($from,    "\t");
            $fh_gene->print($to,      "\t");
            $fh_gene->print($dir,     "\t");
            $fh_gene->print($location,    "\t");
            $fh_gene->print($type,    "\t");
            $fh_gene->print($transl_table,    "\t");
            $fh_gene->print($codon_start,    "\t");
            $fh_gene->print($protid,  "\t");
            $fh_gene->print($gi,      "\t");
            $fh_gene->print($geneid,      "\t");
            $fh_gene->print($desc,    "\n");

            # nucleotide sequence of the feature
            $geneSeq = $genbank->get_subseq($ent->{'location'});
            $geneSeq->print_fasta("$spid:$name", {fh => $fh_geneseq});

            if ($type =~ /^cds$/i) {
                # protein sequence
                ($protSeq) = ($ent->{'attr'}->{'translation'} =~ /^"*([^"]+)"*$/);
                if ($protSeq eq '') {
                    # no usable translation: translate the nucleotide sequence
                    &warning_out("Not found prot-sequence('translation').($sp:$name)", $wtype, \%count);
                    my($codon_start) = $ent->{'attr'}->{'codon_start'};
                    my($transl_table) = $ent->{'attr'}->{'transl_table'};
                    $protSeq = main::na2aa($geneSeq->getseq(), $codon_start, $transl_table);
                } else {
                    # the "+ 1" presumably accounts for the stop codon (TODO confirm)
                    if (length($geneSeq->getseq()) / 3 != length($protSeq) + 1) {
                        &warning_out("Not match gene and protein sequence length.($sp:$name)", $wtype, \%count);
                    }
                }

                # write the protein as FASTA, 60 residues per line
                if ($protSeq ne '') {
                    $protSeq =~ s#(.{1,60})#$1\n#g;
                    $fh_protseq->print(">$spid:$name\n");
                    $fh_protseq->print($protSeq);
                }
            }

            $infoOrfname = {};    # clear the data collected so far
        }
    }
    # per-entry summary: number of records (number of warnings)
    print STDERR "Statistics:\n";
    print STDERR "CDS($sp): $count{cds} ($count{warnings_cds})\n";
    print STDERR "RNA($sp): $count{rna} ($count{warnings_rna})\n";
    print STDERR "\n";

    $fh_protseq->close();
    $fh_geneseq->close();
    $fh_gene->close();

    return;
}
################################################################################
# getFastaSeq($sp, $gi)
#
# Returns the amino-acid sequence stored under the given gi number (a
# leading "GI:" is removed) in the *.faa files of
# "$main::DIR_species/$sp/GB", or undef when there is none.
#
# All *.faa files are read on the first call and kept in the global
# $FastaAASeq (printGene() resets it with 'local').  Files ending in ".gz"
# or ".Z" are read through "$main::CMD_gzip -d -c".
# Dies when the directory or one of the files cannot be opened.
sub getFastaSeq {
    my($sp, $gi) = @_;

    $gi =~ s/GI://;
    if (! defined $FastaAASeq) {
        # Mark the cache as loaded even when no *.faa file exists, so the
        # directory is scanned once and not again on every call.
        $FastaAASeq = {};

        my($dir) = "$main::DIR_species/$sp/GB";
        my($dh) = DirHandle->new("$dir") || die("Can not open $dir($!)");
        while (defined(my $file = $dh->read())) {
            next if ($file !~ /\.faa/);

            my($f) = "$dir/$file";
            # Decide by the suffix of the file name only; a 'Z' or 'gz'
            # elsewhere in the path does not make the file compressed.
            if ($file =~ /\.(?:gz|Z)$/) {
                $f = "$main::CMD_gzip -d -c $f|";
            }
            my($fh) = FileHandle->new("$f") || die("Can not open $f($!)");

            # gi of the record currently being read (second '|' field)
            my($ginum);
            while (defined(my $line = $fh->getline())) {
                if ($line =~ /^>(\S*)/) {
                    (undef, $ginum) = split(/\|/, $1);
                    $FastaAASeq->{$ginum} = "" if (defined($ginum));
                }
                elsif (defined($ginum)) {
                    chomp($line);
                    $FastaAASeq->{$ginum} .= $line;
                }
            }
            $fh->close();
        }
        $dh->close();
    }
    print STDERR "Error: sequence not found\n" if (! $FastaAASeq->{$gi});
    return $FastaAASeq->{$gi};
}

# getDescription($sp, $gi)
#
# Returns the product description stored under the given gi number (a
# leading "GI:" is removed) in the *.ptt files of
# "$main::DIR_species/$sp/GB", or undef when there is none.
#
# All *.ptt files are read on the first call and kept in the global
# $Description.  Each data row is split on tabs; the 4th column is the key
# and the 9th column the description.  Files ending in ".gz" or ".Z" are
# read through "$main::CMD_gzip -d -c".
# Dies when the directory or one of the files cannot be opened.
sub getDescription {
    my($sp, $gi) = @_;

    $gi =~ s/GI://;
    if (! defined $Description) {
        $Description = {};
        my($dir) = "$main::DIR_species/$sp/GB";
        my($dh) = DirHandle->new("$dir") || die("Can not open $dir($!)");
        while (defined(my $file = $dh->read())) {
            next if ($file !~ /\.ptt/);

            my($f) = "$dir/$file";
            # Decide by the suffix of the file name only; a 'Z' or 'gz'
            # elsewhere in the path does not make the file compressed.
            if ($file =~ /\.(?:gz|Z)$/) {
                $f = "$main::CMD_gzip -d -c $f|";
            }
            my($fh) = FileHandle->new("$f") || die("Can not open $f($!)");
            while (defined(my $line = $fh->getline())) {
                # column header line
                next if ($line =~ /^Location/);

                chomp($line);
                my($loc,$str,$len,$ginum,$gene,$syn,$code,$cog,$prod)
                    = split(/\t/, $line);

                # Title lines and rows without an id carry no usable key.
                next if (! defined($ginum) || $ginum eq '');
                $Description->{$ginum} = $prod;
            }
            $fh->close();
        }
        $dh->close();
    }
    print STDERR "Error: product not found\n" if (! $Description->{$gi});
    return $Description->{$gi};
}

################################################################################
#
# ConvGenBank2Mbgd($spid, $sp, $refKegg, $refNcbi, $refGold)
#
# Converts every GenBank file ("*.gbk", "*.gbk.gz", "*.gbk.Z") found in
# "$main::DIR_species/$spid/GB" into MBGD files:
#   "$main::DIR_species/$spid/gm/genome.txt"    (printGenome)
#   "$main::DIR_species/$spid/gm/data/<base>.*" (printChromosome,
#                                                printChrSeq, printGene)
# and finally numbers the replicons with setChrSeqno().
#
#   $spid     species id; selects the directories and the converter script
#   $sp       species code handed on to the print functions
#   $refKegg, $refNcbi, $refGold
#             parsed KEGG / NCBI / GOLD tables handed on to printGenome()
#
# Returns nothing.  Only prints a message when the GB directory is missing;
# dies when that directory cannot be read.
sub ConvGenBank2Mbgd {
    my($spid, $sp, $refKegg, $refNcbi, $refGold) = @_;

    # Running number used by printGene() to name ORFs that have no name;
    # a global so that it keeps counting across all files of one species.
    $main::SpecIndex = 1;

    # ORF names already used for this species (shared by all files)
    my($orfnameHash) = {};

    my($dirSpid) = "$main::DIR_species/$spid";

    # input directory
    my($dirSpGb) = "$dirSpid/GB";
    if (! -e "$dirSpGb") {
        print STDERR "Can not found [$dirSpGb].\n";
        return;
    }

    # make output dir
    my($dirGm)   = "$dirSpid/gm";
    my($dirData) = "$dirGm/data";
    mkpath($dirData, 0, 0750);

    # printChromosome() switches this on when a replicon is named "I"
    my($optRoman) = {};
    $optRoman->{'roman'} = 0;

    my(%fileDone);
    my($wtype) = 'file';
    my($dh) = DirHandle->new("$dirSpGb") || die("Can not open $dirSpGb($!)");
    foreach my $file (sort($dh->read())) {
        next if ($file =~ /^\./);
        next if ($file !~ /\.gbk/);

        # A plain file and its compressed copy share one key in %fileDone,
        # so each GenBank file is converted only once.
        my($filename);
        if ($file =~ /\.gbk$/) {
            if (exists($fileDone{"$file"})) {
                my($msg) = "This file has been processed.($file)\n";
                warning_out($msg, $wtype, \%main::count);
                next;
            }
            $fileDone{"$file"} = 1;
            $filename = "$dirSpGb/$file";
        } elsif ($file =~ /(.+\.gbk)\.(gz|Z)$/) {
            if (exists($fileDone{"$1"})) {
                my($msg) = "This file has been processed.($file)\n";
                warning_out($msg, $wtype, \%main::count);
                next;
            }
            $fileDone{"$1"} = 1;
            $filename = "$main::CMD_gzip -d -c $dirSpGb/$file |";
        } else {
            next;
        }
        my($genbank) = GenBank->new($filename);

        # Load the default converter script, then the species specific one
        # if there is one.  'do FILE' traps errors itself and leaves them in
        # $@, so check $@ right away and report instead of losing it.
        my($scriptDefault) = "$main::DIR_build/GB/default.pl";
        my($scriptSpecies) = "$main::DIR_build/GB/$spid.pl";
        foreach my $script ($scriptDefault, $scriptSpecies) {
            next if ($script eq $scriptSpecies && ! -f $script);

            $@ = '';
            do "$script";
            if ($@) {
                my($err) = $@;
                chomp($err);
                warning_out("Can not load $script: $err", $wtype, \%main::count);
            }
        }

        # base name of the output files
        my($base) = $file;
        $base =~ s#\.gbk.*$##;

        # clear output files
        unlink("$dirGm/genome.txt",
               "$dirData/$base.chromosome.txt",
               "$dirData/$base.chrseq",
               "$dirData/$base.gene",
               "$dirData/$base.genefeat",
               "$dirData/$base.geneseq",
               "$dirData/$base.protseq",
               );

        # one pass per entry of the GenBank file
        while ($genbank->read_entry()) {
            &printGenome("$dirGm/genome", $spid, $sp, $genbank, $refKegg, $refNcbi, $refGold);

            my($chrAcc) = &printChromosome("$dirData/$base", $spid, $genbank, $optRoman);

            if ($main::NO_LOCATION) {
                # only create an empty sequence file
                IO::File->new(">$dirData/$base.chrseq");
            }
            else {
                &printChrSeq("$dirData/$base", $genbank);
            }

            &printGene("$dirData/$base", $spid, $sp, $genbank, $chrAcc,$orfnameHash);
        }
    }
    undef($dh);

#    if ($sp eq 'sce') {
#        $opt->{'roman'} = 1;
#    }
    &setChrSeqno($dirData, $optRoman);

    return;
}

################################################################################
#
# setChrSeqno($dirname, $opt)
#
# Assigns sequence numbers to the "*.chromosome.txt" files of $dirname and
# rewrites each file with its keys sorted.
#
#   $dirname  directory holding the chromosome files
#   $opt      hash reference; handed to SeqOrderFactory->create() to select
#             the ordering, and {force} renumbers files that already have a
#             seqno
#
# Unless {force} is set, the function returns without changing anything as
# soon as it meets a file whose seqno is not blank.  Files are ordered with
# funcSortChr and numbered from 1.  A blank name is replaced by the running
# count of replicons of the same type.  Dies when a directory or file cannot
# be opened.
sub setChrSeqno {
    my($dirname, $opt) = @_;
    my(@chromosomes);

    my($orderer) = SeqOrderFactory->create($opt);

    my($dir) = DirHandle->new("$dirname") || die("Can not open $dirname($!)");
    foreach my $entry ($dir->read()) {
        next unless ($entry =~ /\.chromosome\.txt$/);

        # Read MBGD chromosome file ("key<TAB>value" lines)
        my($path) = "$dirname/$entry";
        my(%info);
        my($in) = FileHandle->new("$path") || die("Can not open $path($!)");
        while (my $line = $in->getline()) {
            next if ($line =~ /^\s*$/);     # blank line
            next if ($line =~ /^\s*#/);     # comment line
            $line =~ s#[\r\n]*$##;

            my($key, $value) = split(/\t/, $line);
            $info{"$key"} = $value;
        }
        $in->close();

        if (! $opt->{force} && $info{'seqno'} ne '') {
            # seqno is already assigned -- skip
            undef($dir);
            return;
        }

        my($chr) = {
            'file' => $path,
            'info' => \%info,
            'type' => $info{'type'},
            'name' => $info{'name'},
        };

        ## set $chr->{order} for assigning seqno
        $orderer->set_seq_order($chr);

        push(@chromosomes, $chr);
    }
    undef($dir);

    # assign seqno in sorted order and write the files back
    my($next) = 1;
    my(%typeCount) = ();
    foreach my $chr (sort funcSortChr @chromosomes) {
        my($path) = $chr->{'file'};
        my($info) = $chr->{'info'};
        $info->{'seqno'} = $next++;
        my($typ) = $info->{'type'};
        $typeCount{"$typ"}++;

        my($out) = FileHandle->new(">$path") || die("Can not open $path($!)");
        foreach my $key (sort(keys(%{$info}))) {
            my($value) = $info->{"$key"};
            if (($key eq 'name') && ($value =~/^\s*$/)) {
                $value = $typeCount{"$typ"};
            }
            $out->print(join("\t", $key, $value), "\n");
        }
        $out->close();
    }
}
# getSeqLen($filename)
#
# Returns the number of sequence characters in the "*.chrseq" file that
# belongs to the given chromosome file.  $filename may end in
# ".chromosome.txt" (the name the chromosome files actually have) or in
# ".chromosome"; any other name is read as it is.  Lines starting with '>'
# are not counted.  Dies when the file cannot be opened.
sub getSeqLen {
    my($filename) = @_;

    # The chromosome files are written as "<base>.chromosome.txt"; without
    # the optional ".txt" the pattern never matched and the length of the
    # chromosome file itself was returned.
    $filename =~ s#\.chromosome(?:\.txt)?$#.chrseq#;

    my($fh) = FileHandle->new("$filename") || die("Can not open $filename($!)");
    my($len) = 0;
    while (defined(my $line = $fh->getline())) {
        chomp($line);

        next if ($line =~ /^>/);

        # add up the lengths instead of keeping the whole sequence in memory
        $len += length($line);
    }
    $fh->close();

    return $len;
}

################################################################################
# warning_out($message, $type, $count)
#
# Prints "WARNING: $message" to STDERR and increments the counter
# "warnings_$type" in the hash referenced by $count.
sub warning_out {
    my($message,$type,$count) = @_;
    my($slot) = "warnings_$type";

    print STDERR "WARNING: $message\n";
    $count->{$slot}++;
}

################################################################################
# Comparison function used to sort chromosome data when assigning seqno
# Sort comparator (works on $a / $b): entries are grouped by 'type' in
# string order (so "chromosome" sorts before "plasmid"), and entries of the
# same type are ordered by ascending numeric 'order'.
sub funcSortChr {
    # 'cmp' is non-zero exactly when the types differ, so '||' falls through
    # to the numeric comparison only for entries of the same type.
    return ($a->{'type'} cmp $b->{'type'})
        || ($a->{'order'} <=> $b->{'order'});
}

################################################################################
## Package SeqOrder: methods to determine the order of sequences (chromosomes)
##
package SeqOrderFactory;    # factory class
# create($opt): returns a RomanSeqOrder object when $opt->{roman} is true,
# a plain SeqOrder object otherwise.
sub create {
    my($class, $opt) = @_;
    my($impl) = $opt->{roman} ? 'RomanSeqOrder' : 'SeqOrder';
    return $impl->new;
}
package SeqOrder;
# Default ordering: by sequence length.
sub new {
    my($class) = @_;
    my($self) = {};
    return bless($self, $class);
}
# set_seq_order($chrInfo): stores 1000 minus the sequence length of
# $chrInfo->{file} in $chrInfo->{order}, so that a longer sequence gets a
# smaller order value.
sub set_seq_order {
    my($this, $chrInfo) = @_;
    my($length) = main::getSeqLen($chrInfo->{file});
    $chrInfo->{order} = 1000 - $length;
}
package RomanSeqOrder;
@ISA = (qw{SeqOrder});
# Ordering by roman-numeral name: 'I' .. 'XX' map to 1 .. 20.
sub new {
    my($class) = @_;
    my(@numerals) = qw(
        I   II   III   IV   V   VI   VII   VIII   IX   X
        XI  XII  XIII  XIV  XV  XVI  XVII  XVIII  XIX  XX
    );

    # numeral => position in the list, counted from 1
    my(%conv);
    @conv{@numerals} = (1 .. scalar(@numerals));

    return bless({ conv => \%conv }, $class);
}
# set_seq_order($chrInfo): stores the number of the numeral in
# $chrInfo->{name} in $chrInfo->{order}; undef for any other name.
sub set_seq_order {
    my($this, $chrInfo) = @_;
    my($numeral) = $chrInfo->{name};
    $chrInfo->{order} = $this->{conv}->{$numeral};
}

################################################################################
package main;

# Run the conversion only when this file is executed as a script, not when
# it is loaded from another program.
#
# Command line: species ids as arguments.  Switches (perl -s):
#   -ALL        convert every species id listed in spid.tab
#   -DIR=path   directory holding the KEGG / NCBI / GOLD list files
#               (default: $MBGD_HOME/database.work)
if ($0 eq __FILE__) {
    # openLogfile() is not defined in this file
    openLogfile();

    # species ids to convert
    my($refSpid) = getInfoSpecTab("$ENV{'MBGD_HOME'}/etc/spid.tab");
    my(@spidList) = $main::ALL
                  ? sort(keys(%{$refSpid->{'SPID2DIR'}}))
                  : @ARGV;

    # directory of the downloaded list files
    my($dir) = $main::DIR ? $main::DIR : "$ENV{'MBGD_HOME'}/database.work";

    # KEGG
    my($refKegg) = ParseKEGGcomplete->new("$dir/kegg_org_list.html");

    # NCBI
    my($refNcbi) = ParseNCBIcomplete->new("$dir/ncbi_lproks.txt");

    # GOLD
    my($refGold) = ParseGOLDcomplete->new("$dir/gold_published_table.txt");

    for my $spid (@spidList) {
        my($sp) = $refSpid->{'SPID2NAME'}->{"$spid"};
        print STDERR "Start :: $spid($sp)\n";
        $refKegg->updateAccessions($spid);
        ConvGenBank2Mbgd($spid, $sp, $refKegg, $refNcbi, $refGold);
    }

    exit();
}

################################################################################
1;#
################################################################################
