#!/usr/bin/perl -s
use strict;
use IO::Dir;
use IO::File;

###############################################################################
#
# Read a FASTA-like sequence file into a hash reference.
#
# Arguments:
#   $filename - path of the file to read.
#
# Returns:
#   A hash reference mapping each record name (the first run of
#   non-whitespace characters after '>') to the concatenation of the lines
#   that follow that header, with line endings removed.  Lines seen before
#   any '>' header -- including the whole content of a file that has no
#   header at all -- are collected under the empty-string key ''.
#
# Dies if the file cannot be opened.
sub read_seq {
    my($filename) = shift;
    my($seq_ref) = {};

    # Start with the empty name so that headerless input is stored under ''
    # on purpose, not through interpolation of an undefined value.
    my($name) = '';

    # The explicit 'r' mode makes IO::File take $filename literally, so
    # characters such as '>' or '|' in it are not interpreted as an open mode.
    my($fh) = IO::File->new($filename, 'r') || die("Can not open $filename($!)");

    # Test definedness, not truth: a final line consisting of "0" without a
    # trailing newline is false and would otherwise end the loop early.
    while (defined(my $line = $fh->getline())) {
        $line =~ s#[\r\n]*$##;
        if ($line =~ /^>\s*(\S+)/) {
            # Header line: start a new (empty) record.
            $name = $1;
            $seq_ref->{$name} = '';
        }
        else {
            # Sequence line: append to the current record.
            $seq_ref->{$name} .= $line;
        }
    }
    $fh->close();

    return $seq_ref;
}

###############################################################################
#
# Cross-check the converted data files of one species, found under
# $ENV{MBGD_HOME}/species/<spid>/gm/data.
#
# For every "*.gene" file in that directory, the sibling ".chrseq",
# ".geneseq" and ".protseq" files are loaded with read_seq(), and each
# tab-separated line of the ".gene" file is checked:
#   - the "to" value (column index 5) must not exceed the chrseq length;
#   - "<spec>:<name>" (column indexes 0 and 1) must be a key of the geneseq
#     data;
#   - when the type (column index 8) is "cds", case-insensitively,
#     "<spec>:<name>" must also be a key of the protseq data.
# Every failed check prints one WARNING line on STDERR.
#
# Arguments:
#   $spid - species identifier, used as the directory name.
#
# Returns nothing.  Dies if the data directory or the ".gene" file cannot be
# opened (read_seq() likewise dies on an unreadable sequence file).
sub checkConvGB2Mbgd {
    my($spid) = shift;

    my($dir_spid) = sprintf("%s/species/%s", $ENV{'MBGD_HOME'}, $spid);
    my($dir_data) = "$dir_spid/gm/data";

    my($dh) = IO::Dir->new($dir_data) || die("Can not open $dir_data($!)");

    # Test definedness, not truth: a directory entry named "0" is false and
    # would otherwise end the scan early.
    while (defined(my $file = $dh->read())) {
        next if ($file !~ /\.gene$/);

        # Strip only the trailing ".gene"; an unanchored substitution on the
        # full path would rewrite the first ".gene" it meets, which may sit
        # in a directory name or earlier in the file name.
        my($path_gene) = "$dir_data/$file";
        my($path_base) = $path_gene;
        $path_base =~ s#\.gene$##;

        # The chrseq file is not in FASTA format, so read_seq() finds no
        # header and stores the whole sequence under the empty-string key.
        my($cseq_ref) = read_seq("$path_base.chrseq");
        my($len_chr) = length($cseq_ref->{''});

        my($gseq_ref) = read_seq("$path_base.geneseq");
        my($pseq_ref) = read_seq("$path_base.protseq");

        my($fh) = IO::File->new($path_gene, 'r') || die("Can not open $path_gene($!)");

        # Same definedness concern as above for a last line of "0".
        while (defined(my $line = $fh->getline())) {
            $line =~ s#[\r\n]*$##;

            # Only the columns that are actually checked are extracted:
            # 0 = spec, 1 = name, 5 = to, 8 = type.
            my($spec, $name, $to, $type) = (split(/\t/, $line))[0, 1, 5, 8];
            my($key) = "$spec:$name";

            if ($len_chr < $to) {
                print STDERR "WARNING :: Position is too bigger than chrseq-length for $file $spec:$name\n";
            }
            if (!exists($gseq_ref->{$key})) {
                print STDERR "WARNING :: No geneseq for $file $spec:$name\n";
            }

            # A protein sequence is only required for CDS entries.
            next if ($type !~ /^cds$/i);
            if (!exists($pseq_ref->{$key})) {
                print STDERR "WARNING :: No proteinseq for $file $spec:$name\n";
            }
        }
        $fh->close();
    }
    $dh->close();

    return;
}

###############################################################################
# Script entry point: runs only when $0 (the program being executed) is this
# very file, and checks each species id given on the command line in order.
if (__FILE__ eq $0) {
    my @requested_ids = @ARGV;
    for my $species_id (@requested_ids) {
        checkConvGB2Mbgd($species_id);
    }
}

###############################################################################
1;# True value as the last statement, so loading this file with require succeeds.
###############################################################################

