#!/usr/bin/perl -s
use strict;
use FileHandle;
package ParseGOLDcomplete;

###############################################################################
# Name
#     new()
# Summary
#     Constructor
# Arguments
#
# Return value
#
# Description
#
# Remarks
#
sub new {
    # Constructor.  Blesses an empty hash into the invoking class, resets the
    # lookup indexes with clear(), then loads $filename with read().
    #
    # Arguments:
    #     $class    - class name the object is blessed into
    #     $filename - file name handed unchanged to read()
    # Returns:
    #     The new object.  The result of read() is not inspected, so the
    #     object is returned regardless of what read() did.
    my($class, $filename) = @_;

    my($self) = bless({}, $class);
    $self->clear();
    $self->read($filename);

    return $self;
}

###############################################################################
# Name
#     clear()
# Summary
#
# Arguments
#
# Return value
#
# Description
#
# Remarks
#
sub clear {
    # Replace each of the three lookup indexes (NAME, TAXON_ID, STRAIN_KEY)
    # with a fresh empty hash.  Other keys of the object are left untouched.
    #
    # Returns: nothing (bare return).
    my($self) = @_;

    foreach my $index ('NAME', 'TAXON_ID', 'STRAIN_KEY') {
        $self->{$index} = {};
    }

    return;
}

###############################################################################
# Name
#     read()
# Summary
#
# Arguments
#
# Return value
#
# Description
#
# Remarks
#
sub read {
    # Load a tab-separated table and index every data row three ways.
    #
    # Arguments:
    #     $filename - name of the file to load; it is handed to
    #                 FileHandle->new() without a mode argument.
    # Returns:
    #     Nothing (bare return).  This is also the result when the file cannot
    #     be opened or contains no header line.
    #
    # The first line supplies the column names; they are upper-cased and used
    # as the hash keys of each row.  Every following line becomes one hash
    # reference that is appended to:
    #     $self->{'NAME'}       keyed by the upper-cased ORGANISM column
    #     $self->{'TAXON_ID'}   keyed by the "TAXON ID" column
    #     $self->{'STRAIN_KEY'} keyed by several forms of the STRAIN column
    # The indexes are not reset here, so repeated calls accumulate rows.
    my($self) = shift;
    my($filename) = shift;
    my(%monstr) = ( 'JAN' =>  1, 'FEB' =>  2, 'MAR' =>  3,
                    'APR' =>  4, 'MAY' =>  5, 'JUN' =>  6,
                    'JUL' =>  7, 'AUG' =>  8, 'SEP' =>  9,
                    'OCT' => 10, 'NOV' => 11, 'DEC' => 12);

    my($fh) = FileHandle->new("$filename") || return;

    # The first line is the title (column header) line.
    my($line) = $fh->getline();
    if (!defined($line)) {
        # Empty file: there are no column names and therefore no rows.
        $fh->close();
        return;
    }
    $line =~ s#[\r\n]*$##;
    my(@keyList) = split(/\t/, uc($line));

    # Test definedness, not truth: a final line consisting of just "0" with
    # no line terminator is a false value but still a row.
    while (defined($line = $fh->getline())) {
        $line =~ s#[\r\n]*$##;

        # Pair the fields with the column names by position; columns that are
        # missing from a short line are stored as undef.
        my($ent) = {};
        my(@fieldList) = split(/\t/, $line);
        foreach my $key (@keyList) {
            $ent->{"$key"} = shift(@fieldList);
        }

        # DATE: rewrite "Month D, YYYY" as "YYYY-MM-DD".  Only the first three
        # letters of the month are looked up.  Any part that cannot be
        # determined is rendered as zero, so an unparsable or missing date
        # becomes "0000-00-00".
        my($date) = defined($ent->{'DATE'}) ? $ent->{'DATE'} : '';
        my($m, $d, $y) = ($date =~ /(\S{3})\S*\s+(\d+),\s+(\d+)/);
        my($mon) = defined($m) ? $monstr{uc($m)} : undef;
        $ent->{'DATE'} = sprintf("%04d-%02d-%02d", $y || 0, $mon || 0, $d || 0);

        # ORGANISM (a missing column is indexed under the empty string)
        my($name) = defined($ent->{'ORGANISM'}) ? uc($ent->{'ORGANISM'}) : '';
        push(@{$self->{'NAME'}->{"$name"}}, $ent);

        # TAXON_ID (a missing column is indexed under the empty string)
        my($taxid) = defined($ent->{'TAXON ID'}) ? $ent->{'TAXON ID'} : '';
        push(@{$self->{'TAXON_ID'}->{"$taxid"}}, $ent);

        # STRAIN: collect the candidate keys for this row, starting with the
        # raw upper-cased value.
        my($strain) = defined($ent->{'STRAIN'}) ? $ent->{'STRAIN'} : '';
        my(@strainList) = (uc($strain));
        if ($strain =~ /\(([^\(\)]+)\)/) {
            # The first parenthesised group is a key of its own and is then
            # removed from the text that gets split below.
            my($s) = uc($1);
            $s =~ s#[^A-Z0-9]##g;
            push(@strainList, $s);

            $strain =~ s#\([^\(\)]+\)##;
        }
        foreach my $s (split(/,/, $strain)) {
            # Dropping every non-alphanumeric character also removes the
            # surrounding whitespace, so no separate trimming is needed.
            $s = uc($s);
            $s =~ s#[^A-Z0-9]##g;
            push(@strainList, $s);
        }

        # Register the row once per distinct key.  The raw value and its
        # normalised form are identical for a plain strain such as "168";
        # without this check the row would be stored twice under that key.
        my(%seen);
        foreach my $strainKey (@strainList) {
            next if ($strainKey =~ /^\s*$/);
            next if ($seen{$strainKey}++);
            push(@{$self->{'STRAIN_KEY'}->{"$strainKey"}}, $ent);
        }
    }
    $fh->close();

    return;
}

###############################################################################
# Name
#     getByName()
# Summary
#
# Arguments
#
# Return value
#
# Description
#
# Remarks
#
sub getByName {
    # Look up rows by organism name.  The name is upper-cased before the
    # lookup, so matching is case-insensitive with respect to uc().
    #
    # Arguments:
    #     $name - organism name
    # Returns:
    #     Whatever is stored under that key in $self->{'NAME'}, or undef when
    #     the key is absent.
    my($self, $name) = @_;

    my($key) = uc($name);
    my($index) = $self->{'NAME'};

    return $index->{"$key"};
}

###############################################################################
# Name
#     getByTaxonId()
# Summary
#
# Arguments
#
# Return value
#
# Description
#
# Remarks
#
sub getByTaxonId {
    # Look up rows by taxon ID.  The ID is used as given (stringified), with
    # no case folding or other normalisation.
    #
    # Arguments:
    #     $taxid - taxon ID
    # Returns:
    #     Whatever is stored under that key in $self->{'TAXON_ID'}, or undef
    #     when the key is absent.
    my($self, $taxid) = @_;

    my($index) = $self->{'TAXON_ID'};

    return $index->{"$taxid"};
}

###############################################################################
# Name
#     getByStrain()
# Summary
#
# Arguments
#
# Return value
#
# Description
#
# Remarks
#
sub getByStrain {
    # Look up rows by strain.  The strain is upper-cased and every character
    # other than A-Z and 0-9 is removed before the lookup.
    #
    # Arguments:
    #     $strain - strain designation
    # Returns:
    #     Whatever is stored under the normalised key in
    #     $self->{'STRAIN_KEY'}, or undef when the key is absent.
    my($self, $strain) = @_;

    my($key) = uc($strain);
    $key =~ s#[^A-Z0-9]##g;

    my($index) = $self->{'STRAIN_KEY'};

    return $index->{"$key"};
}

###############################################################################
1;#
###############################################################################
