#!/usr/local/bin/perl -s
package KeggOrthology;
use strict;
use DirHandle;
use FileHandle;
use MBGD;
use GenBank;
require "MBGD_commonPath.pl";

###############################################################################
# Name
#     new()
# Summary
#     Constructor
# Arguments
#
# Return value
#
# Description
#
# Remarks
#
# Create a KeggOrthology object and immediately load the KO table.
#
# Arguments:
#   $class - class name to bless the new object into
#   $dir   - data directory, passed through to _init() (optional)
# Returns:
#   the newly blessed object
sub new {
    my ($class, $dir) = @_;

    # The object is a plain hash; _init() fills it in.
    my $self = bless({}, $class);
    $self->_init($dir);

    return $self;
}

###############################################################################
# Name
#     _init()
# Summary
#     Initialization
# Arguments
#
# Return value
#
# Description
#
# Remarks
#
# Initialize a freshly constructed object: remember the data directory and
# read the KO table.
#
# Arguments:
#   $dir - data directory handed to setDir() (optional)
# Returns:
#   nothing (bare return)
sub _init {
    my ($self, $dir) = @_;

    $self->setDir($dir);
    $self->readTable();

    return;
}

###############################################################################
#
# Store the data directory in the object.
#
# Arguments:
#   $dir - directory path; any false value selects the built-in default
#          "/bio/ftp/genomenet-ftp/kegg"
# Returns:
#   nothing (bare return)
sub setDir {
    my ($self, $dir) = @_;

    $self->{'DIR'} = $dir ? $dir : "/bio/ftp/genomenet-ftp/kegg";

    return;
}

###############################################################################
#
# Return the data directory previously stored by setDir().
sub getDir {
    my ($self) = @_;

    return $self->{'DIR'};
}

###############################################################################
#
# Extract the KO identifier from an ENTRY line.
#
# Arguments:
#   $ent  - entry hash being filled in
#   $line - text of the ENTRY field
#
# The first occurrence of "K" followed by digits is stored in
# $ent->{'ENTRY'}; when the line contains none, the slot becomes undef.
sub parseEntry {
    my ($self, $ent, $line) = @_;

    # List assignment: the capture group lands in ENTRY.
    ($ent->{'ENTRY'}) = $line =~ /(K\d+)/;
}

###############################################################################
#
# Parse a NAME line into individual names.
#
# Arguments:
#   $ent  - entry hash being filled in
#   $line - text of the NAME field (comma separated)
#
# Every comma-separated piece has ALL whitespace removed (not only leading
# and trailing) and is appended to $ent->{'NAME'}.
sub parseName {
    my ($self, $ent, $line) = @_;

    for my $piece (split(/,/, $line)) {
        $piece =~ s#\s+##g;
        push(@{$ent->{'NAME'}}, $piece);
    }
}

###############################################################################
# In most cases DEFINITION is expected to be a single line, but it is stored
# in an array in case it spans multiple lines.
# Append one DEFINITION line to the entry.
#
# Arguments:
#   $ent  - entry hash being filled in
#   $line - text of the DEFINITION field (one physical line)
#
# Lines are collected in the array $ent->{'DEFINITION'} in the order given.
sub parseDefinition {
    my ($self, $ent, $line) = @_;

    push(@{ $ent->{'DEFINITION'} }, $line);
}

###############################################################################
#
# Parse one CLASS line into the nested hash $ent->{'CLASS'}.
#
# Arguments:
#   $ent  - entry hash being filled in
#   $line - text of the CLASS field (one physical line)
#
# A classification may be wrapped over several lines.  Lines that do not yet
# contain a "[PATH:koNNNNN]" tag are buffered in $self->{'CLASS_prev'}; the
# line that carries the tag completes the classification.  The text before
# the tag is split on ';' and each trimmed component becomes one nesting
# level of $ent->{'CLASS'}; the innermost level holds the pathway id.
sub parseClass {
    my ($self, $ent, $line) = @_;

    # No pathway tag yet: remember the text and wait for the next line.
    unless ($line =~ /\[path:(ko\d+)\]/i) {
        $self->{'CLASS_prev'} .= " " if ($self->{'CLASS_prev'} ne '');
        $self->{'CLASS_prev'} .= $line;
        return;
    }

    # Prepend whatever was buffered from earlier lines.
    $line = join(" ", $self->{'CLASS_prev'}, $line) if ($self->{'CLASS_prev'} ne '');
    my ($hierarchy, $pathId) = ($line =~ /^([^\[\]]+)\[PATH\:(ko\d+)\]/i);

    # $slot always points at the hash slot for the current nesting level.
    my $slot = \$ent->{'CLASS'};
    for my $label (split(/\;/, $hierarchy)) {
        $label =~ s#^\s+##;
        $label =~ s#\s+$##;

        # A slot that still holds a plain value is turned into a hash so that
        # it can take children.
        $$slot = {} unless ref($$slot);

        # New labels start out holding the pathway id; existing ones are kept.
        $$slot->{"$label"} = $pathId unless exists($$slot->{"$label"});

        $slot = \$$slot->{"$label"};
    }

    $self->{'CLASS_prev'} = '';
}

###############################################################################
#
# Parse one DBLINKS line.
#
# Arguments:
#   $ent  - entry hash being filled in
#   $line - text of the DBLINKS field (one physical line)
#
# A line of the form "DB: id id ..." starts a new database (name stored in
# lower case).  Any other line is treated as a continuation and its ids are
# added to the database of the previous line.  Ids are appended to the array
# $ent->{'DBLINKS'}->{<db>}.
sub parseDblinks {
    my ($self, $ent, $line) = @_;

    my ($db, $entries) = ($line =~ /^(\S+)\:\s+(.+$)/)
                       ? (lc($1), $2)
                       : ($ent->{'DBLINKS_prev_db'}, $line);

    push(@{$ent->{'DBLINKS'}->{"$db"}}, $_) for split(/\s+/, $entries);

    # Remember the database in case DBLINKS continues on further lines.
    $ent->{'DBLINKS_prev_db'} = $db;
}

###############################################################################
#
# Parse one GENES line.
#
# Arguments:
#   $ent  - entry hash being filled in
#   $line - text of the GENES field (one physical line)
#
# A line of the form "ORG: gene gene ..." starts a new organism (code stored
# in lower case); any other line continues the organism of the previous line.
# Per organism, $ent->{'GENES'}->{<org>} holds:
#   LIST     - primary names in file order, original case
#   HASH     - lower-cased primary names as keys
#   NAME_P2S - lower-cased primary name   -> lower-cased secondary name
#   NAME_S2P - lower-cased secondary name -> lower-cased primary name
# A token written as "primary(secondary)" supplies both names.
sub parseGenes {
    my ($self, $ent, $line) = @_;

    my ($sp, $genes) = ($line =~ /^(\S+)\:\s+(.+$)/)
                     ? (lc($1), $2)
                     : ($ent->{'GENES_prev_sp'}, $line);

    # First line for this organism: create its containers.
    unless (exists($ent->{'GENES'}->{"$sp"})) {
        $ent->{'GENES'}->{"$sp"} = {
            'LIST'     => [],    # primary names
            'HASH'     => {},    # primary names as keys
            'NAME_P2S' => {},    # primary (P) -> secondary (S)
            'NAME_S2P' => {},    # secondary (S) -> primary (P)
        };
    }
    my $rec = $ent->{'GENES'}->{"$sp"};

    for my $token (split(/\s+/, $genes)) {
        # "orf(gene)" : orf is the primary name, gene the secondary name.
        my ($orf, $alias) = ($token =~ /([^\(\)]+)\(([^\(\)]+)\)/);

        if (defined($orf)) {
            push(@{$rec->{'LIST'}}, $orf);

            my ($orfKey, $aliasKey) = (lc($orf), lc($alias));
            $rec->{'HASH'}->{"$orfKey"} = 1;
            $rec->{'NAME_P2S'}->{"$orfKey"} = $aliasKey;
            $rec->{'NAME_S2P'}->{"$aliasKey"} = $orfKey;
        }
        else {
            push(@{$rec->{'LIST'}}, $token);
            $rec->{'HASH'}->{lc($token)} = 1;
        }
    }

    # Remember the organism in case GENES continues on further lines.
    $ent->{'GENES_prev_sp'} = $sp;
}

###############################################################################
#
# Read the KO flat file and (re)build every lookup table held by the object.
#
# Arguments:
#   $fileKo - path of the KO file; any false value selects "<getDir()>/ko"
# Returns:
#   nothing (bare return)
# Dies:
#   when the file cannot be opened
#
# File layout as handled here: a line starting with a non-blank token opens a
# field (ENTRY, NAME, DEFINITION, CLASS, DBLINKS, GENES); a line starting
# with white space continues the most recent field; a line consisting only of
# '/' characters terminates the record.  Fields with any other name are
# reported on STDERR, once per physical line.
sub readTable {
    my($self) = shift;
    my($fileKo) = shift;
    my($key);

    # Fall back to the "ko" file inside the configured data directory.
    if (! $fileKo) {
        my($dir) = $self->getDir();
        $fileKo = "$dir/ko";
    }

    # Reset all lookup tables.
    $self->{'ENTRY_NAME_LIST'} = [];
    $self->{'ENTRY_NAME'} = {};
    $self->{'SPNAME2ENTRY'} = {};
    $self->{'NAME_P2S'} = {};
    $self->{'NAME_S2P'} = {};

    #
    my($line);
    my($ent);
    my($total) = 0;
    my($fh) = FileHandle->new("$fileKo") || die("Can not open $fileKo($!)");

    # defined() is required: only the <FH> operator gets it implicitly, so
    # without it a final, newline-less line "0" would end the loop early.
    while (defined($line = $fh->getline())) {
        next if ($line =~ /^\s*$/);

        $line =~ s#[\r\n]+$##;

        if ($line =~ /^\/+$/) {
            # Record terminator: register the finished entry.
            my($entName) = $ent->{'ENTRY'};
            push(@{$self->{'ENTRY_NAME_LIST'}}, $entName);
            $self->{'ENTRY_NAME'}->{"$entName"} = $ent;

            $self->makeRelationSp2Entry($ent);

            $total++;

            # The terminator carries no field data.  Without this the line
            # would be dispatched below under the previous field's key and,
            # for example, be stored as a gene named "///".
            next;
        }
        elsif ($line =~ /^(\S+)\s+(.+)$/) {
            # New field: remember its key; continuation lines keep the old one.
            $key = $1;
            $line = $2;
        }

        #
        $line =~ s#^\s+##;    # strip leading white space
        $line =~ s#\s+$##;    # strip trailing white space
        if ($key =~ /^ENTRY$/) {
            $ent = {};
            $ent->{'ENTRY'}      = '';
            $ent->{'NAME'}       = [];
            $ent->{'DEFINITION'} = [];        # array so that multiple lines can be stored
            $ent->{'CLASS'}      = {};
            $ent->{'DBLINKS'}    = {};
            $ent->{'GENES'}      = {};

            # parseClass() buffers unfinished CLASS text on the object; drop
            # any leftover so it cannot leak into this new entry.
            $self->{'CLASS_prev'} = '';

            $self->parseEntry($ent, $line);
        }
        elsif ($key =~ /^NAME$/) {
            $self->parseName($ent, $line);
        }
        elsif ($key =~ /^DEFINITION$/) {
            $self->parseDefinition($ent, $line);
        }
        elsif ($key =~ /^CLASS$/) {
            $self->parseClass($ent, $line);
        }
        elsif ($key =~ /^DBLINKS$/) {
            $self->parseDblinks($ent, $line);
        }
        elsif ($key =~ /^GENES$/) {
            $self->parseGenes($ent, $line);
        }
        else {
            print STDERR "WARNING :: Unknown key :: $key\n";
        }
    }
    $fh->close();

    # $main::v is the -v command-line switch (the script runs under perl -s).
    print STDERR "TOTAL :: $total\n" if ($main::v);

    return;
}

###############################################################################
# Make ENTRY records retrievable from GENES
# Index one entry by its genes so that it can be found from organism + gene.
#
# Arguments:
#   $ent - a completely parsed entry hash
#
# For every organism in $ent->{'GENES'}:
#   - $ent is appended to $self->{'SPNAME2ENTRY'}->{<org>}->{<gene>} for each
#     gene in LIST (gene lower-cased);
#   - the entry's primary/secondary name pairs are copied into the object-wide
#     tables $self->{'NAME_P2S'}->{<org>} and $self->{'NAME_S2P'}->{<org>}.
sub makeRelationSp2Entry {
    my ($self, $ent) = @_;

    for my $sp (keys(%{$ent->{'GENES'}})) {
        $sp = lc($sp);

        # gene -> list of entries (the array springs into existence on first push)
        for my $orf (@{$ent->{'GENES'}->{"$sp"}->{'LIST'}}) {
            my $orfKey = lc($orf);
            push(@{$self->{'SPNAME2ENTRY'}->{"$sp"}->{"$orfKey"}}, $ent);
        }

        # primary <-> secondary gene names
        for my $primary (keys(%{$ent->{'GENES'}->{"$sp"}->{'NAME_P2S'}})) {
            my $secondary = $ent->{'GENES'}->{"$sp"}->{'NAME_P2S'}->{"$primary"};

            # Both per-organism tables are created together.
            unless (exists($self->{'NAME_P2S'}->{"$sp"})) {
                $self->{'NAME_P2S'}->{"$sp"} = {};
                $self->{'NAME_S2P'}->{"$sp"} = {};
            }

            $self->{'NAME_P2S'}->{"$sp"}->{"$primary"} = $secondary;
            $self->{'NAME_S2P'}->{"$sp"}->{"$secondary"} = $primary;
        }
    }
}

###############################################################################
#
# Look up the entries registered for a gene of an organism.
#
# Arguments:
#   $sp   - organism code, or a combined "organism:gene" string when $name is
#           omitted or empty
#   $name - gene name (optional)
# Both values are compared in lower case.
# Returns:
#   the array reference stored in SPNAME2ENTRY for that organism and gene, or
#   undef when nothing is registered.
#
# The lookup never creates hash slots: asking for an unknown organism leaves
# SPNAME2ENTRY untouched.
sub getEntriesBySpname {
    my($self) = shift;
    my($sp) = shift;
    my($name) = shift;

    $sp   = lc(defined($sp)   ? $sp   : '');
    $name = lc(defined($name) ? $name : '');

    # Test for the empty string rather than for falseness so that a gene
    # literally named "0" is not mistaken for a missing argument.
    if ($name eq '') {
        ($sp, $name) = split(/:/, $sp);
        $sp   = '' if (! defined($sp));
        $name = '' if (! defined($name));
    }

    # Check the organism level first; indexing straight through it would
    # autovivify an empty hash for every unknown organism.
    my($genes) = exists($self->{'SPNAME2ENTRY'}->{"$sp"})
               ? $self->{'SPNAME2ENTRY'}->{"$sp"}
               : undef;

    return ref($genes) ? $genes->{"$name"} : undef;
}

###############################################################################
#
# Return the identifiers of all registered entries, in the order in which
# they were appended to ENTRY_NAME_LIST.
#
# Returns:
#   a list (the contents of the ENTRY_NAME_LIST array, not a reference)
sub getEntryNames {
    my ($self) = @_;

    return @{ $self->{'ENTRY_NAME_LIST'} };
}

###############################################################################
#
# Return the entry hash registered under an identifier.
#
# Arguments:
#   $entName - entry identifier used as key into ENTRY_NAME
# Returns:
#   the stored entry, or undef when the identifier is unknown
sub getEntry {
    my ($self, $entName) = @_;

    return $self->{'ENTRY_NAME'}->{"$entName"};
}

###############################################################################
1;#
###############################################################################
