#!/usr/bin/perl -s
package MBGD::FunctionCategory;
use strict;
use DirHandle;
use FileHandle;
use File::Basename;
use File::Path;
use MBGD::DB;

###############################################################################
# $self->{'DB_TYPE'}       # mbgd/cog/kegg/tigr
# $self->{'IS_ORIG'}       # orig / all
# $self->{'DIR_BASE'}
# $self->{'DEFAULT_COLOR_LEVEL'}
# $self->{'LEVEL_FUNCTION'}->{"$level"}
# $self->{'NAME_FUNCTION'}->{'1'}->{"$name1"}
# $self->{'NAME_FUNCTION'}->{'2'}->{"$name2"}
#                             :
# $self->{'NAME_FUNCTION'}->{'N'}->{"$nameN"}
# $self->{'ORIG_ID_FUNCTION'}->{"$origId"}
# $self->{'COLOR'}->{"$lev"}
#
# gene ʣ FunctionCategory ƲǽȤ
# $self->{'GENE_FUNCTION'}->{"$sp"}->{"$lcname"}->{"$level"};
#
# gene ˳ƤƤ ID
# $self->{'GENE_ORIG_ID'}->{"$sp"}->{"$lcname"}->{"$orig_id"}++;
###############################################################################
# Function
# $refFunction = {};
# $refFunction->{'LEVEL'}
# $refFunction->{'LEVEL_PACK'}
# $refFunction->{'NAME'}
# $refFunction->{'ORIG_ID'}
###############################################################################
#
sub new {
    my($class) = shift;
    my($dbType) = shift;
    my($dirBase) = shift;
    my($isOrig) = shift;
    my($self) = {};

    bless($self, $class);
    $self->_init($dbType, $dirBase, $isOrig);

    return $self;
}

###############################################################################
#
sub _init {
    my($self) = shift;
    my($dbType) = shift;
    my($dirBase) = shift;
    my($isOrig) = shift;

    #
    $self->setDbType($dbType);
    $self->setBaseDir($dirBase);
    $self->setOrig($isOrig);
    $self->read();

    return;
}

###############################################################################
#
sub setDbType {
    my($self) = shift;
    my($dbType) = shift;

    $dbType = 'mbgd' if (! $dbType);
    $self->{'DB_TYPE'} = $dbType;
}

###############################################################################
#
sub getDbType {
    my($self) = shift;

    return $self->{'DB_TYPE'};
}

###############################################################################
#
sub setBaseDir {
    my($self) = shift;
    my($dir) = shift;

    if (-d $dir) {
        $self->{'DIR_BASE'} = $dir;
    }

    return;
}

###############################################################################
#
sub getBaseDir {
    my($self) = shift;

    if (exists($self->{'DIR_BASE'})) {
        return $self->{'DIR_BASE'}
    }

    return "$ENV{'MBGD_HOME'}/database/function";
}

###############################################################################
#
sub setOrig {
    my($self) = shift;
    my($isOrig) = shift;

    $self->{'IS_ORIG'} = $isOrig;
}

###############################################################################
#
sub getOrig {
    my($self) = shift;

    return $self->{'IS_ORIG'};
}

###############################################################################
#
sub getDbDir {
    my($self) = shift;

    my($dirBase) = $self->getBaseDir();
    my($dbType) = $self->getDbType();
    my($dirDb) = "$dirBase/$dbType";
    if (! -e $dirDb) {
        print STDERR "WARNING :: Not found '$dirDb'.\n";
    }
    if (! -d $dirDb) {
        print STDERR "WARNING :: '$dirDb' is not directory.\n";
    }

    return $dirDb;
}

###############################################################################
# ǽƥɤ߹
# TAB ڤǰʲιܤ򣱹Ԥ˵
# - ǽƥ
# - ǽƥ٥
# - ǽƥ̾
# - ꥸʥ뵡ǽƥ٥
=pod
#default	99.1
kegg	1	Metabolism	01100
kegg	1.1	Carbohydrate Metabolism	01110
kegg	1.2	Energy Metabolism	01120
kegg	1.3	Lipid Metabolism	01130
kegg	1.4	Nucleotide Metabolism	01140
kegg	1.5	Amino Acid Metabolism	01150
kegg	1.6	Metabolism of Other Amino Acids	01160
kegg	1.7	Glycan Biosynthesis and Metabolism	01170
kegg	1.8	Biosynthesis of Polyketides and Nonribosomal Peptides	01180
kegg	1.9	Metabolism of Cofactors and Vitamins	01190
kegg	1.10	Biosynthesis of Secondary Metabolites	01195
kegg	1.11	Xenobiotics Biodegradation and Metabolism	01196
kegg	2	Genetic Information Processing	01200
kegg	2.1	Transcription	01210
kegg	2.2	Translation	01220
kegg	2.3	Folding, Sorting and Degradation	01230
kegg	2.4	Replication and Repair	01240
kegg	3	Environmental Information Processing	01300
kegg	3.1	Membrane Transport	01310
kegg	3.2	Signal Transduction	01320
kegg	4	Cellular Processes	01400
kegg	4.1	Cell Motility	01410
kegg	4.2	Cell Growth and Death	01420
kegg	98	Others
kegg	98.1	Others
kegg	99	Unkown
kegg	99.1	Unkown
=cut
sub readFunction {
    my($self) = shift;

    #
    my($dir) = $self->getDbDir();
    my($dbType) = $self->getDbType();
    my($file) = "$dir/function.$dbType";
    my($fh) = new FileHandle("$file");
    if (! $fh) {
        print STDERR "ERROR :: Can not open $file($!)\n";
        return;
    }

    #
    my($line);
    while($line = $fh->getline()) {
        next if ($line =~ /^\s*$/);

        $line =~ s#[\r\n]*$##;


        if ($line =~ /^#default\s+(\S+)/) {
            my($default) = $1;
            print STDERR "DBG :: FOUND default :: $default\n" if ($main::DEBUG);
            $self->setDefaultColorLevel($default);
            next;
        }
        elsif ($line =~ /^\s*#/) {
            next;
        }

        my($db, $lev, $name, $orgLev) = split(/\t/, $line);
        next if ($db !~ /^$dbType$/i);
        $self->setFunction($lev, $name, $orgLev);
    }
    $fh->close();

    return;
}

###############################################################################
# ǽƥοɤ߹
# TAB ڤǰʲιܤ򣱹Ԥ˵
# - ǽƥ
# - ǽƥ٥
# - 
=pod
kegg	1.1	bfffff
kegg	1.2	bfff7f
kegg	1.3	bfffbf
kegg	1.4	ff7f7f
kegg	1.5	bf7fbf
kegg	1.6	cf9fcf
kegg	1.7	bfbfff
kegg	1.8	9f9fff
kegg	1.9	bf7fff
kegg	1.10	7f7fff
kegg	1.11	8f8fff
kegg	2.1	ffbf7f
kegg	2.2	ffbfbf
kegg	2.3	bf7f7f
kegg	2.4	ff7fff
kegg	3.1	ffbfff
kegg	3.2	ff7fbf
kegg	4.1	bfbf9f
kegg	4.2	bfbfbf
kegg	98.1	ffff00
kegg	99.1	ffffcf
=cut
sub readColorTab {
    my($self) = shift;

    #
    my($dir) = $self->getDbDir();
    my($dbType) = $self->getDbType();
    my($file) = "$dir/colorTab.$dbType";
    my($fh) = new FileHandle("$file");
    if (! $fh) {
        print STDERR "ERROR :: Can not open $file($!)\n";
        return;
    }

    #
    my($line);
    while($line = $fh->getline()) {
        next if ($line =~ /^\s*$/);

        $line =~ s#[\r\n]*$##;

        if ($line =~ /^\s*#/) {
            next;
        }

        my($db, $lev, $color) = split(/\t/, $line);
        next if ($db !~ /^$dbType$/i);

        $self->setColor($lev, $color);
    }
    $fh->close();

    return;
}

###############################################################################
# Gene ȵǽƥ٥Ȥбɤ߹
# TAB ڤǰʲιܤ򣱹Ԥ˵
# - ʪ拾
# - ORF ̾
# - ǽƥ
# - ǽƥ٥
# - ǽƥ ID
=pod
eco	b0002	kegg	1.5	K00928
eco	b0003	kegg	1.5	K00872
eco	b0004	kegg	1.5	K01733
eco	b0007	kegg	3.1	K03310
eco	b0008	kegg	1.1	K00616
eco	b0009	kegg	1.9	K03831
eco	b0014	kegg	3.1	K04043
eco	b0019	kegg	3.1	K03313
eco	b0021	kegg	2.4	K07480
=cut
sub loadGene2FunctionTab {
    my($self) = shift;
    my($sp) = shift;

    #
    my($dir) = $self->getDbDir();
    my($dbType) = $self->getDbType();
    my($ext) = 'all';
    if ($self->getOrig()) {
        $ext = $dbType;
    }
    my($file) = "$dir/geneFunction.$sp.$ext";
    if (! -e $file) {
        print STDERR "Warning :: Not exist $file\n";
        return;
    }

    print STDERR "DBG :: LOAD $file\n" if ($main::DEBUG);
    my($fh) = new FileHandle("$file");
    if (! $fh) {
        print STDERR "ERROR :: Can not open $file($!)\n";
        return;
    }

    #
    my($line);
    while($line = $fh->getline()) {
        next if ($line =~ /^\s*$/);

        $line =~ s#[\r\n]*$##;

        if ($line =~ /^\s*#/) {
            next;
        }

        print STDERR "DBG :: $line\n" if ($main::DEBUG);

        my($sp, $name, $db, $lev, $orig_id) = split(/\t/, $line);
        next if ($db !~ /^$dbType$/i);

        $self->setGeneFunction($sp, $name, $lev, $orig_id);
    }
    $fh->close();

    return;
}

###############################################################################
#
sub read {
    my($self) = shift;

    $self->readFunction();
    $self->readColorTab();

    return;
}

###############################################################################
#
sub setFunction {
    my($self) = shift;
    my($level) = shift;
    my($name) = shift;
    my($origId) = shift;

    #
    my($ent) = {};
    $ent->{'LEVEL'}   = $level;
    $ent->{'NAME'}    = $name;
    $ent->{'ORIG_ID'} = $origId;

    #
    my(@lev) = split(/\./, $level);
    $ent->{'LEVEL_PACK'} = $self->levelPack($level);

    $self->{'LEVEL_FUNCTION'}->{"$level"} = $ent;
    $self->{'ORIG_ID_FUNCTION'}->{"$origId"} = $ent;

    #
    my(@l) = split(/\./, $level);
    $name = lc($name);
    my($n) = scalar(@l);
    $self->{'NAME_FUNCTION'}->{"$n"}->{"$name"} = $ent;
}

###############################################################################
#
sub getFunctionByLevel {
    my($self) = shift;
    my($level) = shift;


    my(@l) = split(/\./, $level);
    my($l1) = join('.', $l[0]);
    my($l2) = join('.', @l[0 .. 1]);
    my($l3) = join('.', @l[0 .. 2]);
    foreach my$l ($l3, $l2, $l1) {
        if (exists($self->{'LEVEL_FUNCTION'}->{"$l"})) {
            return $self->{'LEVEL_FUNCTION'}->{"$l"};
        }
    }

    return;
}

###############################################################################
#
sub loadFunctionInfo {
    my($self) = shift;
    my($sp) = shift;
    my($opt) = shift;

    if ($opt->{'fromdb'}) {
        if (! exists($self->{'FUNC_FROM_DB'}->{"$sp"})) {
            my($dbType) = $self->getDbType();
            my($db) = new MBGD::DB($main::DBNAME_FUNC);
            my($tab) = 'gene_category';
            my($isOrig) = $self->getOrig();
            my($staOrig) = 'orig_id is null';
#            if ($isOrig ne '') {
                $staOrig = "orig_id is not null";
#            }
            my($opt) = { 'where' => "dbname='$dbType' and sp='$sp' and $staOrig " };
            my($refRes) = $db->select_fetch($tab, $opt);
            foreach my$ref (@{$refRes->{'INFO'}}) {
                my($nm) = $ref->{'name'};
                $self->setGeneFunction($sp, $ref->{'name'}, $ref->{'level'}, $ref->{'orig_id'});
            }
            $self->{'FUNC_FROM_DB'}->{"$sp"} = 1;
        }
    }
    else {
        if (! exists($self->{'GENE_FUNCTION'}->{"$sp"})) {
            $self->loadGene2FunctionTab($sp);
        }
    }

    return;
}

###############################################################################
#
sub getFunctionListBySporf {
    my($self) = shift;
    my($ent) = shift;
    my($opt) = shift;

    my($sp, $name) = split(":", $ent);
    $self->loadFunctionInfo($sp, $opt);

    #
    my($f) = sub {
        $a->{'LEVEL_PACK'} cmp $b->{'LEVEL_PACK'};
    };

    my($lcname) = lc($name);
    my($refFuncList) = [ sort($f values(%{$self->{'GENE_FUNCTION'}->{"$sp"}->{"$lcname"}})) ];

    return $refFuncList;
}

###############################################################################
#
sub _getFunctionBySporf {
    my($self) = shift;
    my($ent) = shift;
    my($opt) = shift;

    # ҤȤĤ Gene ʣεǽƥ꤬ƤƤ
    # ־ LEVEL εǽƥ֤
    my($refList) = $self->getFunctionListBySporf($ent, $opt);
    my($ref) = $refList->[0];

    return $ref;
}

###############################################################################
# Gene ˳ƤƤ뵡ǽƥο֤
sub countFunctionListBySporf {
    my($self) = shift;
    my($ent) = shift;
    my($opt) = shift;

    #
    my($refList) = $self->getFunctionListBySporf($ent, $opt);
    my($n) = 0;
    foreach my$ref (@{$refList}) {
        if (defined($ref->{'LEVEL'})) {
            $n++;
        }
    }    

    return $n;
}

###############################################################################
#
sub getOrfListByLevel {
    my($self) = shift;
    my($sp) = shift;
    my($lev) = shift;
    my($opt) = shift;

    my($optLike) = $opt->{'LIKE'};

    my($isOrig) = $self->getOrig();

    my($dbType) = $self->getDbType();
    my($db) = new MBGD::DB($main::DBNAME_FUNC);
    my($tab) = 'gene_category';
    my($staOrig) = 'orig_id is null';
#    if ($isOrig ne '') {
        $staOrig = "orig_id is not null";
#    }
    my($opt) = {};
    $opt->{'where'} = "dbname='$dbType' and sp='$sp' and $staOrig ";
    if (! $optLike) {
        $opt->{'where'} .= " and level='$lev'";
    }
    else {
        $opt->{'where'} .= " and (level='$lev' or level like '$lev.%')";
    }

    my($refOrfList) = [];
    my($refRes) = $db->select_fetch($tab, $opt);
    foreach my$ref (@{$refRes->{'INFO'}}) {
        my($nm) = $ref->{'name'};
        push(@{$refOrfList}, "$sp:$nm");
    }

    return $refOrfList;
}

###############################################################################
#
sub getFunctionByOrigId {
    my($self) = shift;
    my($origId) = shift;

    return $self->{'ORIG_ID_FUNCTION'}->{"$origId"};
}

###############################################################################
#
sub getFunctionByName {
    my($self) = shift;
    my(@nameList) = @_;

    my($n) = scalar(@nameList);
    for(my$i = $n; 0 < $i; $i-- ) {
        my($name) = lc($nameList[$i - 1]);
        if (exists($self->{'NAME_FUNCTION'}->{"$i"}->{"$name"})) {
            return $self->{'NAME_FUNCTION'}->{"$i"}->{"$name"};
        }
    }

    return {};
}

###############################################################################
#
sub getLevels {
    my($self) = shift;
    my($f) = sub {
        my($enta) = $self->{'LEVEL_FUNCTION'}->{"$a"};
        my($entb) = $self->{'LEVEL_FUNCTION'}->{"$b"};

        $enta->{'LEVEL_PACK'} cmp $entb->{'LEVEL_PACK'};
    };

    my($refLev) = [ sort $f keys(%{$self->{'LEVEL_FUNCTION'}}) ];

    return $refLev;
}

###############################################################################
#
sub setDefaultColorLevel {
    my($self) = shift;
    my($lev) = shift;

    #
    $self->{'DEFAULT_COLOR_Level'} = $lev;

    return;
}

###############################################################################
#
sub getDefaultColorLevel {
    my($self) = shift;

    return $self->{'DEFAULT_COLOR_Level'};
}

###############################################################################
#
sub isDefaultColorLevel {
    my($self) = shift;
    my($lev) = shift;

    my($default) = $self->getDefaultColorLevel();
    if ($default eq $lev) {
        return 1; # ǥեȤƱ
    }

    #
    $default =~ s#\..*$##;
    $lev     =~ s#\..*$##;
    if ($default eq $lev) {
        return 1; # 쳬ؤǥեȤƱ
    }

    return 0;
}

###############################################################################
#
sub setColor {
    my($self) = shift;
    my($lev) = shift;
    my($col) = shift;

    if (exists($self->{'COLOR'}->{"$lev"})) {
        my($old) = $self->{'COLOR'}->{"$lev"};
        if ($old ne $col) {
            print STDERR "WARNING :: Change color :: $lev :: $old ==> $col\n";
        }
    }

    $self->{'COLOR'}->{"$lev"} = $col;
}

###############################################################################
# ںβ
#     ʣ 'function' ƤƤ gene ФƤȤν
#
# getFunctionColorByLevel() λѤ侩
#
sub _getFunctionColorBySporf {
    my($self) = shift;
    my($ent) = shift;
    my($opt) = shift;

    my($refFunc) = $self->_getFunctionBySporf($ent, $opt);
    my($level) = $refFunc->{'LEVEL'};

    return $self->getFunctionColorByLevel($level);
}

###############################################################################
#
sub getFunctionColorByLevel {
    my($self) = shift;
    my($level) = shift;

    if ($level =~ /\:/) {
        my($wk, $lev) = split(':', $level);
        $level = $lev if ($lev);
    }

    my($levDefault) = $self->getDefaultColorLevel();

    foreach my$lev ($level, $levDefault) {
        my(@l) = split(/\./, $lev);
        my($lev1) = join('.', $l[0]);
        my($lev2) = join('.', @l[0 .. 1]);
        my($lev3) = join('.', @l[0 .. 2]);

        #
        foreach my$lev ($lev3, $lev2, $lev1) {
            if (exists($self->{'COLOR'}->{"$lev"})) {
                return $self->{'COLOR'}->{"$lev"};
            }
        }
    }

    return 0xffffff;
}

###############################################################################
# ںβ
#     ʣ 'function' ƤƤ gene ФƤȤν
sub setGeneFunction {
    my($self) = shift;
    my($sp) = shift;
    my($name) = shift;
    my($level) = shift;
    my($orig_id) = shift;

    #
    my($refFunction) = $self->getFunctionByLevel($level);
    my($lcname) = lc($name);
    $self->{'GENE_FUNCTION'}->{"$sp"}->{"$lcname"}->{"$level"} = $refFunction;

    #
    $self->{'GENE_ORIG_ID'}->{"$sp"}->{"$lcname"}->{"$orig_id"}++;
}

###############################################################################
#
sub levelPack {
    my($self) = shift;
    my($srcLev) = shift;
    my(@lev) = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
                'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
                'U', 'V', 'W', 'X', 'Y', 'Z');
    my($n) = scalar(@lev);

    my($levPack) = '';
    my($lev0, $lev1);
    foreach my$level (split(/\./, $srcLev)) {
        $lev0 = int($level / $n);
        $lev1 = $level % $n;
        if ($n < $lev0) {
            # 夢դ $lev[$#lev] 
            $lev0 = $lev1 = $n;
        }

        $levPack .= '.' if ($levPack ne '');
        $levPack .= $lev[$lev0] . $lev[$lev1];
    }

    return $levPack;
}

###############################################################################
#
sub getFunctionNameByLevel {
    my($self) = shift;
    my($lev) = shift;

    my($refFunc) = $self->getFunctionByLevel($lev);

    return $refFunc->{'NAME'};
}

###############################################################################
#
sub getFunctionFullnameByLevel {
    my($self) = shift;
    my($level) = shift;

    my($fullname) = '';
    my(@lev) = split(/\./, $level);
    my($lev) = '';
    foreach my$l (@lev) {
        $lev .= '.' if ($lev ne '');
        $lev .= $l;

        $fullname .= $self->getFunctionNameByLevel($lev) . ' ; ';
    }

    return $fullname;
}

###############################################################################
# ںβ
#     ʣ 'function' ƤƤ gene ФƤȤν
#
# getFunctionFullnameByLevel() λѤ侩
#
sub _getFunctionFullnameBySporf {
    my($self) = shift;
    my($ent) = shift;
    my($opt) = shift;

    my($refFunc) = $self->_getFunctionBySporf($ent, $opt);
    my($fullname) = $self->getFunctionFullnameByLevel($refFunc->{'LEVEL'});

    return $fullname;
}

###############################################################################
#
sub getOrigIdBySporf {
    my($self) = shift;
    my($ent) = shift;
    my($opt) = shift;

    my($sp, $name) = split(":", $ent);
    $self->loadFunctionInfo($sp, $opt);

    my($lcname) = lc($name);
    my(@id_list) = keys(%{$self->{'GENE_ORIG_ID'}->{"$sp"}->{"$lcname"}});

    return @id_list;
}

###############################################################################
#
sub getOrigIdBySporfOne {
    my($self) = shift;
    my($ent) = shift;
    my($opt) = shift;

    my(@id_list);
    if ($opt->{'fromdb'}) {
        my($sp, $name) = split(/\:/, $ent);
        my($dbType) = $self->getDbType();
        my($db) = new MBGD::DB($main::DBNAME_FUNC);
        my($tab) = 'gene_category';
        my($isOrig) = $self->getOrig();
        my($staOrig) = 'orig_id is null';
#        if ($isOrig ne '') {
            $staOrig = "orig_id is not null";
#        }
        my($opt) = { 'where' => "dbname='$dbType' and sp='$sp' and name='$name' and $staOrig " };
        my($refRes) = $db->select_fetch($tab, $opt);
        foreach my$ref (@{$refRes->{'INFO'}}) {
            my($nm) = $ref->{'name'};
            $self->setGeneFunction($sp, $ref->{'name'}, $ref->{'level'}, $ref->{'orig_id'});
        }

        my($lcname) = lc($name);
        @id_list = keys(%{$self->{'GENE_ORIG_ID'}->{"$sp"}->{"$lcname"}});
    }
    else {
        @id_list = $self->getOrigIdBySporf($ent, $opt);
    }

    return @id_list;
}

###############################################################################
1;#
###############################################################################
