#!/usr/bin/perl -s

###############################################################################
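# Fetches the functional classification tables from KEGG
# (http://www.genome.ad.jp/kegg/kegg2.html) and converts them.
#
# In practice the following CGI is invoked:
#     http://www.genome.ad.jp/dbget-bin/get_htext?<organism name>.kegg+-f+T+w+Z
#
# The <organism name> used with this CGI must be spelled exactly as KEGG
# spells it, including upper/lower case.
# The correct names are listed in all_species.tab in the KEGG genes directory.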
#
###############################################################################
use KeggAllSpecTab;
require "MBGD_Conf.pl";
require "MBGD_commonUpdate.pl";

###############################################################################
#
# convClassFile($sp, $spname, $kegg)
#
# Downloads the KEGG get_htext page for one organism and writes two files
# derived from it under "$DIR_work/class":
#     <sp>.cl     (or <sp>_kegg.cl)     - written by printClFile()
#     <sp>.class  (or <sp>_kegg.class)  - written by printClassFile()
#
#   $sp     - name used to build the output file names
#   $spname - organism name placed in the get_htext query string
#   $kegg   - when true, ".kegg" is appended to the query name and "_kegg"
#             to the output file names
#
# Uses $DIR_work, $URL_kegg, $CMD_wget and $DEBUG, which are set outside this
# sub (presumably by MBGD_Conf.pl).
#
# Dies when an output file cannot be opened or closed.  A failed or
# error-looking download still creates both files, but leaves them empty.
# The previously selected output handle is restored before returning.
sub convClassFile {
    my($sp, $spname, $kegg) = @_;

    # Best effort: the directory normally already exists after the first run.
    mkdir("$DIR_work/class", 0750);

    # <host>/dbget-bin/get_htext?<spname>[.kegg]+-f+T+w+Z
    my $ext = ($kegg ? ".kegg" : "") . "+-f+T+w+Z";
    my $url = "${URL_kegg}/dbget-bin/get_htext?${spname}${ext}";

    # Fetch the page.  The URL goes through the shell inside single quotes,
    # so any single quote in it must be escaped to keep it one argument.
    (my $quotedUrl = $url) =~ s/'/'\\''/g;
    my $cmd = "$CMD_wget  -q -O - '$quotedUrl'";
    print STDERR "CMD : $cmd\n" if ($DEBUG);
    my $infoClass = `$cmd`;
    $infoClass = '' unless (defined($infoClass));   # command could not be run

    if (length($infoClass) < 2000 && $infoClass =~ /error/i) {
        # Short reply mentioning an error: treat it as "no data" so that
        # empty output files are produced.
        $infoClass = '';
    }

    # Strip HTML tags.
    $infoClass =~ s/<[^>]*>//g;

    my $fileBase = "$DIR_work/class/$sp" . ($kegg ? "_kegg" : "");
    my @outputs = (
        [ "$fileBase.cl",    \&printClFile    ],
        [ "$fileBase.class", \&printClassFile ],
    );

    foreach my $output (@outputs) {
        my($filename, $printer) = @$output;

        open(my $fh, '>', $filename) || die("Can not open $filename($!)");

        # The printers write to the currently selected handle; remember the
        # previous one so it can be put back afterwards.
        my $previous = select($fh);
        $printer->($infoClass) if ($infoClass);
        select($previous);

        # Buffered write errors only show up at close time.
        close($fh) || die("Can not close $filename($!)");
    }

    return;
}

###############################################################################
#
# printClFile($info)
#
# Scans the tag-stripped classification text line by line and prints, to the
# currently selected output handle, one line per category that is followed by
# at least one gene-like entry:
#
#     <category id> TAB <level-1 name> // <level-2 name>[ // <level-3 name>]
#
# The id is the level-3 number ("1.2.3") when one is active, otherwise the
# level-2 number ("1.2").  Only the first entry after a category heading
# triggers output; all further lines up to the next heading are ignored.
#
# NOTE(review): when the text ends right after a heading (no entry follows),
# one extra line is printed with " / " separators and always all three
# names.  That differs from the " // " format above; kept as is.
sub printClFile {
    my($info) = @_;
    my($topName, $midId, $midName, $leafId, $leafName);
    my $emitted;    # true once the current category has been printed

    LINE:
    for my $line (split(/\n/, $info)) {
        # Level 1 heading: "1. Name"
        if (my @top = $line =~ /^\s*([0-9]+)\.*[ ]+([^ ].*)$/) {
            $topName = $top[1];
            $emitted = 0;
            next LINE;
        }

        # Level 2 heading: " 1.2 Name" - also forgets the level-3 heading
        if (my @mid = $line =~ /^[ ]+([0-9]+\.[0-9]+)\.*[ ]+([^ ].*)/) {
            ($midId, $midName) = @mid;
            $leafId   = '';
            $leafName = '';
            $emitted  = 0;
            next LINE;
        }

        # Level 3 heading: "  1.2.3 Name"
        if (my @leaf = $line =~ /^  +([0-9]+\.[0-9]+\.[0-9]+)\.*[ ]+([^ ].*)$/) {
            ($leafId, $leafName) = @leaf;
            $emitted = 0;
            next LINE;
        }

        # Anything else matters only until the category has been printed.
        next LINE if ($emitted);
        next LINE unless ($line =~ /^  +([a-zA-Z0-9_]+)[ ]+([^ ].*)$/);

        my $row = ($leafId ? $leafId : $midId) . "\t" . "$topName // $midName";
        $row .= " // $leafName" if ($leafName);
        print "$row\n";
        $emitted = 1;
    }

    unless ($emitted) {
        print "$leafId\t$topName / $midName / $leafName\n";
    }
}

###############################################################################
#
# printClassFile($info)
#
# Scans the tag-stripped classification text and emits one record per gene
# entry through print_category(), i.e. on the currently selected output
# handle.
#
# Line types recognised:
#   - category headings "1. Name", " 1.2 Name", "  1.2.3 Name": the number
#     becomes the current category
#   - gene entries "<indent>[*...]<id>  <description>": start a new pending
#     record
#   - continuation lines: while the last recognised line was a gene entry, a
#     line indented more than 5 columns deeper than that entry's prefix is
#     appended to the pending description (no separator is inserted)
#   - a line containing "Last updated" ends the data
#
# A pending record is written as soon as any other non-empty line is seen,
# or at the end of the input.  A "[SP:xxx]" tag in the description is removed
# and its value passed on as the species code.
#
# All parser state is lexical.  It used to live in package globals, so the
# state left behind by one page could turn the first lines of the next page
# into a bogus record.
sub printClassFile {
    my($info) = @_;
    my($category, $gene, $name);
    my $status   = '';    # 'category' or 'gene': kind of the last recognised line
    my $head_len = 0;     # length of the indent/asterisk prefix of the last gene entry

    foreach my $line (split(/\n/, $info)) {
        last if ($line =~ /Last updated/);    # end of data

        # Continuation of the current gene description.
        if ($status eq 'gene'
            && $line =~ /^(  [ ]+)([^ ].*)$/
            && length($1) > $head_len + 5) {
            $name .= $2;
            next;
        }

        # Any other non-empty line completes the pending record.
        if ($line !~ /^$/ && $name) {
            _flushClassRecord($category, $gene, $name);
            $name = '';
        }

        if ($line =~ /^([0-9]+)\.*[ ]+([^ ].*)$/
            || $line =~ /^[ ]+([0-9]+\.[0-9]+)\.*[ ]+([^ ].*)/
            || $line =~ /^\s+([0-9]+\.[0-9]+\.[0-9]+)\.*[ ]+([^ ].*)$/) {
            # $1 belongs to whichever heading pattern matched.
            $category = $1;
            $status   = 'category';
        }
        elsif ($line =~ /^(\s+\**)([a-zA-Z0-9_]+)[ ]+([^ ].*)$/) {
            $head_len = length($1);
            $gene     = $2;
            $name     = $3;
            $status   = 'gene';
        }
    }

    _flushClassRecord($category, $gene, $name) if ($name);
}

# Removes every "[SP:xxx]" tag from the description, keeps the value of the
# last one as the species code, and prints the record via print_category().
sub _flushClassRecord {
    my($category, $gene, $name) = @_;
    my $spcode = '';

    if ($name =~ s/\[SP:([A-Z0-9_\.\-]+)\]//g) {
        $spcode = $1;
    }
    print_category($category, $gene, $spcode, $name);
}

###############################################################################
#
# print_category($category, $gene, $spcode, $name)
#
# Prints one tab-separated record, terminated by a newline, to the currently
# selected output handle:
#
#     <category> TAB <GENE> TAB <spcode> TAB <name>
#
# Bracketed tags of the form "[xx:ID]" (any two characters before the colon)
# are removed from the name, and ASCII lower-case letters in the gene id are
# upper-cased.
sub print_category {
    my @field = @_;    # 0: category, 1: gene, 2: species code, 3: name

    $field[3] =~ s/\[..:([A-Z0-9_\.\-]+)\]//g;
    $field[1] =~ tr/a-z/A-Z/;

    print join("\t", @field[0 .. 3]) . "\n";
}

# read_keggtab()
#
# Reads "$ENV{MBGD_HOME}/tmp/kegg2.html" and fills the package hash %KeggName
# with <genome abbreviation> => <get_htext query string> pairs.
#
# A line containing "www_bget?genome+<abbr>" sets the current abbreviation;
# each following line with an HREF to "get_htext?<query>" stores <query>
# under that abbreviation.
#
# A missing or unreadable file is reported with a warning and leaves
# %KeggName untouched.  The caller's $_ is not modified.
# Returns the result of close() on success, or an empty/undef value when the
# file could not be opened.
sub read_keggtab {
    my $file = "$ENV{MBGD_HOME}/tmp/kegg2.html";
    my($spabb, $orgname);

    my $fh;
    unless (open($fh, '<', $file)) {
        warn("Can not open $file($!)\n");
        return;
    }

    # A named line variable is used because while (<FH>) would overwrite the
    # caller's $_.
    while (defined(my $line = <$fh>)) {
        if ($line =~ /www_bget\?genome\+([a-z0-9]+)/) {
            $spabb = $1;
        }
        elsif ($line =~ /HREF=".*get_htext\?(.*)"/) {
            $orgname = $1;
            if ($spabb && $orgname) {
                $KeggName{$spabb} = $orgname;
            }
        }
    }

    return close($fh);
}

###############################################################################
# Script entry point: runs only when this file is executed directly, not
# when it is pulled in with require.
#
# Every command-line argument is an organism abbreviation.  They are handled
# in sorted order: the KEGG name is looked up in KeggAllSpecTab and the
# classification files are generated with convClassFile().
# $log and $kegg are not assigned anywhere in this file; presumably they come
# from the -s command-line switches (e.g. -log=..., -kegg) - TODO confirm.
if ($0 eq __FILE__) {
    # Set up logging (openLogfile is defined outside this file).
    openLogfile($log);

    # The organism names come from KeggAllSpecTab; the older
    # read_keggtab()/%KeggName lookup is not used here.
    my $speciesTab = KeggAllSpecTab->new();

    for my $sp (sort(@ARGV)) {
        my $entry  = $speciesTab->getInfoByAbbr($sp);
        my $spname = $entry->{'FILE'};
        print STDERR "$sp,$spname\n";

        # No usable KEGG name for this abbreviation: report it and move on.
        unless ($spname =~ /\S/) {
            print STDERR "SKIP :: Can not found spec-name(KEGG) for '$sp'.\n";
            next;
        }

        convClassFile($sp, $spname, $kegg);
    }

    exit;
}

###############################################################################
1;#
###############################################################################
