#!/usr/bin/perl -s
package RECOG::MBGD::ClusterTable;
###############################################################################
# ̾
#     RECOG::MBGD::SortClusterTable.pm
#
# 
#     ClusterTable쥯饹ȤMBGDͭ°䵡ǽɲä饹
#
# 
#     ClusterTable饹ˡMBGDղþ(̾ǽ̾)ɲ
#     ȤȤˡMBGDϽ(ʪ)˴Ťơơ֥
#     󤹤롣
#
#     㡧
#       my $ctbl = new DomClust::CGI(....);
#
#       # 饹ơ֥MBGDղþͿ롣
#       my $mbgdTable = new MBGD::ClusterTable($ctbl);
#
#       # ֶڤΥƥȤɽϤ롣
#       $mbgdTable->write(format => 'text', type => 'boolean');
#
# ѥåѿ
#
# Сѿ (ܥ饹ͭΤ)
#
# ᥽åɰ
#    getTypicalGeneInfo()
#    addGeneInfoFuncLog2()
#    sortGeneList()
#    getGeneInfoTop()
#    getGeneInfoDescrPoint()
#    getGeneInfoTopPri2()
#    getGeneInfoMBGD_Cache()
#
# 
#
###############################################################################
use strict;
our(@ISA);                                   # require 5.6.0;
use FileHandle;
use RECOG;
use RECOG::ClusterTable;
use MBGD;
use MBGD::Taxonomy;
use MBGD::FuncCat;
use RECOG::DomClustCommon;

###############################################################################
# ̾
#     getTypicalGeneInfo
# 
#     ҾΥ롼פɽŪʰҤ롣
#
# 
#     $ghash : и٤Υͤޤϥå
#     $ghash ʰ̾=APP)
#       $ghash->{'APP'}          : (APP)ϰ̾
#       $ghash->{'APP'  }{'count'} : ̾νи
#       $ghash->{'APP'}{'prod'}  : ҤΥץ ʥϥåΥե)
#       $ghash->{'APP'}{'func'}  : Ҥεǽƥ(ϥåΥե)
#       $ghash->{'APP'}{'select'}: ̾ޤäȤˡб뵡ǽ̾
#                                  ץ̾򤹤뤿Υϥåġ
#
# 
#     Ǥޤꥹ
#     1)ɽ̾
#     2)ɽҤбץ
#     3)ɽҤб뵡ǽƥ̾Υϥå
#
# 
#
# 
#
sub getTypicalGeneInfo {
	my $self = shift;
	my $ghash = shift;
	my($genename, $product, $genefunc);

	my $gselect = $self->getGeneInfoDescrPoint($ghash);
	if(keys%{$gselect}) {
		($genename, $product, $genefunc) = $self->getGeneInfoTop($gselect);
	}
	else {
            # ʤξ(descriptionʤä)
		($genename, $product, $genefunc) = $self->getGeneInfoTopPri2($ghash);
	}
	
	return ($genename, $product, $genefunc);
}

###############################################################################
#
# addGeneInfoFuncοĽtmpե˽ϤС
# MBGDǡ١
sub addGeneInfoFuncLog2 {
	my $self    = shift;
    my $file    = shift;
    my $dbname  = shift;      # ǡ١̾
    $dbname = $main::DBNAME_FUNC unless(defined $dbname);

    # å夬̵ɤǧ롣
    # СνϹԤʤʤ
    # 'Table'(饹ơ֥)˴ޤޤ饹оݤȤ롣

    # Ľե˿ĽϤ롣
    my $counter=0;
    my $cmd = "echo \'create_result_table $counter\' >> $file";
    system($cmd);
    my $table = $self->{'Table'};
    foreach my $clst (values %$table) {
        my $gall = {};               # 饹ñ̤ΰҾ
		my $l = join(" ", keys%{$clst});
        ## ǥХå
        if($main::DEBUG) {
            print STDOUT "#=" . "=" x 70 . "\n";
            printf(STDOUT "# Cluster ID    : %s\n", $clst->{id});
        }

        # ֥饹˰̾ǽƥξ
        foreach my $subc (values %{$clst->{'subcluster'}}) {
            if($main::DEBUG) {
                print (STDOUT "#" . '-' x 72 . "\n");
                printf(STDOUT "# Subcluster ID : %s\n", $subc->{subid});
            }

            # ֥饹ORF̾
            my @orfs;
            foreach my $sp (keys %{$subc->{'ingroup'}}) {
                push(@orfs, keys %{$subc->{'ingroup'}{$sp}});
            }
            # ORF̾ΥꥹȤбҾ
            my $ginfo = $self->getGeneInfo($dbname, @orfs);
            # ҾˤĤơ̾ȵǽƥ̾
            # и٤򥫥Ȥ롣
            my $gsub = {};
            # 饹ե뤬Ĥʤerr80
            # ǽƥ꤬Ҥͥ褵Τǡ80ʾΤΤϵǽƥ̵ΤȤ롣
            $gsub->{''}{'err'}{"80"}=1;
            $gall->{''}{'err'}{"80"}=1;
            foreach my $e (@$ginfo) {
                next if(length($e->{'gene'}) == 0);
                # ̾ݻ
                my $gname = $e->{'gene'};
                # ̾ ̤˽и򥫥Ȥ
                $gsub->{$gname}{'count'}++;  # ֥饹Υ
                $gall->{$gname}{'count'}++;  # 饹Υ

                # ץ̾ORF̤̾¸
                if(length($e->{'function_no'}) > 0) {
                    $gsub->{$gname}{'prod'}{$e->{'descr'}}++ ;
                    $gall->{$gname}{'prod'}{$e->{'descr'}}++ ;
                }

                # ǽƥORF̤̾¸
                if(length($e->{'function_no'}) > 0) {
                    my $funcName = MBGD::FuncCat::getFunctionCategory($dbname,$e->{'sp'}, $e->{'function_no'});
                    my $funcNum = MBGD::FuncCat::get_global_func_category($funcName);

                    if(!$funcName) {
                        $funcNum = "82 \'$e->{'sp'} $dbname $e->{'function_no'}\'"; # ǽ̾
                        $gsub->{$gname}{'err'}{$dbname}{$funcNum}=1;
                        $gall->{$gname}{'err'}{$dbname}{$funcNum}=1;
                    } elsif(!$funcNum) {
                        $funcNum = "81 \'$dbname $funcName\'"; # univбFunction ID̵
                        $gsub->{$gname}{'err'}{$dbname}{$funcNum}=1;
						$gall->{$gname}{'err'}{$dbname}{$funcNum}=1;
                    } else {
                        $gsub->{$gname}{'func'}{$dbname}{$funcNum}++;
                        $gall->{$gname}{'func'}{$dbname}{$funcNum}++;
                        if(length {$e->{'descr'}} > 0) {
                            $gsub->{$gname}{'select'}{$e->{'descr'}}{$dbname}{$funcNum}++;
                            $gall->{$gname}{'select'}{$e->{'descr'}}{$dbname}{$funcNum}++;
                        }
                    }
                } else {
                    my $funcNum = 90; # ǽ̤
                    $gsub->{$gname}{'err'}{$dbname}{"$funcNum"}=1;
                    $gall->{$gname}{'err'}{$dbname}{"$funcNum"}=1;
                }

#                # Ҿ̡̾ORF̤̾¸
#                $gsub->{$gname}{'info'}{$e->{'name'}} = $e;
#                $gall->{$gname}{'info'}{$e->{'name'}} = $e;
            }

            # Ǥͥ٤ι⤤̾ǽƥ̾
			# Description ȤӤ
			my($genename, $product, $genefunc) = $self->getTypicalGeneInfo($gsub);
	   
            # äȤͥ٤⤤Τǡǽƥ̵꤬äΤˤĤƤ90줹롣
            if(!$genefunc) {
                $genefunc->{$dbname} = 90;
                my $er = "";
                foreach my $k (keys %{$gsub->{$genename}{'err'}}) {
                    if(length($er) < 1) {
                        $er = $k;
                    } else {
                        $er .= " ". $k;
                    }
                }
                $subc->{'err'} = $er; # 90errϿ
            }

            # 򥵥֥饹ɲä
            $subc->{'genename'} = $genename;
            $subc->{'product'}  = $product;
            $subc->{'function'}->{$dbname} = $genefunc->{$dbname};
        }

        # ˤĤƤƱͤ˰̾ǽƥ̾
        my $ginfo = $self->getGeneInfo($dbname, values %{$clst->{'outgroup'}});
        # 륯饹ե뤬̵
        $gall->{''}{'err'}{"80"}=1;
        foreach my $e (@$ginfo) {

            next if(length($e->{'gene'}) == 0);

            # ̾ݻ
            my $gname = $e->{'gene'};

            # ̤̾˽и򥫥Ȥ
            $gall->{$gname}{'count'}++;

            # ץ̤̾̾¸
            if(length($e->{'descr'}) > 0) {
                $gall->{$gname}{'prod'}{$e->{'descr'}}++ ;
            }
            # ǽƥ̤̾¸
            if(length($e->{'function_no'}) > 0) {

                my $funcName = MBGD::FuncCat::getFunctionCategory($dbname,$e->{'sp'}, $e->{'function_no'});
                my $funcNum = MBGD::FuncCat::get_global_func_category($funcName);

                if(!$funcName) {
                    $funcNum = "82 \'$e->{'sp'} $dbname $e->{'function_no'}\'";   # ǽ̾
                    if(length {$e->{'descr'}} > 0) {
						$gall->{$gname}{'err'}{$dbname}{$funcNum}{$e->{'descr'}}=1;
                    }
                } elsif(!$funcNum) {
                    $funcNum = "81 \'$dbname $funcName\'"; # univбFunction ID̵
                    if(length {$e->{'descr'}} > 0) {
						$gall->{$gname}{'err'}{$dbname}{$funcNum}{$e->{'descr'}}=1;
                    }
                } else {
                    $gall->{$gname}{'func'}{$dbname}{$funcNum}++;
                    if(length {$e->{'descr'}} > 0) {
                        $gall->{$gname}{'select'}{$e->{'descr'}}{$dbname}{$funcNum}++;
                    }
                }
            } else {
                my $funcNum = 90; # ǽ̤
                $gall->{$gname}{'func'}{$dbname}{$funcNum}++;
                if(length {$e->{'descr'}} > 0) {
                    $gall->{$gname}{'err'}{$dbname}{$funcNum}{$e->{'descr'}}=1;
                }
            }
#            # ҾORF̤̾¸
#            $gall->{$gname}{'info'}{$e->{'name'}} = $e;
        }

        # ʻ饹ɽ롢̾ǽƥ̾
		my($genename, $product, $genefunc) = $self->getTypicalGeneInfo($gall);

        # äȤͥ٤⤤Τǡǽƥ̵꤬äΤˤĤƤ90줹롣
        if(!$genefunc) {
            $genefunc->{$dbname} = 90;
            my $er = "";
            foreach my $k (keys %{$gall->{$genename}{'err'}}) {
                if(length($er) < 1) {
                    $er = $k;
                } else {
                    $er .= " ". $k;
                }
            }
            $clst->{'err'} = $er; # 90errϿ
        }

        $clst->{'genename'} = $genename;
        $clst->{'product'}  = $product;
        $clst->{'function'}->{$dbname} = $genefunc->{$dbname};

        $counter++;
        my $dis = $counter % $main::PROGRESS_CREATE_TABLE;
        if($dis == 0) {
            $cmd = "echo \'create_result_table $counter\' >> $file";
            system($cmd);
        }
    }
}

###############################################################################
# ̾
#     sortGeneList
# 
#     MBGD̾
#
# 
#     $ghash : и٤Υͤޤϥå
#     $ghash ʰ̾=APP)
#       $ghash->{'APP'}          : (APP)ϰ̾
#       $ghash->{'APP'  }{'count'} : ̾νи
#       $ghash->{'APP'}{'prod'}  : ҤΥץ ʥϥåΥե)
#       $ghash->{'APP'}{'func'}  : Ҥεǽƥ(ϥåΥե)
#       $ghash->{'APP'}{'select'}: ̾ޤäȤˡб뵡ǽ̾
#                                  ץ̾򤹤뤿Υϥåġ
#
# 
#     \@gnames   󤵤줿ҤΥꥹ
#
# 
#
# 
#
sub sortGeneList {
	my $self = shift;
    my $ghash = shift;

    my $gname;  # ɽ̾
    my $gprod;  # ɽҤΥץ
    my $gfunc;  # ɽҤεǽƥ

    # ξ缡ɾͥ٤ꤹ롣
    # 1. ǽƥ꤬ܤƤ
    # 2. ̾νи()
    # 3. ̾ե٥åȽ
    my $evfnc  = sub {
		(abs(exists $ghash->{$b}{'func'}) <=> abs(exists $ghash->{$a}{'func'})) ||
			($ghash->{$b}{'count'} <=> $ghash->{$a}{'count'}) ||
    $a cmp $b
    };

    # ̤ͥ󤷤̾ΥꥹȤ
    my @gnames = sort $evfnc keys %{$ghash};
	
	return \@gnames;
}

###############################################################################
# ̾
#     getGeneInfoTop
#
# 
#     ʣ¸ߤ򣱤Ĥ˷ꤹ
#     Ƚʤäʣص塢Ĥ˷ꤹ롣
#     ̾Υե٥åȽ硣
#
# 
#     $ghash : ʣΰҥϥå
#
# 
#     Ǥޤꥹ
#     1)ɽ̾
#     2)ɽҤбץ
#     3)ɽҤб뵡ǽƥ̾Υϥå
#
# 
#
# 
#
sub getGeneInfoTop {
	my $self = shift;
	my $ghash = shift;

	my $gname;  # ɽ̾
    my $gprod;  # ɽҤΥץ
    my $gfunc;  # ɽҤεǽƥ

	my @glist = sort {$a cmp $b} (keys %{$ghash});
	$gname = shift(@glist);

    foreach my $gp (keys %{$ghash->{$gname}->{'select'}}) {
        $gprod = $gp;
        foreach my $db (keys %{$ghash->{$gname}->{'select'}->{$gp}}) {
            my @func = keys %{$ghash->{$gname}->{'select'}->{$gp}->{$db}};
            $gfunc->{$db} = $func[0];
        }
    }

	return ($gname, $gprod, $gfunc);
}

###############################################################################
# ̾
#     getGeneInfoDescrPoint
# 
#     Geneꥹ  description  word ʬ䤹롣 word ñ̤ǽи򥫥Ȥ롣
#     иݥȤȤGene Ȥ Description ιץݥȤ׻롣
#     ץݥȤֹ⤤ Description Ѥ롣
#
# 
#     $ghash : и٤Υͤޤϥå
#     $ghash ʰ̾=APP)
#       $ghash->{'APP'}          : (APP)ϰ̾
#       $ghash->{'APP'}{'count'} : ̾νи
#       $ghash->{'APP'}{'prod'}  : ҤΥץ ʥϥåΥե)
#       $ghash->{'APP'}{'func'}  : Ҥεǽƥ(ϥåΥե)
#       $ghash->{'APP'}{'select'}: ̾ޤäȤˡб뵡ǽ̾
#                                  ץ̾򤹤뤿Υϥåġ
#     $geneList : ¿Ȥʤäҥꥹ
#
# 
#    $ghash->{'APP'}{'select'} : ֥ݥȤ⤤Descriptionĥϥåʣ
#
# 
#
# 
#
sub getGeneInfoDescrPoint {
	my $self = shift;
	my $ghash = shift;

    # ݥȤκ
    # Ҥ description  wordñ̤ǶڤꥫȤ
    my $pointH;

	foreach my $gene (keys %{$ghash}) {
		my @info = keys(%{$ghash->{$gene}{'prod'}});
		foreach my $sent (@info) {
			if(length($sent) > 0) {
				# wordʬ䤹
				# ޤϽ
				$sent =~ s/,//g;
				foreach my $w (split/[\s]/,$sent) {
					$pointH->{$w}++;
				}
			}
		}
	}

    my $Hscore={};
    my $highdec;
    # ݥȤ׻˥ϥ
    foreach my $gene (keys %{$ghash}) {
        my @descr = keys(%{$ghash->{$gene}{'prod'}});
        foreach my $sent (@descr) {
            my $score = 0;
            if(length($sent) > 0) {
                foreach my $w (split/[\s,\,]/,$sent) {
                    $score += $pointH->{$w};
                }
            }

            if($highdec) {
                if($score > $highdec) {
                    $highdec = $score;
                    undef($Hscore);
                    foreach my $db (keys%{$ghash->{$gene}{'select'}{$sent}}) {
                        my @func = keys%{$ghash->{$gene}{'select'}{$sent}{$db}};
                        $Hscore->{$gene}{'select'}{$sent}{$db}{$func[0]}++;
                    }
                }
                elsif($score == $highdec) {
                    foreach my $db (keys%{$ghash->{$gene}{'select'}{$sent}}) {
						my @func = keys%{$ghash->{$gene}{'select'}{$sent}{$db}};
                        $Hscore->{$gene}{'select'}{$sent}{$db}{$func[0]}++;
                    }
                }
            } else {
                $highdec = $score;
                foreach my $db (keys%{$ghash->{$gene}{'select'}{$sent}}) {
                    my @func = keys%{$ghash->{$gene}{'select'}{$sent}{$db}};
                    $Hscore->{$gene}{'select'}{$sent}{$db}{$func[0]}++;
                }
            }
        }
    }
    return  ($Hscore);
}

###############################################################################
# ̾
#     getGeneInfoTopPri2
# 
#     MBGD̾ǽƥ̾ǺǤɽͥ٤⤤Τ
#     򤷡֤
# 
#     $ghash : и٤Υͤޤϥå
#     $ghash ʰ̾=APP)
#       $ghash->{'APP'}          : (APP)ϰ̾
#       $ghash->{'APP'}{'count'} : ̾νи
#       $ghash->{'APP'}{'prod'}  : ҤΥץ ʥϥåΥե)
#       $ghash->{'APP'}{'func'}{sourceDB}  : Ҥεǽƥ(ϥåΥե)
#       $ghash->{'APP'}{'select'}: ̾ޤäȤˡб뵡ǽ̾
#                                  ץ̾򤹤뤿Υϥåġ
#
# 
#     Ǥޤꥹ
#     1)ɽ̾
#     2)ɽҤбץ
#     3)ɽҤб뵡ǽƥ̾
# 
#
# 
#
sub getGeneInfoTopPri2 {
	my $self = shift;
    my $ghash = shift;

    my $gname;  # ɽ̾
    my $gprod;  # ɽҤΥץ
    my $gfunc;  # ɽҤεǽƥ

    # ξ缡ɾͥ٤ꤹ롣
    # 1. ǽƥ꤬ܤƤ
    # 2. ̾νи()
    # 3. ̾ե٥åȽ
	# ʣǡ١бʤΤǡʣбʤƤϤʤʤ
	# ɤμͥ褵뤫ɤΤ褦Ӥ뤫׸Ƥ

    my $evfnc  = sub {
		(abs(exists $ghash->{$b}{'func'}) <=> abs(exists $ghash->{$a}{'func'})) ||
			($ghash->{$b}{'count'} <=> $ghash->{$a}{'count'}) ||
        $a cmp $b
    };

    # ̤ͥ󤷤̾ΥꥹȤ
    my @gnames = sort $evfnc keys %{$ghash};

    ## ǥХå ##
    if($main::DEBUG) {
		print STDOUT "#-" . " -" x 30 . "\n";
		for(my $i = 0; $i < scalar(@gnames); $i++) {
			printf(STDOUT "gene[%d] : %-15s  ,count : %3d  ,func : %d [%s] ,prod : %d [%s]\n",
			$i + 1, $gnames[$i],
			   $ghash->{$gnames[$i]}{'count'},
			   scalar(keys %{$ghash->{$gnames[$i]}{'func'}}),
			   join("|", keys %{$ghash->{$gnames[$i]}{'func'}}),
			   scalar(keys %{$ghash->{$gnames[$i]}{'prod'}}),
			   join("|", keys %{$ghash->{$gnames[$i]}{'prod'}}),
			   );
		}
    }

    #
    # ̾İʾ¸ߤ, ꥹȤƬΰ̾
    #
    if(scalar(@gnames) > 0) {
		$gname = $gnames[0];

    # ꤷ̾б뵡ǽƥǺǤи٤⤤Τ
    # Ѥ롣Ʊ٤ξ硢ǽƥֹ椬㤤Τͥ褹롣
    # ޤбץ̾Ѥ롣
		my $gfh = $ghash->{$gname}{'func'};
		my @funclist = sort {$gfh->{$a} <=> $gfh->{$b} || $b <=> $a} keys %$gfh;
		$gfunc = pop @funclist;

    # ꤷ̾ǽƥǺǤ٤⤤ץ̾
    # 롣
		my $gpd = $ghash->{$gname}{'select'};
		my @prodlist = sort {$gpd->{$a} <=> $gpd->{$b} || length($a) <=> length($b)} keys %{$gpd};
		$gprod = pop @prodlist;
    ## ǥХå ##
		if($main::DEBUG) {
			print  STDOUT "#" . '=' x 72 . "\n";
			print  STDOUT "# daihyo gene\n";
			printf(STDOUT "# gene : %s\n", $gname);
			printf(STDOUT "# prod : %s\n", $gprod);
			printf(STDOUT "# func : %s\n", $gfunc);
			print  STDOUT "# \n";
			foreach my $g (@gnames) {
        #
				printf(STDOUT "count: %s -> %d\n",
               $g, $ghash->{$g}{'count'});
		#
				foreach my $f (sort keys %{$ghash->{$g}{'func'}}) {
					printf(STDOUT "func : %s -> %s -> %d\n",
				   $g, $f, $ghash->{$g}{'func'}{$f});
				}
        #
				foreach my $p (sort keys %{$ghash->{$g}{'prod'}}) {
					printf(STDOUT "prod : %s -> %s -> %d\n",
				   $g, $p, $ghash->{$g}{'prod'}{$p});
				}
        #
				foreach my $s (sort keys %{$ghash->{$g}{'select'}}) {
					printf(STDOUT "selct: %s -> %s -> %s -> %d\n",
						   $g, $s, each(%{$ghash->{$g}{'select'}{$s}}));
				}
			}
		}
    }
    #
    # ɽ̾Ȥεǽƥ̾꥿󤹤
    #
    ($gname, $gprod, $gfunc);
}

# cluster_resultơ֥spnameơ饹IDȥ֥饹ID
# бGeneInfo롣GeneInfoΥǡ
sub getGeneInfoMBGD_Cache {
	my $self = shift;
	my $db = shift;
	my $clusterID = shift;
	my $dbname = shift;
    $dbname = $main::FUNCTION_DB unless(defined $dbname);

	# cluster_resultΥǡ
	my $lines = selectAllClusterResult($db, $clusterID);

	foreach my $li (@$lines) {
		my($cid, $subid, $name) = @$li;
		# 饹IDȥ֥饹ID곺Ҥ뤫ɤåȤ
		# ޤ档
		# ѡ饹ȥ饹ʥ֥饹ˤΰա
		my @orfs;
        foreach my $n (split/ /, $name) {
            if($n=~/:/) {
                push(@orfs, $n);
            }
        }
		# spnameMBGDΰҥǡ
		my $ginfo = $self->getGeneInfo($dbname, @orfs);
		foreach my $e (@$ginfo) {
#			if($e->{'gene} eq 
			if(length($e->{'function_no'}) > 0) {
				my $funcName = MBGD::FuncCat::getFunctionCategory('MBGD',$e->{'sp'}, $e->{'function_no'});
				my $funcNum = MBGD::FuncCat::get_global_func_category($funcName);
			}
		}

	}
}

##############################################################################
1; #
##############################################################################
__END__
