#!/usr/bin/perl
use strict;
use FileHandle;
use MBGD::DB;

package GenomeData;

###############################################################################
#
sub new {
	my($class, %options) = @_;
	## Singleton
	if ($GenomeData::THE_DATA) {
		return $GenomeData::THE_DATA;
	}
	$GenomeData::THE_DATA = {};
	if ($options{datapath}) {
		$GenomeData::THE_DATA->{datapath} = $options{datapath};
	} else {
		$GenomeData::THE_DATA->{datapath} = "../data";
	}
	return bless $GenomeData::THE_DATA, $class;
}

###############################################################################
#
sub getInstance {
	## Do not create even if $THE_DATA is not defined
	return $GenomeData::THE_DATA;
}

###############################################################################
#
sub read_genome {
    my($this, $file_genomes) = @_;

	my(@species, @out_species);
	my($fh) = FileHandle->new("$file_genomes") || die "Can't open $file_genomes";
	while(my$line = $fh->getline()){
		$line =~ s#[\r\n]*$##;

		my($sp,$name,$length,$order,$origin) = split(/\s/, $line);
		$this->{length}->{$sp} = $length;
		$this->{name}->{$sp}   = $name;
		$this->{order}->{$sp}  = $order;
		$this->{origin}->{$sp} = $origin;
#		if (! $options{genome_only}) {
#			$this->read_genetab($sp);
#		}
		push(@species, $sp);
		push(@out_species, $sp) if ($order);
	}
	$fh->close();

	my($cmpr_ref) = sub {
		$this->{order}->{$a} <=> $this->{order}->{$b}
	};
	@out_species = sort $cmpr_ref @out_species;
	$this->{species}     = [ @species ];
	$this->{spnum}       = scalar(@species);
	$this->{out_species} = [ @out_species ];

    return;
}

###############################################################################
#
sub select_genome {
    my($this, $genomes, %options) = @_;

    my(@species) = split(',', $genomes);
    $this->{'species'}     = [ @species ];
    $this->{'spnum'}       = scalar(@{$this->{'species'}});
    $this->{'out_species'} = [ @species ];

    #
    my(%out_species_hash);
    foreach my$sp (@{$this->{'out_species'}}) {
        $out_species_hash{"$sp"} = 1;
    }

    my($dsn) = $options{datapath};
    my($db) = MBGD::DB->new($dsn);

    my($tab) = 'genome g, chromosome c';
    my($opt) = {};
    $opt->{'columns'} = 'g.sp as sp, g.orgname as name, sum(c.seq_length) as length';
    foreach my$sp (@{$this->{'species'}}) {
        $opt->{'where'} .= ',' if ($opt->{'where'} ne '');
        $opt->{'where'} .= "'$sp'";
    }
    $opt->{'where'} = 'g.sp in(' . $opt->{'where'} . ')';
    $opt->{'where'} .= ' and g.id=c.genome';
    $opt->{'group'} = 'g.id';
    my($refRes) = $db->select_fetch($tab, $opt);
    if ($db->exist_table("contig")) {
    	my($tab2) = 'genome g, contig c';
    	my($refRes2) = $db->select_fetch($tab2, $opt);
	push(@{$refRes->{INFO}}, @{$refRes2->{INFO}});
    }
    my($i) = 1;
    foreach my$ref (@{$refRes->{'INFO'}}) {
        my($sp) = $ref->{'sp'};
        my($name) = $ref->{'name'};
        my($length) = $ref->{'length'};
        my($origin) = '';

		$this->{'length'}->{$sp} = $length;
		$this->{'name'}->{$sp}   = $name;
		$this->{'origin'}->{$sp} = $origin;
        if (!exists($out_species_hash{"$sp"})) {
    		$this->{'order'}->{$sp}  = $i;
            $i++;
        }
    }

    return;
}

###############################################################################
#
sub read_file {
	my($this, %options) = @_;

	foreach my$sp (@{$this->{species}}) {
		$this->read_genetab($sp);
	}

    return;
}

###############################################################################
#
sub read_db {
	my($this, %options) = @_;

	foreach my$sp (@{$this->{species}}) {
		$this->select_genetab($sp);
	}

    return;
}

###############################################################################
#
sub read {
	my($class, $file_genomes, %options) = @_;
	my($this) = $class->new(%options);

	if (! $options{genome_only}) {
    	if ($options{datapath} =~ /^dbi\:/i) {
            $this->select_genome($file_genomes, %options);
        	$this->read_db(%options);
	    }
    	else {
            $this->read_genome($file_genomes, %options);
        	$this->read_file(%options);
		}
    }

	return $this;
}

###############################################################################
#
sub set_species {
	my($this, $species, $out_species) = @_;
	$this->{species} = $species;
	$this->{spnum} = scalar(@{$species});
	if ($out_species) {
		$this->{out_species} = $out_species;
	} else {
		$this->{out_species} = $species;
	}
}

###############################################################################
#
sub set_genetab {
    my($this, $sp, $name, $gene, $chrid, $from, $to, $dir, $type) = @_;

	my $pos = $this->pos_calc($sp, $from, $to);

	my $ent = {};
	$ent->{'name'}  = $name;
	$ent->{'gene'}  = $gene;
	$ent->{'chrid'} = $chrid;
	$ent->{'from'}  = $from;
	$ent->{'to'}    = $to;
	$ent->{'dir'}   = $dir;
	$ent->{'type'}  = $type;
	$ent->{'pos'}   = $pos;

	$this->{data}->{$sp}->{$name} = $ent;

    return;
}

###############################################################################
#
sub read_genetab {
	my($this, @SPLIST) = @_;
	@SPLIST = split(/,/, $::orgs) if (! @SPLIST);

	my($GENEDIR) = $this->{datapath};
	my($ln) = 0;
	foreach my $sp0 (@SPLIST) {
		my($fh) = FileHandle->new("$GENEDIR/$sp0.txt") || die "Can't open $GENEDIR/$sp0.txt";
		while (my$line = $fh->getline()) {
			$line =~ s#[\r\n]*$##;
			next if ($ln++==0);
			next if ($line =~ /^$/);
			my ($sp, $name, $gene, $from, $to, $dir, $type, $descr) = split(/\t/, $line);
            my($chrid);
			next if ($type ne 'CDS');

			$this->set_genetab($sp, $name, $gene, $chrid, $from, $to, $dir, $type);
		}
		$fh->close();
	}

    return;
}

###############################################################################
#
sub select_genetab {
	my($this, @SPLIST) = @_;
	@SPLIST = split(/,/, $::orgs) if (! @SPLIST);

    my($dsn) = $this->{datapath};
    my($db) = MBGD::DB->new($dsn);
    my($tab) = 'gene';
    my($opt) = {};

    #
    foreach my $sp0 (@SPLIST) {
        $opt->{'where'} = "sp='$sp0'";

        my($refRes) = $db->select_fetch($tab, $opt);
        foreach my$ref (@{$refRes->{'INFO'}}) {
	    if ($ref->{'contigid'}>0) {
		$ref->{'chrid'} = "c-$ref->{contigid}";
	    }
            $this->set_genetab( $ref->{'sp'},
                                $ref->{'name'},
                                $ref->{'gene'},
                                $ref->{'chrid'},
                                $ref->{'from1'},
                                $ref->{'to1'},
                                $ref->{'dir'},
                                $ref->{'type'},
                                );
        }
    }

    return;
}

###############################################################################
#
sub pos_calc {
	my($this, $sp, $from, $to, $unit) = @_;
	my $pos = ($from + $to) /2;
	die "$0: Error: zero length: $sp\n" if ($this->{length}->{$sp} == 0);
	$pos = ($pos - $this->{origin}->{$sp}) % $this->{length}->{$sp};
	if ($unit eq 'degree') {
		## $unit = 'degree';
		if ($pos < $this->{length}->{$sp} / 2) {
			$pos = $pos / $this->{length}->{$sp} * 360;
		} else {
			$pos = ($pos / $this->{length}->{$sp} - 1) * 360;
		}
	} else {
		if ($pos >= $this->{length}->{$sp} / 2) {
			$pos = $pos - $this->{length}->{$sp};
		}
		$pos /= 1000;
	}
	$pos = sprintf("%.1f", $pos);
	return $pos;
}

###############################################################################
#
sub get {
	my($this, @opt) = @_;
	$this->getGene(@opt);
}

###############################################################################
#
sub spnum {
	my($this) = @_;
	$this->{spnum} ? $this->{spnum} : scalar(@{ $this->{species} });
}

###############################################################################
#
sub getGene {
	my($this, $sp, $name) = @_;
	return $this->{data}->{$sp}->{$name};
}

###############################################################################
#
sub setSpGroup {
	my($this, $spGroup) = @_;
	foreach my $splist (split(/,/, $spGroup)) {
		my @spnames = split(/:/, $splist);
		foreach my$sp (@spnames) {
			$this->{SpGroup}->{$sp} = $spnames[0];
		}
	}
}

###############################################################################
#
sub getSpGroup {
	my($this, $sp) = @_;
	return $this->{SpGroup}->{$sp} ? $this->{SpGroup}->{$sp} : $sp;
}

###############################################################################
#
sub setSpIndex {
	my($this) = @_;
	my(%SpIdx, @SpList, $i);
	my($sp, $sp0);
	$i = 0;
	foreach $sp (@{ $this->{species} }) {
		if ($this->getSpGroup($sp) eq $sp) {
			$SpIdx{$sp} = $i;
			$SpList[$i] = $sp;
			$i++;
		}
	}
	$this->{spnum} = $i;
	foreach $sp (@{ $this->{species} }) {
		if (($sp0 = $this->getSpGroup($sp)) ne $sp) {
			$SpIdx{$sp} = $SpIdx{ $sp0 };
		}
	}
	$this->{SpIdx} = \%SpIdx;
	$this->{SpList} = \@SpList;
}

###############################################################################
#
sub getSpIndex {
	my($this, $sp) = @_;
	$this->{SpIdx}->{$sp};
}

###############################################################################
#
sub getPhyloPat {
	my($this, $species, $opt) = @_;
	my $vect = BitVector->new(0,$this->spnum);
	foreach my$sp (@{$species}) {
		$vect->add( $this->getSpIndex($sp) );
	}
	$vect->toString;
}

###############################################################################
#
sub countSpecies {
	my($this, $species, %opt) = @_;
	my(%tmpFoundIn, %tmpFoundOut);
	foreach my$sp (@{$species}) {
		if ($this->isOutGroup($sp)) {
			$tmpFoundOut{$this->getSpGroup($sp)} = 1;
		} else {
			$tmpFoundIn{$this->getSpGroup($sp)} = 1;
		}
	}
	if ($opt{separate_outgrp}){
		return (scalar(keys %tmpFoundIn), scalar(keys %tmpFoundOut));
	} elsif ($opt{without_outgrp}){
		return scalar(keys %tmpFoundIn);
	} else {
		return scalar(keys %tmpFoundIn) + scalar(keys %tmpFoundOut);
	}
}

###############################################################################
#
sub setOutGroup {
	my($this, $outGroup) = @_;
	foreach my$sp (split(/,/, $outGroup)) {
		$this->{OutGroup}->{$sp} = 1;
	}
}

###############################################################################
#
sub isOutGroup {
	my($this, $sp) = @_;
	return $this->{OutGroup}->{$sp};
}

###############################################################################
#
sub isOutGroupIndex {
	my($this, $idx) = @_;
	return $this->isOutGroup( $this->{SpList}->[$idx] );
}

###########################################################
package BitVector;

###############################################################################
#
sub new {
	my($class, $vect, $length) = @_;
	my($this) = {};
	$this->{vect} = $vect;
	$this->{length} = $length;
	return bless $this, $class;
}

###############################################################################
#
sub add {
	my($this, $idx) = @_;
	my($add_vect)= (1<<$idx);
	if ( ($this->{vect} & $add_vect) == 0 ) {
		$this->{cnt}++;
		$this->{vect} |= $add_vect;
	}
}

###############################################################################
#
sub and {
	my($this, $other) = @_;
	BitVector->new($this->{vect} & $other->{vect});
}

###############################################################################
#
sub or {
	my($this, $other) = @_;
	BitVector->new($this->{vect} | $other->{vect});
}

###############################################################################
#
sub contain {
	my($this, $other) = @_;
	my $or_vect = ($this->{vect} | $other->{vect});
	if ($or_vect == $this->{vect}) {
		return 1;
	} elsif ($or_vect == $other->{vect}) {
		return -1;
	}
}

###############################################################################
#
sub toString {
	my($this) = @_;
	my $vect = $this->{vect};
	my $bitstr;
	my $i;
	while ($vect) {
		my$c = $vect & 1;
		$bitstr .= "$c";
		$vect >>= 1;
		last if (++$i > $this->{length});
	}
	while ($i++ < $this->{length}) {
		$bitstr .= "0";
	}
	$bitstr;
}

###############################################################################
#
sub print {
	my($this) = @_;
	print $this->toString, "\n";
}

###########################################################
package FuncCat;
$FuncCat::GeneFuncFname = "geneFunction";
$FuncCat::FuncInfoFname = "function";
$FuncCat::FuncColorFname = "colorTab";

###############################################################################
#
sub new {
	my($class, $dir, $sp, $db, %opt) = @_;
	my($this) = {};
	$this->{dir} = $dir;
	$this->{db} = $db;
	bless $this, $class;
	if ($db) {
		$this->readFuncInfo($db);
		$this->readFuncColor($db);
	}
	if ($opt{default_color}) {
		$this->{default_color} = $opt{default_color};
	}
	return $this;
}

###############################################################################
#
sub setGeneFunc {
	my($this, $sp, $name, $db, $func) = @_;

	push(@{$this->{Func}->{$db}->{$sp}->{$name}}, $func);

    return;
}

###############################################################################
#
sub readGeneFunc_file {
	my($this, $sp, %options) = @_;

    #
	my($db) = $this->{db} if (! $options{db});

    my($filename) = "$this->{dir}/${FuncCat::GeneFuncFname}.$sp.$db";
    my($fh) = FileHandle->new($filename);
	if (!$fh) {
		warn "Can't open funcfile for $sp.$db\n";
		return 0;
	}
	while(my$line = $fh->getline()) {
		$line =~ s#[\r\n]*$##;

		my($sp, $name, $db, $func) = split(/\t/, $line);

		$this->setGeneFunc($sp, $name, $db, $func);
	}
	close(F);

    return;
}

###############################################################################
#
sub selectGeneFunc {
	my($this, $sp, %options) = @_;
	my($db) = $this->{db} if (! $options{db});

    my($dsn) = $this->{dir};
    my($db) = MBGD::DB->new($dsn);
    my($tab) = 'gene_category';
    my($opt) = {};
    $opt->{'where'} = "sp='$sp' and dbname='$db' and orig_id is not null";
    my($refRes) = $db->select_fetch($tab, $opt);
    foreach my$ref (@{$refRes->{'INFO'}}) {
        $this->setGeneFunc($ref->{'sp'},
                           $ref->{'name'},
                           $ref->{'dbname'},
                           $ref->{'level'},
                        );
    }

    return;
}

###############################################################################
#
sub readGeneFunc {
	my($this, $sp, %options) = @_;

    if ($this->{dir} =~ /^dbi\:/i) {
        $this->selectGeneFunc($sp, %options);
    }
    else {
        $this->readGeneFunc_file($sp, %options);
    }

    return;
}

###############################################################################
#
sub getGeneFunc {
	my($this, $name, $db, $idx) = @_;
	my($sp, $gene) = split(/:/, $name);
	$db = $this->{db} if (! $db);
	if (! $this->{Func}->{$db}->{$sp}) {
        my(%options) = ('db' => $db,
                        );
		$this->readGeneFunc($sp, %options);
	}
	if (! $this->{Func}->{$db}->{$sp}->{$gene}) {
		$this->{Func}->{$db}->{$sp}->{$gene}
				= $this->getDefaultFunc($db);
	}
	if (defined $idx) {
		return $this->{Func}->{$db}->{$sp}->{$gene}->[$idx];
	} else {
		## arrayref
		return $this->{Func}->{$db}->{$sp}->{$gene};
	}
}

###############################################################################
#
sub getGeneFunc1 {
	my($this, $name, $db) = @_;
	return $this->getGeneFunc($name,$db, 0);
}

###############################################################################
#
sub readFuncInfo {
	my($this, $db) = @_;
	$db = $this->{db} if (! $db);
	my $funcfile = "$this->{dir}/${FuncCat::FuncInfoFname}.$db";
	if (! open(F, $funcfile) ) {
		warn "Can't open funcinfo file of $db ($funcfile)\n";
		return 0;
	}
	while(<F>) {
		chomp;
		if (/^#(\S+)\s+(\S+)/) {
			my$var = $1;
            my$val = $2;
			if ($var =~ /^default$/) {
				$this->{Variable}->{$db}->{$var} = $val;
			}
		}
		my($db,$func,$name,$code) = split(/\t/);
		$this->{FuncInfo}->{$db}->{$func}->{name} = $name;
		$this->{FuncInfo}->{$db}->{$func}->{code} = $code;
	}
	close(F);
}

###############################################################################
#
sub getFuncInfo {
	my($this, $func, $infotype, $db) = @_;
	$db = $this->{db} if (! $db);
	if (! $this->{FuncInfo}->{$db}) {
		$this->readFuncInfo($db);
		$this->readFuncColor($db);
	}
	if ($infotype) {
		return $this->{FuncInfo}->{$db}->{$func}->{$infotype};
	} else {
		return $this->{FuncInfo}->{$db}->{$func};
	}
}

###############################################################################
#
#sub listColorDefinedFuncID {
#	my($this, $db) = @_;
#	push(@{$this->{DefColors}}, $func);
#}

###############################################################################
#
sub listFuncID {
	my($this, $db, %opt) = @_;
	$db = $this->{db} if (! $db);
	my @funcIDs =  (sort keys %{ $this->{FuncInfo}->{$db} });
	if ($opt{colordef}) {
		my @newFuncIDs;
		foreach my$func (@funcIDs) {
			if (defined $this->{FuncInfo}->{$db}->{$func}->{color}){
				push(@newFuncIDs, $func);
			}
		}
		return @newFuncIDs;
	} else {
		return@funcIDs;
	}
}

###############################################################################
#
sub getDefaultFunc {
	my($this, $db) = @_;
	$db = $this->{db} if (! $db);
	$this->{Variable}->{$db}->{default};
}

###############################################################################
#
sub readFuncColor {
	my($this, $db) = @_;
	$db = $this->{db} if (! $db);
	my $funcfile="$this->{dir}/${FuncCat::FuncColorFname}.$db";
	if (! open(F, $funcfile) ) {
		warn "Can't open funccolor file of $db ($funcfile)\n";
		return 0;
	}
	while(<F>) {
		chomp;
		my($db,$func,$color) = split(/\t/);
		$this->{FuncInfo}->{$db}->{$func}->{color} = $color;
	}
	close(F);
}

###############################################################################
#
sub getFuncColor {
	my($this, $func, $db) = @_;
	$db = $this->{db} if (! $db);
	if (! $this->{FuncInfo}->{$db}) {
		$this->readFuncColor($db);
	}
	if ( $this->{FuncInfo}->{$db}->{$func}->{color} ) {
		return $this->{FuncInfo}->{$db}->{$func}->{color};
	} else {
		my(@func) = split(/\./, $func);
		for (my$n = $#func - 1; $n >= 0; $n--) {
			my$func = join(".", @func[0..$n]);
print STDERR ">>>$func\n";
			if ( $this->{FuncInfo}->{$db}->{$func}->{color} ) {
				return $this->{FuncInfo}->{$db}->{$func}->{color};
			}
			
		}
		if ($this->{default_color}) {
			$this->{default_color};
		} else {
			$func = $this->getDefaultFunc;
			return $this->{FuncInfo}->{$db}->{$func}->{color};
		}
	}
}

###########################################################
package main;
if (__FILE__ eq $0) {
	my$g = GenomeData->read("../data.test/genomes");
	my$d = $g->get("bsu","BSU00110"),"\n";
	print "$d,$d->{from},$d->{to}\n";
	my$funcCat = FuncCat->new("../func", "cog");
	my$func = $funcCat->getGeneFunc("eco:B0002");
	my$color = $funcCat->getFuncColor($func);
	print "eco:B0002, $func,$color\n";
	$func = $funcCat->getGeneFunc("eco:B0014");
	$color = $funcCat->getFuncColor($func);
	print "eco:B0014, $func,$color\n";

	my$b = BitVector->new;
	my$c = BitVector->new;
	$b->add(1); $b->add(2); $b->add(5); $b->add(3); $b->add(2);
	$b->print;
	print $b->{cnt},"\n";

	$c->add(3); $c->add(2); $c->add(3);
	$c->print;
	print $c->{cnt},"\n";

	($b->or($c))->print;

	print $b->contain($c),"\n";
	print $c->contain($b),"\n";
}
###########################################################
1;
