#!/usr/bin/perl
#use strict;
use FileHandle;
package ClustTab;
#use GeneData;

###############################################################################
# Sentinel "very large" value (set_cid_list uses it as the sort key of
# clusters that have no member on the reference genome).  A true
# $main::BIGVALUE that is already set when this file is loaded overrides
# the built-in default.
$ClustTab::BIGVALUE = $main::BIGVALUE ? $main::BIGVALUE : 9999999999999;

###############################################################################
#
# Constructor.  Creates an empty object blessed into $class and hands all
# remaining key/value arguments to setOpt().  Returns the new object.
sub new {
	my $class = shift;
	my %opt = @_;

	my $this = bless({}, $class);
	$this->setOpt(%opt);

	return $this;
}

###############################################################################
#
# Copies every key/value pair of %opt onto the object hash, overwriting
# any entry that is already stored under the same key.
sub setOpt {
	my $this = shift;
	my %opt = @_;

	$this->{$_} = $opt{$_} for (keys %opt);
}

###############################################################################
#
# Registers the genes of one cluster.
#   $clustid   - cluster ID stored in the 'clust' field of every record
#   $genes_ref - array ref holding one entry per species, in the order of
#                $this->{species}; each entry is a space-separated list of
#                gene names in the syntax understood by parse_name()
#   %OPT       - MIN_SPCNT: minimum species count the cluster must reach
#                (as returned by genomeData->countSpecies with
#                without_outgrp => 1) to be kept
# Genes for which $this->{genomeData}->get($sp, $name) returns nothing are
# skipped.  The records of an accepted cluster are appended to
# $this->{data_all}.  Returns nothing.
sub set_genes {
	my($this, $clustid, $genes_ref, %OPT) = @_;
	my(%foundSp, @TMP_ClustData);
	my $i = 0;
	foreach my $genes (@{$genes_ref}) {
		my $sp = $this->{species}->[$i];
		$i++;
		# an empty or missing column means "no gene for this species"
		next if (! defined($genes) || $genes eq '');
		foreach my $g (sort split(/ /, $genes)) {
			# the species prefix embedded in the name is ignored; the
			# column position decides which species the gene belongs to
			my(undef, $name, $dom) = $this->parse_name($g);
			my $gdata = $this->{genomeData}->get($sp, $name);
			next if (! $gdata);
			$foundSp{$sp} = 1;
			push(@TMP_ClustData, {
				'sp'    => $sp,
				'name'  => $name,
				'dom'   => $dom,
				'clust' => $clustid,
				'chrid' => $gdata->{'chrid'},
				'pos'   => $gdata->{'pos'},
				'gene'  => $gdata->{gene},
				'dir'   => $gdata->{dir},
			});
		}
	}
	my @spList = keys %foundSp;
	my $spcnt = $this->{genomeData}->countSpecies(\@spList, without_outgrp => 1);

	if ($spcnt >= $OPT{'MIN_SPCNT'}) {
		## incorporate only conserved clusters
		push(@{$this->{data_all}}, @TMP_ClustData);
	}

	return;
}

###############################################################################
#
# Reads a cluster table from a tab-separated file and registers every
# cluster through set_genes().
#   $clusttab   - file name, passed as-is to FileHandle->new
#   $genomeData - not used here (set_genes reads $this->{genomeData})
#   %OPT        - 'rmhash' is copied onto the object when true; the whole
#                 hash is forwarded to set_genes()
# Two layouts are recognised:
#   * a header line starting with '#': "#<TAB>sp1<TAB>sp2..." followed by
#     lines "clustid<TAB>genes_sp1<TAB>genes_sp2..."
#   * a header line starting with "ClusterID<TAB>" ('mbgd' layout): the two
#     leading and the two trailing header words are not species, and data
#     lines carry one extra column between the cluster ID and the genes
# Resets $this->{data_all} and sets $this->{species}.  Dies when the file
# cannot be opened.  Returns nothing.
sub read_clusttab_file {
	my($this, $clusttab, $genomeData, %OPT) = @_;
	my(@species);
	my $format = '';

	if ($OPT{"rmhash"}) {
		$this->{'rmhash'} = $OPT{'rmhash'};
	}

	$this->{data_all} = [];

	my $fh = FileHandle->new($clusttab)
		|| die "Can't open clusttab: $clusttab: $!";
	# explicit defined(): a final line consisting of "0" must not end the loop
	while (defined(my $line = $fh->getline())) {
		$line =~ s#[\r\n]*$##;
		if ($line =~ /^#/) {
			(undef, @species) = split(/\t/, $line);
			$this->{species} = [ @species ];
		} elsif ($line =~ /^ClusterID\t/) {
			$format = 'mbgd';
			(undef, undef, @species) = split(/\s/, $line);
			pop(@species); # Gene
			pop(@species); # FuncCat
			$this->{species} = [ @species ];
		} else {
			my($clustid, @Genes);
			my(@Fields) = split(/\t/, $line);
			if ($format eq 'mbgd') {
				# field 1 is skipped; one gene column per species follows
				($clustid, undef, @Genes) = @Fields[0..$#species+2];
			} else {
				($clustid, @Genes) = @Fields;
			}

			$this->set_genes($clustid, \@Genes, %OPT);
		}
	}
	$fh->close();

	return;
}

###############################################################################
#
# Older variant of set_genes_db().  Every element of $cluster_list_ref is a
# hash ref whose 'data' value is a tab-separated row; the non-empty fields
# of all rows are merged column by column (joined with a single space) and
# the merged columns are passed to set_genes() under cluster ID $cid.
# Columns 0 .. scalar(@{$this->{species}}) are examined.
sub set_genes_db_old {
    my($this, $cid, $cluster_list_ref, %OPT) = @_;

    my $last_col = scalar(@{$this->{species}});
    my @genes;
    foreach my $row (@{$cluster_list_ref}) {
        my @cols = split(/\t/, $row->{'data'});

        foreach my $col (0 .. $last_col) {
            next if ($cols[$col] eq '');
            if ($genes[$col] ne '') {
                $genes[$col] .= " ";
            }
            $genes[$col] .= $cols[$col];
        }
    }

    $this->set_genes($cid, \@genes, %OPT);

    return;
}

###############################################################################
#
# Converts database entries of one cluster into the per-species gene
# columns expected by set_genes() and registers them under $cid.
#   $entig_list_ref, $entog_list_ref - array refs of hash refs carrying
#       'sp', 'name' and optionally 'dom' (read_clusttab_db passes the
#       rows with subclustid != 0 and subclustid == 0 respectively)
#   %OPT - forwarded unchanged to set_genes()
# A gene with a true 'dom' is written as "name(dom)".  Entries whose
# species is not listed in $this->{species} are skipped.  Returns nothing.
sub set_genes_db {
    my($this, $cid, $entig_list_ref, $entog_list_ref, %OPT) = @_;

    # one (initially empty) column per species, indexed like $this->{species}
    my(%sp2idx, @genes);
    foreach my $sp (@{$this->{species}}) {
        $sp2idx{"$sp"} = scalar(@genes);
        push(@genes, '');
    }

    foreach my $ref (@{$entig_list_ref}, @{$entog_list_ref}) {
        my $sp  = $ref->{'sp'};
        my $idx = $sp2idx{"$sp"};
        # An undefined index would be used as 0 and attach the gene to the
        # first species; ignore entries of species that have no column.
        next if (! defined $idx);

        my $name = $ref->{'name'};
        if ($ref->{'dom'}) {
            $name .= "($ref->{'dom'})";
        }

        $genes[$idx] .= " " if ($genes[$idx] ne '');
        $genes[$idx] .= $name;
    }

    $this->set_genes($cid, \@genes, %OPT);

    return;
}

###############################################################################
#
# Reads a cluster table from the database behind $dsn (through MBGD::DB)
# and registers every cluster or subcluster through set_genes_db().
#   $genomeData - not used here
#   %OPT        - CLUST_TAB_ID: ID of the cluster table to load
#                 mode: a value matching /^cluster/i groups rows by clustid
#                 (registered as "<clustid>_1"); anything else groups them
#                 by (clustid, subclustid), registered as
#                 "<clustid>_<subclustid>"
#                 the whole hash is forwarded to set_genes_db()
# The species list is taken from the -SPEC=... option of the 'cmd' column
# of the matching cluster_tables_idx row.  Rows with subclustid 0 are
# collected separately (outgroup list) and, in subcluster mode, passed
# along with every subcluster of the same clustid.
# Prints a message to STDERR and dies when CLUST_TAB_ID is unknown.
# NOTE(review): $tabid is interpolated into the WHERE clause and the table
# name without quoting; it presumably comes from trusted configuration.
sub read_clusttab_db {
    my($this, $dsn, $genomeData, %OPT) = @_;
    my($tabid) = $OPT{'CLUST_TAB_ID'};
    my $cluster_mode = ($OPT{'mode'} =~ /^cluster/i) ? 1 : 0;

    my($db) = MBGD::DB->new($dsn);

    # species list of this cluster table
    my $idx_opt = { 'where' => sprintf("clusterID='%s'", $tabid) };
    my $idx_res = $db->select_fetch("cluster_tables_idx", $idx_opt);
    if ($idx_res->{'ROWS'} == 0) {
        print STDERR "No cluster-id found.(CLUSTER_ID=$tabid)\n";
        die("");
    }
    my($splist) = ($idx_res->{'INFO'}->[0]->{'cmd'} =~ /\-SPEC\=(\S+)/i);
    $this->{species} = [ split(/,/, $splist) ];

    # cluster members, ordered so that one group is a run of adjacent rows
    my $cache_tab = sprintf("cluster_domclust_cache_%s", $tabid);
    my $cache_opt = { 'where' => "", 'order' => "clustid, subclustid" };
    my $cache_res = $db->select_fetch($cache_tab, $cache_opt);

    my(@ent_list_ig, @ent_list_og);
    my($prev_clustid, $prev_subclustid);
    foreach my $ref (@{$cache_res->{'INFO'}}) {
        my $clustid    = $ref->{'clustid'};
        my $subclustid = $ref->{'subclustid'};

        if ($cluster_mode) {
            # Cluster mode: flush when the cluster ID changes
            if ($clustid != $prev_clustid) {
                if (@ent_list_ig || @ent_list_og) {
                    my $cid = sprintf("%s_%s", $prev_clustid, 1);
                    $this->set_genes_db($cid, \@ent_list_ig, \@ent_list_og, %OPT);
                }
                @ent_list_ig = ();
                @ent_list_og = ();
            }
        }
        else {
            # SubCluster mode: flush when cluster or subcluster ID changes;
            # subclustid 0 is never registered as a group of its own
            if (($clustid != $prev_clustid) || ($subclustid != $prev_subclustid)) {
                if ($prev_subclustid != 0 && (@ent_list_ig || @ent_list_og)) {
                    my $cid = sprintf("%s_%s", $prev_clustid, $prev_subclustid);
                    $this->set_genes_db($cid, \@ent_list_ig, \@ent_list_og, %OPT);
                }
                @ent_list_ig = ();
            }
            # the outgroup list is shared by all subclusters of a cluster
            if ($clustid != $prev_clustid) {
                @ent_list_og = ();
            }
        }

        if ($subclustid == 0) {
            push(@ent_list_og, $ref);
        }
        else {
            push(@ent_list_ig, $ref);
        }

        $prev_clustid    = $clustid;
        $prev_subclustid = $subclustid;
    }

    # flush the last group, using the same conditions as inside the loop
    if ($cluster_mode) {
        if (@ent_list_ig || @ent_list_og) {
            my $cid = sprintf("%s_%s", $prev_clustid, 1);
            $this->set_genes_db($cid, \@ent_list_ig, \@ent_list_og, %OPT);
        }
    }
    elsif (@ent_list_ig && $prev_subclustid != 0) {
        my $cid = sprintf("%s_%s", $prev_clustid, $prev_subclustid);
        $this->set_genes_db($cid, \@ent_list_ig, \@ent_list_og, %OPT);
    }

    return;
}

###############################################################################
#
# Class method: builds a new object, stores $genomeData on it and loads
# the cluster table named by $clusttab.  A $clusttab beginning with "dbi:"
# (case-insensitive) is read with read_clusttab_db(), anything else with
# read_clusttab_file().  Returns the new object.
sub read_clusttab {
	my($class, $clusttab, $genomeData, %OPT) = @_;

	my $this = $class->new;
	$this->{genomeData} = $genomeData;

	my $reader = ($clusttab =~ /^dbi\:/i)
		? 'read_clusttab_db'
		: 'read_clusttab_file';
	$this->$reader($clusttab, $genomeData, %OPT);

	return $this;
}

###############################################################################
#
# Splits a gene specification of the form "sp:name(dom)" into its parts.
# The "(dom)" part (digits in parentheses) and the "sp:" prefix are both
# optional; a missing part is returned as undef.
# Returns the list ($sp, $name, $dom).
sub parse_name {
	my($this, $name) = @_;

	my $dom;
	$dom = $1 if ($name =~ s/\((\d+)\)//);

	my $sp;
	($sp, $name) = split(/:/, $name) if ($name =~ /:/);

	return ($sp, $name, $dom);
}

###############################################################################
#
# Builds the lookup structures from $this->{data_all}:
#   $this->{ClustData}->{$clustid}->{$sp}    list of records of the cluster
#   $this->{ClustData}->{$clustid}->{spcnt}  species count of the cluster
#                                            (genomeData->countSpecies with
#                                            without_outgrp => 1)
#   $this->{SpData}->{$sp}                   records of the species sorted
#                                            by chrid, pos and dir*dom;
#                                            each record receives its rank
#                                            in the 'order' field
# Finally calls set_cid_list() with the reference species.
#   $refsp - reference species; when omitted, the global $main::refsp is
#            used
sub make_index {
	my($this, $refsp) = @_;
	# keep working for callers that rely on the global instead of the argument
	$refsp = $::refsp if (! defined $refsp);

	$this->{ClustData} = {};
	$this->{SpData} = {};

	foreach my $data (@{$this->{data_all}}) {
		my $sp = $data->{sp};
		my $clustid = $data->{clust};
		push(@{$this->{SpData}->{$sp}}, $data);
		push(@{$this->{ClustData}->{$clustid}->{$sp}}, $data);
	}
	foreach my $clustid (keys %{$this->{ClustData}}) {
		# species of this cluster, in the order of $this->{species}
		my @spList = grep { exists($this->{ClustData}->{$clustid}->{$_}) }
			@{$this->{species}};
		$this->{ClustData}->{$clustid}->{spcnt} =
			$this->{genomeData}->countSpecies(
				\@spList, without_outgrp=>1 );
	}

	foreach my $sp (@{$this->{species}}) {
		my @Pos = sort {
				$a->{chrid} cmp $b->{chrid}
			||
				$a->{pos} <=> $b->{pos}
			||
				$a->{dir} * $a->{dom} <=> $b->{dir} * $b->{dom}
			}
			@{ $this->{SpData}->{$sp} || [] };
		my $idx = 0;
		foreach my $d (@Pos) {
			$d->{order} = $idx++;
		}
		$this->{SpData}->{$sp} = \@Pos;
	}
	return $this->set_cid_list($refsp);
}

###############################################################################
#
# Stores the list of cluster IDs (the keys of $this->{ClustData}) in
# $this->{CID}.
#   $refsp - optional reference species.  When given, the IDs are sorted by
#            the position that genomeData->get() reports for the first
#            member of each cluster in that species.  Clusters without such
#            a member, or whose member has no position, are sorted to the
#            end using $ClustTab::BIGVALUE as their key.
# Returns the array ref that was stored.
sub set_cid_list {
	my($this, $refsp) = @_;
	my @CID = keys %{$this->{ClustData}};

	if ($refsp) {
		## sorting by the positions on the reference genome
		my @sortkey;
		foreach my $cid (@CID) {
			my $pos;
			if (my $refdata = $this->getClustData1($cid, $refsp)) {
				my $tmpd = $this->{genomeData}->get($refsp, $refdata->{name});
				$pos = $tmpd->{pos} if ($tmpd);
			}
			# an undefined key would compare as 0 and sort first
			push(@sortkey, defined($pos) ? $pos : $ClustTab::BIGVALUE);
		}

		@CID = @CID[ sort { $sortkey[$a] <=> $sortkey[$b] } (0..$#CID) ];
	}
	return $this->{CID} = \@CID;
}

###############################################################################
#
# Accessor for the cluster ID list built by set_cid_list().
# Without $id the array ref $this->{CID} is returned, otherwise the
# element at index $id.
sub CID {
	my($this, $id) = @_;
	return $this->{CID} unless (defined $id);
	return $this->{CID}->[$id];
}

###############################################################################
#
# Returns the species count stored for cluster $cid (the 'spcnt' entry
# written by make_index()).
sub getClustSpCnt {
	my $this = shift;
	my($cid) = @_;
	return $this->{ClustData}->{$cid}->{spcnt};
}

###############################################################################
#
# Looks up cluster data.
#   $cid - cluster ID; a trailing "#<digits>" is removed first when
#          $this->{rmhash} is true
#   $sp  - optional species
#   $idx - optional index into the species' record list
# Returns the whole entry of the cluster when $sp is false, the record
# list of the species when only $sp is given (or when the species has no
# list), and the single record at $idx otherwise.
sub getClustData {
	my($this, $cid, $sp, $idx) = @_;
	if ($this->{rmhash}) {
		$cid =~ s/#\d+$//;	# remove hash_numbers
	}

	return $this->{ClustData}->{$cid} unless ($sp);

	my $members = $this->{ClustData}->{$cid}->{$sp};
	return $members->[$idx] if (defined $idx && $members);
	return $members;
}

###############################################################################
#
# Shortcut for getClustData($cid, $sp, 0): the first record of species $sp
# in cluster $cid.
sub getClustData1 {
	my $this = shift;
	my($cid, $sp) = @_;
	return $this->getClustData($cid, $sp, 0);
}

###############################################################################
#
# Stores $data as the entry of species $sp in cluster $cid, replacing any
# previous entry.  Returns the stored value.
sub setClustData {
	my $this = shift;
	my($cid, $sp, $data) = @_;
	return ($this->{ClustData}->{$cid}->{$sp} = $data);
}

###############################################################################
#
# Plain function (the cluster ID is the first argument, there is no
# object): returns 1 when $cid contains "#" followed by a digit, else 0.
sub is_new_cid {
	my $cid = shift;
	return ($cid =~ /#\d+/) ? 1 : 0;
}

###############################################################################
#
# Plain function (the cluster ID is the first argument, there is no
# object): returns the digits following the first "#<digits>" in $cid,
# or -1 when $cid has no such part.
sub get_newid_idx {
	my $cid = shift;
	return ($cid =~ /#(\d+)/) ? $1 : -1;
}

###############################################################################
#
# Assigns the records in $dataList to cluster $newid.
#   $dataList - array ref of gene records; their 'clust' field is set to
#               $newid
#   $newid    - target cluster ID
#   $sp       - optional species.  When given, $dataList replaces the
#               record list of that species in the cluster; otherwise the
#               whole cluster entry is replaced by the records grouped by
#               their 'sp' field.
# Records that occupied the replaced slot(s) before are marked by setting
# their 'clust' field to 'deleted', unless their cluster ID contains '#'.
sub changeClustID {
	my($this, $dataList, $newid, $sp) = @_;
	my(%tmp_spdata);

	# Without $sp, getClustData() returns the per-species hash of the
	# cluster rather than one record list; collect the records of every
	# species and skip non-list entries such as 'spcnt'.
	my $current = $this->getClustData($newid, $sp);
	my @previous;
	if (ref($current) eq 'ARRAY') {
		@previous = @{$current};
	} elsif (ref($current) eq 'HASH') {
		@previous = map { @{$_} } grep { ref($_) eq 'ARRAY' } values %{$current};
	}
	foreach my $d (@previous) {
		## Delete the original assignment (move to 'deleted' group)
		## Do not remove a reassigned clustid that contains a hash mark
		$d->{clust} = 'deleted' if ($d->{clust} !~ /#/);
	}
	foreach my $d (@{$dataList}) {
		$d->{clust} = $newid;
		if (! $sp) {
			push(@{ $tmp_spdata{$d->{sp}} }, $d);
		}
	}
	if ($sp) {
		$this->{ClustData}->{$newid}->{$sp} = $dataList;
	} else {
		$this->{ClustData}->{$newid} = \%tmp_spdata;
	}
}

###############################################################################
#
# Accessor for the position-sorted gene records of species $sp (built by
# make_index()).
#   $i - optional index.  When it contains a digit, the record at that
#        index is returned; the index is taken modulo the number of genes,
#        so indices past either end wrap around (for circular genomes).
#        Without it, the whole record list (array ref) is returned.
# Returns nothing when an index is requested for a species that has no
# genes.
sub getSpData {
	my($this, $sp, $i) = @_;
	my $genes = $this->{SpData}->{$sp};

	return $genes if (! defined($i) || $i !~ /\d/);

	my $numgenes = $genes ? scalar(@{$genes}) : 0;
	# a modulus of zero would be a fatal error
	return if ($numgenes == 0);

	# for circular genomes
	return $genes->[$i % $numgenes];
}

###############################################################################
#
# Placeholder: iterates over the cluster IDs in $this->{ClustData}, but the
# loop body is empty, so $order is currently unused and no ordering is
# applied.
sub set_order {
	my($this, $order) = @_;
	foreach my $clid (keys %{$this->{ClustData}}){
	}
}

###############################################################################
#
# Renumbers the cluster IDs found in $this->{data_all}.
# IDs are grouped by their base ID (everything before the first '.' or
# '#').  A base with a single ID is renamed to the bare base.  A base with
# several IDs gets "<base>_<n>" names in string-sorted order starting at 1;
# an ID equal to the base itself becomes "<base>_0".
# Updates the 'clust' field of every record and returns a hash ref mapping
# old IDs to new IDs.
sub renum_clustid {
	my($this) = @_;
	my(%variants_of, %ConvCID);

	# collect the distinct IDs of every base ID
	foreach my $entry (@{$this->{data_all}}) {
		my $cid = $entry->{clust};
		(my $base = $cid) =~ s/[\.\#].*$//;
		$variants_of{$base}->{$cid} ||= 1;
	}

	# decide the new name of every ID
	foreach my $base (keys %variants_of) {
		my @cids = sort keys %{$variants_of{$base}};
		if (@cids <= 1) {
			$ConvCID{$cids[0]} = $base;
			next;
		}
		if ($cids[0] eq $base) {
			$ConvCID{shift(@cids)} = "${base}_0";
		}
		my $cnum = 1;
		foreach my $cid (@cids) {
			$ConvCID{$cid} = "${base}_${cnum}";
			$cnum++;
		}
	}

	# apply the renaming
	foreach my $entry (@{$this->{data_all}}) {
		my $renamed = $ConvCID{ $entry->{clust} };
		$entry->{clust} = $renamed if ($renamed);
	}
	return \%ConvCID;
}

###############################################################################
#
# Writes the cluster table to file $clustout in the layout read by
# read_clusttab_file(): a header "#<TAB>sp1<TAB>sp2..." followed by one
# line per cluster, "clustid<TAB>genes_sp1<TAB>genes_sp2...".  The genes
# of one species are separated by a single space; a gene with a true 'dom'
# is written as "name(dom)".
#   $cid_list - optional array ref giving the cluster IDs to write and
#               their order; otherwise all keys of $this->{ClustData} are
#               written in hash order
# Dies when the file cannot be opened or closed.  Does not modify
# $this->{ClustData}.  Returns 1.
sub save_clusttab {
	my($this, $clustout, $cid_list) = @_;
	if (ref($cid_list) ne 'ARRAY') {
		$cid_list = [ keys %{$this->{ClustData}} ];
	}
	open(my $out, '>', $clustout)
		or die "Can't open $clustout for writing: $!";
	print $out join("\t", "#", @{$this->{species}}), "\n";
	foreach my $clid (@{$cid_list}) {
		# read-only lookups: IDs or species without data give empty columns
		my $clust = $this->{ClustData}->{$clid} || {};
		my @columns;
		foreach my $sp (@{$this->{species}}) {
			my $members = $clust->{$sp} || [];
			push(@columns, join(" ", map {
				$_->{dom} ? "$_->{name}($_->{dom})" : "$_->{name}"
			} @{$members}));
		}
		print $out join("\t", $clid, @columns), "\n";
	}
	close($out) or die "Can't close $clustout: $!";
	return 1;
}

###############################################################
package ClustData;
# Minimal cluster object identified by its ID.  Every object created by
# new() is registered in %ClustData::ClusterID under its (stringified) ID.

# Creates an object for $id and registers it, replacing any object that
# was registered under the same ID.  Returns the object.
sub new {
	my($class, $id) = @_;
	my $this = bless({ id => $id }, $class);
	$ClustData::ClusterID{"$id"} = $this;
	return $this;
}

# Returns the object registered for $id, creating (and registering) it on
# first use.  The lookup uses the registry that new() fills, so repeated
# calls with the same ID return the same object.
sub getInstance {
	my($class, $id) = @_;
	return $ClustData::ClusterID{"$id"} || $class->new($id);
}

###############################################################
package DuplicatedClusterCheck;
# Counts how often each cluster ID has been seen and derives "#<n>"
# suffixed IDs for repeated ones.

# Creates a checker with no recorded IDs.
sub new {
	my($class) = @_;
	return bless({}, $class);
}

# Records one more occurrence of $cid.  Returns $cid for the first
# occurrence and "$cid#<count>" for every later one.
sub dupcheck {
	my($this, $cid) = @_;
	my $seen = ++$this->{FoundNum}->{$cid};
	if ($seen > 1) {
		return $cid . '#' . $seen;
	}
	return $cid;
}

# Does not change any count.  Returns "$cid#1" when $cid has been recorded
# more than once by dupcheck() and contains no '#', otherwise $cid itself.
sub dupcheck2 {
	my($this, $cid) = @_;
	my $seen = $this->{FoundNum}->{$cid};
	return "$cid#1" if ($seen > 1 && $cid !~ /#/);
	return $cid;
}

###############################################################
package main;
use GenomeData;
# Self-test driver, run only when this file is executed as a script:
#   <this file> <clusttab> <genome data>
# Reads the genome data and the cluster table, builds the index and writes
# the table to the file "OOO".
if (__FILE__ eq $0) {
	my($clusttab_src, $genome_src) = @ARGV[0, 1];
	my $genome = GenomeData->read($genome_src);
	my $tab = ClustTab->read_clusttab($clusttab_src, $genome);
	$tab->make_index;
	$tab->save_clusttab("OOO");
}
###############################################################
1;
