#!/usr/bin/perl
package ClustTab;
#use GeneData;

## Constructor.
##   $class : class name the new object is blessed into
##   %opt   : optional key/value pairs, copied into the object by setOpt()
## Returns the new blessed hash reference.
sub new {
	my($class, %opt) = @_;
	my $this = bless {}, $class;
	# Take the options from the caller's arguments. The previous code
	# consulted an undeclared package-level %opt, so anything passed to
	# the constructor was silently ignored.
	$this->setOpt(%opt);
	return $this;
}
## Copy option key/value pairs into the object.
##   %opt : options to store; each key becomes a field of $this,
##          overwriting any existing field of the same name
sub setOpt {
	my($this, %opt) = @_;
	while (my($key, $value) = each %opt) {
		$this->{$key} = $value;
	}
}
## Read a cluster table file and build a new object from it
## (class method).
##   $clusttab   : path of the cluster table file (opened read-only)
##   $genomeData : object providing get($sp, $name) and
##                 countSpecies(\@spList, without_outgrp => 1)
##   %OPT        : rmhash    - stored in the object when true
##                 MIN_SPCNT - minimum species count for a cluster
##                             to be kept
## Input lines are handled as follows:
##   - a line starting with '#' is a tab-separated header; the first
##     field is dropped and the rest are the species names
##   - a line starting with "ClusterID\t" switches to the 'mbgd' format;
##     it is split on whitespace, the first two fields are dropped and
##     the last two (Gene, FuncCat) are removed
##   - any other line is a cluster row: cluster ID followed by one
##     column per species, each holding space-separated gene names
## Genes for which $genomeData->get() returns nothing are skipped.
## Returns the object; its {data_all} lists one record per kept gene.
## Dies when the file cannot be opened.
sub read_clusttab {
	my($class, $clusttab, $genomeData, %OPT) = @_;
	my($this) = $class->new;
	my($dmy, @species);
	my $format = '';
	my(@ClustDataAll);
	$this->{genomeData} = $genomeData;
	# Lexical handle + 3-arg open: the name is always treated as a plain
	# file to read, and nested calls cannot clobber each other's handle.
	open(my $fh, '<', $clusttab)
		|| die "Can't open clusttab: $clusttab\n";
	if ($OPT{"rmhash"}) {
		$this->{'rmhash'} = $OPT{'rmhash'};
	}
	# Read into a lexical so the caller's $_ is left untouched.
	while (my $line = <$fh>) {
		chomp($line);
		if ($line =~ /^#/) {
			($dmy, @species) = split(/\t/, $line);
			$this->{species} = \@species;
		} elsif ($line =~ /^ClusterID\t/) {
			$format = 'mbgd';
			($dmy, $dmy, @species) = split(' ', $line);
			pop(@species); # Gene
			pop(@species); # FuncCat
			$this->{species} = \@species;
		} else {
			my($clustid, @Genes);
			my(@Fields) = split(/\t/, $line);
			if ($format eq 'mbgd') {
				($clustid, $dmy, $dmy, @Genes)
					= @Fields[0..$#species+2];
			} else {
				($clustid, @Genes) = @Fields;
			}
			my $col = 0;
			my(%foundSp, @TMP_ClustData);
			foreach my $genes (@Genes) {
				# The species comes from the column position,
				# not from any prefix inside the gene name.
				my $sp = $species[$col++];
				foreach my $g (sort split(/ /, $genes)) {
					my(undef, $name, $dom) = parse_name($g);
					my $gdata = $this->{genomeData}
							->get($sp,$name);
					next if (! $gdata);
					$foundSp{$sp} = 1;
					push(@TMP_ClustData, {
						sp => $sp,
						name => $name,
						dom => $dom,
						clust => $clustid,
						'pos' => $gdata->{'pos'},
						gene => $gdata->{gene},
						dir => $gdata->{dir},
					});
				}
			}
			my @spList = keys %foundSp;
			my $spcnt = $this->{genomeData}->countSpecies(
					\@spList, without_outgrp => 1 );

			# An unset MIN_SPCNT counts as 0, i.e. keep everything.
			if ($spcnt >= ($OPT{'MIN_SPCNT'} || 0)) {
				## incorporate only conserved clusters
				push(@ClustDataAll, @TMP_ClustData);
			}
		}
	}
	close($fh);
	$this->{data_all} = \@ClustDataAll;
	$this;
}
## Split a gene label of the form "sp:name(dom)" into its parts.
## Plain function (not a method).
##   $name : gene label; the "sp:" prefix and the "(dom)" suffix are
##           both optional
## Returns ($sp, $name, $dom); $sp and $dom are undef when absent.
sub parse_name {
	my($name) = @_;
	my($sp, $dom);
	# Strip a parenthesised number first and keep it as the domain.
	$dom = $1 if ($name =~ s/\((\d+)\)//);
	# What precedes the first colon is the species code.
	($sp, $name) = split(/:/, $name) if ($name =~ /:/);
	return ($sp, $name, $dom);
}
## Build the lookup tables from {data_all}.
##   $refsp : optional reference species handed to set_cid_list();
##            when not given, the global $::refsp is used instead
## Fills
##   {ClustData}->{$clustid}->{$sp} : records of that cluster and species
##   {ClustData}->{$clustid}->{spcnt} : countSpecies() over the species
##                                      present in the cluster
##   {SpData}->{$sp} : records of the species sorted by pos, then by
##                     dir * dom; each record gets its rank in {order}
## and finally calls set_cid_list(). Returns what set_cid_list() returns.
sub make_index {
	my($this, $refsp) = @_;

	$this->{ClustData} = {};
	$this->{SpData} = {};

	foreach my $data (@{$this->{data_all}}) {
		my $sp = $data->{sp};
		my $clustid = $data->{clust};
		push(@{$this->{SpData}->{$sp}}, $data);
		push(@{$this->{ClustData}->{$clustid}->{$sp}}, $data);
	}
	foreach my $clustid (keys %{$this->{ClustData}}) {
		my $clust = $this->{ClustData}->{$clustid};
		# Species (in table order) that have at least one member here.
		my @spList = grep { $clust->{$_} && @{$clust->{$_}} }
				@{$this->{species}};
		$clust->{spcnt} =
			$this->{genomeData}->countSpecies(
				\@spList, without_outgrp=>1 );
	}

	foreach my $sp (@{$this->{species}}) {
		my @Pos = sort {
				$a->{pos} <=> $b->{pos} ||
				$a->{dir} * $a->{dom} <=> $b->{dir} * $b->{dom}
			}
			@{ $this->{SpData}->{$sp} || [] };
		my $idx = 0;
		foreach my $d (@Pos) {
			$d->{order} = $idx++;
		}
		$this->{SpData}->{$sp} = \@Pos;
	}
	# Honour the argument; the previous code ignored it and always read
	# the global. The global remains the fallback for existing callers.
	$refsp = $::refsp if (! defined $refsp);
	return $this->set_cid_list($refsp);
}
## Store the list of cluster IDs in {CID}.
##   $refsp : optional reference species. When given, the IDs are
##            ordered by the position ('pos' of the genomeData record)
##            of each cluster's first member in that species.
## Clusters with no member in $refsp, no genomeData record, or no
## position are placed after all positioned clusters.
## Returns the array reference stored in {CID}.
sub set_cid_list {
	my($this, $refsp) = @_;
	my @CID = keys %{$this->{ClustData}};

	if ($refsp) {
		## sorting by the positions on the reference genome
		# Sentinel for "no position". $BIGVALUE was never defined in
		# this package, so it evaluated to 0 and such clusters sorted
		# first; it is still honoured if some caller sets it.
		my $big = defined $ClustTab::BIGVALUE
				? $ClustTab::BIGVALUE : 9**9**9;
		my @sortKey;
		foreach my $cid (@CID) {
			my $pos;
			if (my $refdata = $this->getClustData1($cid, $refsp)) {
				my $gene = $this->{genomeData}->get(
						$refsp, $refdata->{name});
				$pos = $gene->{pos} if ($gene);
			}
			push(@sortKey, defined $pos ? $pos : $big);
		}

		# Sort the indices so @CID and @sortKey stay in step.
		@CID = @CID[
			sort { $sortKey[$a] <=> $sortKey[$b] } (0..$#CID) ];
	}
	$this->{CID} = \@CID;
}
## Accessor for the cluster ID list built by set_cid_list().
##   $id : optional index into the list
## Returns the whole list (array reference) when $id is undefined,
## otherwise the cluster ID stored at index $id.
sub CID {
	my($this, $id) = @_;
	return defined($id) ? $this->{CID}->[$id] : $this->{CID};
}
## Return the species count ({spcnt}) recorded for cluster $cid,
## or undef when the cluster is unknown.
sub getClustSpCnt {
	my($this, $cid) = @_;
	# Fetch the cluster entry first: chaining ->{spcnt} directly onto
	# the lookup would create an empty entry for an unknown $cid.
	my $clust = $this->{ClustData} ? $this->{ClustData}->{$cid} : undef;
	return $clust ? $clust->{spcnt} : undef;
}
## Look up cluster data.
##   $cid : cluster ID; a trailing "#<digits>" is removed first when
##          the object's {rmhash} flag is set
##   $sp  : optional species
##   $idx : optional index into the species' member list
## Returns
##   - the cluster's hash (species => member list) when $sp is false
##   - the member list (array reference) of $sp when $idx is undefined
##   - the member at $idx otherwise
## and undef when the requested item does not exist. The lookup never
## creates entries in {ClustData}.
sub getClustData {
	my($this, $cid, $sp, $idx) = @_;
	$cid =~ s/#\d+$// if ($this->{rmhash});	# remove hash_numbers
	# Resolve level by level; a chained lookup would autovivify an
	# empty cluster entry for an unknown $cid.
	my $clust = $this->{ClustData} ? $this->{ClustData}->{$cid} : undef;
	if (! $sp) {
		return $clust;
	}
	my $members = $clust ? $clust->{$sp} : undef;
	if (defined $idx && $members) {
		return $members->[$idx];
	}
	return $members;
}
## Convenience wrapper around getClustData(): asks for the member at
## index 0 of species $sp in cluster $cid.
sub getClustData1 {
	my $this = shift;
	my($cid, $sp) = @_;
	my $first = 0;	# index of the first member
	return $this->getClustData($cid, $sp, $first);
}
## Store $data as the entry for species $sp in cluster $cid,
## replacing whatever was there. Returns the stored value.
sub setClustData {
	my($this, $cid, $sp, $data) = @_;
	$this->{ClustData}{$cid}{$sp} = $data;
	return $data;
}
## Plain function (not a method): tell whether a cluster ID contains
## a hash mark followed by digits ("#<digits>").
## Returns 1 if it does, 0 otherwise.
sub is_new_cid {
	my($cid) = @_;
	return $cid =~ /#\d+/ ? 1 : 0;
}
## Plain function (not a method): extract the digits that follow the
## first "#<digits>" in a cluster ID.
## Returns those digits, or -1 when the ID has no such part.
sub get_newid_idx {
	my($cid) = @_;
	return $cid =~ /#(\d+)/ ? $1 : -1;
}
## Assign the records in $dataList to cluster $newid.
##   $dataList : array reference of gene records
##   $newid    : cluster ID to assign
##   $sp       : optional species; when given only that species' entry
##               of the cluster is replaced, otherwise the whole cluster
## Records currently returned by getClustData($newid, $sp) are marked
## 'deleted' first, unless their cluster ID already contains a hash mark.
## NOTE: replacing a whole cluster installs a fresh hash, so any other
## key stored in the old one (e.g. spcnt) is not carried over.
sub changeClustID {
	my($this, $dataList, $newid, $sp) = @_;
	my(%tmp_spdata);

	# With $sp the lookup yields one member list; without it, the whole
	# cluster hash, whose array values are the per-species lists.
	my $current = $this->getClustData($newid, $sp);
	my @oldMembers;
	if (ref($current) eq 'ARRAY') {
		@oldMembers = @{$current};
	} elsif (ref($current) eq 'HASH') {
		foreach my $members (values %{$current}) {
			push(@oldMembers, @{$members})
				if (ref($members) eq 'ARRAY');
		}
	}
	foreach my $d (@oldMembers) {
		## Delete the original assignment (move to 'deleted' group)
		## Do not remove a reassigned clustid that contains a hash mark
		$d->{clust} = 'deleted' if ($d->{clust} !~ /#/);
	}
	foreach my $d (@{$dataList}) {
		$d->{clust} = $newid;
		if (! $sp) {
			# Group by species in the lexical hash that is stored
			# below (pushes used to go to an unrelated global).
			push(@{ $tmp_spdata{$d->{sp}} }, $d);
		}
	}
	if ($sp) {
		$this->{ClustData}->{$newid}->{$sp} = $dataList;
	} else {
		$this->{ClustData}->{$newid} = \%tmp_spdata;
	}
}
## Access the per-species gene list built by make_index().
##   $sp : species
##   $i  : optional index; applied modulo the number of genes, so
##         indexes past either end wrap around (circular genomes)
## Returns the whole list (array reference) when $i contains no digit,
## otherwise the record at the wrapped index. Returns undef for an
## indexed request on a species that has no genes.
sub getSpData {
	my($this, $sp, $i) = @_;
	my $list = $this->{SpData}->{$sp};
	if (! defined $i || $i !~ /\d/) {
		return $list;
	}
	my $numgenes = $list ? scalar(@{$list}) : 0;
	# An empty or unknown species has nothing to index; without this
	# guard the modulus below dies with "Illegal modulus zero".
	return undef if ($numgenes == 0);

	# for circular genomes
	$i %= $numgenes;

	return $list->[$i];
}
## Placeholder: walks over every cluster ID but performs no work.
##   $order : accepted but currently unused
sub set_order {
	my($this, $order) = @_;
	for my $cid (keys %{ $this->{ClustData} }) {
		# intentionally empty
	}
}
## Renumber the cluster IDs found in {data_all}.
## The base ID of a cluster ID is the ID with everything from the first
## '.' or '#' removed. For each base ID:
##   - a single ID is renamed to the base ID itself
##   - several IDs are renamed to "<base>_<n>" in string-sorted order:
##     the unsuffixed base ID, if present, becomes "<base>_0" and the
##     others are numbered from 1
## The {clust} field of every record is rewritten accordingly.
## Returns a hash reference mapping old ID => new ID.
sub renum_clustid {
	my($this) = @_;
	my(%CIDs, %ConvCID);
	# Collect the distinct IDs under each base ID.
	foreach my $data (@{$this->{data_all}}) {
		my $cid = $data->{clust};
		(my $orig_cid = $cid) =~ s/[\.\#].*$//;
		$CIDs{$orig_cid}->{$cid} = 1;
	}
	foreach my $orig_cid (keys %CIDs) {
		# Lexical list; this used to leak into a package global.
		my @cids = sort keys %{$CIDs{$orig_cid}};
		if (@cids > 1) {
			my $cnum = 1;
			# The base ID is a prefix of the others, so it sorts first.
			if ($cids[0] eq $orig_cid) {
				my $cid0 = shift(@cids);
				$ConvCID{$cid0} = "${orig_cid}_0";
			}
			foreach my $cid (@cids) {
				$ConvCID{$cid} = "${orig_cid}_${cnum}";
				$cnum++;
			}
		} else {
			$ConvCID{$cids[0]} = $orig_cid;
		}
	}
	foreach my $data (@{$this->{data_all}}) {
		my $cid = $data->{clust};
		if ($ConvCID{$cid}) {
			$data->{clust} = $ConvCID{$cid};
		}
	}
	return \%ConvCID;
}
## Write the cluster table to a file.
##   $clustout : path of the output file (created or truncated)
##   $cid_list : optional array reference of cluster IDs giving the
##               rows and their order; when it is not an array
##               reference, all keys of {ClustData} are written
## Output: a header line "#<TAB>sp1<TAB>sp2..." followed by one line
## per cluster: the cluster ID, then one tab-separated column per
## species holding the space-separated gene names, each followed by
## "(dom)" when the record has a true {dom}.
## Dies when the file cannot be opened or closed. {ClustData} is not
## modified.
sub save_clusttab {
	my($this, $clustout, $cid_list) = @_;
	if (ref($cid_list) ne 'ARRAY') {
		$cid_list = [ keys %{$this->{ClustData}} ];
	}
	my @species = @{ $this->{species} || [] };
	# A failed open used to be ignored, silently writing nothing.
	open(my $out, '>', $clustout)
		|| die "Can't open clustout: $clustout: $!\n";
	print $out join("\t", "#", @species), "\n";
	foreach my $clid (@{$cid_list}) {
		# Fall back to empty structures so that writing does not
		# create entries for clusters or species that have none.
		my $clust = $this->{ClustData}->{$clid} || {};
		my @cols = ($clid);
		foreach my $sp (@species) {
			my @genes;
			foreach my $d (@{ $clust->{$sp} || [] }) {
				my $label = "$d->{name}";
				$label .= "(" . $d->{dom} . ")" if ($d->{dom});
				push(@genes, $label);
			}
			push(@cols, join(" ", @genes));
		}
		print $out join("\t", @cols), "\n";
	}
	# Buffered write errors only surface when the handle is closed.
	close($out) || die "Can't close clustout: $clustout: $!\n";
}
###############################################################
package ClustData;
## Registry of the ClustData objects created so far, keyed by the
## stringified cluster ID.
our %ClusterID;

## Create a new object for cluster $id and record it in the registry,
## replacing any object previously registered under the same ID.
## Returns the new object.
sub new {
	my($class, $id) = @_;
	my $this = bless {id=>$id}, $class;
	$ClusterID{"$id"} = $this;
	return $this;
}
## Return the object registered for cluster $id, creating (and
## registering) one when there is none yet.
sub getInstance {
	my($class, $id) = @_;
	# Look in the registry that new() fills; this used to read a
	# differently named hash, so a new object was made on every call.
	if (my $cl = $ClusterID{"$id"}) {
		return $cl;
	}
	return $class->new($id);
}
###############################################################
package DuplicatedClusterCheck;
## Counts how often each cluster ID has been seen and derives
## "#<n>"-suffixed IDs for repeated ones.

## Constructor: returns an empty checker object.
sub new {
	my($class) = @_;
	return bless {}, $class;
}
## Record one more occurrence of $cid.
## Returns $cid itself on its first occurrence and "$cid#<count>"
## on every later one (count = occurrences so far, including this).
sub dupcheck {
	my($this, $cid) = @_;
	my $count = ++$this->{FoundNum}->{$cid};
	return $count > 1 ? "$cid#$count" : $cid;
}
## Does not count. Returns "$cid#1" when $cid has been recorded more
## than once by dupcheck() and carries no hash mark yet; otherwise
## returns $cid unchanged.
sub dupcheck2 {
	my($this, $cid) = @_;
	my $count = $this->{FoundNum}->{$cid} || 0;
	# Test $cid itself; the old code tested an undefined variable, so
	# the "no hash mark yet" condition was always true.
	if ($count > 1 && $cid !~ /#/) {
		return "$cid#1";
	}
	return $cid;
}
###############################################################
package main;
use GenomeData;
## Script mode: runs only when this file is executed directly.
##   ARGV[0] : cluster table file to read
##   ARGV[1] : argument handed to GenomeData->read
## Reads both, builds the indexes and writes the table to file "OOO".
if ($0 eq __FILE__) {
	my($clusttab_file, $genome_file) = @ARGV[0, 1];
	$gdata = GenomeData->read($genome_file);
	$cl = ClustTab->read_clusttab($clusttab_file, $gdata);
	$cl->make_index;
	$cl->save_clusttab("OOO");
}
###############################################################
1;
