#!/usr/local/bin/perl

package MBGD::ClustTab::Reader;

use MBGD::ClustTab;
use FileHandle;

# Package-level defaults that read() copies into every new reader object
# before any format / data_begin option is applied.  Both are used as
# indices into the tab-split fields of a table line.
our $DEFAULT_ID_FIELD    = 0;	# column of the cluster identifier
our $DEFAULT_BEGIN_FIELD = 2;	# first column holding per-species data

#
# $clustTab = MBGD::ClustTab::Reader->read(
#	list=>$clusteringResult,
#	splist=>[eco,bsu,...],
#	format=>'ID,DUMMY,DATA,funccat,genename');
#
# Class method: parse a cluster table and hand the result to
# MBGD::ClustTab->new(splist=>..., cluster=>..., add_field=>...),
# whose return value is returned unchanged.
#
# Options (all optional):
#   file       - path of the table to read ('-' means STDIN); takes
#                precedence over list
#   list       - array ref of tab-separated table lines
#   splist     - species codes, as an array ref or a comma-separated string
#   format     - column layout, as an array ref or a comma-separated string
#                (keywords ID, DUMMY, DATA; anything else names an
#                additional field -- see set_format)
#   data_begin - first per-species column; only honoured when no format
#                is given
#   listgenes  - true: read_table parses gene names and records
#                gene => cluster id in the reader
#   parse_name - true: read_table parses gene names
#   limit      - "FROM,TO" range of data lines, or "N" for lines up to N
#
sub read($;%) {
	my($class, %opt) = @_;
	my $this = bless {}, $class;

	# Species list: keep an array ref as is, split a string on commas.
	my $splist = $opt{splist};
	if (! $splist) {
		$this->{splist} = [];
	} elsif (ref($splist) eq 'ARRAY') {
		$this->{splist} = $splist;
	} else {
		$this->{splist} = [ split(/,/, $splist) ];
	}

	# read_table looks the id column up under 'id_field'; the original
	# only set 'id_begin', which is kept here for compatibility.
	$this->{id_field} = $this->{id_begin} = $DEFAULT_ID_FIELD;
	$this->{data_begin} = $DEFAULT_BEGIN_FIELD;

	if ($opt{format}) {
		# split() must be wrapped in [] here: assigned directly to a
		# scalar it would yield the number of fields, not the fields.
		$this->{format} = (ref($opt{format}) eq 'ARRAY')
			? $opt{format}
			: [ split(/,/, $opt{format}) ];
		$this->set_format;
	} elsif ($opt{data_begin}) {
		$this->{data_begin} = $opt{data_begin};
	}

	$this->{listgenes} = 1 if ($opt{listgenes});
	$this->{parse_name} = 1 if ($opt{parse_name});

	if ($opt{limit}) {
		my($from, $to) = split(/,/, $opt{limit});
		if ($to) {
			($this->{from}, $this->{to}) = ($from, $to);
		} else {
			# A single number is an upper bound only.
			$this->{to} = $from;
		}
	}

	if ($opt{file}) {
		$this->read_table_from_file($opt{file});
	} elsif ($opt{list}) {
		$this->read_table_from_list($opt{list});
	}

	return MBGD::ClustTab->new( splist=>$this->{splist}, cluster=>$this->{cluster},
				add_field=>$this->{add_field} );
}
# Translate $this->{format} (a list of column keywords) into column
# indices stored on the reader:
#   ID     -> $this->{id_field}
#   DUMMY  -> column skipped
#   DATA   -> $this->{data_begin}; occupies one column per species
#   other  -> $this->{add_field}{NAME}
# Returns -1 without touching anything while the species list is empty,
# because the width of the DATA block is not known yet.
sub set_format {
	my($this) = @_;
	my $n_species = scalar @{ $this->{splist} };
	return -1 unless $n_species;
	if ($this->{format}) {
		my $column = 0;
		for my $name (@{ $this->{format} }) {
			if ($name eq 'DATA') {
				# the only keyword that spans several columns
				$this->{data_begin} = $column;
				$column += $n_species;
				next;
			}
			if ($name eq 'ID') {
				$this->{id_field} = $column;
			} elsif ($name ne 'DUMMY') {
				$this->{add_field}->{$name} = $column;
			}
			$column++;
		}
	}
}
# Read a cluster table from $file ('-' reads STDIN) and store the parsed
# clusters, as an array ref, in $this->{cluster} (also returned).
#
# Lines starting with '#' are header/comment lines and never count as
# data.  A header containing "SPEC=a,b,c" replaces the species list, after
# which the format is re-applied and the data column range recomputed.
# Data lines are numbered from 1; $this->{from} / $this->{to} select the
# range of lines that are parsed.  Dies if the input cannot be opened.
sub read_table_from_file($$) {
	my($this, $file) = @_;
	my $fh;
	if ($file eq '-') {
		$fh = FileHandle->new("<&STDIN") || die "Can't dup STDIN: $!\n";
	} else {
		# NOTE(review): one-argument FileHandle->new goes through Perl's
		# two-argument open, so mode characters in $file are honoured.
		# Left as is in case callers rely on it; do not pass untrusted
		# names.
		$fh = FileHandle->new($file) || die "Can't open $file\n";
	}

	my @cluster;
	my $ln = 0;
	my $data_begin = $this->{data_begin};
	my $data_end;

	# Until a SPEC header is seen, a species list supplied by the caller
	# bounds the data columns; with no list at all read_table reads to
	# the end of the line.
	if ($this->{splist} && @{ $this->{splist} }) {
		$data_end = $data_begin + @{ $this->{splist} } - 1;
	}

	while (my $line = <$fh>) {
		chomp $line;
		if ($line =~ /^#/) {
			# Only a header that really carries SPEC= may replace the
			# species list; other comment lines are just skipped.
			if ($line =~ /SPEC=([a-zA-Z0-9\,]+)/) {
				$this->{splist} = [ split(/,/, $1) ];
				$this->set_format;
				# set_format may have moved the DATA column.
				$data_begin = $this->{data_begin};
				$data_end = $data_begin + @{ $this->{splist} } - 1;
			}
			next;
		}
		next if (++$ln < ($this->{from} || 0));
		last if ($this->{to} && $ln > $this->{to});
		my $cluster = $this->read_table($line, $data_begin, $data_end);
		push(@cluster, $cluster);
	}
	$fh->close;
	$this->{cluster} = \@cluster;
	return $this->{cluster};
}

# Parse the table lines in the array ref $list and store the resulting
# clusters, as an array ref, in $this->{cluster} (also returned).
# Lines are numbered from 1; $this->{from} / $this->{to} select the range
# of lines that are parsed.
sub read_table_from_list($\@) {
	my($this, $list) = @_;
	my @cluster;
	my $data_begin = $this->{data_begin};
	my $data_end;
	my $ln = 0;

	# An empty species list must leave $data_end undefined so that
	# read_table reads to the end of each line.  Testing the reference
	# itself is not enough: an empty array ref is still true and would
	# give data_end = data_begin - 1, i.e. an empty column range.
	if ($this->{splist} && @{ $this->{splist} }) {
		$data_end = $data_begin + @{ $this->{splist} } - 1;
	}

	foreach my $line (@{$list}) {
		next if (++$ln < ($this->{from} || 0));
		last if ($this->{to} && $ln > $this->{to});
		my $cluster = $this->read_table($line, $data_begin, $data_end);
		push(@cluster, $cluster);
	}
	$this->{cluster} = \@cluster;
	return $this->{cluster};
}
# Parse one tab-separated table line into a cluster object.
#
#   $line       - the line, without its trailing newline
#   $data_begin - first per-species column (treated as 0 when undefined)
#   $data_end   - last per-species column; when false or smaller than
#                 $data_begin, every column up to the end of the line is
#                 taken as species data
#
# Each species column holds space-separated gene names.  A leading
# lower-case/digit prefix followed by ':' is stripped from each name.  With
# parse_name or listgenes set, names are split by
# MBGD::ClustTab::parse_genename into [gene, domain] pairs; listgenes also
# records gene => cluster id in $this->{genes}.
#
# Returns MBGD::Cluster->new($clid, $cluster, $fields), where $cluster has
# one array ref per species column and $fields maps each additional field
# name to its column value.
sub read_table($$;$$) {
	my($this, $line, $data_begin, $data_end) = @_;
	my @field = split(/\t/, $line);

	my $id_field = defined($this->{id_field}) ? $this->{id_field} : 0;
	my $clid = $field[$id_field];

	$data_begin = 0 if (! defined $data_begin);
	# An end before the beginning can only come from an empty species
	# list; fall back to "rest of the line" instead of an empty cluster.
	if (! $data_end || $data_end < $data_begin) {
		$data_end = $#field;
	}

	my $want_parsed = $this->{parse_name} || $this->{listgenes};
	my $cluster = [];
	foreach my $column (@field[$data_begin .. $data_end]) {
		# split drops trailing empty fields, so columns past the end of
		# a short line are undef: they still count as (empty) species.
		my @genes = defined($column) ? split(/ /, $column) : ();
		my $spdata = [];
		foreach my $g (@genes) {
			$g =~ s/^[a-z0-9]+://;
			if ($want_parsed) {
				my($gene, $dom) = MBGD::ClustTab::parse_genename($g);
				push(@{$spdata}, [$gene, $dom]);
				$this->{genes}->{$gene} = $clid if ($this->{listgenes});
			} else {
				push(@{$spdata}, $g);
			}
		}
		push(@{$cluster}, $spdata);
	}

	# Made explicit: the original created this hash implicitly through
	# keys(), and read() passes it on as add_field.
	$this->{add_field} ||= {};
	my $fields = {};
	foreach my $name (keys %{ $this->{add_field} }) {
		$fields->{$name} = $field[ $this->{add_field}->{$name} ];
	}
	return MBGD::Cluster->new($clid, $cluster, $fields);
}

#########################################################################
package main;
# Self-test driver: runs only when this file is executed directly rather
# than loaded as a module.
if ($0 eq __FILE__) {
	# Table to read: first command-line argument, else the site default.
	$file = $ARGV[0] || "/dbb/project/MBGD/work/default.clusterTab";

	# Parse the first 100 data lines.
	# (Variant kept for reference: limit=>"1,1000", parse_name=>1.)
	$clusttab = MBGD::ClustTab::Reader->read(limit=>"1,100", file=>$file);

	# Print through the plain writer.  Alternatives that take the same
	# argument: MBGD::ClustTab::Writer_HTMLSimple, MBGD::ClustTab::Writer_XML;
	# $clusttab->print is another option.
	$htm = MBGD::ClustTab::Writer->new($clusttab);
	$htm->print_table;
}
1;
