#!/usr/bin/perl -s

# Driver: read a tab-separated cluster table from the files named on the
# command line (or STDIN) and emit one presence/absence pattern per cluster.
#
# Switches (set as package variables by `perl -s`):
#   -MIN_EXIST=N  minimum number of present columns a pattern needs to be kept
#                 by sortOutPut; any false value (unset, empty, 0) becomes 2
#   -COMPRESS     merge identical patterns through sortOutPut
#   -ORIGOUT=F    passed to sortOutPut as the target of its mapping listing
#
# Globals filled here and read by the subroutines: @species, @orig_output.
$MIN_EXIST = 2 if (! $MIN_EXIST);

my $lineno = 0;
while (my $row = <>) {
	++$lineno;
	# Strip the newline and split with LIMIT -1 so that empty trailing
	# columns are kept; otherwise the "drop the last two columns" slices
	# below would eat real data columns on a line that lacks a newline.
	chomp($row);
	if ($lineno == 1) {
		# Header: skip the first column and the last two; the rest are
		# the column labels whose count printOut reports as #colnum.
		my ($dmy, @cols) = split(/\t/, $row, -1);
		@species = @cols[0..$#cols-2];
		next;
	}
	# A blank line carries no cluster; do not turn it into a junk record.
	next if ($row !~ /\S/);

	# Data row: cluster ID, size, then the cells; the last two cells are
	# ignored, mirroring the header handling.
	my ($clustID, $size, @cells) = split(/\t/, $row, -1);
	my $exist_num = 0;
	my @phylopat;
	foreach my $cell (@cells[0..$#cells-2]) {
		# A cell counts as present when it is true in the Perl sense
		# (non-empty and not "0").
		my $exist = $cell ? 1 : 0;
		$exist_num += $exist;
		push(@phylopat, $exist);
	}

	# Every row is recorded here; the MIN_EXIST threshold is applied only
	# inside sortOutPut, i.e. only when -COMPRESS is given.
	push(@orig_output,
		{name=>$clustID, pat=>join(" ", @phylopat), n_exists=>$exist_num});
}
if ($COMPRESS) {
	$output = sortOutPut(\@orig_output, $MIN_EXIST, $ORIGOUT);
} else {
	$output = \@orig_output;
}
printOut($output);


# Write a pattern table: a "#rownum=" line, a "#colnum=" line, then one
# "name pattern" line per record.
#
#   $output  - array ref of hash refs, each with the keys 'name' and 'pat'
#   $outfile - optional output target, given without the leading '>';
#              defaults to "&STDOUT" (a duplicate of standard output)
#
# Reads the global @species for the column count.
# Dies if the target cannot be opened or closed.
sub printOut {
	my($output, $outfile) = @_;
	if (! $outfile) {
		$outfile = "&STDOUT";
	}
	# The 2-arg open form is kept on purpose so that "&STDOUT"-style targets
	# keep working. Reopening the bareword handle O also implicitly closes
	# (and so flushes) whatever was open on it before, which keeps any
	# earlier output on O ahead of this table.
	open(O, ">$outfile") || die("Can not open $outfile: $!");
	# Everything goes to O, so an explicit $outfile really receives the
	# table instead of being created empty.
	print O "#rownum=", scalar(@{$output}), "\n";
	print O "#colnum=", scalar(@species), "\n";
	foreach my $rec (@{$output}) {
		print O "$rec->{name} $rec->{pat}\n";
	}
	# Buffered write errors only surface at close time.
	close(O) || die("Can not close $outfile: $!");
}
# Collapse records that share the same pattern and number the distinct
# patterns 1, 2, ... in string-sorted pattern order.
#
#   $output    - array ref of hash refs with keys 'name', 'pat', 'n_exists'
#   $min_exist - a distinct pattern is returned only if the n_exists of its
#                first record is at least this value; filtered patterns
#                still consume a number, so returned names may have gaps
#   $origout   - optional target for the mapping listing, given without the
#                leading '>'; the value '1' means standard output
#
# Side effect: fills the global %NameCorresp, keyed by original name, with
# {newnum, pat} for every input record (filtered patterns included).
# Returns an array ref of {name => pattern number, pat => pattern}.
# Dies if the mapping target cannot be opened or closed.
sub sortOutPut {
	my($output, $min_exist, $origout) = @_;
	my(@new_output);
	my($prev_pat);
	my($newnum) = 0;
	foreach my $rec (sort { $a->{pat} cmp $b->{pat} } @{$output}) {
		# Equal patterns are adjacent after sorting, so a change of
		# pattern marks the start of a new group.
		if (! defined($prev_pat) || $prev_pat ne $rec->{pat}) {
			$newnum++;
			if ($min_exist <= $rec->{'n_exists'}) {
				push(@new_output, {name=>$newnum, pat=>$rec->{pat}});
			}
			$prev_pat = $rec->{pat};
		}
		$NameCorresp{$rec->{name}} = {newnum=>$newnum, pat=>$rec->{pat}};
	}
	if ($origout) {
		if ($origout eq '1') {
			$origout = "&STDOUT";
		}
		# 2-arg open is kept so "&STDOUT"-style targets keep working.
		open(my $map_fh, ">$origout") || die("Can not open $origout: $!");
		# Order by pattern number, then by original name: numerically
		# first, with a string comparison as the final tie-break so that
		# non-numeric names (which all compare equal under <=>) still
		# come out in a deterministic order.
		foreach my $n ( sort { ($NameCorresp{$a}->{newnum}
					<=> $NameCorresp{$b}->{newnum})
					|| ($a <=> $b)
					|| ($a cmp $b)
				} keys %NameCorresp ) {
			print $map_fh "$NameCorresp{$n}->{newnum} $n $NameCorresp{$n}->{pat}\n";
		}
		# Buffered write errors only surface at close time.
		close($map_fh) || die("Can not close $origout: $!");
	}
	return \@new_output;
}
