#!/usr/bin/perl -s

#
# getMbgdCluster.pl: a utility program to get clustering results from the MBGD/RECOG server
#   example) getMbgdCluster.pl species=eco output_format=dclst
#

$curl_prog = 'curl';
$url = 'http://mbgd.genome.ad.jp/htbin/RECOG/domclust.cgi';

@Params = (
	['species',	'ingroup',	'', 'Inroup species (comma-delimited list of species names)'],
	['outgroup',	'',		'', 'Outgroup species (comma-delimited list of species names)'],
	['score',	'',		'0-', 'Score cutoff'],
	['eval',	'',		'0-10', 'E-value cutoff'],
	['pam',		'',		'0-', 'PAM cutoff'],
	['ident',	'',		'0-100', 'Percentage identity cutoff'],
	['besthit',	'', 		'0|1', 'Use besthit criterion to choose entries from each genome'],
	['ratiocut',	'',		'0-1', 'Cutoff ratio of species overlap for tree splitting'],
	['output_format', '',		'dclst|tab|html', 'Cutoff ratio of species overlap for tree splitting'],
);
foreach $d (@Params) {
	($key, $alt, $val_range, $descr) = @{$d};
	$Params{$key} = 1;
	$Params{$alt} = 1;
	$ParamInfo{$key} = { keyname => $alt, val_range=>$val_range };
}
$sleep_time = 5;


foreach $arg (@ARGV) {
	($key, $value) = split(/=/, $arg);
	if ($key eq 'output_format') {
		if ($value eq 'tab') {
			$value = 'text';
		} elsif ($value eq 'dclst') {
			$value = 'list';
		}
		push(@curl_params, "$key=$value");
	} elsif ($Params{$key}) {
		if ($ParamInfo{$key}->{keyname}) {
			$key = $ParamInfo{$key}->{keyname};
		}
		if (&check_param($key, $value) == 0) {
			die "parameter range error\n";
		}
		push(@curl_params, "$key=$value");
	}
	if ($key eq 'ingroup' && $value ne '') {
		$flag = 1;
	}
}

if (! $flag) {
	&usage_exit;
}
$curl_params = join('&', @curl_params);
#system("curl -s '${url}?$curl_params'");

while (1) {
	## progress checking...
	open(P, "$curl_prog -s '${url}?$curl_params'|") || die "Can't connect the server\n";
	my($header, $flag);
	while(<P>) {
		if (/#STATUS=(\S+)/) {
			$status = $1;
		} elsif (/#STATUS2=(\S+)/) {
			$status2 = $1;
		} elsif (/#PROGRESS=(\S+)/) {
			$progress = $1;
		} elsif (/#START_DATA/) {
			$flag = 1;
			if (! $noheader) {
				print "$header";
				print;
			}
			next;
		}
		if ($flag) {
			print;
		} else {
			$header .= $_;
			print STDERR $_ if ($DEBUG);
		}
	}
	close(P);
	## exit loop if done
	last if ($flag);

	if (! $silent) {
		print "$status ";
		print "[$status2] " if ($status2 ne '');
		print "(progress = ${progress}%)" if ($progress ne '');
		print "\n";
	}
	sleep($sleep_time);
	$sleep_time = int($sleep_time * 1.2);
}

sub usage_exit {
	print STDERR "$0 [options] param=value ...\n";
	print "params:\n";
	foreach $p (@Params) {
		printf("  %-14s %s [%s]\n", $p->[0], $p->[3], $p->[2]);
	}
	print "options:\n";
	print "  -noheader    Do not output header information\n";
	print "  -silent      Suppress progress output\n";
	exit(1);
}
sub check_param {
	my($key, $value) = @_;
	my($range) = $ParamInfo{$key}->{val_range};
	if ($range =~ /^(\d+)\-(\d+)/) {
		my($from,$to) = ($1, $2);
		if ($from && $value < $from) {
			return 0;
		} elsif ($to && $to <  $value) {
			return 0;
		}
	} elsif ($range =~ /\|/) {
		my($flag);
		foreach my $v (split(/\|/, $range) ) {
			if ($v eq $value) {
				$flag = 1; last;
			}
		}
		return 0 if (! $flag);
	}
	return 1;
}
