#!/usr/bin/perl -s

#use strict;
use FindBin;
#use lib "$FindBin::Bin/../perllib";
use lib "$ENV{'MBGD_HOME'}/perllib/RECOG/CoreAligner";
use GenomeData;
use ClustTab;
use Graph;

#use PostScript::Simple;
use FileHandle;
use File::Path;
use POSIX;

## FOR DEBUG
#$DEBUG::CHECK_CLUST = "2159|318|976";
#$DEBUG::CHECK_CLUST = "1040|737";
#$DEBUG::CHECK_CLUST = "1976|405";
#$DEBUG=1;
$VERBOSE=1;

if (!defined($::tabid)) {
    $::tabid = $main::CLUST_TAB_ID;
}

# refsp: Reference Species
# (e.g.) $::refsp = 'bsu';

# SpGrp: sets of closely related organisms that should be count once
# (e.g.) $::SpGrp = 'ban:bce,sau:sep';

# Window size for checking neighbor along each genome
$::NBR_WIN=20 if (! $::NBR_WIN);
# Window size for checking neighbor along the core alignment
$::NBR_WIN_ALI=$::NBR_WIN if (! $::NBR_WIN_ALI);

# Cutoff ratio of conserved genes or connections (NBR_CONS_RATIO)
$::CONS_RATIO=0.5 if (! $::CONS_RATIO && ! $::CONS_NUM);
$::NBR_CONS_NUM2=2 if (! $::NBR_CONS_NUM2);

# Minimum number of ortholog groups that each cluster contains
$::MIN_CLUSTCNT = 10 if (! defined $::MIN_CLUSTCNT);

# max num of neighbors for calculating avg dist (obsolete)
##$::NUM_NBRDIST = 3 if (! $::NUM_NBRDIST);

# minimum gap size for displaying as "gapped"
$::DISP_MINGAP=1 if (! $::DISP_MINGAP);

# do_nbrclust: split clusters using neighboring gene information
#    if the cluster contains many inparalogs (at least $MIN_INPARALOG).
# Turned off by default (requires the option -do_nbrclust=1)
#$::do_nbrclust = 1 if (! $::skip_nbrclust);
$::MIN_INPARALOG = 1.4 if (! $::MIN_INPARALOG);
$::MIN_NBRCLUSTRATIO = .2 if (! $::MIN_NBRCLUSTRATIO);
#$::MIN_NBRCLUST = 3 if (! $::MIN_NBRCLUST);

# remove_para: retain only positional orthologs after alignment is done
$::remove_para = 1;

# MIN_SPCOV: required coverage of each genome for each cluster
#    to be added to the core
# A cluster is deleted if coverage < MIN_SPCOV or hitnum < SPCOV_MINNUM
#    in at least SPCOV_SPNUM species.
$::MIN_SPCOV = 0.5 if (! defined $::MIN_SPCOV);
$::SPCOV_MINNUM = 6 if (! defined $::SPCOV_MINNUM);
$::SPCOV_SPNUM = 1 if (! defined $::SPCOV_SPNUM);
# Cutoff of deletion score for removing local regions that are
# extensively deleted. Set SPCOV_REG_CUT = 0 to turn off this function.
$::SPCOV_REG_CUT = 20 if (! defined $::SPCOV_REG_CUT);

# clusters are ordered according to the positions on the reference genome
$::clust_order = 'posclust';

# Alpha: a parameter for score definition
#$::Alpha = 1 if (! defined $Alpha);
CalWeight::setParam(Alpha=>$::Alpha) if ($::Alpha);

# ReargPen: penalty of edge score for rearrangement
$::ReargPen = 0.25;

# SubLinkPen: penalty of edge score for subconserved link
# 	i.e. (NBR_CONS_NUM2 <= maxcnt < NBR_CONS_NUM)
$::SubLinkPen = 0.25;

# OrthoTolerance: take as an ortholog if wt >  max_wt * OrthoTolerance
$::OrthoTolerance = 0.8;

# Definitions of the other constants
$::BIGVALUE = 9999999999999;

###############################################################
## Load information from files
## Read data files
DEBUG::VERBOSE("Reading data files\n");

# read function categories for coloring gene symbol (triangle)
$::functype = 'cog' if (! $::functype);
if ($::funcdir) {
	$::funcData = FuncCat->new($::funcdir, $::functype)
}

# read gene list for coloring gene names
if ($::genelist) {
	&read_listfile($::genelist, \%::GeneNameColor, \%::GeneNameMark, $::genecolor, $::genemark);
}
if ($::clustlist) {
	&read_listfile($::clustlist, \%::ClusterColor, \%ClusterMark, $::clustcolor, $::clustmark);
}

## read gene attribute values for coloring each gene symbol (dot)
if ($::genecolors) {
	open(F, $::genecolors) || die "Can't open gencolfile: $genecolors";
	while(<F>){
		my($gene,$value) = split;
		my(@col) = split(/:/,$value);
		$::GeneValue{$gene} = \@col;
	}
	close(F);
	$::dotcolor = 'file';
}

# read genome data
if ($::out_species) {
    # $B=PNOBP>]$N@8J*<o(B
	@::out_species=split(/,/, $::out_species);
}
if ($::genomes) {
	$::GenomeData = GenomeData->read($::genomes, datapath=>$::datapath);
	@::species = @{ $::GenomeData->{species} };
	if (! $::out_species) {
		@::out_species = @{ $::GenomeData->{out_species} };
	}
} else {
	die "genomes file is undefined";
}

# set groups of closely related species
if ($::SpGrp) {
	$::GenomeData->setSpGroup($::SpGrp);
}
$::GenomeData->setSpIndex;

## set outgroups
if ($::OutGroup) {
	$::GenomeData->setOutGroup($::OutGroup);
}

# set the first species as a reference if not defined refsp
$::refsp = $::species[0] if (! $::refsp);

$::NUMSP = $::GenomeData->countSpecies(\@::species, without_outgrp=>1);


##############
# Setting CONS_NUM
# the number of conserved genes
if (! $::CONS_NUM) {
	$::CONS_NUM = $::CONS_RATIO * $::NUMSP;
	$::CONS_NUM = 2 if ($::CONS_NUM < 2);
}
# the number of conserved connections
if (! $::NBR_CONS_NUM) {
	if ($::NBR_CONS_RATIO) {
		$::NBR_CONS_NUM = $::NBR_CONS_RATIO * $::NUMSP;
	} else {
		# default
		$::NBR_CONS_NUM = $::CONS_NUM;
	}
}
if (! $::NBR_CONS_NUM2) {
	$::NBR_CONS_NUM2 = $::NBR_CONS_NUM;
}

if ($::SPCOV_SPRATIO) {
	if ($::SPCOV_SPNUM < $::NUMSP * $::SPCOV_SPRATIO) {
		$SPCOV_SPNUM = $::NUMSP * $::SPCOV_SPRATIO;
	}
}

###############################################################
# the procedure begins

# 1) read cluster table file and extract conserved groups
$::ClustTab = ClustTab->read_clusttab($::clusttab, $::GenomeData,
		                                MIN_SPCNT    => $::CONS_NUM,
                                        rmhash       => 1,
                                        CLUST_TAB_ID => $::tabid,
                                        MODE_CLUSTER => $::mode);
$::ClustTab->make_index($::refsp);
# for DEBUG

$::nbrTriple = NbrTriplet->new;

if ($::alignin) {
	## read sort result from $::align_in, print it and exit
	my $gsort = AlignmentPath->read_alignpath($::alignin);
	$::ClustTab->setOpt(rmhash=>0);
	my $galiList = $gsort->makeGenomeAlign();
	$galiList->print($::alignout);
	exit;
}

my $linkDir;

if ($::linkin) {
	## read neighbors of the directed graph from file ($linkin) for restart
	$linkDir = LinkDir->read_links($::linkin);
} else {
	my $nbrPair;
	if ($::pairin) {
	    ## read conserved neighbor pairs from file ($::pairin) for restart
		$nbrPair = ConsNbrPair->read_pairs($::pairin);
	} else {
		## 2) find conserved neighbor pairs in multiple genomes
		if ($::do_nbrclust) {
			# 2') split clusters using neighboring genes
			#    (this step is skipped by default)
			DEBUG::VERBOSE("Splitting groups by neighbors\n");
			&split_cluster_by_neighbor($::ClustTab);
		}

		DEBUG::VERBOSE("Collecting conserved neighbors\n");
		$nbrPair = ConsNbrPair->check_neighbor($::ClustTab);

		$nbrPair->save_pairs($::pairout) if ($::pairout);
	}
	## 3) determine the direction of the nodes and
	## find neighbors of the directed graph
	DEBUG::VERBOSE("Creating a directed graph\n");
	$linkDir = LinkDir->check_linkdir($nbrPair);
}

$linkDir->save_links($::linkout) if ($::linkout);

exit(0) if (! $::alignout);

## 4) converting the graph into a triplet graph
DEBUG::VERBOSE("Converting the graph into a triplet graph\n");
my $convGraph = TripletGraph->create($linkDir->{graph});

if (! $::skip_fs_cut ) {
	## 5) eliminating loops and make a DAG
	DEBUG::VERBOSE("Eliminating loops\n");
	my $fas = Graph::FeedbackSet->new($convGraph);
	$fas->contraction;
	($fs1, $fs2) = $fas->feedback_set;
	if (@{$fs2}) {
		print STDERR "Warning: the graph is not contractable @{$fs2}\n";
	}
	@all_fs = (@{$fs1}, @{$fs2});
	foreach $fn (@all_fs) {
		$convGraph->delete_node($fn);
		DEBUG::VERBOSE("delete node -> $fn\n");
	}
}

if ($::tripletout) {
	$convGraph->print_edges("$::tripletout");
}


## 6) finding longest paths by DP
DEBUG::VERBOSE("Finding longest paths\n");
my $gsort = AlignmentPath->findPath($::ClustTab, $convGraph);
DEBUG::VERBOSE("Output the result\n");
if ($::remove_para) {
	## 7) find genuine orthologs, and modify $::ClustTab
	$gsort->findBestOrthologs;
}
## 8) construct an alignment for output
my $galiList = $gsort->makeGenomeAlign();

## output the alignment (text or PS)
$galiList->print($::alignout);

## output the modified ortholog groups 
if ($::remove_para && $::clustout) {
	$::ClustTab->save_clusttab($::clustout);
}
if ($::core_clustout) {
	my @order = $galiList->getOrderedList;
	$::ClustTab->save_clusttab($::core_clustout, \@order);
}

DEBUG::VERBOSE("Done\n");

exit(0);


###############################################################
## Subroutines
###############################################################
# package main;
sub split_cluster_by_neighbor {
	my($clTab) = @_;
	my($i);
	ConsNbrPair->check_neighbor($clTab, {clustering=>1});
	$clTab->make_index($::refsp); #????
	if ($::clustout && ! $::remove_para) {
		$clTab->save_clusttab($::clustout);
	}
}
sub make_namestring {
	my($d) = @_;
	"$d->{sp}:$d->{name}:$d->{dom}";
}
sub make_namestring2 {
	my($d) = @_;
	my($str) = "$d->{sp}:$d->{name}";
	$str .= "($d->{dom})" if ($d->{dom});
	$str;
}
sub vectsum {
	my($vsum,$v1) = @_;
	for ($i = 0; $i < @{$v1}; $i++) {
		$vsum->[$i] += $v1->[$i];
	}
}
sub vectdiv {
	my($v1,$div) = @_;
	for ($i = 0; $i < @{$v1}; $i++) {
		$v1->[$i] /= $div;
	}
}
sub read_listfile {
	my($listfile, $ReadData1, $ReadData2, $defcolor, $defmark) = @_;
	open(F, $listfile) || die "Can't open genelist: $listfile";
	my($color, $mark, $markcolor, $colorcol, $markcol, $markcolorcol);
	$color = $defcolor if ($defcolor);
	$mark = $defmark if ($defmark);
	while(<F>){
		if (/^#/) {
			if (/#\s*color\s*=\s*(\S+)/) {
				$color = $1;
			} elsif (/#\s*mark\s*=\s*(\S+)/) {
				$mark = $1;
			} elsif (/#\s*colorcol\s*=\s*(\d+)/) {
				$colorcol = $1;
			} elsif (/#\s*markcol\s*=\s*(\d+)/) {
				$markcol = $1;
			}
			next;
		}
		chomp;
		my($name,@field) = split;
		if ($ReadData1) {
			if ($colorcol && $field[$colorcol-2]) {
				$ReadData1->{$name} = $field[$colorcol-2];
			} elsif ($color) {
				$ReadData1->{$name} = $color;
			}
		}
		if ($ReadData2) {
			if ($markcol && $field[$markcol-2]) {
				$ReadData2->{$name} = $field[$markcol-2];
			} elsif ($mark) {
				$ReadData2->{$name} = $mark;
			}
		}
	}
	close(F);
}
###########################################################
package DEBUG;
sub VERBOSE {
	my($message) = @_;
	print STDERR "$message" if ($::VERBOSE);
}
sub check_clust {
	my($str) = @_;
	return 0 if (! $DEBUG::CHECK_CLUST);
	$str =~ m{^($DEBUG::CHECK_CLUST)$};
#	$str =~ m{$DEBUG::CHECK_CLUST};
}

###########################################################
package CalWeight;

sub setParam {
	my(%opt) = @_;
	$Alpha = $opt{'Alpha'} if (defined $opt{'Alpha'});
	$Alpha2 = $opt{'Alpha2'} if (defined $opt{'Alpha2'});
}
sub calc {
	my($dist) = @_;
	my($weight);
	if ($Alpha) {
	    $weight = (1 / ($dist ** $Alpha));
	} elsif ($Alpha2) {
	    ## obsolete
	    $weight = ( ($::Alpha2+1) / ($Alpha2+$dist) );
	} else {
	    $weight = 1 / $dist;
	}
	$weight;
}

###########################################################
# ConsNbrPair: Conserved pair of neighbors
###########################################################
package ConsNbrPair;

sub new {
	my($class) = shift;
	my($this) = {};
	bless $this, $class;
	return $this;
}
sub check_neighbor {
	my($class, $clTab, $opt) = @_;
	my($this) = $class->new;
	my($i);
	## graph of coserved neighbor pairs
	$this->{graph} = UndirectedGraph->new;

	## check neighbors for each cluster
	for ($i = 0; $i < @{$clTab->CID}; $i++) {
		$this->check_neighbor0($clTab,$clTab->CID($i), $opt);
	}
	$this->check_sublink;
	## connecting the both ends of each gene
#	for ($i = 0; $i < @{$clTab->CID}; $i++) {
#		my $n = $clTab->CID($i);
#		$this->{graph}->add("$n:L", "$n:R", 0);
#	}
	$this;
}
sub check_neighbor0 {
	my($this, $clTab, $cl1, $opt) = @_;
	my($i, $j, $k);
	my(%OK);
	my(%Found, %Dist, %Dir);
	my(%TmpClustData);
	local(%NbrClust, %NbrClustSel);
	local(%MatchNbrClust);
	local($ClusteringMode);
	my($homcnt, $homspcnt);

	## ClusteringMode: clustering genes according to the pattern of
	##   the neighborhood genes.
	$ClusteringMode = 1 if ($opt->{clustering});


	# find neighboring clusters of $cl1 and create %Found, %Dist, %Dir
	for ($k = 0; $k < @::species; $k++) {
		my $spname = $::species[$k];
		my $spgrp = $::GenomeData->getSpGroup($spname);
	  	my $clustdata = $clTab->getClustData($cl1, $spname);

		my $numgenes = scalar(@{$clTab->getSpData($spname)});

	  	next if (! $clustdata);
		next if ($::GenomeData->isOutGroup($spname));

		$homcnt += @{$clustdata};
		$homspcnt++ if( @{$clustdata} );

		## check neighbors of gene $d1 (species $spname, cluster $cl1)
		foreach $d1 (@{$clustdata}) {
			my $order = $d1->{'order'};
			my $pos1 = $d1->{'pos'};
			my $dir = $d1->{'dir'};
			my $name = &::make_namestring($d1);

			## check genes within the window (+-NBR_WIN)
			for ($i = $order - $::NBR_WIN; $i <= $order + $::NBR_WIN; $i++) {
				my $cldata = $clTab->getSpData($spname,$i);

				my $clust = $cldata->{'clust'};
				my $pos2 = $cldata->{'pos'};
				my $dir2 = $cldata->{'dir'};

				next if ($clust == $cl1);

				$Found{$clust}->{$spgrp} = 1;
				my $dist = $i - $order;

				my $side;  ## side of the gene [L(5')==>(3')R]
				if ($dir * $dist > 0) {
					$side = 'R';
				} else {
					$side = 'L';
				}
if (DEBUG::check_clust($cl1)&& DEBUG::check_clust($clust)) {
	print STDERR "$cl1,$clust,$spname,$spgrp,$dist\n";
}

				$::nbrTriple->add($name,$side,$clust,$dist);

				if (! defined $Dist{$clust}->{$spgrp}
					|| $Dist{$clust}->{$spgrp} > abs($dist)) {
					$Dist{$clust}->{$spgrp} = abs($dist);

					$Dir{$clust}->{$spgrp} = $side;
					if ($dir == $dir2) {
					    ## same direction [==> ==>]
						$Dir{$clust}->{$spgrp} .= &opp_side($side);
					} else {
					    ## opposite direction [==> <==]
						$Dir{$clust}->{$spgrp} .= $side;
					}
				}
			}
		}
	}
	## skip if in the clustering mode and few inparalogs
	return if ($ClusteringMode &&
		(! $homspcnt || $homcnt/$homspcnt < $::MIN_INPARALOG) );

	# determine the direction between $cl1 and $cl2 (neighboring clusters)
	foreach $cl2 (keys %Found) {
		my $count = scalar(keys %{$Found{$cl2}});
		my($dir, $dircnt, $dirweight);
if (DEBUG::check_clust($cl2)&& DEBUG::check_clust($cl1)) {
	print STDERR "$cl1,$cl2>>", join(',', keys %{$Found{$cl2}}),"\n";
}


		$dircnt = {};
		$dirweight = {};
		my $tmpDist = $Dist{$cl2};
		my($distcnt, $avg_dist);
		my($pat) = {};
		my($curr_dir);
		my(%weight);

		foreach $sp ( sort { $tmpDist->{$a}<=>$tmpDist->{$b} }
					keys %{$tmpDist} ) {
			next if ($::GenomeData->isOutGroup($sp));
			my $spgrp = $::GenomeData->getSpGroup($sp);
			next if ($spgrp ne $sp);

			## edge weight is calculated based on inv_dist 
			$weight{$sp} = CalWeight::calc($tmpDist->{$sp});

			## (avg_dist is no longer used for edge weight)
			if (! $::NUM_NBRDIST || $distcnt < $::NUM_NBRDIST) {
				$avg_dist += $tmpDist->{$sp};
				$distcnt++;
			}
			$curr_dir = $Dir{$cl2}->{$sp};
			$dircnt->{$curr_dir}++;
			$dirweight->{$curr_dir} += $weight{$sp};
			if ($ClusteringMode) {
				if (! $pat->{$curr_dir}) {
					$pat->{$curr_dir} = BitVector->new;
				}
				$pat->{$curr_dir}->add(
					$::GenomeData->getSpIndex($sp) );
			}
		}
		$avg_dist = $avg_dist/$distcnt;
		my ($maxcnt, $maxweight, $maxdir);
		## direction is determined by majority vote (dirweight)
		foreach my $d (keys %{$dirweight}){
			if ($dirweight->{$d} > $maxweight) {
#			if ($dircnt->{$d} > $maxcnt) {
				$maxweight = $dirweight->{$d};
				$maxcnt = $dircnt->{$d};
				$maxdir=$d;
			}
		}
		my($weight);
		# weight of the edge
		foreach my $d (keys %{$dirweight}){
			if ($d eq $maxdir) {
				$weight += $dirweight->{$d};
			} else {
				# rearrangement
				$weight += $dirweight->{$d} * $::ReargPen;
			}
		}
		if ($maxcnt < $::NBR_CONS_NUM) {
			$weight *= $::SubLinkPen;
		}

		$NbrClust{$cl2} = {
			count => $count,
			dist=>$avg_dist, 
			maxdir => $maxdir,
			maxcnt => $maxcnt,
			maxweight => $maxweight,
			weight => $weight,
		};
	}
	my(%countProxL_sub, %countProxR_sub);
	for ($k = 0; $k < @::species; $k++) {
		my $sp = $::species[$k];
##		my $sp0 = $::GenomeData->getSpGroup($sp);
		my $clustdata;

	  	next if (! ($clustdata = $clTab->getClustData($cl1,$sp)) );
		next if ($::GenomeData->isOutGroup($sp));

		foreach $d (@{$clustdata}) {
			my $flag;
			$name = "$d->{sp}:$d->{name}:$d->{dom}";
			$TmpClustData{$name} = $d;

			## find a proximal cluster
			my ($proxL, $proxL_sub) = &check_NbrList($name,'L');
			my ($proxR, $proxR_sub) = &check_NbrList($name,'R');
			if ($proxL) {
				$NbrClustSel{$proxL} = $NbrClust{$proxL};
				foreach $cl (@{$proxL_sub}) {
					$countProxL_sub{$cl}->{$proxL}++;
				}
			}
			if ($proxR) {
				$NbrClustSel{$proxR} = $NbrClust{$proxR};
				foreach $cl (@{$proxR_sub}) {
					$countProxR_sub{$cl}->{$proxR}++;
				}
			}
		}
	}
	## examine the proximal clusters satisfying the secondary condition
	foreach $countProx_sub (\%countProxL_sub, \%countProxR_sub) {
		foreach $cl (keys %{$countProx_sub}) {
			foreach $cl0 (keys %{$countProx_sub->{$cl}}) {
				my($cnt) = $countProx_sub->{$cl}->{$cl0};
				if ($cnt >= $::NBR_CONS_NUM2){
					$NbrClustSel{$cl} = $NbrClust{$cl};
				}
			}
		}
	}

	if ($ClusteringMode) {
		## $tot_mchnum: the number of clusters shared by more than
		##    $CONS_NUM species.
		## $Mgraph{g1}->{g2}: the nubmer of clusters shared by
		##    the neighborhood of the two genes g1 and g2
		my(%Mgraph, $Mclust, $tot_mchnum);
		foreach $cl2 (keys %MatchNbrClust) {
			my($g1, $g2);
			foreach $g1 (keys %{$MatchNbrClust{$cl2}}) {
				if ($g2) {
					$Mgraph{$g1}->{$g2}++;
					$Mgraph{$g2}->{$g1}++;
				}
				$g2 = $g1;
			}
			$tot_mchnum++;
		}
		my($min_mchnum);
		if ($::MIN_NBRCLUSTRATIO) {
			$min_mchnum = $tot_mchnum * $::MIN_NBRCLUSTRATIO;
		}
		if ($::MIN_NBRCLUST > $min_mchnum) {
			$min_mchnum = $::MIN_NBRCLUST ;
		}
		foreach $g1 (keys %Mgraph) {
			foreach $g2 (keys %{$Mgraph{$g1}}) {
				if ($Mgraph{$g1}->{$g2} < $min_mchnum) {
					delete $Mgraph{$g1}->{$g2};
					delete $Mgraph{$g2}->{$g1};
				}
			}
		}

		my $graph = Graph->convGraph(\%Mgraph);
		my $sclust = Graph::SlinkClust->new($graph);
		$sclust->slinkclust;
		my ($Mclust, $clnum) = ($sclust->{clust},$sclust->{clustnum});

		if ($clnum > 1) {
			foreach $g (keys %{$Mclust}) {
				my $cln = $Mclust->{$g};
				my $d = $TmpClustData{$g};
				print STDERR "subgroup>$g $cln\n" if ($::DEBUG);
				## reset clusterid
				$d->{clust} = "$d->{clust}.$cln";
			}
		}
		return;
	}

	%NbrClust = %NbrClustSel;

	foreach $cl2 (keys %NbrClust) {
		my $nbrdata = $NbrClust{$cl2};
		my $maxdir = $nbrdata->{maxdir};

		## add node to the conserved neighborhood graph
		$this->make_pairlink($cl1,$cl2,$maxdir,$nbrdata);
	}
}

## Find a proximal cluster on $side(=L/R) of $gene
sub check_NbrList {
	my($gene, $side)= @_;
	my($nbrclust, $flag);
	my($ProximalClust, %ProximalClust_sub);

	## get clusters in $side=L|R of the $gene in the chromosomal order
	foreach my $clust ($::nbrTriple->getOrderedNbrList($gene,$side)) {
		my $count = $NbrClust{$clust}->{maxcnt};

		next if (substr($NbrClust{$clust}->{maxdir},0,1) ne $side);

		if (! $nbrclust) {
		    if ($count >= $::NBR_CONS_NUM) {

		    ## the most proximal cluster whose count >= NBR_CONS_NUM
			$nbrclust = $clust;
			$ProximalClust = $clust;
		    } elsif ($::NBR_CONS_NUM2 && $count >= $::NBR_CONS_NUM2) {
			$ProximalClust_sub{$clust} = 1;
		    } 
		}
		## for ClusteringMode
		## $gene of $clust is a neighbor of the current ortho_grp $cl1
		if ($count >= $::NBR_CONS_NUM) {
			$MatchNbrClust{$clust}->{$gene} = 1;
			$flag = 1;
		}
	}
	@Prox2 =  (keys %ProximalClust_sub);
	return($ProximalClust, \@Prox2);
}

#
# add an edge to the conserved neighborhood graph
#
#  dir1 != dir2: ===>R..L===> (+1);  dir1 == dir2: ===>R..R<== (-1)
#
sub make_pairlink {
	my($this,$n1,$n2,$rdir,$d) = @_;
	my($side1,$side2) = split(//, $rdir);

	if ($dd = $this->{graph}->edge_data($n1, $n2)) {
		$dd->{both} = 1;
	} else {
		## weighted distance for MST,
		## which is just an inverse of the edge weight
		$d->{wdist} = 1 / $d->{weight};

		$d->{dir} = ($side1 eq $side2) ? -1 : 1;	### => <= or <= =>
		$d->{rdir} = "$n1:$side1,$n2:$side2";
		$this->{graph}->add($n1, $n2, $d);
	}
}
sub check_sublink {
	my($this) = @_;
	foreach $e ($this->{graph}->edges) {
		### edge <$a,$b>
		($a,$b) = Graph::get_edge_ends($e);
		$d = $this->{graph}->edge_data($a,$b);
		if ($d->{maxcnt} < $::NBR_CONS_NUM) {
			my(%CONS_LINK, $flag);
			my(@nbr1) = $this->{graph}->out($a);
			### examine whether there is a node $n s.t.
			### both edges [$a,$n] and [$b,$n] are conserved
			foreach $n (@nbr1) {
				$d = $this->{graph}->edge_data($a,$n);
				if ($d->{maxcnt} >= $::NBR_CONS_NUM) {
					$CONS_LINK{$n} = 1;
				}
			}
			my @nbr2 = $this->{graph}->out($b);
			foreach $n (@nbr2) {
				$d = $this->{graph}->edge_data($b,$n);
				if ( $CONS_LINK{$n} &&
					$d->{maxcnt} >= $::NBR_CONS_NUM) {
					$flag = 1;
					last;
				}
			}
			if (! $flag) {
				## non-conserved sublink: delete
				$this->{graph}->delete_edge($a,$b);
			}
		}
	}
}

sub save_pairs {
	my($this, $outfile) = @_;
	if ($outfile) {
		if ($outfile == 1) {
			open(PAIROUT, ">&STDOUT");
		} else {
			open(PAIROUT, ">$outfile");
		}
	}
	foreach $e ($this->{graph}->edges) {
#		($a,$b) = split(/ /, $e);
		($a,$b) = Graph::get_edge_ends($e);
		$data = $this->{graph}->edge_data($a,$b);
		print PAIROUT join(' ', $a, $b, $data->{dist}, $data->{dir},
			$data->{rdir},
			$data->{count}, $data->{maxcnt}, $data->{both},
			$data->{weight}),"\n";
	}
}
sub read_pairs {
	my($class, $pairin) = @_;
	my $this = $class->new; 
	open(P, $pairin) || die("Can not open $pairin");
	my($flag);
	while(<P>) {
		$flag = 1 if (/====/);
		if (! $flag) {
			my($n1,$n2,$dist,$dir,$rdir,$count,$maxcnt,$both,$weight) = split;
			my $d = {dist=>$dist,dir=>$dir,rdir=>$rdir,
				count=>$count,maxcnt=>$maxcnt,both=>$both,weight=>$weight};
			$this->{graph}->add($n1,$n2,$d);
		}
	}
	close(P);
	$this;
}

sub opp_side {
	my($side) = @_;
	return (($side eq 'R') ? 'L' : 'R');
}
#######################################################
## for checking trinary neighboring relationships (a,b,c)
package NbrTriplet;
sub new{
	my($class) = shift;
	return bless {}, $class;
}
sub add {
	my($this, $name, $dir, $cluster, $dist) = @_;
	$this->{NbrClust}->{$name}->{$dir}->{$cluster} = abs($dist);
}
sub getDist {
	my($this, $name, $dir, $cluster) = @_;
	return $this->{NbrClust}->{$name}->{$dir}->{$cluster};
}
sub checkNbr {
	my($this, $name, $dir, $cluster) = @_;
	return (defined $this->{NbrClust}->{$name}->{$dir}->{$cluster});
}
sub getOrderedNbrList {
	my($this,$name,$dir) = @_;
	my $list = $this->{NbrClust}->{$name}->{$dir};
	return sort { $this->{NbrClust}->{$name}->{$dir}->{$a} <=>
		$this->{NbrClust}->{$name}->{$dir}->{$b} } (keys %{$list});
}
sub checkTriplet {
	my($this, $n0,$n1,$n2) = @_;

	return 1 if ($n0 eq '');

	($n0) = &tmpSplit($n0);
	($n1) = &tmpSplit($n1);
	($n2) = &tmpSplit($n2);

	my(@TmpSpList);
	my($cnt);
	foreach $sp (keys %{$::ClustTab->getClustData($n1)}) {
		next if ($::GenomeData->isOutGroup($sp));
		foreach $d (@{$::ClustTab->getClustData($n1,$sp)}) {
			my $name = &::make_namestring($d);
			if ( ($this->checkNbr($name,'L',$n0) &&
				$this->checkNbr($name,'R',$n2)) ||
			     ($this->checkNbr($name,'R',$n0) &&
				$this->checkNbr($name,'L',$n2)) ) {
				$cnt++;
				push(@TmpSpList, $sp);
				last;
###				return 1;
			}
		}
	}
	my $spcnt = $::GenomeData->countSpecies(\@TmpSpList);

#	if ($spcnt >= $::NBR_CONS_NUM) {
	if ($cnt >= $::NBR_CONS_NUM2) {
		return 1;
	}
	return 0;
}
sub tmpSplit {
	my($name) = @_;
	return split(/:/, $name);
}
#######################################################
package LinkDir;
sub new {
	my($class) = shift;
	return bless {}, $class;
}
sub check_linkdir {
	my($class, $nbrPair) = @_;
	my($this) = $class->new;
	my($n1, $nextdir);
	local(%ERRDIR);

	## undirected graph of conserved neighboring pairs
	local($nbrPairLink) = $nbrPair->{graph};

	## directed graph that has consistent orientation
	$this->{graph} = Graph->new;

	## constructing a minimum spaning forest from $nbrPairLink
	local($MST) = Graph::MST->kruskal($nbrPairLink, weight_tag=>'wdist');

	# for DEBUG
	## $MST->print_edges("/tmp/corealign.mstout");

	my (%Nodes, @Nodes);

	@Nodes = $MST->nodes;

	## check edges along the MST and
	## 	assign a direction to each gene ($::Gdir{$g})
	foreach $n1 ( sort { $::ClustTab->getClustSpCnt($b)<=>
				$::ClustTab->getClustSpCnt($a) } @Nodes ) {
		if (! $::Gdir{$n1}) {
			$this->check_link0($n1, 1);
		}
	}

#	# for DEBUG
#	open(O,">/tmp/ggdir");
#	foreach my $n1 ( @Nodes ) {
#		print O "$n1 $::Gdir{$n1}\n";
#	}
#	close(O);

	foreach my $n1 ( @Nodes ) {
		if ($::Gdir{$n1} >= 0) {
			$nextdir = 'R';
		} else {
			$nextdir = 'L';
		}
		foreach $n2 ($nbrPairLink->out($n1)) {
			next if ($n1 eq $n2);

		    my($data) = $nbrPairLink->edge_data($n1,$n2);
		    my($dir) = ( ($::Gdir{$n1} eq $::Gdir{$n2}) ? 1 : -1 );

		my($side1,$side2) = split(/,/, $data->{rdir});
		if ($side1 ne "$n1:$nextdir" && $side2 ne "$n1:$nextdir") {
			next;
		}

		    if ($data->{dir} ne $dir) {
			## eliminate edges in inconsistent direction
			## that are not included in the MST
			DEBUG::VERBOSE("Inconsistent edge (deleted): $n1 $n2\n");
			next;
		    }
		    $this->{graph}->add($n1,$n2, $data);
		}
	}
	$this;
}

sub check_link0 {
	my($this, $n1, $gdir) = @_;


	if ($::Gdir{$n1}) {
		if ($::Gdir{$n1} != $gdir) {
			print STDERR "ERROR: direction $n1: $gdir $TmpGdir{$n1}\n";
		}
		return -1;
	}

	# set direction of the gene $n1
	$::Gdir{$n1} = $gdir;

	## !!! traverse the link in reverse direction (go toward $prevdir)

	my(@nextnodes) = $MST->out($n1);

	foreach my $n2 (@nextnodes) {
		next if ($n2 eq $n1);
		my $data;

		$data = $nbrPairLink->edge_data($n1, $n2);

		my $dir = $data->{dir};
if (DEBUG::check_clust($n1) || DEBUG::check_clust($n2)) {
	print STDERR "NN>$n1,$n2,$dir,$nextdir,$dir2,$data->{both},$data->{dir}<\n";
}

		my $new_nextdir, $new_gdir;
		if ($dir < 0) {
			$new_gdir = $gdir * -1;
		} else {
			$new_gdir = $gdir;
		}
		$this->check_link0($n2, $new_gdir);
	}
}

sub save_links {
	my($this, $linkout) = @_;
	open(LINKOUT, ">$linkout");

	foreach $cl1 ($this->{graph}->nodes) {
		foreach $cl2 ($this->{graph}->out($cl1)) {
			my $linkdata = $this->{graph}->edge_data($cl1,$cl2);
			print LINKOUT join(' ', $cl1, $cl2,
				$linkdata->{dist},
				$linkdata->{dir},
				$linkdata->{count},
				$linkdata->{maxcnt},
				$linkdata->{both},
				$linkdata->{weight},
			), "\n";
		}
	}
	close(LINKOUT);
}
sub read_links {
	my($class, $linkin) = @_;
	my $this = $class->new;
	open(P, $linkin) || die("Can not open $linkin");
	$this->{graph} = Graph->new;
	while(<P>) {
		my($n1,$n2,$dist,$dir,$count,$maxcnt,$both,$weight) = split;
		my $d = {dist=>$dist,dir=>$dir,count=>$count,
				maxcnt=>$maxcnt,both=>$both,weight=>$weight};
		$this->{graph}->add($n1,$n2,$d);
	}
	close(P);
	$this;
}
sub make_graph {
	my($this) = @_;
	my($graph) = Graph->new;
	foreach $cl1 (keys %{$this->{NewLink}}) {
		foreach $cl2 (keys %{$this->{NewLink}->{$cl1}}) {
			$graph->add($cl1,$cl2,
				$this->{NewLink}->{$cl1}->{$cl2}->{dist});
		}
	}
	$graph;
}

###################################################
# A graph showing trinary relationships (A,B)-(B,C)
###################################################
package TripletGraph;
BEGIN{
	$NameDelim = ":";
}
sub create {
	local($class, $origGraph) = @_;
	my($graph_class) = $origGraph->isa('UndirectedGraph') ?
					'UndirectedGraph' : 'Graph';
	local($newGraph) = $graph_class->new;
	local(%Found);
	foreach my $n ($origGraph->nodes) {
		&make_tripletGraph0($n);
	}
	return $newGraph;
}
sub make_tripletGraph0 {
	my($n1, $n0) = @_;
	if ( $Found{$n0,$n1} ) {
		if ($Found{$n0,$n1} == 1) {
			## print STDERR "LOOP\n";
		}
		return;
	}
	$Found{$n0,$n1} = 1;
	foreach my $n2 ( $origGraph->out($n1) ) {
		if ($::nbrTriple->checkTriplet($n0,$n1,$n2)) {
			if ($n0 ne '') {
				$newGraph->add( &joinName($n0,$n1),
						&joinName($n1,$n2) );
				&set_node_weight($n0,$n1);
				&set_node_weight($n1,$n2);
			}
			&make_tripletGraph0($n2,$n1);
		} else {
		}
	}
	$Found{$n0,$n1} = 2;
}
sub set_node_weight {
	my($n1,$n2) = @_;
	my $weight = $origGraph->edge_weight($n1,$n2,'weight');
	$newGraph->set_node_weight(&joinName($n1,$n2), $weight);
}
#sub set_edge_weight {
#	my($n0,$n1,$n2) = @_;
#	my $weight = $origGraph->edge_weight($n1,$n2,'maxcnt');
#	$newGraph->set_edge_weight(&joinName($n0,$n1), &joinName($n1,$n2),
#			$weight);
#}
sub joinName {
	my($n1,$n2) = @_;
	return join($NameDelim, $n1, $n2);
}
sub splitName {
	my($name) = @_;
	return split(/$NameDelim/, $name);
}
sub checkName {
	my($name) = @_;
	$name =~ /$NameDelim/;
}


###################################################
# Find the best alignment path by DP
###################################################
package AlignmentPath;

sub new {
	return bless {}, $_[0];
}
sub dup {
	my $this = @_;
	my $new = AlignmentPath->new;
	foreach $k (keys %{$this}) {
		$new->{$k} = $this->{$k};
	}
	$new;
}
sub findPath {
	my($class, $clTab, $graph) = @_;
	my($this) = $class->new;

	$this->{Nodes} = $clTab->CID();

	$this->{graph} = $graph;

	my $corePath = $this->make_longestPath;

	my $dupCheck = DuplicatedClusterCheck->new;
	## convert back from the triplet graph node into the original node
	my($clustArray_PosClust) = $this->restoreTriplet($corePath, $dupCheck);

	my(@tmp_array);
	foreach my $path (@{$clustArray_PosClust}) {
		next if (! @{$path});
		my @newpath = $this->filterCluster($path);
		foreach $p (@newpath) {
			push(@tmp_array, $p);
		}
	}
	$this->{origPosClust} = $clustArray_PosClust = \@tmp_array;
	my $sortedClusters = $this->sortPosClust($clustArray_PosClust);

	my $newpid = 1;
	foreach my $path (@{$sortedClusters}) {
		foreach my $cid (@path) {
			$this->{PosClustID}->{$cid} = $newpid;
		}
		$newpid++;
	}
	$this->{sortedClusters} = $sortedClusters;

	return $this;
}

## To Calculate the longest path tree
sub longestPathSearch {
	my($this, $pathTree, $dir, $nodelist) = @_;

	local $PathTree = ($pathTree ? $pathTree : Graph->new);
	local %Visit;
	my @nodelist = ( $nodelist ? @{$nodelist} : $this->{graph}->nodes );

	foreach my $n (@nodelist) {
		$this->dp_search($n);
	}
	return $PathTree;
}
sub dp_search {
	my($this, $n1) = @_;
	my($weight, $max_weight, $maxpath);
	if ($Visit{$n1}) {
		if ( ! $PathTree->node_exist($n1) ) {
			print STDERR "LOOP: $n1\n";
			return 0;
		}
		return $PathTree->node_weight($n1);
	} elsif ($PathTree->node_exist($n1)) {
		return $PathTree->node_weight($n1);
	}
	$Visit{$n1} = 1;

	foreach my $n2 ( $this->{graph}->out($n1) ) {
		my $wt = $this->{graph}->node_weight($n2);
		$weight = $this->dp_search($n2) + $wt;
if (DEBUG::check_clust($n1)) {
	print STDERR "$n1>$n2, $wt, $weight\n";
}
		if ($weight > $max_weight) {
			$max_weight = $weight;
			$maxpath = $n2;
		}
	}
if (DEBUG::check_clust($n1)) {
	print STDERR ">>max_weight>>$n1,$max_weight,$maxpath\n";
}
	$PathTree->add($n1, $maxpath);
	$PathTree->set_node_weight($n1, $max_weight);
	return $max_weight;
}

sub make_longestPath {
	my($this) = @_;
	my($n);
	my(@CorePath);
	my(%Visit);

	my $pathTree = $this->longestPathSearch;

	my(@nodes)= $pathTree->nodes;

	my($ClustID);
	while (@nodes) {
		my($maxwt, $maxn, $wt, $n);
		## finding the maximum weighting node
		foreach $n (@nodes) {
			next if ($this->{PosClustID}->{$n});
			next if ($Visit{$n});
			$wt = $pathTree->node_weight($n);
			if ($maxwt < $wt) {
				$maxn = $n;
				$maxwt = $wt;
			}
		}

		## trace back
		$n = $maxn;
		$Visit{$n} = 1;
		my $Count = 0;
		my(@path0, %path0);
		while ($n) {
			last if ($this->{PosClustID}->{$n});
			push(@path0, $n);
			$Count++;
			$path0{$n} =  $n;

			my(@out) = $pathTree->out($n);
#if (@out != 1) {
#	print STDERR "Out:$n>>@out<<\n";
#}
			$n = $out[0];

			if ($n) {
				last if ($Visit{$n});
				$Visit{$n} = 1;
			}
		}
		last if (! $Count);	# no additional path

		## add the path into the core structure
		## 	if the cluster length >= $::MIN_CLUSTCNT
		if ($Count >= $::MIN_CLUSTCNT) {
			++$ClustID;
			foreach my $n0 (@path0) {
				$this->{PosClustID}->{$n0} = $ClustID;
			}
			push(@CorePath, \@path0);
		}

		my(@next_nodes);
		foreach $n (@nodes) {
			push(@next_nodes, $n) if (! $path0{$n});
		}
		@nodes = @next_nodes;

		## delete \@path0 and update the descendant nodes
		$this->updatePath($pathTree, \@path0);

		## recalculate the pathTree for finding the next longest path
		$this->longestPathSearch($pathTree, 'dir');

	}
	\@CorePath;
}

# update the graph by deleting descendant nodes to find the next optimal path 
# similar to the strategy for finding suboptimal alignment
#     by Waterman & Eggert JMB 1987
sub updatePath {
	my($this, $pathTree, $selectedPath) = @_;
	my(%selPathFlag);

	## delete nodes on the selectedPath
	foreach $n (@{$selectedPath}) {
		$pathTree->set_node_weight($n, -$::BIGVALUE);
		$selPathFlag{$n} = 1;
	}

	## find descendants of the nodes on the selected path to update
	$dfs = Graph::DFS->new($pathTree);
	my($inList, $outList) = $dfs->dfs('rev', $selectedPath);

	my @outList;

	foreach $n (sort {$outList->{$a}<=>$outList->{$b}} keys %{$outList}) {
		push(@outList, $n) if (! $selPathFlag{$n});
	}
	## delete the descendant nodes from pathTree before update
	foreach $n (@outList) {
		$pathTree->delete_node($n);
	}
	&deleteRedundantNodes($pathTree, \@outList, $selectedPath);
}
## delete short cut edges on the deleted path
sub deleteRedundantNodes {
	my($pathTree, $outList, $selectedPath) = @_;
	my(%alignIdx);
	my($i);
	foreach $n (@{$selectedPath}) {
		my($cid1,$cid2) = TripletGraph::splitName($n);
		$alignIdx{$cid1} = ++$i;
		$alignIdx{$cid2} = $i+1;
	}
	foreach $n (@{$outList}) {
		my($cid1,$cid2) = TripletGraph::splitName($n);
		if ($alignIdx{$cid1} && $alignIdx{$cid2} &&
		    abs($alignIdx{$cid1} - $alignIdx{$cid2}) < $::NBR_WIN_ALI) {
			## delete redundant nodes
			##    (=short cut edges of the deleted path)
			$pathTree->set_node_weight($n, -$::BIGVALUE);
		}
	}
}

## convert from the triplet graph node into the original node
## sort cluster list if posclust is specified
sub restoreTriplet {
	my($this, $Path, $dupCheck) = @_;
	my(@NewPath,@ClusterSort);
	my(@ClustArray_PosClust);
	my($pid, $prev_pid);

	sub checkDuplicatedCluster {
	## add numbers to ClustID if a cluster appears twice or more
		my($cid) = @_;
		my $newcid = $dupCheck->dupcheck($cid);
		if ($newcid ne $cid) {
			$::Gdir{$newcid} = $::Gdir{$cid};
		}
		return $newcid;
	}

	my($pid) = 0;
	push(@ClustArray_PosClust, "");	#dmy for [0]
	foreach my $pclust (@{$Path}) {
		my($flag,@new_pclust);
	    	foreach my $node_id (@{$pclust}) {
#			next if (! $this->{PosClustID}->{$node_id});
			if ( TripletGraph::checkName($node_id) ) {
				my($cid1,$cid2) =
					TripletGraph::splitName($node_id);
				if ($flag==0) {
					$cid1 = &checkDuplicatedCluster($cid1);
					push(@new_pclust, $cid1);
					$flag = 1;
				}
				$cid2 = &checkDuplicatedCluster($cid2);
				push(@new_pclust, $cid2);
			} else {
				push(@new_pclust, $node_id);
			}
		}
		push(@ClustArray_PosClust, \@new_pclust);
	}

	foreach my $path (@ClustArray_PosClust) {
	    for ($i = 0; $i < @{$path}; $i++) {
		my $cid = $path->[$i];
		my $newcid = $dupCheck->dupcheck2($cid);
		if ($newcid ne $cid) {
			$path->[$i] = $newcid;
			$::Gdir{$path->[$i]} = $::Gdir{$cid};
		}
	    }
	}
	return \@ClustArray_PosClust;
}

sub sortPosClust {
	my($this, $ClustArray_PosClust) = @_;
	if ($::clust_order eq 'posclust') {
		@ClusterSort = $this->posclust_sort($ClustArray_PosClust);
	} else {
		@ClusterSort = (0..$#{$ClustArray_PosClust});
	}
	my $newpid = 1;


	foreach $pid (@ClusterSort) {
		my @cids = @{ $ClustArray_PosClust->[$pid] };
		my $dir;
		my $cnt = @cids;


		if ($::clust_order eq 'posclust') {
			$dir = &posclust_dir(@cids);
			if ($dir < 0) {
				@cids = reverse @cids;
				foreach $cid (@cids) {
					$::Gdir{$cid} *= -1;
				}
			}
			## renum posclust_id
			foreach my $cid (@cids) {
				$this->{PosClustID}->{$cid} = $newpid;
			}
			$newpid++;
		}
		push(@NewPath, \@cids);
	}
	return \@NewPath;
}
sub filterCluster {
	my($this, $cids) = @_;
	if ( $this->filter_species($cids) ) {
		if ($::SPCOV_REG_CUT) {
			my @new_cid_list = $this->filter_region($cids);
			return@new_cid_list;
		} else {
			return ($cids);
		}
	} else {
		return();
	}
}
sub filter_species {
	my($this, $cids) = @_;
	my($count_clust, %del_sp);
	return 0 if (! @{$cids});

	$count_clust = @{ $cids };
	my($spcov_spnum, @delsp);
	foreach $sp (@::species) {
		next if ($::GenomeData->isOutGroup($sp));
		my($count_sp);

		foreach $cid (@{ $cids }) {
			next if ($::GenomeData->isOutGroup($sp));
			my $spd = $::ClustTab->getClustData($cid,$sp);
			if (@{$spd}) {
				$count_sp++;
			}
		}

		if ($count_sp / $count_clust < $::MIN_SPCOV ||
				$count_sp < $::SPCOV_MINNUM) {
			push(@delsp,$sp);
			$spcov_spnum++;
		}
	}
	if ($spcov_spnum >= $::SPCOV_SPNUM) {
		if ($DEBUG::VERBOSE) {
		    foreach $sp (@delsp) {
			DEBUG::VERBOSE("Delete: small coverage cluster($sp): $tot_count_sp{$sp}, $count_clust\n");
		    }
		}
		return 0;
	}
	return 1;
}
sub filter_region {
	my($this, $cids) = @_;
	my(@delreg, @delreg_all);

	return () if (! @{$cids});

	my($i);
	foreach $sp (@::species) {
		next if ($::GenomeData->isOutGroup($sp));
		my($i) = 0;
		my(@del_sp);
		foreach $cid (@{ $cids }) {
			my $spd = $::ClustTab->getClustData($cid,$sp);
			if (! @{$spd}) {
				$del_sp[$i] = 1;
			}
			$i++;
		}

		## score reflects concentration of deleted genes
		my($delnum, $score, $maxscore, $maxi, $begin);
		for ($i = 0; $i < @del_sp; $i++) {
##print STDERR "$sp, $i, $begin, $maxi, $maxscore $cid->[$i]\n";
			if ($del_sp[$i]) {
				if ($score == 0) {
					$begin = $i;
				}
				$delnum++; $score++;
				if ($score > $maxscore) {
					$maxi = $i;
					$maxscore = $score;
				}
			} else {
				$score--;
				if ($score == 0) {
					&check_spcov_reg(\@delreg_all,
					  $sp,$begin,$maxi,$maxscore,$delnum);
					$maxscore = 0; $begin = -1;
				} elsif ($score < 0) {
					$score = 0;
				}
			}
		}
		&check_spcov_reg(\@delreg_all,
			$sp,$begin,$maxi,$maxscore,$delnum);
	}
	my(@pos, $cnt, $del);
	foreach $r (@delreg_all) {
##print STDERR "$r->{begin} $r->{end}\n";
		push(@pos, {'pos'=>$r->{begin}, 'type'=>'b'});
		push(@pos, {'pos'=>$r->{end}, 'type'=>'e'});
	}
	@pos = sort {$a->{pos}<=>$b->{pos}} @pos;
	$del = {};
	foreach $p (@pos) {
		if ($p->{type} eq 'b') {
			$cnt++;
			if ($cnt >= $::SPCOV_SPNUM && ! defined $del->{begin}) {
				$del->{begin} = $p->{pos};
			}
		} elsif ($p->{type} eq 'e') {
			$cnt--;
			if ($cnt == $::SPCOV_SPNUM-1) {
				$del->{end} = $p->{pos};
				push(@delreg, $del);
				$del = {};
			}
		}
	}
	my(@new_cid_list, $idx);
	foreach $d (@delreg) {
		if ($d->{begin} - $idx + 1 >= $::MIN_CLUSTCNT) {
			my(@cidlist);
			for ($i = $idx; $i < $d->{begin}; $i++) {
				push(@cidlist, $cids->[$i]);
			}
			push(@new_cid_list, \@cidlist);
			
		}
		$idx = $d->{end} + 1;
	}
	if (@{$cids} - $idx + 1 >= $::MIN_CLUSTCNT) {
		my(@cidlist);
		for ($i = $idx; $i < @{$cids}; $i++) {
			push(@cidlist, $cids->[$i]);
		}
		push(@new_cid_list, \@cidlist);
	}
	return @new_cid_list;
}
sub check_spcov_reg {
	my($delreg_all, $sp,$begin,$end,$score,$delnum) = @_;
	if ($score >= $::SPCOV_REG_CUT) {
		my $len = $end - $begin + 1;
		my $hitnum = $len - $delnum;
		if ( $hitnum / $len < $::MIN_SPCOV
				|| $hitnum < $::SPCOV_MINNUM ) {
#print STDERR "DEL2>$sp, $begin, $end, $score, $delnum\n";
			push(@{$delreg_all}, {
				sp=>$sp, begin=>$begin,
				end=>$end, score=>$score} );
		}
	}
}
sub filter_duplication {
	my($this, $cids) = @_;
	my($count, $count_dup);
	foreach $cid (@{ $cids }) {
		if ($cid =~ /#/) {
			## dupnode;
			$count_dup++;
		}
		$count++;
	}
	if ($count_dup > $count * 0.25) {
		return 1;
	}
	return 0;
}

###################################################
# sort the clusters by the positions on the reference genome
sub posclust_sort {
	my($this, $clustArray) = @_;
	my($pid) = 0;
	my(%Sum, %SumP, %SumM, %CountP, %CountM, %PIDs);
	foreach my $list (@{$clustArray}) {
	    foreach my $node_id (@{$list}) {
		if ( TripletGraph::checkName($node_id) ) {
			my($cid1,$cid2) = TripletGraph::splitName($node_id);
			$refd = $::ClustTab->getClustData1($cid2,$::refsp);
		} else {
			$refd = $::ClustTab->getClustData1($node_id,$::refsp);
		}
		if ($refd) {
			if ($refd->{pos} > 0) {
				$SumP{$pid} += $refd->{pos};
				$CountP{$pid} ++;
			} else {
				$SumM{$pid} += $refd->{pos};
				$CountM{$pid} ++;
			}
  if ($::DEBUG) {
	$ggg{$pid} = $refd->{gene};
  }
		}
	    }
	    $pid++;
	}

	my $tot_pid = $pid;
	foreach ($pid = 0; $pid < $tot_pid; $pid++) {
		if ($CountM{$pid} >= $CountP{$pid} && $CountM{$pid} >= 1) {
			$Sum{$pid} = $SumM{$pid} / $CountM{$pid};
		} elsif ($CountM{$pid} < $CountP{$pid} && $CountP{$pid} >= 1) {
			$Sum{$pid} = $SumP{$pid} / $CountP{$pid};
		}
if ($::DEBUG) {
	print STDERR "P>$pid $Sum{$pid} $CountM{$pid} $CountP{$pid} $ggg{$pid}\n";
}
	}
	sort {$Sum{$a}<=>$Sum{$b}} (keys %Sum);
}
sub posclust_dir {
	my(@CIDs) = @_;
	my($DiffSum, $prevpos);
	my($DIFFCUT) = 10;   # 10kb
	foreach my $cid (@CIDs) {
		my $refd = $::ClustTab->getClustData1($cid,$::refsp);
		my $pos = $refd->{pos};
#		if ($prevpos) {
			if (abs($pos - $prevpos) < $DIFFCUT) {
				if ($prevpos < $pos) {
					$DiffSum++;
				} else {
					$DiffSum--;
				}
			}
#		}
		$prevpos = $pos;
	}
	if ($DiffSum > 0) {
		return 1;
	} else {
		return -1;
	}
}
###################################################
sub read_alignpath {
	my($class, $infile) = @_;
	my($this) = $class->new;
	my(@sortedClusters);
	my($clustlist) = [];
	my($posclust, $prev_posclust);
	open(T, $infile) || die "Can't open $infile";
	while(<T>){
		chomp;
		if (/^#/) {
			s/#//;
			if (! @::out_species) {
				@::out_species = split(/,/);
			}
			next;
		}
		my(@F) = split(/\t/);
		($clustid) = $F[0];
		($dir) = $F[2];
		($posclust) = $F[$#F];
		if ($posclust ne $prev_posclust && $prev_posclust) {
			push(@sortedClusters, $clustlist);
			$clustlist = [];
		}
		push(@{$clustlist}, $clustid);
		$this->{PosClustID}->{$clustid} = $posclust;
		$::Gdir{$clustid} = $dir;
		$prev_posclust = $posclust;
	}
	push(@sortedClusters, $clustlist);
#foreach $a (@sortedClusters) {
#    print STDERR join(' ',@$a),"\n";
#}
	close(T);
	$this->{sortedClusters} = \@sortedClusters;
	$this;
}
sub makeOrderedList {
	my($this, %opt) = @_;
	my(@ordlist);
	if (! $this->{orderedList} || $opt{force}) {
		foreach $path (@{$this->{sortedClusters}}) {
			push(@ordlist, @{$path});
		}
		$this->{OrderedList} = \@ordlist;;
	}
}
###################################################
## make alignment data for output
sub makeGenomeAlign {
	my($this) = @_;
	my($coln, $idx);
	my($prevPosClsutID);
	my($gAli);
	my(@tmp_gali);

	$this->makeOrderedList;

	$gAli = GenomeAlign->new;

	$idx = $coln = 0;
	foreach $pclust (@{$this->{sortedClusters}}) {
	    @tmp_gali = ();
	    foreach $cid (@{$pclust}) {
		my $aliNode = AlignNode->new;
		$aliNode->setData(idx=>$idx, colnum => $coln, cid=>$cid,
				posclust=>$this->{PosClustID}->{$cid});

		$aliNode->setConnection($this->{OrderedList}, $gAli);

		if ($::dotcolor eq 'file') {
			$aliNode->setGeneVal;
		}
		$gAli->add($aliNode);
		$idx++; $coln++;
	    }
	    $coln++;
	}

	$gAli->reset_idx;
	if ($::remove_para) {
#		$::ClustTab->setOpt(rmhash=>0);
		my $convCID = $::ClustTab->renum_clustid;
		$::ClustTab->make_index($::refsp);
		$gAli->renum_clustid($convCID);
		foreach $g (keys %::Gdir) {
			$::Gdir{$convCID->{$g}} = $::Gdir{$g};
		}
	}

	for ($i = 0; $i <= $coln; $i++) {
		$aliNode = $gAli->getData($i);
		next if (! $aliNode);
		$aliNode->setName();
	}
	$gAli->{tot_colnum} = $coln;
	return $gAli;
}
sub findBestOrthologs {
	my($this) = @_;
	my($idx);
	local(%Weight_data);

	$this->makeOrderedList;

	foreach $cid (@{$this->{OrderedList}}) {
		foreach my $sp (@::species) {
			## make %Weight_data for each dup_clust
			$this->findBestOrthologs_sub($cid, $idx, $sp);
		}
		$idx++;
	}
	my(%MaxData);
	## assign the cid of maximum weight to each gene data ($d)
	foreach $d (keys %Weight_data) {
		my($max_weight, $max_cid);
		my($data) = $Weight_data{$d}->{data};
		foreach $dd (@{$Weight_data{$d}->{weights}}) {
			if ($dd->{weight} > $max_weight) {
				$max_weight = $dd->{weight};
				$max_cid = $dd->{cid};
			}
		}
		push(@{$MaxData{$max_cid}->{$data->{sp}}}, $data);
	}
	foreach $cid (keys %MaxData) {
		foreach $sp (keys %{$MaxData{$cid}}) {
			my $new_spdata = $MaxData{$cid}->{$sp};
			$::ClustTab->changeClustID($new_spdata, $cid, $sp);
		}
	}
	$::ClustTab->setOpt(rmhash=>0);
	$::ClustTab->make_index($::refsp);
}
sub findBestOrthologs_sub {
	my($this, $cid, $idx, $sp) = @_;

	my $curr_spdata = $::ClustTab->getClustData($cid,$sp);
	return if (! $curr_spdata);

	my($dup_clust_flag);
	my($ListLen) = scalar(@{$this->{OrderedList}});
	my(@Weight_local);
	if (ClustTab::is_new_cid($cid)) {
		$dup_clust_flag = 1;
	}

	my($max_weight, @max_spdata);
	## check neighborhood genes 
	foreach my $d (@{$curr_spdata}) {
	    my($weight);
	    for (my $j = -$::NBR_WIN_ALI; $j <= $::NBR_WIN_ALI; $j++) {
		next if ($j == 0);
		my $idxj = $idx + $j;
		next if ($idxj < 0);
		last if ($idxj > $ListLen);
		my $previd = $this->{OrderedList}->[$idxj];
		my $prev_spdata = $::ClustTab->getClustData($previd,$sp);
		next if (! $prev_spdata);

		my $clstdiff = abs($j);

		my($dir);
		my($min_diff) = $::BIGVALUE;
		foreach my $pd (@{$prev_spdata}) {
			# direction along each genome
			if ($d->{order} > $pd->{order}){
				$dir = 1;
			} else {
				$dir = -1;
			}
			## diff: distance along each genome
			## clstdiff: distance along the alignment
			my $diff = abs($d->{order}-$pd->{order});

			next if ($diff > $::NBR_WIN);

			if ($diff < $min_diff){
				$min_diff = $diff;
				$mind = $pd;
			}
		}
		$min_diff = 0.5 if ($min_diff == 0);
		if ($min_diff != $::BIGVALUE) {
			# calculating cross-weight as sqrt(wt1*wt2)
			my($wt1) = CalWeight::calc($min_diff);
			my($wt2) = CalWeight::calc($clstdiff);
			my($wt) = sqrt($wt1*$wt2);
			$weight += $wt;
		}
	    }
if ($::DEBUG) {
	print STDERR "weight=$weight,cid=$cid; name=$d->{name}\n";
}
	    if ($dup_clust_flag) {
		## duplicated cluster ("#d")
		## weight for determining which cid is plausible for each gene
		if ($weight) {
			$Weight_data{"$d"}->{data} = $d;
			push(@{$Weight_data{"$d"}->{weights}},
				{ cid=>$cid, weight=>$weight } );
		}
	    } else {
		## non-duplicated cluster
		## weight for choosing genuine orthologs
		if ($weight) {
			push(@Weight_local, {data=>$d, weight=>$weight});
	    		if ($weight > $max_weight) {
				$max_weight = $weight;
			}
	    	}
	    }
	}
	if (! $dup_clust_flag) {
		foreach $wtd (@Weight_local) {
			if ($wtd->{weight} >= $max_weight * $::OrthoTolerance) {
				push(@max_spdata, $wtd->{data});
			}
		}
	}

	    # remove spurious inparalogs and rename ortholog cluster id
	if (! $dup_clust_flag) {
		my($new_spdata, $chg_flag);
		if  (@max_spdata && @{$curr_spdata} > 1) {
			## reset cluster id
			$::ClustTab->changeClustID(\@max_spdata, $cid, $sp);
		}
	}
}

## print gsort
sub print {
	my($this,$outfile) = @_;
	$outfile = "&STDOUT" if (! $outfile);
	open(O, ">$outfile") || die("Can not open $outfile");
	print O join("\t", "#", @::species),"\n";
	foreach $pclust (@{$this->{sortedClusters}}) {
	    foreach $cid (@{$pclust}) {
		print O "$cid";
		foreach $sp (@::species) {
			my $spdata = $::ClustTab->getClustData($cid,$sp);
			my $k;
			print O "\t";
			foreach my $spd (@{$spdata}) {
				my $spname = &::make_namestring2($spd);
				print O " " if ($k++);
				print O $spname;
			}
		}
		print O "\n";
	    }
	}
	close(O);
}
###################################################
# Ordered list of AlignNode (orthologous groups)
###################################################
package GenomeAlign;

BEGIN{
	@Score = (0, 0, 0, 1, 1, 0);
}

sub new {
	return bless {}, $_[0];
}
sub add {
	my($this, @data)= @_;
	push(@{$this->{list}}, @data);
}
sub length {
	scalar(@{$_[0]->{list}});
}
sub getData {
	my($this,$idx) = @_;
	$this->{list}->[$idx];
}
sub getOrderedList {
	my($this) = @_;
	my(@list);
	foreach $d (@{$this->{list}}) {
		push(@list, $d->{cid});
	}
	@list;
}
sub reset_idx {
	my($this) = @_;
	my($del) = 0;
	my($aliN);
	for ($idx = 0; $idx < @{$this->{list}}; $idx++) {
		$aliN = $this->{list}->[$idx];
		if ($aliN->{spcnt} < $::CONS_NUM) {
			## delete the node
			DEBUG::VERBOSE("delete small node: $aliN->{cid}, $aliN->{spcnt}\n");
			$n = splice(@{$this->{list}}, $idx, 1);
			$del++;
			$idx--;
			next;
		}
		$aliN->{idx} = $idx;
		$aliN->{colnum} -= $del;
	}
}
sub renum_clustid {
	my($this, $conv) = @_;
	foreach $d (@{$this->{list}}) {
		if ($conv->{$d->{cid}}) {
			$d->{cid} = $conv->{$d->{cid}};
		}
	}
}

sub print {
	my($this, $outfile) = @_;
	my($prevPosClsutID);
	my($out);
	if ($::output eq 'postscript') {
		$out = OutputPostScript->new(
				filename=>$outfile,
				colnum=>$this->{tot_colnum});
	} elsif ($::output eq 'text') {
		$out = OutputText->new($outfile);
	} else {
		## outfile=>stdout

		$out = OutputSimple->new($outfile);
	}
	foreach $sp (@::species) {
		$::calcDepth{$sp} = Calc_Depth->new;
	}
	foreach $data (@{$this->{list}}) {
		$out->printdata($data);
	}
	if ($::output eq 'postscript') {
		my $lastcol = $this->{list}->[$#{$this->{list}}]->{colnum};
		if (! $::no_legend) {
			$out->legend($lastcol + 10);
		}
		$out->output;
	}
}
###################################################
# An alignment node (=an orthologous group) for final output
###################################################
package AlignNode;
sub new {
	my($class, %opt) = @_;
	my $this = {};
	bless $this, $class;
	$this->setData(%opt);
	return $this;
}
sub setData {
	my($this, %opt) = @_;
	foreach $key ('idx', 'colnum', 'cid', 'posclust') {
		$this->{$key} = $opt{$key};
	}
}
sub setName {
	my($this) = @_;

	$this->{gene} = $this->{name} = '';
	my $refdata = $::ClustTab->getClustData1($this->{cid}, $::refsp);

	if ($refdata) {
		# a gene in the reference genome
		$this->{name} = $refdata->{name};
		if ($refdata->{gene}) {
			$this->{gene} = $refdata->{gene};
		} else {
			$this->{gene} = $refdata->{gene};
		}
	} else {
		# there is no gene in the reference genome
		foreach my $sp (@::species) {
			if ($::ClustTab->getClustData($this->{cid},$sp)) {
				my $d = $::ClustTab->getClustData1(
						$this->{cid},$sp);
				$this->{gene} =
				  $d->{gene} ? $d->{gene} : $d->{name};
				last;
			}
		}
	}
}

sub setGeneVal {
	# assign node color
	my($this) = @_;
	foreach my $sp (@::species) {
		my $spdata = $::ClustTab->getClustData($this->{cid},$sp);
		my(@val, $cnt);
		foreach my $spd (@{$spdata}) {
			my $spname = "$spd->{sp}:$spd->{name}";
			if (defined $::GeneValue{$spname}) {
				&::vectsum(\@val, $::GeneValue{$spname});
				$cnt++;
			}
		}
		if ($cnt > 0) {
			&::vectdiv(@val, $cnt);
			$this->{genetab}->{$sp}->{colorval} = join(':',@val);
		}
	}
}
sub setConnection {
	my($this, $OrderedList, $genomeAlign) = @_;
	my($ListLen) = scalar(@{$OrderedList});
	foreach my $sp (@::species) {
		my $dir;

		## information of each gene
		my $tabd = $this->{genetab}->{$sp} = {};

		$this->{connect}->{$sp} = [];

		my $curr_spdata = $::ClustTab->getClustData($this->{cid},$sp);

		next if (! $curr_spdata);

		my(%min_conn, %str2data);
		## check neighborhood genes to find the nearest neighbor
		##   of each gene on each side (min_conn{gene}->{side})
		foreach my $d (@{$curr_spdata}) {
		    my(%min_diff);
		    for (my $j = -$::NBR_WIN_ALI; $j <= $::NBR_WIN_ALI; $j++) {
			next if ($j == 0);
			$idxj = $this->{idx} + $j;
			$idxj_new = $this->{idx} + $j;
			next if ($idxj < 0);
			last if ($idxj > $ListLen);
			my $previd = $OrderedList->[$idxj];
			my $prev_spdata = $::ClustTab->getClustData($previd,$sp);
			next if (! $prev_spdata);

			if ($j < 0 && @{$prev_spdata}) {
			    ## save proximal neighbor cluster on OrderedList
			    ## (which is not necessarily connected in this sp)
				$tabd->{prevfnd} =
					$genomeAlign->getData($idxj_new);
		    		$tabd->{'pos'} = $d->{'pos'};
			}

			foreach my $pd (@{$prev_spdata}) {
				next if ($d eq $pd);
				# direction along each genome
				if ($d->{order} > $pd->{order}){
					$dir = 1;
				} else {
					$dir = -1;
				}

				## diff: distance along each genome
				## clstdiff: distance along the alignment
				my $diff = abs($d->{order}-$pd->{order});
				next if ($diff > $::NBR_WIN);

				if ( ! defined $min_diff{$dir} ||
				    $diff < $min_diff{$dir} ||
				($diff == $min_diff{$dir} &&
			abs($min_conn{"$d"}->{$dir}->{clstdiff}) > abs($j)) ) {
					$min_diff{$dir} = $diff;
					$min_conn{"$d"}->{$dir} = {
					   d=>$d, pd=>$pd, clstdiff=>$j};
				}
				$str2data{"$d"} = $d;
			}
		    }
		}
		@min_d_set =  (keys %min_conn);

		# make connection data
		foreach my $min_d (@min_d_set) {
		    my $sel_conn = $min_conn{$min_d};
		    foreach my $dir (keys %{$sel_conn}) {
			next if ($sel_conn->{$dir}->{clstdiff} >= 0);

			my $d = $sel_conn->{$dir}->{d};
			my $pd = $sel_conn->{$dir}->{pd};
			my $diff = abs($d->{order} - $pd->{order});

			my $cond = {dir=>$dir};
			$cond->{clstdiff} = $sel_conn->{$dir}->{clstdiff};

			$cond->{conn} = 1;
			$cond->{diff} = $diff;
			$idxj = $this->{idx} + $cond->{clstdiff};
			my $previd = $OrderedList->[$idxj];
			$cond->{prevcol} = $genomeAlign->getData($idxj);

			if ($d->{dir} * $pd->{dir} !=
			    $::Gdir{$this->{cid}}*$::Gdir{$previd}) {
				## inversion
				$cond->{dirinv} =  1;
			} else {
				$cond->{dirinv} =  0;
			}
			push(@{$this->{connect}->{$sp}}, $cond);
		    }
		}
		$tabd->{paranum} = scalar(@{$curr_spdata});
	}
	my(%SpCount);
	my(@TmpSpHit);
	foreach my $sp (@::species) {
		if ($this->{genetab}->{$sp}->{paranum}) {
			next if $::GenomeData->isOutGroup($sp);
			push(@TmpSpHit, $sp);
		}
	}
	$this->{spcnt} = $::GenomeData->countSpecies(\@TmpSpHit);
}
###################################################
package OutputData;

###################################################
package OutputPostScript;

sub new {
	my($class, %options) = @_;

    eval("use PostScript::Simple;");
    if ($@) {
        die($@);
    }

    #
	my $this = bless {}, $class;
	$this->outps_setup($options{colnum});
	if (! $options{filename} || $options{filename} eq '1') {
		$this->{filename} = 'drawcore.ps';	#default
	} else {
		$this->{filename} = $options{filename};
	}
	$this;
}

sub outps_setup {
	my($this, $tot_colnum) = @_;

	$::paper = 'A4' if (! $::paper);
	if ($::paper eq 'A3') {
		$Xsize = 840; $Ysize = 1200;
	} elsif ($::paper eq 'A5') {
		$Xsize = 420; $Ysize = 600;
	} else {
		$Xsize = 595; $Ysize = 840;
	}
	$Margin = $Ysize / 25;

	if ($::landscape) {
		my $tmp = $Xsize; $Xsize = $Ysize; $Ysize = $tmp;
	}
	if ($::COLNUM) {
		$COLNUM = $::COLNUM;
	} elsif ($::landscape) {
		$COLNUM = 160;
	} else {
		$COLNUM = 120;
	}
	$::print_ori = $::landscape if (! defined $::print_ori);
	if ($::ROWNUM) {
		$ROWNUM = $::ROWNUM;
	} else {
		$ROWNUM = POSIX::ceil($tot_colnum / $COLNUM)
	}

	$Xmargin = $Margin;
	$Ymargin = $Margin;

	$ROWSIZ = ($Ysize-$Ymargin*2) / $ROWNUM;
	if (! $::no_text) {
		$ROW_HSIZ = $ROWSIZ * 0.3;
	} else {
		$ROW_HSIZ = $ROWSIZ * 0.1;
	}
	$ROW_DATASIZ = $ROWSIZ - $ROW_HSIZ;
	$TopMargin = $Ymargin + $ROW_HSIZ;
	$ROW_HSPACE = 1;

	$COL_HSIZ = $Xsize / 60;
	$LeftMargin = $Xmargin + $COL_HSIZ * 1.2;
	$COLSIZ = ($Xsize-$Xmargin-$LeftMargin) / $COLNUM;

	$DotSiz = $COL_HSIZ / 12;
	$FontSiz = $DotSiz * 3.3;
	$FontSizSp = $FontSiz;
	if ($::no_text) {
		$FontSizSp = $DotSiz * 4.5;
	}
	$FontSizSmall = $DotSiz * 2.7;
	$LineWid0 = $DotSiz * 0.05;
	$LineWid = $DotSiz * 0.6;
	$LineWid2 = $DotSiz * 0.15;
	$Ysft_fact = 0.5;
	$Xsft_fact = 0.35;
	$TextAngle = 45;

	$LINENUM = 0+ (@::out_species ? @::out_species : @::species);
	$LINENUM++;

	$TextFont = "Times-Roman";
	$TextFontSp = "Times-Italic";

	@FUNCCOL = ([160,160,160], [200,200,0], [220,0,0], [0,0,220],
			[0,200,200], [160,160,160]);
	@GENENAME_COL = ("red");
	$LINECOL = {
		normal=>[0,0,0], reverse=>[255,0,0], gap=>[0,255,0],
			outgroup=>[140,140,140]};


	$eps = 1;
	my $psout = new PostScript::Simple(
		landscape=>$::print_ori,
		papersize => $::paper,
		colour => 1, units => "bp",
		eps=>$eps,
	);
	if (! $eps) {
		$psout->newpage;
	}
	$psout->setfont($TextFont,$FontSiz);
	$psout->setlinewidth($LineWid);
	$this->{psout} = $psout;
}

sub printdata {
	my($this, $out) = @_;
	my $ps = $this->{psout};

	my $idx = $out->{idx};
	my $n = $out->{colnum};

	my($rown, $coln) = &get_tabpos($n);
	my($x, $y) = &get_xypos($rown,$coln);

	my (@ospecies) = @::species;
	@ospecies = @::out_species if (@::out_species);

	if (! defined $out->{rown} || $rown > $out->{rown}) {
		my $i = 1;
	## print species names
#		$i = 1 if ($::funcdir);
		$ps->setfont($TextFontSp,$FontSizSp);
		foreach $sp (@ospecies) {
			my $y1 = $y - ($ROW_DATASIZ / $LINENUM) * $i;
			my $spname = $sp;

			if ( ! $::spabbname && $::GenomeData->{name}->{$sp} ) {
				$spname = $::GenomeData->{name}->{$sp};
			}
			$ps->text($Xmargin,$y1,"$spname");
			$i++;
		}
		$out->{rown} = $rown;
		$ps->setfont($TextFont,$FontSiz);
	}

	$yspace = $ROW_HSPACE;

	my $yspace1 = $yspace;
	my $yspace2 = $yspace * 2;
	my $x1 = $x - $COLSIZ/2;
	my $x2 = $x + $COLSIZ/2;
	my $y1 = $y + $DotSiz;
	my $y2 = $y - $DotSiz;
	my($func, $colstr, $col);
	if ($::funcData) {
	    $func = $::funcData->getGeneFunc1("$::refsp:$out->{name}");
	    $colstr = $::funcData->getFuncColor($func);
	} else {
	    $colstr = "ffffff";
	}
	$col = &conv_color( $colstr );

	if ($::Gdir{$out->{cid}} > 0) {
		$ps->setcolour(@{$col});
		$ps->polygon({filled=>1}, $x1,$y1, $x1,$y2, $x2,$y);

		$ps->setlinewidth($LineWid0);
		$ps->setcolour('black');
		$ps->polygon({}, $x1,$y1, $x1,$y2, $x2,$y, $x1,$y1);
	} else {
		$ps->setcolour(@{$col});
		$ps->polygon({filled=>1}, $x2,$y1, $x2,$y2, $x1,$y);
		$ps->setlinewidth($LineWid0);
		$ps->setcolour('black');
		$ps->polygon({}, $x2,$y1, $x2,$y2, $x1,$y, $x2,$y1);
	}
	$ps->setcolour('black');
	$yspace = $yspace2 + 1.25;

	if ( ($namecolor = $::GeneNameColor{$out->{gene}}) ||
		($namecolor =  $::ClusterColor{$out->{cid}}) ) {
		$ps->setcolour($namecolor);
	}
	my $outname = $out->{gene};
	if (defined $::GeneNameMark{$out->{gene}}) {
		$outname .= $::GeneNameMark{$out->{gene}};
	} elsif (defined $::ClusterMark{$out->{cid}}) {
		$outname .= $::ClusterMark{$out->{cid}};
	}
	if (! $::no_text) {
		if (length($outname) > 6) {
			$ps->setfont($TextFont,$FontSizSmall);
		}
		$ps->text({rotate=>$TextAngle}, $x,$y+$yspace,"$outname");
		if (length($outname) > 6) {
			$ps->setfont($TextFont,$FontSiz);
		}
	}
	$ps->setcolour('black');
	$prevx = $x - $COLSIZ;
	my $spn = 1;

	foreach $sp (@ospecies) {
		## ypos of the line for this species
		my $y1 = $y - ($ROW_DATASIZ / $LINENUM) * $spn;
		my $tabd = $out->{genetab}->{$sp};
		foreach $conn (@{$out->{connect}->{$sp}}) {
			my $skipflag = 0, $gapflag = 0;
			my $yshift, $xshift, $prevx1, $currx1;

			my $previdx = $conn->{prevcol}->{idx};
			my $prevcol = $conn->{prevcol}->{colnum};

			if ($previdx < $tabd->{prevfnd}->{idx}){
				$skipflag = 1;
			}

			if ($::GenomeData->isOutGroup($sp)) {
				$ps->setcolour(@{$LINECOL->{outgroup}});
			} elsif ($conn->{dirinv}){
				$ps->setcolour(@{$LINECOL->{reverse}});
			} elsif ($conn->{diff}>=$::DISP_MINGAP+1){
				$gapflag = 1;
				$ps->setcolour(@{$LINECOL->{gap}});
			}
			($prevx, $prevy) = &get_xypos(
					&get_tabpos($prevcol) );
			if ($skipflag) {
				my $depth = $::calcDepth{$sp}->calc(	
						$idx,$previdx);
				$yshift = $DotSiz * (1 + $Ysft_fact * $depth);
				$xshift = $DotSiz * $Xsft_fact;
				$prevx1 = $prevx+$xshift;
				$currx1 = $x-$xshift;
			}
			if ($prevy == $y) {
				## continue from the previous line
				if ($skipflag) {
					$ps->setlinewidth($LineWid2);
					$ps->line($prevx1, $y1,
						$prevx1, $y1+$yshift);
					$ps->line($prevx1,
						$y1+$yshift,
					    	$currx1, $y1+$yshift);
					$ps->line($currx1,$y1,
						$currx1, $y1+$yshift);
				} else {
					$ps->setlinewidth($LineWid);
					$ps->line($prevx,$y1,$x,$y1);
				}
			} else {
				## new line
				my $x1 = $LeftMargin - $COLSIZ/2;
				my $x2 = $Xsize - $Xmargin - $COLSIZ/2;
				my $prevy1 = $prevy -
					($ROW_DATASIZ / $LINENUM)*$spn;

				if ($skipflag) {
					$ps->setlinewidth($LineWid2);
					$ps->line($prevx1, $prevy1,
						$prevx1, $prevy1+$yshift);
					$ps->line($prevx1,
						$prevy1+$yshift,
					    	$x2, $prevy1+$yshift);
					$ps->line($x1, $y1+$yshift,
					    	$currx1, $y1+$yshift);
					$ps->line($currx1,$y1,
						$currx1, $y1+$yshift);
				} else {
					$ps->setlinewidth($LineWid);
					$ps->line($x1,$y1,$x,$y1);
					$ps->line($prevx,$prevy1,$x2,$prevy1);
				}
			}
			$ps->setcolour(@{$LINECOL->{normal}});
		}

		if ($::dotcolor eq 'genomepos') {
			my $rpos = $tabd->{pos}*1000 / $::GeneData->{length}->{$sp};
			if ($rpos < 0) {
				$ps->setcolour(510*(-$rpos), 510*(.5+$rpos),0);
			} else {
				$ps->setcolour(0,510*(.5-$rpos),510*$rpos);
			}
		} elsif ($::dotcolor eq 'file' && $tabd->{colorval}) {
			my($R,$G,$B) = split(/:/,$tabd->{colorval});
			$ps->setcolour($R,$G,$B);
		}
		if ($tabd->{paranum} == 1) {
			$ps->circle({filled=>1},$x,$y1,$DotSiz);
		} elsif ($tabd->{paranum} > 1) {
			$ps->box( {filled=>1}, $x-$DotSiz, $y1-$DotSiz,
				$x+$DotSiz, $y1+$DotSiz);
		}
		$ps->setcolour(0,0,0);

		$spn++;
	}
}
sub output {
	my($this, $filename) = @_;
	if (! $filename) {
		$filename = $this->{filename};
	}
	$this->{psout}->output($filename);
}
sub conv_color {
	my($hexstr) = @_;
	[hex substr($hexstr, 0, 2),
		hex substr($hexstr, 2, 2),
		hex substr($hexstr, 4, 2) ];
}
###################################################
sub legend {
	my($this, $colnum) = @_;
	my($rown, $coln) = &get_tabpos($colnum);
	my($x, $y) = &get_xypos($rown,$coln);
	my $x1 = $x - $COLSIZ/2;
	my $x2 = $x + $COLSIZ/2;
	my($y0,$y1,$y2);
	my($i, $j);
	return if (! $funcData);
	my @funcIDs = $::funcData->listFuncID('', colordef=>1);
	my $ps = $this->{psout};
	$j = 1;
	foreach $i (0..$#funcIDs) {
		$info = $::funcData->getFuncInfo($funcIDs[$i]);
		$colstr = $::funcData->getFuncColor($funcIDs[$i]);
		next if (! $colstr);
		$col = &conv_color( $colstr );
		$ps->setcolour(@{$col});

		$y0 = $y - ($ROW_DATASIZ / $LINENUM) * ($j++);
		$y1 = $y0 + $DotSiz; $y2 = $y0 - $DotSiz;

		$ps->polygon({filled=>1}, $x1,$y1, $x1,$y2, $x2,$y0);
		$ps->setcolour(0,0,0);
		$ps->text($x2+$COLSIZ/2,$y0-$DotSiz,"$info->{name}");
	}
	my $y0 = $y - ($ROW_DATASIZ / $LINENUM) * ($#::FuncName+1);
	$y1 = $y0 + $DotSiz; $y2 = $y0 - $DotSiz;
	$ps->setcolour(255,0,0);
	$ps->text($x1,$y0-$DotSiz,"red");
	$ps->setcolour(0,0,0);
	$ps->text($x2+$COLSIZ/2,$y0-$DotSiz,"B.subtilis essential genes");
}
###################################################
sub get_tabpos {
	my($n) = @_;
	(int($n / $COLNUM), $n % $COLNUM);
}
sub get_xypos {
	my($rown, $coln) = @_;
	my($x,$y);
	$y = $Ysize - ($TopMargin + $rown * $ROWSIZ);
	$x = $LeftMargin + $coln * $COLSIZ;
	($x, $y);
}

###################################################
package OutputSimple;

sub new {
	my($class, $filename) = @_;
	my $this = bless {}, $class;
    if (! $filename || $filename eq '1') {
        $filename = '&STDOUT';
    } else {
#        my($dirname) = File::Basename::dirname($filename);
#        File::Path::mkpath("$dirname", 0, 0750);
    }
	$this->{filename} = $filename;
	$out_fd = $this->{out_fd} = FileHandle->new(">$filename") || die("Can not open $filename($!)");
	if (@::out_species){
		print $out_fd "#", join(',', @::out_species),"\n";
	}
	$this;
}

sub printdata {
	my($this, $data) = @_;
	my (@ospecies) = @::species;
	@ospecies = @::out_species if (@::out_species);
	my($out_fd) = $this->{out_fd};

	print $out_fd "$data->{cid}\t";
	print $out_fd "$data->{gene}\t";

	print $out_fd "$::Gdir{$data->{cid}}";
	foreach $sp (@ospecies) {
		my($status);

		$status = $this->eval_connect($data, $sp);

		if ($status == 1) {
			# consecutive (incl. deletion)
			print $out_fd "\t*";
		} elsif ($status == 2) {
			# insertion
			print $out_fd "\t+";
		} elsif ($status == 3) {
			# rearrangement
			print $out_fd "\t|";
		} elsif ($status == 4) {
			# disconnect
			print $out_fd "\t.";
		} elsif ($status == 5) {
			# none
			print $out_fd "\t";
		}
		if ($data->{genetab}->{$sp}->{paranum} > 1) {
			print $out_fd "D";
		}
	}
	print $out_fd "\t$data->{posclust}";
	print $out_fd "\t$PathTree{$data->{cid}}->{dist}";
	print $out_fd "\t$PathTree{$data->{cid}}->{path}";
	print $out_fd "\t$PathTree{$data->{cid}}->{diff}";
	print $out_fd "\n";
}
sub eval_connect {
	my($this,$data,$sp) = @_;
	my($status);
	foreach my $cond (@{$data->{connect}->{$sp}}) {
		if ($cond->{prevcol}->{idx} == $data->{idx} - 1) {
		    if ($cond->{diff} == 1) {
			$status = 1; last;	# consecutive
		    } else {
			$status = 2;	# insertion
		    }
		} elsif ($cond->{conn}) {
			$status = 3;	# deletion/rearrangement
		}
	}
	if (! $status) {
		if ($data->{genetab}->{$sp}->{paranum} > 0) {
			$status = 4;	# disconnected
		} else {
			$status = 5;	# none
		}
	}
	$status;
}
###################################################
package OutputNames;
sub new {
	my($class, %options) = @_;
	my $this = bless {}, $class;
	my $filename = $options{'filename'};
	$filename = '&STDOUT' if (! $filename || $filename eq '1');
	$this->{filename} = $filename;
	$out_fd = $this->{out_fd} = FileHandle->new(">$filename");
	if (@::out_species){
		print $out_fd "#", join(',', @::out_species),"\n";
	}
	$this;
}
sub printdata {
	my($this, $data) = @_;
	my (@ospecies) = @::species;
	@ospecies = @::out_species if (@::out_species);
	my($out_fd) = $this->{out_fd};

	print $out_fd "$data->{cid}\t";
	print $out_fd "$data->{gene}\t";

	print $out_fd "$::Gdir{$data->{cid}}";
	foreach $sp (@ospecies) {
		my($flag);
		print "\t";
		my $spdata = $::ClustTab->getClustData($data->{cid},$sp);
		foreach my $spd (@{$spdata}) {
			my $spname = "$spd->{sp}:$spd->{name}";
			$spname .= "($spd->{dom})" if ($spd->{$dom});
			print " " if ($flag == 0);
			print "$spname";
			$flag = 1;
		}
	}
	print "\n";
}

###################################################
package Calc_Depth;

sub new {
	my($class) = @_;
	my($this) = {};
	$this->{MAXHEIGHT} = 16;
	$this->{SkipDepth} = ();
	bless $this, $class;
	$this;
}
sub calc {
	my($this, $curr, $prev) = @_;
	my($foundDepth, $retDepth);
	for (my $i = $curr-1; $i > $prev; $i--) {
		$foundDepth |= $this->{SkipDepth}->[$i];
	}
	$retDepth = 0;
	for (my $i = 1; $i < $this->{MAXHEIGHT}; $i++) {
		if ( ( ($foundDepth >> $i) & 1) == 0) {
			$retDepth = $i;
			last;
		}
	}
	for (my $i = $curr; $i >= $prev; $i--) {
		$this->{SkipDepth}->[$i] |= (1 << $retDepth);
	}
	$retDepth;
}
sub getDepth {
	my($this,$idx) = @_;
	my($depth) = 0;
	for (my $i = 0; $i < $this->{MAXHEIGHT}; $i++) {
		if ( ( ($this->{SkipDepth}->[$idx] >> $i) & 1) ) {
			$depth = $i;
		}
	}
	return $depth;
}

