#!/usr/bin/perl -s

use File::Basename;
use File::Copy;

use FindBin qw($Bin);

require 'checkUserData.pl';

## ---- command line handling and output directory setup ----
## Two positional arguments are required; the third (output top directory)
## is optional.
if (@ARGV < 2) {
	die "Usage: $0 genome_list_file data_dir_top [out_dir_top]\n";
}

($genome_list_file, $data_dir_top, $outdir_top) = @ARGV;

## default output location: "species" next to the directory holding this script
$outdir_top = "$Bin/../species" if (! $outdir_top);

## converter command used by exec_read_migap(); the path is relative to the
## current working directory, not to $Bin
$CMD_read_migap = "./read_migap.pl";

## Fail early with the real reason instead of letting every later mkdir/open
## under this directory fail with a less helpful message.
if (! -d $outdir_top) {
	mkdir($outdir_top) || die "Can't create output directory $outdir_top: $!\n";
}
#$pubgenomes = "published_genomes";
#if (open(F, $pubgenomes)) {
#    while (<F>) {
#	next if (/^#/);
#	chomp;
#	($mbgd_sp, $mbgd_spid, $strain, $sa_id) = split(/\t/);
#	$SkipSpid{"gu00$sa_id"} = 1;
#    }
#    close(F);
#}
## ---- read the genome list ----
## Fallback names, used when the corresponding variable is empty (false) here.
$genome_list_file = "genome_info" if (! $genome_list_file);
$data_dir_top = "data" if (! $data_dir_top);
open(my $list_fh, '<', $genome_list_file)
	|| die "Can't open genome list file $genome_list_file: $!\n";

## Each record is a tab-separated line:
##   data_dir, spcode, spname, spabb, strain, taxid, status
## Lines starting with '#' are comments.  Every record is stored as a hash
## reference in @Data, in file order.
while (<$list_fh>) {
	next if (/^#/);
	## strip the line terminator, tolerating CRLF so that the last field
	## (status) does not keep a trailing "\r"
	s/[\r\n]+\z//;
	## a blank line carries no record; without this check it would produce an
	## entry with an empty data_dir and consume an auto-numbered spcode
	next if (/^\s*$/);
	($data_dir, $spcode, $spname, $spabb, $strain, $taxid, $status) = split(/\t/);

	if ($spcode =~ /([a-z]+)([0-9]+)/) {
		## explicit code such as "abc01": remember its prefix, number and
		## digit width so following records can be numbered automatically
		$spcode_head = $1;
		$spnum = $2;
		$spnum_length = length($2);
	} elsif (! $spcode) {
		## no code given: continue the sequence from the last explicit code
		$spcode = $spcode_head . sprintf("%0${spnum_length}d", ++$spnum);
	}

	## empty fields inherit the value of the previous record
	## (strain is deliberately left as given)
	$spname = $prev_spname if (! $spname);
	$spabb = $prev_spabb if (! $spabb);
	$taxid = $prev_taxid if (! $taxid);
	$status = $prev_status if (! $status);

	$info = {
		data_dir => $data_dir,
		spcode => $spcode,
		spname => $spname,
		spabb => $spabb,
		strain => $strain,
		taxid => $taxid,
		status => $status};
	push(@Data, $info);

	$prev_spcode = $spcode;
	$prev_spname = $spname;
	$prev_spabb = $spabb;
	$prev_strain = $strain;
	$prev_taxid = $taxid;
	$prev_status = $status;
}
close($list_fh);

## ---- build the per-genome output tree ----
## Genome numbers are assigned sequentially to the entries of @Data whose
## data directory exists; entries without a directory are skipped before the
## counter is advanced and therefore do not consume a number.
$GenomeNum = 1;
foreach $info (@Data) {
	$dir = $info->{data_dir};
	$data_dir = "$data_dir_top/$dir";
	next if (! -d "$data_dir");

	$GenomeID = sprintf "%03d", $GenomeNum;
	$spid = sprintf "gu%05d", $GenomeNum;

	$spcode = $info->{spcode};

	$GenomeNum++;

	## anything other than the exact string 'complete' counts as incomplete
	$Status = ($info->{status} eq 'complete') ? 'complete' : 'incomplete';

	## NOTE(review): nothing active in this script fills %SkipSpid (the code
	## that did is commented out); confirm whether it is set elsewhere.
	next if ($SkipSpid{$spid});

	## create genome directories: <outdir_top>/<spid>/gu/data
	$output_dir = "$outdir_top/$spid";
	$out_gu_dir = "$output_dir/gu";
	$out_gu_data_dir = "$out_gu_dir/data";
	foreach my $newdir ($output_dir, $out_gu_dir, $out_gu_data_dir) {
		## an already existing directory (re-run) is not an error
		next if (-d $newdir);
		mkdir($newdir) || warn "Can't create directory $newdir: $!\n";
	}

	## create genome information
	&output_genomeinfo($out_gu_dir, $GenomeID, $info, $Status);

	## create chromosome information
	### create a list of chromosomes: every *.gbk entry in the data directory;
	### an entry that is itself a directory contributes the *.gbk files in it
	my(@gbk_files);
	foreach $gbk_file (glob("$data_dir/*.gbk")) {
		if (-d $gbk_file) {
			push(@gbk_files, glob("$gbk_file/*.gbk"));
		} else {
			push(@gbk_files, $gbk_file);
		}
	}

	### $mode comes from the command line: the script runs under "perl -s",
	### so a leading "-mode=migap" argument sets it
	if ($mode eq 'migap') {
		### execute read_migap.pl
		&exec_read_migap(\@gbk_files, $out_gu_data_dir, $info->{strain});
	} else {
		### copy each GenBank file to chrNN/ncbiGbk and check it
		$id = 1;
		foreach $file (@gbk_files) {
			## keep the path out of the format string so a '%' in it is harmless
			$dirname = sprintf("%s/chr%02d", $out_gu_data_dir, $id);
			$outfile = "$dirname/ncbiGbk";
			if (! -d $dirname) {
				mkdir($dirname) || warn "Can't create directory $dirname: $!\n";
			}
			## report a failed copy instead of silently leaving an empty or
			## missing ncbiGbk; processing continues as before
			copy($file, $outfile) || warn "Can't copy $file to $outfile: $!\n";
			$id++;
			&checkUploadGbkFile($dirname);
		}
	}
	&output_timeStamp($out_gu_dir);
}
## checkSpidTab.pl is looked up through PATH by the shell
system("checkSpidTab.pl") == 0
	|| warn "checkSpidTab.pl did not finish successfully (status $?)\n";

## Run the converter named by the global $CMD_read_migap on one or more files.
##   $infile : a single file name, or a reference to an array of file names
##   $outdir : passed to the converter as -outdir=<value>
##   $spname : passed as -spname=<value>; its upper-cased form is passed as
##             -gene_head=<value>
##   $chrid  : accepted for backward compatibility, not used
## The command line is echoed to STDOUT first.
## Returns the value of system() (0 when the converter exited successfully).
sub exec_read_migap {
	my($infile, $outdir, $spname, $chrid) = @_;
	my($geneHead) = uc($spname);

	my(@files);
	if (ref $infile eq 'ARRAY') {
		@files = @$infile;
	} elsif (defined($infile) && length($infile)) {
		@files = ($infile);
	}

	print("$CMD_read_migap -outdir=$outdir @files\n");

	## List form of system(): every option and file name reaches the converter
	## as exactly one argument.  Going through a shell string would re-split
	## values containing spaces (e.g. a strain name) and interpret shell
	## metacharacters in paths.
	my(@cmd) = split(' ', $CMD_read_migap);
	my($rc) = system(@cmd,
		"-spname=$spname", "-gene_head=$geneHead", "-outdir=$outdir",
		@files);
	if ($rc != 0) {
		warn "$CMD_read_migap failed (status $rc): $!\n";
	}
	return $rc;
}

## Create (or truncate to zero length) the marker file "$outdir/last_update".
##   $outdir : directory that receives the marker file
## A failure is reported with a warning and does not abort the run.
## Returns 1 on success, 0 on failure.
sub output_timeStamp {
	my($outdir) = @_;
	my($stamp_file) = "$outdir/last_update";
	my($fh);
	if (! open($fh, '>', $stamp_file)) {
		warn "Can't update $stamp_file: $!\n";
		return 0;
	}
	close($fh);
	return 1;
#	system("touch -r $outdir/genome.txt  $outdir/last_update");
}

## Write the genome description file "$outdir/genome.txt".
##   $outdir   : directory that receives genome.txt
##   $genomeID : genome number; written in plain numeric form ("007" -> 7)
##   $info     : hash reference with the keys spname, spabb, spcode, strain
##               and taxid
##   $type     : value written on the "type" line
## The file holds one "<key><TAB><value>" line per item.
## Dies when the file cannot be opened or completely written.
sub output_genomeinfo {
	my($outdir, $genomeID, $info, $type) = @_;
	my($path) = "$outdir/genome.txt";
	open(my $gout, '>', $path) || die "Can't open $path: $!\n";

	## numeric context drops any zero padding
	$genomeID += 0;

	my(@rows) = (
		['id_user_genome',    $genomeID],
		['species_name',      $info->{spname}],
		['abbreviation_name', $info->{spabb}],
		['sp',                $info->{spcode}],
		['strain',            $info->{strain}],
		['taxonomy_id',       $info->{taxid}],
		['type',              $type],
	);
	foreach my $row (@rows) {
		print {$gout} join("\t", @$row), "\n";
	}

	## buffered write errors only show up when the handle is closed
	close($gout) || die "Can't write $path: $!\n";
}
