#!/usr/bin/perl -s
use strict;
use DirHandle;
use FileHandle;
use File::Path;
use RECOG::RecogCommon;
require "MBGD_Conf.pl";
require "InfoSpec.pl";

###############################################################################
#
# Print the command-line synopsis (program name followed by the two
# expected arguments) and terminate the process with exit status 0.
# This sub never returns to its caller.
sub print_usage {
    printf("Usage :: %s FILE_download_mymbgd NEW-SP\n", $0);
    exit(0);
}

###############################################################################
#
# Unpack an archive into $dir and verify that it has the expected layout.
#
# Arguments:
#   $file_tar - path of the archive; its suffix selects the tar flag
#               (.tar => none, .tar.Z => 'Z', .tgz / .tar.gz => 'z').
#   $dir      - directory passed to tar's -C option.
#
# Returns 1 when tar succeeded, gm/genome.txt exists, and every
# chromosome N that has gm/data/N.gene also has N.chromosome.txt,
# N.chrseq, N.geneseq and N.protseq.  Returns 0 otherwise, after
# removing $dir/gm.
# Dies with "Illegal file format." when the suffix is not recognised.
sub expand_tar {
    my($file_tar) = shift;
    my($dir) = shift;
    my($sta) = 1;

    # Dots are escaped so that only the real suffixes are accepted
    # (an unescaped '.' would also accept e.g. "x.tarXZ").
    my($opt_tar);
    if ($file_tar =~ /\.tar$/) {
        $opt_tar = '';
    }
    elsif ($file_tar =~ /\.tar\.Z$/) {
        $opt_tar = 'Z';
    }
    elsif (($file_tar =~ /\.tgz$/) || ($file_tar =~ /\.tar\.gz$/)) {
        $opt_tar = 'z';
    }
    else {
        die("Illegal file format.");
    }

    # Run tar in list form so that the archive and directory names never
    # pass through a shell.  The configured command is split on
    # whitespace in case it carries options of its own.
    my(@cmd) = (split(' ', $main::CMD_tar), "-x$opt_tar", '-C', $dir, '-f', $file_tar);
    print STDERR "CMD :: @cmd\n" if ($main::DEBUG);
    if (system(@cmd) != 0) {
        # A failed or interrupted extraction must not be accepted even
        # if the files checked below happen to exist already.
        print STDERR "ERROR :: Failed to expand $file_tar\n";
        $sta = 0;
    }

    if (! -e "$dir/gm/genome.txt") {
        print STDERR "ERROR :: Not found genome.txt\n";
        $sta = 0;
    }

    # Chromosomes are numbered from 1; the first missing N.gene ends the scan.
    for (my $i = 1; -e "$dir/gm/data/$i.gene"; $i++) {
        foreach my $name ("$i.chromosome.txt", "$i.chrseq", "$i.geneseq", "$i.protseq") {
            if (! -e "$dir/gm/data/$name") {
                print STDERR "ERROR :: Not found $name\n";
                $sta = 0;
            }
        }
    }

    # Do not leave a partial or invalid tree behind.
    if (!$sta) {
        rmtree("$dir/gm");
    }

    return $sta;
}

###############################################################################
#
# Rewrite $filename line by line through the $edit callback, which
# receives one line and returns the text to write in its place.
# The new content is written to "$filename.$$" first; the original is
# then kept as "$filename.bak" and the new file is renamed into place.
# Dies when an open, write, close or rename fails.
sub _rewrite_lines_in_place {
    my($filename) = shift;
    my($edit) = shift;
    my($tmpname) = "$filename.$$";

    # Explicit 'r'/'w' modes keep characters in the file name from being
    # interpreted as an open mode.
    my($fhr) = FileHandle->new($filename, 'r') || die("Can not open $filename($!)");
    my($fhw) = FileHandle->new($tmpname, 'w') || die("Can not open $tmpname($!)");

    # defined() is required: a final unterminated line such as "0" is
    # false and would otherwise end the loop and be dropped.
    while (defined(my $line = $fhr->getline())) {
        $fhw->print($edit->($line)) || die("Can not write $tmpname($!)");
    }
    # Buffered write errors surface at close time.
    $fhw->close() || die("Can not close $tmpname($!)");
    $fhr->close();

    rename($filename, "$filename.bak") || die("Can not rename $filename($!)");
    rename($tmpname, $filename) || die("Can not rename $tmpname($!)");

    return;
}

# Stamp the species name and species id into the files under $dir/gm.
#
# Arguments:
#   $dir  - directory that contains the gm/ tree.
#   $spid - value written to every "spid" line.
#   $sp   - value written to every "sp" line; also used to build the
#           "accession" value "$sp-N" of chromosome N.
#
# Files rewritten (each original is kept with a ".bak" suffix):
#   gm/genome.txt             sp, spid; an empty date_release is set
#                             to today's date (YYYY-MM-DD, local time).
#   gm/data/N.chromosome.txt  sp, spid, accession.
#   gm/data/N.geneseq,
#   gm/data/N.protseq         a leading "ugNNN:" prefix is removed from
#                             each ">" header line.
# N counts up from 1 and stops at the first N with no gm/data/N.gene.
# Dies when a file cannot be opened, written or renamed.
sub update_spid {
    my($dir) = shift;
    my($spid) = shift;
    my($sp) = shift;

    # genome.txt
    _rewrite_lines_in_place("$dir/gm/genome.txt", sub {
        my($line) = shift;
        if ($line =~ /^sp\t/) {
            return join("\t", 'sp', $sp) . "\n";
        }
        if ($line =~ /^spid\t/) {
            return join("\t", 'spid', $spid) . "\n";
        }
        # Only an empty release date is filled in; an existing one is kept.
        if ($line =~ /^date_release\t\s*$/) {
            my($sec, $min, $hour, $mday, $mon, $year) = localtime(time());
            my($today) = sprintf("%04d-%02d-%02d", $year + 1900, $mon + 1, $mday);
            return join("\t", 'date_release', $today) . "\n";
        }
        return $line;
    });

    # chromosome
    for (my $i = 1; -e "$dir/gm/data/$i.gene"; $i++) {
        my($accession) = "$sp-$i";

        _rewrite_lines_in_place("$dir/gm/data/$i.chromosome.txt", sub {
            my($line) = shift;
            if ($line =~ /^sp\t/) {
                return join("\t", 'sp', $sp) . "\n";
            }
            if ($line =~ /^spid\t/) {
                return join("\t", 'spid', $spid) . "\n";
            }
            if ($line =~ /^accession\t/) {
                return join("\t", 'accession', $accession) . "\n";
            }
            return $line;
        });

        foreach my $type ('geneseq', 'protseq') {
            _rewrite_lines_in_place("$dir/gm/data/$i.$type", sub {
                my($line) = shift;
                # Drop the "ugNNN:" prefix that follows '>' on header lines.
                $line =~ s/^(\s*>\s*)ug\d+\:/$1/;
                return $line;
            });
        }
    }

    return;
}

###############################################################################
#
# Import the genome data packed in $file_tar as a new species named $sp.
#
# Steps:
#   1. Exit with status -1 if $sp is already present in the species
#      table read from $main::FILE_spidtab.
#   2. Expand the archive into $ENV{'RECOG_HOME'}/work/import_<pid> and
#      validate it with expand_tar(); return quietly if it is invalid.
#   3. Obtain a new species id; exit with status 0 if none is returned.
#   4. Create $ENV{'MBGD_HOME'}/species/<spid>, add the entry to the
#      species table, move the expanded gm/ tree there and rewrite its
#      sp/spid fields with update_spid().
# The work directory is removed on every path after it has been created.
sub import_mymbgd_data {
    my($file_tar) = shift;
    my($sp) = shift;

    # Refuse a species name that is already registered.
    my($fileSpidTab) = $main::FILE_spidtab;
    my($info) = getInfoSpecTab($fileSpidTab);
    if (exists($info->{'NAME2DIR'}->{"$sp"})) {
        print STDERR "ERROR :: This sp is used.[$sp]\n";
        exit(-1);
    }

    # Per-process scratch directory for the expanded archive.
    my($dir_work) = "$ENV{'RECOG_HOME'}/work/import_$$";
    mkpath("$dir_work", 0, 0750);

    my($sta) = expand_tar($file_tar, $dir_work);
    if (!$sta) {
        rmtree("$dir_work");
        return;
    }

    #
    my($spid) = RECOG::RecogCommon::get_new_spid();
    if (! $spid) {
        print STDERR "WARNING :: Too many user genome data.\n";
        # Do not leave the expanded archive behind when giving up.
        rmtree("$dir_work");
        exit(0);
    }

    #
    my($dir) = "$ENV{'MBGD_HOME'}/species/$spid";
    mkpath("$dir", 0, 0750);

    #
    addInfoSpecTabEntry($info, $spid, $sp, $dir);
    setInfoSpecTab($fileSpidTab, $info);

    # Move gm/ into the species directory.  List form keeps the paths
    # away from the shell; the configured command is split on whitespace
    # in case it carries options of its own.
    my(@cmd) = (split(' ', $main::CMD_mv), "$dir_work/gm", $dir);
    if (system(@cmd) != 0) {
        # NOTE(review): the species-table entry written above is not
        # rolled back here -- confirm whether it should be.
        print STDERR "ERROR :: Failed to move $dir_work/gm to $dir\n";
        rmtree("$dir_work");
        exit(-1);
    }

    update_spid($dir, $spid, $sp);

    #
    rmtree("$dir_work");

    return;
}

###############################################################################
# Script entry point: runs only when $0 equals __FILE__, i.e. when this
# file is the program being executed.  Expects two arguments, the
# archive file and the new species name; prints the usage and exits
# when either is missing or the archive does not exist.
if ($0 eq __FILE__) {
    my($file_tar) = shift(@ARGV);
    my($sp)       = shift(@ARGV);

    if (! -e "$file_tar") {
        print STDERR "File not found :: $file_tar\n";
        print_usage();
    }
    if (!$sp) {
        print_usage();
    }

    import_mymbgd_data($file_tar, $sp);
}

1;
