#!/usr/bin/perl -s
require "MBGD_common.pl";

#$UNIPROT_DIR = "/bio/ftp/uniprot-ftp/uniprot/current_release/knowledgebase/complete";
# Resolve the input and output directories.  The script runs under
# "perl -s" (see the shebang line), so -UNIPROT_DIR=... and -DIR_OUT=...
# given on the command line arrive as $main::UNIPROT_DIR / $main::DIR_OUT.
# Whatever is left unset falls back to $main::DIR_ftp_uniprot
# (presumably configured by MBGD_common.pl -- confirm there).
$main::UNIPROT_DIR ||= $main::DIR_ftp_uniprot;
$main::DIR_OUT     ||= $main::DIR_ftp_uniprot;

# Tab-separated tables produced by this script.
$NAMES_FILE   = "$main::DIR_OUT/names.tab";
$DR_EMBL_FILE = "$main::DIR_OUT/xref_embl.tab";
$DR_FILE      = "$main::DIR_OUT/xref.tab";

# The handles stay package-global barewords because read_uniprot() prints
# to NAMES, DREMBL and DRFILE directly.  Failing to create any of them is
# fatal: continuing would silently produce no output.
open(NAMES,  '>', $NAMES_FILE)   or die "Can not write $NAMES_FILE: $!\n";
open(DREMBL, '>', $DR_EMBL_FILE) or die "Can not write $DR_EMBL_FILE: $!\n";
open(DRFILE, '>', $DR_FILE)      or die "Can not write $DR_FILE: $!\n";

# Process each UniProt flat file that is present as <name>.gz: decompress
# it into the output directory, parse it, then remove the temporary copy.
my(@fileUniprot) = ('uniprot_sprot.dat', 'uniprot_trembl.dat');
foreach my $file (@fileUniprot) {
    my $archive = "$main::UNIPROT_DIR/$file.gz";
    my $plain   = "$main::DIR_OUT/$file";
    next if (! -e $archive);

    print STDERR "Decompress :: $file\n";
    # $main::CMD_gzip is presumably set by MBGD_common.pl -- confirm.
    my $cmd = "$main::CMD_gzip -dc $archive > $plain";
    if (system($cmd) != 0) {
        # Do not parse (or leave behind) a partially decompressed file.
        my $status = $?;
        unlink($plain);
        die "Decompress failed (status $status) :: $cmd\n";
    }

    print STDERR "Reading :: $file\n";
    read_uniprot($plain);
    unlink($plain);
}

# Buffered write errors (e.g. disk full) only surface at close time.
close(DRFILE) or die "Can not close $DR_FILE: $!\n";
close(DREMBL) or die "Can not close $DR_EMBL_FILE: $!\n";
close(NAMES)  or die "Can not close $NAMES_FILE: $!\n";

exit(0);

# read_uniprot($file)
#
# Scan the UniProt flat file $file line by line and append tab-separated
# rows to the package-global filehandles NAMES, DREMBL and DRFILE, which
# the caller must have opened for writing:
#
#   NAMES  (one row per GN line):
#            AC, ID, Name, OrderedLocusNames, Synonyms, ORFNames
#   DREMBL (one row per "DR   EMBL; ..." line):
#            ID, AC, then the first four fields after the database name
#   DRFILE (one row per other DR line):
#            AC, database name, then the first three fields after it
#
# Missing fields are written as empty strings.  AC is the first accession
# of the first AC line of the entry; further AC lines of the same entry
# are ignored so that a secondary accession never replaces it.
# Dies if $file cannot be opened.
sub read_uniprot {
	my($file) = @_;
	open(my $in, '<', $file) or die "Can not open $file: $!\n";

	# Builds one output row; undefined fields become empty strings.
	my $row = sub {
		return join("\t", map { defined $_ ? $_ : '' } @_) . "\n";
	};

	my ($id, $ac);
	while (my $line = <$in>) {
		chomp $line;
		if ($line =~ /^ID\s*(\S+)/) {
			# An ID line starts a new entry: forget the previous accession.
			$id = $1;
			$ac = undef;
		} elsif ($line =~ /^AC\s*(\S+)/) {
			# Only the first AC line of an entry is used; later ones
			# continue the accession list and must not overwrite it.
			next if defined $ac;
			$ac = $1;
			$ac =~ s/;//;
		} elsif ($line =~ /^GN /) {
			# Each GN line is reported on its own; a name category that is
			# absent from this particular line yields an empty column.
			my ($name)     = $line =~ /Name=([^;]+)/;
			my ($ordered)  = $line =~ /OrderedLocusNames=([^;]+)/;
			my ($synonyms) = $line =~ /Synonyms=([^;]+)/;
			my ($orfnames) = $line =~ /ORFNames=([^;]+)/;
			print NAMES $row->($ac, $id, $name, $ordered, $synonyms, $orfnames);
		} elsif ($line =~ /^DR\s*(\S.*)$/) {
			my $xref = $1;
			$xref =~ s/\.$//;	# drop the terminating period
			my ($dbname, @ids) = split(/;\s*/, $xref);
			if ($dbname eq 'EMBL') {
				my ($emblacc, $protid, $status, $moltype) = @ids;
				print DREMBL $row->($id, $ac, $emblacc, $protid, $status, $moltype);
			} else {
				my ($acc, $name, $status) = @ids;
				print DRFILE $row->($ac, $dbname, $acc, $name, $status);
			}
		}
	}
	close($in);
}

