#!/usr/bin/perl -s
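#_****************************************************************************
#_*
#_* makeSpeciesUpdate.pl program
#_*
#_****************************************************************************
#_*
#_*  File name : makeSpeciesUpdate.pl
#_*
#_*  Function  : Updates the Species data. Entries that could not be updated
#_*              are reported separately (per the original note, written to
#_*              another directory).
#_*
#_*  Usage     : ./makeSpeciesUpdate.pl
#_*
#_*  Options   : -S=[Eukaryotes][Prokaryotes]
#_*                  When given, only data of the specified division is collected.
#_*              -R
#_*                  When given, the taxonomy ID is overwritten unconditionally.
#_*
#_*  Processing: (1) Collect the "Complete" data from the KEGG page
#_*                  - The KEGG page is fetched with wget.
#_*                  - The following items are taken from the KEGG Complete data:
#_*                      key                     value
#_*                      sp                      symbol of the species name
#_*                      orgname                 species name
#_*                      wwwlink                 link
#_*                      medid                   literature number (MEDID)
#_*                      source                  data source
#_*                      sourcewww               link to the data source
#_*                  - When a GenBank file can be fetched from sourcewww, the
#_*                    organism name and strain are taken from it.
#_*              (2) Collect the NCBI data
#_*                  - The NCBI "Complete Microbial Genomes" page is parsed and
#_*                    the following items are taken from it:
#_*                      key               value
#_*                      orgname           "Organism Name"
#_*                      taxid             "Taxonomy ID"
#_*                      institution       "Center Name"
#_*                      wwwlink           "Center URL"
#_*                      medid             "List of publications (comma separated)"
#_*                      accession_id      "List of accessions (comma separated)"
#_*              (3) Merge the data of (1) and (2) into the required records.
#_*                  The KEGG Complete entries are the base; NCBI information is
#_*                  added to them.
#_*                  How KEGG and NCBI records are matched:
#_*                  1. Same taxonomy ID  -> treated as the same record
#_*                  2. Same organism name -> treated as the same record
#_*
#_*                  Output data
#_*                      sp              symbol of the species name
#_*                      abbrev          abbreviation
#_*                      orgname         species name
#_*                      source          data source (currently always "refseq")
#_*                      strain          strain name
#_*                      taxon           Taxonomy ID
#_*                      institution     sequencing institution
#_*                      medid           PubMed ID
#_*                      publication     publication
#_*                      wwwlink         URL of the sequencing institution's home page
#_*
#_*  Notes     :
#_*
#_*  History   : 2006/08/25 [created]
#_*
#_*
#_***************************************************************************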
use File::Basename;
use File::Path;
use FileHandle;
use ParseNCBIcomplete;
use ParseKEGGcomplete;
require "MBGD_commonPath.pl";

#_  Global variables (package main; the subs below read them as $main::...)
# URL of the KEGG organism list page.
$KEGG = "http://www.genome.jp/kegg/catalog/org_list.html" ;

# Division of the KEGG Complete list to collect: Eukaryotes, Prokaryotes, All.
# NOTE(review): the value is spelled "Prokariyotes"; within this file it is
# only referenced from a commented-out constructor call - confirm before
# re-enabling that call.
$KEGG_DIV = "Prokariyotes";

# NCBI download URL and the local file the download is stored in.
$NCBI_URL = 'http://www.ncbi.nlm.nih.gov/genomes/lproks.cgi?dump=selected';
$NCBI_FILE = "$ENV{'MBGD_HOME'}/work/NCBI_GENOME_PROJ.txt";

# Base URL of a PubMed record; the medid is appended to it.
$PUBMED = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=pubmed&cmd=Display&uid=";

#########################
# Directory receiving the wget log files.
$LOG_DIR = "$ENV{'MBGD_HOME'}/log";

# Directory for working (temporary) files and the run report.
$TMP_DIR = "$ENV{'MBGD_HOME'}/work";

# Root directory of the per-species files that are updated.
$UPDATE_DIR = "$ENV{'MBGD_HOME'}/species";
#########################
#_
#_ 0. Create the log, work and update directories when they do not exist.
  if(!-e $main::LOG_DIR) {
    mkpath($main::LOG_DIR, 0, 0750);
  }
  if(!-e $main::TMP_DIR) {
    mkpath($main::TMP_DIR, 0, 0750);
  }
  if(!-e $main::UPDATE_DIR) {
    mkpath($main::UPDATE_DIR, 0, 0750);
  }
#########################

# Build the timestamp (YYYYMMDDhhmmss) used as suffix of every file of this run.
#$TMP = `date "+%Y%m%d%H%M%S"`;
#chomp($TMP);
my($sec, $min, $hour, $mday, $mon, $year) = localtime();
$TMP = sprintf("%04d%02d%02d%02d%02d%02d", $year + 1900, $mon + 1, $mday,
                                           $hour, $min, $sec);


# WHD is the run report: every sub below prints its WARNING/DIFF lines to it.
# $S and $R are set by the -s switch on the shebang line (-S=..., -R);
# the remaining command line arguments are passed on as the sp list.
open(WHD, ">>$TMP_DIR/makeSpeciesUpdate$TMP") or die($!);
&makeSpeciesUpdate($S,$R, @ARGV);
close(WHD);

exit;


#_
#_----------------------------------------------------------------------------
#_ Function : makeSpeciesUpdate
#_
#_ Summary:
#_   Main routine. Downloads the KEGG organism list and the NCBI genome
#_   project dump, merges both per species and hands every merged record to
#_   updateData().
#_
#_ Arguments:
#_   (1) $k           : IN : division given with -S (accepted but not used here)
#_   (2) $rewriteFlag : IN : defined when -R was given; passed to updateData()
#_   (3) @spList      : IN : optional list of sp symbols; when non-empty only
#_                           these species are processed
#_
#_ Returns: nothing
#_
#_ Notes:
#_   Dies when the KEGG page cannot be downloaded. A failed NCBI download is
#_   only reported to WHD and whatever is in $main::NCBI_FILE is parsed.
#_
sub makeSpeciesUpdate {
  my $k = shift(@_);            # division selected with -S (unused)
  my $rewriteFlag = shift(@_);  # overwrite mode for the taxonomy ID
  my @spList = @_;

  my($refSpHash);
  if (scalar(@spList) != 0) {
    $refSpHash = {};
    foreach my $sp (@spList) {
      $refSpHash->{"$sp"} = 1;
    }
  }

  # Quote one argument for /bin/sh. The URLs below are scraped from
  # downloaded pages and must not be able to inject shell syntax.
  my $shq = sub {
    my $arg = defined($_[0]) ? $_[0] : "";
    $arg =~ s/'/'\\''/g;
    return "'$arg'";
  };

#_
#_ 1. Collect the KEGG Complete data

  # Download the KEGG page with wget.
  my $tmpfile = "$main::TMP_DIR/kegg_$main::TMP";
  my $logfile = "$main::LOG_DIR/kegg_dwnload_$main::TMP";
  my $cmd = "$main::CMD_wget $main::KEGG -O $tmpfile -o $logfile";
  system($cmd)==0 or die("invalid command. : $cmd");

  # Parse the page.
  my $kegg = ParseKEGGcomplete->new($tmpfile);

  # Remove the temporary files.
  unlink($tmpfile);
  unlink($logfile);

#_
#_ 2. Collect the NCBI data
  $cmd = "$main::CMD_wget -O $main::NCBI_FILE -o $logfile " . $shq->($main::NCBI_URL);
  if (system($cmd) != 0) {
    # Best effort as before: keep going, but leave a trace in the report.
    print WHD "WARNING: NCBI download failed : $cmd\n";
  }
  my $ncbi = ParseNCBIcomplete->new($main::NCBI_FILE);

#_
#_ 3. Merge the KEGG and NCBI data

  # Remembers every "organism + strain" pair to detect duplicates.
  my $orgStrain_ref = {};

  # The KEGG entries are the base. Sort by sp symbol so that the processing
  # order (and therefore the report) is reproducible; sorting the hash
  # references themselves would order them by memory address.
  my @updateList = sort { $a->{'sp'} cmp $b->{'sp'} } $kegg->getAll();

  foreach my $kegg_ref (@updateList) {
    my $sp = $kegg_ref->{'sp'};
    if ($refSpHash && !$refSpHash->{"$sp"}) {
      next;
    }

    # Record that is filled in below.
    my $data_ref = {};
    $data_ref->{'sp'} = $kegg_ref->{'sp'};
    $data_ref->{'abbrev'} = "";
    $data_ref->{'source'} = 'refseq';
    $data_ref->{'orgname'} = "";
    $data_ref->{'strain'} = "";
    $data_ref->{'taxon'} = "";
    $data_ref->{'institution'} = "";
    $data_ref->{'medid'}="";
    $data_ref->{'publish'} = "";
    $data_ref->{'wwwlink'} = "";

    my $ncbi_ref;

    # Fetch the directory listing behind 'sourcewww', pick a .gbk file from
    # it and read strain / taxon / organism name from that GenBank file.
    my $www = $kegg_ref->{'sourcewww'};
    $tmpfile = "$main::TMP_DIR/genbank_$main::TMP";
    $logfile = "$main::LOG_DIR/genbank_log_$main::TMP";

    $cmd = "$main::CMD_wget -O $tmpfile -o $logfile " . $shq->($www);
    if (system($cmd) == 0) {
      my $gbkname = &getFileNameFromFtpSite($tmpfile);

      if (length($gbkname) > 0) {
        $cmd = "$main::CMD_wget -O $tmpfile -o $logfile " . $shq->($gbkname);
        if (system($cmd) == 0) {
          $data_ref = &getStrainTaxonOrgname($tmpfile,$data_ref);

          # First try the taxonomy ID found in the GenBank file ...
          my $taxon = $data_ref->{'taxon'};
          if (defined($taxon) && length($taxon) > 0) {
            $ncbi_ref = &searchTaxonomyID($data_ref,$ncbi);
          }
          # ... then the organism name found in the GenBank file.
          if (!defined($ncbi_ref)) {
            my $oname = $data_ref->{'orgname'};
            if (defined($oname) && length($oname) > 0) {
              $ncbi_ref = &searchSpName($oname,$ncbi);
            }
          }
        }
      }
    }

    # Last resort (also used when a download failed): the KEGG species name.
    if (!defined($ncbi_ref)) {
      my $kname = $kegg_ref->{'orgname'};
      if (defined($kname) && length($kname) > 0) {
        $ncbi_ref = &searchSpName($kname,$ncbi);
      }
    }

    # An NCBI record was found: copy its fields, then settle medid/publish
    # from the KEGG and NCBI information.
    if (defined($ncbi_ref)) {
      $data_ref = &setOrgInstWwwFromNCBI($data_ref,$ncbi_ref);
      $data_ref = &setMedId($kegg_ref, $ncbi_ref, $data_ref);
    }

    # Remove the temporary files.
    unlink($tmpfile);
    unlink($logfile);

    # Report species whose "organism + strain" equals an earlier one:
    #   sp, sp: SAME : organism + strain
    $orgStrain_ref = &checkSameName($data_ref, $orgStrain_ref);

    # Compare with the existing per-species file and write the result.
    &updateData($data_ref, $rewriteFlag);
  }
}

#_****************************************************************************
#_ Function : checkSameName
#_
#_ Summary:
#_   Remembers which sp symbol owns each "organism + strain" pair and writes
#_   a SAME line to WHD when another symbol maps to a pair already seen.
#_
#_ Arguments:
#_   (1) $data_ref      : IN : hash reference of the collected species data
#_                             (reads 'sp', 'orgname' and 'strain')
#_   (2) $orgStrain_ref : IN : hash reference keyed by "<orgname>_<strain>";
#_                             the value is the sp symbol(s) seen so far
#_
#_ Returns: $orgStrain_ref (the same reference, updated in place)
#_
#_ Notes:
#_   Records with an empty organism name or an empty strain are skipped.
#_
sub checkSameName {
  my ($data_ref, $orgStrain_ref) = @_;

  my ($symbol, $organism, $strainName) = @{$data_ref}{'sp', 'orgname', 'strain'};

  # Without both parts there is no pair to compare.
  return $orgStrain_ref if (length($organism) < 1 || length($strainName) < 1);

  my $pairKey = $organism . "_" . $strainName;

  # First sighting: just remember who owns the pair.
  if (!exists $orgStrain_ref->{$pairKey}) {
    $orgStrain_ref->{$pairKey} = $symbol;
    return $orgStrain_ref;
  }

  # Seen before: prepend this symbol and report the collision.
  my $earlier = $orgStrain_ref->{$pairKey};
  $orgStrain_ref->{$pairKey} = "$symbol, $earlier";
  print WHD "$symbol, $earlier: SAME : $organism + $strainName\n";

  return $orgStrain_ref;
}

#_****************************************************************************
#_ Function : searchTaxonomyID
#_
#_ Summary:
#_   Looks up the NCBI record for a taxonomy ID; returns undef when there is
#_   no acceptable record.
#_
#_ Arguments:
#_   (1) $data_ref : IN : hash reference of the collected species data
#_                        (reads 'taxon', 'orgname' and, for logging, 'sp')
#_   (2) $ncbi     : IN : NCBI data object; its getLineKTaxon() method is
#_                        expected to return an array reference of records
#_
#_ Returns: $ncbi_ref : hash reference of the NCBI record, or undef
#_
#_ Notes:
#_   Only records whose 'orgname' equals the organism name of $data_ref are
#_   accepted, whether the taxonomy ID yields one hit or several. When more
#_   than one record matches, the first one is returned and a SAME line is
#_   written to WHD for each further match.
#_
sub searchTaxonomyID {
  my($data_ref, $ncbi) = @_;

  my $ncbi_ref_list = $ncbi->getLineKTaxon($data_ref->{'taxon'});
  my $ncbi_ref;

  # One loop covers the single-hit and the multi-hit case: with one hit a
  # second match (and therefore the SAME line) cannot occur.
  foreach my $ref (@{ $ncbi_ref_list || [] }) {
    next if ($data_ref->{'orgname'} ne $ref->{'orgname'});

    if (defined($ncbi_ref)) {
      print WHD "$data_ref->{'sp'} : SAME : taxon : $data_ref->{'taxon'}\n";
    }
    else {
      $ncbi_ref = $ref;
    }
  }

  return $ncbi_ref;
}

#_****************************************************************************
#_ Function : searchSpName
#_
#_ Summary:
#_   Looks up the NCBI record for a species name; returns undef when there is
#_   no acceptable record.
#_
#_ Arguments:
#_   (1) $spname : IN : species name
#_   (2) $ncbi   : IN : NCBI data object; its getLineKName() method is
#_                      expected to return an array reference of records
#_   (3) $sp     : IN : optional label used in the report line
#_                      (defaults to $spname)
#_
#_ Returns: $ncbi_ref : hash reference of the NCBI record, or undef
#_
#_ Notes:
#_   The lookup key is made of the first two space-separated words of the
#_   name. A single hit is returned as is. With several hits only a record
#_   whose 'orgname' equals the full $spname is accepted; the first one is
#_   returned and every further match is reported to WHD as SAME.
#_
sub searchSpName {
  my($spname, $ncbi, $sp) = @_;
  $sp = $spname if (!defined($sp) || length($sp) < 1);

  my($f,$s) = split(/ /, $spname);
  my $name = $f . " " . $s;
  my $ncbi_ref_list = $ncbi->getLineKName($name) || [];
  my $ncbi_ref;

  if(scalar(@$ncbi_ref_list) == 1) {
    $ncbi_ref = $$ncbi_ref_list[0];
  }
  elsif(scalar(@$ncbi_ref_list) > 1) {
    foreach my $ref (@$ncbi_ref_list) {
      # Compare against the name that was passed in. The previous code
      # compared against a variable that does not exist in this sub, so a
      # multi-hit lookup could never succeed.
      next if ($spname ne $ref->{'orgname'});

      if(defined($ncbi_ref)) {
        print WHD "$sp : SAME : orgname : $spname\n";
      } else {
        $ncbi_ref = $ref;
      }
    }
  }
  return $ncbi_ref;
}

#_****************************************************************************
#_ Function : updateData
#_
#_ Summary:
#_   Compares a freshly collected species record with the existing
#_   <UPDATE_DIR>/<sp>/MBGD/<sp>.genome file and rewrites that file, or
#_   creates it when it does not exist yet.
#_
#_ Arguments:
#_   (1) $list_ref : IN : hash reference of the collected species data
#_   (2) $flag     : IN : when defined, the taxonomy ID is overwritten
#_                        (writeMode) instead of only compared (diffMode)
#_
#_ Returns: nothing
#_
#_ Notes:
#_   The existing file is a tab separated record in the column order
#_     sp abbrev orgname strain taxon source institution wwwlink medid publish
#_   The last non-blank line of the file is used. 'source' is always taken
#_   from the new data. 'publish' is only compared when the medid differs;
#_   otherwise the stored text is kept. All differences are reported to WHD
#_   by diffMode()/writeMode().
#_
sub updateData {
  my $list_ref = shift(@_);
  my $flag = shift(@_);

  my $sp = $list_ref->{'sp'};
  my $dir = "$main::UPDATE_DIR/$sp/MBGD";
  my $spfile = "$dir/$sp.genome";

  # No file yet: create the directory if necessary and write a new file.
  if(!-f $spfile) {
    if(!-d $dir) {
      # mkpath returns the list of directories it created, which is empty
      # when the directory already exists, so its return value cannot be
      # used as a success flag. Check the result instead.
      mkpath($dir, 0, 0750);
      (-d $dir) || die("Can not mkpath $dir($!)");
    }
    &makeNewFile($list_ref);
    return;
  }

  # Read the stored record (the last non-blank line wins).
  my @list;
  open(my $fh, '<', $spfile) or die($!);
  while(my $line = <$fh>) {
    chomp($line);
    next if ($line !~ /\S/);   # a trailing blank line must not wipe the record
    @list = split(/\t/, $line);
  }
  close($fh);

  my $hash_ref = {};

  # sp: should always agree with the file name; report and overwrite if not.
  if($sp ne $list[0]) {
    print WHD "$sp: sp overwrite: $sp : $list[0]\n";
  }
  $hash_ref->{'sp'} = $sp;

  $hash_ref = &diffMode($sp,"abbrev",$list[1],$list_ref->{'abbrev'}, $hash_ref);
  $hash_ref = &diffMode($sp,"orgname",$list[2],$list_ref->{'orgname'}, $hash_ref);
  $hash_ref = &diffMode($sp,"strain",$list[3],$list_ref->{'strain'}, $hash_ref);

  # taxon: overwritten only in -R mode.
  if(defined($flag)) {
    $hash_ref = &writeMode($sp,"taxon",$list[4],$list_ref->{'taxon'}, $hash_ref);
  } else {
    $hash_ref = &diffMode($sp,"taxon",$list[4],$list_ref->{'taxon'}, $hash_ref);
  }

  # source: always the freshly collected value.
  $hash_ref->{'source'} = $list_ref->{'source'};

  $hash_ref = &diffMode($sp,"institution",$list[6],$list_ref->{'institution'}, $hash_ref);
  $hash_ref = &diffMode($sp,"wwwlink",$list[7],$list_ref->{'wwwlink'}, $hash_ref);
  $hash_ref = &diffMode($sp,"medid",$list[8],$list_ref->{'medid'}, $hash_ref);

  # publish: when the medid is unchanged the stored text is kept silently.
  if($list[8] ne $list_ref->{'medid'}) {
    $hash_ref = &diffMode($sp,"publish",$list[9],$list_ref->{'publish'}, $hash_ref);
  } else {
    $hash_ref->{'publish'} = $list[9];
  }

  &updateFile($hash_ref);
  return;
}

#_****************************************************************************
#_ Function : setOrgInstWwwFromNCBI
#_
#_ Summary:
#_   Copies taxon, orgname, abbrev, institution and wwwlink from an NCBI
#_   record into the species record.
#_
#_ Arguments:
#_   (1) $list_ref : IN : hash reference of the species record being built
#_   (2) $ncbi_ref : IN : hash reference of the NCBI record (reads 'orgname',
#_                        'institution', 'wwwlink' and 'taxid')
#_
#_ Returns:
#_   (1) $list_ref : the same hash reference with these keys set
#_       KEY            VALUE
#_       abbrev         "<Genus> sp." when the second word is "sp.",
#_                      otherwise "<first letter of genus>.<second word>"
#_       orgname        NCBI organism name, cut before a " str. " part
#_       institution    NCBI 'institution'
#_       wwwlink        NCBI 'wwwlink'
#_       taxon          NCBI 'taxid'
sub setOrgInstWwwFromNCBI {
  my ($list_ref, $ncbi_ref) = @_;

  my $orgname = $ncbi_ref->{'orgname'};
  my @words = split/ /, $orgname;

  # Build the abbreviation; nothing is set when the first word is empty.
  if($words[0] =~ /(.).*/) {
    my $initial = $1;
    $list_ref->{'abbrev'} = ($words[1] eq "sp.")
                          ? $words[0] . " " . $words[1]
                          : $initial . "." . $words[1];
  }

  # Drop the strain part from the organism name.
  $orgname = $1 if($orgname =~ /(.*)\sstr.\s.*/);

  @{$list_ref}{'orgname', 'institution', 'wwwlink', 'taxon'} =
    ($orgname, $ncbi_ref->{'institution'}, $ncbi_ref->{'wwwlink'}, $ncbi_ref->{'taxid'});

  return $list_ref;
}

#_****************************************************************************
#_ Function : getFileNameFromFtpSite
#_
#_ Summary:
#_   Returns the link target of the first ".gbk" file found in a downloaded
#_   directory listing.
#_
#_ Arguments:
#_   (1) $file : IN : path of the saved HTML directory listing
#_
#_ Returns:
#_   (1) $gbkfile : href value of the first anchor whose text ends in ".gbk",
#_                  or "" when there is none
#_
#_ Notes:
#_   Dies when $file cannot be opened. The anchor is matched
#_   case-insensitively (<A HREF=...> and <a href=...>). The href is taken up
#_   to its closing quote, so a line holding several anchors yields the first
#_   one, and the dot of ".gbk" is matched literally.
#_
sub getFileNameFromFtpSite {
  my $file = shift(@_);

  my $gbkfile = "";
  open(my $fh, '<', $file) or die($!);
  while(my $line = <$fh>) {
    chomp($line);
    if($line =~ m{<a href="([^"]*)">[^<]*\.gbk</a>}i) {
      $gbkfile = $1;
      last;
    }
  }
  close($fh);

  return $gbkfile;
}

#_****************************************************************************
#_ Function : setMedId
#_
#_ Summary:
#_   Chooses the medid of a species from the NCBI and KEGG records and looks
#_   up the matching publication text.
#_
#_ Arguments:
#_   (1) $kegg_ref : IN : hash reference of the KEGG record (reads 'medid')
#_   (2) $ncbi_ref : IN : hash reference of the NCBI record (reads 'medid',
#_                        a list of IDs separated by "&uid=")
#_   (3) $data_ref : IN : hash reference of the species record being built
#_
#_ Returns:
#_   (1) $data_ref : the same hash reference with these keys set
#_       KEY            VALUE
#_       medid          chosen medid ("" when none)
#_       publish        text returned by getPublish() for it ("" when none)
#_
#_ Notes:
#_   Selection rules (applied only when the NCBI record has a 'medid' key):
#_     1. NCBI lists exactly one ID            -> use it
#_     2. NCBI lists several IDs               -> use the one equal to the
#_                                                KEGG medid, if any
#_     3. NCBI lists no ID                     -> use the KEGG medid, if any
#_     4. otherwise                            -> medid and publish stay ""
#_   Blanks inside IDs are removed, empty and repeated IDs are dropped, and
#_   IDs are compared as strings.
#_
sub setMedId {
  my ($kegg_ref,$ncbi_ref,$data_ref) = @_;

  my $medid = "";
  my $publish = "";

  if(exists $ncbi_ref->{'medid'}) {
    # Collect the distinct, non-empty NCBI IDs in their original order.
    # (A list starting with "&uid=" yields an empty first field.)
    my %seen;
    my @ncbi_id_list;
    foreach my $id (split(/&uid=/, $ncbi_ref->{'medid'})) {
      $id =~ s/ //g;
      next if (length($id) < 1);
      next if ($seen{$id}++);
      push(@ncbi_id_list, $id);
    }

    my $kegg_id = defined($kegg_ref->{'medid'}) ? $kegg_ref->{'medid'} : "";
    (my $kegg_key = $kegg_id) =~ s/\s+//g;   # tolerate stray blanks when comparing

    if(scalar(@ncbi_id_list) == 1) {
      $medid = $ncbi_id_list[0];
    }
    elsif(scalar(@ncbi_id_list) > 1) {
      # String comparison: with "==" two non-numeric or empty values would
      # compare equal as 0 == 0.
      foreach my $candidate (@ncbi_id_list) {
        if($kegg_key eq $candidate) {
          $medid = $candidate;
          last;
        }
      }
    }
    elsif(length($kegg_id) > 0) {
      $medid = $kegg_id;
    }

    $publish = &getPublish($medid) if (length($medid) > 0);
  }

  $data_ref->{'medid'} = $medid;
  $data_ref->{'publish'} = $publish;

  return $data_ref;
}

#_****************************************************************************
#_ Function : getPublish
#_
#_ Summary:
#_   Downloads the PubMed page of a medid and extracts the publication text.
#_
#_ Arguments:
#_   (1) $medid : IN : PubMed ID (digits only)
#_
#_ Returns:
#_   (1) $publish : publication text, or "" when the page reports no hit or
#_                  more than one hit, when nothing could be extracted, or
#_                  when $medid is not a plain number
#_
#_ Notes:
#_   The page is fetched with $main::CMD_wget into <TMP_DIR>/publish.txt
#_   (log in <LOG_DIR>/publish.txt); both files are overwritten on each call
#_   and left in place. Dies when wget fails or the page cannot be opened.
#_   A medid that is not purely numeric is reported to WHD and skipped,
#_   because the value comes from downloaded pages and is placed on a shell
#_   command line.
#_
sub getPublish {
  my ($medid) = shift(@_);

  if(!defined($medid) || $medid !~ /\A\d+\z/) {
    my $shown = defined($medid) ? $medid : "";
    print WHD "getPublish: skipped invalid medid : $shown\n";
    return "";
  }

  my $tmpfile = "$main::TMP_DIR/publish.txt";
  my $logfile = "$main::LOG_DIR/publish.txt";
  my $cmd = "$main::CMD_wget -O $tmpfile -o $logfile  \'$main::PUBMED$medid\'";
  system($cmd)==0 or die("Can not get pubmedPage : $cmd");

  my $hit = 0;
  my $publish = "";
  open(my $fh, '<', $tmpfile) or die($!);
  while(my $line = <$fh>) {
    chomp($line);
    # Number of hits reported by the page.
    if($line =~ /title="Total Results"\>\<center\>\<font class="pmlinkna"\>\<b\>All: (\d+)/) {
      $hit = $1;
    }
    if($hit == 1) {
      if($line =~ /AL_get.*\>(.*)\<\/a\>\<\/span\>([^\<]*)\<\/span\>/) {
        $publish = $1 . $2;
      }
      # The PMID line ends the record; nothing further is needed.
      last if($line =~ /\<p class="pmid"\>PMID: (\d+)/);
    }
    elsif($hit > 1) {
      # Several hits: ambiguous, give up without a publication text.
      last;
    }
  }
  close($fh);

  return $publish;
}

#_****************************************************************************
#_ Function : getStrainTaxonOrgname
#_
#_ Summary:
#_   Reads strain, taxonomy ID and organism name from a GenBank flat file and
#_   stores them in the species record.
#_
#_ Arguments:
#_   (1) $file     : IN : path of the GenBank file
#_   (2) $hash_ref : IN : hash reference of the species record being built
#_
#_ Returns:
#_   (1) $hash_ref : the same hash reference with these keys set
#_       KEY            VALUE
#_       orgname        text of the ORGANISM line ("" when absent)
#_       strain         /strain qualifier of the chosen source feature,
#_                      with "=" replaced by ":" ("" when absent)
#_       taxon          /db_xref="taxon:N" of the chosen source feature
#_                      ("" when absent)
#_     strain and taxon are left untouched when the file has no source feature.
#_
#_ Notes:
#_   Dies when $file cannot be opened. When the file holds several source
#_   features, the first one whose /organism (blanks removed, case ignored)
#_   contains or is contained in the ORGANISM name is used; a source without
#_   /organism is accepted as well; if none qualifies the first source is
#_   used. Names are compared as plain text, so characters such as "[" or
#_   "(" in an organism name are harmless.
#_
sub getStrainTaxonOrgname {
  my $file = shift(@_);
  my $hash_ref = shift(@_);

  my $orgname = "";
  my $inSource = 0;     # true while the lines of a source feature are read
  my $data = "";        # qualifiers of the current source, joined by blanks
  my @sourcelist;

  open(my $fh, '<', $file) or die($!);
  while(my $line = <$fh>) {
    chomp($line);
    if($line =~ /^\s+ORGANISM\s+(.*)/) {
      $orgname = $1;
      next;
    }
    # A feature key starts in column 6; it closes the previous feature.
    if($line =~ /^\s{5}\w+/) {
      if(length($data) > 0) {
        push(@sourcelist, $data);
        $data = "";
      }
      $inSource = ($line =~ /^\s{5}source/) ? 1 : 0;
      next;
    }
    # Qualifier lines start in column 22.
    if($inSource && $line =~ /^\s{21}(.*)/) {
      $data .= " $1";
    }
  }
  close($fh);

  # A source feature that is the last thing in the file is not followed by
  # another feature key, so it has to be flushed here.
  push(@sourcelist, $data) if(length($data) > 0);

  $hash_ref->{'orgname'} = $orgname;

  # Extract strain / taxon / organism of every source feature.
  my @stsList;
  foreach my $qualifiers (@sourcelist) {
    my $entry = {};
    if($qualifiers =~ /\/strain="([^"]+)"/) {
      $entry->{'strain'} = $1;
    }
    if($qualifiers =~ /\/db_xref="taxon:(\d+)"/) {
      $entry->{'taxon'} = $1;
    }
    if($qualifiers =~ /\/organism="([^"]+)"/) {
      $entry->{'org'} = $1;
    }
    push(@stsList, $entry);
  }

  return $hash_ref if(scalar(@stsList) < 1);

  # Choose the source feature that belongs to the ORGANISM line.
  my $chosen = $stsList[0];
  if(scalar(@stsList) > 1) {
    (my $wanted = lc($orgname)) =~ s/ //g;
    foreach my $entry (@stsList) {
      my $org = defined($entry->{'org'}) ? $entry->{'org'} : "";
      (my $found = lc($org)) =~ s/ //g;
      # index() with an empty needle is 0, so a source without /organism
      # is accepted here too.
      if(index($wanted, $found) >= 0 || index($found, $wanted) >= 0) {
        $chosen = $entry;
        last;
      }
    }
  }

  # Normalise the strain format (add further rules here if needed).
  my $strain = defined($chosen->{'strain'}) ? $chosen->{'strain'} : "";
  $strain =~ s/=/:/g;

  $hash_ref->{'strain'} = $strain;
  $hash_ref->{'taxon'} = defined($chosen->{'taxon'}) ? $chosen->{'taxon'} : "";

  return $hash_ref;
}

#_****************************************************************************
#_ Function : diffMode
#_
#_ Summary:
#_   Compares the stored value of one item with the newly collected value,
#_   decides which one to keep and reports the outcome to WHD.
#_
#_   UPDATE FAILURE
#_     stored value present, new value empty      -> keep the stored value
#_   WRITE
#_     stored value empty, new value present      -> take the new value
#_   DIFF
#_     both present but different                 -> keep the stored value
#_
#_   Nothing is reported when both are empty or both are equal (the stored
#_   value is kept), nor for these tolerated differences:
#_     strain  : new value is contained in the stored one (blanks and case ignored)
#_     orgname : stored value is contained in the new one (case ignored)
#_     wwwlink : one value followed by "/" is contained in the other
#_
#_ Arguments:
#_   (1) $sp       : sp symbol (used in the report lines)
#_   (2) $item     : item name, also the key written to $hash_ref
#_   (3) $val      : stored value
#_   (4) $new      : newly collected value
#_   (5) $hash_ref : hash reference receiving the chosen value
#_
#_ Returns: $hash_ref
#_
#_ Notes:
#_   Values are compared as plain text, never as regular expressions, so
#_   characters such as "(", "?" or "+" in a value are harmless.
#_
sub diffMode {
  my ($sp, $item, $val, $new, $hash_ref) = @_;
  $val = "" unless defined($val);
  $new = "" unless defined($new);

  # UPDATE FAILURE
  if(length($new) < 1 && length($val) > 0) {
    $hash_ref->{$item} = $val;
    print WHD "$sp: UPDATE FAILURE $item: $new : $val\n";
    return $hash_ref;
  }

  # WRITE
  if(length($val) < 1 && length($new) > 0) {
    $hash_ref->{$item} = $new;
    print WHD "$sp: WRITE $item: $new : $val\n";
    return $hash_ref;
  }

  # DIFF (or identical): the stored value always wins.
  if($new ne $val) {
    my $tolerated = 0;
    if($item eq "strain") {
      (my $nori = lc($val)) =~ s/ //g;
      (my $nauto = lc($new)) =~ s/ //g;
      $tolerated = (index($nori, $nauto) >= 0);
    }
    elsif($item eq "orgname") {
      # The new organism name may carry the strain in addition.
      $tolerated = (index(lc($new), lc($val)) >= 0);
    }
    elsif($item eq "wwwlink") {
      # Same link with or without a trailing slash.
      $tolerated = (index($new, "$val/") >= 0 || index($val, "$new/") >= 0);
    }
    print WHD "$sp: DIFF $item: $new : $val\n" unless $tolerated;
  }
  $hash_ref->{$item} = $val;

  return $hash_ref;
}

#_****************************************************************************
#_ Function : writeMode
#_
#_ Summary:
#_   Like diffMode(), but a differing new value replaces the stored one.
#_
#_   UPDATE FAILURE
#_     stored value present, new value empty      -> keep the stored value
#_   WRITE
#_     stored value empty, new value present      -> take the new value
#_   OVERWRITE
#_     both present but different                 -> take the new value
#_
#_   Nothing is reported when both are empty or equal, or when the new value
#_   is contained in the stored one (blanks and case ignored); the new value
#_   is stored in those cases as well.
#_
#_ Arguments:
#_   (1) $sp       : sp symbol (used in the report lines)
#_   (2) $item     : item name, also the key written to $hash_ref
#_   (3) $val      : stored value
#_   (4) $new      : newly collected value
#_   (5) $hash_ref : hash reference receiving the chosen value
#_
#_ Returns: $hash_ref
#_
#_ Notes:
#_   Values are compared as plain text, never as regular expressions. The
#_   failure label is "UPDATE FAILURE", the same label diffMode() writes.
#_
sub writeMode {
  my ($sp, $item, $val, $new, $hash_ref) = @_;
  $val = "" unless defined($val);
  $new = "" unless defined($new);

  # UPDATE FAILURE
  if(length($new) < 1 && length($val) > 0) {
    $hash_ref->{$item} = $val;
    print WHD "$sp: UPDATE FAILURE $item: $new : $val\n";
    return $hash_ref;
  }

  # WRITE
  if(length($val) < 1 && length($new) > 0) {
    $hash_ref->{$item} = $new;
    print WHD "$sp: WRITE $item: $new : $val\n";
    return $hash_ref;
  }

  # OVERWRITE (or identical): the new value always wins.
  $hash_ref->{$item} = $new;
  if($new ne $val) {
    (my $nori = lc($val)) =~ s/ //g;
    (my $nauto = lc($new)) =~ s/ //g;
    if(index($nori, $nauto) < 0) {
      print WHD "$sp: OVERWRITE $item: $new : $val\n";
    }
  }

  return $hash_ref;
}

#_****************************************************************************
#_ Function : updateFile
#_
#_ Summary:
#_   Writes one species record to <UPDATE_DIR>/<sp>/MBGD/<sp>.genome,
#_   replacing the previous content of that file.
#_
#_ Arguments:
#_   (1) $list_ref : IN : hash reference of the species record
#_
#_ Returns: nothing
#_
#_ Notes:
#_   The record is a single tab separated line in the column order
#_     sp abbrev orgname strain taxon source institution wwwlink medid publish
#_   An empty 'publish' is written as "Unpublished". The directory must
#_   already exist. Dies when the file cannot be opened, written or closed.
#_
sub updateFile {
  my($list_ref)=shift(@_);

  my $sp = $list_ref->{'sp'};
  my $pub = $list_ref->{'publish'};
  if(!defined($pub) || length($pub) < 1) {
    $pub = "Unpublished";
  }

  # Column order of the .genome record (publish is appended last).
  my @columns = ('sp', 'abbrev', 'orgname', 'strain', 'taxon',
                 'source', 'institution', 'wwwlink', 'medid');
  my @values = map { defined($list_ref->{$_}) ? $list_ref->{$_} : "" } @columns;
  my $data = join("\t", @values, $pub) . "\n";

  my $file = "$main::UPDATE_DIR/$sp/MBGD/$sp.genome";
  open(my $fh, '>', $file) or die "Can not open file : $file($!)";
  print {$fh} $data or die "Can not write file : $file($!)";
  # Buffered write errors only show up at close time.
  close($fh) or die "Can not close file : $file($!)";

  return;
}

#_****************************************************************************
#_ Function : makeNewFile
#_
#_ Summary:
#_   Writes the record of a species that has no .genome file yet and reports
#_   the creation, together with any item that is still empty, to WHD.
#_
#_ Arguments:
#_   (1) $list_ref : IN : hash reference of the species record
#_
#_ Returns: whatever updateFile() returns (callers do not use it)
#_
#_ Notes:
#_   Items checked for emptiness, in report order: sp, abbrev, orgname,
#_   strain, taxon, source, institution, medid, publish. The file itself is
#_   written by updateFile() even when items are missing.
#_
sub makeNewFile {
  my($list_ref)=shift(@_);

  # Names of the items that are still empty, each followed by a blank.
  my @checked = ('sp', 'abbrev', 'orgname', 'strain', 'taxon',
                 'source', 'institution', 'medid', 'publish');
  my $msg = join("", map { "$_ " } grep { length($list_ref->{$_}) < 1 } @checked);

  print WHD "$list_ref->{'sp'} : NEW update\n";
  print WHD "$list_ref->{'sp'} : NEW FAILURE : $msg\n" if(length($msg) > 0);

  &updateFile($list_ref);
}
##################
1;
