#!/usr/local/bin/perl -s

###############################################################################
# prosite.dat ɤ߹ߡѥ󸡺ԤΥ⥸塼
#
# SKIP-FLAG=TRUE Ƥ륨ȥ꡼ǡ˴ؤƤϥѥ󸡺򤷤ʤ
#
#
###############################################################################
package Prosite;
$FILE_prosite = "$ENV{'BIOROOT'}/db/ideas/prosite/prosite.dat";

###############################################################################
#
sub new {
    my($class) = shift;
    my($fileProsite) = @_;
    my($self) = {};

    bless($self, $class);

    # Prosite ǡեɤ߹
    if (! $fileProsite) {
        $fileProsite = $FILE_prosite;
    }
    $self->read_prosite($fileProsite);

    return $self;
}

###############################################################################
# ѥޥå
sub searchPattern {
    my($self) = shift;
    my($seq) = @_;
    my($prosite);
    my($key_prosite);
    my($comment);
    my($info);
    my($ent);

    $prosite = $self->{'PROSITE'};

    # ̳Ǽΰ
    $info = [];

    KEY_PROSITE:
    foreach $key_prosite (sort(keys(%{$prosite}))) {
        # ѥƤʤΤϡåʤ
        if  (! defined($prosite->{"$key_prosite"}->{'PA'})) {
            next;
        }

        # Ȥˡ 'SKIP-FLAG=TRUE' ƤΤϡåʤ
        foreach $comment (@{$prosite->{"$key_prosite"}->{'CC'}}) {
            if ($comment =~ /SKIP-FLAG=TRUE/) {
                next KEY_PROSITE;
            }
        }

        # ѥ󸡺
        pos($seq) = 0;
        while($seq =~ m/$prosite->{"$key_prosite"}->{'PA_perl'}/g) {
            # ѥ˥ޥå
            $ent = {};
            $ent->{'FROM'} = length($`) + 1;
            $ent->{'TO'}   = pos($seq);
            $ent->{'ID'}   = ${$prosite->{"$key_prosite"}->{'ID'}}[0];
            $ent->{'AC'}   = ${$prosite->{"$key_prosite"}->{'AC'}}[0];
            $ent->{'PA'}   = $prosite->{"$key_prosite"}->{'PA'};
            $ent->{'SEQ'}  = $&;
            $ent->{'DE'}   = $prosite->{"$key_prosite"}->{'DE'};

            push(@{$info}, $ent);
        }
    }

    return $info;
}

###############################################################################
# prosite.dat եɤ߹
#
# ID  Identification                     (Begins each entry; 1 per entry)
# AC  Accession number                   (1 per entry)
# DT  Date                               (1 per entry)
# DE  Short description                  (1 per entry)
# PA  Pattern                            (>=0 per entry)
# MA  Matrix/profile                     (>=0 per entry)
# RU  Rule                               (>=0 per entry)
# NR  Numerical results                  (>=0 per entry)
# CC  Comments                           (>=0 per entry)
# DR  Cross-references to SWISS-PROT     (>=0 per entry)
# 3D  Cross-references to PDB            (>=0 per entry)
# DO  Pointer to the documentation file  (1 per entry)
# //  Termination line                   (Ends each entry; 1 per entry)
#
sub read_prosite {
    my($self) = shift;
    my($file_prosite) = @_;
    my($data);
    my($identification);
    my($pattern);
    my($results);
    my($ref_swiss);
    my($ref_pdb);
    local(*FILE_H);

    $data = {};
    $ac2id = {};      # AC  ID 褦ˤ
    $identification = "top_prosite";

    open(FILE_H, "$file_prosite") || die "Can not open $file_prosite($!).";
    while(<FILE_H>) {
        chomp();

        if (/^(ID)   (.*)$/) {
            ($identification) = split(';', $2);
            $data->{"$identification"}->{"$1"} = [ split(';', $2) ];
            $pattern   = '';
            $results   = '';
            $ref_swiss = '';
            $ref_pdb   = '';
        }
        elsif (/^(AC)   (.*)$/) {
            $data->{"$identification"}->{"$1"} = [ split(';', $2) ];

            my($ac) = $2;
            ($ac) = ($ac =~ /(PS\d+)/);
            $ac2id->{"$ac"} = $identification;
        }
        elsif (/^(DT)   (.*)$/) {
            $data->{"$identification"}->{"$1"} = $2;
        }
        elsif (/^(DE)   (.*)$/) {
            $data->{"$identification"}->{"$1"} = $2;
        }
        elsif (/^(PA)   (.*)$/) {
            $pattern .= $2;
        }
        elsif (/^(MA)   (.*)$/) {
            if (! defined($data->{"$identification"}->{"$1"})) {
                $data->{"$identification"}->{"$1"} = [];
            }
            push(@{$data->{"$identification"}->{"$1"}}, $2);
        }
        elsif (/^(RU)   (.*)$/) {
            if (! defined($data->{"$identification"}->{"$1"})) {
                $data->{"$identification"}->{"$1"} = [];
            }
            push(@{$data->{"$identification"}->{"$1"}}, $2);
        }
        elsif (/^(NR)   (.*)$/) {
            $results .= $2;
        }
        elsif (/^(CC) {0,3}(.*)$/) {
            if (! defined($data->{"$identification"}->{"$1"})) {
                $data->{"$identification"}->{"$1"} = [];
            }
            push(@{$data->{"$identification"}->{"$1"}}, $2);
        }
        elsif (/^(DR)   (.*)$/) {
            $ref_swiss .= $2;
        }
        elsif (/^(3D)   (.*)$/) {
            $ref_pdb .= $2;
        }
        elsif (/^(DO)   (.*)$/) {
            $data->{"$identification"}->{"$1"} = [ split(';', $2) ];
        }
        elsif (/^\/\//) {
            # PA θ
            if ($pattern ne '') {
                $data->{"$identification"}->{'PA'} = $pattern;

                # ѥ Perl ɽѴ
                my($patPerl) = $self->pa2re($data->{"$identification"}->{'PA'});
                $data->{"$identification"}->{'PA_perl'} = $patPerl;
            }

            # NR θ
            if ($results ne '') {
                $results =~ s/ //g;
                $data->{"$identification"}->{'NR'} = [ split(';', $results) ];
            }

            # DR θ
            if ($ref_swiss ne '') {
                $ref_swiss =~ s/ //g;
                $data->{"$identification"}->{'DR'} = [ split(';', $ref_swiss) ];
            }

            # 3D θ
            if ($ref_pdb ne '') {
                $ref_pdb =~ s/ //g;
                $data->{"$identification"}->{'3D'} = [ split(/;/, $ref_pdb) ];
            }
        }
        else {
            # unknown line type
            print STDERR "Warning : unknown line type $_\n";
        }

    }
    close(FILE_H);

    # ɤ߹ǡǼ
    $self->{'PROSITE'} = $data;
    $self->{'AC2ID'}   = $ac2id;

    return;
}

###############################################################################
# prosite Υѥ perl ɽѴ
# convert PA to Perl regular expressions
sub pa2re {
    my($self) = shift;
    my($pattern_prosite) = @_;
    my($pattern_perl);

    $pattern_perl = $pattern_prosite;

    $pattern_perl =~ s/-//g;
    $pattern_perl =~ s/.$//g;
    $pattern_perl =~ s/{/[^/g;
    $pattern_perl =~ tr/}/]/;
    $pattern_perl =~ tr/<>x()/^$.{}/;

    return $pattern_perl;
}

###############################################################################
#
sub getIdByAc {
    my($self) = shift;
    my($ac) = @_;
    my($id);

    $id = $self->{'AC2ID'}->{"$ac"};

    return $id;
}

###############################################################################
#
sub getDescrByAc {
    my($self) = shift;
    my($ac) = @_;
    my($id);
    my($descr);

    $id = $self->getIdByAc($ac);

    $descr = $self->{'PROSITE'}->{"$id"}->{'DE'};

    return $descr;
}

###############################################################################
package main;
if ($0 eq __FILE__) {
    mt($obj);

    $obj = Prosite->new();

    exit;
}

###############################################################################
1;                                                # Return TRUE Value
###############################################################################
