#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "db.h"

/* Size of the sequence buffer declared by the DEBUG test driver. */
#define MAXLEN 5000
/* Size of the entry-name buffer declared by the DEBUG test driver. */
#define MAXNAME 50

/*
 * Size of every keyword field in struct Form.  It must exceed the longest
 * keyword ("DEFINITION", 10 characters) so that each field keeps its
 * terminating NUL; getdbtype() passes hdr_ent to strcmp().
 */
#define FORM_KEYLEN 12

/*
 * Keyword table for the line-tagged flat-file formats, indexed by the
 * database type returned by getdbtype().
 *
 *   hdr_ent / hdr_entlen  keyword that starts the entry-name line and the
 *                         number of characters compared with strncmp()
 *   pos_ent               offset in that line where the entry name is read
 *   hdr_tit / hdr_titlen  keyword that starts the title line
 *   pos_tit               offset in that line where the title text starts
 *   hdr_seq / hdr_seqlen  keyword of the line preceding the sequence data
 *   end_mk  / end_mklen   keyword of the line that ends the sequence data
 *
 * NOTE(review): the rows look like PIR, EMBL/Swiss-Prot and GenBank
 * followed by two further formats; their order presumably has to match
 * the type constants in db.h -- confirm before reordering.
 */
struct Form {
    char hdr_ent[FORM_KEYLEN];
    int hdr_entlen;
    int pos_ent;
    char hdr_tit[FORM_KEYLEN];
    int hdr_titlen;
    int pos_tit;
    char hdr_seq[FORM_KEYLEN];
    int hdr_seqlen;
    char end_mk[FORM_KEYLEN];
    int end_mklen;
} form[MAXDB] = {
    { "ENTRY", 5, 6,
      "TITLE", 5, 16,
      "SEQUENCE", 8,
      "///", 3 },
    { "ID", 2, 3,
      "DE", 2, 5,
      "SQ", 2,
      "//", 2 },
    { "LOCUS", 5, 6,
      "DEFINITION", 10, 12,
      "ORIGIN", 6,
      "//", 2 },
    { "MEMBER", 6, 7,
      "DEFINITION", 10, 11,
      "SEQUENCE", 8,
      "STRUCTURE",  9 },
    { "CODE", 4, 5,
      "NAME", 4, 5,
      "SEQUENCE", 8,
      "END", 3 },
};
/* Character that the readers in this file copy into the sequence unchanged,
 * alongside letters, as a gap marker. */
char gapmark = '-';

#ifdef DEBUG
/*
 * Test driver, compiled only when DEBUG is defined.
 * Opens the database named by argv[1] and prints the name and the
 * sequence of every entry that getseq() returns.
 */
int main(int argc, char *argv[])
{
    DB *db;
    char entname[MAXNAME];
    char seq[MAXLEN];
    char *filename;

    if (argc < 2) {
	fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
	exit(1);
    }
    /* Use argv[1] directly: copying it into a fixed 50-byte buffer
     * overflowed on long path names. */
    filename = argv[1];
    if ((db = dbopen(filename)) == NULL) {
	fprintf(stderr, "Can't open %s\n", filename);
	exit(1);
    }
    if (dbtype(db) == NONE) {
	fprintf(stderr, "Unknown database %s\n", filename);
	exit(1);
    }

    /* getseq() takes four arguments; it may store maxlen characters plus
     * the terminating NUL, hence MAXLEN - 1 for a MAXLEN-byte buffer. */
    while (getseq(db, entname, seq, MAXLEN - 1)) {
	puts(entname);
	puts(seq);
    }
    return 0;
}
#endif

/*
 * getseq -- read the next entry (name and sequence) from an open database.
 *
 *   db       database handle
 *   entname  receives the entry name (read with an unbounded "%s"; the
 *            caller must supply a buffer large enough for any name)
 *   seq      receives the sequence, upper-cased, as a NUL-terminated
 *            string; must hold at least maxlen + 1 characters
 *   maxlen   maximum number of sequence characters accepted
 *
 * Letters and the gap mark are kept (digits too when ALLOW_DIGIT is
 * defined); every other character is skipped.
 *
 * Returns 1 when an entry was read, -1 when the sequence was longer than
 * maxlen (the write index wraps to 0, so seq is not meaningful), and 0
 * when no further entry is available.
 */
int getseq(DB *db, char *entname, char *seq, int maxlen)
{
    char buf[BUFSIZ];
    char *p;
    int c;
    int err_flag = 0;
    int i = 0;
    FILE *fp = dbptr(db);
    int type = dbtype(db);
    struct Form *f;

    if (type == STANF)
	return get_stanf(fp, entname, seq, maxlen);
    if (type == FASTA) {
	/* NOTE(review): this compares against stdio's EOF while get_fasta()
	 * tests DBEOF; kept unchanged -- confirm how DBEOF is defined. */
	if (db->status == EOF)
	    return 0;
	return get_fasta(fp, entname, seq, maxlen,
			 db->buf, db->entbuf, &(db->status));
    }

    /* Any other value must be a valid index into form[]. */
    if (type < 0 || type >= MAXDB)
	return 0;
    f = &form[type];

    while (fgets(buf, sizeof(buf), fp) != NULL) {
	if (strncmp(buf, f->hdr_ent, f->hdr_entlen) == 0) {
	    /* Only look at the name column if the line reaches it. */
	    if (strlen(buf) > (size_t) f->pos_ent)
		sscanf(&buf[f->pos_ent], "%s", entname);
	}
	else if (strncmp(buf, f->hdr_seq, f->hdr_seqlen) == 0) {
	    /* Sequence lines run up to the end marker (or end of file). */
	    while (fgets(buf, sizeof(buf), fp) != NULL
		   && strncmp(buf, f->end_mk, f->end_mklen) != 0) {
		for (p = buf; *p != '\0' && *p != '\n'; p++) {
		    /* <ctype.h> needs a value representable as
		     * unsigned char. */
		    c = (unsigned char) *p;
		    if (isupper(c))
			seq[i++] = *p;
		    else if (islower(c))
			seq[i++] = toupper(c);
		    else if (*p == gapmark)
			seq[i++] = gapmark;
#ifdef ALLOW_DIGIT
		    else if (isdigit(c))
			seq[i++] = *p;
#endif
		    /* Too long: restart at index 0 so the rest of the entry
		     * is still consumed without overrunning seq. */
		    if (i > maxlen) {
			i = 0;
			err_flag = 1;
		    }
		}
	    }
	    seq[i] = '\0';
	    return err_flag ? -1 : 1;
	}
    }
    return 0;
}
/*
 * getseqtitle -- like getseq(), but also returns the entry's title line.
 *
 *   db       database handle
 *   entname  receives the entry name (unbounded "%s"; caller sizes it)
 *   title    receives the title text, or "" when the entry has none;
 *            copied without a length limit, so it must be able to hold
 *            a full input line (BUFSIZ characters)
 *   seq      receives the upper-cased sequence; needs maxlen + 1 characters
 *   maxlen   maximum number of sequence characters accepted
 *
 * Returns 1 when an entry was read, -1 when the sequence was longer than
 * maxlen (seq is then not meaningful), and 0 when no entry is available.
 */
int getseqtitle(DB *db, char *entname, char *title, char *seq, int maxlen)
{
    char buf[BUFSIZ];
    char *p;
    int c;
    int err_flag = 0;
    int i = 0;
    FILE *fp = dbptr(db);
    int type = dbtype(db);
    struct Form *f;

    /* Start from an empty title for every format so that an entry without
     * a title line does not inherit the previous entry's title. */
    title[0] = '\0';

    if (type == STANF)
	return get_stanf_title(fp, entname, title, seq, maxlen);
    if (type == FASTA)
	return get_fasta_title(fp, entname, title, seq, maxlen,
			       db->buf, db->entbuf, &(db->status));

    /* Any other value must be a valid index into form[]. */
    if (type < 0 || type >= MAXDB)
	return 0;
    f = &form[type];

    while (fgets(buf, sizeof(buf), fp) != NULL) {
	if (strncmp(buf, f->hdr_ent, f->hdr_entlen) == 0) {
	    /* Only look at the name column if the line reaches it. */
	    if (strlen(buf) > (size_t) f->pos_ent)
		sscanf(&buf[f->pos_ent], "%s", entname);
	}
	else if (strncmp(buf, f->hdr_tit, f->hdr_titlen) == 0) {
	    /* A line shorter than the title column has no title text;
	     * copying from beyond its end would pick up stale data. */
	    if (strlen(buf) > (size_t) f->pos_tit) {
		strcpy(title, &buf[f->pos_tit]);
		chop(title);	/* drop the trailing newline */
	    } else {
		title[0] = '\0';
	    }
	}
	else if (strncmp(buf, f->hdr_seq, f->hdr_seqlen) == 0) {
	    /* Sequence lines run up to the end marker (or end of file). */
	    while (fgets(buf, sizeof(buf), fp) != NULL
		   && strncmp(buf, f->end_mk, f->end_mklen) != 0) {
		for (p = buf; *p != '\0' && *p != '\n'; p++) {
		    /* <ctype.h> needs a value representable as
		     * unsigned char. */
		    c = (unsigned char) *p;
		    if (isupper(c))
			seq[i++] = *p;
		    else if (islower(c))
			seq[i++] = toupper(c);
		    else if (*p == gapmark)
			seq[i++] = gapmark;
#ifdef ALLOW_DIGIT
		    /* Same optional digit handling as getseq(). */
		    else if (isdigit(c))
			seq[i++] = *p;
#endif
		    /* Too long: restart at index 0 so the rest of the entry
		     * is still consumed without overrunning seq. */
		    if (i > maxlen) {
			i = 0;
			err_flag = 1;
		    }
		}
	    }
	    seq[i] = '\0';
	    return err_flag ? -1 : 1;
	}
    }
    return 0;
}

/*
 * get_stanf -- read one entry in the ';'-comment format.
 *
 * Lines starting with ';' are skipped.  The first other line supplies the
 * entry name (its first whitespace-delimited word); the following lines
 * supply the sequence, which ends at the first '1' character.
 *
 *   fp       input stream
 *   entname  receives the entry name (unbounded "%s"; caller sizes it)
 *   seq      receives the upper-cased sequence; needs maxlen + 1 characters
 *   maxlen   maximum number of sequence characters accepted
 *
 * Returns 1 when a terminated sequence was read, -1 when it was longer
 * than maxlen (seq is then not meaningful), and 0 when end of file is
 * reached before a '1' terminator; seq is not NUL-terminated in that case.
 */
int get_stanf(FILE *fp, char *entname, char *seq, int maxlen)
{
    char buf[BUFSIZ], *p;
    int c;
    int i = 0;
    int seqflag = 0;
    int err_flag = 0;

    while (fgets(buf, sizeof(buf), fp) != NULL) {
	if (buf[0] == ';')
	    continue;
	if (!seqflag) {
	    /* First non-comment line: the entry name. */
	    sscanf(buf, "%s", entname);
	    seqflag = 1;
	    continue;
	}
	for (p = buf; *p != '\0' && *p != '\n'; p++) {
	    /* <ctype.h> needs a value representable as unsigned char. */
	    c = (unsigned char) *p;
	    if (isupper(c))
		seq[i++] = *p;
	    else if (islower(c))
		seq[i++] = toupper(c);
	    else if (*p == gapmark)
		seq[i++] = gapmark;
	    else if (*p == '1') {
		/* End-of-sequence marker. */
		seq[i] = '\0';
		return err_flag ? -1 : 1;
	    }
	    /* Too long: restart at index 0 so the rest of the entry is
	     * still consumed without overrunning seq. */
	    if (i > maxlen) {
		i = 0;
		err_flag = 1;
	    }
	}
    }
    return 0;
}

/*
 * get_fasta -- read one '>'-headed record.
 *
 * The record ends at the next '>' line, which has to be read to detect
 * the boundary.  That line stays in buf and its name is stored in entbuf,
 * so the following call can pick both up.
 *
 *   fp       input stream
 *   entname  receives the entry name: copied from entbuf on entry, or
 *            parsed directly from the header when *status is FIRST
 *   seq      receives the upper-cased sequence; needs maxlen + 1 characters
 *   maxlen   maximum number of sequence characters accepted
 *   buf      line buffer of BUFSIZ characters kept between calls; it is
 *            processed before the next line is read
 *   entbuf   name of the record whose header was read last
 *   status   FIRST before the first header, CONT afterwards; set to DBEOF
 *            when the input is exhausted
 *
 * The name is the header text after '>' and optional blanks, up to the
 * first whitespace or '('.  Names are copied without a length limit.
 *
 * Returns 0 when *status is already DBEOF, -1 when the sequence was longer
 * than maxlen (seq is then not meaningful), and 1 otherwise -- including
 * the call that reaches end of file, even if it collected no sequence.
 */
int get_fasta(FILE *fp, char *entname, char *seq, int maxlen,
	      char *buf, char *entbuf, int *status)
{
    char *p;
    int c;
    int i = 0;
    int seqflag = 0;
    int err_flag = 0;
    int incomplete_title = 0;
    char *ebufp = entbuf;

    if (*status == DBEOF) {
	return 0;
    }

    /* Name saved from the header that ended the previous record. */
    strcpy(entname, entbuf);

    do {
	if (buf[0] == '\0') {
		continue;
#ifdef ALLOW_COMMENT
	} else if (buf[0] == '#') {
		continue;
#endif
	} else if (buf[0] == '>') {
	    /* A header that fills the buffer continues in the next read. */
	    if (strlen(buf) == BUFSIZ - 1) incomplete_title = 1;
	    if (*status == FIRST) {
		/* Very first header: it names the record being read. */
		ebufp = entname;
		*status = CONT;
	    } else {
		/* Otherwise it names the record of the next call. */
		ebufp = entbuf;
	    }
	    for (p = buf + 1; *p == ' '  || *p == '\t'; p++)
		    ;
	    while (*p != '\0' && ! isspace((unsigned char) *p) && *p != '(')
		*ebufp++ = *p++;
	    *ebufp = '\0';
	    if (seqflag) {
		/* This header closes the current record. */
		seq[i] = '\0';
		return err_flag ? -1 : 1;
	    }
	    continue;
	} else {
	    if (incomplete_title) {
		/* Skip the remaining chunks of an over-long header. */
		if (strlen(buf) < BUFSIZ - 1) incomplete_title = 0;
		continue;
	    }
	    seqflag = 1;
	    for (p = buf; *p != '\0' && *p != '\n'; p++) {
		/* <ctype.h> needs a value representable as unsigned char. */
		c = (unsigned char) *p;
		if (isupper(c))
		    seq[i++] = *p;
		else if (islower(c))
		    seq[i++] = toupper(c);
		else if (*p == gapmark)
		    seq[i++] = gapmark;
#ifdef ALLOW_DIGIT
		else if (isdigit(c))
		    seq[i++] = *p;
#endif
		/* Too long: restart at index 0 so the rest of the record is
		 * still consumed without overrunning seq. */
		if (i > maxlen) {
		    i = 0;
		    err_flag = 1;
		}
	    }
	}
    } while (fgets(buf, BUFSIZ, fp) != NULL);

    /* End of input closes the last record. */
    seq[i] = '\0';
    *status = DBEOF;
    return err_flag ? -1 : 1;
}
/*
 * get_stanf_title -- like get_stanf(), but collects the comment text.
 *
 * Every line starting with ';' has its last character removed and the
 * text after the ';' appended to title, so title must already hold a
 * valid (possibly empty) string and be large enough for all comment
 * lines of the entry.  The first other line supplies the entry name;
 * the following lines supply the sequence, which ends at the first '1'.
 *
 *   fp       input stream
 *   entname  receives the entry name (unbounded "%s"; caller sizes it)
 *   title    comment text is appended here
 *   seq      receives the upper-cased sequence; needs maxlen + 1 characters
 *   maxlen   maximum number of sequence characters accepted
 *
 * Returns 1 when a terminated sequence was read, -1 when it was longer
 * than maxlen (seq is then not meaningful), and 0 when end of file is
 * reached before a '1' terminator; seq is not NUL-terminated in that case.
 */
int get_stanf_title(FILE *fp, char *entname, char *title, char *seq,
		    int maxlen)
{
    char buf[BUFSIZ], *p;
    int c;
    int i = 0;
    int seqflag = 0;
    int err_flag = 0;

    while (fgets(buf, sizeof(buf), fp) != NULL) {
	if (buf[0] == ';') {
	    chop(buf);			/* drop the trailing newline */
	    strcat(title, &buf[1]);
	    continue;
	}
	if (!seqflag) {
	    /* First non-comment line: the entry name. */
	    sscanf(buf, "%s", entname);
	    seqflag = 1;
	    continue;
	}
	for (p = buf; *p != '\0' && *p != '\n'; p++) {
	    /* <ctype.h> needs a value representable as unsigned char. */
	    c = (unsigned char) *p;
	    if (isupper(c))
		seq[i++] = *p;
	    else if (islower(c))
		seq[i++] = toupper(c);
	    else if (*p == gapmark)
		seq[i++] = gapmark;
	    else if (*p == '1') {
		/* End-of-sequence marker. */
		seq[i] = '\0';
		return err_flag ? -1 : 1;
	    }
	    /* Too long: restart at index 0 so the rest of the entry is
	     * still consumed without overrunning seq. */
	    if (i > maxlen) {
		i = 0;
		err_flag = 1;
	    }
	}
    }
    return 0;
}

/*
 * get_fasta_title -- read one '>'-headed record together with its title.
 *
 * The record ends at the next '>' line.  That line is left unparsed in
 * buf, and the following call starts by processing it.
 *
 *   fp       input stream
 *   entname  receives the first whitespace-delimited word after '>'
 *            (copied without a length limit)
 *   title    receives the rest of the header line, starting at the
 *            whitespace that follows the name, with the last character
 *            of the line removed
 *   seq      receives the upper-cased sequence; needs maxlen + 1 characters
 *   maxlen   maximum number of sequence characters accepted
 *   buf      line buffer of BUFSIZ characters kept between calls
 *   entbuf   not used by this function
 *   status   set to DBEOF when the input is exhausted
 *
 * Returns 0 when *status is already DBEOF, -1 when the sequence was longer
 * than maxlen (seq is then not meaningful), and 1 otherwise -- including
 * the call that reaches end of file.
 */
int get_fasta_title(FILE *fp, char *entname, char *title, char *seq,
		    int maxlen, char *buf, char *entbuf, int *status)
{
    char *p;
    int c;
    int i = 0;
    int seqflag = 0;
    int err_flag = 0;

    (void) entbuf;	/* kept only for signature parity with get_fasta() */

    if (*status == DBEOF) {
	return 0;
    }

    do {
	if (buf[0] == '\0') {
	    continue;
#ifdef ALLOW_COMMENT
	} else if (buf[0] == '#') {
	    continue;
#endif
	} else if (buf[0] == '>') {
	    if (seqflag) {
		/* Header of the next record: finish this one and leave the
		 * line in buf for the next call. */
		seq[i] = '\0';
		return err_flag ? -1 : 1;
	    }
	    for (p = buf + 1; *p && isspace((unsigned char) *p); p++)
		;
	    while (*p && ! isspace((unsigned char) *p)) {
		*entname++ = *p++;
	    }
	    *entname = '\0';
	    chop(p);		/* drop the trailing newline */
	    strcpy(title, p);
	    continue;
	}
	else {
	    seqflag = 1;
	    for (p = buf; *p != '\0' && *p != '\n'; p++) {
		/* <ctype.h> needs a value representable as unsigned char. */
		c = (unsigned char) *p;
		if (isupper(c))
		    seq[i++] = *p;
		else if (islower(c))
		    seq[i++] = toupper(c);
		else if (*p == gapmark)
		    seq[i++] = gapmark;
#ifdef ALLOW_DIGIT
		else if (isdigit(c))
		    seq[i++] = *p;
#endif
		/* Too long: restart at index 0 so the rest of the record is
		 * still consumed without overrunning seq. */
		if (i > maxlen) {
		    i = 0;
		    err_flag = 1;
		}
	    }
	}
    } while (fgets(buf, BUFSIZ, fp) != NULL);

    /* End of input closes the last record: terminate the sequence and
     * report overflow, exactly as get_fasta() does. */
    seq[i] = '\0';
    *status = DBEOF;
    return err_flag ? -1 : 1;
}

/*
 * dbopen -- open a sequence database and detect its format.
 *
 *   dbname  "stdin" to read standard input; otherwise the name of an
 *           environment variable holding the file name, or, when no such
 *           variable is set, the file name itself
 *
 * Returns a newly allocated handle with its type set by getdbtype() and
 * its line buffers cleared, or NULL when dbname is NULL, memory is
 * exhausted or the file cannot be opened.  Nothing stays allocated on
 * failure.
 */
DB *dbopen(char *dbname)
{
    char *filename;
    DB *db;

    /* Validate the argument before allocating anything. */
    if (dbname == NULL)
	return NULL;
    if ((db = malloc(sizeof *db)) == NULL)
	return NULL;

    if (strcmp(dbname, "stdin") == 0) {
	db->fp = stdin;
    } else {
	/* An environment variable of that name overrides the literal
	 * file name. */
	filename = getenv(dbname);
	if (filename == NULL)
	    filename = dbname;
	if ((db->fp = fopen(filename, "r")) == NULL) {
	    free(db);
	    return NULL;
	}
    }
    db->type = getdbtype(db->fp);
    db->buf[0] = '\0';
    db->entbuf[0] = '\0';
    db->status = FIRST;
    return db;
}

/*
 * getdbtype -- guess the database format from the start of the stream.
 *
 * With ALLOW_COMMENT defined, leading lines that start with '#' are
 * consumed first.  A first character of ';' selects STANF and '>' selects
 * FASTA.  Otherwise the text up to the first space is compared with the
 * entry keyword of every form[] row and the index of the matching row is
 * returned.  NONE is returned when nothing matches or the stream is empty.
 *
 * Every character examined is pushed back with ungetc(), including the
 * delimiting space, so the readers see the first line unchanged.  This
 * relies on the C library accepting more than one pushed-back character;
 * the C standard guarantees only one.
 */
int getdbtype(FILE *fp)
{
    char str[BUFSIZ];
    int found = NONE;
    int len;
    int db;
    int c;			/* int, so that EOF stays distinguishable */
#ifdef ALLOW_COMMENT
    char buf[BUFSIZ];

    /* Skip leading comment lines. */
    while ((c = getc(fp)) == '#') {
	if (fgets(buf, sizeof(buf), fp) == NULL) {
	    fprintf(stderr, "Unmature EOF\n");
	    fclose(fp);
	    exit(1);
	}
    }
    if (c != EOF)
	ungetc(c, fp);
#endif

    c = getc(fp);
    if (c == EOF)
	return NONE;
    if (c == ';' || c == '>') {
	ungetc(c, fp);
	return (c == ';') ? STANF : FASTA;
    }

    /* Collect the leading keyword, which ends at the first space. */
    str[0] = (char) c;
    for (len = 1; len < BUFSIZ; len++) {
	if ((c = getc(fp)) == EOF)
	    break;
	str[len] = (char) c;
	if (c != ' ')
	    continue;

	str[len] = '\0';	/* terminate the keyword for strcmp() */
	for (db = 0; db < MAXDB; db++) {
	    if (strcmp(str, form[db].hdr_ent) == 0) {
		found = db;
		break;
	    }
	}
	str[len++] = ' ';	/* restore the space; it was consumed too */
	break;
    }

    /* str[0 .. len-1] were consumed: push them back in reverse order. */
    while (--len >= 0)
	ungetc((unsigned char) str[len], fp);
    return found;
}
/*
 * chop -- remove the last character of str in place.
 *
 * The callers in this file use it to drop the newline that fgets() keeps,
 * but the character is removed whatever it is.  An empty string is left
 * unchanged.
 *
 * Returns 0.  The return type stays int because the function is called
 * before its definition in this file and is therefore implicitly
 * declared as returning int.
 */
int chop(char *str)
{
    size_t len = strlen(str);

    if (len > 0)
	str[len - 1] = '\0';
    return 0;
}

/*
 * dbrewind -- reset a database handle so that reading starts over.
 *
 * Clears the line and entry-name buffers, sets the status back to FIRST
 * and rewinds the underlying stream.  rewind() reports no errors, so a
 * stream that cannot be repositioned is not detected here.
 *
 * Returns 0.
 */
int dbrewind(DB *db)
{
    rewind(dbptr(db));
    db->buf[0] = '\0';
    db->entbuf[0] = '\0';
    db->status = FIRST;
    return 0;
}
