/*
 * DomClust: Hierarchical Clustering for Orthologous Domain Classification
 * Copyright (c) 2000-2007, Ikuo Uchiyama
 * All rights reserved.
 */

#include <cstdio>
#include <cstdlib>
#include <string>
#include <cstring>
#include <iostream>
#include <fstream>
#include <list>

using namespace std;

#include "SpecTree.h"

#define SPNAMELEN 10
#define TAXNAMEBUFLEN 500000

#define FLAG_TAXOR 1
#define FLAG_TAXAND 2

/*
 * Scratch record kept for each tree node while the tree is being built
 * (see SpecTree::createNewNode, which updates it for the parent node).
 */
struct NodeBuf {
	int prev_child;	/* index of the child added last; -1 when there is none yet */
	int count;	/* number of children added so far */

	NodeBuf(int _prev=-1) : prev_child(_prev), count(0) {}
};
/*
 * Build-time bookkeeping, one entry per tree node and indexed by node index.
 * parse_tree() pushes the entry for the root and createNewNode() pushes one
 * for every further node.  This is a single file-level vector, not a member
 * of SpecTree.
 */
vector<NodeBuf> node_buf;

/*
 * Node index of each species, indexed by species id; written by
 * createNewNode().  Zero-initialized, and addSpecies() treats 0 as
 * "this species has no node yet".
 */
static int sp_treenode_idx[MAX_SPNUM];
/*
 * When nonzero, parse_tree() gives a node the weight of its parent divided
 * by the parent's child count; otherwise the weight is 1.0.
 * Nothing in this file assigns it (the setter functions below are commented
 * out) -- presumably it is set from another translation unit; TODO confirm.
 */
char set_spweight_flag;

/*
set_spweight()
{
	set_spweight_flag = 1;
}
unset_spweight()
{
	set_spweight_flag = 0;
}
*/

/*
 * Build a tree node for species id `_spid` (negative for a node without a
 * species) whose parent node index is `_parent`, with weight `_weight`.
 * The new node has no child, no sibling, a zero flag and no taxon name.
 */
TaxNode::TaxNode(int _spid, int _parent, double _weight)
{
	/* links and annotations start out unset */
	sibling = -1;
	child = -1;
	name = NULL;
	flag = 0;

	/* values supplied by the caller */
	weight = _weight;
	parent = _parent;
	spid = _spid;
}

/*
 * Read a species-tree description from the file `spfile` and build a
 * SpecTree from it.
 *
 * On every line, the text from the first '#' to the end of the line is
 * dropped as a comment.  What remains of all lines is concatenated (line
 * breaks are not kept) and passed to parse_tree().
 *
 * Prints "Can't open sptree file" and exits with status 1 when the file
 * cannot be opened.  The returned tree is allocated with new and is not
 * released by this function.
 */
SpecTree* SpecTreeRead::readSPfile(char *spfile)
{
	ifstream ifs(spfile);
	if (ifs.fail()) {
		cerr << "Can't open sptree file\n";
		exit(1);
	}

	/* a line stops at a comment marker; an embedded NUL ends it as well,
	   as it did when lines were scanned as C strings */
	static const char line_stop[2] = {'#', '\0'};

	string patbuf;
	string line;
	/* Loop on the read itself: reading into a std::string has no line
	   length limit, so an over-long line can no longer put the stream
	   into a failed state that never reaches eof. */
	while (getline(ifs, line)) {
		/* npos (no stop character found) appends the whole line */
		patbuf.append(line, 0, line.find_first_of(line_stop, 0, sizeof(line_stop)));
	}
	ifs.close();

	SpecTree *spTree = new SpecTree();
	parse_tree(patbuf.c_str(), spTree);
//	spTree->setSpflag();
	return spTree;
}

/*
 * Parse the tree description `str` and add the resulting nodes to `spTree`.
 * The root is added here as node 0, and node indices are counted from
 * there, so `spTree` is expected to be empty on entry.
 *
 * What the scanner recognizes:
 *   alnum run     species name; looked up with specSet->getSPid() and added
 *                 as a node under the currently open internal node
 *   '(' or '{'    opens a new internal node; '{' marks it FLAG_TAXOR
 *   ')' or '}'    closes the currently open internal node
 *   '[' ... ']'   taxon name, attached to the most recently created node
 *   ':' number    a number following ':' is parsed and skipped
 * Every other character only separates tokens.
 *
 * Example (node 0 is the root added before scanning starts):
 *
 *   str        (  A  (  B  C  )  (  D  E  (  F  G  )  H  )  I  )
 *   idx     0  1  2  3  4  5     6  7  8  9 10 11    12    13
 *   parent -1  0  1  1  3  3     1  6  6  6  9  9     6     1
 *
 * After scanning, node weights are assigned: the root and its direct
 * children are left untouched; a child of a FLAG_TAXOR node gets 0;
 * otherwise the weight is parent weight / number of siblings when
 * set_spweight_flag is set, or 1.0 when it is not.  For species nodes the
 * weight is also passed to SpecSet::setSPweight().  Finally
 * spTree->setSpflag() is called.
 *
 * Taxon names are stored in one malloc'ed buffer that is not freed here;
 * TaxNode::name points into it.
 *
 * Returns spTree.  Calls parse_spinfo_error() (which exits) on an unmatched
 * closing bracket or on a ':' that is not followed by a number, and exits
 * with status 1 when memory cannot be allocated.
 */
SpecTree* SpecTreeRead::parse_tree(const char *str, SpecTree *spTree)
{
	int i, l = 0, id;
	const char *sp = str;
	static char *taxname_buf;
	char *taxbuf_p;
	int taxname_bufsize = TAXNAMEBUFLEN;
	int buflen = 0;
	enum {NAME,TAXNAME,OTHER} status = OTHER;
	int bufidx, parent;

	/* indices of the currently open internal nodes, innermost last;
	   the bottom entry is the root and is never popped */
	vector<int> parent_stack;
	/* offset of each node's taxon name inside taxname_buf, -1 if none */
	vector<int> spname_pos;

	if ( (taxname_buf = (char*) malloc(taxname_bufsize)) == NULL ) {
		fprintf(stderr, "Can't allocate memory\n");
		exit(1);
	}
	taxbuf_p = taxname_buf;

	/* root */
	TaxNode taxNode(-1, -1, 1);
	spTree->addTaxNode(taxNode);
	node_buf.push_back(NodeBuf());
	parent_stack.push_back(0);

	bufidx = 1;

	/* NOTE(review): specSet is used below without a NULL check, as before */

	for (const char* p = str; *p; p++) {
		/* cast: isalnum() is undefined for negative char values */
		if (isalnum(static_cast<unsigned char>(*p))) {
			if (status == OTHER) {
				/* first character of a species name */
				status = NAME;
				sp = p;
				l = 1;
			} else {
				/* inside a species name or a taxon name */
				l++;
			}
		} else if (status == NAME) {
			/* end of a species name: copy it into a buffer of the
			   right size (no fixed length limit) */
			vector<char> spname(sp, sp + l);
			spname.push_back('\0');
			id = specSet->getSPid(&spname[0]);
			/* create new node */
			spTree->createNewNode(id, parent_stack.back(), (char)0);
			bufidx++;
			status = OTHER;
		} else if (status == TAXNAME) {
			if (*p == ']') {
				int next_buflen = buflen + (l+1);
				if (next_buflen >= taxname_bufsize) {
					/* grow until the name really fits */
					while (next_buflen >= taxname_bufsize) {
						taxname_bufsize = int(taxname_bufsize * 1.5);
					}
					if ( (taxname_buf = (char*) realloc(taxname_buf, taxname_bufsize)) == NULL ) {
						fprintf(stderr, "Can't allocate memory\n");
						exit(1);
					}
					taxbuf_p = taxname_buf + buflen;
				}

				memcpy(taxbuf_p, sp, l);
				taxbuf_p[l] = '\0';
				/* Only the offset is recorded: the buffer may still
				   move when it grows, so pointers are set afterwards. */
				if (static_cast<int>(spname_pos.size()) < bufidx) {
					spname_pos.resize(bufidx, -1);
				}
				spname_pos[bufidx-1] = buflen;
				taxbuf_p += (l+1);
				buflen = next_buflen;

				status = OTHER;
			} else {
				l++;
			}
		}
		if (status == OTHER) {
			if (*p == '(' || *p == '{') {
				char flag = 0;
				if (*p == '{') {
					flag = FLAG_TAXOR;
				}
				spTree->createNewNode(-1, parent_stack.back(), flag);

				/* the new internal node becomes the current parent */
				parent_stack.push_back(bufidx);
				bufidx++;
			} else if (*p == ')' || *p == '}') {
				/* nothing but the root is open: unmatched bracket */
				if (parent_stack.size() <= 1) {
					parse_spinfo_error();
				}
				parent_stack.pop_back();
			} else if (*p == '[') {
				/* the name starts right after the bracket, whatever
				   its first character is */
				l = 0;
				sp = p + 1;
				status = TAXNAME;
			} else if (*p == ':') {
				char *q;
				/* Parse the number after the ':'.  The value is not
				   stored: node weights are computed below. */
				const double weight = strtod(p + 1, &q);
				(void) weight;
				if (q == p + 1) {
					parse_spinfo_error();
				}
				/* resume right after the number (the loop adds 1) */
				p = q - 1;
			}
		}
	}

	spname_pos.resize(bufidx, -1);
	for (i = 0; i < bufidx; i++) {
		TaxNode *taxn = spTree->getNode(i);
		if (spname_pos[i] >= 0) {
			/* set the name only when one was given for this node */
			taxn->name = taxname_buf + spname_pos[i];
		}
		if ((parent = taxn->parent) == -1) continue;
		if (parent == 0) continue;
		TaxNode *taxn_parent = spTree->getNode(parent);
		if (taxn_parent->flag & FLAG_TAXOR) {
			/* taxonomic OR: do not count each child */
			taxn->weight = 0;
		} else if (set_spweight_flag) {
			taxn->weight = taxn_parent->weight
						/ node_buf[parent].count;
		} else {
			taxn->weight = 1.0;
		}
		if (taxn->spid >= 0) {
			SpecSet::setSPweight(taxn->spid, taxn->weight);
		}
	}
	spTree->setSpflag();
	return spTree;
}
/* Report a syntax error in the species tree on stderr and exit with status 1. */
void SpecTreeRead::parse_spinfo_error()
{
	fputs("parse error\n", stderr);
	exit(1);
}


/* A new tree starts with leaf-name replacement (see getTaxName) switched on. */
SpecTree::SpecTree()
{
	this->replaceLeafName = true;
}
/* Append a copy of `taxNode` at the end of the node list. */
void SpecTree::addTaxNode(TaxNode& taxNode)
{
	nodeList.insert(nodeList.end(), taxNode);
}

/*
 * Append a new node to the tree and link it as the last child of `parent`.
 *
 *   spid    species id of the node; negative for a node without a species
 *   parent  index of the parent node (must already exist)
 *   flag    value stored in the node's flag field
 *
 * The new node gets weight 1 and the next free index in nodeList.  The
 * file-level node_buf entry of the parent is updated with the child count
 * and the last child, which is how the sibling chain is extended.  For a
 * species node, the node's species set is set to that species and
 * sp_treenode_idx records the node index.
 */
void SpecTree::createNewNode(int spid, int parent, char flag)
{
	const int bufidx = nodeList.size();

	TaxNode taxNode(spid, parent, 1);
	taxNode.flag = flag;
	addTaxNode(taxNode);

	node_buf.push_back(NodeBuf());
	node_buf[parent].count++;

	/* chain the node after the parent's previous child, or make it the
	   first child when there is none */
	const int prev_sibling = node_buf[parent].prev_child;
	if (prev_sibling >= 0) {
		nodeList[prev_sibling].sibling = bufidx;
	} else {
		nodeList[parent].child = bufidx;
	}
	node_buf[parent].prev_child = bufidx;

	if (spid>=0) {
		/* Update the node stored in the list: the local taxNode was
		   already copied by addTaxNode(), so changing it here would
		   have no effect on the tree. */
		nodeList[bufidx].spSet.setSpecSet(spid);
		sp_treenode_idx[spid] = bufidx;
	}
}
/*
 * Add the species that are not found in the sptree file (called from
 * preproc): every species id whose sp_treenode_idx entry is still 0 gets a
 * node directly under the root.  The species sets of all nodes are then
 * recomputed.
 */
void SpecTree::addSpecies()
{
	const int total_species = SpecSet::getSPnum();
	int spid = 0;
	while (spid < total_species) {
		if (sp_treenode_idx[spid] == 0) {
			createNewNode(spid, 0, (char)0);
		}
		++spid;
	}
	setSpflag();
}
/*
 * Recompute the species set of every node, walking from the last node to
 * the first.  A node without children gets the set holding its own species
 * (or an empty set when it has no species); a node with children gets the
 * OR of its children's sets.
 */
void SpecTree::setSpflag()
{
	int idx = nodeList.size();
	while (--idx >= 0) {
		nodeList[idx].spSet.clearSpSet();

		if (nodeList[idx].child >= 0) {
			/* internal node: merge the sets along the sibling chain */
			int kid = nodeList[idx].child;
			while (kid >= 0) {
				specFlagOR( nodeList[idx].spSet, nodeList[kid].spSet, nodeList[idx].spSet);
				kid = nodeList[kid].sibling;
			}
			continue;
		}

		if (nodeList[idx].spid >= 0) {
			nodeList[idx].spSet.setSpecSet(nodeList[idx].spid);
		}
	}
}
void SpecTree::print()
{
	int i;
	for (i = 0; i < nodeList.size(); i++) {
		printf("%3d %3d %3d %3d %s %3d %1d %7.3lf ",

			i,nodeList[i].parent,
			nodeList[i].child, nodeList[i].sibling,
			(nodeList[i].spid >= 0 ?
				SpecSet::getSPname(nodeList[i].spid).c_str() :
			    (nodeList[i].name ?
			    	nodeList[i].name : "---")), 
			nodeList[i].spid, nodeList[i].flag,
			nodeList[i].weight*100);
		nodeList[i].spSet.printSpSet();
		cout << endl;
	}
}
/*
 * Return the display name of node `id`.
 *
 * When leaf-name replacement is on and `id` is a leaf without a name of
 * its own, the name of its parent is used, provided the leaf is the
 * parent's only child and the parent has a name.  Otherwise the result is
 * the node's own taxon name, or else the name of its species.
 * An empty string is returned when the node has neither.
 */
string SpecTree::getTaxName(int id) {
	if (replaceLeafName && isLeaf(id) && nodeList[id].name == NULL) {
		/* leaf node: the name of the node should be stored in the parent node */
		const int parent = nodeList[id].parent;
		/* a parentless node has no parent to look at, and a parent
		   without a name must not be turned into a string */
		if (parent >= 0 && countChild(parent) == 1
				&& nodeList[parent].name != NULL) {
			return(nodeList[parent].name);
		}
	}
	if (nodeList[id].name) {
		return(nodeList[id].name);
	}
	if (nodeList[id].spid >= 0) {
		return(SpecSet::getSPname(nodeList[id].spid));
	}
	/* neither a taxon name nor a species: always return a value */
	return string();
}

/*
 * Return the deepest node that includes the set of species given by spSet.
 * Nodes are scanned from the highest index down and the first node whose
 * species set includes spSet (per specFlagInclude) is returned.
 * Returns -1 when no node matches.
 */
int SpecTree::matchFlags(SpecSet& spSet)
{
	/* plain local: the `register` storage class was removed in C++17 */
	int i;
	for (i = nodeList.size()-1; i >= 0; i--) {
		if (specFlagInclude(nodeList[i].spSet, spSet)) {
			return i;
		}
	}
//	cout << nodeList[0].spSet.printSpSet() << endl;
	return -1;
}
/* True when node `nodeid` has no child. */
bool SpecTree::isLeaf(int nodeid)
{
	const bool has_child = (nodeList[nodeid].child >= 0);
	return !has_child;
}
/* Count the children of node `nodeid` by walking its sibling chain. */
int SpecTree::countChild(int nodeid)
{
	int total = 0;
	int cur = nodeList[nodeid].child;
	while (cur >= 0) {
		++total;
		cur = nodeList[cur].sibling;
	}
	return total;
}
void SpecTree::unset_replaceLeafName() {
	replaceLeafName = false;
}
/*
double sptree_spFlagCountTaxOrW(specFlag spflag)
{
	register int i;
	double cnt = 0;
	for (i = 0; i < spTree.nodenum; i++) {
		if (spTree.node[i].flag & FLAG_TAXOR) {
			if (spFlagANDcnt(spTree.node[i].spflag, spflag)) {
				cnt += spTree.node[i].weight;
			}
		}
	}
	return cnt;
}

double sptree_MatchFlagsCntW(specFlag spflag1, specFlag spflag2, int nodenum)
{
	register int i, j;
	int cnt1, cnt2;
	static specFlag tmp_spflag1, tmp_spflag2, tmp_spflag3, tmp_spflag;
	double cnt;
	
	cnt = 0.0;
	clearSPflag(tmp_spflag);
	clearSPflag(tmp_spflag1);
	clearSPflag(tmp_spflag2);
	clearSPflag(tmp_spflag3);

	for (i = spTree.node[nodenum].child; i >= 0;
			i = spTree.node[i].sibling) {
		spFlagAND(spflag1, spTree.node[i].spflag, tmp_spflag1);
		spFlagAND(spflag2, spTree.node[i].spflag, tmp_spflag2);
		spFlagOR(tmp_spflag1, tmp_spflag2, tmp_spflag3);
		spFlagOR(tmp_spflag, tmp_spflag3, tmp_spflag);
	}
	cnt = spFlagCntW(tmp_spflag3);
	return cnt;
}
sptree_MatchFlagsInv(specFlag *spflag)
{
	register int i;
	for (i = spTree.nodenum-1; i >= 0; i--) {
		if (spFlagInclude(spflag, spTree.node[i].spflag)) {
			return i;
		}
	}
	return -1;
}
*/

#ifdef DEBUGMAIN_SPTREE
/*
 * Stand-alone test driver, compiled only when DEBUGMAIN_SPTREE is defined:
 * reads the species tree file named by argv[1] and prints its nodes.
 * Prints a usage message and exits with status 1 when no file is given.
 */
int main(int argc,char **argv)
{
	/* c and spf are only used by the disabled experiment below */
	int c;
	SpecSet spf;
	SpecTreeRead spRead;
	SpecTree *spTree;
	if (argc <=1){
		cerr << "Usage: " << argv[0] << " spfile\n";
		exit(1);
	}
	spTree = spRead.readSPfile(argv[1]);
	spTree->print();

/***
	setSPflag(spf,0);
	addSPflag(spf,1);
	addSPflag(spf,2);
	addSPflag(spf,3);
	addSPflag(spf,5);
	addSPflag(spf,6);
	print_specFlag(spf);
	c =sptree_spFlagCountWithTaxOr(spf);
	printf("%d\n",c);
**/
	(void) c;
	return 0;
}
#endif
