#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <cstring>
#include <list>
#include <functional>
#include <algorithm>
#include "HomData.h"

using namespace std;

//class HomData

/**
 * Build a homology record from raw names and coordinates.
 *
 * Both domains are obtained from DomainPool::create(); the last argument
 * passed to create() is always 0.
 *
 * @param _name1,_name2  gene names of the two partners
 * @param _from1,_to1    region on the first gene
 * @param _from2,_to2    region on the second gene
 * @param _pam,_score    values stored as-is in `pam` and `score`
 */
HomData::HomData(string& _name1, string& _name2, int& _from1, int& _to1,
		int& _from2, int& _to2, double& _pam, double& _score) :
	pam(_pam), score(_score) {
	dom1 = DomainPool::create(_name1, _from1, _to1, 0);
	// The end coordinate of the second domain is _to2 (the original passed
	// _to1 here, giving dom2 the first gene's end position).
	dom2 = DomainPool::create(_name2, _from2, _to2, 0);
}
/**
 * Build a homology record from two already existing Domain objects.
 * The pointers are stored as given; no copy of the domains is made here.
 */
HomData::HomData(Domain* _dom1, Domain* _dom2, double _pam, double _score)
	: dom1(_dom1),
	  dom2(_dom2),
	  pam(_pam),
	  score(_score)
{
}

/**
 * Write this record to `ost` as one space-separated line:
 * domain name of dom1, domain name of dom2, from/to of dom1, from/to of dom2,
 * pam and score. The line ends with endl, so the stream is flushed.
 */
void HomData::print(ostream& ost) {
	ost << dom1->getDomName() << " " << dom2->getDomName();
	ost << " " << dom1->getFrom() << " " << dom1->getTo();
	ost << " " << dom2->getFrom() << " " << dom2->getTo();
	ost << " " << pam << " " << score << endl;
}
/**
 * Stream a HomData in bracketed form:
 * "[name1 name2 from1 to1 from2 to2 pam score]".
 * Unlike HomData::print() this uses getName() and emits no newline.
 */
ostream& operator<<(ostream& ost, const HomData& hd) {
	ost << "[" << hd.dom1->getName() << " ";
	ost << hd.dom2->getName() << " ";
	ost << hd.dom1->getFrom() << " " << hd.dom1->getTo() << " ";
	ost << hd.dom2->getFrom() << " " << hd.dom2->getTo() << " ";
	ost << hd.pam << " " << hd.score << "]";
	return ost;
}

//class HomDataSet
/** Create an empty data set; all members are default-constructed. */
HomDataSet::HomDataSet()
{
}
/**
 * Destructor; performs no explicit cleanup.
 * NOTE(review): the list<int> objects allocated in addGene2HomMap() are only
 * released by clearGene2HomMap(), which is not called here. Confirm whether
 * the owner is expected to call clearHomData() before destruction.
 */
HomDataSet::~HomDataSet()
{
}
/** Register one gene record by forwarding it to the underlying geneSet. */
void HomDataSet::addGeneData(GeneData& g)
{
	geneSet.addGeneData(g);
}
/** Record `spec` in addedSpSet, the set of species inserted through this call. */
void HomDataSet::insertAddedSpSet(string spec)
{
	addedSpSet.insert(spec);
}
/** Append a copy of `h` to the end of homSet. */
void HomDataSet::addHomData(HomData& h)
{
	homSet.push_back(h);
}
void HomDataSet::makeIndex() {
/*
	for (int idx = 0; idx < geneSet.size(); idx++) {
		GeneData *gdata = &geneSet[ idx ];
		geneMap.insert(GeneMap_t::value_type(gdata->getName(), idx));
	}
*/
	for (int idx = 0; idx < homSet.size(); idx++) {
		HomData *hnew = &homSet[ idx ];
		addGene2HomMap(hnew->dom1->getOrigName(), idx);
		addGene2HomMap(hnew->dom2->getOrigName(), idx);
	}
}
/**
 * Append `idx` to the index list stored for gene `name`, creating the list
 * on first use.
 *
 * @param name  key into gene2HomMap
 * @param idx   position in homSet to record
 */
void HomDataSet::addGene2HomMap(string name, int idx) {
	// One lookup: operator[] yields the existing slot or inserts a null one.
	list<int>*& slot = gene2HomMap[ name ];
	// A key may already exist with a null list (operator[] on a miss creates
	// exactly that), so test the pointer rather than key presence.
	if (slot == NULL) {
		slot = new list<int>();
	}
	slot->push_back(idx);
}
void HomDataSet::clearHomData() {
	int flag = 0;
//cerr << homSet.size() << " " << HomData::MEM_CUTOFF << endl;
	if (homSet.size() > HomData::MEM_CUTOFF) {
		flag = 1;
	}
	homSet.clear();;
	clearGene2HomMap();
	if (flag) {
//cerr << "clear Hom\n";
//		vector<HomData>(homSet).swap(homSet);
		vector<HomData>().swap(homSet);
//cerr << homSet.size() << " " << HomData::MEM_CUTOFF << endl;
	}
}
void HomDataSet::clearGene2HomMap() {
	map<string, list<int>*>::iterator p;
	for (p = gene2HomMap.begin(); p != gene2HomMap.end(); p++) {
		delete(p->second);
	}
	gene2HomMap.clear();
}
/** Print every gene by delegating to geneSet.printAllGenes(). */
void HomDataSet::printAllGenes()
{
	geneSet.printAllGenes();
}
void HomDataSet::printAllHom() {
	int i;
	for (i = 0; i < homSet.size(); i++) {
		homSet[i].print();
	}
}

/* key=<domid1,domid2>, value=HomData */
/**
 * Collect the homology records that involve the gene of `dom` and whose
 * dom1 or dom2 overlaps `dom`.
 *
 * @param dom      query domain; its original gene name is the index key
 * @param homList  list to append to, or NULL to have one allocated
 * @param reset    when true and `homList` is non-NULL, the list is emptied first
 * @return `homList` (or the newly allocated list) with matching records
 *         appended; NULL when the gene has no entry in the index. The
 *         returned pointers refer to elements of homSet.
 */
HomDataList* HomDataSet::getGeneHomologs(Domain *dom, HomDataList* homList, bool reset) {
	string name = dom->getOrigName();

	// Use find() rather than operator[]: a miss must not insert a null
	// entry into gene2HomMap as a side effect of a lookup.
	list<int>* idxList = NULL;
	Gene2HomMap_t::iterator found = gene2HomMap.find(name);
	if (found != gene2HomMap.end()) {
		idxList = found->second;
	}

	// A caller-supplied list is still reset even when nothing is found.
	if (homList != NULL && reset) {
		clearHomDataList(homList);
	}
	if (idxList == NULL) {
		return NULL;
	}
	// Allocate only once we know a list will be returned, so the NULL
	// return above cannot leak a fresh allocation.
	if (homList == NULL) {
		homList = new list<HomData*>();
	}

	for (list<int>::iterator p = idxList->begin(); p != idxList->end(); ++p) {
		HomData *hd = &homSet[*p];
		if (dom->overlap(hd->dom1) || dom->overlap(hd->dom2)) {
			homList->push_back(hd);
		}
	}
	return homList;
}
/**
 * Append a pointer to every record of homSet to `homList`.
 *
 * @param homList  destination list, or NULL to have one allocated
 * @param reset    when true and `homList` is non-NULL, it is emptied first
 * @return the list that was filled (never NULL)
 */
HomDataList* HomDataSet::getAllHomologData(HomDataList* homList, bool reset) {
	if (homList == NULL) {
		homList = new list<HomData*>();
	} else if (reset) {
		clearHomDataList(homList);
	}

	for (size_t i = 0; i < homSet.size(); ++i) {
		homList->push_back(&homSet[i]);
	}
	return homList;
}
/**
 * Remove all entries from `homList`. The HomData objects pointed to are not
 * deleted.
 *
 * The previous version also computed an unused size flag and swapped the
 * list with a copy of itself after clearing; for a std::list that swap has
 * no effect (clear() already releases every node), so both were dropped.
 */
void HomDataSet::clearHomDataList(HomDataList *homList) {
	homList->clear();
}

/** Create an empty map; nothing is initialised explicitly. */
HomSetMap::HomSetMap()
{
}
/**
 * Store `homData` under an order-independent key built from its two domains.
 * The key is "<a>,<b>" where a and b are the toString() forms of dom1 and
 * dom2 with the lexicographically smaller one first. An existing entry with
 * the same key is overwritten.
 */
void HomSetMap::add(HomData *homData) {
	string lower = homData->dom1->toString();
	string upper = homData->dom2->toString();
	if (lower > upper) {
		lower.swap(upper);
	}
	homSetMapData[ lower + "," + upper ] = homData;
}
/** Iterator to the first entry of the underlying homSetMapData. */
HomSetMap::iterator HomSetMap::begin()
{
	return homSetMapData.begin();
}
/** Past-the-end iterator of the underlying homSetMapData. */
HomSetMap::iterator HomSetMap::end()
{
	return homSetMapData.end();
}

//class ReadHomData
/**
 * Start in gene mode with no files opened and the new-species flag cleared.
 * NOTE(review): `isp` is not set here; it is assigned in openFile().
 */
ReadHomData::ReadHomData()
	: filenum(0),
	  mode(GeneMode),
	  newspFlag(false)
{
}
/**
 * Return the parser to its initial state: gene mode, new-species flag off.
 *
 * @return always 0. The function is declared to return int but previously
 *         had no return statement, which is undefined behaviour as soon as a
 *         caller reads the result.
 */
int ReadHomData::resetMode() {
	newspFlag = false;
	mode = GeneMode;
	return 0;
}
/**
 * Remember the list of input files and open the first one that can be opened.
 *
 * On return `filenum` is the index of the opened file, or files.size() when
 * none could be opened.
 *
 * @param _files  input file names; "stdin" selects standard input
 * @return the result of the successful openFile() call (0), or -1 when the
 *         list is empty or no file could be opened
 */
int ReadHomData::openFiles(const vector<string> _files) {
	files = _files;
	filenum = 0;

	// Starts at -1 so an empty list reports failure instead of indexing
	// files[0] out of range.
	int status = -1;
	while (filenum < files.size()) {
		status = openFile(files[filenum].c_str());
		if (status >= 0) {
			break;
		}
		++filenum;
	}
	return status;
}
/**
 * Point the reader at a single input.
 *
 * @param file  path to open, or the literal "stdin" to read from cin
 * @return 0 on success, -1 when the file stream reports failure after open();
 *         in the failure case `isp` is left unchanged
 */
int ReadHomData::openFile(const char *file) {
	const bool useStdin = (strcmp(file, "stdin") == 0);
	if (useStdin) {
		isp = &cin;
		return 0;
	}

	// Release whatever was open before switching to the new file.
	ifs.close();
	ifs.open(file);
	if (ifs.fail()) {
		return -1;
	}
	isp = &ifs;
	return 0;
}
/**
 * Read all remaining input into a data set and build its gene -> record index.
 *
 * @param homSet  data set to fill, or NULL to have readData() allocate one
 * @return the filled data set, or NULL when readData() returns NULL (it does
 *         so when the input is already exhausted)
 */
HomDataSet *ReadHomData::readAllData(HomDataSet *homSet) {
	HomDataSet *homDataSet = ReadHomData::readData(homSet, 0);
	// readData() returns NULL at end of data; do not dereference it.
	if (homDataSet != NULL) {
		homDataSet->makeIndex();
	}
	return homDataSet;
}
/**
 * Read the next per-gene group of homology records (readData() with
 * each_gene = 1), discarding the records from the previous group first.
 *
 * @param homSet  data set to reuse; may be NULL, in which case readData()
 *                allocates a new one
 * @return the data set holding the new group, or NULL at end of data
 */
HomDataSet *ReadHomData::readEachData(HomDataSet *homSet) {
	// readData() accepts a NULL set, so only clear when there is one.
	if (homSet != NULL) {
		homSet->clearHomData();
	}
	return ReadHomData::readData(homSet, 1);
}
/**
 * Parse gene and homology records from the opened input(s) into a HomDataSet.
 *
 * The input is read line by line in one of two modes:
 *  - gene mode: each line is "spec name len pos dir" and becomes a GeneData.
 *    A line starting with "--" sets the new-species flag (genes read after it
 *    have their species added to the set's addedSpSet); a line starting with
 *    "//" builds the gene index and switches to homology mode.
 *  - homology mode: each line is "name1 name2 from1 to1 from2 to2 pam score".
 *
 * @param inSet      data set to fill, or NULL to allocate a new one
 * @param each_gene  when non-zero, stop as soon as the second gene name of a
 *                   homology line differs from that of the first line read in
 *                   this call; the offending line is kept in `next_line` and
 *                   is consumed first by the next call
 * @return the data set, or NULL if the input was already exhausted on entry
 */
HomDataSet *ReadHomData::readData(HomDataSet *inSet, int each_gene) {
	char buf[BUFSIZ+1];
	HomDataSet *homDataSet = NULL;

	if (endOfData()) {
		return NULL;
	}
	if (inSet == NULL) {
		homDataSet = new HomDataSet();
	} else {
		homDataSet = inSet;
	}
	curr_geneName = "";
	while (! endOfData()) {
		if (each_gene && next_line.size() > 0) {
			// resume with the line that terminated the previous group
			strcpy(buf, next_line.c_str());
			next_line = "";
		} else if (getNextLine(buf) < 0) {
			// Nothing was read, so buf still holds the previous line.
			// Skipping here prevents that line from being parsed twice
			// (e.g. the last line of a file without a trailing newline).
			continue;
		}
		if (strlen(buf) == 0) continue;
		istringstream is(buf);
		if (mode == GeneMode) {
			if (strncmp(buf, "//", 2)==0) {
				mode = HomMode;
				homDataSet->makeGeneIndex();
				continue;
			} else if (strncmp(buf, "--", 2)==0) {
				newspFlag = true;
				homDataSet->setNewSpDataIdx();
				continue;
			}
			string spec, name;
			// zero-initialised so a short or malformed line cannot leave
			// indeterminate values behind
			int len = 0, pos = 0, dir = 0;
			is >> spec >> name >> len >> pos >> dir;
			GeneData g(spec, name, len, pos, dir);
			homDataSet->addGeneData(g);
			SpecSet::addNewSpecies(spec);

			if (newspFlag) {
				homDataSet->insertAddedSpSet(g.getSpec());
			}
		} else {
			string name1, name2;
			int from1 = 0, to1 = 0, from2 = 0, to2 = 0;
			double score = 0, pam = 0;
			is >> name1 >> name2 >> from1 >> to1 >> from2 >> to2
				>> pam >> score;

			Domain *dom1 = DomainPool::create(name1, from1, to1, 0);
			Domain *dom2 = DomainPool::create(name2, from2, to2, 0);
			if (dom1->overlapAll(dom2)) {
				// skip self match
				continue;
			}

			/* swap when dom1 belongs to an added species and dom2 does not,
			   so that dom1 is the existing one and dom2 the new one */
			if (homDataSet->checkAddedSpec(dom1->getSpec()) &&
					! homDataSet->checkAddedSpec(dom2->getSpec())) {
				swap(name1, name2);
				swap(dom1, dom2);
			}

			if (each_gene) {
				if (curr_geneName == "") {
					curr_geneName = name2;
				} else if (name2 != curr_geneName) {
					// first line of the next group: keep it for the next call
					next_line = buf;
					break;
				}
			}

			if (domInfo) {
				if (SimValue::getReprType() != 's') {
					/* calculate modified distance */
					GeneData* g1 = homDataSet->getGeneDataByName(name1);
					GeneData* g2 = homDataSet->getGeneDataByName(name2);
					if (! g1) {
						cerr << "Gene not found: " << name1 << endl;
						continue;
					}
					if (! g2) {
						cerr << "Gene not found: " << name2 << endl;
						continue;
					}

					int len1 = g1->getLength();
					int len2 = g2->getLength();
					int minlen, alilen;
					int min_alilen = opt->min_alilen;
					string sp1 = GeneName::getSpec(name1);
					string sp2 = GeneName::getSpec(name2);

					if (specset->checkPartial(sp1) ||
					    specset->checkPartial(sp2)) {
						/* partial sequence: no penalty for edge truncation
							----<===>
						   	    <===>---
						*/
						int minlen1 = std::min(from1, from2) - 1;
						int minlen2 = std::min(len1 - to1, len2 - to2);
						alilen = std::min(to1 - from1, to2 - from2) + 1;
						minlen = minlen1 + minlen2 + alilen;
						min_alilen = (minlen - 10) * 0.8;
					} else if (len1 < len2) {
						minlen = len1; alilen = to1 - from1 + 1;
					} else {
						minlen = len2; alilen = to2 - from2 + 1;
					}
					if (alilen < min_alilen) {
						// Alignment shorter than required: treat the
						// uncovered part (up to min_alilen) as having
						// distance opt->missdist and take the weighted mean.
						minlen = (minlen < min_alilen) ? minlen : min_alilen;
						pam = (pam * alilen + opt->missdist * (minlen - alilen)) / minlen;
					}
				}
				if (homDataSet->checkAddedSpec(dom1->getSpec()) &&
						homDataSet->checkAddedSpec(dom2->getSpec())) {
					/* both are newly added genomes */
					HomData h(dom1, dom2, pam, score);
					homDataSet->addHomData(h);
					continue;
				}
				/* otherwise one record is added per domain returned by
				   domInfo for dom1 (see addAllOvlpDomains) */
				addAllOvlpDomains(homDataSet, dom1, dom2, pam, score);
			} else {
				// reached only when domInfo is not set
				HomData h(dom1,dom2,pam,score);
				homDataSet->addHomData(h);
			}
		}
	}
	return homDataSet;
}
/**
 * True when the current stream is at EOF and `filenum` has moved past the
 * last entry of `files`.
 */
bool ReadHomData::endOfData() {
	if (! isp->eof()) {
		return false;
	}
	return filenum >= files.size();
}
/**
 * Read the next input line into `buf`, advancing to the next file when the
 * current stream is at EOF.
 *
 * @param buf  destination with room for at least BUFSIZ characters
 * @return 0 when getline() was attempted; -1 when there is no further file or
 *         the next file could not be opened. On -1 `buf` is set to the empty
 *         string so a caller that ignores the result does not see the
 *         previous line again.
 *
 * NOTE(review): a line of BUFSIZ-1 or more characters makes getline() set
 * failbit, and nothing here clears it.
 */
int ReadHomData::getNextLine(char *buf) {
	if (isp->eof()) {
		// move on to the next file; give up if there is none or it fails
		if (++filenum >= files.size() ||
				openFile(files[filenum].c_str()) < 0) {
			buf[0] = '\0';
			return -1;
		}
	}
	isp->getline(buf, BUFSIZ);
	return 0;
}

/**
 * For every domain that domInfo reports as overlapping dom1, add one
 * homology record to homDataSet.
 *
 * Each record pairs a duplicate of dom1 (given the found domain's cluster
 * and sub-cluster IDs and placed onto it) with a duplicate of dom2 that
 * receives the correspondingly transformed region. The list returned by
 * domInfo->getOvlpDomains() is deleted before returning; dom1 and dom2
 * themselves are not deleted.
 */
void ReadHomData::addAllOvlpDomains(
	HomDataSet *homDataSet, Domain *dom1, Domain *dom2, double pam, double score)
{
	list<Domain *>* overlaps = domInfo->getOvlpDomains(dom1);

	for (list<Domain*>::iterator it = overlaps->begin();
			it != overlaps->end(); ++it) {
		Domain *hit = *it;

		// duplicate of dom1 that inherits the hit's cluster / sub-cluster IDs
		Domain *placed1 = DomainPool::duplicate(dom1);
		placed1->setClustID( hit->getClustID() );
		placed1->copySubClustIDs( hit );

		// the duplicate is placed onto the found domain
		placed1->placeOn(hit);

		// working copy of dom1 limited to the region of the found domain
		Domain clipped1(*dom1);
		clipped1.limitRegion(hit);

		// map that limited region from dom1 onto a duplicate of dom2
		Domain* mapped2 = DomainPool::duplicate(dom2);
		clipped1.transform(dom1, dom2, mapped2);

		HomData record(placed1, mapped2, pam, score);
		homDataSet->addHomData(record);
	}

	delete overlaps;
}

