Mercurial > repos > yufei-luo > s_mart
diff SMART/Java/Python/Cpp/ncListCreator.cpp @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SMART/Java/Python/Cpp/ncListCreator.cpp Mon Apr 29 03:20:15 2013 -0400 @@ -0,0 +1,179 @@ +#include "ncListCreator.hpp" + +NCListCreator::NCListCreator(string inputFileName): inputFileName(inputFileName) {} + +string NCListCreator::getFileName(string chromosome) { + return inputFileName.substr(0, inputFileName.find_last_of('.')) + "_" + chromosome + ".bed"; +} + +void NCListCreator::splitFile () { + Chromosomes chromosomes; + GenomicInterval interval; + map <string, ofstream> splittedFiles; + map <string, ofstream>::iterator it; + ifstream file; + string line, fileName; + string chromosome; + file.open(inputFileName.c_str()); + if (file.is_open()) { + while (file.good()) { + getline(file, line); + if (line.size() > 0) { + interval.parseFromLine(line); + chromosomes.insert(interval.chromosome); + fileName = getFileName(interval.chromosome); + it = splittedFiles.find(interval.chromosome); + if (it == splittedFiles.end()) { + ofstream outputFile; + outputFile.open(fileName.c_str(), ios::out | ios::binary); + interval.writeBinary(outputFile); + splittedFiles[chromosome] = outputFile; + } + else { + it->second << line << "\n"; + } + } + } + file.close(); + for (it = splittedFiles.begin(); it != splittedFiles.end(); it++) { + it->second.close(); + } + } + else { + cout << "Unable to open file" << inputFileName; + } +} + +void NCListCreator::run() { + for (Chromosomes::iterator it = chromosomes.begin(); splittedFiles != chromosomes.end(); splittedFiles++) { + buildLists(*it); + } +} + +void NCListCreator::buildLists(string chromosome) { + createTables(chromosome); + labelLists(); + computeSubStart(); + computeAbsPosition(); + cleanFiles(); + transfer(chromosome); +} + +void NCListCreator::createTables(string chromosome) { + initLists(chromosome); + h = new Table(H_CELL_SIZE, nbLists); + t = new Table(T_CELL_SIZE, nbLines); + l = new Table(L_CELL_SIZE, nbLines); + fillTables(chromosome); +} + +void NCListCreator::initLists (string chromosome) { + nbLists = 0; + nbLines = 0; + ifstream file; + file.open(getFileName(chromosome).c_str(), ios::in | ios::binary); + Interval currentInterval, previousInterval; + if (file.is_open()) { + while (file.good()) { + if (currentInterval.parseBinary(file)) { + nbLines++; + if (previousInterval.include(currentInterval)) { + nbLists++; + } + } + previousInterval = currentInterval; + } + } + file.close(); +} + +void NCListCreator::fillTables (string chromosome) { + ifstream file; + file.open(getFileName(chromosome).c_str(), ios::in | ios::binary); + Interval currentInterval, previousInterval; + unsigned int i = 0; + if (file.is_open()) { + while (file.good()) { + if (currentInterval.parseBinary(file)) { + t->write(currentInterval.start, i, 0); + t->writeHere(currentInterval.end); + t->writeHere(-1); + t->writeHere(-1); + } + i++; + } + file.close(); + } + t->write(SENTINEL, LIST, -1); + l->write(SENTINEL, LIST, 0); + t->write(SENTINEL, NEW, -1); +} + +void NCListCreator::labelLists () { + unsigned int nextL = 0, thisL, length; + unsigned int p; + Interval current, parent; + for (unsigned int i = 0; i < nbLines; i++) { + p = i - 1; + t->moveTo(p, 0); + parent.readBinary(t->file); + t->moveTo(i, 0); + current.readBinary(t->file); + while ((p != SENTINEL) && (! parent.include(current))) { + p = t->read(p, PARENT); + t->moveTo(p, 0); + parent.readBinary(t->file); + } + thisL = t->read(p, LIST); + if (thisL == SENTINEL) { + thisL = nextL; + nextL++; + length = 0; + t->write(p, LIST, thisL); + } + else { + length = h->read(thisL, LENGTH); + } + t->write(i, PARENT, p); + h->write(thisL, LENGTH, length+1); + } +} + +void NCListCreator::computeSubStart () { + unsigned int total = 0; + for (unsigned int i = 0; i < nbLists; i++) { + h->write(i, START, total); + total += h->read(i, LENGTH); + h->write(i, LENGTH, 0); + } +} + +void NCListCreator::computeAbsPosition () { + Value s, e, pt, hp, pl, nb, lp; + for (unsigned int i = 0; i < nbLines; i++) { + s = t->read(i, START); + e = t->read(i, END); + pt = t->read(i, PARENT); + hp = t->read(pt, LIST); + pl = t->read(pt, NEW); + nb = h->read(hp, LENGTH); + lp = h->read(hp, START) + nb; + t->write(i, NEW, lp); + l->write(lp, START, s); + l->write(lp, END, e); + l->write(lp, LIST, SENTINEL); + l->write(lp, PARENT, pl); + h->write(lp, LENGTH, nb+1); + if (nb == 0) { + l->write(pl, LIST, hp); + } + } +} + +void NCListCreator::cleanFiles () { + t->destroy(); +} + +void NCListCreator::transfer (string chromosome) { + ncLists[chromosome] = NCList(h, l); +}