diff SMART/Java/Python/Cpp/ncListCreator.cpp @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/Cpp/ncListCreator.cpp	Mon Apr 29 03:20:15 2013 -0400
@@ -0,0 +1,179 @@
+#include "ncListCreator.hpp"
+
+NCListCreator::NCListCreator(string inputFileName): inputFileName(inputFileName) {} // Stores the given path in the inputFileName member; no file is opened here.
+
+/**
+ * Builds the name of the per-chromosome file derived from the input file name.
+ *
+ * The extension of inputFileName (if any) is dropped, then "_", the chromosome
+ * name and ".bed" are appended.
+ *
+ * @param chromosome name of the chromosome
+ * @return the derived file name
+ */
+string NCListCreator::getFileName(string chromosome) {
+    string::size_type separator = inputFileName.find_last_of("/\\");
+    string::size_type dot       = inputFileName.find_last_of('.');
+    // A dot located before the last path separator belongs to a directory name,
+    // not to the extension: cutting there would truncate the path.
+    if ((dot != string::npos) && (separator != string::npos) && (dot < separator)) {
+        dot = string::npos;
+    }
+    // substr(0, npos) keeps the whole name when there is no extension.
+    return inputFileName.substr(0, dot) + "_" + chromosome + ".bed";
+}
+
+/**
+ * Splits the input file into one binary file per chromosome.
+ *
+ * Each non-empty line of the input file is parsed with
+ * GenomicInterval::parseFromLine and appended, with GenomicInterval::writeBinary,
+ * to the file named getFileName(chromosome). Every chromosome met is inserted
+ * into the chromosomes set that run() iterates over.
+ *
+ * If the input file cannot be opened, a message is printed on standard output
+ * and nothing is written.
+ */
+void NCListCreator::splitFile () {
+    GenomicInterval interval;
+    map <string, ofstream> splittedFiles;
+    map <string, ofstream>::iterator it;
+    ifstream file;
+    string line;
+    file.open(inputFileName.c_str());
+    if (! file.is_open()) {
+        cout << "Unable to open file " << inputFileName << "\n";
+        return;
+    }
+    while (getline(file, line)) {
+        if (line.empty()) {
+            continue;
+        }
+        interval.parseFromLine(line);
+        // No local set is declared here: a local one would hide the set read by run().
+        chromosomes.insert(interval.chromosome);
+        ofstream *outputFile;
+        it = splittedFiles.find(interval.chromosome);
+        if (it == splittedFiles.end()) {
+            // Streams cannot be copied: let the map create the stream in place,
+            // keyed by the chromosome of the current interval, then open it once.
+            outputFile = &splittedFiles[interval.chromosome];
+            outputFile->open(getFileName(interval.chromosome).c_str(), ios::out | ios::binary);
+        }
+        else {
+            outputFile = &(it->second);
+        }
+        // initLists() and fillTables() read these files with parseBinary, so
+        // every record (not only the first one) is written in binary form.
+        interval.writeBinary(*outputFile);
+    }
+    file.close();
+    for (it = splittedFiles.begin(); it != splittedFiles.end(); ++it) {
+        it->second.close();
+    }
+}
+
+/**
+ * Builds the lists of every chromosome stored in the chromosomes set,
+ * calling buildLists() once per chromosome.
+ */
+void NCListCreator::run() {
+    // The iterator itself must be tested and advanced.
+    for (Chromosomes::iterator it = chromosomes.begin(); it != chromosomes.end(); ++it) {
+        buildLists(*it);
+    }
+}
+
+void NCListCreator::buildLists(string chromosome) { // Runs every construction step, in this order, for one chromosome.
+    createTables(chromosome); // allocates h, t and l, then fills t from the chromosome file
+    labelLists(); // gives each row of t a parent row and a list id, counting list lengths in h
+    computeSubStart(); // turns the lengths stored in h into start offsets and resets the lengths to 0
+    computeAbsPosition(); // writes each interval of t at its final row in l
+    cleanFiles(); // calls t->destroy(); t is not used after this point
+    transfer(chromosome); // stores NCList(h, l) under this chromosome name
+}
+
+void NCListCreator::createTables(string chromosome) { // Sizes, allocates and fills the work tables h, t and l for one chromosome.
+    initLists(chromosome); // sets nbLists and nbLines, which size the tables below
+    h = new Table(H_CELL_SIZE, nbLists); // NOTE(review): h, t and l are overwritten here without delete — confirm who owns the previous tables
+    t = new Table(T_CELL_SIZE, nbLines);
+    l = new Table(L_CELL_SIZE, nbLines);
+    fillTables(chromosome); // must come last: it writes into t and l
+}
+ 
+/**
+ * Scans the binary file of a chromosome and sets the two counters:
+ * nbLines, the number of intervals successfully parsed, and nbLists, the
+ * number of parsed intervals that are included in the interval held from the
+ * previous iteration.
+ *
+ * @param chromosome name of the chromosome whose file is read
+ */
+void NCListCreator::initLists (string chromosome) {
+    nbLines = 0;
+    nbLists = 0;
+    ifstream input(getFileName(chromosome).c_str(), ios::in | ios::binary);
+    Interval current, previous;
+    // Nothing is read when the file could not be opened.
+    while (input.is_open() && input.good()) {
+        const bool parsed = current.parseBinary(input);
+        if (parsed) {
+            ++nbLines;
+            if (previous.include(current)) {
+                ++nbLists;
+            }
+        }
+        // The reference interval is updated on every iteration, parsed or not.
+        previous = current;
+    }
+    input.close();
+}
+
+/**
+ * Fills table t with the intervals read from the binary file of a chromosome,
+ * then writes the sentinel cells of t and l.
+ *
+ * For each parsed interval, one row of t receives the start, the end, and two
+ * cells set to -1. The sentinel cells are written even when the file cannot
+ * be opened.
+ *
+ * @param chromosome name of the chromosome whose file is read
+ */
+void NCListCreator::fillTables (string chromosome) {
+    ifstream file;
+    file.open(getFileName(chromosome).c_str(), ios::in | ios::binary);
+    Interval currentInterval;
+    unsigned int i = 0;
+    if (file.is_open()) {
+        while (file.good()) {
+            if (currentInterval.parseBinary(file)) {
+                // Row, then column, then value: same argument order as every
+                // other Table::write call of this file.
+                // NOTE(review): confirm against the declaration of Table::write.
+                t->write(i, 0, currentInterval.start);
+                t->writeHere(currentInterval.end);
+                t->writeHere(-1);
+                t->writeHere(-1);
+                // Only a parsed interval uses a row.
+                i++;
+            }
+        }
+        file.close();
+    }
+    t->write(SENTINEL, LIST, -1);
+    l->write(SENTINEL, LIST,  0);
+    t->write(SENTINEL, NEW,  -1);
+}
+
+void NCListCreator::labelLists () { // For each row of t: finds the row of an including interval (its parent), gives it a list id, and counts list lengths in h.
+    unsigned int nextL = 0, thisL, length; // nextL: next unused list id
+    unsigned int p; // row of the candidate parent
+    Interval current, parent;
+    for (unsigned int i = 0; i < nbLines; i++) {
+        p = i - 1; // first candidate is the previous row; wraps around when i == 0 since p is unsigned — presumably equal to SENTINEL, confirm
+        t->moveTo(p, 0);
+        parent.readBinary(t->file); // read before p is compared with SENTINEL, so the sentinel row is read too
+        t->moveTo(i, 0);
+        current.readBinary(t->file);
+        while ((p != SENTINEL) && (! parent.include(current))) { // walk up the PARENT links until an including interval or the sentinel is reached
+            p = t->read(p, PARENT);
+            t->moveTo(p, 0);
+            parent.readBinary(t->file);
+        }
+        thisL = t->read(p, LIST); // list id already attached to the parent row, or SENTINEL
+        if (thisL == SENTINEL) { // the parent has no list yet: open a new one
+            thisL = nextL;
+            nextL++;
+            length = 0;
+            t->write(p, LIST, thisL);
+        }
+        else {
+            length = h->read(thisL, LENGTH);
+        }
+        t->write(i, PARENT, p);
+        h->write(thisL, LENGTH, length+1); // one more element in list thisL
+    }
+}
+
+/**
+ * Replaces the lengths stored in h by start offsets: the START cell of each
+ * list receives the sum of the LENGTH cells of the lists before it, and each
+ * LENGTH cell is then reset to 0.
+ */
+void NCListCreator::computeSubStart () {
+    unsigned int offset = 0;
+    unsigned int list   = 0;
+    while (list < nbLists) {
+        h->write(list, START, offset);
+        // The length must be read before it is reset.
+        offset += h->read(list, LENGTH);
+        h->write(list, LENGTH, 0);
+        ++list;
+    }
+}
+
+/**
+ * Writes every interval of t at its final row in l.
+ *
+ * For each row i of t, the target row lp is the START offset of the list of
+ * its parent (read in h) plus the number of elements already placed in that
+ * list (the LENGTH cell of h, used here as a running counter). The row lp of
+ * l receives the start, the end, SENTINEL in the LIST column, and the new row
+ * of the parent in the PARENT column. lp is also stored in the NEW column of
+ * row i of t.
+ */
+void NCListCreator::computeAbsPosition () {
+    Value s, e, pt, hp, pl, nb, lp;
+    for (unsigned int i = 0; i < nbLines; i++) {
+        s  = t->read(i,  START);
+        e  = t->read(i,  END);
+        pt = t->read(i,  PARENT);
+        hp = t->read(pt, LIST);
+        pl = t->read(pt, NEW);
+        nb = h->read(hp, LENGTH);
+        lp = h->read(hp, START) + nb;
+        t->write(i,  NEW,    lp);
+        l->write(lp, START,  s);
+        l->write(lp, END,    e);
+        l->write(lp, LIST,   SENTINEL);
+        l->write(lp, PARENT, pl);
+        // The counter was read from row hp of h, so it is written back to the
+        // same row; lp is a row of l, not of h.
+        h->write(hp, LENGTH, nb+1);
+        if (nb == 0) {
+            // First element placed in list hp: attach the list to the parent row in l.
+            l->write(pl, LIST, hp);
+        }
+    }
+}
+
+void NCListCreator::cleanFiles () { // Discards the work table t; h and l are left untouched.
+    t->destroy(); // NOTE(review): presumably removes the storage behind t — confirm in Table::destroy
+}
+
+void NCListCreator::transfer (string chromosome) { // Registers the result for one chromosome in the ncLists member.
+    ncLists[chromosome] = NCList(h, l); // built from the current h and l tables; replaces any entry already stored under this name
+}