18
|
1 #include "ncListCreator.hpp"
|
|
2
|
|
3 NCListCreator::NCListCreator(string inputFileName): inputFileName(inputFileName) {}
|
|
4
|
|
5 string NCListCreator::getFileName(string chromosome) {
|
|
6 return inputFileName.substr(0, inputFileName.find_last_of('.')) + "_" + chromosome + ".bed";
|
|
7 }
|
|
8
|
|
9 void NCListCreator::splitFile () {
|
|
10 Chromosomes chromosomes;
|
|
11 GenomicInterval interval;
|
|
12 map <string, ofstream> splittedFiles;
|
|
13 map <string, ofstream>::iterator it;
|
|
14 ifstream file;
|
|
15 string line, fileName;
|
|
16 string chromosome;
|
|
17 file.open(inputFileName.c_str());
|
|
18 if (file.is_open()) {
|
|
19 while (file.good()) {
|
|
20 getline(file, line);
|
|
21 if (line.size() > 0) {
|
|
22 interval.parseFromLine(line);
|
|
23 chromosomes.insert(interval.chromosome);
|
|
24 fileName = getFileName(interval.chromosome);
|
|
25 it = splittedFiles.find(interval.chromosome);
|
|
26 if (it == splittedFiles.end()) {
|
|
27 ofstream outputFile;
|
|
28 outputFile.open(fileName.c_str(), ios::out | ios::binary);
|
|
29 interval.writeBinary(outputFile);
|
|
30 splittedFiles[chromosome] = outputFile;
|
|
31 }
|
|
32 else {
|
|
33 it->second << line << "\n";
|
|
34 }
|
|
35 }
|
|
36 }
|
|
37 file.close();
|
|
38 for (it = splittedFiles.begin(); it != splittedFiles.end(); it++) {
|
|
39 it->second.close();
|
|
40 }
|
|
41 }
|
|
42 else {
|
|
43 cout << "Unable to open file" << inputFileName;
|
|
44 }
|
|
45 }
|
|
46
|
|
47 void NCListCreator::run() {
|
|
48 for (Chromosomes::iterator it = chromosomes.begin(); splittedFiles != chromosomes.end(); splittedFiles++) {
|
|
49 buildLists(*it);
|
|
50 }
|
|
51 }
|
|
52
|
|
53 void NCListCreator::buildLists(string chromosome) {
|
|
54 createTables(chromosome);
|
|
55 labelLists();
|
|
56 computeSubStart();
|
|
57 computeAbsPosition();
|
|
58 cleanFiles();
|
|
59 transfer(chromosome);
|
|
60 }
|
|
61
|
|
62 void NCListCreator::createTables(string chromosome) {
|
|
63 initLists(chromosome);
|
|
64 h = new Table(H_CELL_SIZE, nbLists);
|
|
65 t = new Table(T_CELL_SIZE, nbLines);
|
|
66 l = new Table(L_CELL_SIZE, nbLines);
|
|
67 fillTables(chromosome);
|
|
68 }
|
|
69
|
|
70 void NCListCreator::initLists (string chromosome) {
|
|
71 nbLists = 0;
|
|
72 nbLines = 0;
|
|
73 ifstream file;
|
|
74 file.open(getFileName(chromosome).c_str(), ios::in | ios::binary);
|
|
75 Interval currentInterval, previousInterval;
|
|
76 if (file.is_open()) {
|
|
77 while (file.good()) {
|
|
78 if (currentInterval.parseBinary(file)) {
|
|
79 nbLines++;
|
|
80 if (previousInterval.include(currentInterval)) {
|
|
81 nbLists++;
|
|
82 }
|
|
83 }
|
|
84 previousInterval = currentInterval;
|
|
85 }
|
|
86 }
|
|
87 file.close();
|
|
88 }
|
|
89
|
|
90 void NCListCreator::fillTables (string chromosome) {
|
|
91 ifstream file;
|
|
92 file.open(getFileName(chromosome).c_str(), ios::in | ios::binary);
|
|
93 Interval currentInterval, previousInterval;
|
|
94 unsigned int i = 0;
|
|
95 if (file.is_open()) {
|
|
96 while (file.good()) {
|
|
97 if (currentInterval.parseBinary(file)) {
|
|
98 t->write(currentInterval.start, i, 0);
|
|
99 t->writeHere(currentInterval.end);
|
|
100 t->writeHere(-1);
|
|
101 t->writeHere(-1);
|
|
102 }
|
|
103 i++;
|
|
104 }
|
|
105 file.close();
|
|
106 }
|
|
107 t->write(SENTINEL, LIST, -1);
|
|
108 l->write(SENTINEL, LIST, 0);
|
|
109 t->write(SENTINEL, NEW, -1);
|
|
110 }
|
|
111
|
|
112 void NCListCreator::labelLists () {
|
|
113 unsigned int nextL = 0, thisL, length;
|
|
114 unsigned int p;
|
|
115 Interval current, parent;
|
|
116 for (unsigned int i = 0; i < nbLines; i++) {
|
|
117 p = i - 1;
|
|
118 t->moveTo(p, 0);
|
|
119 parent.readBinary(t->file);
|
|
120 t->moveTo(i, 0);
|
|
121 current.readBinary(t->file);
|
|
122 while ((p != SENTINEL) && (! parent.include(current))) {
|
|
123 p = t->read(p, PARENT);
|
|
124 t->moveTo(p, 0);
|
|
125 parent.readBinary(t->file);
|
|
126 }
|
|
127 thisL = t->read(p, LIST);
|
|
128 if (thisL == SENTINEL) {
|
|
129 thisL = nextL;
|
|
130 nextL++;
|
|
131 length = 0;
|
|
132 t->write(p, LIST, thisL);
|
|
133 }
|
|
134 else {
|
|
135 length = h->read(thisL, LENGTH);
|
|
136 }
|
|
137 t->write(i, PARENT, p);
|
|
138 h->write(thisL, LENGTH, length+1);
|
|
139 }
|
|
140 }
|
|
141
|
|
142 void NCListCreator::computeSubStart () {
|
|
143 unsigned int total = 0;
|
|
144 for (unsigned int i = 0; i < nbLists; i++) {
|
|
145 h->write(i, START, total);
|
|
146 total += h->read(i, LENGTH);
|
|
147 h->write(i, LENGTH, 0);
|
|
148 }
|
|
149 }
|
|
150
|
|
151 void NCListCreator::computeAbsPosition () {
|
|
152 Value s, e, pt, hp, pl, nb, lp;
|
|
153 for (unsigned int i = 0; i < nbLines; i++) {
|
|
154 s = t->read(i, START);
|
|
155 e = t->read(i, END);
|
|
156 pt = t->read(i, PARENT);
|
|
157 hp = t->read(pt, LIST);
|
|
158 pl = t->read(pt, NEW);
|
|
159 nb = h->read(hp, LENGTH);
|
|
160 lp = h->read(hp, START) + nb;
|
|
161 t->write(i, NEW, lp);
|
|
162 l->write(lp, START, s);
|
|
163 l->write(lp, END, e);
|
|
164 l->write(lp, LIST, SENTINEL);
|
|
165 l->write(lp, PARENT, pl);
|
|
166 h->write(lp, LENGTH, nb+1);
|
|
167 if (nb == 0) {
|
|
168 l->write(pl, LIST, hp);
|
|
169 }
|
|
170 }
|
|
171 }
|
|
172
|
|
173 void NCListCreator::cleanFiles () {
|
|
174 t->destroy();
|
|
175 }
|
|
176
|
|
177 void NCListCreator::transfer (string chromosome) {
|
|
178 ncLists[chromosome] = NCList(h, l);
|
|
179 }
|