Mercurial > repos > calkan > mrfast
comparison mrfast-2.1.0.5/baseFAST.c @ 1:d4054b05b015 default tip
Version update to 2.1.0.5
author | calkan |
---|---|
date | Fri, 09 Mar 2012 07:35:51 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:7b3dc85dc7fd | 1:d4054b05b015 |
---|---|
1 /* | |
2 * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University | |
3 * All rights reserved. | |
4 * | |
5 * Redistribution and use in source and binary forms, with or without modification, | |
6 * are permitted provided that the following conditions are met: | |
7 * | |
8 * Redistributions of source code must retain the above copyright notice, this list | |
9 * of conditions and the following disclaimer. | |
10 * - Redistributions in binary form must reproduce the above copyright notice, this | |
11 * list of conditions and the following disclaimer in the documentation and/or other | |
12 * materials provided with the distribution. | |
13 * - Neither the names of the University of Washington, Simon Fraser University, | |
14 * nor the names of its contributors may be | |
15 * used to endorse or promote products derived from this software without specific | |
16 * prior written permission. | |
17 * | |
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | |
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 */ | |
30 | |
31 /* | |
32 Authors: | |
33 Farhad Hormozdiari | |
34 Faraz Hach | |
35 Can Alkan | |
36 Emails: | |
37 farhadh AT uw DOT edu | |
38 fhach AT cs DOT sfu DOT ca | |
39 calkan AT uw DOT edu | |
40 */ | |
41 | |
42 | |
43 | |
44 | |
45 | |
46 #include <stdio.h> | |
47 #include <stdlib.h> | |
48 #include <string.h> | |
49 #include <math.h> | |
50 #include "Common.h" | |
51 #include "CommandLineParser.h" | |
52 #include "Reads.h" | |
53 #include "Output.h" | |
54 #include "HashTable.h" | |
55 #include "MrFAST.h" | |
56 | |
57 char *versionNumber = "2.1"; // Current Version | |
58 unsigned char seqFastq; | |
59 | |
60 int main(int argc, char *argv[]) | |
61 { | |
62 if (!parseCommandLine(argc, argv)) | |
63 return 1; | |
64 | |
65 configHashTable(); | |
66 /**************************************************** | |
67 * INDEXING | |
68 ***************************************************/ | |
69 if (indexingMode) | |
70 { | |
71 int i; | |
72 /******************************** | |
73 * Regular Mode | |
74 ********************************/ | |
75 configHashTable(); | |
76 for (i = 0; i < fileCnt; i++) | |
77 { | |
78 generateHashTable(fileName[i][0], fileName[i][1]); | |
79 } | |
80 } | |
81 /**************************************************** | |
82 * SEARCHING | |
83 ***************************************************/ | |
84 else | |
85 { | |
86 Read *seqList; | |
87 unsigned int seqListSize; | |
88 int fc; | |
89 int samplingLocsSize; | |
90 int *samplingLocs; | |
91 double totalLoadingTime = 0; | |
92 double totalMappingTime = 0; | |
93 double startTime; | |
94 double loadingTime; | |
95 double mappingTime; | |
96 double lstartTime; | |
97 double ppTime = 0.0; | |
98 double tmpTime;; | |
99 char *prevGen = getMem(CONTIG_NAME_SIZE); | |
100 prevGen[0]='\0'; | |
101 char *curGen; | |
102 int flag; | |
103 double maxMem=0; | |
104 char fname1[FILE_NAME_LENGTH]; | |
105 char fname2[FILE_NAME_LENGTH]; | |
106 char fname3[FILE_NAME_LENGTH]; | |
107 char fname4[FILE_NAME_LENGTH]; | |
108 char fname5[FILE_NAME_LENGTH]; | |
109 | |
110 char outputFileName[FILE_NAME_LENGTH]; | |
111 // Loading Sequences & Sampling Locations | |
112 startTime = getTime(); | |
113 if (!readAllReads(seqFile1, seqFile2, seqCompressed, &seqFastq, pairedEndMode, &seqList, &seqListSize)) | |
114 { | |
115 return 1; | |
116 } | |
117 | |
118 loadSamplingLocations(&samplingLocs, &samplingLocsSize); | |
119 totalLoadingTime += getTime()-startTime; | |
120 | |
121 if (pairedEndMode) | |
122 { | |
123 //Switching to Inferred Size | |
124 minPairEndedDistance = minPairEndedDistance - SEQ_LENGTH + 2; | |
125 maxPairEndedDistance = maxPairEndedDistance - SEQ_LENGTH + 2; | |
126 if (pairedEndDiscordantMode) | |
127 { | |
128 maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance - SEQ_LENGTH + 2; | |
129 minPairEndedDiscordantDistance = minPairEndedDiscordantDistance - SEQ_LENGTH + 2; | |
130 } | |
131 | |
132 sprintf(fname1, "__%s__1", mappingOutput); | |
133 sprintf(fname2, "__%s__2", mappingOutput); | |
134 sprintf(fname3, "__%s__disc", mappingOutput); | |
135 sprintf(fname4, "__%s__oea1", mappingOutput); | |
136 sprintf(fname5, "__%s__oea2", mappingOutput); | |
137 unlink(fname1); | |
138 unlink(fname2); | |
139 unlink(fname3); | |
140 unlink(fname4); | |
141 unlink(fname5); | |
142 } | |
143 | |
144 sprintf(outputFileName, "%s%s",mappingOutputPath , mappingOutput); | |
145 // Preparing output | |
146 initOutput(outputFileName, outCompressed); | |
147 | |
148 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); | |
149 fprintf(stdout, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads"); | |
150 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); | |
151 | |
152 /******************************** | |
153 * Regular Mode | |
154 ********************************/ | |
155 if (!pairedEndMode) | |
156 { | |
157 initLookUpTable(); | |
158 if(bestMode) | |
159 initBestMapping(seqListSize); | |
160 | |
161 for (fc = 0; fc <fileCnt; fc++) | |
162 { | |
163 if (!initLoadingHashTable(fileName[fc][1])) | |
164 { | |
165 return 1; | |
166 } | |
167 | |
168 mappingTime = 0; | |
169 loadingTime = 0; | |
170 prevGen[0] = '\0'; | |
171 flag = 1; | |
172 | |
173 do | |
174 { | |
175 flag = loadHashTable ( &tmpTime, errThreshold); // Reading a fragment | |
176 curGen = getRefGenomeName(); | |
177 | |
178 // First Time | |
179 if (flag && prevGen[0]== '\0') | |
180 { | |
181 sprintf(prevGen, "%s", curGen); | |
182 } | |
183 | |
184 if ( !flag || strcmp(prevGen, curGen)!=0) | |
185 { | |
186 | |
187 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
188 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
189 fflush(stdout); | |
190 | |
191 totalMappingTime += mappingTime; | |
192 totalLoadingTime += loadingTime; | |
193 | |
194 loadingTime = 0; | |
195 mappingTime = 0; | |
196 maxMem = 0; | |
197 | |
198 if (!flag) | |
199 { | |
200 break; | |
201 } | |
202 } | |
203 else if (progressRep && mappingTime != 0) | |
204 { | |
205 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
206 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
207 fflush(stdout); | |
208 } | |
209 | |
210 sprintf(prevGen, "%s", curGen); | |
211 | |
212 loadingTime += tmpTime; | |
213 // lstartTime = getTime(); | |
214 | |
215 initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); | |
216 | |
217 lstartTime = getTime(); | |
218 | |
219 | |
220 mapAllSingleEndSeq(); | |
221 | |
222 | |
223 | |
224 mappingTime += getTime() - lstartTime; | |
225 if (maxMem < getMemUsage()) | |
226 { | |
227 maxMem = getMemUsage(); | |
228 } | |
229 } while (flag); | |
230 | |
231 } // end for; | |
232 if(bestMode) | |
233 finalizeBestSingleMapping(); | |
234 finalizeFAST(); | |
235 finalizeLoadingHashTable(); | |
236 | |
237 } | |
238 // Pairedend Mapping Mode | |
239 else | |
240 { | |
241 initLookUpTable(); | |
242 if(pairedEndMode) | |
243 initBestConcordantDiscordant(seqListSize); | |
244 for (fc = 0; fc < fileCnt; fc++) | |
245 { | |
246 if (!initLoadingHashTable(fileName[fc][1])) | |
247 { | |
248 return 1; | |
249 } | |
250 mappingTime = 0; | |
251 loadingTime = 0; | |
252 prevGen[0] = '\0'; | |
253 flag = 1; | |
254 | |
255 do | |
256 { | |
257 flag = loadHashTable ( &tmpTime , errThreshold); // Reading a fragment | |
258 curGen = getRefGenomeName(); | |
259 | |
260 // First Time | |
261 if (flag && prevGen[0]== '\0') | |
262 { | |
263 sprintf(prevGen, "%s", curGen); | |
264 } | |
265 | |
266 if ( !flag || strcmp(prevGen, curGen)!=0) | |
267 { | |
268 | |
269 // DISCORDANT | |
270 lstartTime = getTime(); | |
271 outputPairedEnd(); | |
272 mappingTime += getTime() - lstartTime; | |
273 //DISCORDANT | |
274 | |
275 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
276 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
277 fflush(stdout); | |
278 | |
279 totalMappingTime += mappingTime; | |
280 totalLoadingTime += loadingTime; | |
281 | |
282 loadingTime = 0; | |
283 mappingTime = 0; | |
284 maxMem = 0; | |
285 | |
286 if (!flag) | |
287 { | |
288 break; | |
289 } | |
290 } | |
291 else if (progressRep && mappingTime != 0) | |
292 { | |
293 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
294 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
295 fflush(stdout); | |
296 } | |
297 | |
298 sprintf(prevGen, "%s", curGen); | |
299 | |
300 loadingTime += tmpTime; | |
301 lstartTime = getTime(); | |
302 | |
303 initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); | |
304 | |
305 mapPairedEndSeq(); | |
306 | |
307 mappingTime += getTime() - lstartTime; | |
308 if (maxMem < getMemUsage()) | |
309 { | |
310 maxMem = getMemUsage(); | |
311 } | |
312 | |
313 } while (flag); | |
314 | |
315 } // end for; | |
316 | |
317 if(pairedEndMode) | |
318 { | |
319 sprintf(outputFileName, "%s%s",mappingOutputPath , mappingOutput); | |
320 finalizeOEAReads(outputFileName); | |
321 outputAllTransChromosal(transChromosal); | |
322 finalizeBestConcordantDiscordant(); | |
323 } | |
324 | |
325 finalizeLoadingHashTable(); | |
326 | |
327 if (pairedEndDiscordantMode) | |
328 { | |
329 lstartTime = getTime(); | |
330 outputPairedEndDiscPP(); | |
331 ppTime = getTime() - lstartTime; | |
332 } | |
333 finalizeFAST(); | |
334 } //else | |
335 | |
336 finalizeOutput(); | |
337 | |
338 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); | |
339 fprintf(stdout, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime); | |
340 if (pairedEndDiscordantMode) | |
341 fprintf(stdout, "Post Processing Time: %18.2f \n", ppTime); | |
342 fprintf(stdout, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime); | |
343 fprintf(stdout, "%-30s%10d\n","Total No. of Reads:", seqListSize); | |
344 fprintf(stdout, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt); | |
345 fprintf(stdout, "%-30s%10.0f\n\n","Avg No. of locations verified:", ceil((float)verificationCnt/seqListSize)); | |
346 | |
347 int cof = (pairedEndMode)?2:1; | |
348 | |
349 if (progressRep && maxHits != 0) | |
350 { | |
351 int frequency[maxHits+1]; | |
352 int i; | |
353 for ( i=0 ; i <= maxHits; i++) | |
354 { | |
355 frequency[i] = 0; | |
356 } | |
357 | |
358 | |
359 for (fc = 0; fc < seqListSize; fc++) | |
360 { | |
361 frequency[(int)(*(seqList[fc*cof].hits))]++; | |
362 } | |
363 frequency[maxHits] = completedSeqCnt; | |
364 for ( i=0 ; i <= maxHits; i++) | |
365 { | |
366 fprintf(stdout, "%-30s%10d%10d%10.2f%%\n","Reads Mapped to ", i, frequency[i], 100*(float)frequency[i]/(float)seqListSize); | |
367 } | |
368 } | |
369 | |
370 | |
371 finalizeReads(unmappedOutput); | |
372 freeMem(prevGen, CONTIG_NAME_SIZE); | |
373 } | |
374 | |
375 return 1; | |
376 } |