Mercurial > repos > calkan > mrsfast
comparison mrsfast-2.3.0.2/baseFAST.c @ 0:ec628ba33878 default tip
Uploaded source code for mrsFAST
author | calkan |
---|---|
date | Tue, 21 Feb 2012 10:39:28 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ec628ba33878 |
---|---|
1 /* | |
2 * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University | |
3 * All rights reserved. | |
4 * | |
5 * Redistribution and use in source and binary forms, with or without modification, | |
6 * are permitted provided that the following conditions are met: | |
7 * | |
8 * Redistributions of source code must retain the above copyright notice, this list | |
9 * of conditions and the following disclaimer. | |
10 * - Redistributions in binary form must reproduce the above copyright notice, this | |
11 * list of conditions and the following disclaimer in the documentation and/or other | |
12 * materials provided with the distribution. | |
13 * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be | |
14 * used to endorse or promote products derived from this software without specific | |
15 * prior written permission. | |
16 * | |
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | |
21 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
24 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
25 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 */ | |
29 | |
30 /* | |
31 * Author : Faraz Hach | |
32 * Email : fhach AT cs DOT sfu | |
33 * Last Update : 2009-02-01 | |
34 */ | |
35 | |
36 | |
37 #include <stdio.h> | |
38 #include <stdlib.h> | |
39 #include <string.h> | |
40 #include <math.h> | |
41 #include "Common.h" | |
42 #include "CommandLineParser.h" | |
43 #include "Reads.h" | |
44 #include "Output.h" | |
45 #include "HashTable.h" | |
46 #include "MrsFAST.h" | |
47 | |
48 char *versionNumber = "2.3"; // Current Version | |
49 unsigned char seqFastq; | |
50 | |
51 int main(int argc, char *argv[]) | |
52 { | |
53 if (!parseCommandLine(argc, argv)) | |
54 return 1; | |
55 | |
56 configHashTable(); | |
57 /**************************************************** | |
58 * INDEXING | |
59 ***************************************************/ | |
60 if (indexingMode) | |
61 { | |
62 int i; | |
63 /******************************** | |
64 * Regulard Mode | |
65 ********************************/ | |
66 if (!bisulfiteMode) | |
67 { | |
68 configHashTable(); | |
69 for (i = 0; i < fileCnt; i++) | |
70 { | |
71 generateHashTable(fileName[i][0], fileName[i][1]); | |
72 } | |
73 } | |
74 else | |
75 /******************************** | |
76 * Bisulfite Mode | |
77 ********************************/ | |
78 { // TODO | |
79 } | |
80 } | |
81 /**************************************************** | |
82 * SEARCHING | |
83 ***************************************************/ | |
84 else | |
85 { | |
86 Read *seqList; | |
87 unsigned int seqListSize; | |
88 int fc; | |
89 int samplingLocsSize; | |
90 int *samplingLocs; | |
91 double totalLoadingTime = 0; | |
92 double totalMappingTime = 0; | |
93 double startTime; | |
94 double loadingTime; | |
95 double mappingTime; | |
96 double lstartTime; | |
97 double ppTime; | |
98 double tmpTime;; | |
99 char *prevGen = getMem(CONTIG_NAME_SIZE); | |
100 prevGen[0]='\0'; | |
101 char *curGen; | |
102 int flag; | |
103 double maxMem=0; | |
104 char fname1[FILE_NAME_LENGTH]; | |
105 char fname2[FILE_NAME_LENGTH]; | |
106 char fname3[FILE_NAME_LENGTH]; | |
107 char fname4[FILE_NAME_LENGTH]; | |
108 char fname5[FILE_NAME_LENGTH]; | |
109 // Loading Sequences & Sampling Locations | |
110 startTime = getTime(); | |
111 if (bisulfiteMode && !pairedEndMode && seqFile1 == NULL) | |
112 { | |
113 //TODO | |
114 } | |
115 else | |
116 { | |
117 if (!readAllReads(seqFile1, seqFile2, seqCompressed, &seqFastq, pairedEndMode, &seqList, &seqListSize)) | |
118 { | |
119 return 1; | |
120 } | |
121 } | |
122 | |
123 //loadSamplingLocations(&samplingLocs, &samplingLocsSize); | |
124 totalLoadingTime += getTime()-startTime; | |
125 | |
126 | |
127 | |
128 | |
129 if (pairedEndMode) | |
130 { | |
131 //Switching to Inferred Size | |
132 minPairEndedDistance = minPairEndedDistance - SEQ_LENGTH + 2; | |
133 maxPairEndedDistance = maxPairEndedDistance - SEQ_LENGTH + 2; | |
134 if (pairedEndDiscordantMode) | |
135 { | |
136 maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance - SEQ_LENGTH + 2; | |
137 minPairEndedDiscordantDistance = minPairEndedDiscordantDistance - SEQ_LENGTH + 2; | |
138 } | |
139 | |
140 /* The size between the ends; | |
141 minPairEndedDistance = minPairEndedDistance + SEQ_LENGTH + 1; | |
142 maxPairEndedDistance = maxPairEndedDistance + SEQ_LENGTH + 1; | |
143 if (pairedEndDiscordantMode) | |
144 { | |
145 maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance + SEQ_LENGTH + 1; | |
146 minPairEndedDiscordantDistance = minPairEndedDiscordantDistance + SEQ_LENGTH + 1; | |
147 }*/ | |
148 sprintf(fname1, "%s__%s__1", mappingOutputPath, mappingOutput); | |
149 sprintf(fname2, "%s__%s__2", mappingOutputPath, mappingOutput); | |
150 sprintf(fname3, "%s__%s__disc", mappingOutputPath, mappingOutput); | |
151 sprintf(fname4, "%s__%s__oea1", mappingOutputPath, mappingOutput); | |
152 sprintf(fname5, "%s__%s__oea2", mappingOutputPath, mappingOutput); | |
153 unlink(fname1); | |
154 unlink(fname2); | |
155 unlink(fname3); | |
156 unlink(fname4); | |
157 unlink(fname5); | |
158 } | |
159 | |
160 // Preparing output | |
161 initOutput(mappingOutput, outCompressed); | |
162 | |
163 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); | |
164 fprintf(stdout, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads"); | |
165 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); | |
166 | |
167 /******************************** | |
168 * Regular Mode | |
169 ********************************/ | |
170 if (!bisulfiteMode) | |
171 { | |
172 if (!pairedEndMode) | |
173 { | |
174 for (fc = 0; fc <fileCnt; fc++) | |
175 { | |
176 if (!initLoadingHashTable(fileName[fc][1])) | |
177 { | |
178 return 1; | |
179 } | |
180 | |
181 loadSamplingLocations(&samplingLocs, &samplingLocsSize); | |
182 | |
183 mappingTime = 0; | |
184 loadingTime = 0; | |
185 prevGen[0] = '\0'; | |
186 flag = 1; | |
187 | |
188 do | |
189 { | |
190 flag = loadHashTable ( &tmpTime ); // Reading a fragment | |
191 curGen = getRefGenomeName(); | |
192 | |
193 // First Time | |
194 if (flag && prevGen[0]== '\0') | |
195 { | |
196 sprintf(prevGen, "%s", curGen); | |
197 } | |
198 | |
199 if ( !flag || strcmp(prevGen, curGen)!=0) | |
200 { | |
201 | |
202 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
203 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
204 fflush(stdout); | |
205 | |
206 totalMappingTime += mappingTime; | |
207 totalLoadingTime += loadingTime; | |
208 | |
209 loadingTime = 0; | |
210 mappingTime = 0; | |
211 maxMem = 0; | |
212 | |
213 if (!flag) | |
214 { | |
215 break; | |
216 } | |
217 } | |
218 else if (progressRep && mappingTime != 0) | |
219 { | |
220 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
221 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
222 fflush(stdout); | |
223 } | |
224 | |
225 sprintf(prevGen, "%s", curGen); | |
226 | |
227 loadingTime += tmpTime; | |
228 lstartTime = getTime(); | |
229 | |
230 initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); | |
231 | |
232 mapSingleEndSeq(); | |
233 | |
234 mappingTime += getTime() - lstartTime; | |
235 if (maxMem < getMemUsage()) | |
236 { | |
237 maxMem = getMemUsage(); | |
238 } | |
239 } while (flag); | |
240 | |
241 } // end for; | |
242 finalizeFAST(); | |
243 finalizeLoadingHashTable(); | |
244 | |
245 } | |
246 // Pairedend Mapping Mode | |
247 else | |
248 { | |
249 | |
250 for (fc = 0; fc <fileCnt; fc++) | |
251 { | |
252 if (!initLoadingHashTable(fileName[fc][1])) | |
253 { | |
254 return 1; | |
255 } | |
256 | |
257 | |
258 loadSamplingLocations(&samplingLocs, &samplingLocsSize); | |
259 | |
260 mappingTime = 0; | |
261 loadingTime = 0; | |
262 prevGen[0] = '\0'; | |
263 flag = 1; | |
264 | |
265 do | |
266 { | |
267 flag = loadHashTable ( &tmpTime ); // Reading a fragment | |
268 curGen = getRefGenomeName(); | |
269 | |
270 // First Time | |
271 if (flag && prevGen[0]== '\0') | |
272 { | |
273 sprintf(prevGen, "%s", curGen); | |
274 } | |
275 | |
276 if ( !flag || strcmp(prevGen, curGen)!=0) | |
277 { | |
278 | |
279 // DISCORDANT | |
280 lstartTime = getTime(); | |
281 outputPairedEnd(); | |
282 mappingTime += getTime() - lstartTime; | |
283 //DISCORDANT | |
284 | |
285 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
286 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
287 fflush(stdout); | |
288 | |
289 totalMappingTime += mappingTime; | |
290 totalLoadingTime += loadingTime; | |
291 | |
292 loadingTime = 0; | |
293 mappingTime = 0; | |
294 maxMem = 0; | |
295 | |
296 if (!flag) | |
297 { | |
298 break; | |
299 } | |
300 } | |
301 else if (progressRep && mappingTime != 0) | |
302 { | |
303 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
304 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
305 fflush(stdout); | |
306 } | |
307 | |
308 sprintf(prevGen, "%s", curGen); | |
309 | |
310 loadingTime += tmpTime; | |
311 lstartTime = getTime(); | |
312 | |
313 initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); | |
314 | |
315 mapPairedEndSeq(); | |
316 | |
317 mappingTime += getTime() - lstartTime; | |
318 if (maxMem < getMemUsage()) | |
319 { | |
320 maxMem = getMemUsage(); | |
321 } | |
322 } while (flag); | |
323 | |
324 } // end for; | |
325 | |
326 finalizeLoadingHashTable(); | |
327 if (pairedEndDiscordantMode) | |
328 { | |
329 lstartTime = getTime(); | |
330 outputPairedEndDiscPP(); | |
331 ppTime = getTime() - lstartTime; | |
332 } | |
333 finalizeFAST(); | |
334 } | |
335 } | |
336 /******************************** | |
337 * Bisulfite Mode | |
338 ********************************/ | |
339 { | |
340 //TODO | |
341 } | |
342 | |
343 | |
344 finalizeOutput(); | |
345 | |
346 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); | |
347 fprintf(stdout, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime); | |
348 if (pairedEndDiscordantMode) | |
349 fprintf(stdout, "Post Processing Time: %18.2f \n", ppTime); | |
350 fprintf(stdout, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime); | |
351 fprintf(stdout, "%-30s%10d\n","Total No. of Reads:", seqListSize); | |
352 fprintf(stdout, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt); | |
353 fprintf(stdout, "%-30s%10.0f\n\n","Avg No. of locations verified:", ceil((float)verificationCnt/seqListSize)); | |
354 | |
355 int cof = (pairedEndMode)?2:1; | |
356 | |
357 if (progressRep && maxHits != 0) | |
358 { | |
359 int frequency[maxHits+1]; | |
360 int i; | |
361 for ( i=0 ; i <= maxHits; i++) | |
362 { | |
363 frequency[i] = 0; | |
364 } | |
365 | |
366 | |
367 for (fc = 0; fc < seqListSize; fc++) | |
368 { | |
369 frequency[*(seqList[fc*cof].hits)]++; | |
370 } | |
371 frequency[maxHits] = completedSeqCnt; | |
372 for ( i=0 ; i <= maxHits; i++) | |
373 { | |
374 fprintf(stdout, "%-30s%10d%10d%10.2f%%\n","Reads Mapped to ", i, frequency[i], 100*(float)frequency[i]/(float)seqListSize); | |
375 } | |
376 } | |
377 | |
378 | |
379 if (strcmp(unmappedOutput, "") == 0) | |
380 { | |
381 char fn[strlen(mappingOutputPath) + strlen(mappingOutput) + 6 ]; | |
382 sprintf(fn, "%s%s.nohit", mappingOutputPath, mappingOutput ); | |
383 finalizeReads(fn); | |
384 } | |
385 else | |
386 { | |
387 finalizeReads(unmappedOutput); | |
388 } | |
389 | |
390 | |
391 | |
392 freeMem(prevGen, CONTIG_NAME_SIZE); | |
393 } | |
394 | |
395 | |
396 | |
397 return 1; | |
398 } |