comparison mrsfast-2.3.0.2/baseFAST.c @ 0:ec628ba33878 default tip

Uploaded source code for mrsFAST
author calkan
date Tue, 21 Feb 2012 10:39:28 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:ec628ba33878
1 /*
2 * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 * Redistributions of source code must retain the above copyright notice, this list
9 * of conditions and the following disclaimer.
10 * - Redistributions in binary form must reproduce the above copyright notice, this
11 * list of conditions and the following disclaimer in the documentation and/or other
12 * materials provided with the distribution.
13 * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
14 * used to endorse or promote products derived from this software without specific
15 * prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 /*
31 * Author : Faraz Hach
32 * Email : fhach AT cs DOT sfu
33 * Last Update : 2009-02-01
34 */
35
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <math.h>
41 #include "Common.h"
42 #include "CommandLineParser.h"
43 #include "Reads.h"
44 #include "Output.h"
45 #include "HashTable.h"
46 #include "MrsFAST.h"
47
48 char *versionNumber = "2.3"; // Current Version
49 unsigned char seqFastq;
50
51 int main(int argc, char *argv[])
52 {
53 if (!parseCommandLine(argc, argv))
54 return 1;
55
56 configHashTable();
57 /****************************************************
58 * INDEXING
59 ***************************************************/
60 if (indexingMode)
61 {
62 int i;
63 /********************************
64 * Regulard Mode
65 ********************************/
66 if (!bisulfiteMode)
67 {
68 configHashTable();
69 for (i = 0; i < fileCnt; i++)
70 {
71 generateHashTable(fileName[i][0], fileName[i][1]);
72 }
73 }
74 else
75 /********************************
76 * Bisulfite Mode
77 ********************************/
78 { // TODO
79 }
80 }
81 /****************************************************
82 * SEARCHING
83 ***************************************************/
84 else
85 {
86 Read *seqList;
87 unsigned int seqListSize;
88 int fc;
89 int samplingLocsSize;
90 int *samplingLocs;
91 double totalLoadingTime = 0;
92 double totalMappingTime = 0;
93 double startTime;
94 double loadingTime;
95 double mappingTime;
96 double lstartTime;
97 double ppTime;
98 double tmpTime;;
99 char *prevGen = getMem(CONTIG_NAME_SIZE);
100 prevGen[0]='\0';
101 char *curGen;
102 int flag;
103 double maxMem=0;
104 char fname1[FILE_NAME_LENGTH];
105 char fname2[FILE_NAME_LENGTH];
106 char fname3[FILE_NAME_LENGTH];
107 char fname4[FILE_NAME_LENGTH];
108 char fname5[FILE_NAME_LENGTH];
109 // Loading Sequences & Sampling Locations
110 startTime = getTime();
111 if (bisulfiteMode && !pairedEndMode && seqFile1 == NULL)
112 {
113 //TODO
114 }
115 else
116 {
117 if (!readAllReads(seqFile1, seqFile2, seqCompressed, &seqFastq, pairedEndMode, &seqList, &seqListSize))
118 {
119 return 1;
120 }
121 }
122
123 //loadSamplingLocations(&samplingLocs, &samplingLocsSize);
124 totalLoadingTime += getTime()-startTime;
125
126
127
128
129 if (pairedEndMode)
130 {
131 //Switching to Inferred Size
132 minPairEndedDistance = minPairEndedDistance - SEQ_LENGTH + 2;
133 maxPairEndedDistance = maxPairEndedDistance - SEQ_LENGTH + 2;
134 if (pairedEndDiscordantMode)
135 {
136 maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance - SEQ_LENGTH + 2;
137 minPairEndedDiscordantDistance = minPairEndedDiscordantDistance - SEQ_LENGTH + 2;
138 }
139
140 /* The size between the ends;
141 minPairEndedDistance = minPairEndedDistance + SEQ_LENGTH + 1;
142 maxPairEndedDistance = maxPairEndedDistance + SEQ_LENGTH + 1;
143 if (pairedEndDiscordantMode)
144 {
145 maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance + SEQ_LENGTH + 1;
146 minPairEndedDiscordantDistance = minPairEndedDiscordantDistance + SEQ_LENGTH + 1;
147 }*/
148 sprintf(fname1, "%s__%s__1", mappingOutputPath, mappingOutput);
149 sprintf(fname2, "%s__%s__2", mappingOutputPath, mappingOutput);
150 sprintf(fname3, "%s__%s__disc", mappingOutputPath, mappingOutput);
151 sprintf(fname4, "%s__%s__oea1", mappingOutputPath, mappingOutput);
152 sprintf(fname5, "%s__%s__oea2", mappingOutputPath, mappingOutput);
153 unlink(fname1);
154 unlink(fname2);
155 unlink(fname3);
156 unlink(fname4);
157 unlink(fname5);
158 }
159
160 // Preparing output
161 initOutput(mappingOutput, outCompressed);
162
163 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
164 fprintf(stdout, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads");
165 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
166
167 /********************************
168 * Regular Mode
169 ********************************/
170 if (!bisulfiteMode)
171 {
172 if (!pairedEndMode)
173 {
174 for (fc = 0; fc <fileCnt; fc++)
175 {
176 if (!initLoadingHashTable(fileName[fc][1]))
177 {
178 return 1;
179 }
180
181 loadSamplingLocations(&samplingLocs, &samplingLocsSize);
182
183 mappingTime = 0;
184 loadingTime = 0;
185 prevGen[0] = '\0';
186 flag = 1;
187
188 do
189 {
190 flag = loadHashTable ( &tmpTime ); // Reading a fragment
191 curGen = getRefGenomeName();
192
193 // First Time
194 if (flag && prevGen[0]== '\0')
195 {
196 sprintf(prevGen, "%s", curGen);
197 }
198
199 if ( !flag || strcmp(prevGen, curGen)!=0)
200 {
201
202 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
203 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
204 fflush(stdout);
205
206 totalMappingTime += mappingTime;
207 totalLoadingTime += loadingTime;
208
209 loadingTime = 0;
210 mappingTime = 0;
211 maxMem = 0;
212
213 if (!flag)
214 {
215 break;
216 }
217 }
218 else if (progressRep && mappingTime != 0)
219 {
220 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
221 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
222 fflush(stdout);
223 }
224
225 sprintf(prevGen, "%s", curGen);
226
227 loadingTime += tmpTime;
228 lstartTime = getTime();
229
230 initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]);
231
232 mapSingleEndSeq();
233
234 mappingTime += getTime() - lstartTime;
235 if (maxMem < getMemUsage())
236 {
237 maxMem = getMemUsage();
238 }
239 } while (flag);
240
241 } // end for;
242 finalizeFAST();
243 finalizeLoadingHashTable();
244
245 }
246 // Pairedend Mapping Mode
247 else
248 {
249
250 for (fc = 0; fc <fileCnt; fc++)
251 {
252 if (!initLoadingHashTable(fileName[fc][1]))
253 {
254 return 1;
255 }
256
257
258 loadSamplingLocations(&samplingLocs, &samplingLocsSize);
259
260 mappingTime = 0;
261 loadingTime = 0;
262 prevGen[0] = '\0';
263 flag = 1;
264
265 do
266 {
267 flag = loadHashTable ( &tmpTime ); // Reading a fragment
268 curGen = getRefGenomeName();
269
270 // First Time
271 if (flag && prevGen[0]== '\0')
272 {
273 sprintf(prevGen, "%s", curGen);
274 }
275
276 if ( !flag || strcmp(prevGen, curGen)!=0)
277 {
278
279 // DISCORDANT
280 lstartTime = getTime();
281 outputPairedEnd();
282 mappingTime += getTime() - lstartTime;
283 //DISCORDANT
284
285 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
286 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
287 fflush(stdout);
288
289 totalMappingTime += mappingTime;
290 totalLoadingTime += loadingTime;
291
292 loadingTime = 0;
293 mappingTime = 0;
294 maxMem = 0;
295
296 if (!flag)
297 {
298 break;
299 }
300 }
301 else if (progressRep && mappingTime != 0)
302 {
303 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
304 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
305 fflush(stdout);
306 }
307
308 sprintf(prevGen, "%s", curGen);
309
310 loadingTime += tmpTime;
311 lstartTime = getTime();
312
313 initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]);
314
315 mapPairedEndSeq();
316
317 mappingTime += getTime() - lstartTime;
318 if (maxMem < getMemUsage())
319 {
320 maxMem = getMemUsage();
321 }
322 } while (flag);
323
324 } // end for;
325
326 finalizeLoadingHashTable();
327 if (pairedEndDiscordantMode)
328 {
329 lstartTime = getTime();
330 outputPairedEndDiscPP();
331 ppTime = getTime() - lstartTime;
332 }
333 finalizeFAST();
334 }
335 }
336 /********************************
337 * Bisulfite Mode
338 ********************************/
339 {
340 //TODO
341 }
342
343
344 finalizeOutput();
345
346 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
347 fprintf(stdout, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime);
348 if (pairedEndDiscordantMode)
349 fprintf(stdout, "Post Processing Time: %18.2f \n", ppTime);
350 fprintf(stdout, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime);
351 fprintf(stdout, "%-30s%10d\n","Total No. of Reads:", seqListSize);
352 fprintf(stdout, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt);
353 fprintf(stdout, "%-30s%10.0f\n\n","Avg No. of locations verified:", ceil((float)verificationCnt/seqListSize));
354
355 int cof = (pairedEndMode)?2:1;
356
357 if (progressRep && maxHits != 0)
358 {
359 int frequency[maxHits+1];
360 int i;
361 for ( i=0 ; i <= maxHits; i++)
362 {
363 frequency[i] = 0;
364 }
365
366
367 for (fc = 0; fc < seqListSize; fc++)
368 {
369 frequency[*(seqList[fc*cof].hits)]++;
370 }
371 frequency[maxHits] = completedSeqCnt;
372 for ( i=0 ; i <= maxHits; i++)
373 {
374 fprintf(stdout, "%-30s%10d%10d%10.2f%%\n","Reads Mapped to ", i, frequency[i], 100*(float)frequency[i]/(float)seqListSize);
375 }
376 }
377
378
379 if (strcmp(unmappedOutput, "") == 0)
380 {
381 char fn[strlen(mappingOutputPath) + strlen(mappingOutput) + 6 ];
382 sprintf(fn, "%s%s.nohit", mappingOutputPath, mappingOutput );
383 finalizeReads(fn);
384 }
385 else
386 {
387 finalizeReads(unmappedOutput);
388 }
389
390
391
392 freeMem(prevGen, CONTIG_NAME_SIZE);
393 }
394
395
396
397 return 1;
398 }