comparison mrfast-2.1.0.5/baseFAST.c @ 1:d4054b05b015 default tip

Version update to 2.1.0.5
author calkan
date Fri, 09 Mar 2012 07:35:51 -0500
parents
children
comparison
equal deleted inserted replaced
0:7b3dc85dc7fd 1:d4054b05b015
1 /*
2 * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 * Redistributions of source code must retain the above copyright notice, this list
9 * of conditions and the following disclaimer.
10 * - Redistributions in binary form must reproduce the above copyright notice, this
11 * list of conditions and the following disclaimer in the documentation and/or other
12 * materials provided with the distribution.
13 * - Neither the names of the University of Washington, Simon Fraser University,
14 * nor the names of its contributors may be
15 * used to endorse or promote products derived from this software without specific
16 * prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /*
32 Authors:
33 Farhad Hormozdiari
34 Faraz Hach
35 Can Alkan
36 Emails:
37 farhadh AT uw DOT edu
38 fhach AT cs DOT sfu DOT ca
39 calkan AT uw DOT edu
40 */
41
42
43
44
45
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <math.h>
50 #include "Common.h"
51 #include "CommandLineParser.h"
52 #include "Reads.h"
53 #include "Output.h"
54 #include "HashTable.h"
55 #include "MrFAST.h"
56
57 char *versionNumber = "2.1"; // Current Version
58 unsigned char seqFastq;
59
60 int main(int argc, char *argv[])
61 {
62 if (!parseCommandLine(argc, argv))
63 return 1;
64
65 configHashTable();
66 /****************************************************
67 * INDEXING
68 ***************************************************/
69 if (indexingMode)
70 {
71 int i;
72 /********************************
73 * Regular Mode
74 ********************************/
75 configHashTable();
76 for (i = 0; i < fileCnt; i++)
77 {
78 generateHashTable(fileName[i][0], fileName[i][1]);
79 }
80 }
81 /****************************************************
82 * SEARCHING
83 ***************************************************/
84 else
85 {
86 Read *seqList;
87 unsigned int seqListSize;
88 int fc;
89 int samplingLocsSize;
90 int *samplingLocs;
91 double totalLoadingTime = 0;
92 double totalMappingTime = 0;
93 double startTime;
94 double loadingTime;
95 double mappingTime;
96 double lstartTime;
97 double ppTime = 0.0;
98 double tmpTime;;
99 char *prevGen = getMem(CONTIG_NAME_SIZE);
100 prevGen[0]='\0';
101 char *curGen;
102 int flag;
103 double maxMem=0;
104 char fname1[FILE_NAME_LENGTH];
105 char fname2[FILE_NAME_LENGTH];
106 char fname3[FILE_NAME_LENGTH];
107 char fname4[FILE_NAME_LENGTH];
108 char fname5[FILE_NAME_LENGTH];
109
110 char outputFileName[FILE_NAME_LENGTH];
111 // Loading Sequences & Sampling Locations
112 startTime = getTime();
113 if (!readAllReads(seqFile1, seqFile2, seqCompressed, &seqFastq, pairedEndMode, &seqList, &seqListSize))
114 {
115 return 1;
116 }
117
118 loadSamplingLocations(&samplingLocs, &samplingLocsSize);
119 totalLoadingTime += getTime()-startTime;
120
121 if (pairedEndMode)
122 {
123 //Switching to Inferred Size
124 minPairEndedDistance = minPairEndedDistance - SEQ_LENGTH + 2;
125 maxPairEndedDistance = maxPairEndedDistance - SEQ_LENGTH + 2;
126 if (pairedEndDiscordantMode)
127 {
128 maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance - SEQ_LENGTH + 2;
129 minPairEndedDiscordantDistance = minPairEndedDiscordantDistance - SEQ_LENGTH + 2;
130 }
131
132 sprintf(fname1, "__%s__1", mappingOutput);
133 sprintf(fname2, "__%s__2", mappingOutput);
134 sprintf(fname3, "__%s__disc", mappingOutput);
135 sprintf(fname4, "__%s__oea1", mappingOutput);
136 sprintf(fname5, "__%s__oea2", mappingOutput);
137 unlink(fname1);
138 unlink(fname2);
139 unlink(fname3);
140 unlink(fname4);
141 unlink(fname5);
142 }
143
144 sprintf(outputFileName, "%s%s",mappingOutputPath , mappingOutput);
145 // Preparing output
146 initOutput(outputFileName, outCompressed);
147
148 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
149 fprintf(stdout, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads");
150 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
151
152 /********************************
153 * Regular Mode
154 ********************************/
155 if (!pairedEndMode)
156 {
157 initLookUpTable();
158 if(bestMode)
159 initBestMapping(seqListSize);
160
161 for (fc = 0; fc <fileCnt; fc++)
162 {
163 if (!initLoadingHashTable(fileName[fc][1]))
164 {
165 return 1;
166 }
167
168 mappingTime = 0;
169 loadingTime = 0;
170 prevGen[0] = '\0';
171 flag = 1;
172
173 do
174 {
175 flag = loadHashTable ( &tmpTime, errThreshold); // Reading a fragment
176 curGen = getRefGenomeName();
177
178 // First Time
179 if (flag && prevGen[0]== '\0')
180 {
181 sprintf(prevGen, "%s", curGen);
182 }
183
184 if ( !flag || strcmp(prevGen, curGen)!=0)
185 {
186
187 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
188 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
189 fflush(stdout);
190
191 totalMappingTime += mappingTime;
192 totalLoadingTime += loadingTime;
193
194 loadingTime = 0;
195 mappingTime = 0;
196 maxMem = 0;
197
198 if (!flag)
199 {
200 break;
201 }
202 }
203 else if (progressRep && mappingTime != 0)
204 {
205 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
206 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
207 fflush(stdout);
208 }
209
210 sprintf(prevGen, "%s", curGen);
211
212 loadingTime += tmpTime;
213 // lstartTime = getTime();
214
215 initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]);
216
217 lstartTime = getTime();
218
219
220 mapAllSingleEndSeq();
221
222
223
224 mappingTime += getTime() - lstartTime;
225 if (maxMem < getMemUsage())
226 {
227 maxMem = getMemUsage();
228 }
229 } while (flag);
230
231 } // end for;
232 if(bestMode)
233 finalizeBestSingleMapping();
234 finalizeFAST();
235 finalizeLoadingHashTable();
236
237 }
238 // Pairedend Mapping Mode
239 else
240 {
241 initLookUpTable();
242 if(pairedEndMode)
243 initBestConcordantDiscordant(seqListSize);
244 for (fc = 0; fc < fileCnt; fc++)
245 {
246 if (!initLoadingHashTable(fileName[fc][1]))
247 {
248 return 1;
249 }
250 mappingTime = 0;
251 loadingTime = 0;
252 prevGen[0] = '\0';
253 flag = 1;
254
255 do
256 {
257 flag = loadHashTable ( &tmpTime , errThreshold); // Reading a fragment
258 curGen = getRefGenomeName();
259
260 // First Time
261 if (flag && prevGen[0]== '\0')
262 {
263 sprintf(prevGen, "%s", curGen);
264 }
265
266 if ( !flag || strcmp(prevGen, curGen)!=0)
267 {
268
269 // DISCORDANT
270 lstartTime = getTime();
271 outputPairedEnd();
272 mappingTime += getTime() - lstartTime;
273 //DISCORDANT
274
275 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
276 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
277 fflush(stdout);
278
279 totalMappingTime += mappingTime;
280 totalLoadingTime += loadingTime;
281
282 loadingTime = 0;
283 mappingTime = 0;
284 maxMem = 0;
285
286 if (!flag)
287 {
288 break;
289 }
290 }
291 else if (progressRep && mappingTime != 0)
292 {
293 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
294 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
295 fflush(stdout);
296 }
297
298 sprintf(prevGen, "%s", curGen);
299
300 loadingTime += tmpTime;
301 lstartTime = getTime();
302
303 initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]);
304
305 mapPairedEndSeq();
306
307 mappingTime += getTime() - lstartTime;
308 if (maxMem < getMemUsage())
309 {
310 maxMem = getMemUsage();
311 }
312
313 } while (flag);
314
315 } // end for;
316
317 if(pairedEndMode)
318 {
319 sprintf(outputFileName, "%s%s",mappingOutputPath , mappingOutput);
320 finalizeOEAReads(outputFileName);
321 outputAllTransChromosal(transChromosal);
322 finalizeBestConcordantDiscordant();
323 }
324
325 finalizeLoadingHashTable();
326
327 if (pairedEndDiscordantMode)
328 {
329 lstartTime = getTime();
330 outputPairedEndDiscPP();
331 ppTime = getTime() - lstartTime;
332 }
333 finalizeFAST();
334 } //else
335
336 finalizeOutput();
337
338 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
339 fprintf(stdout, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime);
340 if (pairedEndDiscordantMode)
341 fprintf(stdout, "Post Processing Time: %18.2f \n", ppTime);
342 fprintf(stdout, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime);
343 fprintf(stdout, "%-30s%10d\n","Total No. of Reads:", seqListSize);
344 fprintf(stdout, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt);
345 fprintf(stdout, "%-30s%10.0f\n\n","Avg No. of locations verified:", ceil((float)verificationCnt/seqListSize));
346
347 int cof = (pairedEndMode)?2:1;
348
349 if (progressRep && maxHits != 0)
350 {
351 int frequency[maxHits+1];
352 int i;
353 for ( i=0 ; i <= maxHits; i++)
354 {
355 frequency[i] = 0;
356 }
357
358
359 for (fc = 0; fc < seqListSize; fc++)
360 {
361 frequency[(int)(*(seqList[fc*cof].hits))]++;
362 }
363 frequency[maxHits] = completedSeqCnt;
364 for ( i=0 ; i <= maxHits; i++)
365 {
366 fprintf(stdout, "%-30s%10d%10d%10.2f%%\n","Reads Mapped to ", i, frequency[i], 100*(float)frequency[i]/(float)seqListSize);
367 }
368 }
369
370
371 finalizeReads(unmappedOutput);
372 freeMem(prevGen, CONTIG_NAME_SIZE);
373 }
374
375 return 1;
376 }