Mercurial > repos > calkan > mrfast
comparison mrfast-2.1.0.4/baseFAST.c @ 1:d4054b05b015 default tip
Version update to 2.1.0.5
| author | calkan |
|---|---|
| date | Fri, 09 Mar 2012 07:35:51 -0500 |
| parents | 7b3dc85dc7fd |
| children |
comparison
equal
deleted
inserted
replaced
| 0:7b3dc85dc7fd | 1:d4054b05b015 |
|---|---|
| 1 /* | |
| 2 * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University | |
| 3 * All rights reserved. | |
| 4 * | |
| 5 * Redistribution and use in source and binary forms, with or without modification, | |
| 6 * are permitted provided that the following conditions are met: | |
| 7 * | |
| 8 * Redistributions of source code must retain the above copyright notice, this list | |
| 9 * of conditions and the following disclaimer. | |
| 10 * - Redistributions in binary form must reproduce the above copyright notice, this | |
| 11 * list of conditions and the following disclaimer in the documentation and/or other | |
| 12 * materials provided with the distribution. | |
| 13 * - Neither the names of the University of Washington, Simon Fraser University, | |
| 14 * nor the names of its contributors may be | |
| 15 * used to endorse or promote products derived from this software without specific | |
| 16 * prior written permission. | |
| 17 * | |
| 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | |
| 22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
| 26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 29 */ | |
| 30 | |
| 31 /* | |
| 32 Authors: | |
| 33 Farhad Hormozdiari | |
| 34 Faraz Hach | |
| 35 Can Alkan | |
| 36 Emails: | |
| 37 farhadh AT uw DOT edu | |
| 38 fhach AT cs DOT sfu DOT ca | |
| 39 calkan AT uw DOT edu | |
| 40 */ | |
| 41 | |
| 42 | |
| 43 | |
| 44 | |
| 45 | |
| 46 #include <stdio.h> | |
| 47 #include <stdlib.h> | |
| 48 #include <string.h> | |
| 49 #include <math.h> | |
| 50 #include "Common.h" | |
| 51 #include "CommandLineParser.h" | |
| 52 #include "Reads.h" | |
| 53 #include "Output.h" | |
| 54 #include "HashTable.h" | |
| 55 #include "MrFAST.h" | |
| 56 | |
| 57 char *versionNumber = "2.1"; // Current Version | |
| 58 unsigned char seqFastq; | |
| 59 | |
| 60 int main(int argc, char *argv[]) | |
| 61 { | |
| 62 if (!parseCommandLine(argc, argv)) | |
| 63 return 1; | |
| 64 | |
| 65 configHashTable(); | |
| 66 /**************************************************** | |
| 67 * INDEXING | |
| 68 ***************************************************/ | |
| 69 if (indexingMode) | |
| 70 { | |
| 71 int i; | |
| 72 /******************************** | |
| 73 * Regular Mode | |
| 74 ********************************/ | |
| 75 configHashTable(); | |
| 76 for (i = 0; i < fileCnt; i++) | |
| 77 { | |
| 78 generateHashTable(fileName[i][0], fileName[i][1]); | |
| 79 } | |
| 80 } | |
| 81 /**************************************************** | |
| 82 * SEARCHING | |
| 83 ***************************************************/ | |
| 84 else | |
| 85 { | |
| 86 Read *seqList; | |
| 87 unsigned int seqListSize; | |
| 88 int fc; | |
| 89 int samplingLocsSize; | |
| 90 int *samplingLocs; | |
| 91 double totalLoadingTime = 0; | |
| 92 double totalMappingTime = 0; | |
| 93 double startTime; | |
| 94 double loadingTime; | |
| 95 double mappingTime; | |
| 96 double lstartTime; | |
| 97 double ppTime = 0.0; | |
| 98 double tmpTime;; | |
| 99 char *prevGen = getMem(CONTIG_NAME_SIZE); | |
| 100 prevGen[0]='\0'; | |
| 101 char *curGen; | |
| 102 int flag; | |
| 103 double maxMem=0; | |
| 104 char fname1[FILE_NAME_LENGTH]; | |
| 105 char fname2[FILE_NAME_LENGTH]; | |
| 106 char fname3[FILE_NAME_LENGTH]; | |
| 107 char fname4[FILE_NAME_LENGTH]; | |
| 108 char fname5[FILE_NAME_LENGTH]; | |
| 109 | |
| 110 char outputFileName[FILE_NAME_LENGTH]; | |
| 111 // Loading Sequences & Sampling Locations | |
| 112 startTime = getTime(); | |
| 113 if (!readAllReads(seqFile1, seqFile2, seqCompressed, &seqFastq, pairedEndMode, &seqList, &seqListSize)) | |
| 114 { | |
| 115 return 1; | |
| 116 } | |
| 117 | |
| 118 loadSamplingLocations(&samplingLocs, &samplingLocsSize); | |
| 119 totalLoadingTime += getTime()-startTime; | |
| 120 | |
| 121 if (pairedEndMode) | |
| 122 { | |
| 123 //Switching to Inferred Size | |
| 124 minPairEndedDistance = minPairEndedDistance - SEQ_LENGTH + 2; | |
| 125 maxPairEndedDistance = maxPairEndedDistance - SEQ_LENGTH + 2; | |
| 126 if (pairedEndDiscordantMode) | |
| 127 { | |
| 128 maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance - SEQ_LENGTH + 2; | |
| 129 minPairEndedDiscordantDistance = minPairEndedDiscordantDistance - SEQ_LENGTH + 2; | |
| 130 } | |
| 131 | |
| 132 sprintf(fname1, "__%s__1", mappingOutput); | |
| 133 sprintf(fname2, "__%s__2", mappingOutput); | |
| 134 sprintf(fname3, "__%s__disc", mappingOutput); | |
| 135 sprintf(fname4, "__%s__oea1", mappingOutput); | |
| 136 sprintf(fname5, "__%s__oea2", mappingOutput); | |
| 137 unlink(fname1); | |
| 138 unlink(fname2); | |
| 139 unlink(fname3); | |
| 140 unlink(fname4); | |
| 141 unlink(fname5); | |
| 142 } | |
| 143 | |
| 144 sprintf(outputFileName, "%s%s",mappingOutputPath , mappingOutput); | |
| 145 // Preparing output | |
| 146 initOutput(outputFileName, outCompressed); | |
| 147 | |
| 148 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); | |
| 149 fprintf(stdout, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads"); | |
| 150 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); | |
| 151 | |
| 152 /******************************** | |
| 153 * Regular Mode | |
| 154 ********************************/ | |
| 155 if (!pairedEndMode) | |
| 156 { | |
| 157 initLookUpTable(); | |
| 158 if(bestMode) | |
| 159 initBestMapping(seqListSize); | |
| 160 | |
| 161 for (fc = 0; fc <fileCnt; fc++) | |
| 162 { | |
| 163 if (!initLoadingHashTable(fileName[fc][1])) | |
| 164 { | |
| 165 return 1; | |
| 166 } | |
| 167 | |
| 168 mappingTime = 0; | |
| 169 loadingTime = 0; | |
| 170 prevGen[0] = '\0'; | |
| 171 flag = 1; | |
| 172 | |
| 173 do | |
| 174 { | |
| 175 flag = loadHashTable ( &tmpTime, errThreshold); // Reading a fragment | |
| 176 curGen = getRefGenomeName(); | |
| 177 | |
| 178 // First Time | |
| 179 if (flag && prevGen[0]== '\0') | |
| 180 { | |
| 181 sprintf(prevGen, "%s", curGen); | |
| 182 } | |
| 183 | |
| 184 if ( !flag || strcmp(prevGen, curGen)!=0) | |
| 185 { | |
| 186 | |
| 187 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
| 188 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
| 189 fflush(stdout); | |
| 190 | |
| 191 totalMappingTime += mappingTime; | |
| 192 totalLoadingTime += loadingTime; | |
| 193 | |
| 194 loadingTime = 0; | |
| 195 mappingTime = 0; | |
| 196 maxMem = 0; | |
| 197 | |
| 198 if (!flag) | |
| 199 { | |
| 200 break; | |
| 201 } | |
| 202 } | |
| 203 else if (progressRep && mappingTime != 0) | |
| 204 { | |
| 205 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
| 206 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
| 207 fflush(stdout); | |
| 208 } | |
| 209 | |
| 210 sprintf(prevGen, "%s", curGen); | |
| 211 | |
| 212 loadingTime += tmpTime; | |
| 213 // lstartTime = getTime(); | |
| 214 | |
| 215 initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); | |
| 216 | |
| 217 lstartTime = getTime(); | |
| 218 | |
| 219 | |
| 220 mapAllSingleEndSeq(); | |
| 221 | |
| 222 | |
| 223 | |
| 224 mappingTime += getTime() - lstartTime; | |
| 225 if (maxMem < getMemUsage()) | |
| 226 { | |
| 227 maxMem = getMemUsage(); | |
| 228 } | |
| 229 } while (flag); | |
| 230 | |
| 231 } // end for; | |
| 232 if(bestMode) | |
| 233 finalizeBestSingleMapping(); | |
| 234 finalizeFAST(); | |
| 235 finalizeLoadingHashTable(); | |
| 236 | |
| 237 } | |
| 238 // Pairedend Mapping Mode | |
| 239 else | |
| 240 { | |
| 241 initLookUpTable(); | |
| 242 if(pairedEndMode) | |
| 243 initBestConcordantDiscordant(seqListSize); | |
| 244 for (fc = 0; fc < fileCnt; fc++) | |
| 245 { | |
| 246 if (!initLoadingHashTable(fileName[fc][1])) | |
| 247 { | |
| 248 return 1; | |
| 249 } | |
| 250 mappingTime = 0; | |
| 251 loadingTime = 0; | |
| 252 prevGen[0] = '\0'; | |
| 253 flag = 1; | |
| 254 | |
| 255 do | |
| 256 { | |
| 257 flag = loadHashTable ( &tmpTime , errThreshold); // Reading a fragment | |
| 258 curGen = getRefGenomeName(); | |
| 259 | |
| 260 // First Time | |
| 261 if (flag && prevGen[0]== '\0') | |
| 262 { | |
| 263 sprintf(prevGen, "%s", curGen); | |
| 264 } | |
| 265 | |
| 266 if ( !flag || strcmp(prevGen, curGen)!=0) | |
| 267 { | |
| 268 | |
| 269 // DISCORDANT | |
| 270 lstartTime = getTime(); | |
| 271 outputPairedEnd(); | |
| 272 mappingTime += getTime() - lstartTime; | |
| 273 //DISCORDANT | |
| 274 | |
| 275 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
| 276 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
| 277 fflush(stdout); | |
| 278 | |
| 279 totalMappingTime += mappingTime; | |
| 280 totalLoadingTime += loadingTime; | |
| 281 | |
| 282 loadingTime = 0; | |
| 283 mappingTime = 0; | |
| 284 maxMem = 0; | |
| 285 | |
| 286 if (!flag) | |
| 287 { | |
| 288 break; | |
| 289 } | |
| 290 } | |
| 291 else if (progressRep && mappingTime != 0) | |
| 292 { | |
| 293 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
| 294 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
| 295 fflush(stdout); | |
| 296 } | |
| 297 | |
| 298 sprintf(prevGen, "%s", curGen); | |
| 299 | |
| 300 loadingTime += tmpTime; | |
| 301 lstartTime = getTime(); | |
| 302 | |
| 303 initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); | |
| 304 | |
| 305 mapPairedEndSeq(); | |
| 306 | |
| 307 mappingTime += getTime() - lstartTime; | |
| 308 if (maxMem < getMemUsage()) | |
| 309 { | |
| 310 maxMem = getMemUsage(); | |
| 311 } | |
| 312 | |
| 313 } while (flag); | |
| 314 | |
| 315 } // end for; | |
| 316 | |
| 317 if(pairedEndMode) | |
| 318 { | |
| 319 sprintf(outputFileName, "%s%s",mappingOutputPath , mappingOutput); | |
| 320 finalizeOEAReads(outputFileName); | |
| 321 outputAllTransChromosal(transChromosal); | |
| 322 finalizeBestConcordantDiscordant(); | |
| 323 } | |
| 324 | |
| 325 finalizeLoadingHashTable(); | |
| 326 | |
| 327 if (pairedEndDiscordantMode) | |
| 328 { | |
| 329 lstartTime = getTime(); | |
| 330 outputPairedEndDiscPP(); | |
| 331 ppTime = getTime() - lstartTime; | |
| 332 } | |
| 333 finalizeFAST(); | |
| 334 } //else | |
| 335 | |
| 336 finalizeOutput(); | |
| 337 | |
| 338 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); | |
| 339 fprintf(stdout, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime); | |
| 340 if (pairedEndDiscordantMode) | |
| 341 fprintf(stdout, "Post Processing Time: %18.2f \n", ppTime); | |
| 342 fprintf(stdout, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime); | |
| 343 fprintf(stdout, "%-30s%10d\n","Total No. of Reads:", seqListSize); | |
| 344 fprintf(stdout, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt); | |
| 345 fprintf(stdout, "%-30s%10.0f\n\n","Avg No. of locations verified:", ceil((float)verificationCnt/seqListSize)); | |
| 346 | |
| 347 int cof = (pairedEndMode)?2:1; | |
| 348 | |
| 349 if (progressRep && maxHits != 0) | |
| 350 { | |
| 351 int frequency[maxHits+1]; | |
| 352 int i; | |
| 353 for ( i=0 ; i <= maxHits; i++) | |
| 354 { | |
| 355 frequency[i] = 0; | |
| 356 } | |
| 357 | |
| 358 | |
| 359 for (fc = 0; fc < seqListSize; fc++) | |
| 360 { | |
| 361 frequency[(int)(*(seqList[fc*cof].hits))]++; | |
| 362 } | |
| 363 frequency[maxHits] = completedSeqCnt; | |
| 364 for ( i=0 ; i <= maxHits; i++) | |
| 365 { | |
| 366 fprintf(stdout, "%-30s%10d%10d%10.2f%%\n","Reads Mapped to ", i, frequency[i], 100*(float)frequency[i]/(float)seqListSize); | |
| 367 } | |
| 368 } | |
| 369 | |
| 370 | |
| 371 finalizeReads(unmappedOutput); | |
| 372 freeMem(prevGen, CONTIG_NAME_SIZE); | |
| 373 } | |
| 374 | |
| 375 return 1; | |
| 376 } |
