Mercurial > repos > calkan > mrsfast
comparison mrsfast-2.3.0.2/baseFAST.c @ 0:ec628ba33878 default tip
Uploaded source code for mrsFAST
| author | calkan |
|---|---|
| date | Tue, 21 Feb 2012 10:39:28 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ec628ba33878 |
|---|---|
| 1 /* | |
| 2 * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University | |
| 3 * All rights reserved. | |
| 4 * | |
| 5 * Redistribution and use in source and binary forms, with or without modification, | |
| 6 * are permitted provided that the following conditions are met: | |
| 7 * | |
| 8 * Redistributions of source code must retain the above copyright notice, this list | |
| 9 * of conditions and the following disclaimer. | |
| 10 * - Redistributions in binary form must reproduce the above copyright notice, this | |
| 11 * list of conditions and the following disclaimer in the documentation and/or other | |
| 12 * materials provided with the distribution. | |
| 13 * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be | |
| 14 * used to endorse or promote products derived from this software without specific | |
| 15 * prior written permission. | |
| 16 * | |
| 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | |
| 21 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 24 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
| 25 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 28 */ | |
| 29 | |
| 30 /* | |
| 31 * Author : Faraz Hach | |
| 32 * Email : fhach AT cs DOT sfu | |
| 33 * Last Update : 2009-02-01 | |
| 34 */ | |
| 35 | |
| 36 | |
| 37 #include <stdio.h> | |
| 38 #include <stdlib.h> | |
| 39 #include <string.h> | |
| 40 #include <math.h> | |
| 41 #include "Common.h" | |
| 42 #include "CommandLineParser.h" | |
| 43 #include "Reads.h" | |
| 44 #include "Output.h" | |
| 45 #include "HashTable.h" | |
| 46 #include "MrsFAST.h" | |
| 47 | |
| 48 char *versionNumber = "2.3"; // Current Version | |
| 49 unsigned char seqFastq; | |
| 50 | |
| 51 int main(int argc, char *argv[]) | |
| 52 { | |
| 53 if (!parseCommandLine(argc, argv)) | |
| 54 return 1; | |
| 55 | |
| 56 configHashTable(); | |
| 57 /**************************************************** | |
| 58 * INDEXING | |
| 59 ***************************************************/ | |
| 60 if (indexingMode) | |
| 61 { | |
| 62 int i; | |
| 63 /******************************** | |
| 64 * Regulard Mode | |
| 65 ********************************/ | |
| 66 if (!bisulfiteMode) | |
| 67 { | |
| 68 configHashTable(); | |
| 69 for (i = 0; i < fileCnt; i++) | |
| 70 { | |
| 71 generateHashTable(fileName[i][0], fileName[i][1]); | |
| 72 } | |
| 73 } | |
| 74 else | |
| 75 /******************************** | |
| 76 * Bisulfite Mode | |
| 77 ********************************/ | |
| 78 { // TODO | |
| 79 } | |
| 80 } | |
| 81 /**************************************************** | |
| 82 * SEARCHING | |
| 83 ***************************************************/ | |
| 84 else | |
| 85 { | |
| 86 Read *seqList; | |
| 87 unsigned int seqListSize; | |
| 88 int fc; | |
| 89 int samplingLocsSize; | |
| 90 int *samplingLocs; | |
| 91 double totalLoadingTime = 0; | |
| 92 double totalMappingTime = 0; | |
| 93 double startTime; | |
| 94 double loadingTime; | |
| 95 double mappingTime; | |
| 96 double lstartTime; | |
| 97 double ppTime; | |
| 98 double tmpTime;; | |
| 99 char *prevGen = getMem(CONTIG_NAME_SIZE); | |
| 100 prevGen[0]='\0'; | |
| 101 char *curGen; | |
| 102 int flag; | |
| 103 double maxMem=0; | |
| 104 char fname1[FILE_NAME_LENGTH]; | |
| 105 char fname2[FILE_NAME_LENGTH]; | |
| 106 char fname3[FILE_NAME_LENGTH]; | |
| 107 char fname4[FILE_NAME_LENGTH]; | |
| 108 char fname5[FILE_NAME_LENGTH]; | |
| 109 // Loading Sequences & Sampling Locations | |
| 110 startTime = getTime(); | |
| 111 if (bisulfiteMode && !pairedEndMode && seqFile1 == NULL) | |
| 112 { | |
| 113 //TODO | |
| 114 } | |
| 115 else | |
| 116 { | |
| 117 if (!readAllReads(seqFile1, seqFile2, seqCompressed, &seqFastq, pairedEndMode, &seqList, &seqListSize)) | |
| 118 { | |
| 119 return 1; | |
| 120 } | |
| 121 } | |
| 122 | |
| 123 //loadSamplingLocations(&samplingLocs, &samplingLocsSize); | |
| 124 totalLoadingTime += getTime()-startTime; | |
| 125 | |
| 126 | |
| 127 | |
| 128 | |
| 129 if (pairedEndMode) | |
| 130 { | |
| 131 //Switching to Inferred Size | |
| 132 minPairEndedDistance = minPairEndedDistance - SEQ_LENGTH + 2; | |
| 133 maxPairEndedDistance = maxPairEndedDistance - SEQ_LENGTH + 2; | |
| 134 if (pairedEndDiscordantMode) | |
| 135 { | |
| 136 maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance - SEQ_LENGTH + 2; | |
| 137 minPairEndedDiscordantDistance = minPairEndedDiscordantDistance - SEQ_LENGTH + 2; | |
| 138 } | |
| 139 | |
| 140 /* The size between the ends; | |
| 141 minPairEndedDistance = minPairEndedDistance + SEQ_LENGTH + 1; | |
| 142 maxPairEndedDistance = maxPairEndedDistance + SEQ_LENGTH + 1; | |
| 143 if (pairedEndDiscordantMode) | |
| 144 { | |
| 145 maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance + SEQ_LENGTH + 1; | |
| 146 minPairEndedDiscordantDistance = minPairEndedDiscordantDistance + SEQ_LENGTH + 1; | |
| 147 }*/ | |
| 148 sprintf(fname1, "%s__%s__1", mappingOutputPath, mappingOutput); | |
| 149 sprintf(fname2, "%s__%s__2", mappingOutputPath, mappingOutput); | |
| 150 sprintf(fname3, "%s__%s__disc", mappingOutputPath, mappingOutput); | |
| 151 sprintf(fname4, "%s__%s__oea1", mappingOutputPath, mappingOutput); | |
| 152 sprintf(fname5, "%s__%s__oea2", mappingOutputPath, mappingOutput); | |
| 153 unlink(fname1); | |
| 154 unlink(fname2); | |
| 155 unlink(fname3); | |
| 156 unlink(fname4); | |
| 157 unlink(fname5); | |
| 158 } | |
| 159 | |
| 160 // Preparing output | |
| 161 initOutput(mappingOutput, outCompressed); | |
| 162 | |
| 163 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); | |
| 164 fprintf(stdout, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads"); | |
| 165 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); | |
| 166 | |
| 167 /******************************** | |
| 168 * Regular Mode | |
| 169 ********************************/ | |
| 170 if (!bisulfiteMode) | |
| 171 { | |
| 172 if (!pairedEndMode) | |
| 173 { | |
| 174 for (fc = 0; fc <fileCnt; fc++) | |
| 175 { | |
| 176 if (!initLoadingHashTable(fileName[fc][1])) | |
| 177 { | |
| 178 return 1; | |
| 179 } | |
| 180 | |
| 181 loadSamplingLocations(&samplingLocs, &samplingLocsSize); | |
| 182 | |
| 183 mappingTime = 0; | |
| 184 loadingTime = 0; | |
| 185 prevGen[0] = '\0'; | |
| 186 flag = 1; | |
| 187 | |
| 188 do | |
| 189 { | |
| 190 flag = loadHashTable ( &tmpTime ); // Reading a fragment | |
| 191 curGen = getRefGenomeName(); | |
| 192 | |
| 193 // First Time | |
| 194 if (flag && prevGen[0]== '\0') | |
| 195 { | |
| 196 sprintf(prevGen, "%s", curGen); | |
| 197 } | |
| 198 | |
| 199 if ( !flag || strcmp(prevGen, curGen)!=0) | |
| 200 { | |
| 201 | |
| 202 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
| 203 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
| 204 fflush(stdout); | |
| 205 | |
| 206 totalMappingTime += mappingTime; | |
| 207 totalLoadingTime += loadingTime; | |
| 208 | |
| 209 loadingTime = 0; | |
| 210 mappingTime = 0; | |
| 211 maxMem = 0; | |
| 212 | |
| 213 if (!flag) | |
| 214 { | |
| 215 break; | |
| 216 } | |
| 217 } | |
| 218 else if (progressRep && mappingTime != 0) | |
| 219 { | |
| 220 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
| 221 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
| 222 fflush(stdout); | |
| 223 } | |
| 224 | |
| 225 sprintf(prevGen, "%s", curGen); | |
| 226 | |
| 227 loadingTime += tmpTime; | |
| 228 lstartTime = getTime(); | |
| 229 | |
| 230 initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); | |
| 231 | |
| 232 mapSingleEndSeq(); | |
| 233 | |
| 234 mappingTime += getTime() - lstartTime; | |
| 235 if (maxMem < getMemUsage()) | |
| 236 { | |
| 237 maxMem = getMemUsage(); | |
| 238 } | |
| 239 } while (flag); | |
| 240 | |
| 241 } // end for; | |
| 242 finalizeFAST(); | |
| 243 finalizeLoadingHashTable(); | |
| 244 | |
| 245 } | |
| 246 // Pairedend Mapping Mode | |
| 247 else | |
| 248 { | |
| 249 | |
| 250 for (fc = 0; fc <fileCnt; fc++) | |
| 251 { | |
| 252 if (!initLoadingHashTable(fileName[fc][1])) | |
| 253 { | |
| 254 return 1; | |
| 255 } | |
| 256 | |
| 257 | |
| 258 loadSamplingLocations(&samplingLocs, &samplingLocsSize); | |
| 259 | |
| 260 mappingTime = 0; | |
| 261 loadingTime = 0; | |
| 262 prevGen[0] = '\0'; | |
| 263 flag = 1; | |
| 264 | |
| 265 do | |
| 266 { | |
| 267 flag = loadHashTable ( &tmpTime ); // Reading a fragment | |
| 268 curGen = getRefGenomeName(); | |
| 269 | |
| 270 // First Time | |
| 271 if (flag && prevGen[0]== '\0') | |
| 272 { | |
| 273 sprintf(prevGen, "%s", curGen); | |
| 274 } | |
| 275 | |
| 276 if ( !flag || strcmp(prevGen, curGen)!=0) | |
| 277 { | |
| 278 | |
| 279 // DISCORDANT | |
| 280 lstartTime = getTime(); | |
| 281 outputPairedEnd(); | |
| 282 mappingTime += getTime() - lstartTime; | |
| 283 //DISCORDANT | |
| 284 | |
| 285 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
| 286 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
| 287 fflush(stdout); | |
| 288 | |
| 289 totalMappingTime += mappingTime; | |
| 290 totalLoadingTime += loadingTime; | |
| 291 | |
| 292 loadingTime = 0; | |
| 293 mappingTime = 0; | |
| 294 maxMem = 0; | |
| 295 | |
| 296 if (!flag) | |
| 297 { | |
| 298 break; | |
| 299 } | |
| 300 } | |
| 301 else if (progressRep && mappingTime != 0) | |
| 302 { | |
| 303 fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", | |
| 304 prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); | |
| 305 fflush(stdout); | |
| 306 } | |
| 307 | |
| 308 sprintf(prevGen, "%s", curGen); | |
| 309 | |
| 310 loadingTime += tmpTime; | |
| 311 lstartTime = getTime(); | |
| 312 | |
| 313 initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); | |
| 314 | |
| 315 mapPairedEndSeq(); | |
| 316 | |
| 317 mappingTime += getTime() - lstartTime; | |
| 318 if (maxMem < getMemUsage()) | |
| 319 { | |
| 320 maxMem = getMemUsage(); | |
| 321 } | |
| 322 } while (flag); | |
| 323 | |
| 324 } // end for; | |
| 325 | |
| 326 finalizeLoadingHashTable(); | |
| 327 if (pairedEndDiscordantMode) | |
| 328 { | |
| 329 lstartTime = getTime(); | |
| 330 outputPairedEndDiscPP(); | |
| 331 ppTime = getTime() - lstartTime; | |
| 332 } | |
| 333 finalizeFAST(); | |
| 334 } | |
| 335 } | |
| 336 /******************************** | |
| 337 * Bisulfite Mode | |
| 338 ********************************/ | |
| 339 { | |
| 340 //TODO | |
| 341 } | |
| 342 | |
| 343 | |
| 344 finalizeOutput(); | |
| 345 | |
| 346 fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); | |
| 347 fprintf(stdout, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime); | |
| 348 if (pairedEndDiscordantMode) | |
| 349 fprintf(stdout, "Post Processing Time: %18.2f \n", ppTime); | |
| 350 fprintf(stdout, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime); | |
| 351 fprintf(stdout, "%-30s%10d\n","Total No. of Reads:", seqListSize); | |
| 352 fprintf(stdout, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt); | |
| 353 fprintf(stdout, "%-30s%10.0f\n\n","Avg No. of locations verified:", ceil((float)verificationCnt/seqListSize)); | |
| 354 | |
| 355 int cof = (pairedEndMode)?2:1; | |
| 356 | |
| 357 if (progressRep && maxHits != 0) | |
| 358 { | |
| 359 int frequency[maxHits+1]; | |
| 360 int i; | |
| 361 for ( i=0 ; i <= maxHits; i++) | |
| 362 { | |
| 363 frequency[i] = 0; | |
| 364 } | |
| 365 | |
| 366 | |
| 367 for (fc = 0; fc < seqListSize; fc++) | |
| 368 { | |
| 369 frequency[*(seqList[fc*cof].hits)]++; | |
| 370 } | |
| 371 frequency[maxHits] = completedSeqCnt; | |
| 372 for ( i=0 ; i <= maxHits; i++) | |
| 373 { | |
| 374 fprintf(stdout, "%-30s%10d%10d%10.2f%%\n","Reads Mapped to ", i, frequency[i], 100*(float)frequency[i]/(float)seqListSize); | |
| 375 } | |
| 376 } | |
| 377 | |
| 378 | |
| 379 if (strcmp(unmappedOutput, "") == 0) | |
| 380 { | |
| 381 char fn[strlen(mappingOutputPath) + strlen(mappingOutput) + 6 ]; | |
| 382 sprintf(fn, "%s%s.nohit", mappingOutputPath, mappingOutput ); | |
| 383 finalizeReads(fn); | |
| 384 } | |
| 385 else | |
| 386 { | |
| 387 finalizeReads(unmappedOutput); | |
| 388 } | |
| 389 | |
| 390 | |
| 391 | |
| 392 freeMem(prevGen, CONTIG_NAME_SIZE); | |
| 393 } | |
| 394 | |
| 395 | |
| 396 | |
| 397 return 1; | |
| 398 } |
