Mercurial > repos > calkan > mrfast
view mrfast-2.1.0.5/baseFAST.c @ 1:d4054b05b015 default tip
Version update to 2.1.0.5
author | calkan |
---|---|
date | Fri, 09 Mar 2012 07:35:51 -0500 |
parents | |
children |
line wrap: on
line source
/* * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this list * of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, this * list of conditions and the following disclaimer in the documentation and/or other * materials provided with the distribution. * - Neither the names of the University of Washington, Simon Fraser University, * nor the names of its contributors may be * used to endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Authors: Farhad Hormozdiari Faraz Hach Can Alkan Emails: farhadh AT uw DOT edu fhach AT cs DOT sfu DOT ca calkan AT uw DOT edu */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <math.h> #include "Common.h" #include "CommandLineParser.h" #include "Reads.h" #include "Output.h" #include "HashTable.h" #include "MrFAST.h" char *versionNumber = "2.1"; // Current Version unsigned char seqFastq; int main(int argc, char *argv[]) { if (!parseCommandLine(argc, argv)) return 1; configHashTable(); /**************************************************** * INDEXING ***************************************************/ if (indexingMode) { int i; /******************************** * Regular Mode ********************************/ configHashTable(); for (i = 0; i < fileCnt; i++) { generateHashTable(fileName[i][0], fileName[i][1]); } } /**************************************************** * SEARCHING ***************************************************/ else { Read *seqList; unsigned int seqListSize; int fc; int samplingLocsSize; int *samplingLocs; double totalLoadingTime = 0; double totalMappingTime = 0; double startTime; double loadingTime; double mappingTime; double lstartTime; double ppTime = 0.0; double tmpTime;; char *prevGen = getMem(CONTIG_NAME_SIZE); prevGen[0]='\0'; char *curGen; int flag; double maxMem=0; char fname1[FILE_NAME_LENGTH]; char fname2[FILE_NAME_LENGTH]; char fname3[FILE_NAME_LENGTH]; char fname4[FILE_NAME_LENGTH]; char fname5[FILE_NAME_LENGTH]; char outputFileName[FILE_NAME_LENGTH]; // Loading Sequences & Sampling Locations startTime = getTime(); if (!readAllReads(seqFile1, seqFile2, seqCompressed, &seqFastq, pairedEndMode, &seqList, &seqListSize)) { return 1; } loadSamplingLocations(&samplingLocs, &samplingLocsSize); totalLoadingTime += getTime()-startTime; if (pairedEndMode) { //Switching to Inferred Size minPairEndedDistance = minPairEndedDistance - SEQ_LENGTH + 2; maxPairEndedDistance = maxPairEndedDistance - SEQ_LENGTH + 2; if (pairedEndDiscordantMode) { maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance - SEQ_LENGTH + 2; minPairEndedDiscordantDistance = minPairEndedDiscordantDistance - SEQ_LENGTH + 2; } sprintf(fname1, "__%s__1", mappingOutput); sprintf(fname2, "__%s__2", mappingOutput); sprintf(fname3, "__%s__disc", mappingOutput); sprintf(fname4, "__%s__oea1", mappingOutput); sprintf(fname5, "__%s__oea2", mappingOutput); unlink(fname1); unlink(fname2); unlink(fname3); unlink(fname4); unlink(fname5); } sprintf(outputFileName, "%s%s",mappingOutputPath , mappingOutput); // Preparing output initOutput(outputFileName, outCompressed); fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); fprintf(stdout, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads"); fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); /******************************** * Regular Mode ********************************/ if (!pairedEndMode) { initLookUpTable(); if(bestMode) initBestMapping(seqListSize); for (fc = 0; fc <fileCnt; fc++) { if (!initLoadingHashTable(fileName[fc][1])) { return 1; } mappingTime = 0; loadingTime = 0; prevGen[0] = '\0'; flag = 1; do { flag = loadHashTable ( &tmpTime, errThreshold); // Reading a fragment curGen = getRefGenomeName(); // First Time if (flag && prevGen[0]== '\0') { sprintf(prevGen, "%s", curGen); } if ( !flag || strcmp(prevGen, curGen)!=0) { fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); fflush(stdout); totalMappingTime += mappingTime; totalLoadingTime += loadingTime; loadingTime = 0; mappingTime = 0; maxMem = 0; if (!flag) { break; } } else if (progressRep && mappingTime != 0) { fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); fflush(stdout); } sprintf(prevGen, "%s", curGen); loadingTime += tmpTime; // lstartTime = getTime(); initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); lstartTime = getTime(); mapAllSingleEndSeq(); mappingTime += getTime() - lstartTime; if (maxMem < getMemUsage()) { maxMem = getMemUsage(); } } while (flag); } // end for; if(bestMode) finalizeBestSingleMapping(); finalizeFAST(); finalizeLoadingHashTable(); } // Pairedend Mapping Mode else { initLookUpTable(); if(pairedEndMode) initBestConcordantDiscordant(seqListSize); for (fc = 0; fc < fileCnt; fc++) { if (!initLoadingHashTable(fileName[fc][1])) { return 1; } mappingTime = 0; loadingTime = 0; prevGen[0] = '\0'; flag = 1; do { flag = loadHashTable ( &tmpTime , errThreshold); // Reading a fragment curGen = getRefGenomeName(); // First Time if (flag && prevGen[0]== '\0') { sprintf(prevGen, "%s", curGen); } if ( !flag || strcmp(prevGen, curGen)!=0) { // DISCORDANT lstartTime = getTime(); outputPairedEnd(); mappingTime += getTime() - lstartTime; //DISCORDANT fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); fflush(stdout); totalMappingTime += mappingTime; totalLoadingTime += loadingTime; loadingTime = 0; mappingTime = 0; maxMem = 0; if (!flag) { break; } } else if (progressRep && mappingTime != 0) { fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); fflush(stdout); } sprintf(prevGen, "%s", curGen); loadingTime += tmpTime; lstartTime = getTime(); initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); mapPairedEndSeq(); mappingTime += getTime() - lstartTime; if (maxMem < getMemUsage()) { maxMem = getMemUsage(); } } while (flag); } // end for; if(pairedEndMode) { sprintf(outputFileName, "%s%s",mappingOutputPath , mappingOutput); finalizeOEAReads(outputFileName); outputAllTransChromosal(transChromosal); finalizeBestConcordantDiscordant(); } finalizeLoadingHashTable(); if (pairedEndDiscordantMode) { lstartTime = getTime(); outputPairedEndDiscPP(); ppTime = getTime() - lstartTime; } finalizeFAST(); } //else finalizeOutput(); fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); fprintf(stdout, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime); if (pairedEndDiscordantMode) fprintf(stdout, "Post Processing Time: %18.2f \n", ppTime); fprintf(stdout, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime); fprintf(stdout, "%-30s%10d\n","Total No. of Reads:", seqListSize); fprintf(stdout, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt); fprintf(stdout, "%-30s%10.0f\n\n","Avg No. of locations verified:", ceil((float)verificationCnt/seqListSize)); int cof = (pairedEndMode)?2:1; if (progressRep && maxHits != 0) { int frequency[maxHits+1]; int i; for ( i=0 ; i <= maxHits; i++) { frequency[i] = 0; } for (fc = 0; fc < seqListSize; fc++) { frequency[(int)(*(seqList[fc*cof].hits))]++; } frequency[maxHits] = completedSeqCnt; for ( i=0 ; i <= maxHits; i++) { fprintf(stdout, "%-30s%10d%10d%10.2f%%\n","Reads Mapped to ", i, frequency[i], 100*(float)frequency[i]/(float)seqListSize); } } finalizeReads(unmappedOutput); freeMem(prevGen, CONTIG_NAME_SIZE); } return 1; }