Mercurial > repos > calkan > mrfast
diff mrfast-2.1.0.4/baseFAST.c @ 1:d4054b05b015 default tip
Version update to 2.1.0.5
author | calkan |
---|---|
date | Fri, 09 Mar 2012 07:35:51 -0500 |
parents | 7b3dc85dc7fd |
children |
line wrap: on
line diff
--- a/mrfast-2.1.0.4/baseFAST.c Tue Feb 21 10:29:47 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,376 +0,0 @@ -/* - * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this list - * of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright notice, this - * list of conditions and the following disclaimer in the documentation and/or other - * materials provided with the distribution. - * - Neither the names of the University of Washington, Simon Fraser University, - * nor the names of its contributors may be - * used to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - Authors: - Farhad Hormozdiari - Faraz Hach - Can Alkan - Emails: - farhadh AT uw DOT edu - fhach AT cs DOT sfu DOT ca - calkan AT uw DOT edu -*/ - - - - - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <math.h> -#include "Common.h" -#include "CommandLineParser.h" -#include "Reads.h" -#include "Output.h" -#include "HashTable.h" -#include "MrFAST.h" - -char *versionNumber = "2.1"; // Current Version -unsigned char seqFastq; - -int main(int argc, char *argv[]) -{ - if (!parseCommandLine(argc, argv)) - return 1; - - configHashTable(); - /**************************************************** - * INDEXING - ***************************************************/ - if (indexingMode) - { - int i; - /******************************** - * Regular Mode - ********************************/ - configHashTable(); - for (i = 0; i < fileCnt; i++) - { - generateHashTable(fileName[i][0], fileName[i][1]); - } - } - /**************************************************** - * SEARCHING - ***************************************************/ - else - { - Read *seqList; - unsigned int seqListSize; - int fc; - int samplingLocsSize; - int *samplingLocs; - double totalLoadingTime = 0; - double totalMappingTime = 0; - double startTime; - double loadingTime; - double mappingTime; - double lstartTime; - double ppTime = 0.0; - double tmpTime;; - char *prevGen = getMem(CONTIG_NAME_SIZE); - prevGen[0]='\0'; - char *curGen; - int flag; - double maxMem=0; - char fname1[FILE_NAME_LENGTH]; - char fname2[FILE_NAME_LENGTH]; - char fname3[FILE_NAME_LENGTH]; - char fname4[FILE_NAME_LENGTH]; - char fname5[FILE_NAME_LENGTH]; - - char outputFileName[FILE_NAME_LENGTH]; - // Loading Sequences & Sampling Locations - startTime = getTime(); - if (!readAllReads(seqFile1, seqFile2, seqCompressed, &seqFastq, pairedEndMode, &seqList, &seqListSize)) - { - return 1; - } - - loadSamplingLocations(&samplingLocs, &samplingLocsSize); - totalLoadingTime += getTime()-startTime; - - if (pairedEndMode) - { - //Switching to Inferred Size - minPairEndedDistance = minPairEndedDistance - SEQ_LENGTH + 2; - maxPairEndedDistance = maxPairEndedDistance - SEQ_LENGTH + 2; - if (pairedEndDiscordantMode) - { - maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance - SEQ_LENGTH + 2; - minPairEndedDiscordantDistance = minPairEndedDiscordantDistance - SEQ_LENGTH + 2; - } - - sprintf(fname1, "__%s__1", mappingOutput); - sprintf(fname2, "__%s__2", mappingOutput); - sprintf(fname3, "__%s__disc", mappingOutput); - sprintf(fname4, "__%s__oea1", mappingOutput); - sprintf(fname5, "__%s__oea2", mappingOutput); - unlink(fname1); - unlink(fname2); - unlink(fname3); - unlink(fname4); - unlink(fname5); - } - - sprintf(outputFileName, "%s%s",mappingOutputPath , mappingOutput); - // Preparing output - initOutput(outputFileName, outCompressed); - - fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); - fprintf(stdout, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads"); - fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); - - /******************************** - * Regular Mode - ********************************/ - if (!pairedEndMode) - { - initLookUpTable(); - if(bestMode) - initBestMapping(seqListSize); - - for (fc = 0; fc <fileCnt; fc++) - { - if (!initLoadingHashTable(fileName[fc][1])) - { - return 1; - } - - mappingTime = 0; - loadingTime = 0; - prevGen[0] = '\0'; - flag = 1; - - do - { - flag = loadHashTable ( &tmpTime, errThreshold); // Reading a fragment - curGen = getRefGenomeName(); - - // First Time - if (flag && prevGen[0]== '\0') - { - sprintf(prevGen, "%s", curGen); - } - - if ( !flag || strcmp(prevGen, curGen)!=0) - { - - fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", - prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); - fflush(stdout); - - totalMappingTime += mappingTime; - totalLoadingTime += loadingTime; - - loadingTime = 0; - mappingTime = 0; - maxMem = 0; - - if (!flag) - { - break; - } - } - else if (progressRep && mappingTime != 0) - { - fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", - prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); - fflush(stdout); - } - - sprintf(prevGen, "%s", curGen); - - loadingTime += tmpTime; -// lstartTime = getTime(); - - initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); - - lstartTime = getTime(); - - - mapAllSingleEndSeq(); - - - - mappingTime += getTime() - lstartTime; - if (maxMem < getMemUsage()) - { - maxMem = getMemUsage(); - } - } while (flag); - - } // end for; - if(bestMode) - finalizeBestSingleMapping(); - finalizeFAST(); - finalizeLoadingHashTable(); - - } - // Pairedend Mapping Mode - else - { - initLookUpTable(); - if(pairedEndMode) - initBestConcordantDiscordant(seqListSize); - for (fc = 0; fc < fileCnt; fc++) - { - if (!initLoadingHashTable(fileName[fc][1])) - { - return 1; - } - mappingTime = 0; - loadingTime = 0; - prevGen[0] = '\0'; - flag = 1; - - do - { - flag = loadHashTable ( &tmpTime , errThreshold); // Reading a fragment - curGen = getRefGenomeName(); - - // First Time - if (flag && prevGen[0]== '\0') - { - sprintf(prevGen, "%s", curGen); - } - - if ( !flag || strcmp(prevGen, curGen)!=0) - { - - // DISCORDANT - lstartTime = getTime(); - outputPairedEnd(); - mappingTime += getTime() - lstartTime; - //DISCORDANT - - fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", - prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); - fflush(stdout); - - totalMappingTime += mappingTime; - totalLoadingTime += loadingTime; - - loadingTime = 0; - mappingTime = 0; - maxMem = 0; - - if (!flag) - { - break; - } - } - else if (progressRep && mappingTime != 0) - { - fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", - prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); - fflush(stdout); - } - - sprintf(prevGen, "%s", curGen); - - loadingTime += tmpTime; - lstartTime = getTime(); - - initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); - - mapPairedEndSeq(); - - mappingTime += getTime() - lstartTime; - if (maxMem < getMemUsage()) - { - maxMem = getMemUsage(); - } - - } while (flag); - - } // end for; - - if(pairedEndMode) - { - sprintf(outputFileName, "%s%s",mappingOutputPath , mappingOutput); - finalizeOEAReads(outputFileName); - outputAllTransChromosal(transChromosal); - finalizeBestConcordantDiscordant(); - } - - finalizeLoadingHashTable(); - - if (pairedEndDiscordantMode) - { - lstartTime = getTime(); - outputPairedEndDiscPP(); - ppTime = getTime() - lstartTime; - } - finalizeFAST(); - } //else - - finalizeOutput(); - - fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); - fprintf(stdout, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime); - if (pairedEndDiscordantMode) - fprintf(stdout, "Post Processing Time: %18.2f \n", ppTime); - fprintf(stdout, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime); - fprintf(stdout, "%-30s%10d\n","Total No. of Reads:", seqListSize); - fprintf(stdout, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt); - fprintf(stdout, "%-30s%10.0f\n\n","Avg No. of locations verified:", ceil((float)verificationCnt/seqListSize)); - - int cof = (pairedEndMode)?2:1; - - if (progressRep && maxHits != 0) - { - int frequency[maxHits+1]; - int i; - for ( i=0 ; i <= maxHits; i++) - { - frequency[i] = 0; - } - - - for (fc = 0; fc < seqListSize; fc++) - { - frequency[(int)(*(seqList[fc*cof].hits))]++; - } - frequency[maxHits] = completedSeqCnt; - for ( i=0 ; i <= maxHits; i++) - { - fprintf(stdout, "%-30s%10d%10d%10.2f%%\n","Reads Mapped to ", i, frequency[i], 100*(float)frequency[i]/(float)seqListSize); - } - } - - - finalizeReads(unmappedOutput); - freeMem(prevGen, CONTIG_NAME_SIZE); - } - - return 1; -}