Mercurial > repos > calkan > mrsfast
diff mrsfast-2.3.0.2/baseFAST.c @ 0:ec628ba33878 default tip
Uploaded source code for mrsFAST
author | calkan |
---|---|
date | Tue, 21 Feb 2012 10:39:28 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mrsfast-2.3.0.2/baseFAST.c Tue Feb 21 10:39:28 2012 -0500 @@ -0,0 +1,398 @@ +/* + * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this list + * of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. + * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be + * used to endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Author : Faraz Hach + * Email : fhach AT cs DOT sfu + * Last Update : 2009-02-01 + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include "Common.h" +#include "CommandLineParser.h" +#include "Reads.h" +#include "Output.h" +#include "HashTable.h" +#include "MrsFAST.h" + +char *versionNumber = "2.3"; // Current Version +unsigned char seqFastq; + +int main(int argc, char *argv[]) +{ + if (!parseCommandLine(argc, argv)) + return 1; + + configHashTable(); + /**************************************************** + * INDEXING + ***************************************************/ + if (indexingMode) + { + int i; + /******************************** + * Regulard Mode + ********************************/ + if (!bisulfiteMode) + { + configHashTable(); + for (i = 0; i < fileCnt; i++) + { + generateHashTable(fileName[i][0], fileName[i][1]); + } + } + else + /******************************** + * Bisulfite Mode + ********************************/ + { // TODO + } + } + /**************************************************** + * SEARCHING + ***************************************************/ + else + { + Read *seqList; + unsigned int seqListSize; + int fc; + int samplingLocsSize; + int *samplingLocs; + double totalLoadingTime = 0; + double totalMappingTime = 0; + double startTime; + double loadingTime; + double mappingTime; + double lstartTime; + double ppTime; + double tmpTime;; + char *prevGen = getMem(CONTIG_NAME_SIZE); + prevGen[0]='\0'; + char *curGen; + int flag; + double maxMem=0; + char fname1[FILE_NAME_LENGTH]; + char fname2[FILE_NAME_LENGTH]; + char fname3[FILE_NAME_LENGTH]; + char fname4[FILE_NAME_LENGTH]; + char fname5[FILE_NAME_LENGTH]; + // Loading Sequences & Sampling Locations + startTime = getTime(); + if (bisulfiteMode && !pairedEndMode && seqFile1 == NULL) + { + //TODO + } + else + { + if (!readAllReads(seqFile1, seqFile2, seqCompressed, &seqFastq, pairedEndMode, &seqList, &seqListSize)) + { + return 1; + } + } + + //loadSamplingLocations(&samplingLocs, &samplingLocsSize); + totalLoadingTime += getTime()-startTime; + + + + + if (pairedEndMode) + { + //Switching to Inferred Size + minPairEndedDistance = minPairEndedDistance - SEQ_LENGTH + 2; + maxPairEndedDistance = maxPairEndedDistance - SEQ_LENGTH + 2; + if (pairedEndDiscordantMode) + { + maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance - SEQ_LENGTH + 2; + minPairEndedDiscordantDistance = minPairEndedDiscordantDistance - SEQ_LENGTH + 2; + } + + /* The size between the ends; + minPairEndedDistance = minPairEndedDistance + SEQ_LENGTH + 1; + maxPairEndedDistance = maxPairEndedDistance + SEQ_LENGTH + 1; + if (pairedEndDiscordantMode) + { + maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance + SEQ_LENGTH + 1; + minPairEndedDiscordantDistance = minPairEndedDiscordantDistance + SEQ_LENGTH + 1; + }*/ + sprintf(fname1, "%s__%s__1", mappingOutputPath, mappingOutput); + sprintf(fname2, "%s__%s__2", mappingOutputPath, mappingOutput); + sprintf(fname3, "%s__%s__disc", mappingOutputPath, mappingOutput); + sprintf(fname4, "%s__%s__oea1", mappingOutputPath, mappingOutput); + sprintf(fname5, "%s__%s__oea2", mappingOutputPath, mappingOutput); + unlink(fname1); + unlink(fname2); + unlink(fname3); + unlink(fname4); + unlink(fname5); + } + + // Preparing output + initOutput(mappingOutput, outCompressed); + + fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); + fprintf(stdout, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads"); + fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); + + /******************************** + * Regular Mode + ********************************/ + if (!bisulfiteMode) + { + if (!pairedEndMode) + { + for (fc = 0; fc <fileCnt; fc++) + { + if (!initLoadingHashTable(fileName[fc][1])) + { + return 1; + } + + loadSamplingLocations(&samplingLocs, &samplingLocsSize); + + mappingTime = 0; + loadingTime = 0; + prevGen[0] = '\0'; + flag = 1; + + do + { + flag = loadHashTable ( &tmpTime ); // Reading a fragment + curGen = getRefGenomeName(); + + // First Time + if (flag && prevGen[0]== '\0') + { + sprintf(prevGen, "%s", curGen); + } + + if ( !flag || strcmp(prevGen, curGen)!=0) + { + + fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", + prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); + fflush(stdout); + + totalMappingTime += mappingTime; + totalLoadingTime += loadingTime; + + loadingTime = 0; + mappingTime = 0; + maxMem = 0; + + if (!flag) + { + break; + } + } + else if (progressRep && mappingTime != 0) + { + fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", + prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); + fflush(stdout); + } + + sprintf(prevGen, "%s", curGen); + + loadingTime += tmpTime; + lstartTime = getTime(); + + initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); + + mapSingleEndSeq(); + + mappingTime += getTime() - lstartTime; + if (maxMem < getMemUsage()) + { + maxMem = getMemUsage(); + } + } while (flag); + + } // end for; + finalizeFAST(); + finalizeLoadingHashTable(); + + } + // Pairedend Mapping Mode + else + { + + for (fc = 0; fc <fileCnt; fc++) + { + if (!initLoadingHashTable(fileName[fc][1])) + { + return 1; + } + + + loadSamplingLocations(&samplingLocs, &samplingLocsSize); + + mappingTime = 0; + loadingTime = 0; + prevGen[0] = '\0'; + flag = 1; + + do + { + flag = loadHashTable ( &tmpTime ); // Reading a fragment + curGen = getRefGenomeName(); + + // First Time + if (flag && prevGen[0]== '\0') + { + sprintf(prevGen, "%s", curGen); + } + + if ( !flag || strcmp(prevGen, curGen)!=0) + { + + // DISCORDANT + lstartTime = getTime(); + outputPairedEnd(); + mappingTime += getTime() - lstartTime; + //DISCORDANT + + fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", + prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); + fflush(stdout); + + totalMappingTime += mappingTime; + totalLoadingTime += loadingTime; + + loadingTime = 0; + mappingTime = 0; + maxMem = 0; + + if (!flag) + { + break; + } + } + else if (progressRep && mappingTime != 0) + { + fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", + prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); + fflush(stdout); + } + + sprintf(prevGen, "%s", curGen); + + loadingTime += tmpTime; + lstartTime = getTime(); + + initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); + + mapPairedEndSeq(); + + mappingTime += getTime() - lstartTime; + if (maxMem < getMemUsage()) + { + maxMem = getMemUsage(); + } + } while (flag); + + } // end for; + + finalizeLoadingHashTable(); + if (pairedEndDiscordantMode) + { + lstartTime = getTime(); + outputPairedEndDiscPP(); + ppTime = getTime() - lstartTime; + } + finalizeFAST(); + } + } + /******************************** + * Bisulfite Mode + ********************************/ + { + //TODO + } + + + finalizeOutput(); + + fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); + fprintf(stdout, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime); + if (pairedEndDiscordantMode) + fprintf(stdout, "Post Processing Time: %18.2f \n", ppTime); + fprintf(stdout, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime); + fprintf(stdout, "%-30s%10d\n","Total No. of Reads:", seqListSize); + fprintf(stdout, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt); + fprintf(stdout, "%-30s%10.0f\n\n","Avg No. of locations verified:", ceil((float)verificationCnt/seqListSize)); + + int cof = (pairedEndMode)?2:1; + + if (progressRep && maxHits != 0) + { + int frequency[maxHits+1]; + int i; + for ( i=0 ; i <= maxHits; i++) + { + frequency[i] = 0; + } + + + for (fc = 0; fc < seqListSize; fc++) + { + frequency[*(seqList[fc*cof].hits)]++; + } + frequency[maxHits] = completedSeqCnt; + for ( i=0 ; i <= maxHits; i++) + { + fprintf(stdout, "%-30s%10d%10d%10.2f%%\n","Reads Mapped to ", i, frequency[i], 100*(float)frequency[i]/(float)seqListSize); + } + } + + + if (strcmp(unmappedOutput, "") == 0) + { + char fn[strlen(mappingOutputPath) + strlen(mappingOutput) + 6 ]; + sprintf(fn, "%s%s.nohit", mappingOutputPath, mappingOutput ); + finalizeReads(fn); + } + else + { + finalizeReads(unmappedOutput); + } + + + + freeMem(prevGen, CONTIG_NAME_SIZE); + } + + + + return 1; +}