Mercurial > repos > calkan > mrsfast
view mrsfast-2.3.0.2/baseFAST.c @ 0:ec628ba33878 default tip
Uploaded source code for mrsFAST
author | calkan |
---|---|
date | Tue, 21 Feb 2012 10:39:28 -0500 |
parents | |
children |
line wrap: on
line source
/* * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this list * of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, this * list of conditions and the following disclaimer in the documentation and/or other * materials provided with the distribution. * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be * used to endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Author : Faraz Hach * Email : fhach AT cs DOT sfu * Last Update : 2009-02-01 */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <math.h> #include "Common.h" #include "CommandLineParser.h" #include "Reads.h" #include "Output.h" #include "HashTable.h" #include "MrsFAST.h" char *versionNumber = "2.3"; // Current Version unsigned char seqFastq; int main(int argc, char *argv[]) { if (!parseCommandLine(argc, argv)) return 1; configHashTable(); /**************************************************** * INDEXING ***************************************************/ if (indexingMode) { int i; /******************************** * Regulard Mode ********************************/ if (!bisulfiteMode) { configHashTable(); for (i = 0; i < fileCnt; i++) { generateHashTable(fileName[i][0], fileName[i][1]); } } else /******************************** * Bisulfite Mode ********************************/ { // TODO } } /**************************************************** * SEARCHING ***************************************************/ else { Read *seqList; unsigned int seqListSize; int fc; int samplingLocsSize; int *samplingLocs; double totalLoadingTime = 0; double totalMappingTime = 0; double startTime; double loadingTime; double mappingTime; double lstartTime; double ppTime; double tmpTime;; char *prevGen = getMem(CONTIG_NAME_SIZE); prevGen[0]='\0'; char *curGen; int flag; double maxMem=0; char fname1[FILE_NAME_LENGTH]; char fname2[FILE_NAME_LENGTH]; char fname3[FILE_NAME_LENGTH]; char fname4[FILE_NAME_LENGTH]; char fname5[FILE_NAME_LENGTH]; // Loading Sequences & Sampling Locations startTime = getTime(); if (bisulfiteMode && !pairedEndMode && seqFile1 == NULL) { //TODO } else { if (!readAllReads(seqFile1, seqFile2, seqCompressed, &seqFastq, pairedEndMode, &seqList, &seqListSize)) { return 1; } } //loadSamplingLocations(&samplingLocs, &samplingLocsSize); totalLoadingTime += getTime()-startTime; if (pairedEndMode) { //Switching to Inferred Size minPairEndedDistance = minPairEndedDistance - SEQ_LENGTH + 2; maxPairEndedDistance = maxPairEndedDistance - SEQ_LENGTH + 2; if (pairedEndDiscordantMode) { maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance - SEQ_LENGTH + 2; minPairEndedDiscordantDistance = minPairEndedDiscordantDistance - SEQ_LENGTH + 2; } /* The size between the ends; minPairEndedDistance = minPairEndedDistance + SEQ_LENGTH + 1; maxPairEndedDistance = maxPairEndedDistance + SEQ_LENGTH + 1; if (pairedEndDiscordantMode) { maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance + SEQ_LENGTH + 1; minPairEndedDiscordantDistance = minPairEndedDiscordantDistance + SEQ_LENGTH + 1; }*/ sprintf(fname1, "%s__%s__1", mappingOutputPath, mappingOutput); sprintf(fname2, "%s__%s__2", mappingOutputPath, mappingOutput); sprintf(fname3, "%s__%s__disc", mappingOutputPath, mappingOutput); sprintf(fname4, "%s__%s__oea1", mappingOutputPath, mappingOutput); sprintf(fname5, "%s__%s__oea2", mappingOutputPath, mappingOutput); unlink(fname1); unlink(fname2); unlink(fname3); unlink(fname4); unlink(fname5); } // Preparing output initOutput(mappingOutput, outCompressed); fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); fprintf(stdout, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads"); fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); /******************************** * Regular Mode ********************************/ if (!bisulfiteMode) { if (!pairedEndMode) { for (fc = 0; fc <fileCnt; fc++) { if (!initLoadingHashTable(fileName[fc][1])) { return 1; } loadSamplingLocations(&samplingLocs, &samplingLocsSize); mappingTime = 0; loadingTime = 0; prevGen[0] = '\0'; flag = 1; do { flag = loadHashTable ( &tmpTime ); // Reading a fragment curGen = getRefGenomeName(); // First Time if (flag && prevGen[0]== '\0') { sprintf(prevGen, "%s", curGen); } if ( !flag || strcmp(prevGen, curGen)!=0) { fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); fflush(stdout); totalMappingTime += mappingTime; totalLoadingTime += loadingTime; loadingTime = 0; mappingTime = 0; maxMem = 0; if (!flag) { break; } } else if (progressRep && mappingTime != 0) { fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); fflush(stdout); } sprintf(prevGen, "%s", curGen); loadingTime += tmpTime; lstartTime = getTime(); initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); mapSingleEndSeq(); mappingTime += getTime() - lstartTime; if (maxMem < getMemUsage()) { maxMem = getMemUsage(); } } while (flag); } // end for; finalizeFAST(); finalizeLoadingHashTable(); } // Pairedend Mapping Mode else { for (fc = 0; fc <fileCnt; fc++) { if (!initLoadingHashTable(fileName[fc][1])) { return 1; } loadSamplingLocations(&samplingLocs, &samplingLocsSize); mappingTime = 0; loadingTime = 0; prevGen[0] = '\0'; flag = 1; do { flag = loadHashTable ( &tmpTime ); // Reading a fragment curGen = getRefGenomeName(); // First Time if (flag && prevGen[0]== '\0') { sprintf(prevGen, "%s", curGen); } if ( !flag || strcmp(prevGen, curGen)!=0) { // DISCORDANT lstartTime = getTime(); outputPairedEnd(); mappingTime += getTime() - lstartTime; //DISCORDANT fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); fflush(stdout); totalMappingTime += mappingTime; totalLoadingTime += loadingTime; loadingTime = 0; mappingTime = 0; maxMem = 0; if (!flag) { break; } } else if (progressRep && mappingTime != 0) { fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n", prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt); fflush(stdout); } sprintf(prevGen, "%s", curGen); loadingTime += tmpTime; lstartTime = getTime(); initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]); mapPairedEndSeq(); mappingTime += getTime() - lstartTime; if (maxMem < getMemUsage()) { maxMem = getMemUsage(); } } while (flag); } // end for; finalizeLoadingHashTable(); if (pairedEndDiscordantMode) { lstartTime = getTime(); outputPairedEndDiscPP(); ppTime = getTime() - lstartTime; } finalizeFAST(); } } /******************************** * Bisulfite Mode ********************************/ { //TODO } finalizeOutput(); fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n"); fprintf(stdout, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime); if (pairedEndDiscordantMode) fprintf(stdout, "Post Processing Time: %18.2f \n", ppTime); fprintf(stdout, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime); fprintf(stdout, "%-30s%10d\n","Total No. of Reads:", seqListSize); fprintf(stdout, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt); fprintf(stdout, "%-30s%10.0f\n\n","Avg No. of locations verified:", ceil((float)verificationCnt/seqListSize)); int cof = (pairedEndMode)?2:1; if (progressRep && maxHits != 0) { int frequency[maxHits+1]; int i; for ( i=0 ; i <= maxHits; i++) { frequency[i] = 0; } for (fc = 0; fc < seqListSize; fc++) { frequency[*(seqList[fc*cof].hits)]++; } frequency[maxHits] = completedSeqCnt; for ( i=0 ; i <= maxHits; i++) { fprintf(stdout, "%-30s%10d%10d%10.2f%%\n","Reads Mapped to ", i, frequency[i], 100*(float)frequency[i]/(float)seqListSize); } } if (strcmp(unmappedOutput, "") == 0) { char fn[strlen(mappingOutputPath) + strlen(mappingOutput) + 6 ]; sprintf(fn, "%s%s.nohit", mappingOutputPath, mappingOutput ); finalizeReads(fn); } else { finalizeReads(unmappedOutput); } freeMem(prevGen, CONTIG_NAME_SIZE); } return 1; }