diff mrfast-2.1.0.5/baseFAST.c @ 1:d4054b05b015 default tip

Version update to 2.1.0.5
author calkan
date Fri, 09 Mar 2012 07:35:51 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrfast-2.1.0.5/baseFAST.c	Fri Mar 09 07:35:51 2012 -0500
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the names of the University of Washington, Simon Fraser University, 
+ *   nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+  Authors: 
+      Farhad Hormozdiari
+      Faraz Hach
+      Can Alkan
+  Emails:
+      farhadh AT uw DOT edu
+      fhach AT cs DOT sfu DOT ca
+      calkan AT uw DOT edu
+*/
+
+
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "Common.h"
+#include "CommandLineParser.h"
+#include "Reads.h"
+#include "Output.h"
+#include "HashTable.h"
+#include "MrFAST.h"
+
+char 				*versionNumber = "2.1";			// Current Version
+unsigned char		seqFastq;
+
+int main(int argc, char *argv[])
+{
+	if (!parseCommandLine(argc, argv))
+		return 1;
+
+	configHashTable();
+	/****************************************************
+	 * INDEXING
+	 ***************************************************/
+	if (indexingMode)
+	{
+		int i;
+		/********************************
+		 * Regular Mode
+		 ********************************/
+		configHashTable();
+		for (i = 0; i < fileCnt; i++)
+		{
+			generateHashTable(fileName[i][0], fileName[i][1]);
+		}
+	}
+	/****************************************************
+	 * SEARCHING
+	 ***************************************************/
+	else
+	{
+		Read *seqList;
+		unsigned int seqListSize;
+		int fc;
+		int samplingLocsSize;
+		int *samplingLocs;
+		double totalLoadingTime = 0;
+		double totalMappingTime = 0;
+		double startTime;
+		double loadingTime;
+		double mappingTime;
+		double lstartTime;
+		double ppTime = 0.0;
+		double tmpTime;;
+		char *prevGen = getMem(CONTIG_NAME_SIZE);
+		prevGen[0]='\0';
+		char *curGen;
+		int	flag;
+		double maxMem=0;
+		char fname1[FILE_NAME_LENGTH];
+		char fname2[FILE_NAME_LENGTH];
+		char fname3[FILE_NAME_LENGTH];
+		char fname4[FILE_NAME_LENGTH];
+		char fname5[FILE_NAME_LENGTH];
+	
+		char outputFileName[FILE_NAME_LENGTH];
+		// Loading Sequences & Sampling Locations
+		startTime = getTime();
+		if (!readAllReads(seqFile1, seqFile2, seqCompressed, &seqFastq, pairedEndMode, &seqList, &seqListSize))
+		{
+			return 1;
+		}
+
+		loadSamplingLocations(&samplingLocs, &samplingLocsSize);
+		totalLoadingTime += getTime()-startTime;
+
+		if (pairedEndMode)
+		{
+			//Switching to Inferred Size 
+			minPairEndedDistance = minPairEndedDistance - SEQ_LENGTH + 2;
+			maxPairEndedDistance = maxPairEndedDistance - SEQ_LENGTH + 2;
+			if (pairedEndDiscordantMode)
+			{
+				maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance - SEQ_LENGTH + 2;
+				minPairEndedDiscordantDistance = minPairEndedDiscordantDistance - SEQ_LENGTH + 2;
+			}
+			
+			sprintf(fname1, "__%s__1", mappingOutput);
+			sprintf(fname2, "__%s__2", mappingOutput);
+			sprintf(fname3, "__%s__disc", mappingOutput);
+			sprintf(fname4, "__%s__oea1", mappingOutput);
+			sprintf(fname5, "__%s__oea2", mappingOutput);
+			unlink(fname1);
+			unlink(fname2);
+			unlink(fname3);
+			unlink(fname4);
+			unlink(fname5);
+		}
+
+		sprintf(outputFileName, "%s%s",mappingOutputPath , mappingOutput);
+		// Preparing output
+		initOutput(outputFileName, outCompressed);
+
+		fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
+		fprintf(stdout, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads");
+		fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
+
+		/********************************
+		 * Regular Mode
+		 ********************************/
+			if (!pairedEndMode)
+			{
+				initLookUpTable();
+				if(bestMode)
+                                        initBestMapping(seqListSize);
+
+				for (fc = 0; fc <fileCnt; fc++)
+				{
+					if (!initLoadingHashTable(fileName[fc][1]))
+					{
+						return 1;
+					}
+							
+					mappingTime = 0;
+					loadingTime = 0;
+					prevGen[0] = '\0';
+					flag = 1;
+
+					do
+					{
+						flag = loadHashTable ( &tmpTime, errThreshold);  			// Reading a fragment
+						curGen = getRefGenomeName();
+
+						// First Time
+						if (flag && prevGen[0]== '\0')
+						{
+							sprintf(prevGen, "%s", curGen);
+						}
+
+						if ( !flag || strcmp(prevGen, curGen)!=0)
+						{
+
+							fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
+									prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
+							fflush(stdout);
+
+							totalMappingTime += mappingTime;
+							totalLoadingTime += loadingTime;
+
+							loadingTime = 0;
+							mappingTime = 0;
+							maxMem = 0;
+
+							if (!flag)
+							{
+								break;
+							}
+						}
+						else if (progressRep && mappingTime != 0)
+						{
+							fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
+									prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
+							fflush(stdout);
+						}
+
+						sprintf(prevGen, "%s", curGen);
+
+						loadingTime += tmpTime;
+//						lstartTime = getTime();
+
+						initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]);
+
+						lstartTime = getTime();
+
+								
+						mapAllSingleEndSeq();
+
+		
+						
+						mappingTime += getTime() - lstartTime;
+						if (maxMem < getMemUsage())
+						{
+							maxMem = getMemUsage();					 
+						}
+					} while (flag);
+
+				} // end for;
+				if(bestMode)
+					finalizeBestSingleMapping();
+				finalizeFAST();
+				finalizeLoadingHashTable();
+
+			}
+			// Pairedend Mapping Mode
+			else
+			{
+				initLookUpTable();
+				if(pairedEndMode)
+					initBestConcordantDiscordant(seqListSize);
+				for (fc = 0; fc < fileCnt; fc++)
+				{
+					if (!initLoadingHashTable(fileName[fc][1]))
+					{
+						return 1;
+					}
+					mappingTime = 0;
+					loadingTime = 0;
+					prevGen[0] = '\0';
+					flag = 1;
+
+					do
+					{
+						flag = loadHashTable ( &tmpTime , errThreshold);  			// Reading a fragment
+						curGen = getRefGenomeName();
+
+						// First Time
+						if (flag && prevGen[0]== '\0')
+						{
+							sprintf(prevGen, "%s", curGen);
+						}
+
+						if ( !flag || strcmp(prevGen, curGen)!=0)
+						{
+
+							// DISCORDANT
+							lstartTime = getTime();					
+							outputPairedEnd();
+							mappingTime += getTime() - lstartTime;
+							//DISCORDANT			
+
+							fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
+									prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
+							fflush(stdout);
+
+							totalMappingTime += mappingTime;
+							totalLoadingTime += loadingTime;
+
+							loadingTime = 0;
+							mappingTime = 0;
+							maxMem = 0;
+
+							if (!flag)
+							{
+								break;
+							}
+						}
+						else if (progressRep && mappingTime != 0)
+						{
+							fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
+									prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
+							fflush(stdout);
+						}
+
+						sprintf(prevGen, "%s", curGen);
+
+						loadingTime += tmpTime;
+						lstartTime = getTime();
+					
+						initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]);
+						
+						mapPairedEndSeq();
+					
+						mappingTime += getTime() - lstartTime;
+						if (maxMem < getMemUsage())
+						{
+							maxMem = getMemUsage();
+						}
+
+					} while (flag);
+
+				} // end for;
+				
+				if(pairedEndMode)
+				{		
+					sprintf(outputFileName, "%s%s",mappingOutputPath , mappingOutput);		
+					finalizeOEAReads(outputFileName);			
+					outputAllTransChromosal(transChromosal);
+					finalizeBestConcordantDiscordant();
+				}
+			
+				finalizeLoadingHashTable();
+				
+				if (pairedEndDiscordantMode)
+				{
+					lstartTime = getTime();							
+					outputPairedEndDiscPP();
+					ppTime = getTime() - lstartTime;
+				}
+				finalizeFAST();
+			} //else
+
+			finalizeOutput();
+
+		fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
+		fprintf(stdout, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime);
+		if (pairedEndDiscordantMode)
+			fprintf(stdout, "Post Processing Time: %18.2f \n", ppTime);
+		fprintf(stdout, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime);
+		fprintf(stdout, "%-30s%10d\n","Total No. of Reads:", seqListSize);
+		fprintf(stdout, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt);
+		fprintf(stdout, "%-30s%10.0f\n\n","Avg No. of locations verified:", ceil((float)verificationCnt/seqListSize));
+
+		int cof = (pairedEndMode)?2:1;
+
+		if (progressRep && maxHits != 0)
+		{
+			int frequency[maxHits+1];
+			int i;
+			for ( i=0 ; i <= maxHits; i++)
+			{
+				frequency[i] = 0;
+			}
+
+
+			for (fc = 0; fc < seqListSize; fc++)
+			{
+			  frequency[(int)(*(seqList[fc*cof].hits))]++;
+			}
+			frequency[maxHits] = completedSeqCnt;
+			for ( i=0 ; i <= maxHits; i++)
+			{
+				fprintf(stdout, "%-30s%10d%10d%10.2f%%\n","Reads Mapped to ", i, frequency[i], 100*(float)frequency[i]/(float)seqListSize);
+			}
+		}
+
+
+		finalizeReads(unmappedOutput);
+		freeMem(prevGen, CONTIG_NAME_SIZE);
+	}
+
+	return 1;
+}