diff mrfast-2.1.0.4/baseFAST.c @ 1:d4054b05b015 default tip

Version update to 2.1.0.5
author calkan
date Fri, 09 Mar 2012 07:35:51 -0500
parents 7b3dc85dc7fd
children
line wrap: on
line diff
--- a/mrfast-2.1.0.4/baseFAST.c	Tue Feb 21 10:29:47 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,376 +0,0 @@
-/*
- * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this list
- * of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright notice, this
- *   list of conditions and the following disclaimer in the documentation and/or other
- *   materials provided with the distribution.
- * - Neither the names of the University of Washington, Simon Fraser University, 
- *   nor the names of its contributors may be
- *   used to endorse or promote products derived from this software without specific
- *   prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
-  Authors: 
-      Farhad Hormozdiari
-      Faraz Hach
-      Can Alkan
-  Emails:
-      farhadh AT uw DOT edu
-      fhach AT cs DOT sfu DOT ca
-      calkan AT uw DOT edu
-*/
-
-
-
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include "Common.h"
-#include "CommandLineParser.h"
-#include "Reads.h"
-#include "Output.h"
-#include "HashTable.h"
-#include "MrFAST.h"
-
-char 				*versionNumber = "2.1";			// Current Version
-unsigned char		seqFastq;
-
-int main(int argc, char *argv[])
-{
-	if (!parseCommandLine(argc, argv))
-		return 1;
-
-	configHashTable();
-	/****************************************************
-	 * INDEXING
-	 ***************************************************/
-	if (indexingMode)
-	{
-		int i;
-		/********************************
-		 * Regular Mode
-		 ********************************/
-		configHashTable();
-		for (i = 0; i < fileCnt; i++)
-		{
-			generateHashTable(fileName[i][0], fileName[i][1]);
-		}
-	}
-	/****************************************************
-	 * SEARCHING
-	 ***************************************************/
-	else
-	{
-		Read *seqList;
-		unsigned int seqListSize;
-		int fc;
-		int samplingLocsSize;
-		int *samplingLocs;
-		double totalLoadingTime = 0;
-		double totalMappingTime = 0;
-		double startTime;
-		double loadingTime;
-		double mappingTime;
-		double lstartTime;
-		double ppTime = 0.0;
-		double tmpTime;;
-		char *prevGen = getMem(CONTIG_NAME_SIZE);
-		prevGen[0]='\0';
-		char *curGen;
-		int	flag;
-		double maxMem=0;
-		char fname1[FILE_NAME_LENGTH];
-		char fname2[FILE_NAME_LENGTH];
-		char fname3[FILE_NAME_LENGTH];
-		char fname4[FILE_NAME_LENGTH];
-		char fname5[FILE_NAME_LENGTH];
-	
-		char outputFileName[FILE_NAME_LENGTH];
-		// Loading Sequences & Sampling Locations
-		startTime = getTime();
-		if (!readAllReads(seqFile1, seqFile2, seqCompressed, &seqFastq, pairedEndMode, &seqList, &seqListSize))
-		{
-			return 1;
-		}
-
-		loadSamplingLocations(&samplingLocs, &samplingLocsSize);
-		totalLoadingTime += getTime()-startTime;
-
-		if (pairedEndMode)
-		{
-			//Switching to Inferred Size 
-			minPairEndedDistance = minPairEndedDistance - SEQ_LENGTH + 2;
-			maxPairEndedDistance = maxPairEndedDistance - SEQ_LENGTH + 2;
-			if (pairedEndDiscordantMode)
-			{
-				maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance - SEQ_LENGTH + 2;
-				minPairEndedDiscordantDistance = minPairEndedDiscordantDistance - SEQ_LENGTH + 2;
-			}
-			
-			sprintf(fname1, "__%s__1", mappingOutput);
-			sprintf(fname2, "__%s__2", mappingOutput);
-			sprintf(fname3, "__%s__disc", mappingOutput);
-			sprintf(fname4, "__%s__oea1", mappingOutput);
-			sprintf(fname5, "__%s__oea2", mappingOutput);
-			unlink(fname1);
-			unlink(fname2);
-			unlink(fname3);
-			unlink(fname4);
-			unlink(fname5);
-		}
-
-		sprintf(outputFileName, "%s%s",mappingOutputPath , mappingOutput);
-		// Preparing output
-		initOutput(outputFileName, outCompressed);
-
-		fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
-		fprintf(stdout, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads");
-		fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
-
-		/********************************
-		 * Regular Mode
-		 ********************************/
-			if (!pairedEndMode)
-			{
-				initLookUpTable();
-				if(bestMode)
-                                        initBestMapping(seqListSize);
-
-				for (fc = 0; fc <fileCnt; fc++)
-				{
-					if (!initLoadingHashTable(fileName[fc][1]))
-					{
-						return 1;
-					}
-							
-					mappingTime = 0;
-					loadingTime = 0;
-					prevGen[0] = '\0';
-					flag = 1;
-
-					do
-					{
-						flag = loadHashTable ( &tmpTime, errThreshold);  			// Reading a fragment
-						curGen = getRefGenomeName();
-
-						// First Time
-						if (flag && prevGen[0]== '\0')
-						{
-							sprintf(prevGen, "%s", curGen);
-						}
-
-						if ( !flag || strcmp(prevGen, curGen)!=0)
-						{
-
-							fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
-									prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
-							fflush(stdout);
-
-							totalMappingTime += mappingTime;
-							totalLoadingTime += loadingTime;
-
-							loadingTime = 0;
-							mappingTime = 0;
-							maxMem = 0;
-
-							if (!flag)
-							{
-								break;
-							}
-						}
-						else if (progressRep && mappingTime != 0)
-						{
-							fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
-									prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
-							fflush(stdout);
-						}
-
-						sprintf(prevGen, "%s", curGen);
-
-						loadingTime += tmpTime;
-//						lstartTime = getTime();
-
-						initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]);
-
-						lstartTime = getTime();
-
-								
-						mapAllSingleEndSeq();
-
-		
-						
-						mappingTime += getTime() - lstartTime;
-						if (maxMem < getMemUsage())
-						{
-							maxMem = getMemUsage();					 
-						}
-					} while (flag);
-
-				} // end for;
-				if(bestMode)
-					finalizeBestSingleMapping();
-				finalizeFAST();
-				finalizeLoadingHashTable();
-
-			}
-			// Pairedend Mapping Mode
-			else
-			{
-				initLookUpTable();
-				if(pairedEndMode)
-					initBestConcordantDiscordant(seqListSize);
-				for (fc = 0; fc < fileCnt; fc++)
-				{
-					if (!initLoadingHashTable(fileName[fc][1]))
-					{
-						return 1;
-					}
-					mappingTime = 0;
-					loadingTime = 0;
-					prevGen[0] = '\0';
-					flag = 1;
-
-					do
-					{
-						flag = loadHashTable ( &tmpTime , errThreshold);  			// Reading a fragment
-						curGen = getRefGenomeName();
-
-						// First Time
-						if (flag && prevGen[0]== '\0')
-						{
-							sprintf(prevGen, "%s", curGen);
-						}
-
-						if ( !flag || strcmp(prevGen, curGen)!=0)
-						{
-
-							// DISCORDANT
-							lstartTime = getTime();					
-							outputPairedEnd();
-							mappingTime += getTime() - lstartTime;
-							//DISCORDANT			
-
-							fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
-									prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
-							fflush(stdout);
-
-							totalMappingTime += mappingTime;
-							totalLoadingTime += loadingTime;
-
-							loadingTime = 0;
-							mappingTime = 0;
-							maxMem = 0;
-
-							if (!flag)
-							{
-								break;
-							}
-						}
-						else if (progressRep && mappingTime != 0)
-						{
-							fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
-									prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
-							fflush(stdout);
-						}
-
-						sprintf(prevGen, "%s", curGen);
-
-						loadingTime += tmpTime;
-						lstartTime = getTime();
-					
-						initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]);
-						
-						mapPairedEndSeq();
-					
-						mappingTime += getTime() - lstartTime;
-						if (maxMem < getMemUsage())
-						{
-							maxMem = getMemUsage();
-						}
-
-					} while (flag);
-
-				} // end for;
-				
-				if(pairedEndMode)
-				{		
-					sprintf(outputFileName, "%s%s",mappingOutputPath , mappingOutput);		
-					finalizeOEAReads(outputFileName);			
-					outputAllTransChromosal(transChromosal);
-					finalizeBestConcordantDiscordant();
-				}
-			
-				finalizeLoadingHashTable();
-				
-				if (pairedEndDiscordantMode)
-				{
-					lstartTime = getTime();							
-					outputPairedEndDiscPP();
-					ppTime = getTime() - lstartTime;
-				}
-				finalizeFAST();
-			} //else
-
-		finalizeOutput();
-
-		fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
-		fprintf(stdout, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime);
-		if (pairedEndDiscordantMode)
-			fprintf(stdout, "Post Processing Time: %18.2f \n", ppTime);
-		fprintf(stdout, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime);
-		fprintf(stdout, "%-30s%10d\n","Total No. of Reads:", seqListSize);
-		fprintf(stdout, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt);
-		fprintf(stdout, "%-30s%10.0f\n\n","Avg No. of locations verified:", ceil((float)verificationCnt/seqListSize));
-
-		int cof = (pairedEndMode)?2:1;
-
-		if (progressRep && maxHits != 0)
-		{
-			int frequency[maxHits+1];
-			int i;
-			for ( i=0 ; i <= maxHits; i++)
-			{
-				frequency[i] = 0;
-			}
-
-
-			for (fc = 0; fc < seqListSize; fc++)
-			{
-			  frequency[(int)(*(seqList[fc*cof].hits))]++;
-			}
-			frequency[maxHits] = completedSeqCnt;
-			for ( i=0 ; i <= maxHits; i++)
-			{
-				fprintf(stdout, "%-30s%10d%10d%10.2f%%\n","Reads Mapped to ", i, frequency[i], 100*(float)frequency[i]/(float)seqListSize);
-			}
-		}
-
-
-		finalizeReads(unmappedOutput);
-		freeMem(prevGen, CONTIG_NAME_SIZE);
-	}
-
-	return 1;
-}