changeset 0:ec628ba33878 default tip

Uploaded source code for mrsFAST
author calkan
date Tue, 21 Feb 2012 10:39:28 -0500
parents
children
files mrsfast-2.3.0.2/CommandLineParser.c mrsfast-2.3.0.2/CommandLineParser.h mrsfast-2.3.0.2/Common.c mrsfast-2.3.0.2/Common.h mrsfast-2.3.0.2/HashTable.c mrsfast-2.3.0.2/HashTable.h mrsfast-2.3.0.2/Makefile mrsfast-2.3.0.2/MrsFAST.c mrsfast-2.3.0.2/MrsFAST.h mrsfast-2.3.0.2/Output.c mrsfast-2.3.0.2/Output.h mrsfast-2.3.0.2/Reads.c mrsfast-2.3.0.2/Reads.h mrsfast-2.3.0.2/RefGenome.c mrsfast-2.3.0.2/RefGenome.h mrsfast-2.3.0.2/baseFAST.c
diffstat 16 files changed, 4508 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/CommandLineParser.c	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,373 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, 
+ * are permitted provided that the following conditions are met:
+ *   
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-01-29
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <string.h>
+#include <ctype.h>
+#include "Common.h"
+#include "CommandLineParser.h"
+
+int						uniqueMode=1;
+int						indexingMode;
+int						searchingMode;
+int						bisulfiteMode;
+int						pairedEndMode;
+int						pairedEndDiscordantMode;
+int						pairedEndProfilingMode;
+int						seqCompressed;
+int						outCompressed;
+int						cropSize = 0;
+int						progressRep = 0;
+int						minPairEndedDistance=-1;
+int						maxPairEndedDistance=-1;
+int						minPairEndedDiscordantDistance=-1;
+int						maxPairEndedDiscordantDistance=-1;
+char					*seqFile1;
+char					*seqFile2;
+char					*mappingOutput = "output";
+char					*mappingOutputPath = "";
+char					*unmappedOutput = "";
+char					fileName[1000][2][FILE_NAME_LENGTH];
+int						fileCnt;
+unsigned char			errThreshold=2;
+unsigned char			maxHits=0;
+unsigned char			WINDOW_SIZE = 12;
+unsigned int			CONTIG_SIZE;
+unsigned int			CONTIG_MAX_SIZE;
+
+void printHelp();
+
+int parseCommandLine (int argc, char *argv[])
+{
+
+	int o;
+	int index;
+	char *fastaFile = NULL;
+	char *fastaOutputFile = NULL;
+	char *indexFile = NULL;
+	char *batchFile = NULL ;
+	int  batchMode = 0;
+	static struct option longOptions[] = 
+	{
+
+		//		{"bs",				no_argument,		&bisulfiteMode,		1},
+		{"pe",				no_argument,		&pairedEndMode,		1},
+		{"discordant-vh",	no_argument,		&pairedEndDiscordantMode,	1},
+		{"profile",			no_argument, 		&pairedEndProfilingMode,	1},
+		{"seqcomp",			no_argument,		&seqCompressed,		1},
+		{"outcomp",			no_argument,		&outCompressed,		1},
+		{"progress",		no_argument,		&progressRep,		1},
+		{"index",			required_argument,	0, 					'i'},
+		{"search",			required_argument,	0,					's'},
+		{"help",			no_argument,		0,					'h'},
+		{"version",			no_argument,		0,					'v'},
+		{"seq",				required_argument,	0,					'x'},
+		{"seq1",			required_argument,	0,					'x'},
+		{"seq2",			required_argument,	0,					'y'},
+		{"ws",				required_argument,  0,					'w'},
+		{"min",				required_argument,  0,					'l'},
+		{"max",				required_argument,  0,					'm'},
+		{"crop",			required_argument,  0,					'c'}
+
+	};
+
+
+
+	while ( (o = getopt_long ( argc, argv, "f:i:u:o:s:e:n:bhv", longOptions, &index))!= -1 )
+	{
+		switch (o)
+		{
+			case 'i':
+				indexingMode = 1;
+				fastaFile = optarg;
+				break;
+			case 's':
+				searchingMode = 1;
+				fastaFile = optarg;
+				break;
+			case 'b':
+				batchMode = 1;
+				break;
+			case 'c': 
+				cropSize = atoi(optarg);
+				break;
+			case 'w':
+				WINDOW_SIZE = atoi(optarg);
+				break;
+			case 'x':
+				seqFile1 = optarg;
+				break;
+			case 'y':
+				seqFile2 = optarg;
+				break;
+			case 'u':
+				unmappedOutput = optarg;
+				break;
+			case 'o':
+				mappingOutput = getMem(FILE_NAME_LENGTH);
+				mappingOutputPath = getMem(FILE_NAME_LENGTH);
+				stripPath (optarg, &mappingOutputPath, &mappingOutput);
+				break;
+			case 'n':
+				maxHits = atoi(optarg);
+				break;
+			case 'e':
+				errThreshold = atoi(optarg);
+				break;
+			case 'l':
+				minPairEndedDistance = atoi(optarg);
+				break;
+			case 'm':
+				maxPairEndedDistance = atoi(optarg);
+				break;					
+			case 'h':
+				printHelp();
+				return 0;
+				break;
+			case 'v':
+				fprintf(stdout, "%s.%s\n", versionNumber, versionNumberF);
+				return 0;
+				break;
+		}
+
+	}
+
+	if (indexingMode + searchingMode != 1)
+	{
+		fprintf(stdout, "ERROR: Indexing / Searching mode should be selected\n");
+		return 0;
+	}
+
+	if (WINDOW_SIZE > 14 || WINDOW_SIZE < 8)
+	{
+		fprintf(stdout, "ERROR: Window size should be in [8..14]\n");
+		return 0;
+	}
+
+
+	if ( indexingMode )
+	{
+		CONTIG_SIZE		= 15000000;
+		CONTIG_MAX_SIZE	= 40000000;
+
+		if (batchMode)
+		{
+			batchFile = fastaFile;
+			fastaFile = NULL;
+		}
+
+		if (batchFile == NULL && fastaFile == NULL)
+		{
+			fprintf(stdout, "ERROR: Reference(s) should be indicated for indexing\n");
+			return 0;
+		}
+
+		if (pairedEndDiscordantMode)
+		{
+			fprintf(stdout, "ERROR: --discordant cannot be used in indexing mode. \n");
+			return 0;
+		}
+
+	}
+
+
+	if ( searchingMode )
+	{
+		CONTIG_SIZE		= 300000000;
+		CONTIG_MAX_SIZE	= 300000000;
+
+
+		if (batchMode)
+		{
+			batchFile = fastaFile;
+			fastaFile = NULL;
+		}
+
+		if (batchFile == NULL && fastaFile == NULL)
+		{
+			fprintf(stdout, "ERROR: Index File(s) should be indiciated for searching\n");
+			return 0;
+		}
+
+		if (seqFile1 == NULL && seqFile2 == NULL)
+		{
+			fprintf(stdout, "ERROR: Please indicate a sequence file for searching.\n");
+			return 0;
+		}
+
+
+		if (!pairedEndMode && !bisulfiteMode && seqFile2 != NULL)
+		{
+			fprintf(stdout, "ERROR: Second File can be indicated in pairedend/bisulfite mode\n");
+			return 0;
+		}
+
+		if (pairedEndMode && (minPairEndedDistance <0 || maxPairEndedDistance < 0 || minPairEndedDistance > maxPairEndedDistance))
+		{
+			fprintf(stdout, "ERROR: Please enter a valid range for pairedend sequences.\n");
+			return 0;
+		}
+
+		if (pairedEndMode && seqFile1 == NULL)
+		{
+			fprintf(stdout, "ERROR: Please indicate the first file for pairedend search.\n");
+			return 0;
+		}
+
+		if (!pairedEndMode && pairedEndDiscordantMode)
+		{
+			fprintf(stdout, "ERROR: --discordant should be used with --pe");
+			return 0;
+		}
+
+		if (!pairedEndMode && pairedEndProfilingMode)
+		{
+			fprintf(stdout, "ERROR: --profile should be used with --pe");
+			return 0;
+		}
+	}
+
+	int i = 0;
+
+
+	if (batchMode)
+	{
+		FILE *fp = fileOpen(batchFile, "r");
+
+		if (fp == NULL)
+			return 0;
+
+		fileCnt  = 0;
+
+		while ( fgets(fileName[fileCnt][0], FILE_NAME_LENGTH, fp))
+		{
+			for (i = strlen(fileName[fileCnt][0])-1; i>=0; i--)
+				if ( !isspace(fileName[fileCnt][0][i]))
+					break;
+			fileName[fileCnt][0][i+1] = '\0';
+
+			if (strcmp(fileName[fileCnt][0], "") != 0)
+			{
+				if (bisulfiteMode)
+					sprintf(fileName[fileCnt][1], "%s.bsindex", fileName[fileCnt][0]); 
+				else
+					sprintf(fileName[fileCnt][1], "%s.index", fileName[fileCnt][0]); 
+				fileCnt++;
+			}
+		}
+	}
+	else
+	{
+		sprintf(fileName[fileCnt][0], "%s", fastaFile);
+		if (bisulfiteMode)
+			sprintf(fileName[fileCnt][1], "%s.bsindex", fileName[fileCnt][0]); 
+		else
+			sprintf(fileName[fileCnt][1], "%s.index", fileName[fileCnt][0]); 
+		fileCnt++;
+	}
+
+
+	if (pairedEndProfilingMode)
+	{
+
+		minPairEndedDistance = 0;
+		maxPairEndedDistance = 300000000;
+
+	}
+
+	if (pairedEndDiscordantMode)
+	{
+		minPairEndedDiscordantDistance = minPairEndedDistance;
+		maxPairEndedDiscordantDistance = maxPairEndedDistance;
+
+		minPairEndedDistance = 0;
+		maxPairEndedDistance = 300000000;
+	}
+
+	return 1;
+}
+
+
+void printHelp()
+{
+	char *errorType;
+	if (mrFAST)
+	{
+		fprintf(stdout,"mrFAST : Micro-Read Fast Alignment Search Tool.\n\n");
+		fprintf(stdout,"Usage: mrFAST [options]\n\n");
+		errorType="edit distance";
+	}
+	else
+	{
+		fprintf(stdout,"mrsFAST : Micro-Read Substitutions (only) Fast Alignment Search Tool.\n\n");
+		fprintf(stdout,"mrsFAST is a cache oblivious read mapping tool. mrsFAST capable of mapping\n");
+		fprintf(stdout,"single and paired end reads to the reference genome. Bisulfite treated \n");
+		fprintf(stdout,"sequences are not supported in this version. By default mrsFAST reports  \n");
+		fprintf(stdout,"the output in SAM format.\n\n");
+		fprintf(stdout,"Usage: mrsFAST [options]\n\n");
+		errorType="hamming distance";
+	}
+
+	fprintf(stdout,"General Options:\n");
+	fprintf(stdout," -v|--version\t\tCurrent Version.\n");
+	fprintf(stdout," -h\t\t\tShows the help file.\n");
+	fprintf(stdout,"\n\n");
+
+	fprintf(stdout,"Indexing Options:\n");
+	fprintf(stdout," --index [file]\t\tGenerate an index from the specified fasta file. \n");
+	fprintf(stdout," -b\t\t\tIndicates the indexing will be done in batch mode.\n\t\t\tThe file specified in --index should contain the \n\t\t\tlist of fasta files.\n");
+	fprintf(stdout," -ws [int]\t\tSet window size for indexing (default:12-min:8 max:14).\n");
+	//	fprintf(stdout," -bs \t\t\tBisulfite mode.");
+	fprintf(stdout,"\n\n");
+
+	fprintf(stdout,"Searching Options:\n");
+	fprintf(stdout," --search [file]\tSearch the specified genome. Index file should be \n\t\t\tin same directory as the fasta file.\n");
+	fprintf(stdout," -b\t\t\tIndicates the mapping will be done in batch mode. \n\t\t\tThe file specified in --search should contain the \n\t\t\tlist of fasta files.\n");
+	fprintf(stdout," --pe \t\t\tSearch will be done in Pairedend mode.\n");
+	//	fprintf(stdout," --bs \t\t\tSearch will be done in Bisulfite mode.\n");
+	fprintf(stdout," --seq [file]\t\tInput sequences in fasta/fastq format [file]. If \n\t\t\tpairend reads are interleaved, use this option.\n");
+	fprintf(stdout," --seq1 [file]\t\tInput sequences in fasta/fastq format [file] (First \n\t\t\tfile). Use this option to indicate the first file of \n\t\t\tpair-end reads. You can use this option alone in \n\t\t\tbisulfite mode. \n");
+	fprintf(stdout," --seq2 [file]\t\tInput sequences in fasta/fastq format [file] (Second \n\t\t\tfile). Use this option to indicate the second file of \n\t\t\tpair-end reads. You can use this option alone in \n\t\t\tbisulfite mode. \n");
+	fprintf(stdout," -o [file]\t\tOutput of the mapped sequences. The default is output.\n");
+	fprintf(stdout," --seqcomp \t\tIndicates that the input sequences are compressed(gz).\n");
+	fprintf(stdout," --outcomp \t\tIndicates that output file should be compressed(gz).\n");
+	//	fprintf(stdout," -u [file]\t\tSave unmapped sequences to the [file] in fasta format.\n");
+	fprintf(stdout," -n [int]\t\tMaximum number of locations reported for a sequence \n\t\t\t(default 0, all mappings). \n");
+	fprintf(stdout," -e [int]\t\t%s (default 2).\n", errorType);
+	fprintf(stdout," --min [int]\t\tMin inferred distance allowed between two pairend sequences.\n");
+	fprintf(stdout," --max [int]\t\tMax inferred distance allowed between two pairend sequences.\n");
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/CommandLineParser.h	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, 
+ * are permitted provided that the following conditions are met:
+ *   
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-12-08
+ */
+
+
+#ifndef __COMMAND_LINE_PARSER__
+#define __COMMAND_LINE_PARSER__
+
+int parseCommandLine (int argc, char *argv[]);
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/Common.c	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, 
+ * are permitted provided that the following conditions are met:
+ *   
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-02-01
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <zlib.h>
+#include <string.h>
+#include "Common.h"
+
+
+unsigned short 			SEQ_LENGTH = 0;
+unsigned short 			QUAL_LENGTH = 0;
+long long				memUsage = 0;
+/**********************************************/
+FILE *fileOpen(char *fileName, char *mode)
+{
+	FILE *fp;
+	fp = fopen (fileName, mode);
+	if (fp == NULL)
+	{
+		fprintf(stdout, "Error: Cannot Open the file %s\n", fileName);
+		fflush(stdout);
+		exit(0);
+	}
+	return fp;
+}
+/**********************************************/
+gzFile fileOpenGZ(char *fileName, char *mode)
+{
+	gzFile gzfp;
+	gzfp = gzopen (fileName, mode);
+	if (gzfp == Z_NULL)
+	{
+		fprintf(stdout, "Error: Cannot Open the file %s\n", fileName);
+		fflush(stdout);
+		exit(0);
+	}
+	return gzfp;
+}
+/**********************************************/
+double getTime(void)
+{
+	struct timeval t;
+	gettimeofday(&t, NULL);
+	return t.tv_sec+t.tv_usec/1000000.0;
+}
+
+/**********************************************/
+char reverseCompleteChar(char c)
+{
+	char ret;
+	switch (c)
+	{
+		case 'A': 
+					ret = 'T';
+					break;
+		case 'T':
+					ret = 'A';
+					break;
+		case 'C':	
+					ret = 'G';
+					break;
+		case 'G':
+					ret = 'C';
+					break;
+		default:
+					ret = 'N';
+					break;
+	}
+	return ret;
+}
+/**********************************************/
+void reverseComplete (char *seq, char *rcSeq , int length)
+{
+	int i;
+	for (i=0; i<length; i++)
+	{
+		rcSeq[i]=reverseCompleteChar (seq[length-1-i]) ;
+	}
+}
+/**********************************************/
+void * getMem(size_t size)
+{
+	memUsage+=size;
+	return malloc(size);
+}
+/**********************************************/
+void freeMem(void *ptr, size_t size)
+{
+	memUsage-=size;
+	free(ptr);
+}
+/**********************************************/
+double getMemUsage()
+{
+	return memUsage/1048576.0;
+}
+/**********************************************/
+void reverse (char *seq, char *rcSeq , int length)
+{
+	int i;
+	for (i=0; i<length; i++)
+	{
+		rcSeq[i]=seq[length-1-i];
+	}
+}
+/**********************************************/
+void stripPath(char *full, char **path, char **fileName)
+{
+	int i;
+	int pos = -1;
+
+	for (i=strlen(full)-1; i>=0; i--)
+	{
+		if (full[i]=='/')
+		{
+			pos = i;
+			break;
+		}
+
+	}
+
+	if (pos != -1)
+	{
+		sprintf(*fileName, "%s%c", (full+pos+1), '\0');
+		full[pos+1]='\0';
+		sprintf(*path,"%s%c", full, '\0');
+	}
+	else
+	{
+		sprintf(*fileName, "%s%c", full, '\0');
+		sprintf(*path,"%c", '\0');
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/Common.h	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, 
+ * are permitted provided that the following conditions are met:
+ *   
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-12-08
+ */
+
+
+#ifndef __COMMON__
+#define __COMMON__
+
+#include <zlib.h>
+
+#define SEQ_MAX_LENGTH		200			// Seq Max Length
+#define CONTIG_OVERLAP		200 		// No. of characters overlapped between contings
+#define CONTIG_NAME_SIZE	200			// Contig name max size
+#define FILE_NAME_LENGTH	400			// Filename Max Length
+#define DISCORDANT_CUT_OFF  800
+
+extern unsigned int		CONTIG_SIZE;
+extern unsigned int		CONTIG_MAX_SIZE;
+
+
+extern unsigned char	WINDOW_SIZE				;		// WINDOW SIZE for indexing/searching
+extern unsigned short	SEQ_LENGTH;						// Sequence(read) length
+extern unsigned short	QUAL_LENGTH;
+
+extern char				*versionNumber;
+extern char				*versionNumberF;
+extern unsigned char	mrFAST;
+
+
+extern int				uniqueMode;
+extern int				indexingMode;
+extern int				searchingMode;
+extern int				bisulfiteMode;
+extern int				pairedEndMode;
+extern int				pairedEndDiscordantMode;
+extern int				pairedEndProfilingMode;
+extern int				seqCompressed;
+extern int				outCompressed;
+extern int				cropSize;
+extern int				progressRep;
+extern char 			*seqFile1;
+extern char				*seqFile2;
+extern char				*seqUnmapped;
+extern char				*mappingOutput;
+extern char 			*mappingOutputPath;
+extern char				*unmappedOutput;
+extern unsigned char	seqFastq;
+extern unsigned char	errThreshold;
+extern unsigned char	maxHits;	
+extern int				minPairEndedDiscordantDistance;
+extern int				maxPairEndedDiscordantDistance;
+extern int				minPairEndedDistance;
+extern int				maxPairEndedDistance;
+extern char				fileName[1000][2][FILE_NAME_LENGTH];
+extern int				fileCnt;
+extern long long		memUsage;
+
+FILE	* fileOpen(char *fileName, char *mode);
+gzFile	fileOpenGZ(char *fileName, char *mode);
+double	getTime(void);
+void	reverseComplete (char *seq, char *rcSeq , int length);
+void	* getMem(size_t size);
+void	freeMem(void * ptr, size_t size);
+double	getMemUsage();
+void 	reverse (char *seq, char *rcSeq , int length);
+void 	stripPath(char *full, char **path, char **fileName);
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/HashTable.c	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,428 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, 
+ * are permitted provided that the following conditions are met:
+ *   
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-12-08
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "Common.h"
+#include "RefGenome.h"
+#include "HashTable.h"
+/**********************************************/
+FILE		*_ih_fp					= NULL;
+IHashTable	*_ih_hashTable			= NULL;
+int 		_ih_maxHashTableSize	= 0;
+char		*_ih_refGen				= NULL;
+char		*_ih_refGenName			= NULL;
+long long	_ih_memUsage			= 0;
+int			_ih_refGenOff			= 0;
+/**********************************************/
+
+int hashVal(char *seq)
+{
+	int i=0;
+	int val=0, numericVal=0;
+
+	while(i<WINDOW_SIZE)
+	{
+		switch (seq[i])
+		{
+			case 'A':
+				numericVal = 0;
+				break;
+			case 'C':
+				numericVal = 1;
+				break;
+			case 'G' :
+				numericVal = 2;
+				break;
+			case 'T':
+				numericVal = 3;
+				break;
+			default:
+				return -1;
+				break;
+		}
+		val = (val << 2)|numericVal;
+		i++;
+	}
+	return val;
+}
+/**********************************************/
+void freeIHashTableContent(IHashTable *hashTable, unsigned int maxSize)
+{
+	int i=0;
+	for (i=0; i<maxSize; i++)
+	{
+		if (hashTable[i].locs != NULL)
+		{
+			freeMem(hashTable[i].locs, (hashTable[i].locs[0]+1)*(sizeof(unsigned int)));
+			hashTable[i].locs = NULL;
+		}
+	}
+}
+/**********************************************/
+void initSavingIHashTable(char *fileName)
+{
+	int tmp;
+	_ih_fp = fileOpen(fileName, "w");
+	unsigned char bIndex = 0;				// Bisulfite Index
+	
+	// First Two bytes are indicating the type of the index & window size
+	tmp = fwrite(&bIndex, sizeof(bIndex), 1, _ih_fp);
+	tmp = fwrite(&WINDOW_SIZE, sizeof(WINDOW_SIZE), 1, _ih_fp);
+	
+}
+/**********************************************/
+void finalizeSavingIHashTable()
+{
+	fclose(_ih_fp);
+}
+/**********************************************/
+void saveIHashTable(IHashTable *hashTable,  unsigned int size, unsigned int maxSize, char *refGen, char *refGenName, int refGenOffset)
+{
+	int tmp;
+	
+	// Every Chunk starts with a byte indicating whether it has extra info;
+	unsigned char extraInfo = 0;
+	tmp = fwrite (&extraInfo, sizeof(extraInfo), 1, _ih_fp);
+
+	short len = strlen(refGenName);
+	tmp = fwrite(&len, sizeof(len), 1, _ih_fp);
+	tmp = fwrite(refGenName, sizeof(char), len, _ih_fp);
+
+	tmp = fwrite(&refGenOffset, sizeof(refGenOffset), 1, _ih_fp);
+	
+	unsigned int refGenLength = strlen(refGen);
+	tmp = fwrite(&refGenLength, sizeof(refGenLength), 1, _ih_fp);
+	tmp = fwrite(refGen, sizeof(char), refGenLength, _ih_fp);
+
+	tmp = fwrite(&size, sizeof(size), 1, _ih_fp);
+
+	int i=0,j=0;
+	unsigned char cnt=0;
+	for (i=0; i<maxSize; i++)
+	{
+		if (hashTable[i].locs != NULL)
+		{
+			tmp = fwrite(&i, sizeof(i), 1, _ih_fp);
+
+			if (hashTable[i].locs[0] < 250)
+			{
+				cnt = hashTable[i].locs[0];
+				tmp = fwrite(&cnt, sizeof(cnt), 1, _ih_fp);
+			}
+			else
+			{
+				cnt =0;
+				tmp = fwrite (&cnt, sizeof(cnt), 1, _ih_fp);
+				tmp = fwrite (&(hashTable[i].locs[0]), sizeof(hashTable[i].locs[0]), 1, _ih_fp);
+			}
+
+			for (j=1; j<=hashTable[i].locs[0]; j++)
+				tmp = fwrite(&(hashTable[i].locs[j]), sizeof(hashTable[i].locs[j]), 1, _ih_fp);
+		}
+	}
+}
+/**********************************************/
+unsigned int addIHashTableLocation(IHashTable *hashTable, int hv, int location)
+{
+	unsigned int	sizeInc				= 0;
+
+	if (hashTable[hv].locs == NULL)
+	{
+		sizeInc = 1;
+		hashTable[hv].locs = getMem (sizeof(unsigned int)*2);
+		hashTable[hv].locs[0]=1;
+		hashTable[hv].locs[1]=location;
+	}
+	else
+	{
+		int size = hashTable[hv].locs[0];
+		int i;
+		unsigned int *tmp = getMem( (size + 2) * sizeof(unsigned int) );
+
+		for (i = 0; i <= size; i++)
+		{
+			tmp[i] = hashTable[hv].locs[i];
+		}
+		size++;
+		tmp[0] = size;
+		tmp[size] = location;
+		
+		freeMem(hashTable[hv].locs, (hashTable[hv].locs[0]*(sizeof(unsigned int))));
+		hashTable[hv].locs = tmp;
+	}
+	return sizeInc;
+}
+/**********************************************/
+void generateIHashTable(char *fileName, char *indexName)
+{
+	double          startTime           = getTime();
+	unsigned int	hashTableSize		= 0;
+	unsigned int 	hashTableMaxSize	= pow(4, WINDOW_SIZE);
+	IHashTable		*hashTable			= getMem(sizeof(IHashTable)*hashTableMaxSize);
+	char 			*refGenName;
+	char			*refGen;
+	int				refGenOff			= 0;
+	int i, hv, l, flag;
+
+
+	for ( i = 0; i < hashTableMaxSize; i++)
+	{
+		hashTable[i].locs = NULL;
+	}
+
+	//Loading Fasta File
+	if (!initLoadingRefGenome(fileName))
+		return;		
+	initSavingIHashTable(indexName);
+	
+	fprintf(stdout, "Generating Index from %s", fileName);
+	fflush(stdout);
+
+	char *prev = getMem (CONTIG_NAME_SIZE);
+	prev[0]='\0';
+
+	do
+	{
+		flag = 	 loadRefGenome (&refGen, &refGenName, &refGenOff);	
+
+		if ( strcmp(prev, refGenName) != 0)
+		{
+			fprintf(stdout, "\n - %s ", refGenName);
+			fflush(stdout);
+			sprintf(prev, "%s", refGenName);
+		}
+		else
+		{
+			fprintf(stdout, ".");
+			fflush(stdout);
+		}
+		
+		l = strlen(refGen) - WINDOW_SIZE;
+
+		for (i=0; i < l; i++)
+		{
+			hv = hashVal(refGen + i);
+			if (hv != -1)
+			{
+				hashTableSize += addIHashTableLocation (hashTable, hv, i+1);
+			}
+		}
+
+		saveIHashTable(hashTable, hashTableSize, hashTableMaxSize, refGen, refGenName, refGenOff);
+		freeIHashTableContent(hashTable, hashTableMaxSize);
+		hashTableSize = 0;
+	} while (flag);
+
+	freeMem(prev, CONTIG_NAME_SIZE);
+	freeMem(hashTable, sizeof(IHashTable)*hashTableMaxSize);
+
+	finalizeLoadingRefGenome();
+	finalizeSavingIHashTable();
+
+	fprintf(stdout, "\nDONE in %0.2fs!\n", (getTime()-startTime));
+}
+
+/**********************************************/
+void finalizeLoadingIHashTable()
+{
+	freeIHashTableContent(_ih_hashTable, _ih_maxHashTableSize);
+	freeMem(_ih_hashTable, sizeof(IHashTable)* _ih_maxHashTableSize);
+	freeMem(_ih_refGen, strlen(_ih_refGen)+1) ;
+	freeMem(_ih_refGenName, strlen(_ih_refGenName)+1);
+	fclose(_ih_fp);
+}
+
+/**********************************************/
+int  loadIHashTable(double *loadTime)
+{
+	double startTime = getTime();
+	unsigned char extraInfo = 0;
+	short len;
+	unsigned int refGenLength;
+	unsigned int hashTableSize;
+	unsigned int tmpSize;
+	int tmp;
+	int i=0,j=0;
+
+	if ( fread(&extraInfo, sizeof(extraInfo), 1, _ih_fp) != sizeof(extraInfo) )
+		return 0;
+
+	freeIHashTableContent(_ih_hashTable, _ih_maxHashTableSize);
+	freeMem(_ih_refGen, strlen(_ih_refGen)+1) ;
+	freeMem(_ih_refGenName, strlen(_ih_refGenName)+1);
+
+	// Reading Chr Name
+	tmp = fread(&len, sizeof(len), 1, _ih_fp);
+	_ih_refGenName = getMem(sizeof(char)* (len+1));
+	tmp = fread(_ih_refGenName, sizeof(char), len, _ih_fp);
+	_ih_refGenName [len] ='\0';
+
+	tmp = fread(&_ih_refGenOff, sizeof (_ih_refGenOff), 1, _ih_fp);
+
+	// Reading Size and Content of Ref Genome
+	tmp = fread(&refGenLength, sizeof(refGenLength), 1, _ih_fp);
+	_ih_refGen = getMem(sizeof(char)*(refGenLength+1));
+	tmp = fread(_ih_refGen, sizeof(char), refGenLength, _ih_fp);
+	_ih_refGen[refGenLength]='\0';
+
+	//Reading Hashtable Size and Content
+	tmp = fread(&hashTableSize, sizeof(hashTableSize), 1, _ih_fp);
+	
+	unsigned int hv;
+	unsigned char cnt=0;
+	for (i=0; i<hashTableSize; i++)
+	{
+		tmp = fread(&hv, sizeof(hv), 1, _ih_fp);
+		tmp = fread(&cnt, sizeof(cnt), 1, _ih_fp);
+
+		if (cnt>0)
+		{
+			tmpSize = cnt;
+		}
+		else
+		{
+			tmp = fread(&tmpSize, sizeof(tmpSize), 1, _ih_fp);
+		}
+	
+		_ih_hashTable[hv].locs = getMem( sizeof(unsigned int)* (tmpSize+1) );
+		_ih_hashTable[hv].locs[0] = tmpSize;
+
+		for (j=1; j<=tmpSize; j++)
+			tmp = fread(&(_ih_hashTable[hv].locs[j]), sizeof(_ih_hashTable[hv].locs[j]), 1, _ih_fp);
+	}
+	*loadTime = getTime()-startTime;
+
+	return 1;
+}
+/**********************************************/
+unsigned int *getIHashTableCandidates(int hv)
+{
+	if ( hv != -1 )
+		return _ih_hashTable[hv].locs;
+	else 
+		return NULL;
+}
+/**********************************************/
+/**********************************************/
+/**********************************************/
+void configHashTable()
+{
+	if (WINDOW_SIZE <= 14)
+	{
+		generateHashTable = &generateIHashTable;
+		loadHashTable = &loadIHashTable;
+		finalizeLoadingHashTable = &finalizeLoadingIHashTable;
+		getCandidates = &getIHashTableCandidates;
+	}
+	else
+	{
+
+
+	}
+}
+/**********************************************/
+int initLoadingHashTable(char *fileName)
+{
+	int i;	
+	unsigned char bsIndex;
+	int tmp; 
+
+	_ih_fp = fileOpen(fileName, "r");	
+
+	if (_ih_fp == NULL)
+		return 0;
+
+	tmp = fread(&bsIndex, sizeof(bsIndex), 1, _ih_fp);
+	if (bsIndex)
+	{
+		fprintf(stdout, "Error: Wrong Type of Index indicated");
+		return 0;
+	}
+	
+	tmp = fread(&WINDOW_SIZE, sizeof(WINDOW_SIZE), 1, _ih_fp);
+
+	configHashTable();
+
+	if (_ih_maxHashTableSize != pow(4, WINDOW_SIZE))
+	{
+
+		if (_ih_hashTable != NULL)
+		{
+			freeIHashTableContent(_ih_hashTable, _ih_maxHashTableSize);
+			freeMem(_ih_hashTable, sizeof(IHashTable)* _ih_maxHashTableSize);
+			freeMem(_ih_refGen, strlen(_ih_refGen)+1) ;
+			freeMem(_ih_refGenName, strlen(_ih_refGenName)+1);
+		}
+
+		_ih_maxHashTableSize = pow(4, WINDOW_SIZE);
+
+		_ih_hashTable = getMem (sizeof(IHashTable) * _ih_maxHashTableSize);
+		for (i=0; i<_ih_maxHashTableSize; i++)
+			_ih_hashTable[i].locs = NULL;
+		_ih_refGen = getMem(1);
+		_ih_refGen[0]='\0';
+		_ih_refGenName = getMem(1);
+		_ih_refGenName[0] = '\0';
+	}
+
+	return 1;
+}
+/**********************************************/
+char *getRefGenome()
+{
+	return _ih_refGen;
+
+}
+/**********************************************/
+char *getRefGenomeName()
+{
+	return _ih_refGenName;
+
+}
+/**********************************************/
+int getRefGenomeOffset()
+{
+	return _ih_refGenOff;
+
+}
+/**********************************************/
+HashTable *getHashTable()
+{
+	return NULL;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/HashTable.h	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, 
+ * are permitted provided that the following conditions are met:
+ *   
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-12-08
+ */
+
+
+#ifndef __HASH_TABLE__
+#define __HASH_TABLE__
+
+typedef struct HashTable
+{
+	long long hv;
+	int *locs; 
+} HashTable;
+
+typedef struct 
+{
+	unsigned int *locs;
+} IHashTable;
+
+int				hashVal(char *seq);
+void			configHashTable();
+char			*getRefGenome();
+char			*getRefGenomeName();
+int				getRefGenomeOffset();
+int				initLoadingHashTable(char *fileName);
+HashTable		*getHashTable();
+
+void 			(*generateHashTable)(char *fileName, char *indexName);
+int				(*loadHashTable)(double *loadTime);
+void			(*finalizeLoadingHashTable)();
+unsigned int	*(*getCandidates)(int hv);
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/Makefile	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,10 @@
+ALL: mrsfast
+
+LDFLAGS=-static
+LIBS=-lz -lm
+CFLAGS=-O3
+CC=gcc
+
+mrsfast: baseFAST.o MrsFAST.o Common.o CommandLineParser.o RefGenome.o HashTable.o Reads.o Output.o
+	$(CC) $^ -o $@ ${LDFLAGS} ${LIBS}
+	rm *.o
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/MrsFAST.c	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,1757 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-01-29
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "Common.h"
+#include "Reads.h"
+#include "HashTable.h"
+#include "Output.h"
+#include "MrsFAST.h"
+#include "RefGenome.h"
+
+float calculateScore(int index, char *seq, char *qual, int *err);
+unsigned char		mrFAST = 0;
+char				*versionNumberF="0.2";
+
+long long			verificationCnt = 0;
+long long 			mappingCnt = 0;
+long long			mappedSeqCnt = 0;
+long long			completedSeqCnt = 0;
+char				*mappingOutput;
+/**********************************************/
+char				*_msf_refGen = NULL;
+int					_msf_refGenLength = 0;
+int					_msf_refGenOffset = 0;
+char				*_msf_refGenName = NULL;
+
+int					_msf_refGenBeg;
+int					_msf_refGenEnd;
+
+IHashTable			*_msf_hashTable = NULL;
+
+int					*_msf_samplingLocs;
+int					*_msf_samplingLocsEnds;
+int					_msf_samplingLocsSize;
+
+Read				*_msf_seqList;
+int					_msf_seqListSize;
+
+ReadIndexTable		*_msf_rIndex = NULL;
+int					_msf_rIndexSize;
+int					_msf_rIndexMax;
+
+SAM					_msf_output;
+
+OPT_FIELDS			*_msf_optionalFields;
+
+char				*_msf_op;
+
+char				_msf_numbers[200][3];
+char				_msf_cigar[5];
+
+MappingInfo			*_msf_mappingInfo;
+
+int					*_msf_seqHits;
+int					_msf_openFiles = 0;
+int					_msf_maxLSize=0;
+int					_msf_maxRSize=0;
+/**********************************************/
+int compare (const void *a, const void *b)
+{
+	return ((Pair *)a)->hv - ((Pair *)b)->hv;
+}
+/**********************************************/
+void preProcessReads()
+{
+	int i=0;
+	int j=0;
+	int pos = 0;
+
+	_msf_rIndexMax = -1;
+
+	int tmpSize = _msf_seqListSize*_msf_samplingLocsSize*2;
+	Pair *tmp = getMem(sizeof(Pair)*tmpSize);
+
+	for (i=0; i<_msf_seqListSize; i++)
+	{
+		for (j=0; j< _msf_samplingLocsSize; j++)
+		{
+
+			tmp[pos].hv = hashVal(_msf_seqList[i].seq+_msf_samplingLocs[j]);
+			tmp[pos].seqInfo = pos;
+			pos++;
+		}
+		for (j=0; j<_msf_samplingLocsSize; j++)
+		{
+			tmp[pos].hv = hashVal(_msf_seqList[i].rseq+_msf_samplingLocs[j]);
+			tmp[pos].seqInfo = pos;
+			pos++;
+		}
+	}
+
+	qsort(tmp, tmpSize, sizeof(Pair), compare);
+
+
+	int uniq = 0;
+	int prev = -2;
+	int beg = -1;
+	int end = -1;
+
+	for (i=0; i<tmpSize; i++)
+	{
+		if (prev != tmp[i].hv)
+		{
+			uniq ++;
+			prev = tmp[i].hv;
+		}
+	}
+
+	_msf_rIndexSize = uniq;
+	_msf_rIndex = getMem(sizeof(ReadIndexTable)*_msf_rIndexSize);
+	prev = -2;
+
+	j=0;
+	beg =0;
+	while (beg < tmpSize)
+	{
+		end = beg;
+		while (end+1<tmpSize && tmp[end+1].hv==tmp[beg].hv)
+			end++;
+
+		_msf_rIndex[j].hv = tmp[beg].hv;
+		_msf_rIndex[j].seqInfo = getMem(sizeof(int)*(end-beg+2));
+		_msf_rIndex[j].seqInfo[0] = end-beg+1;
+		if ((end-beg+1) > _msf_rIndexMax)
+			_msf_rIndexMax = end-beg+1;
+
+		for (i=1; i<=_msf_rIndex[j].seqInfo[0]; i++)
+		{
+			_msf_rIndex[j].seqInfo[i]=tmp[beg+i-1].seqInfo;
+		}
+		j++;
+		beg = end+1;
+	}
+	freeMem(tmp, sizeof(Pair)*tmpSize);
+}
+/**********************************************/
+void initFAST(Read *seqList, int seqListSize, int *samplingLocs, int samplingLocsSize, char *genFileName)
+{
+	if (_msf_optionalFields == NULL)
+	{
+		_msf_op = getMem(SEQ_LENGTH);
+		if (pairedEndMode)
+		{
+			_msf_optionalFields = getMem(4*sizeof(OPT_FIELDS));
+		}
+		else
+		{
+			_msf_optionalFields = getMem(2*sizeof(OPT_FIELDS));
+		}
+
+		int i;
+		for (i=0; i<200;i++)
+		{
+			sprintf(_msf_numbers[i],"%d%c",i, '\0');
+		}
+		sprintf(_msf_cigar, "%dM", SEQ_LENGTH);
+	}
+
+	if (_msf_samplingLocsEnds == NULL)
+	{
+		int i;
+		_msf_samplingLocs = samplingLocs;
+		_msf_samplingLocsSize = samplingLocsSize;
+
+		_msf_samplingLocsEnds = malloc(sizeof(int)*_msf_samplingLocsSize);
+		for (i=0; i<_msf_samplingLocsSize; i++)
+		{
+			_msf_samplingLocsEnds[i]=_msf_samplingLocs[i]+WINDOW_SIZE-1;
+		}
+
+		_msf_seqList = seqList;
+		_msf_seqListSize = seqListSize;
+
+		preProcessReads();
+	}
+	if (_msf_refGenName == NULL)
+	{
+		_msf_refGenName = getMem(SEQ_LENGTH);
+	}
+	_msf_refGen =  getRefGenome();
+	_msf_refGenLength = strlen(_msf_refGen);
+	_msf_refGenOffset = getRefGenomeOffset();
+	sprintf(_msf_refGenName,"%s%c", getRefGenomeName(), '\0');
+
+	if (pairedEndMode && _msf_seqHits == NULL)
+	{
+		_msf_mappingInfo  = getMem(seqListSize * sizeof (MappingInfo));
+
+		int i=0;
+		for (i=0; i<seqListSize; i++)
+		{
+			//_msf_mappingInfo[i].next = getMem(sizeof(MappingLocations));
+			_msf_mappingInfo[i].next = NULL;
+			_msf_mappingInfo[i].size = 0;
+		}
+
+		_msf_seqHits = getMem((_msf_seqListSize/2) * sizeof(int));
+
+
+		for (i=0; i<_msf_seqListSize/2; i++)
+		{
+			_msf_seqHits[i] = 0;
+		}
+
+		initLoadingRefGenome(genFileName);
+	}
+
+	if (_msf_refGenOffset == 0)
+	{
+		_msf_refGenBeg = 1;
+	}
+	else
+	{
+		_msf_refGenBeg = CONTIG_OVERLAP - SEQ_LENGTH + 2;
+	}
+	_msf_refGenEnd = _msf_refGenLength - SEQ_LENGTH + 1;
+
+
+}
+/**********************************************/
+void finalizeFAST()
+{
+	freeMem(_msf_seqHits, (_msf_seqListSize/2) * sizeof(int));
+	freeMem(_msf_refGenName, SEQ_LENGTH);
+	int i;
+	for (i=0; i<_msf_rIndexSize; i++)
+	{
+		freeMem(_msf_rIndex[i].seqInfo, _msf_rIndex[i].seqInfo[0]+1);
+	}
+	freeMem(_msf_rIndex, _msf_rIndexSize);
+}
+
+/**********************************************/
+int verifySingleEnd(int index, char* seq, int offset)
+{
+	int curOff = 0;
+	int i;
+
+	char *ref;
+
+	int err;
+	int errCnt =0;
+	int errCntOff = 0;
+	int NCntOff = 0;
+
+	int matchCnt = 0;
+	int pp = 0;
+
+	ref = _msf_refGen + index - 1;
+
+	verificationCnt++;
+
+	for (i = 0; i < SEQ_LENGTH; i++)
+	{
+		err	= *ref != *seq;
+
+		errCnt += err;
+		if (errCnt > errThreshold)
+		{
+
+			return -1;
+		}
+
+		if (i >= _msf_samplingLocs[curOff] && i <= _msf_samplingLocsEnds[curOff])
+		{
+			errCntOff +=  err;
+			NCntOff += (*seq == 'N');
+		}
+		else if (curOff < _msf_samplingLocsSize && i>=_msf_samplingLocs[curOff+1])
+		{
+
+			if (errCntOff == 0 && NCntOff == 0 && offset > curOff)
+			{
+				return -1;
+			}
+
+			errCntOff = 0;
+			NCntOff = 0;
+			curOff++;
+
+			if ( i >= _msf_samplingLocs[curOff])
+			{
+				errCntOff += err;
+				NCntOff += (*seq == 'N');
+			}
+		}
+
+		ref++;
+		seq++;
+	}
+	return errCnt;
+}
+
+/**********************************************/
+int calculateMD(int index, char *seq, int err, char **opSeq)
+{
+	int i;
+	char *ref;
+	char *ver;
+	short matchCnt = 0;
+	char *op = *opSeq;
+	int pp = 0;
+
+	ref = _msf_refGen + index-1;
+	ver = seq;
+
+	if (err>0 || err == -1 )
+	{
+
+		err = 0;
+		for (i=0; i < SEQ_LENGTH; i++)
+		{
+			if (* ref != *ver)
+			{
+				err++;
+				if (matchCnt)
+				{
+					if (matchCnt < 10)
+					{
+						op[pp++]=_msf_numbers[matchCnt][0];
+					}
+					else if (matchCnt < 100)
+					{
+						op[pp++]=_msf_numbers[matchCnt][0];
+						op[pp++]=_msf_numbers[matchCnt][1];
+					}
+					else
+					{
+						op[pp++]=_msf_numbers[matchCnt][0];
+						op[pp++]=_msf_numbers[matchCnt][1];
+						op[pp++]=_msf_numbers[matchCnt][2];
+					}
+
+					matchCnt = 0;
+				}
+				op[pp++]=*ref;
+			}
+			else
+			{
+				matchCnt++;
+			}
+			ref++;
+			ver++;
+		}
+
+	}
+	if (err == 0)
+	{
+		matchCnt = SEQ_LENGTH;
+	}
+
+	if (matchCnt>0)
+	{
+		if (matchCnt < 10)
+		{
+			op[pp++]=_msf_numbers[matchCnt][0];
+		}
+		else if (matchCnt < 100)
+		{
+			op[pp++]=_msf_numbers[matchCnt][0];
+			op[pp++]=_msf_numbers[matchCnt][1];
+		}
+		else
+		{
+			op[pp++]=_msf_numbers[matchCnt][0];
+			op[pp++]=_msf_numbers[matchCnt][1];
+			op[pp++]=_msf_numbers[matchCnt][2];
+		}
+	}
+	op[pp]='\0';
+
+	return err;
+}
+
+/**********************************************/
+void mapSingleEndSeqListBal(unsigned int *l1, int s1, unsigned int *l2, int s2, int dir)
+{
+
+	if (s1 == 0 || s2 == 0)
+	{
+		return;
+	}
+	else if (s1 == s2 && s1 <= 50)
+	{
+
+		int j = 0;
+		int z = 0;
+		int *locs;
+		int *seqInfo;
+		char *_tmpSeq, *_tmpQual;
+		char rqual[QUAL_LENGTH+1];
+		rqual[QUAL_LENGTH]='\0';
+
+		if (dir > 0)
+		{
+			locs		= (int *) l1;
+			seqInfo		= (int *) l2;
+		}
+		else
+		{
+			locs		= (int *) l2;
+			seqInfo		= (int *) l1;
+		}
+
+
+		for (j=0; j<s2; j++)
+		{
+			int re = _msf_samplingLocsSize * 2;
+			int r = seqInfo[j]/re;
+			if (maxHits!=0 && _msf_seqList[r].hits[0] == maxHits)
+			{
+				continue;
+			}
+
+			int x = seqInfo[j] % re;
+			int o = x % _msf_samplingLocsSize;
+			char d = (x/_msf_samplingLocsSize)?1:0;
+
+
+			if (d)
+			{
+				reverse(_msf_seqList[r].qual, rqual, QUAL_LENGTH);
+				_tmpQual = rqual;
+				_tmpSeq = _msf_seqList[r].rseq;
+			}
+			else
+			{
+				_tmpQual = _msf_seqList[r].qual;
+				_tmpSeq = _msf_seqList[r].seq;
+			}
+
+
+			for (z=0; z<s1; z++)
+			{
+				int genLoc = locs[z]-_msf_samplingLocs[o];
+
+
+				if ( genLoc < _msf_refGenBeg || genLoc > _msf_refGenEnd )
+					continue;
+
+				int err = -1;
+
+
+
+				err = verifySingleEnd(genLoc, _tmpSeq, o);
+
+
+
+				if (err != -1)
+				{
+					calculateMD(genLoc, _tmpSeq, err, &_msf_op);
+					mappingCnt++;
+					_msf_seqList[r].hits[0]++;
+
+					_msf_output.QNAME		= _msf_seqList[r].name;
+					_msf_output.FLAG		= 16 * d;
+					_msf_output.RNAME		= _msf_refGenName;
+					_msf_output.POS			= genLoc + _msf_refGenOffset;
+					_msf_output.MAPQ		= 255;
+					_msf_output.CIGAR		= _msf_cigar;
+					_msf_output.MRNAME		= "*";
+					_msf_output.MPOS		= 0;
+					_msf_output.ISIZE		= 0;
+					_msf_output.SEQ			= _tmpSeq;
+					_msf_output.QUAL		= _tmpQual;
+
+					_msf_output.optSize		= 2;
+					_msf_output.optFields	= _msf_optionalFields;
+
+					_msf_optionalFields[0].tag = "NM";
+					_msf_optionalFields[0].type = 'i';
+					_msf_optionalFields[0].iVal = err;
+
+					_msf_optionalFields[1].tag = "MD";
+					_msf_optionalFields[1].type = 'Z';
+					_msf_optionalFields[1].sVal = _msf_op;
+
+					output(_msf_output);
+
+
+					if (_msf_seqList[r].hits[0] == 1)
+					{
+						mappedSeqCnt++;
+					}
+
+					if ( maxHits == 0 )
+					{
+						_msf_seqList[r].hits[0] = 2;
+					}
+
+
+					if ( maxHits!=0 && _msf_seqList[r].hits[0] == maxHits)
+					{
+						completedSeqCnt++;
+						break;
+					}
+				}
+
+			}
+		}
+	}
+	else
+	{
+		int tmp1=s1/2, tmp2= s2/2;
+		if (tmp1 != 0)
+			mapSingleEndSeqListBal(l1, tmp1, l2+tmp2, s2-tmp2, dir);
+		mapSingleEndSeqListBal(l2+tmp2, s2-tmp2, l1+tmp1, s1-tmp1, -dir);
+		if (tmp2 !=0)
+			mapSingleEndSeqListBal(l1+tmp1, s1-tmp1, l2, tmp2, dir);
+		if (tmp1 + tmp2 != 0)
+			mapSingleEndSeqListBal(l2, tmp2, l1, tmp1, -dir);
+	}
+}
+
+
+/**********************************************/
+void mapSingleEndSeqListTOP(unsigned int *l1, int s1, unsigned int *l2, int s2)
+{
+	if (s1 < s2)
+	{
+		mapSingleEndSeqListBal(l1, s1, l2, s1,1);
+		mapSingleEndSeqListTOP(l1, s1, l2+s1, s2-s1);
+	}
+	else if (s1 > s2)
+	{
+		mapSingleEndSeqListBal(l1, s2, l2, s2,1);
+		mapSingleEndSeqListTOP(l1+s2, s1-s2, l2, s2);
+	}
+	else
+	{
+		mapSingleEndSeqListBal(l1, s1, l2, s2,1);
+	}
+}
+
+
+/**********************************************/
+void mapSingleEndSeqList(unsigned int *l1, int s1, unsigned int *l2, int s2)
+{
+	if ( s2/s1 <= 2)
+	{
+		int j = 0;
+		int z = 0;
+		int *locs = (int *) l1;
+		int *seqInfo = (int *) l2;
+		char *_tmpSeq, *_tmpQual;
+		char rqual[QUAL_LENGTH+1];
+		rqual[QUAL_LENGTH]='\0';
+
+		for (j=0; j<s2; j++)
+		{
+			int re = _msf_samplingLocsSize * 2;
+			int r = seqInfo[j]/re;
+			if (maxHits!=0 && _msf_seqList[r].hits[0] == maxHits)
+			{
+				continue;
+			}
+
+			int x = seqInfo[j] % re;
+			int o = x % _msf_samplingLocsSize;
+			char d = (x/_msf_samplingLocsSize)?1:0;
+
+
+			if (d)
+			{
+				reverse(_msf_seqList[r].qual, rqual, QUAL_LENGTH);
+				_tmpQual = rqual;
+				_tmpSeq = _msf_seqList[r].rseq;
+			}
+			else
+			{
+				_tmpQual = _msf_seqList[r].qual;
+				_tmpSeq = _msf_seqList[r].seq;
+			}
+
+
+			for (z=0; z<s1; z++)
+			{
+				int genLoc = locs[z]-_msf_samplingLocs[o];
+
+
+				if ( genLoc < _msf_refGenBeg || genLoc > _msf_refGenEnd )
+					continue;
+
+				int err = -1;
+
+
+
+				err = verifySingleEnd(genLoc, _tmpSeq, o);
+
+
+
+				if (err != -1)
+				{
+					calculateMD(genLoc, _tmpSeq, err, &_msf_op);
+					mappingCnt++;
+					_msf_seqList[r].hits[0]++;
+
+					_msf_output.QNAME		= _msf_seqList[r].name;
+					_msf_output.FLAG		= 16 * d;
+					_msf_output.RNAME		= _msf_refGenName;
+					_msf_output.POS			= genLoc + _msf_refGenOffset;
+					_msf_output.MAPQ		= 255;
+					_msf_output.CIGAR		= _msf_cigar;
+					_msf_output.MRNAME		= "*";
+					_msf_output.MPOS		= 0;
+					_msf_output.ISIZE		= 0;
+					_msf_output.SEQ			= _tmpSeq;
+					_msf_output.QUAL		= _tmpQual;
+
+					_msf_output.optSize		= 2;
+					_msf_output.optFields	= _msf_optionalFields;
+
+					_msf_optionalFields[0].tag = "NM";
+					_msf_optionalFields[0].type = 'i';
+					_msf_optionalFields[0].iVal = err;
+
+					_msf_optionalFields[1].tag = "MD";
+					_msf_optionalFields[1].type = 'Z';
+					_msf_optionalFields[1].sVal = _msf_op;
+
+					output(_msf_output);
+
+
+					if (_msf_seqList[r].hits[0] == 1)
+					{
+						mappedSeqCnt++;
+					}
+
+					if ( maxHits == 0 )
+					{
+						_msf_seqList[r].hits[0] = 2;
+					}
+
+
+					if ( maxHits!=0 && _msf_seqList[r].hits[0] == maxHits)
+					{
+						completedSeqCnt++;
+						break;
+					}
+				}
+
+			}
+		}
+	}
+	else if (s1 == 1)
+	{
+		//	fprintf(stderr, "1");
+		int tmp = s2/2;
+		mapSingleEndSeqList(l1, s1, l2, tmp);
+		mapSingleEndSeqList(l1, s1, l2+tmp, s2-tmp);
+	}
+	else if (s2 == 1)
+	{
+		//	fprintf(stderr, "2");
+		int tmp = s1/2;
+		mapSingleEndSeqList(l1, tmp, l2, s2);
+		mapSingleEndSeqList(l1+tmp, s1-tmp, l2, s2);
+	}
+	else
+	{
+		//	fprintf(stderr, "3");
+		int tmp1=s1/2, tmp2= s2/2;
+		mapSingleEndSeqList(l1, tmp1, l2, tmp2);
+		mapSingleEndSeqList(l1+tmp1, s1-tmp1, l2, tmp2);
+		mapSingleEndSeqList(l1+tmp1, s1-tmp1, l2+tmp2, s2-tmp2);
+		mapSingleEndSeqList(l1, tmp1, l2+tmp2, s2-tmp2);
+	}
+}
+/**********************************************/
+int	 mapSingleEndSeq()
+{
+	int i = 0;
+	unsigned int *locs = NULL;
+	unsigned int *seqInfo = NULL;
+	while ( i < _msf_rIndexSize )
+	{
+
+		locs = getCandidates (_msf_rIndex[i].hv);
+		if ( locs != NULL)
+		{
+			seqInfo  = _msf_rIndex[i].seqInfo;
+			mapSingleEndSeqListTOP (locs+1, locs[0], seqInfo+1, seqInfo[0]);
+		}
+		i++;
+	}
+	return 1;
+}
+
+
+/**********************************************/
+/**********************************************/
+/**********************************************/
+/**********************************************/
+/**********************************************/
+int compareOut (const void *a, const void *b)
+{
+	FullMappingInfo *aInfo = (FullMappingInfo *)a;
+	FullMappingInfo *bInfo = (FullMappingInfo *)b;
+	return aInfo->loc - bInfo->loc;
+}
+
+/**********************************************/
+void mapPairedEndSeqList(unsigned int *l1, int s1, unsigned int *l2, int s2)
+{
+	if ( s2/s1 <= 2)
+	{
+		int j = 0;
+		int z = 0;
+		int *locs = (int *) l1;
+		int *seqInfo = (int *) l2;
+		char *_tmpSeq, *_tmpQual;
+		char rqual[QUAL_LENGTH+1];
+		rqual[QUAL_LENGTH]='\0';
+
+		for (j=0; j<s2; j++)
+		{
+			int re = _msf_samplingLocsSize * 2;
+			int r = seqInfo[j]/re;
+
+			if (pairedEndDiscordantMode && (_msf_seqList[r].hits[0] == 1 || (_msf_seqHits[r/2] > DISCORDANT_CUT_OFF) ))
+			{
+				continue;
+			}
+
+			int x = seqInfo[j] % re;
+			int o = x % _msf_samplingLocsSize;
+			char d = (x/_msf_samplingLocsSize)?-1:1;
+
+
+			if (d==-1)
+			{
+				_tmpSeq = _msf_seqList[r].rseq;
+			}
+			else
+			{
+				_tmpSeq = _msf_seqList[r].seq;
+			}
+
+
+			for (z=0; z<s1; z++)
+			{
+				int genLoc = locs[z]-_msf_samplingLocs[o];
+
+
+				if ( genLoc < _msf_refGenBeg || genLoc > _msf_refGenEnd )
+					continue;
+
+				int err = -1;
+
+
+
+				err = verifySingleEnd(genLoc, _tmpSeq, o);
+
+
+				if (err != -1)
+				{
+					MappingLocations *parent = NULL;
+					MappingLocations *child = _msf_mappingInfo[r].next;
+
+					genLoc+= _msf_refGenOffset;
+
+					int i = 0;
+					for (i=0; i<(_msf_mappingInfo[r].size/MAP_CHUNKS); i++)
+					{
+						parent = child;
+						child = child->next;
+					}
+
+					if (child==NULL)
+					{
+						MappingLocations *tmp = getMem(sizeof(MappingLocations));
+						tmp->next = NULL;
+						tmp->loc[0]=genLoc * d;
+						if (parent == NULL)
+							_msf_mappingInfo[r].next = tmp;
+						else
+							parent->next = tmp;
+					}
+					else
+					{
+						child->loc[_msf_mappingInfo[r].size % MAP_CHUNKS] = genLoc * d;
+					}
+
+
+
+					_msf_mappingInfo[r].size++;
+
+				}
+
+			}
+		}
+	}
+	else if (s1 == 1)
+	{
+		int tmp = s2/2;
+		mapPairedEndSeqList(l1, s1, l2, tmp);
+		mapPairedEndSeqList(l1, s1, l2+tmp, s2-tmp);
+	}
+	else if (s2 == 1)
+	{
+		int tmp = s1/2;
+		mapPairedEndSeqList(l1, tmp, l2, s2);
+		mapPairedEndSeqList(l1+tmp, s1-tmp, l2, s2);
+	}
+	else
+	{
+		int tmp1=s1/2, tmp2= s2/2;
+		mapPairedEndSeqList(l1, tmp1, l2, tmp2);
+		mapPairedEndSeqList(l1+tmp1, s1-tmp1, l2, tmp2);
+		mapPairedEndSeqList(l1+tmp1, s1-tmp1, l2+tmp2, s2-tmp2);
+		mapPairedEndSeqList(l1, tmp1, l2+tmp2, s2-tmp2);
+	}
+}
+
+/**********************************************/
+int	 mapPairedEndSeq()
+{
+	int i = 0;
+	unsigned int *locs = NULL;
+	unsigned int *seqInfo = NULL;
+	while ( i < _msf_rIndexSize )
+	{
+		locs = getCandidates (_msf_rIndex[i].hv);
+		if ( locs != NULL)
+		{
+			seqInfo  = _msf_rIndex[i].seqInfo;
+			mapPairedEndSeqList(locs+1, locs[0], seqInfo+1, seqInfo[0]);
+
+		}
+		i++;
+	}
+
+
+	char fname1[FILE_NAME_LENGTH];
+	char fname2[FILE_NAME_LENGTH];
+	MappingLocations *cur, *tmp;
+	int tmpOut;
+	int j;
+	int lmax=0, rmax=0;
+
+	sprintf(fname1, "%s__%s__%d__1",mappingOutputPath, mappingOutput, _msf_openFiles);
+	sprintf(fname2, "%s__%s__%d__2",mappingOutputPath, mappingOutput, _msf_openFiles);
+
+	FILE* out;
+	FILE* out1 = fileOpen(fname1, "w");
+	FILE* out2 = fileOpen(fname2, "w");
+
+	_msf_openFiles++;
+
+	for (i=0; i<_msf_seqListSize; i++)
+	{
+
+		if (i%2==0)
+		{
+			out = out1;
+
+			if (lmax <  _msf_mappingInfo[i].size)
+			{
+				lmax = _msf_mappingInfo[i].size;
+			}
+		}
+		else
+		{
+			out = out2;
+			if (rmax < _msf_mappingInfo[i].size)
+			{	
+				rmax = _msf_mappingInfo[i].size;
+			}
+		}
+
+		tmpOut = fwrite(&(_msf_mappingInfo[i].size), sizeof(int), 1, out);					
+		if (_msf_mappingInfo[i].size > 0)
+		{
+			cur = _msf_mappingInfo[i].next;
+			for (j=0; j < _msf_mappingInfo[i].size; j++)
+			{
+				if ( j>0  && j%MAP_CHUNKS==0)
+				{
+					cur = cur->next;
+				}
+				tmpOut = fwrite(&(cur->loc[j % MAP_CHUNKS]), sizeof(int), 1, out);
+			}
+			_msf_mappingInfo[i].size = 0;
+			//	_msf_mappingInfo[i].next = NULL;
+		}
+	}
+
+	_msf_maxLSize += lmax;
+	_msf_maxRSize += rmax;
+
+	fclose(out1);
+	fclose(out2);
+
+	//fprintf(stdout, "%d %d\n", _msf_maxLSize, _msf_maxRSize);
+
+}
+
+/**********************************************/
+void outputPairedEnd()
+{
+
+	char *curGen;
+	char *curGenName;
+	int tmpOut;
+
+	loadRefGenome(&_msf_refGen, &_msf_refGenName, &tmpOut);
+
+	FILE* in1[_msf_openFiles];
+	FILE* in2[_msf_openFiles];
+
+	char fname1[_msf_openFiles][FILE_NAME_LENGTH];	
+	char fname2[_msf_openFiles][FILE_NAME_LENGTH];	
+
+	// discordant
+	FILE *out, *out1, *out2;
+
+	char fname3[FILE_NAME_LENGTH];
+	char fname4[FILE_NAME_LENGTH];
+	char fname5[FILE_NAME_LENGTH];
+
+	if (pairedEndDiscordantMode)
+	{
+		sprintf(fname3, "%s__%s__disc", mappingOutputPath, mappingOutput);
+		sprintf(fname4, "%s__%s__oea1", mappingOutputPath, mappingOutput);
+		sprintf(fname5, "%s__%s__oea2", mappingOutputPath, mappingOutput);
+		out = fileOpen(fname3, "a");
+		out1 = fileOpen(fname4, "a");
+		out2 = fileOpen(fname5, "a");
+	}
+
+
+
+	int i;
+
+	FullMappingInfo *mi1 = getMem(sizeof(FullMappingInfo) * _msf_maxLSize);
+	FullMappingInfo *mi2 = getMem(sizeof(FullMappingInfo) * _msf_maxRSize);
+
+
+	for (i=0; i<_msf_openFiles; i++)
+	{
+		sprintf(fname1[i], "%s__%s__%d__1", mappingOutputPath, mappingOutput, i);
+		sprintf(fname2[i], "%s__%s__%d__2", mappingOutputPath, mappingOutput, i);
+		in1[i] = fileOpen(fname1[i], "r");
+		in2[i] = fileOpen(fname2[i], "r");
+	}
+
+
+	int size;
+	int j, k;
+	int size1, size2;
+
+	for (i=0; i<_msf_seqListSize/2; i++)
+	{
+		size1 = size2 = 0;
+		for (j=0; j<_msf_openFiles; j++)
+		{
+			tmpOut = fread(&size, sizeof(int), 1, in1[j]);
+			if ( size > 0 )
+			{
+				for (k=0; k<size; k++)
+				{
+
+					mi1[size1+k].dir = 1;
+					tmpOut = fread (&(mi1[size1+k].loc), sizeof(int), 1, in1[j]);
+					if (mi1[size1+k].loc<1)
+					{	
+						mi1[size1+k].loc *= -1;
+						mi1[size1+k].dir = -1;
+					}
+				}
+				qsort(mi1+size1, size, sizeof(FullMappingInfo), compareOut);
+				size1+=size;
+			}
+		}
+
+		for (j=0; j<_msf_openFiles; j++)
+		{
+			tmpOut = fread(&size, sizeof(int), 1, in2[j]);
+			if ( size > 0 )
+			{
+				for (k=0; k<size; k++)
+				{
+
+					mi2[size2+k].dir = 1;
+					tmpOut = fread (&(mi2[size2+k].loc), sizeof(int), 1, in2[j]);
+
+					if (mi2[size2+k].loc<1)
+					{	
+						mi2[size2+k].loc *= -1;
+						mi2[size2+k].dir = -1;
+					}
+				}
+				qsort(mi2+size2, size, sizeof(FullMappingInfo), compareOut);
+				size2+=size;
+			}
+		}
+
+		//if (i == 6615)
+		//	fprintf(stdout, "%d: %s %d %d ",i, _msf_seqList[i*2].name, size1, size2);	
+
+		int lm, ll, rl, rm;
+		int pos = 0;
+
+		if (pairedEndDiscordantMode)
+		{
+
+			for (j=0; j<size1; j++)
+			{
+				lm = mi1[j].loc - maxPairEndedDiscordantDistance + 1;
+				ll = mi1[j].loc - minPairEndedDiscordantDistance + 1;
+				rl = mi1[j].loc + minPairEndedDiscordantDistance - 1;
+				rm = mi1[j].loc + maxPairEndedDiscordantDistance - 1;
+
+				while (pos<size2 && mi2[pos].loc < lm)
+				{
+					pos++;
+				}
+
+				k = pos;
+				while (k<size2 && mi2[k].loc<=rm)
+				{
+					if ( mi2[k].loc <= ll || mi2[k].loc >= rl)
+					{
+						if ( (mi1[j].loc < mi2[k].loc && mi1[j].dir==1 && mi2[k].dir == -1)  ||  
+								(mi1[j].loc > mi2[k].loc && mi1[j].dir==-1 && mi2[k].dir == 1) )
+						{
+							_msf_seqList[i*2].hits[0]=1;
+							_msf_seqList[i*2+1].hits[0]=1;
+							size1=0;
+							size2=0;
+							break;
+						}
+					}
+					k++;
+				}
+			}
+
+			_msf_seqHits[i]+= size1*size2;
+			if (_msf_seqHits[i]> DISCORDANT_CUT_OFF)
+			{
+				_msf_seqList[i*2].hits[0]=1;
+				_msf_seqList[i*2+1].hits[0]=1;
+				size1=0;
+				size2=0;
+			}
+		}
+		//if (i == 6615)
+		//	fprintf(stdout, "%d %d\n", size1, size2);	
+
+		char *seq1, *seq2, *rseq1, *rseq2, *qual1, *qual2;
+		char rqual1[QUAL_LENGTH+1], rqual2[QUAL_LENGTH+1];
+		rqual1[QUAL_LENGTH] = rqual2[QUAL_LENGTH] = '\0';
+		seq1 = _msf_seqList[i*2].seq;
+		rseq1 = _msf_seqList[i*2].rseq;
+		qual1 = _msf_seqList[i*2].qual;
+		reverse(_msf_seqList[i*2].qual, rqual1, QUAL_LENGTH);
+
+		seq2 = _msf_seqList[i*2+1].seq;
+		rseq2 = _msf_seqList[i*2+1].rseq;
+		qual2 = _msf_seqList[i*2+1].qual;
+		reverse(_msf_seqList[i*2+1].qual, rqual2, QUAL_LENGTH);
+
+
+		if (pairedEndDiscordantMode)
+		{
+			for (k=0; k<size1; k++)
+			{
+				int tm = -1;
+				mi1[k].score = calculateScore(mi1[k].loc, (mi1[k].dir==-1)?rseq1:seq1, (mi1[k].dir==-1)?rqual1:qual1, &tm);
+				mi1[k].err = tm;
+			}
+
+			for (k=0; k<size2; k++)
+			{
+				int tm = -1;
+				mi2[k].score = calculateScore(mi2[k].loc, (mi2[k].dir==-1)?rseq2:seq2, (mi2[k].dir==-1)?rqual2:qual2, &tm);
+				mi2[k].err = tm;
+			}
+
+		}
+		else
+		{
+			for (k=0; k<size1; k++)
+			{
+				mi1[k].err = calculateMD(mi1[k].loc, (mi1[k].dir==-1)?rseq1:seq1, -1, &_msf_op);
+				sprintf(mi1[k].md, "%s", _msf_op);
+			}
+
+			for (k=0; k<size2; k++)
+			{
+				mi2[k].err = calculateMD(mi2[k].loc, (mi2[k].dir==-1)?rseq2:seq2, -1, &_msf_op);
+				sprintf(mi2[k].md, "%s", _msf_op);
+			}
+		}
+		pos = 0;
+
+		for (j=0; j<size1; j++)
+		{
+			lm = mi1[j].loc - maxPairEndedDistance + 1;
+			ll = mi1[j].loc - minPairEndedDistance + 1;
+			rl = mi1[j].loc + minPairEndedDistance - 1;
+			rm = mi1[j].loc + maxPairEndedDistance - 1;
+
+			//fprintf(stdout, "%d %d %d %d %d\n",lm, ll,mi1[j].loc ,rl, rm); 
+
+			while (pos<size2 && mi2[pos].loc < lm)
+			{
+				pos++;
+			}
+
+			//fprintf(stdout, "POS: %d %d \n", pos, mi2[pos].loc);
+
+			k = pos;
+			while (k<size2 && mi2[k].loc <= rm)
+			{
+				if (mi2[k].loc <= ll || mi2[k].loc >= rl)
+				{
+					if (pairedEndDiscordantMode)
+					{
+						int tmp;
+						int rNo = i;
+						int loc = mi1[j].loc*mi1[j].dir;
+						int err = mi1[j].err;
+						float sc = mi1[j].score;
+
+						char l = strlen(_msf_refGenName);
+
+						tmp = fwrite(&rNo, sizeof(int), 1, out);
+
+						tmp = fwrite(&l, sizeof(char), 1, out);
+						tmp = fwrite(_msf_refGenName, sizeof(char), l, out);
+
+						tmp = fwrite(&loc, sizeof(int), 1, out);
+						tmp = fwrite(&err, sizeof(char), 1, out);
+						tmp = fwrite(&sc, sizeof(float), 1, out);
+
+
+						loc = mi2[k].loc*mi2[k].dir;
+						err = mi2[k].err;
+						sc = mi2[k].score;
+
+						tmp = fwrite(&loc, sizeof(int), 1, out);
+						tmp = fwrite(&err, sizeof(char), 1, out);
+						tmp = fwrite(&sc, sizeof(float), 1, out);
+					} // end discordant
+					else
+					{ //start sampe
+						char *seq;
+						char *qual;
+						char d1;
+						char d2;
+						int isize;
+						int proper=0;
+						// ISIZE CALCULATION
+						// The distance between outer edges								
+						isize = abs(mi1[j].loc - mi2[k].loc)+SEQ_LENGTH-1;												
+						if (mi1[j].loc - mi2[k].loc > 0)
+						{
+							isize *= -1;
+						}
+
+						d1 = (mi1[j].dir == -1)?1:0;
+						d2 = (mi2[k].dir == -1)?1:0;
+
+						if ( d1 )
+						{
+							seq = rseq1;
+							qual = rqual1;
+						}
+						else
+						{
+							seq = seq1;
+							qual = qual1;
+						}
+
+						if ( (mi1[j].loc < mi2[k].loc && !d1 && d2) ||
+							 (mi1[j].loc > mi2[k].loc && d1 && !d2) )
+						{
+							proper = 2;
+						}
+						else
+						{
+							proper = 0;
+						}
+						   
+
+						_msf_output.POS			= mi1[j].loc;
+						_msf_output.MPOS		= mi2[k].loc;
+						_msf_output.FLAG		= 1+proper+16*d1+32*d2+64;
+						_msf_output.ISIZE		= isize;
+						_msf_output.SEQ			= seq,
+						_msf_output.QUAL		= qual;
+						_msf_output.QNAME		= _msf_seqList[i*2].name;
+						_msf_output.RNAME		= _msf_refGenName;
+						_msf_output.MAPQ		= 255;
+						_msf_output.CIGAR		= _msf_cigar;
+						_msf_output.MRNAME		= "=";
+
+						_msf_output.optSize	= 2;
+						_msf_output.optFields	= _msf_optionalFields;
+
+						_msf_optionalFields[0].tag = "NM";
+						_msf_optionalFields[0].type = 'i';
+						_msf_optionalFields[0].iVal = mi1[j].err;
+
+						_msf_optionalFields[1].tag = "MD";
+						_msf_optionalFields[1].type = 'Z';
+						_msf_optionalFields[1].sVal = mi1[j].md;
+
+
+						output(_msf_output);
+
+						if ( d2 )
+						{
+							seq = rseq2;
+							qual = rqual2;
+						}
+						else
+						{
+							seq = seq2;
+							qual = qual2;
+						}
+
+						_msf_output.POS			= mi2[k].loc;
+						_msf_output.MPOS		= mi1[j].loc;
+						_msf_output.FLAG		= 1+proper+16*d2+32*d1+128;
+						_msf_output.ISIZE		= -isize;
+						_msf_output.SEQ			= seq,
+						_msf_output.QUAL		= qual;
+						_msf_output.QNAME		= _msf_seqList[i*2].name;
+						_msf_output.RNAME		= _msf_refGenName;
+						_msf_output.MAPQ		= 255;
+						_msf_output.CIGAR		= _msf_cigar;
+						_msf_output.MRNAME		= "=";
+
+						_msf_output.optSize	= 2;
+						_msf_output.optFields	= _msf_optionalFields;
+
+						_msf_optionalFields[0].tag = "NM";
+						_msf_optionalFields[0].type = 'i';
+						_msf_optionalFields[0].iVal = mi2[k].err;;
+
+						_msf_optionalFields[1].tag = "MD";
+						_msf_optionalFields[1].type = 'Z';
+						_msf_optionalFields[1].sVal = mi2[k].md;
+
+						output(_msf_output);
+					} //end sampe
+				}
+				k++;
+			}
+		}
+	}
+
+	if (pairedEndDiscordantMode)
+	{
+		fclose(out);
+	}
+
+
+	freeMem(mi1, sizeof(FullMappingInfo)*_msf_maxLSize);
+	freeMem(mi2, sizeof(FullMappingInfo)*_msf_maxRSize);
+
+	for (i=0; i<_msf_openFiles; i++)
+	{
+		fclose(in1[i]);
+		fclose(in2[i]);
+		//fprintf(stdout, "%s %s \n", fname1[i], fname2[i]);
+		unlink(fname1[i]);
+		unlink(fname2[i]);
+	}
+	_msf_openFiles = 0;
+
+}
+
+/**********************************************/
+/**********************************************/
+/**********************************************/
+/**********************************************/
+float calculateScore(int index, char *seq, char *qual, int *err)
+{
+	int i;
+	char *ref;
+	char *ver;
+
+	ref = _msf_refGen + index-1;
+	ver = seq;
+	float score = 1;
+
+	if (*err > 0 || *err == -1)
+	{
+		*err = 0;
+
+		for (i=0; i < SEQ_LENGTH; i++)
+		{
+			if (*ref != *ver)
+			{
+				//fprintf(stdout, "%c %c %d", *ref, *ver, *err);
+				(*err)++;
+				score *= 0.001 + 1/pow( 10, ((qual[i]-33)/10.0) );
+			}
+			ref++;
+			ver++;
+		}
+
+	}
+	return score;
+}
+
+/**********************************************/
+void outputPairedEndDiscPP()
+{
+	char genName[SEQ_LENGTH];
+	char fname1[FILE_NAME_LENGTH];
+	char fname2[FILE_NAME_LENGTH];
+	char fname3[FILE_NAME_LENGTH];
+	char fname4[FILE_NAME_LENGTH];
+	char fname5[FILE_NAME_LENGTH];
+	char fname6[FILE_NAME_LENGTH];
+	char l;
+	int loc1, loc2;
+	char err1, err2;
+	char dir1, dir2;
+	float sc1, sc2, lsc=0;
+	int flag = 0;
+	int rNo,lrNo = -1;
+	int tmp;
+	FILE *in, *in1, *in2, *out, *out1, *out2;
+
+	sprintf(fname1, "%s__%s__disc", mappingOutputPath, mappingOutput);
+	sprintf(fname2, "%s__%s__oea1", mappingOutputPath, mappingOutput);
+	sprintf(fname3, "%s__%s__oea2", mappingOutputPath, mappingOutput);
+	sprintf(fname4, "%s%s_DIVET.vh", mappingOutputPath, mappingOutput);
+	sprintf(fname5, "%s%s_OEA1.vh", mappingOutputPath, mappingOutput);
+	sprintf(fname6, "%s%s_OEA2.vh", mappingOutputPath, mappingOutput);
+
+	in   = fileOpen(fname1, "r");
+	in1  = fileOpen(fname2, "r");
+	in2  = fileOpen(fname3, "r");
+	out  = fileOpen(fname4, "w");
+	out1 = fileOpen(fname5, "w");
+	out2 = fileOpen(fname6, "w");
+	if (in != NULL)
+	{
+		flag = fread(&rNo, sizeof(int), 1, in);
+	}
+	else
+	{
+		flag  = 0;
+	}
+
+
+	while (flag)
+	{
+
+		tmp = fread(&l, sizeof(char), 1, in);
+		tmp = fread(genName, sizeof(char), l, in);
+		genName[l]='\0';
+		tmp = fread(&loc1, sizeof(int), 1, in);
+		tmp = fread(&err1, sizeof(char), 1, in);
+		tmp = fread(&sc1, sizeof(float), 1, in);
+		tmp = fread(&loc2, sizeof(int), 1, in);
+		tmp = fread(&err2, sizeof(char), 1, in);
+		tmp = fread(&sc2, sizeof(float), 1, in);
+
+		//if (rNo ==6615)
+		//	fprintf(stdout, "%s %d: %d %0.20f %d %d %0.20f\n", genName, loc1, err1, sc1, loc2, err2, sc2);
+
+		if (_msf_seqList[rNo*2].hits[0] % 2 == 0 && _msf_seqHits[rNo] < DISCORDANT_CUT_OFF)
+		{
+			dir1 = dir2 = 'F';
+
+			if (loc1 < 0)
+			{
+				dir1 = 'R';
+				loc1 = -loc1;
+			}
+
+			if (loc2 < 0)
+			{
+				dir2 = 'R';
+				loc2 = -loc2;
+			}
+
+			if (rNo != lrNo)
+			{
+				int j;
+				for (j=0; j<SEQ_LENGTH; j++)
+				{
+					lsc += _msf_seqList[rNo*2].qual[j]+_msf_seqList[rNo*2+1].qual[j];
+				}
+				lsc /= 2*SEQ_LENGTH;
+				lsc -= 33;
+				lrNo = rNo;
+			}
+
+			int inv = 0;
+			int eve = 0;
+			int dist = 0;
+			char event;
+
+			//fprintf(stdout, "%c %c ", dir1, dir2);
+
+			if ( dir1 == dir2 )
+			{
+				event = 'V';
+				//fprintf(stdout, "Inverstion \n");
+			}
+			else
+			{
+				if (loc1 < loc2)
+				{
+
+					//fprintf(stdout, "< %d ", loc2-loc1-SEQ_LENGTH);
+
+					if (dir1 == 'R' && dir2 == 'F')
+					{
+						event = 'E';
+
+						//fprintf(stdout, "Everted \n");
+					}
+					else if ( loc2 - loc1 >= maxPairEndedDiscordantDistance )
+					{
+						event = 'D';
+						//fprintf(stdout, "Deletion \n");
+					}
+					else
+					{
+						event = 'I';
+						//fprintf(stdout, "Insertion \n");
+					}
+				}
+				else if (loc2 < loc1)
+				{
+					//fprintf(stdout, "> %d ", loc1-loc2-SEQ_LENGTH);
+					if (dir2 == 'R' && dir1 == 'F')
+					{
+						event = 'E';
+						//fprintf(stdout, "Everted \n");
+					}
+					else if ( loc1 - loc2 >= maxPairEndedDiscordantDistance )
+					{
+						event = 'D';
+						//fprintf(stdout, "Deletion \n");
+					}
+					else
+					{
+						event = 'I';
+						//fprintf(stdout, "Insertion \n");
+					}
+				}
+			}
+			_msf_seqList[rNo*2].hits[0] = 2;
+			fprintf(out, "%s\t%s\t%d\t%d\t%c\t%s\t%d\t%d\t%c\t%c\t%d\t%0.0f\t%0.20f\n",
+					_msf_seqList[rNo*2].name, genName, loc1, (loc1+SEQ_LENGTH-1), dir1, genName, loc2, (loc2+SEQ_LENGTH-1), dir2, event, (err1+err2), lsc, sc1*sc2);
+		}
+		flag = fread(&rNo, sizeof(int), 1, in);
+
+	}
+
+	/*
+	   MappingInfoNode *lr[_msf_seqListSize/2];
+	   MappingInfoNode *rr[_msf_seqListSize/2];
+	   MappingInfoNode *cur, *tmpDel, *cur2;
+
+
+	   int ls[_msf_seqListSize/2];
+	   int rs[_msf_seqListSize/2];
+
+
+	   int i=0;
+
+	   for (i = 0; i<_msf_seqListSize/2; i++)
+	   {
+	   lr[i] = rr[i] = NULL;
+	   ls[i] = rs[i] = 0;
+	   }
+
+
+
+	   if (in1 != NULL)
+	   {
+	   flag = fread(&rNo, sizeof(int), 1, in1);
+	   }
+	   else
+	   {
+	   flag  = 0;
+	   }
+
+
+	   while (flag)
+	   {
+	   tmp = fread(&loc1, sizeof(int), 1, in1);
+	   tmp = fread(&err1, sizeof(char), 1, in1);
+	   tmp = fread(&sc1, sizeof(float), 1, in1);
+	   tmp = fread(&l, sizeof(char), 1, in1);
+	   tmp = fread(genName, sizeof(char), l, in1);
+	   genName[l]='\0';
+
+	   if (_msf_seqList[rNo*2].hits[0] == 0)
+	   {
+
+	   if ( ls[rNo] < DISCORDANT_CUT_OFF )
+	   {
+	   ls[rNo]++;
+
+	   cur = lr[rNo];
+
+	   if (cur !=NULL)
+	   {
+	   if (err1 == cur->err)
+	   {
+	   MappingInfoNode *nr = getMem(sizeof(MappingInfoNode));
+
+	   nr->loc = loc1;
+	   nr->err = err1;
+	   nr->score = sc1;
+	   nr->next = lr[rNo];
+	   sprintf(nr->chr,"%s", genName);
+	   lr[rNo] = nr;
+	   }
+	   else if (err1 < cur->err)
+	   {
+	   MappingInfoNode *nr = getMem(sizeof(MappingInfoNode));
+
+	   nr->loc = loc1;
+	   nr->err = err1;
+	   nr->score = sc1;
+	   sprintf(nr->chr,"%s", genName);
+	   nr->next = NULL;
+	   lr[rNo] = nr;
+	while (cur!=NULL)
+	{
+		tmpDel = cur;
+		cur = cur->next;
+		freeMem(tmpDel, sizeof(MappingInfoNode));
+	}
+}
+}
+else
+{
+
+	MappingInfoNode *nr = getMem(sizeof(MappingInfoNode));
+
+	nr->loc = loc1;
+	nr->err = err1;
+	nr->score = sc1;
+	sprintf(nr->chr,"%s", genName);
+	nr->next = NULL;
+	lr[rNo] = nr;
+}
+
+if (ls[rNo] > DISCORDANT_CUT_OFF)
+{
+	cur = lr[rNo];
+	while (cur!=NULL)
+	{
+		tmpDel = cur;
+		cur = cur->next;
+		freeMem(tmpDel, sizeof(MappingInfoNode));
+	}
+}
+}
+
+}
+flag = fread(&rNo, sizeof(int), 1, in1);
+
+}
+
+
+if (in2 != NULL)
+{
+	flag = fread(&rNo, sizeof(int), 1, in2);
+}
+else
+{
+	flag  = 0;
+}
+
+
+while (flag)
+{
+	tmp = fread(&loc1, sizeof(int), 1, in2);
+	tmp = fread(&err1, sizeof(char), 1, in2);
+	tmp = fread(&sc1, sizeof(float), 1, in2);
+	tmp = fread(&l, sizeof(char), 1, in2);
+	tmp = fread(genName, sizeof(char), l, in2);
+	genName[l]='\0';
+
+	if (_msf_seqList[rNo*2].hits[0] == 0)
+	{
+
+		if ( rs[rNo] < DISCORDANT_CUT_OFF )
+		{
+			rs[rNo]++;
+
+			cur = rr[rNo];
+
+			if (cur !=NULL)
+			{
+				if (err1 == cur->err)
+				{
+					MappingInfoNode *nr = getMem(sizeof(MappingInfoNode));
+
+					nr->loc = loc1;
+					nr->err = err1;
+					nr->score = sc1;
+					nr->next = rr[rNo];
+					sprintf(nr->chr,"%s", genName);
+					rr[rNo] = nr;
+				}
+				else if (err1 < cur->err)
+				{
+					MappingInfoNode *nr = getMem(sizeof(MappingInfoNode));
+
+					nr->loc = loc1;
+					nr->err = err1;
+					nr->score = sc1;
+					sprintf(nr->chr,"%s", genName);
+					nr->next = NULL;
+					rr[rNo] = nr;
+					while (cur!=NULL)
+					{
+						tmpDel = cur;
+						cur = cur->next;
+						freeMem(tmpDel, sizeof(MappingInfoNode));
+					}
+				}
+			}
+			else
+			{
+
+				MappingInfoNode *nr = getMem(sizeof(MappingInfoNode));
+
+				nr->loc = loc1;
+				nr->err = err1;
+				nr->score = sc1;
+				sprintf(nr->chr,"%s", genName);
+				nr->next = NULL;
+				rr[rNo] = nr;
+			}
+
+			if (rs[rNo] > DISCORDANT_CUT_OFF)
+			{
+				cur = rr[rNo];
+				while (cur!=NULL)
+				{
+					tmpDel = cur;
+					cur = cur->next;
+					freeMem(tmpDel, sizeof(MappingInfoNode));
+				}
+			}
+		}
+	}
+	flag = fread(&rNo, sizeof(int), 1, in2);
+
+}
+
+
+for (i=0; i<_msf_seqListSize/2; i++)
+{
+	int j;
+	for (j=0; j<SEQ_LENGTH; j++)
+	{
+		lsc += _msf_seqList[i*2].qual[j]+_msf_seqList[i*2+1].qual[j];
+	}
+	lsc /= 2*SEQ_LENGTH;
+	lsc -= 33;
+	if (ls[i] * rs[i] < DISCORDANT_CUT_OFF && ls[i] & rs[i] > 0)
+	{
+		cur = lr[i];
+		while (cur != NULL)
+		{
+			cur2 = rr[i];
+			if (cur->loc < 0)
+			{
+				dir1 = 'R';
+				loc1 = -cur->loc;
+			}
+			else
+			{
+				dir1 = 'F';
+				loc1 = cur->loc;
+			}
+			while (cur2 != NULL)
+			{
+
+				if (cur2->loc < 0)
+				{
+					dir2 = 'R';
+					loc2 = -cur2->loc;
+				}
+				else
+				{
+					dir2 = 'F';
+					loc2 = cur2->loc;
+				}
+
+				fprintf(out, "%s\t%s\t%d\t%d\t%c\t%s\t%d\t%d\t%c\t%c\t%d\t%0.0f\t%0.20f\n",
+						_msf_seqList[i*2].name, cur->chr, loc1, (loc1+SEQ_LENGTH-1), dir1, cur2->chr, loc2, (loc2+SEQ_LENGTH-1), dir2, 'T', (cur->err+cur2->err), lsc, cur->score*cur2->score);
+				cur2 = cur2->next;
+			}
+			cur = cur->next;
+		}
+	}
+
+}*/
+
+
+fclose(in);
+fclose(in1);
+fclose(in2);
+fclose(out);
+fclose(out1);
+fclose(out2);
+
+unlink(fname1);
+unlink(fname2);
+unlink(fname3);
+unlink(fname5);
+unlink(fname6);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/MrsFAST.h	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-12-08
+ */
+
+
+#ifndef __MRS_FAST__
+#define __MRS_FAST__
+
+#include "Reads.h"
+
+#define MAP_CHUNKS 15
+
+// Pair is used to pre-processing and making the read index table
+typedef struct
+{
+	int hv;
+	int seqInfo;
+} Pair;
+
+typedef struct
+{
+	int hv;
+	unsigned int *seqInfo;
+} ReadIndexTable;
+
+
+typedef struct mn
+{
+	int loc;
+	char dir;
+	char err;
+	float score;
+	char md[40];
+	char chr[10];
+} FullMappingInfo;
+
+typedef struct lc
+{
+	int loc[MAP_CHUNKS];
+	struct lc *next;
+} MappingLocations;
+
+typedef struct inf
+{
+	int size;
+	MappingLocations *next;
+} MappingInfo;
+
+typedef struct 
+{
+	FILE * fp;
+	char name[400];
+} FILE_STRUCT;
+
+extern long long			verificationCnt;
+extern long long			mappingCnt;
+extern long long			mappedSeqCnt;
+extern long long			completedSeqCnt;
+
+void initFAST(	Read *seqList,
+				int seqListSize,
+				int *samplingLocs,
+				int samplingLocsSize, 
+				char *fileName);
+
+void finalizeFAST();
+
+int mapSingleEndSeq();
+int mapPaiedEndSeq();
+void outputPairedEnd();
+void outputPairedEndDiscPP();
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/Output.c	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, 
+ * are permitted provided that the following conditions are met:
+ *   
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-12-08
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <zlib.h>
+#include <string.h>
+#include "Common.h"
+#include "Output.h"
+
+FILE			*_out_fp;
+gzFile			_out_gzfp;
+
+
+
+void finalizeGZOutput()
+{
+	gzclose(_out_gzfp);
+}
+
+void finalizeTXOutput()
+{
+	fclose(_out_fp);
+}
+
+
+void gzOutputQ(SAM map)
+{
+	gzprintf(_out_gzfp, "%s\t%d\t%s\t%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s", 
+			map.QNAME, 
+			map.FLAG,
+			map.RNAME, 
+			map.POS,
+			map.MAPQ,
+			map.CIGAR,
+			map.MRNAME,
+			map.MPOS,
+			map.ISIZE,
+			map.SEQ,
+			map.QUAL);
+	
+	int i;
+
+	for ( i = 0; i < map.optSize; i++)
+	{
+		switch (map.optFields[i].type)
+		{
+			case 'A':
+						gzprintf(_out_gzfp, "\t%s:%c:%c", map.optFields[i].tag, map.optFields[i].type, map.optFields[i].cVal);
+						break;
+			case 'i':
+						gzprintf(_out_gzfp, "\t%s:%c:%d", map.optFields[i].tag, map.optFields[i].type, map.optFields[i].iVal);
+						break;
+			case 'f':
+						gzprintf(_out_gzfp, "\t%s:%c:%f", map.optFields[i].tag, map.optFields[i].type, map.optFields[i].fVal);
+						break;
+			case 'Z':
+			case 'H':
+						gzprintf(_out_gzfp, "\t%s:%c:%s", map.optFields[i].tag, map.optFields[i].type, map.optFields[i].sVal);
+						break;
+		}
+	}
+	gzprintf(_out_gzfp, "\n");
+}
+
+void outputQ(SAM map)
+{
+
+	fprintf(_out_fp, "%s\t%d\t%s\t%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s", 
+			map.QNAME, 
+			map.FLAG,
+			map.RNAME, 
+			map.POS,
+			map.MAPQ,
+			map.CIGAR,
+			map.MRNAME,
+			map.MPOS,
+			map.ISIZE,
+			map.SEQ,
+			map.QUAL);
+
+	
+	int i;
+
+	for ( i = 0; i < map.optSize; i++)
+	{
+		switch (map.optFields[i].type)
+		{
+			case 'A':
+						fprintf(_out_fp, "\t%s:%c:%c", map.optFields[i].tag, map.optFields[i].type, map.optFields[i].cVal);
+						break;
+			case 'i':
+						fprintf(_out_fp, "\t%s:%c:%d", map.optFields[i].tag, map.optFields[i].type, map.optFields[i].iVal);
+						break;
+			case 'f':
+						fprintf(_out_fp, "\t%s:%c:%f", map.optFields[i].tag, map.optFields[i].type, map.optFields[i].fVal);
+						break;
+			case 'Z':
+			case 'H':
+						fprintf(_out_fp, "\t%s:%c:%s", map.optFields[i].tag, map.optFields[i].type, map.optFields[i].sVal);
+						break;
+		}
+	}
+	
+	fprintf(_out_fp, "\n");
+}
+
+int initOutput ( char *fileName, int compressed)
+{
+	if (compressed)
+	{
+		char newFileName[strlen(mappingOutputPath)+strlen(fileName)+4];
+		sprintf(newFileName, "%s%s.gz", mappingOutputPath, fileName);
+		_out_gzfp = fileOpenGZ(newFileName, "w1f");
+		if (_out_gzfp == Z_NULL)
+		{
+			return 0;
+		}
+	
+		finalizeOutput = &finalizeGZOutput;
+
+		output = &gzOutputQ;
+	}
+	else
+	{
+	
+		char newFileName[strlen(mappingOutputPath)+strlen(fileName)];
+		sprintf(newFileName, "%s%s", mappingOutputPath, fileName);
+
+		_out_fp = fileOpen(newFileName, "w");
+		if (_out_fp == NULL)
+		{
+			return 0;
+		}
+
+		finalizeOutput = &finalizeTXOutput;
+		output = &outputQ;
+	}
+	return 1;
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/Output.h	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, 
+ * are permitted provided that the following conditions are met:
+ *   
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-12-08
+ */
+
+
+#ifndef __OUTPUT__
+#define __OUTPUT__
+
+#define	FORWARD 0
+#define REVERSE 1
+
+typedef struct
+{
+	char		*tag;
+	char		type;
+	char		cVal;
+	int			iVal;
+	float		fVal;
+	char		*sVal;
+} OPT_FIELDS;
+
+typedef struct  
+{
+	char			*QNAME;
+	short			FLAG;
+	char			*RNAME;
+	int				POS;
+	unsigned char	MAPQ;
+	char			*CIGAR;
+	char			*MRNAME;
+	int				MPOS;
+	int				ISIZE;
+	char			*SEQ;
+	char			*QUAL;
+	
+	int			optSize;
+	OPT_FIELDS	*optFields;
+} SAM;
+
+int initOutput(char *fileName, int compressed);
+void (*finalizeOutput)();
+void (*output)(SAM map);
+
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/Reads.c	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,559 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-12-08
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <zlib.h>
+#include "Common.h"
+#include "Reads.h"
+
+
+
+FILE *_r_fp1;
+FILE *_r_fp2;
+gzFile _r_gzfp1;
+gzFile _r_gzfp2;
+Read *_r_seq;
+int _r_seqCnt;
+int *_r_samplingLocs;
+
+/**********************************************/
+char *(*readFirstSeq)(char *);
+char *(*readSecondSeq)(char *);
+/**********************************************/
+char *readFirstSeqTXT( char *seq )
+{
+	return fgets(seq, SEQ_MAX_LENGTH, _r_fp1);
+}
+
+/**********************************************/
+char *readSecondSeqTXT( char *seq )
+{
+	return fgets(seq, SEQ_MAX_LENGTH, _r_fp2);
+}
+/**********************************************/
+char *readFirstSeqGZ( char *seq )
+{
+	return gzgets(_r_gzfp1, seq, SEQ_MAX_LENGTH);
+}
+
+/**********************************************/
+char *readSecondSeqGZ( char *seq )
+{
+	return gzgets(_r_gzfp2, seq, SEQ_MAX_LENGTH);
+}
+/**********************************************/
+int readAllReads(char *fileName1,
+						char *fileName2,
+						int compressed,
+						unsigned char *fastq,
+						unsigned char pairedEnd,
+						Read **seqList,
+						unsigned int *seqListSize)
+{
+	double startTime=getTime();
+
+	char seq1[SEQ_MAX_LENGTH];
+	char rseq1[SEQ_MAX_LENGTH];
+	char name1[SEQ_MAX_LENGTH];
+	char qual1[SEQ_MAX_LENGTH];
+	char seq2[SEQ_MAX_LENGTH];
+	char rseq2[SEQ_MAX_LENGTH];
+	char name2[SEQ_MAX_LENGTH];
+	char qual2[SEQ_MAX_LENGTH];
+
+	char dummy[SEQ_MAX_LENGTH];
+	char ch;
+	int err1, err2;
+	int nCnt;
+	int discarded = 0;
+	int seqCnt = 0;
+	int maxCnt = 0;
+	int i;
+	Read *list = NULL;
+
+
+	if (!compressed)
+	{
+		_r_fp1 = fileOpen( fileName1, "r");
+
+		if (_r_fp1 == NULL)
+		{
+			return 0;
+		}
+
+		ch = fgetc(_r_fp1);
+
+		if ( pairedEnd && fileName2 != NULL )
+		{
+			_r_fp2 = fileOpen ( fileName2, "r" );
+			if (_r_fp2 == NULL)
+			{
+				return 0;
+			}
+		}
+		else
+		{
+			_r_fp2 = _r_fp1;
+		}
+
+		readFirstSeq = &readFirstSeqTXT;
+		readSecondSeq = &readSecondSeqTXT;
+	}
+	else
+	{
+
+		_r_gzfp1 = fileOpenGZ (fileName1, "r");
+
+		if (_r_gzfp1 == NULL)
+		{
+			return 0;
+		}
+
+		ch = gzgetc(_r_gzfp1);
+
+		if ( pairedEnd && fileName2 != NULL )
+		{
+			_r_fp2 = fileOpenGZ ( fileName2, "r" );
+			if (_r_fp2 == NULL)
+			{
+				return 0;
+			}
+		}
+		else
+		{
+			_r_fp2 = _r_fp1;
+		}
+
+		readFirstSeq = &readFirstSeqGZ;
+		readSecondSeq = &readSecondSeqGZ;
+	}
+
+	if (ch == '>')
+		*fastq = 0;
+	else
+		*fastq = 1;
+
+	// Counting the number of lines in the file
+	while (readFirstSeq(dummy)) maxCnt++;
+
+	if (!compressed)
+	{
+		rewind(_r_fp1);
+	}
+	else
+	{
+		gzrewind(_r_gzfp1);
+	}
+
+	// Calculating the Maximum # of sequences
+	if (*fastq)
+	{
+		maxCnt /= 4;
+	}
+	else
+	{
+		maxCnt /= 2;
+	}
+
+
+
+	if (pairedEnd && fileName2 != NULL )
+		maxCnt *= 2;
+
+	list = getMem(sizeof(Read)*maxCnt);
+
+	while( readFirstSeq(name1) )
+	{
+		err1 = 0;
+		err2 = 0;
+		readFirstSeq(seq1);
+		name1[strlen(name1)-1] = '\0';
+		for (i=0; i<strlen(name1);i++)
+		{
+			if (name1[i] == ' ')
+			{
+				name1[i] = '\0';
+				break;
+			}
+
+		}
+
+		if ( *fastq )
+		{
+			readFirstSeq(dummy);
+			readFirstSeq(qual1);
+			qual1[strlen(qual1)-1] = '\0';
+		}
+		else
+		{
+			sprintf(qual1, "*");
+		}
+
+
+		// Cropping
+		if (cropSize > 0)
+		{
+			seq1[cropSize] = '\0';
+			if ( *fastq )
+				qual1[cropSize] = '\0';
+		}
+
+
+		nCnt = 0;
+		for (i=0; i<strlen(seq1); i++)
+		{
+			seq1[i] = toupper (seq1[i]);
+			if (seq1[i] == 'N')
+			{
+				nCnt++;
+			}
+			else if (isspace(seq1[i]))
+			{
+
+				seq1[i] = '\0';
+				break;
+			}
+		}
+
+		if (nCnt > errThreshold)
+		{
+			err1 = 1;
+		}
+
+		// Reading the second seq of pair-ends
+		if (pairedEnd)
+		{
+			readSecondSeq(name2);
+			readSecondSeq(seq2);
+			name2[strlen(name2)-1] = '\0';
+			for (i=0; i<strlen(name2);i++)
+			{
+				if (name2[i] == ' ')
+				{
+					name2[i] = '\0';
+					break;
+				}
+
+			}
+
+			if ( *fastq )
+			{
+				readSecondSeq(dummy);
+				readSecondSeq(qual2);
+
+				qual2[strlen(qual2)-1] = '\0';
+			}
+			else
+			{
+				sprintf(qual2, "*");
+			}
+
+
+			// Cropping
+			if (cropSize > 0)
+			{
+				seq2[cropSize] = '\0';
+				if ( *fastq )
+					qual2[cropSize] = '\0';
+			}
+
+
+			nCnt = 0;
+			for (i=0; i<strlen(seq2); i++)
+			{
+				seq2[i] = toupper (seq2[i]);
+				if (seq2[i] == 'N')
+				{
+					nCnt++;
+
+				}
+				else if (isspace(seq2[i]))
+				{
+					seq2[i] = '\0';
+				}
+			}
+			if (nCnt > errThreshold)
+			{
+				err2 = 1;
+			}
+		}
+
+		if (!pairedEnd && !err1)
+		{
+
+			int _mtmp = strlen(seq1);
+			list[seqCnt].hits = getMem (1+3*_mtmp+3+strlen(name1)+1);
+			list[seqCnt].seq = list[seqCnt].hits + 1;
+			list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
+			list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
+			list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
+
+
+			reverseComplete(seq1, rseq1, _mtmp);
+			rseq1[_mtmp] =  '\0';
+			int i;
+
+			list[seqCnt].hits[0] = 0;
+
+			for (i=0; i<=_mtmp; i++)
+			{
+				list[seqCnt].seq[i] = seq1[i];
+				list[seqCnt].rseq[i] = rseq1[i] ;
+				list[seqCnt].qual[i] = qual1[i];
+			}
+			sprintf(list[seqCnt].name,"%s%c", ((char*)name1)+1,'\0');
+
+			seqCnt++;
+
+		}
+		else if (pairedEnd && !err1 && !err2)
+		{
+			// Naming Conventions X/1, X/2 OR X
+			int tmplen = strlen(name1);
+			if (strcmp(name1, name2) != 0)
+			{
+				tmplen = strlen(name1)-2;
+			}
+		
+			if (strcmp(name1, "@IL11_266:2:1:922:509/1") == 0)
+			{
+				fprintf(stdout, "%d\n", seqCnt);
+			}
+			//first seq
+			int _mtmp = strlen(seq1);
+			list[seqCnt].hits = getMem (1+3*_mtmp+3+tmplen+1);
+			list[seqCnt].seq = list[seqCnt].hits + 1;
+			list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
+			list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
+			list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
+
+			reverseComplete(seq1, rseq1, _mtmp);
+			rseq1[_mtmp] =  '\0';
+			int i;
+
+			list[seqCnt].hits[0] = 0;
+
+			for (i=0; i<=_mtmp; i++)
+			{
+				list[seqCnt].seq[i] = seq1[i];
+				list[seqCnt].rseq[i] = rseq1[i] ;
+				list[seqCnt].qual[i] = qual1[i];
+			}
+
+
+			name1[tmplen]='\0';
+			sprintf(list[seqCnt].name,"%s%c", ((char*)name1)+1,'\0');
+
+
+			seqCnt++;
+
+			//second seq
+			list[seqCnt].hits = getMem (1+3*_mtmp+3+tmplen+1);
+			list[seqCnt].seq = list[seqCnt].hits + 1;
+			list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
+			list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
+			list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
+
+			reverseComplete(seq2, rseq2, _mtmp);
+			rseq2[_mtmp] =  '\0';
+
+			list[seqCnt].hits[0] = 0;
+
+			for (i=0; i<=_mtmp; i++)
+			{
+				list[seqCnt].seq[i] = seq2[i];
+				list[seqCnt].rseq[i] = rseq2[i] ;
+				list[seqCnt].qual[i] = qual2[i];
+			}
+
+
+			name2[tmplen]='\0';
+			sprintf(list[seqCnt].name,"%s%c", ((char*)name2)+1,'\0');
+
+
+			seqCnt++;
+
+		}
+		else
+		{
+			discarded++;
+		}
+	}
+
+	if (seqCnt > 0)
+	{
+		QUAL_LENGTH = SEQ_LENGTH = strlen(list[0].seq);
+		if (! *fastq)
+		{
+			QUAL_LENGTH = 1;
+		}
+		//fprintf(stderr, "%d %d\n", SEQ_LENGTH, QUAL_LENGTH);
+	}
+	else
+	{
+		fprintf(stdout, "ERR: No reads can be found for mapping\n");
+		return 0;
+	}
+
+
+	if (pairedEnd)
+	{
+//		seqCnt /= 2;
+	}
+
+
+	// Closing Files
+	if (!compressed)
+	{
+		fclose(_r_fp1);
+		if ( pairedEnd && fileName2 != NULL )
+		{
+			fclose(_r_fp2);
+		}
+	}
+	else
+	{
+		gzclose(_r_gzfp1);
+		if ( pairedEnd && fileName2 != NULL)
+		{
+			gzclose(_r_fp2);
+		}
+	}
+
+	*seqList = list;
+	*seqListSize = seqCnt;
+
+	_r_seq = list;
+	_r_seqCnt = seqCnt;
+
+	fprintf(stdout, "%d sequences are read in %0.2f. (%d discarded) [Mem:%0.2f M]\n", seqCnt, (getTime()-startTime), discarded, getMemUsage());
+	//totalLoadingTime+=getTime()-startTime;
+
+	return 1;
+}
+/**********************************************/
+void loadSamplingLocations(int **samplingLocs, int * samplingLocsSize)
+{
+	int i;
+	int samLocsSize = errThreshold + 1;
+	int *samLocs = getMem(sizeof(int)*samLocsSize);
+
+	for (i=0; i<samLocsSize; i++)
+	{
+		samLocs[i] = (SEQ_LENGTH / samLocsSize) *i;
+		if ( samLocs[i] + WINDOW_SIZE > SEQ_LENGTH)
+			samLocs[i] = SEQ_LENGTH - WINDOW_SIZE;
+	}
+
+	// Outputing the sampling locations
+
+/*	int j;
+ 	for (i=0; i<SEQ_LENGTH; i++)
+	{
+		fprintf(stdout, "-");
+	}
+	fprintf(stdout, "\n");
+
+	for ( i=0; i<samLocsSize; i++ )
+	{
+		for ( j=0; j<samLocs[i]; j++ )
+		{
+			fprintf(stdout," ");
+		}
+		for (j=0; j<WINDOW_SIZE; j++)
+		{
+			fprintf(stdout,"+");
+		}
+		fprintf(stdout, "\n");
+		fflush(stdout);
+	}
+	for ( i=0; i<SEQ_LENGTH; i++ )
+	{
+		fprintf(stdout, "-");
+	}
+	fprintf(stdout, "\n");*/
+	*samplingLocs = samLocs;
+	*samplingLocsSize = samLocsSize;
+	_r_samplingLocs = samLocs;
+}
+
+void finalizeReads(char *fileName)
+{
+	FILE *fp1=NULL;
+
+	if (fileName != NULL)
+	{
+		fp1 = fileOpen(fileName, "w");
+	}
+	if (pairedEndMode)
+		_r_seqCnt /=2;
+
+	int i=0;
+	for (i = 0; i < _r_seqCnt; i++)
+	{
+		if (pairedEndMode && _r_seq[2*i].hits[0] == 0 &&  strcmp(_r_seq[2*i].qual,"*")!=0)
+		{
+			fprintf(fp1,"@%s/1\n%s\n+\n%s\n@%s/2\n%s\n+\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].qual, _r_seq[i*2].name, _r_seq[i*2+1].seq, _r_seq[i*2+1].qual);
+		}
+		else if (pairedEndMode && _r_seq[2*i].hits[0] == 0)
+		{
+			fprintf(fp1, ">%s/1\n%s\n>%s/2\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq);
+		}
+		else if (_r_seq[i].hits[0] == 0 && strcmp(_r_seq[i].qual, "*")!=0)
+		{
+			fprintf(fp1,"@%s\n%s\n+\n%s\n", _r_seq[i].name, _r_seq[i].seq, _r_seq[i].qual);
+		}
+		else if (_r_seq[i].hits[0] == 0)
+		{
+			fprintf(fp1,">%s\n%s\n", _r_seq[i].name, _r_seq[i].seq);
+		}
+	}
+
+	fclose(fp1);
+	if (pairedEndMode)
+		_r_seqCnt *= 2;
+
+	for (i = 0; i < _r_seqCnt; i++)
+	{
+		freeMem(_r_seq[i].hits,0);
+	}
+
+
+	freeMem(_r_seq,0);
+	freeMem(_r_samplingLocs,0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/Reads.h	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, 
+ * are permitted provided that the following conditions are met:
+ *   
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-12-08
+ */
+
+
+#ifndef __READ__
+#define __READ__
+
+typedef struct
+{
+	char *name;
+	char *seq;
+	char *rseq;
+	char *qual;
+	char *hits;
+} Read;
+
+int readAllReads(char *fileName1, char *fileName2, int compressed, unsigned char *fastq, unsigned char pe, Read **seqList, unsigned int *seqListSize);
+void loadSamplingLocations(int **samplingLocs, int *samplingLocsSize);
+void finalizeReads(char *fileName);
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/RefGenome.c	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, 
+ * are permitted provided that the following conditions are met:
+ *   
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-12-08
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "Common.h"
+#include "RefGenome.h"
+
+FILE *_rg_fp;
+char *_rg_gen;
+char *_rg_name;
+int _rg_offset;
+int _rg_contGen; 
+
+/**********************************************/
+int initLoadingRefGenome(char *fileName)
+{
+	char ch;
+	_rg_fp = fileOpen (fileName, "r");
+	if (fscanf(_rg_fp, "%c", &ch))
+	{
+		if (ch == '>')
+		{
+			_rg_contGen = 0;
+			_rg_offset = 0;
+			_rg_gen = getMem(CONTIG_MAX_SIZE);
+			_rg_name = getMem(CONTIG_NAME_SIZE);
+			return 1;
+		}
+	}
+	return 0;
+}
+/**********************************************/
+void finalizeLoadingRefGenome()
+{
+	freeMem(_rg_gen, CONTIG_MAX_SIZE);
+	freeMem(_rg_name, CONTIG_NAME_SIZE); 
+	fclose(_rg_fp);
+}
+/**********************************************/
+int loadRefGenome(char **refGen, char **refGenName, int *refGenOff)
+{
+	char ch;
+	int i;
+	int returnVal = 0;
+	int actualSize=0;
+	int size;
+	char *tmp;
+	
+	// New Conting 
+	if (!_rg_contGen)
+	{
+		size = 0;
+		tmp = fgets(_rg_name, SEQ_MAX_LENGTH, _rg_fp);
+		int k;
+		for (k=0; k<strlen(_rg_name);k++)
+		{
+			if (_rg_name[k] == ' ')
+			{
+				_rg_name[k]='\0';
+				break;
+			}
+		}
+			
+	}
+	else
+	{
+		size=strlen(_rg_gen);
+		for( i = 0 ; i < CONTIG_OVERLAP ; i++ )
+		{
+			
+			_rg_gen[i] = _rg_gen[size-CONTIG_OVERLAP+i];
+			if (_rg_gen[i] != 'N')
+				actualSize++;
+		}
+		size = CONTIG_OVERLAP;
+	}
+	while( fscanf(_rg_fp, "%c", &ch) > 0 )
+	{
+		if (ch == '>')
+		{
+			_rg_contGen = 0;
+			returnVal = 1;
+			break;
+		}
+		else if (!isspace(ch))
+		{
+			ch = toupper(ch);
+			_rg_gen[size++] = ch;
+			if (ch != 'N')
+			{
+				actualSize++;
+			}
+			if (actualSize == CONTIG_SIZE || size == CONTIG_MAX_SIZE)
+			{
+				_rg_contGen = 1;
+				returnVal=1;
+				break;
+			}
+		}
+
+	}
+
+	_rg_gen[size] = '\0';
+	for (i=strlen(_rg_name)-1; i >= 0; i--)
+		if (!isspace(_rg_name[i]))
+			break;
+	_rg_name[i+1] = '\0';
+
+	*refGenOff = _rg_offset;
+	*refGenName = _rg_name;
+	*refGen = _rg_gen;
+
+	if (_rg_contGen == 1)
+	{
+		_rg_offset += size-CONTIG_OVERLAP;
+	}
+	else
+	{
+		_rg_offset = 0;
+	}
+	
+	
+	return returnVal;
+}
+/**********************************************/
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/RefGenome.h	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, 
+ * are permitted provided that the following conditions are met:
+ *   
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-12-08
+ */
+
+
+#ifndef _REF_GENOME_
+#define _REF_GENOME_
+
+int		initLoadingRefGenome(char *fileName);
+void	finalizeLoadingRefGenome();
+int		loadRefGenome(char **refGen, char **refGenName, int *refGenOff);
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mrsfast-2.3.0.2/baseFAST.c	Tue Feb 21 10:39:28 2012 -0500
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice, this
+ *   list of conditions and the following disclaimer in the documentation and/or other
+ *   materials provided with the distribution.
+ * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without specific
+ *   prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Author         : Faraz Hach
+ * Email          : fhach AT cs DOT sfu
+ * Last Update    : 2009-02-01
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "Common.h"
+#include "CommandLineParser.h"
+#include "Reads.h"
+#include "Output.h"
+#include "HashTable.h"
+#include "MrsFAST.h"
+
+char 				*versionNumber = "2.3";			// Current Version
+unsigned char		seqFastq;
+
+int main(int argc, char *argv[])
+{
+	if (!parseCommandLine(argc, argv))
+		return 1;
+
+	configHashTable();
+	/****************************************************
+	 * INDEXING
+	 ***************************************************/
+	if (indexingMode)
+	{
+		int i;
+		/********************************
+		 * Regulard Mode
+		 ********************************/
+		if (!bisulfiteMode)
+		{
+			configHashTable();
+			for (i = 0; i < fileCnt; i++)
+			{
+				generateHashTable(fileName[i][0], fileName[i][1]);
+			}
+		}
+		else
+		/********************************
+		 * Bisulfite Mode
+		 ********************************/
+		{ // TODO
+		}
+	}
+	/****************************************************
+	 * SEARCHING
+	 ***************************************************/
+	else
+	{
+		Read *seqList;
+		unsigned int seqListSize;
+		int fc;
+		int samplingLocsSize;
+		int *samplingLocs;
+		double totalLoadingTime = 0;
+		double totalMappingTime = 0;
+		double startTime;
+		double loadingTime;
+		double mappingTime;
+		double lstartTime;
+		double ppTime;
+		double tmpTime;;
+		char *prevGen = getMem(CONTIG_NAME_SIZE);
+		prevGen[0]='\0';
+		char *curGen;
+		int	flag;
+		double maxMem=0;
+		char fname1[FILE_NAME_LENGTH];
+		char fname2[FILE_NAME_LENGTH];
+		char fname3[FILE_NAME_LENGTH];
+		char fname4[FILE_NAME_LENGTH];
+		char fname5[FILE_NAME_LENGTH];
+		// Loading Sequences & Sampling Locations
+		startTime = getTime();
+		if (bisulfiteMode && !pairedEndMode && seqFile1 == NULL)
+		{
+		    //TODO
+		}
+		else
+		{
+			if (!readAllReads(seqFile1, seqFile2, seqCompressed, &seqFastq, pairedEndMode, &seqList, &seqListSize))
+			{
+				return 1;
+			}
+		}
+
+		//loadSamplingLocations(&samplingLocs, &samplingLocsSize);
+		totalLoadingTime += getTime()-startTime;
+
+
+
+
+		if (pairedEndMode)
+		{
+			//Switching to Inferred Size 
+			minPairEndedDistance = minPairEndedDistance - SEQ_LENGTH + 2;
+			maxPairEndedDistance = maxPairEndedDistance - SEQ_LENGTH + 2;
+			if (pairedEndDiscordantMode)
+			{
+				maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance - SEQ_LENGTH + 2;
+				minPairEndedDiscordantDistance = minPairEndedDiscordantDistance - SEQ_LENGTH + 2;
+			}
+			
+			/* The size between the ends;
+			minPairEndedDistance = minPairEndedDistance + SEQ_LENGTH + 1;
+			maxPairEndedDistance = maxPairEndedDistance + SEQ_LENGTH + 1;
+			if (pairedEndDiscordantMode)
+			{
+				maxPairEndedDiscordantDistance = maxPairEndedDiscordantDistance + SEQ_LENGTH + 1;
+				minPairEndedDiscordantDistance = minPairEndedDiscordantDistance + SEQ_LENGTH + 1;
+			}*/
+			sprintf(fname1, "%s__%s__1", mappingOutputPath, mappingOutput);
+			sprintf(fname2, "%s__%s__2", mappingOutputPath, mappingOutput);
+			sprintf(fname3, "%s__%s__disc", mappingOutputPath, mappingOutput);
+			sprintf(fname4, "%s__%s__oea1", mappingOutputPath, mappingOutput);
+			sprintf(fname5, "%s__%s__oea2", mappingOutputPath, mappingOutput);
+			unlink(fname1);
+			unlink(fname2);
+			unlink(fname3);
+			unlink(fname4);
+			unlink(fname5);
+		}
+
+		// Preparing output
+		initOutput(mappingOutput, outCompressed);
+
+		fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
+		fprintf(stdout, "| %15s | %15s | %15s | %15s | %15s %15s |\n","Genome Name","Loading Time", "Mapping Time", "Memory Usage(M)","Total Mappings","Mapped reads");
+		fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
+
+		/********************************
+		 * Regular Mode
+		 ********************************/
+		if (!bisulfiteMode)
+		{
+			if (!pairedEndMode)
+			{
+				for (fc = 0; fc <fileCnt; fc++)
+				{
+					if (!initLoadingHashTable(fileName[fc][1]))
+					{
+						return 1;
+					}
+
+					loadSamplingLocations(&samplingLocs, &samplingLocsSize);
+
+					mappingTime = 0;
+					loadingTime = 0;
+					prevGen[0] = '\0';
+					flag = 1;
+					
+					do
+					{
+						flag = loadHashTable ( &tmpTime );  			// Reading a fragment
+						curGen = getRefGenomeName();
+
+						// First Time
+						if (flag && prevGen[0]== '\0')
+						{
+							sprintf(prevGen, "%s", curGen);
+						}
+
+						if ( !flag || strcmp(prevGen, curGen)!=0)
+						{
+
+							fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
+									prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
+							fflush(stdout);
+
+							totalMappingTime += mappingTime;
+							totalLoadingTime += loadingTime;
+
+							loadingTime = 0;
+							mappingTime = 0;
+							maxMem = 0;
+
+							if (!flag)
+							{
+								break;
+							}
+						}
+						else if (progressRep && mappingTime != 0)
+						{
+							fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
+									prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
+							fflush(stdout);
+						}
+
+						sprintf(prevGen, "%s", curGen);
+
+						loadingTime += tmpTime;
+						lstartTime = getTime();
+
+						initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]);
+
+						mapSingleEndSeq();
+
+						mappingTime += getTime() - lstartTime;
+						if (maxMem < getMemUsage())
+						{
+							maxMem = getMemUsage();
+						}
+					} while (flag);
+
+				} // end for;
+				finalizeFAST();
+				finalizeLoadingHashTable();
+
+			}
+			// Pairedend Mapping Mode
+			else
+			{
+
+				for (fc = 0; fc <fileCnt; fc++)
+				{
+					if (!initLoadingHashTable(fileName[fc][1]))
+					{
+						return 1;
+					}
+					
+
+					loadSamplingLocations(&samplingLocs, &samplingLocsSize);
+					
+					mappingTime = 0;
+					loadingTime = 0;
+					prevGen[0] = '\0';
+					flag = 1;
+
+					do
+					{
+						flag = loadHashTable ( &tmpTime );  			// Reading a fragment
+						curGen = getRefGenomeName();
+
+						// First Time
+						if (flag && prevGen[0]== '\0')
+						{
+							sprintf(prevGen, "%s", curGen);
+						}
+
+						if ( !flag || strcmp(prevGen, curGen)!=0)
+						{
+
+							// DISCORDANT
+							lstartTime = getTime();					
+							outputPairedEnd();
+							mappingTime += getTime() - lstartTime;
+							//DISCORDANT			
+
+							fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
+									prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
+							fflush(stdout);
+
+							totalMappingTime += mappingTime;
+							totalLoadingTime += loadingTime;
+
+							loadingTime = 0;
+							mappingTime = 0;
+							maxMem = 0;
+
+							if (!flag)
+							{
+								break;
+							}
+						}
+						else if (progressRep && mappingTime != 0)
+						{
+							fprintf(stdout, "| %15s | %15.2f | %15.2f | %15.2f | %15lld %15lld |\n",
+									prevGen,loadingTime, mappingTime, maxMem, mappingCnt , mappedSeqCnt);
+							fflush(stdout);
+						}
+
+						sprintf(prevGen, "%s", curGen);
+
+						loadingTime += tmpTime;
+						lstartTime = getTime();
+						
+						initFAST(seqList, seqListSize, samplingLocs, samplingLocsSize, fileName[fc][0]);
+						
+						mapPairedEndSeq();
+							
+						mappingTime += getTime() - lstartTime;
+						if (maxMem < getMemUsage())
+						{
+							maxMem = getMemUsage();
+						}
+					} while (flag);
+
+				} // end for;
+
+				finalizeLoadingHashTable();
+				if (pairedEndDiscordantMode)
+				{
+					lstartTime = getTime();							
+					outputPairedEndDiscPP();
+					ppTime = getTime() - lstartTime;
+				}
+				finalizeFAST();
+			}
+		}
+		/********************************
+		 * Bisulfite Mode
+		 ********************************/
+		{
+			//TODO
+		}
+
+
+		finalizeOutput();
+
+		fprintf(stdout, "-----------------------------------------------------------------------------------------------------------\n");
+		fprintf(stdout, "%19s%16.2f%18.2f\n\n", "Total:",totalLoadingTime, totalMappingTime);
+		if (pairedEndDiscordantMode)
+			fprintf(stdout, "Post Processing Time: %18.2f \n", ppTime);
+		fprintf(stdout, "%-30s%10.2f\n","Total Time:", totalMappingTime+totalLoadingTime);
+		fprintf(stdout, "%-30s%10d\n","Total No. of Reads:", seqListSize);
+		fprintf(stdout, "%-30s%10lld\n","Total No. of Mappings:", mappingCnt);
+		fprintf(stdout, "%-30s%10.0f\n\n","Avg No. of locations verified:", ceil((float)verificationCnt/seqListSize));
+
+		int cof = (pairedEndMode)?2:1;
+
+		if (progressRep && maxHits != 0)
+		{
+			int frequency[maxHits+1];
+			int i;
+			for ( i=0 ; i <= maxHits; i++)
+			{
+				frequency[i] = 0;
+			}
+
+
+			for (fc = 0; fc < seqListSize; fc++)
+			{
+				frequency[*(seqList[fc*cof].hits)]++;
+			}
+			frequency[maxHits] = completedSeqCnt;
+			for ( i=0 ; i <= maxHits; i++)
+			{
+				fprintf(stdout, "%-30s%10d%10d%10.2f%%\n","Reads Mapped to ", i, frequency[i], 100*(float)frequency[i]/(float)seqListSize);
+			}
+		}
+
+
+		if (strcmp(unmappedOutput, "") == 0)
+		{
+			char fn[strlen(mappingOutputPath) + strlen(mappingOutput) + 6 ];
+			sprintf(fn, "%s%s.nohit", mappingOutputPath, mappingOutput );
+			finalizeReads(fn);
+		}
+		else
+		{
+			finalizeReads(unmappedOutput);
+		}
+
+
+
+		freeMem(prevGen, CONTIG_NAME_SIZE);
+	}
+
+
+
+	return 1;
+}