Mercurial > repos > calkan > mrfast
diff mrfast-2.1.0.4/CommandLineParser.c @ 1:d4054b05b015 default tip
Version update to 2.1.0.5
author | calkan |
---|---|
date | Fri, 09 Mar 2012 07:35:51 -0500 |
parents | 7b3dc85dc7fd |
children |
line wrap: on
line diff
--- a/mrfast-2.1.0.4/CommandLineParser.c Tue Feb 21 10:29:47 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,388 +0,0 @@ -/* - * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this list - * of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright notice, this - * list of conditions and the following disclaimer in the documentation and/or other - * materials provided with the distribution. - * - Neither the names of the University of Washington, Simon Fraser University, - * nor the names of its contributors may be - * used to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - Authors: - Farhad Hormozdiari - Faraz Hach - Can Alkan - Emails: - farhadh AT uw DOT edu - fhach AT cs DOT sfu DOT ca - calkan AT uw DOT edu -*/ - - -#include <stdio.h> -#include <stdlib.h> -#include <getopt.h> -#include <string.h> -#include <ctype.h> -#include "Common.h" -#include "CommandLineParser.h" - -int uniqueMode=1; -int indexingMode; -int searchingMode; -int pairedEndMode; -int pairedEndDiscordantMode; -int transChromosal=0; -int pairedEndProfilingMode; -int seqCompressed; -int outCompressed; -int cropSize = 0; -int progressRep = 0; -int minPairEndedDistance=-1; -int maxPairEndedDistance=-1; -int minPairEndedDiscordantDistance=-1; -int maxPairEndedDiscordantDistance=-1; -int bestMode; -int nosamMode; -char *seqFile1; -char *seqFile2; -char *mappingOutput = "output"; -char *mappingOutputPath = ""; -char *unmappedOutput = "unmapped"; -char fileName[1000][2][FILE_NAME_LENGTH]; -int fileCnt; -int maxOEAOutput=1000; -int maxDiscordantOutput=10000; -unsigned char errThreshold=2; -unsigned char maxHits=0; -unsigned char WINDOW_SIZE = 12; -unsigned int CONTIG_SIZE; -unsigned int CONTIG_MAX_SIZE; - -void printHelp(); - -int parseCommandLine (int argc, char *argv[]) -{ - - int o; - int index; - char *fastaFile = NULL; - char *batchFile = NULL ; - int batchMode = 0; - - static struct option longOptions[] = - { - {"pe", no_argument, &pairedEndMode, 1}, - {"discordant-vh", no_argument, &pairedEndDiscordantMode, 1}, - {"trans", no_argument, &transChromosal , 1}, - {"profile", no_argument, &pairedEndProfilingMode, 1}, - {"seqcomp", no_argument, &seqCompressed, 1}, - {"outcomp", no_argument, &outCompressed, 1}, - {"progress", no_argument, &progressRep, 1}, - {"best", no_argument, &bestMode, 1}, - {"index", required_argument, 0, 'i'}, - {"search", required_argument, 0, 's'}, - {"help", no_argument, 0, 'h'}, - {"version", no_argument, 0, 'v'}, - {"seq", required_argument, 0, 'x'}, - {"seq1", required_argument, 0, 'x'}, - {"seq2", required_argument, 0, 'y'}, - {"ws", required_argument, 0, 'w'}, - {"min", required_argument, 0, 'l'}, - {"max", required_argument, 0, 'm'}, - {"crop", required_argument, 0, 'c'}, - {"maxoea", required_argument, 0, 'a'}, - {"maxdis", required_argument, 0, 'd'}, - {"nosam", no_argument, &nosamMode, 1}, - {0, 0, 0, 0}, - }; - - while ( (o = getopt_long ( argc, argv, "bhvn:e:o:u:i:s:x:y:w:l:m:c:a:d:", longOptions, &index)) != -1 ) - { - switch (o) - { - case 'a': - maxOEAOutput = atoi(optarg); - break; - case 'd': - maxDiscordantOutput = atoi(optarg); - break; - case 'i': - indexingMode = 1; - fastaFile = optarg; - break; - case 's': - searchingMode = 1; - fastaFile = optarg; - break; - case 'b': - batchMode = 1; - break; - case 'c': - cropSize = atoi(optarg); - break; - case 'w': - WINDOW_SIZE = atoi(optarg); - break; - case 'x': - seqFile1 = optarg; - break; - case 'y': - seqFile2 = optarg; - break; - case 'u': - unmappedOutput = optarg; - break; - case 'o': - mappingOutput = getMem(FILE_NAME_LENGTH); - mappingOutputPath = getMem(FILE_NAME_LENGTH); - stripPath (optarg, &mappingOutputPath, &mappingOutput); - break; - case 'n': - maxHits = atoi(optarg); - break; - case 'e': - errThreshold = atoi(optarg); - break; - case 'l': - minPairEndedDistance = atoi(optarg); - break; - case 'm': - maxPairEndedDistance = atoi(optarg); - break; - case 'h': - printHelp(); - return 0; - break; - case 'v': - fprintf(stdout, "%s.%s\n", versionNumber, versionNumberF); - return 0; - break; - /* case '?': - fprintf(stderr, "Unknown parameter: %s\n", longOptions[index].name); - abort(); - break;*/ - } - - } - if (indexingMode + searchingMode != 1) - { - fprintf(stdout, "ERROR: Indexing / Searching mode should be selected\n"); - return 0; - } - - if (WINDOW_SIZE > 15 || WINDOW_SIZE < 11) - { - fprintf(stdout, "ERROR: Window size should be in [12..15]\n"); - return 0; - } - - - if ( indexingMode ) - { - CONTIG_SIZE = 15000000; - CONTIG_MAX_SIZE = 40000000; - - if (batchMode) - { - batchFile = fastaFile; - fastaFile = NULL; - } - - if (batchFile == NULL && fastaFile == NULL) - { - fprintf(stdout, "ERROR: Reference(s) should be indicated for indexing\n"); - return 0; - } - - if (pairedEndDiscordantMode) - { - fprintf(stdout, "ERROR: --discordant cannot be used in indexing mode. \n"); - return 0; - } - - } - - - if ( searchingMode ) - { - CONTIG_SIZE = 300000000; - CONTIG_MAX_SIZE = 300000000; - - - if (batchMode) - { - batchFile = fastaFile; - fastaFile = NULL; - } - - if (batchFile == NULL && fastaFile == NULL) - { - fprintf(stdout, "ERROR: Index File(s) should be indiciated for searching\n"); - return 0; - } - - if (seqFile1 == NULL && seqFile2 == NULL) - { - fprintf(stdout, "ERROR: Please indicate a sequence file for searching.\n"); - return 0; - } - - - if (!pairedEndMode && seqFile2 != NULL) - { - fprintf(stdout, "ERROR: Second File can be indicated in pairedend mode\n"); - return 0; - } - - if (pairedEndMode && (minPairEndedDistance <0 || maxPairEndedDistance < 0 || minPairEndedDistance > maxPairEndedDistance)) - { - fprintf(stdout, "ERROR: Please enter a valid range for pairedend sequences.\n"); - return 0; - } - - if (pairedEndMode && seqFile1 == NULL) - { - fprintf(stdout, "ERROR: Please indicate the first file for pairedend search.\n"); - return 0; - } - - if (!pairedEndMode && pairedEndDiscordantMode) - { - fprintf(stdout, "ERROR: --discordant should be used with --pe"); - return 0; - } - - if (!pairedEndMode && pairedEndProfilingMode) - { - fprintf(stdout, "ERROR: --profile should be used with --pe"); - return 0; - } - } - - int i = 0; - - - if (batchMode) - { - FILE *fp = fileOpen(batchFile, "r"); - - if (fp == NULL) - return 0; - - fileCnt = 0; - - while ( fgets(fileName[fileCnt][0], FILE_NAME_LENGTH, fp)) - { - for (i = strlen(fileName[fileCnt][0])-1; i>=0; i--) - if ( !isspace(fileName[fileCnt][0][i])) - break; - fileName[fileCnt][0][i+1] = '\0'; - - if (strcmp(fileName[fileCnt][0], "") != 0) - { - sprintf(fileName[fileCnt][1], "%s.index", fileName[fileCnt][0]); - fileCnt++; - } - } - } - else - { - sprintf(fileName[fileCnt][0], "%s", fastaFile); - sprintf(fileName[fileCnt][1], "%s.index", fileName[fileCnt][0]); - fileCnt++; - } - - - if (pairedEndProfilingMode) - { - - minPairEndedDistance = 0; - maxPairEndedDistance = 300000000; - - } - - if (pairedEndDiscordantMode) - { - minPairEndedDiscordantDistance = minPairEndedDistance; - maxPairEndedDiscordantDistance = maxPairEndedDistance; - - minPairEndedDistance = 0; - maxPairEndedDistance = 300000000; - } - - return 1; -} - - -void printHelp() -{ - char *errorType; - if (mrFAST) - { - fprintf(stdout,"mrFAST : Micro-Read Fast Alignment Search Tool.\n\n"); - fprintf(stdout,"Usage: mrfast [options]\n\n"); - errorType="edit distance"; - } - else - { - fprintf(stdout,"mrsFAST : Micro-Read Substitutions (only) Fast Alignment Search Tool.\n\n"); - fprintf(stdout,"mrsFAST is a cache oblivious read mapping tool. mrsFAST capable of mapping\n"); - fprintf(stdout,"single and paired end reads to the reference genome. Bisulfite treated \n"); - fprintf(stdout,"sequences are not supported in this version. By default mrsFAST reports \n"); - fprintf(stdout,"the output in SAM format.\n\n"); - fprintf(stdout,"Usage: mrsFAST [options]\n\n"); - errorType="hamming distance"; - } - - fprintf(stdout,"General Options:\n"); - fprintf(stdout," -v|--version\t\tCurrent Version.\n"); - fprintf(stdout," -h\t\t\tShows the help file.\n"); - fprintf(stdout,"\n\n"); - - fprintf(stdout,"Indexing Options:\n"); - fprintf(stdout," --index [file]\t\tGenerate an index from the specified fasta file. \n"); - fprintf(stdout," -b\t\t\tIndicates the indexing will be done in batch mode.\n\t\t\tThe file specified in --index should contain the \n\t\t\tlist of fasta files.\n"); - fprintf(stdout," --ws [int]\t\tSet window size for indexing (default:12 max:14).\n"); - fprintf(stdout,"\n\n"); - - fprintf(stdout,"Searching Options:\n"); - fprintf(stdout," --search [file]\tSearch in the specified genome. Provide the path to the fasta file. \n\t\t\tIndex file should be in the same directory.\n"); - fprintf(stdout," -b\t\t\tIndicates the mapping will be done in batch mode. \n\t\t\tThe file specified in --search should contain the \n\t\t\tlist of fasta files.\n"); - fprintf(stdout," --pe \t\t\tSearch will be done in Paired-End mode.\n"); - fprintf(stdout," --seq [file]\t\tInput sequences in fasta/fastq format [file]. If \n\t\t\tpaired end reads are interleaved, use this option.\n"); - fprintf(stdout," --seq1 [file]\t\tInput sequences in fasta/fastq format [file] (First \n\t\t\tfile). Use this option to indicate the first file of \n\t\t\tpaired end reads. \n"); - fprintf(stdout," --seq2 [file]\t\tInput sequences in fasta/fastq format [file] (Second \n\t\t\tfile). Use this option to indicate the second file of \n\t\t\tpaired end reads. \n"); - fprintf(stdout," -o [file]\t\tOutput of the mapped sequences. The default is \"output\".\n"); - fprintf(stdout," -u [file]\t\tSave unmapped sequences in fasta/fastq format.\n"); - fprintf(stdout," --best \t\tOnly the best mapping from all the possible mapping is returned.\n"); - fprintf(stdout," --seqcomp \t\tIndicates that the input sequences are compressed (gz).\n"); - fprintf(stdout," --outcomp \t\tIndicates that output file should be compressed (gz).\n"); - fprintf(stdout," -e [int]\t\tMaximum allowed %s (default 2).\n", errorType); - fprintf(stdout," --min [int]\t\tMin distance allowed between a pair of end sequences.\n"); - fprintf(stdout," --max [int]\t\tMax distance allowed between a pair of end sequences.\n"); - - fprintf(stdout," --maxoea [int]\t\tMax number of One End Anchored (OEA) returned for each read pair. We recommend 100 or above for NovelSeq use.\n"); - fprintf(stdout," --maxdis [int]\t\tMax number of discordant map locations returned for each read pair. We recommend 300 or above for VariationHunter use.\n"); -}