Mercurial > repos > calkan > mrfast
comparison mrfast-2.1.0.4/CommandLineParser.c @ 0:7b3dc85dc7fd
Uploaded mrfast source tarball
author | calkan |
---|---|
date | Tue, 21 Feb 2012 10:29:47 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7b3dc85dc7fd |
---|---|
1 /* | |
2 * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University | |
3 * All rights reserved. | |
4 * | |
5 * Redistribution and use in source and binary forms, with or without modification, | |
6 * are permitted provided that the following conditions are met: | |
7 * | |
8 * Redistributions of source code must retain the above copyright notice, this list | |
9 * of conditions and the following disclaimer. | |
10 * - Redistributions in binary form must reproduce the above copyright notice, this | |
11 * list of conditions and the following disclaimer in the documentation and/or other | |
12 * materials provided with the distribution. | |
13 * - Neither the names of the University of Washington, Simon Fraser University, | |
14 * nor the names of its contributors may be | |
15 * used to endorse or promote products derived from this software without specific | |
16 * prior written permission. | |
17 * | |
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | |
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 */ | |
30 | |
31 /* | |
32 Authors: | |
33 Farhad Hormozdiari | |
34 Faraz Hach | |
35 Can Alkan | |
36 Emails: | |
37 farhadh AT uw DOT edu | |
38 fhach AT cs DOT sfu DOT ca | |
39 calkan AT uw DOT edu | |
40 */ | |
41 | |
42 | |
43 #include <stdio.h> | |
44 #include <stdlib.h> | |
45 #include <getopt.h> | |
46 #include <string.h> | |
47 #include <ctype.h> | |
48 #include "Common.h" | |
49 #include "CommandLineParser.h" | |
50 | |
51 int uniqueMode=1; | |
52 int indexingMode; | |
53 int searchingMode; | |
54 int pairedEndMode; | |
55 int pairedEndDiscordantMode; | |
56 int transChromosal=0; | |
57 int pairedEndProfilingMode; | |
58 int seqCompressed; | |
59 int outCompressed; | |
60 int cropSize = 0; | |
61 int progressRep = 0; | |
62 int minPairEndedDistance=-1; | |
63 int maxPairEndedDistance=-1; | |
64 int minPairEndedDiscordantDistance=-1; | |
65 int maxPairEndedDiscordantDistance=-1; | |
66 int bestMode; | |
67 int nosamMode; | |
68 char *seqFile1; | |
69 char *seqFile2; | |
70 char *mappingOutput = "output"; | |
71 char *mappingOutputPath = ""; | |
72 char *unmappedOutput = "unmapped"; | |
73 char fileName[1000][2][FILE_NAME_LENGTH]; | |
74 int fileCnt; | |
75 int maxOEAOutput=1000; | |
76 int maxDiscordantOutput=10000; | |
77 unsigned char errThreshold=2; | |
78 unsigned char maxHits=0; | |
79 unsigned char WINDOW_SIZE = 12; | |
80 unsigned int CONTIG_SIZE; | |
81 unsigned int CONTIG_MAX_SIZE; | |
82 | |
83 void printHelp(); | |
84 | |
85 int parseCommandLine (int argc, char *argv[]) | |
86 { | |
87 | |
88 int o; | |
89 int index; | |
90 char *fastaFile = NULL; | |
91 char *batchFile = NULL ; | |
92 int batchMode = 0; | |
93 | |
94 static struct option longOptions[] = | |
95 { | |
96 {"pe", no_argument, &pairedEndMode, 1}, | |
97 {"discordant-vh", no_argument, &pairedEndDiscordantMode, 1}, | |
98 {"trans", no_argument, &transChromosal , 1}, | |
99 {"profile", no_argument, &pairedEndProfilingMode, 1}, | |
100 {"seqcomp", no_argument, &seqCompressed, 1}, | |
101 {"outcomp", no_argument, &outCompressed, 1}, | |
102 {"progress", no_argument, &progressRep, 1}, | |
103 {"best", no_argument, &bestMode, 1}, | |
104 {"index", required_argument, 0, 'i'}, | |
105 {"search", required_argument, 0, 's'}, | |
106 {"help", no_argument, 0, 'h'}, | |
107 {"version", no_argument, 0, 'v'}, | |
108 {"seq", required_argument, 0, 'x'}, | |
109 {"seq1", required_argument, 0, 'x'}, | |
110 {"seq2", required_argument, 0, 'y'}, | |
111 {"ws", required_argument, 0, 'w'}, | |
112 {"min", required_argument, 0, 'l'}, | |
113 {"max", required_argument, 0, 'm'}, | |
114 {"crop", required_argument, 0, 'c'}, | |
115 {"maxoea", required_argument, 0, 'a'}, | |
116 {"maxdis", required_argument, 0, 'd'}, | |
117 {"nosam", no_argument, &nosamMode, 1}, | |
118 {0, 0, 0, 0}, | |
119 }; | |
120 | |
121 while ( (o = getopt_long ( argc, argv, "bhvn:e:o:u:i:s:x:y:w:l:m:c:a:d:", longOptions, &index)) != -1 ) | |
122 { | |
123 switch (o) | |
124 { | |
125 case 'a': | |
126 maxOEAOutput = atoi(optarg); | |
127 break; | |
128 case 'd': | |
129 maxDiscordantOutput = atoi(optarg); | |
130 break; | |
131 case 'i': | |
132 indexingMode = 1; | |
133 fastaFile = optarg; | |
134 break; | |
135 case 's': | |
136 searchingMode = 1; | |
137 fastaFile = optarg; | |
138 break; | |
139 case 'b': | |
140 batchMode = 1; | |
141 break; | |
142 case 'c': | |
143 cropSize = atoi(optarg); | |
144 break; | |
145 case 'w': | |
146 WINDOW_SIZE = atoi(optarg); | |
147 break; | |
148 case 'x': | |
149 seqFile1 = optarg; | |
150 break; | |
151 case 'y': | |
152 seqFile2 = optarg; | |
153 break; | |
154 case 'u': | |
155 unmappedOutput = optarg; | |
156 break; | |
157 case 'o': | |
158 mappingOutput = getMem(FILE_NAME_LENGTH); | |
159 mappingOutputPath = getMem(FILE_NAME_LENGTH); | |
160 stripPath (optarg, &mappingOutputPath, &mappingOutput); | |
161 break; | |
162 case 'n': | |
163 maxHits = atoi(optarg); | |
164 break; | |
165 case 'e': | |
166 errThreshold = atoi(optarg); | |
167 break; | |
168 case 'l': | |
169 minPairEndedDistance = atoi(optarg); | |
170 break; | |
171 case 'm': | |
172 maxPairEndedDistance = atoi(optarg); | |
173 break; | |
174 case 'h': | |
175 printHelp(); | |
176 return 0; | |
177 break; | |
178 case 'v': | |
179 fprintf(stdout, "%s.%s\n", versionNumber, versionNumberF); | |
180 return 0; | |
181 break; | |
182 /* case '?': | |
183 fprintf(stderr, "Unknown parameter: %s\n", longOptions[index].name); | |
184 abort(); | |
185 break;*/ | |
186 } | |
187 | |
188 } | |
189 if (indexingMode + searchingMode != 1) | |
190 { | |
191 fprintf(stdout, "ERROR: Indexing / Searching mode should be selected\n"); | |
192 return 0; | |
193 } | |
194 | |
195 if (WINDOW_SIZE > 15 || WINDOW_SIZE < 11) | |
196 { | |
197 fprintf(stdout, "ERROR: Window size should be in [12..15]\n"); | |
198 return 0; | |
199 } | |
200 | |
201 | |
202 if ( indexingMode ) | |
203 { | |
204 CONTIG_SIZE = 15000000; | |
205 CONTIG_MAX_SIZE = 40000000; | |
206 | |
207 if (batchMode) | |
208 { | |
209 batchFile = fastaFile; | |
210 fastaFile = NULL; | |
211 } | |
212 | |
213 if (batchFile == NULL && fastaFile == NULL) | |
214 { | |
215 fprintf(stdout, "ERROR: Reference(s) should be indicated for indexing\n"); | |
216 return 0; | |
217 } | |
218 | |
219 if (pairedEndDiscordantMode) | |
220 { | |
221 fprintf(stdout, "ERROR: --discordant cannot be used in indexing mode. \n"); | |
222 return 0; | |
223 } | |
224 | |
225 } | |
226 | |
227 | |
228 if ( searchingMode ) | |
229 { | |
230 CONTIG_SIZE = 300000000; | |
231 CONTIG_MAX_SIZE = 300000000; | |
232 | |
233 | |
234 if (batchMode) | |
235 { | |
236 batchFile = fastaFile; | |
237 fastaFile = NULL; | |
238 } | |
239 | |
240 if (batchFile == NULL && fastaFile == NULL) | |
241 { | |
242 fprintf(stdout, "ERROR: Index File(s) should be indiciated for searching\n"); | |
243 return 0; | |
244 } | |
245 | |
246 if (seqFile1 == NULL && seqFile2 == NULL) | |
247 { | |
248 fprintf(stdout, "ERROR: Please indicate a sequence file for searching.\n"); | |
249 return 0; | |
250 } | |
251 | |
252 | |
253 if (!pairedEndMode && seqFile2 != NULL) | |
254 { | |
255 fprintf(stdout, "ERROR: Second File can be indicated in pairedend mode\n"); | |
256 return 0; | |
257 } | |
258 | |
259 if (pairedEndMode && (minPairEndedDistance <0 || maxPairEndedDistance < 0 || minPairEndedDistance > maxPairEndedDistance)) | |
260 { | |
261 fprintf(stdout, "ERROR: Please enter a valid range for pairedend sequences.\n"); | |
262 return 0; | |
263 } | |
264 | |
265 if (pairedEndMode && seqFile1 == NULL) | |
266 { | |
267 fprintf(stdout, "ERROR: Please indicate the first file for pairedend search.\n"); | |
268 return 0; | |
269 } | |
270 | |
271 if (!pairedEndMode && pairedEndDiscordantMode) | |
272 { | |
273 fprintf(stdout, "ERROR: --discordant should be used with --pe"); | |
274 return 0; | |
275 } | |
276 | |
277 if (!pairedEndMode && pairedEndProfilingMode) | |
278 { | |
279 fprintf(stdout, "ERROR: --profile should be used with --pe"); | |
280 return 0; | |
281 } | |
282 } | |
283 | |
284 int i = 0; | |
285 | |
286 | |
287 if (batchMode) | |
288 { | |
289 FILE *fp = fileOpen(batchFile, "r"); | |
290 | |
291 if (fp == NULL) | |
292 return 0; | |
293 | |
294 fileCnt = 0; | |
295 | |
296 while ( fgets(fileName[fileCnt][0], FILE_NAME_LENGTH, fp)) | |
297 { | |
298 for (i = strlen(fileName[fileCnt][0])-1; i>=0; i--) | |
299 if ( !isspace(fileName[fileCnt][0][i])) | |
300 break; | |
301 fileName[fileCnt][0][i+1] = '\0'; | |
302 | |
303 if (strcmp(fileName[fileCnt][0], "") != 0) | |
304 { | |
305 sprintf(fileName[fileCnt][1], "%s.index", fileName[fileCnt][0]); | |
306 fileCnt++; | |
307 } | |
308 } | |
309 } | |
310 else | |
311 { | |
312 sprintf(fileName[fileCnt][0], "%s", fastaFile); | |
313 sprintf(fileName[fileCnt][1], "%s.index", fileName[fileCnt][0]); | |
314 fileCnt++; | |
315 } | |
316 | |
317 | |
318 if (pairedEndProfilingMode) | |
319 { | |
320 | |
321 minPairEndedDistance = 0; | |
322 maxPairEndedDistance = 300000000; | |
323 | |
324 } | |
325 | |
326 if (pairedEndDiscordantMode) | |
327 { | |
328 minPairEndedDiscordantDistance = minPairEndedDistance; | |
329 maxPairEndedDiscordantDistance = maxPairEndedDistance; | |
330 | |
331 minPairEndedDistance = 0; | |
332 maxPairEndedDistance = 300000000; | |
333 } | |
334 | |
335 return 1; | |
336 } | |
337 | |
338 | |
339 void printHelp() | |
340 { | |
341 char *errorType; | |
342 if (mrFAST) | |
343 { | |
344 fprintf(stdout,"mrFAST : Micro-Read Fast Alignment Search Tool.\n\n"); | |
345 fprintf(stdout,"Usage: mrfast [options]\n\n"); | |
346 errorType="edit distance"; | |
347 } | |
348 else | |
349 { | |
350 fprintf(stdout,"mrsFAST : Micro-Read Substitutions (only) Fast Alignment Search Tool.\n\n"); | |
351 fprintf(stdout,"mrsFAST is a cache oblivious read mapping tool. mrsFAST capable of mapping\n"); | |
352 fprintf(stdout,"single and paired end reads to the reference genome. Bisulfite treated \n"); | |
353 fprintf(stdout,"sequences are not supported in this version. By default mrsFAST reports \n"); | |
354 fprintf(stdout,"the output in SAM format.\n\n"); | |
355 fprintf(stdout,"Usage: mrsFAST [options]\n\n"); | |
356 errorType="hamming distance"; | |
357 } | |
358 | |
359 fprintf(stdout,"General Options:\n"); | |
360 fprintf(stdout," -v|--version\t\tCurrent Version.\n"); | |
361 fprintf(stdout," -h\t\t\tShows the help file.\n"); | |
362 fprintf(stdout,"\n\n"); | |
363 | |
364 fprintf(stdout,"Indexing Options:\n"); | |
365 fprintf(stdout," --index [file]\t\tGenerate an index from the specified fasta file. \n"); | |
366 fprintf(stdout," -b\t\t\tIndicates the indexing will be done in batch mode.\n\t\t\tThe file specified in --index should contain the \n\t\t\tlist of fasta files.\n"); | |
367 fprintf(stdout," --ws [int]\t\tSet window size for indexing (default:12 max:14).\n"); | |
368 fprintf(stdout,"\n\n"); | |
369 | |
370 fprintf(stdout,"Searching Options:\n"); | |
371 fprintf(stdout," --search [file]\tSearch in the specified genome. Provide the path to the fasta file. \n\t\t\tIndex file should be in the same directory.\n"); | |
372 fprintf(stdout," -b\t\t\tIndicates the mapping will be done in batch mode. \n\t\t\tThe file specified in --search should contain the \n\t\t\tlist of fasta files.\n"); | |
373 fprintf(stdout," --pe \t\t\tSearch will be done in Paired-End mode.\n"); | |
374 fprintf(stdout," --seq [file]\t\tInput sequences in fasta/fastq format [file]. If \n\t\t\tpaired end reads are interleaved, use this option.\n"); | |
375 fprintf(stdout," --seq1 [file]\t\tInput sequences in fasta/fastq format [file] (First \n\t\t\tfile). Use this option to indicate the first file of \n\t\t\tpaired end reads. \n"); | |
376 fprintf(stdout," --seq2 [file]\t\tInput sequences in fasta/fastq format [file] (Second \n\t\t\tfile). Use this option to indicate the second file of \n\t\t\tpaired end reads. \n"); | |
377 fprintf(stdout," -o [file]\t\tOutput of the mapped sequences. The default is \"output\".\n"); | |
378 fprintf(stdout," -u [file]\t\tSave unmapped sequences in fasta/fastq format.\n"); | |
379 fprintf(stdout," --best \t\tOnly the best mapping from all the possible mapping is returned.\n"); | |
380 fprintf(stdout," --seqcomp \t\tIndicates that the input sequences are compressed (gz).\n"); | |
381 fprintf(stdout," --outcomp \t\tIndicates that output file should be compressed (gz).\n"); | |
382 fprintf(stdout," -e [int]\t\tMaximum allowed %s (default 2).\n", errorType); | |
383 fprintf(stdout," --min [int]\t\tMin distance allowed between a pair of end sequences.\n"); | |
384 fprintf(stdout," --max [int]\t\tMax distance allowed between a pair of end sequences.\n"); | |
385 | |
386 fprintf(stdout," --maxoea [int]\t\tMax number of One End Anchored (OEA) returned for each read pair. We recommend 100 or above for NovelSeq use.\n"); | |
387 fprintf(stdout," --maxdis [int]\t\tMax number of discordant map locations returned for each read pair. We recommend 300 or above for VariationHunter use.\n"); | |
388 } |