3
|
/*
    FASTX-toolkit - FASTA/FASTQ preprocessing tools.
    Copyright (C) 2009 A. Gordon (gordon@cshl.edu)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
|
|
18 #include <limits.h>
|
|
19 #include <stdio.h>
|
|
20 #include <stdlib.h>
|
|
21 #include <string.h>
|
|
22 #include <getopt.h>
|
|
23 #include <errno.h>
|
|
24 #include <err.h>
|
|
25
|
|
26 #include <config.h>
|
|
27
|
|
28 #include "fastx.h"
|
|
29 #include "fastx_args.h"
|
|
30
|
|
31 #define MAX_ADAPTER_LEN 100
|
|
32
|
|
33 const char* usage=
|
|
34 "usage: fastx_trimmer [-h] [-f N] [-l N] [-z] [-v] [-i INFILE] [-o OUTFILE]\n" \
|
|
35 "\n" \
|
|
36 "version " VERSION "\n" \
|
|
37 " [-h] = This helpful help screen.\n" \
|
|
38 " [-f N] = First base to keep. Default is 1 (=first base).\n" \
|
|
39 " [-l N] = Last base to keep. Default is entire read.\n" \
|
|
40 " [-z] = Compress output with GZIP.\n" \
|
|
41 " [-i INFILE] = FASTA/Q input file. default is STDIN.\n" \
|
|
42 " [-o OUTFILE] = FASTA/Q output file. default is STDOUT.\n" \
|
|
43 "\n";
|
|
44
|
|
45 #define DO_NOT_TRIM_LAST_BASE (0)
|
|
46
|
|
47 int keep_first_base=1;
|
|
48 int keep_last_base=DO_NOT_TRIM_LAST_BASE;
|
|
49
|
|
50 FASTX fastx;
|
|
51
|
|
52 int parse_program_args(int __attribute__((unused)) optind, int optc, char* optarg)
|
|
53 {
|
|
54 switch(optc) {
|
|
55 case 'f':
|
|
56 if (optarg==NULL)
|
|
57 errx(1, "[-f] parameter requires an argument value");
|
|
58 keep_first_base = strtoul(optarg,NULL,10);
|
|
59 if (keep_first_base<=0 || keep_first_base>=MAX_SEQ_LINE_LENGTH)
|
|
60 errx(1,"Invalid number bases to keep (-f %s)", optarg);
|
|
61 break;
|
|
62
|
|
63 case 'l':
|
|
64 if (optarg==NULL)
|
|
65 errx(1, "[-l] parameter requires an argument value");
|
|
66 keep_last_base = strtoul(optarg,NULL,10);
|
|
67 if (keep_last_base<=0 || keep_last_base>=MAX_SEQ_LINE_LENGTH)
|
|
68 errx(1,"Invalid number bases to keep (-l %s)", optarg);
|
|
69 break;
|
|
70
|
|
71 default:
|
|
72 errx(1, __FILE__ ":%d: Unknown argument (%c)", __LINE__, optc ) ;
|
|
73
|
|
74 }
|
|
75 return 1;
|
|
76 }
|
|
77
|
|
78
|
|
79 int main(int argc, char* argv[])
|
|
80 {
|
|
81 size_t i;
|
|
82
|
|
83 fastx_parse_cmdline(argc, argv, "l:f:", parse_program_args);
|
|
84
|
|
85 fastx_init_reader(&fastx, get_input_filename(),
|
|
86 FASTA_OR_FASTQ, ALLOW_N, REQUIRE_UPPERCASE);
|
|
87
|
|
88 fastx_init_writer(&fastx, get_output_filename(), OUTPUT_SAME_AS_INPUT, compress_output_flag());
|
|
89
|
|
90 while ( fastx_read_next_record(&fastx) ) {
|
|
91
|
|
92 if (keep_last_base != DO_NOT_TRIM_LAST_BASE) {
|
|
93 fastx.nucleotides[keep_last_base] = 0 ;
|
|
94 }
|
|
95
|
|
96 if (keep_first_base != 1) {
|
|
97 for (i=0; i < strlen(fastx.nucleotides)-keep_first_base+1 ; i++) {
|
|
98 fastx.nucleotides[i] = fastx.nucleotides[i+keep_first_base-1];
|
|
99 fastx.quality[i] = fastx.quality[i+keep_first_base-1];
|
|
100 }
|
|
101 fastx.nucleotides[i] = 0 ;
|
|
102 }
|
|
103
|
|
104 //none of the above condition matched, so print this sequence.
|
|
105 fastx_write_record(&fastx);
|
|
106 }
|
|
107
|
|
108 if ( verbose_flag() ) {
|
|
109 fprintf(get_report_file(), "Trimming: base %d to %d\n", keep_first_base, keep_last_base ) ;
|
|
110 fprintf(get_report_file(), "Input: %zu reads.\n", num_input_reads(&fastx) ) ;
|
|
111 fprintf(get_report_file(), "Output: %zu reads.\n", num_output_reads(&fastx) ) ;
|
|
112 }
|
|
113 return 0;
|
|
114 }
|