annotate fastx_toolkit-0.0.6/src/fastx_trimmer/fastx_trimmer.c @ 3:997f5136985f draft default tip

Uploaded
author xilinxu
date Thu, 14 Aug 2014 04:52:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
997f5136985f Uploaded
xilinxu
parents:
diff changeset
1 /*
997f5136985f Uploaded
xilinxu
parents:
diff changeset
2 FASTX-toolkit - FASTA/FASTQ preprocessing tools.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
3 Copyright (C) 2009 A. Gordon (gordon@cshl.edu)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
4
997f5136985f Uploaded
xilinxu
parents:
diff changeset
5 This program is free software: you can redistribute it and/or modify
997f5136985f Uploaded
xilinxu
parents:
diff changeset
6 it under the terms of the GNU Affero General Public License as
997f5136985f Uploaded
xilinxu
parents:
diff changeset
7 published by the Free Software Foundation, either version 3 of the
997f5136985f Uploaded
xilinxu
parents:
diff changeset
8 License, or (at your option) any later version.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
9
997f5136985f Uploaded
xilinxu
parents:
diff changeset
10 This program is distributed in the hope that it will be useful,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
997f5136985f Uploaded
xilinxu
parents:
diff changeset
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
997f5136985f Uploaded
xilinxu
parents:
diff changeset
13 GNU Affero General Public License for more details.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
14
997f5136985f Uploaded
xilinxu
parents:
diff changeset
15 You should have received a copy of the GNU Affero General Public License
997f5136985f Uploaded
xilinxu
parents:
diff changeset
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
17 */
997f5136985f Uploaded
xilinxu
parents:
diff changeset
18 #include <limits.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
19 #include <stdio.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
20 #include <stdlib.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
21 #include <string.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
22 #include <getopt.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
23 #include <errno.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
24 #include <err.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
25
997f5136985f Uploaded
xilinxu
parents:
diff changeset
26 #include <config.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
27
997f5136985f Uploaded
xilinxu
parents:
diff changeset
28 #include "fastx.h"
997f5136985f Uploaded
xilinxu
parents:
diff changeset
29 #include "fastx_args.h"
997f5136985f Uploaded
xilinxu
parents:
diff changeset
30
997f5136985f Uploaded
xilinxu
parents:
diff changeset
31 #define MAX_ADAPTER_LEN 100
997f5136985f Uploaded
xilinxu
parents:
diff changeset
32
997f5136985f Uploaded
xilinxu
parents:
diff changeset
33 const char* usage=
997f5136985f Uploaded
xilinxu
parents:
diff changeset
34 "usage: fastx_trimmer [-h] [-f N] [-l N] [-z] [-v] [-i INFILE] [-o OUTFILE]\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
35 "\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
36 "version " VERSION "\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
37 " [-h] = This helpful help screen.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
38 " [-f N] = First base to keep. Default is 1 (=first base).\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
39 " [-l N] = Last base to keep. Default is entire read.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
40 " [-z] = Compress output with GZIP.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
41 " [-i INFILE] = FASTA/Q input file. default is STDIN.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
42 " [-o OUTFILE] = FASTA/Q output file. default is STDOUT.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
43 "\n";
997f5136985f Uploaded
xilinxu
parents:
diff changeset
44
997f5136985f Uploaded
xilinxu
parents:
diff changeset
45 #define DO_NOT_TRIM_LAST_BASE (0)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
46
997f5136985f Uploaded
xilinxu
parents:
diff changeset
47 int keep_first_base=1;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
48 int keep_last_base=DO_NOT_TRIM_LAST_BASE;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
49
997f5136985f Uploaded
xilinxu
parents:
diff changeset
50 FASTX fastx;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
51
997f5136985f Uploaded
xilinxu
parents:
diff changeset
52 int parse_program_args(int __attribute__((unused)) optind, int optc, char* optarg)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
53 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
54 switch(optc) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
55 case 'f':
997f5136985f Uploaded
xilinxu
parents:
diff changeset
56 if (optarg==NULL)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
57 errx(1, "[-f] parameter requires an argument value");
997f5136985f Uploaded
xilinxu
parents:
diff changeset
58 keep_first_base = strtoul(optarg,NULL,10);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
59 if (keep_first_base<=0 || keep_first_base>=MAX_SEQ_LINE_LENGTH)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
60 errx(1,"Invalid number bases to keep (-f %s)", optarg);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
61 break;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
62
997f5136985f Uploaded
xilinxu
parents:
diff changeset
63 case 'l':
997f5136985f Uploaded
xilinxu
parents:
diff changeset
64 if (optarg==NULL)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
65 errx(1, "[-l] parameter requires an argument value");
997f5136985f Uploaded
xilinxu
parents:
diff changeset
66 keep_last_base = strtoul(optarg,NULL,10);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
67 if (keep_last_base<=0 || keep_last_base>=MAX_SEQ_LINE_LENGTH)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
68 errx(1,"Invalid number bases to keep (-l %s)", optarg);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
69 break;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
70
997f5136985f Uploaded
xilinxu
parents:
diff changeset
71 default:
997f5136985f Uploaded
xilinxu
parents:
diff changeset
72 errx(1, __FILE__ ":%d: Unknown argument (%c)", __LINE__, optc ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
73
997f5136985f Uploaded
xilinxu
parents:
diff changeset
74 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
75 return 1;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
76 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
77
997f5136985f Uploaded
xilinxu
parents:
diff changeset
78
997f5136985f Uploaded
xilinxu
parents:
diff changeset
79 int main(int argc, char* argv[])
997f5136985f Uploaded
xilinxu
parents:
diff changeset
80 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
81 size_t i;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
82
997f5136985f Uploaded
xilinxu
parents:
diff changeset
83 fastx_parse_cmdline(argc, argv, "l:f:", parse_program_args);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
84
997f5136985f Uploaded
xilinxu
parents:
diff changeset
85 fastx_init_reader(&fastx, get_input_filename(),
997f5136985f Uploaded
xilinxu
parents:
diff changeset
86 FASTA_OR_FASTQ, ALLOW_N, REQUIRE_UPPERCASE);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
87
997f5136985f Uploaded
xilinxu
parents:
diff changeset
88 fastx_init_writer(&fastx, get_output_filename(), OUTPUT_SAME_AS_INPUT, compress_output_flag());
997f5136985f Uploaded
xilinxu
parents:
diff changeset
89
997f5136985f Uploaded
xilinxu
parents:
diff changeset
90 while ( fastx_read_next_record(&fastx) ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
91
997f5136985f Uploaded
xilinxu
parents:
diff changeset
92 if (keep_last_base != DO_NOT_TRIM_LAST_BASE) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
93 fastx.nucleotides[keep_last_base] = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
94 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
95
997f5136985f Uploaded
xilinxu
parents:
diff changeset
96 if (keep_first_base != 1) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
97 for (i=0; i < strlen(fastx.nucleotides)-keep_first_base+1 ; i++) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
98 fastx.nucleotides[i] = fastx.nucleotides[i+keep_first_base-1];
997f5136985f Uploaded
xilinxu
parents:
diff changeset
99 fastx.quality[i] = fastx.quality[i+keep_first_base-1];
997f5136985f Uploaded
xilinxu
parents:
diff changeset
100 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
101 fastx.nucleotides[i] = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
102 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
103
997f5136985f Uploaded
xilinxu
parents:
diff changeset
104 //none of the above condition matched, so print this sequence.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
105 fastx_write_record(&fastx);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
106 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
107
997f5136985f Uploaded
xilinxu
parents:
diff changeset
108 if ( verbose_flag() ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
109 fprintf(get_report_file(), "Trimming: base %d to %d\n", keep_first_base, keep_last_base ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
110 fprintf(get_report_file(), "Input: %zu reads.\n", num_input_reads(&fastx) ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
111 fprintf(get_report_file(), "Output: %zu reads.\n", num_output_reads(&fastx) ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
112 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
113 return 0;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
114 }