annotate fastx_toolkit-0.0.6/src/fastq_to_fasta/fastq_to_fasta.c @ 3:997f5136985f draft default tip

Uploaded
author xilinxu
date Thu, 14 Aug 2014 04:52:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
997f5136985f Uploaded
xilinxu
parents:
diff changeset
1 /*
997f5136985f Uploaded
xilinxu
parents:
diff changeset
2 FASTX-toolkit - FASTA/FASTQ preprocessing tools.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
3 Copyright (C) 2009 A. Gordon (gordon@cshl.edu)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
4
997f5136985f Uploaded
xilinxu
parents:
diff changeset
5 This program is free software: you can redistribute it and/or modify
997f5136985f Uploaded
xilinxu
parents:
diff changeset
6 it under the terms of the GNU Affero General Public License as
997f5136985f Uploaded
xilinxu
parents:
diff changeset
7 published by the Free Software Foundation, either version 3 of the
997f5136985f Uploaded
xilinxu
parents:
diff changeset
8 License, or (at your option) any later version.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
9
997f5136985f Uploaded
xilinxu
parents:
diff changeset
10 This program is distributed in the hope that it will be useful,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
997f5136985f Uploaded
xilinxu
parents:
diff changeset
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
997f5136985f Uploaded
xilinxu
parents:
diff changeset
13 GNU Affero General Public License for more details.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
14
997f5136985f Uploaded
xilinxu
parents:
diff changeset
15 You should have received a copy of the GNU Affero General Public License
997f5136985f Uploaded
xilinxu
parents:
diff changeset
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
17 */
997f5136985f Uploaded
xilinxu
parents:
diff changeset
18 #include <limits.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
19 #include <stdio.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
20 #include <stdlib.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
21 #include <string.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
22 #include <getopt.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
23 #include <errno.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
24 #include <err.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
25
997f5136985f Uploaded
xilinxu
parents:
diff changeset
26 #include <config.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
27
997f5136985f Uploaded
xilinxu
parents:
diff changeset
28 #include "fastx.h"
997f5136985f Uploaded
xilinxu
parents:
diff changeset
29 #include "fastx_args.h"
997f5136985f Uploaded
xilinxu
parents:
diff changeset
30
997f5136985f Uploaded
xilinxu
parents:
diff changeset
31 const char* usage=
997f5136985f Uploaded
xilinxu
parents:
diff changeset
32 "usage: fastq_to_fasta [-h] [-r] [-n] [-v] [-z] [-i INFILE] [-o OUTFILE]\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
33 "\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
34 "version " VERSION "\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
35 " [-h] = This helpful help screen.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
36 " [-r] = Rename sequence identifiers to numbers.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
37 " [-n] = keep sequences with unknown (N) nucleotides.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
38 " Default is to discard such sequences.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
39 " [-v] = Verbose - report number of sequences.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
40 " If [-o] is specified, report will be printed to STDOUT.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
41 " If [-o] is not specified (and output goes to STDOUT),\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
42 " report will be printed to STDERR.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
43 " [-z] = Compress output with GZIP.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
44 " [-i INFILE] = FASTA/Q input file. default is STDIN.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
45 " [-o OUTFILE] = FASTA output file. default is STDOUT.\n" \
997f5136985f Uploaded
xilinxu
parents:
diff changeset
46 "\n";
997f5136985f Uploaded
xilinxu
parents:
diff changeset
47
997f5136985f Uploaded
xilinxu
parents:
diff changeset
48 FASTX fastx;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
49 int flag_rename_seqid = 0;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
50 int flag_discard_N = 1 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
51
997f5136985f Uploaded
xilinxu
parents:
diff changeset
52 int parse_program_args(int __attribute__((unused)) optind, int optc, char __attribute__((unused)) *optarg)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
53 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
54 switch(optc) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
55 case 'n':
997f5136985f Uploaded
xilinxu
parents:
diff changeset
56 flag_discard_N = 0 ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
57 break;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
58
997f5136985f Uploaded
xilinxu
parents:
diff changeset
59 case 'r':
997f5136985f Uploaded
xilinxu
parents:
diff changeset
60 flag_rename_seqid = 1;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
61 break;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
62 default:
997f5136985f Uploaded
xilinxu
parents:
diff changeset
63 errx(1, __FILE__ ":%d: Unknown argument (%c)", __LINE__, optc ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
64 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
65 return 1;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
66 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
67
997f5136985f Uploaded
xilinxu
parents:
diff changeset
68
997f5136985f Uploaded
xilinxu
parents:
diff changeset
69 int main(int argc, char* argv[])
997f5136985f Uploaded
xilinxu
parents:
diff changeset
70 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
71 fastx_parse_cmdline(argc, argv, "rn", parse_program_args);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
72
997f5136985f Uploaded
xilinxu
parents:
diff changeset
73 fastx_init_reader(&fastx, get_input_filename(),
997f5136985f Uploaded
xilinxu
parents:
diff changeset
74 FASTQ_ONLY, ALLOW_N, REQUIRE_UPPERCASE);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
75
997f5136985f Uploaded
xilinxu
parents:
diff changeset
76 fastx_init_writer(&fastx, get_output_filename(), OUTPUT_FASTA, compress_output_flag());
997f5136985f Uploaded
xilinxu
parents:
diff changeset
77
997f5136985f Uploaded
xilinxu
parents:
diff changeset
78 while ( fastx_read_next_record(&fastx) ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
79 //See if the input sequence contained 'N' nucleotides
997f5136985f Uploaded
xilinxu
parents:
diff changeset
80 if ( flag_discard_N && (strchr(fastx.nucleotides,'N') != NULL))
997f5136985f Uploaded
xilinxu
parents:
diff changeset
81 continue;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
82
997f5136985f Uploaded
xilinxu
parents:
diff changeset
83 if ( flag_rename_seqid )
997f5136985f Uploaded
xilinxu
parents:
diff changeset
84 snprintf(fastx.name, sizeof(fastx.name), "%zu", num_output_reads(&fastx)+1) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
85
997f5136985f Uploaded
xilinxu
parents:
diff changeset
86 fastx_write_record(&fastx);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
87 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
88
997f5136985f Uploaded
xilinxu
parents:
diff changeset
89 //Print verbose report
997f5136985f Uploaded
xilinxu
parents:
diff changeset
90 if ( verbose_flag() ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
91 fprintf(get_report_file(), "Input: %zu reads.\n", num_input_reads(&fastx) ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
92 fprintf(get_report_file(), "Output: %zu reads.\n", num_output_reads(&fastx) ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
93
997f5136985f Uploaded
xilinxu
parents:
diff changeset
94 if ( flag_discard_N ) {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
95 size_t discarded = num_input_reads(&fastx) - num_output_reads(&fastx) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
96 fprintf(get_report_file(), "discarded %zu (%zu%%) low-quality reads.\n",
997f5136985f Uploaded
xilinxu
parents:
diff changeset
97 discarded, (discarded*100)/( num_input_reads(&fastx) ) ) ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
98 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
99 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
100
997f5136985f Uploaded
xilinxu
parents:
diff changeset
101 return 0;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
102 }