Mercurial > repos > xilinxu > xilinxu
comparison fastx_toolkit-0.0.6/src/fastx_artifacts_filter/fastx_artifacts_filter.c @ 3:997f5136985f draft default tip
Uploaded
author | xilinxu |
---|---|
date | Thu, 14 Aug 2014 04:52:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:dfe9332138cf | 3:997f5136985f |
---|---|
1 /* | |
2 FASTX-toolkit - FASTA/FASTQ preprocessing tools. | |
3 Copyright (C) 2009 A. Gordon (gordon@cshl.edu) | |
4 | |
5 This program is free software: you can redistribute it and/or modify | |
6 it under the terms of the GNU Affero General Public License as | |
7 published by the Free Software Foundation, either version 3 of the | |
8 License, or (at your option) any later version. | |
9 | |
10 This program is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 GNU Affero General Public License for more details. | |
14 | |
15 You should have received a copy of the GNU Affero General Public License | |
16 along with this program. If not, see <http://www.gnu.org/licenses/>. | |
17 */ | |
18 #include <limits.h> | |
19 #include <stdio.h> | |
20 #include <stdlib.h> | |
21 #include <string.h> | |
22 #include <getopt.h> | |
23 #include <errno.h> | |
24 #include <err.h> | |
25 | |
26 #include <config.h> | |
27 | |
28 #include "fastx.h" | |
29 #include "fastx_args.h" | |
30 | |
31 #define MAX_ADAPTER_LEN 100 | |
32 | |
33 const char* usage= | |
34 "usage: fastx_artifacts_filter [-h] [-v] [-z] [-i INFILE] [-o OUTFILE]\n" \ | |
35 "\n" \ | |
36 "version " VERSION "\n" \ | |
37 " [-h] = This helpful help screen.\n" \ | |
38 " [-i INFILE] = FASTA/Q input file. default is STDIN.\n" \ | |
39 " [-o OUTFILE] = FASTA/Q output file. default is STDOUT.\n" \ | |
40 " [-z] = Compress output with GZIP.\n" \ | |
41 " [-v] = Verbose - report number of processed reads.\n" \ | |
42 " If [-o] is specified, report will be printed to STDOUT.\n" \ | |
43 " If [-o] is not specified (and output goes to STDOUT),\n" \ | |
44 " report will be printed to STDERR.\n" \ | |
45 "\n"; | |
46 | |
47 #define DO_NOT_TRIM_LAST_BASE (0) | |
48 | |
49 FASTX fastx; | |
50 | |
/*
 * Parse the command line by delegating to fastx_parse_cmdline().
 *
 * An empty option string and a NULL callback are passed; presumably this
 * means the tool has no options beyond the common ones -- confirm against
 * fastx_args.h.
 *
 * Returns whatever fastx_parse_cmdline() returns.
 */
int parse_commandline(int argc, char* argv[])
{
	const int status = fastx_parse_cmdline(argc, argv, "", NULL);

	return status;
}
55 | |
56 int artifact_sequence(const FASTX *fastx) | |
57 { | |
58 int n_count=0; | |
59 int a_count=0; | |
60 int c_count=0; | |
61 int t_count=0; | |
62 int g_count=0; | |
63 int total_count=0; | |
64 | |
65 int max_allowed_different_bases = 3 ; | |
66 | |
67 int i=0; | |
68 | |
69 while (1) { | |
70 if (fastx->nucleotides[i]==0) | |
71 break; | |
72 | |
73 total_count++; | |
74 switch(fastx->nucleotides[i]) | |
75 { | |
76 case 'A': | |
77 a_count++; | |
78 break; | |
79 case 'C': | |
80 c_count++; | |
81 break; | |
82 case 'G': | |
83 g_count++; | |
84 break; | |
85 case 'T': | |
86 t_count++; | |
87 break; | |
88 case 'N': | |
89 n_count++; | |
90 break; | |
91 default: | |
92 errx(1, __FILE__":%d: invalid nucleotide value (%c) at position %d", | |
93 __LINE__, fastx->nucleotides[i], i ) ; | |
94 } | |
95 i++; | |
96 } | |
97 | |
98 //Rules for artifacts | |
99 | |
100 if ( a_count>=(total_count-max_allowed_different_bases) | |
101 || | |
102 c_count>=(total_count-max_allowed_different_bases) | |
103 || | |
104 g_count>=(total_count-max_allowed_different_bases) | |
105 || | |
106 t_count>=(total_count-max_allowed_different_bases) | |
107 ) | |
108 return 1; | |
109 | |
110 | |
111 return 0; | |
112 } | |
113 | |
114 int main(int argc, char* argv[]) | |
115 { | |
116 parse_commandline(argc, argv); | |
117 | |
118 fastx_init_reader(&fastx, get_input_filename(), | |
119 FASTA_OR_FASTQ, ALLOW_N, REQUIRE_UPPERCASE); | |
120 | |
121 fastx_init_writer(&fastx, get_output_filename(), | |
122 OUTPUT_SAME_AS_INPUT, compress_output_flag()); | |
123 | |
124 while ( fastx_read_next_record(&fastx) ) { | |
125 | |
126 if ( artifact_sequence(&fastx) ) { | |
127 } else { | |
128 fastx_write_record(&fastx); | |
129 } | |
130 } | |
131 | |
132 //Print verbose report | |
133 if ( verbose_flag() ) { | |
134 fprintf(get_report_file(), "Input: %zu reads.\n", num_input_reads(&fastx) ) ; | |
135 fprintf(get_report_file(), "Output: %zu reads.\n", num_output_reads(&fastx) ) ; | |
136 | |
137 size_t discarded = num_input_reads(&fastx) - num_output_reads(&fastx) ; | |
138 fprintf(get_report_file(), "discarded %zu (%zu%%) artifact reads.\n", | |
139 discarded, (discarded*100)/( num_input_reads(&fastx) ) ) ; | |
140 } | |
141 | |
142 return 0; | |
143 } |