Mercurial > repos > xilinxu > xilinxu
comparison fastx_toolkit-0.0.6/src/fastx_collapser/fastx_collapser.cpp @ 3:997f5136985f draft default tip
Uploaded
author | xilinxu |
---|---|
date | Thu, 14 Aug 2014 04:52:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:dfe9332138cf | 3:997f5136985f |
---|---|
1 /* | |
2 FASTX-toolkit - FASTA/FASTQ preprocessing tools. | |
3 Copyright (C) 2009 A. Gordon (gordon@cshl.edu) | |
4 | |
5 This program is free software: you can redistribute it and/or modify | |
6 it under the terms of the GNU Affero General Public License as | |
7 published by the Free Software Foundation, either version 3 of the | |
8 License, or (at your option) any later version. | |
9 | |
10 This program is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 GNU Affero General Public License for more details. | |
14 | |
15 You should have received a copy of the GNU Affero General Public License | |
16 along with this program. If not, see <http://www.gnu.org/licenses/>. | |
17 */ | |
18 #include <err.h> | |
19 #include <getopt.h> | |
20 #include <string.h> | |
21 #include <algorithm> | |
22 #include <cstdlib> | |
23 #include <ios> | |
24 #include <iostream> | |
25 #include <string> | |
26 #include <ostream> | |
27 #include <fstream> | |
28 #include <map> | |
29 #include <list> | |
30 | |
31 #include <config.h> | |
32 | |
33 #include "fastx.h" | |
34 #include "fastx_args.h" | |
35 | |
36 using namespace std; | |
37 | |
38 const char* usage= | |
39 "usage: fastx_collapser [-h] [-v] [-i INFILE] [-o OUTFILE]\n" \ | |
40 "\n" \ | |
41 "version " VERSION "\n" \ | |
42 " [-h] = This helpful help screen.\n" \ | |
43 " [-v] = verbose: print short summary of input/output counts\n" \ | |
44 " [-i INFILE] = FASTA/Q input file. default is STDIN.\n" \ | |
45 " [-o OUTFILE] = FASTA/Q output file. default is STDOUT.\n" \ | |
46 "\n"; | |
47 | |
48 FASTX fastx; | |
49 #include <tr1/unordered_map> | |
50 std::tr1::unordered_map<string,size_t> collapsed_sequences; | |
51 std::list< pair<string,size_t> > sorted_collapsed_sequences ; | |
52 | |
53 struct PrintCollapsedSequence | |
54 { | |
55 size_t counter; | |
56 size_t total_reads ; | |
57 | |
58 ostream &output ; | |
59 PrintCollapsedSequence( ostream& _output ) : | |
60 counter(0), | |
61 total_reads(0), | |
62 output(_output) {} | |
63 | |
64 void operator() ( const std::pair<string, int> & sequence ) | |
65 { | |
66 counter++; | |
67 total_reads += sequence.second ; | |
68 output << ">" << counter << "-" << sequence.second << endl << sequence.first << endl ; | |
69 } | |
70 }; | |
71 | |
72 bool sort_by_abundance_count ( const pair<string, size_t> & sequence1, const pair<string, size_t>& sequence2 ) | |
73 { | |
74 return sequence1.second < sequence2.second ; | |
75 } | |
76 | |
77 int main(int argc, char* argv[]) | |
78 { | |
79 ofstream output_file ; | |
80 | |
81 fastx_parse_cmdline(argc, argv, "", NULL ); | |
82 | |
83 fastx_init_reader(&fastx, get_input_filename(), | |
84 FASTA_OR_FASTQ, ALLOW_N, REQUIRE_UPPERCASE); | |
85 | |
86 bool use_stdout = true; | |
87 if ( strcmp(get_output_filename(), "-")!=0 ) { | |
88 use_stdout = false; | |
89 output_file.open(get_output_filename()); | |
90 if (!output_file) | |
91 errx(1,"Failed to create output file (%s)", get_output_filename() ); | |
92 } | |
93 ostream& real_output = (use_stdout) ? cout : output_file ; | |
94 | |
95 while ( fastx_read_next_record(&fastx) ) { | |
96 collapsed_sequences[string(fastx.nucleotides)]++ ; | |
97 } | |
98 | |
99 copy ( collapsed_sequences.begin(), collapsed_sequences.end(), | |
100 back_inserter(sorted_collapsed_sequences) ) ; | |
101 | |
102 sorted_collapsed_sequences.sort ( sort_by_abundance_count ) ; | |
103 | |
104 PrintCollapsedSequence stats = for_each ( sorted_collapsed_sequences.rbegin(), | |
105 sorted_collapsed_sequences.rend(), PrintCollapsedSequence(real_output) ) ; | |
106 | |
107 if (stats.total_reads != num_input_reads(&fastx)) | |
108 errx(1,"Internal error: stats.total_reads (%zu) != num_input_reads(&fastx) (%zu).\n", | |
109 stats.total_reads, num_input_reads(&fastx) ); | |
110 | |
111 if ( verbose_flag() ) { | |
112 fprintf(get_report_file(), "Collapsd %zu reads into %zu unique sequences.\n", | |
113 num_input_reads(&fastx), stats.counter) ; | |
114 } | |
115 return 0; | |
116 } |