Mercurial > repos > dereeper > sniplay
comparison egglib/egglib-2.1.5/include/egglib-cpp/Fasta.hpp @ 9:98c37a5d67f4 draft
Uploaded
author | dereeper |
---|---|
date | Wed, 07 Feb 2018 22:08:47 -0500 |
parents | 420b57c3c185 |
children |
comparison
equal
deleted
inserted
replaced
8:6bf69b40365c | 9:98c37a5d67f4 |
---|---|
1 /* | |
2 Copyright 2008-2009 Stéphane De Mita, Mathieu Siol | |
3 | |
4 This file is part of the EggLib library. | |
5 | |
6 EggLib is free software: you can redistribute it and/or modify | |
7 it under the terms of the GNU General Public License as published by | |
8 the Free Software Foundation, either version 3 of the License, or | |
9 (at your option) any later version. | |
10 | |
11 EggLib is distributed in the hope that it will be useful, | |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 GNU General Public License for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>. | |
18 */ | |
19 | |
20 #ifndef EGGLIB_FASTA_HPP | |
21 #define EGGLIB_FASTA_HPP | |
22 | |
23 #include <istream> | |
24 #include <iostream> | |
25 #include <string> | |
26 #include "Container.hpp" | |
27 | |
28 namespace egglib { | |
29 | |
30 /** \brief Fasta parser/formatter | |
31 * | |
32 * \ingroup core | |
33 * | |
34 * Reads a multifasta sequence file from a string, a stream or a file | |
35 * and returns a Container. See the description of the format below. | |
36 * Formats a fasta string from a sequence container object and places | |
37 * it in a string, a stream or a file. All methods are static and the | |
38 * class cannot be instantiated. The methods parsef and formatf will | |
39 * open the file for you while the others will read/write directly | |
40 * in a string or an already open stream. | |
41 * | |
42 * Specifications of the fasta format: | |
43 * | |
44 * - The number of sequences is not limited. | |
45 * | |
46 * - Each sequence is preceded by a header limited to a single | |
47 * line and starting with a ">" character. | |
48 * | |
49 * - The header length is not limited and all characters are | |
50 * allowed but white spaces and special characters are | |
51 * discouraged. | |
52 * | |
53 * - Group indices are specified by \@0, \@1, \@2... strings | |
54 * appearing at the end of the header string (just before the | |
55 * carriage return). Note that group labels are ignored by | |
56 * default. | |
57 * | |
58 * - Group indices are ignored unless explicitly requested through | |
59 * the importGroupLabels argument of the parsing methods. | |
60 * | |
61 * - The sequence itself continues on following lines until the | |
62 * next ">" character or the end of the file. | |
63 * | |
64 * - White spaces, tabs and carriage returns are allowed at any | |
65 * position. There is no limitation in length and different | |
66 * sequences can have different lengths. | |
67 * | |
68 * - Although the standard is lower case characters, Fasta | |
69 * assumes upper case characters: lower case characters are | |
70 * accepted but are converted to upper case characters, so | |
71 * information coded by change in case is lost. | |
72 * | |
73 */ | |
74 class Fasta { | |
75 | |
76 public: | |
77 | |
78 /** \brief Imports a fasta file | |
79 * | |
80 * Imports the content of the file as is. Calls the | |
81 * stream-based parse() overload by creating its own istream. | |
82 * | |
83 * \param fname the name of a fasta file. | |
84 * | |
85 * \param importGroupLabels if set to true, scan automatically | |
86 * for groups. The format is @ followed by an integer, placed | |
87 * at the end of the header string (sequences without labels | |
88 * will be treated as \@0). | |
89 * | |
90 * \return A Container object containing the sequences. | |
91 * | |
92 */ | |
93 static Container parsef(const char* fname, bool importGroupLabels=false); | |
94 | |
95 | |
96 /** \brief Imports a fasta file into an existing Container | |
97 * | |
98 * Imports the content of the file as is. Calls the | |
99 * stream-based parse() overload by creating its own istream. This | |
100 * method expects a reference to a Container to which the | |
101 * sequences will be appended. | |
102 * | |
103 * \param fname the name of a fasta file. | |
104 * | |
105 * \param container a Container instance, empty or not. | |
106 * | |
107 * \param importGroupLabels if set to true, scan automatically | |
108 * for groups. The format is @ followed by an integer, placed | |
109 * at the end of the header string (sequences without labels | |
110 * will be treated as \@0). | |
111 * | |
112 * \return Nothing: the new sequences are appended to the | |
113 * Container passed as argument. | |
114 * | |
115 */ | |
116 static void parsef(const char* fname, Container& container, bool importGroupLabels=false); | |
117 | |
118 | |
119 /** \brief Imports fasta data from a string | |
120 * | |
121 * Imports the content of the string as is. Calls the | |
122 * stream-based parse() overload by creating its own istream. | |
123 * | |
124 * \param str a string containing the data. | |
125 * | |
126 * \param importGroupLabels if set to true, scan automatically | |
127 * for groups. The format is @ followed by an integer, placed | |
128 * at the end of the header string (sequences without labels | |
129 * will be treated as \@0). | |
130 * | |
131 * \return A Container object containing the sequences. | |
132 * | |
133 */ | |
134 static Container parse(const std::string& str, bool importGroupLabels=false); | |
135 | |
136 | |
137 /** \brief Imports fasta data from a string into an existing Container | |
138 * | |
139 * Imports the content of the string as is. Calls the | |
140 * stream-based parse() overload by creating its own istream. This | |
141 * method expects a reference to a Container to which the | |
142 * sequences will be appended. | |
143 * | |
144 * \param str a string containing the data. | |
145 * | |
146 * \param container a Container instance, empty or not. | |
147 * | |
148 * \param importGroupLabels if set to true, scan automatically | |
149 * for groups. The format is @ followed by an integer, placed | |
150 * at the end of the header string (sequences without labels | |
151 * will be treated as \@0). | |
152 * | |
153 * \return Nothing: new sequences are appended to the Container | |
154 * passed as argument. | |
155 * | |
156 */ | |
157 static void parse(const std::string& str, Container& container, bool importGroupLabels=false); | |
158 | |
159 | |
160 /** \brief Imports a fasta file from an open stream | |
161 * | |
162 * Imports the content of the stream as is. | |
163 * | |
164 * \param stream an open stream (file or string) containing the | |
165 * data. | |
166 * | |
167 * \param importGroupLabels if set to true, scan automatically | |
168 * for groups. The format is @ followed by an integer, placed | |
169 * at the end of the header string (sequences without labels | |
170 * will be treated as \@0). | |
171 * | |
172 * \return A Container object containing the sequences. | |
173 * | |
174 */ | |
175 static Container parse(std::istream& stream, bool importGroupLabels=false); | |
176 | |
177 | |
178 /** \brief Imports a fasta file from an open stream into an existing Container | |
179 * | |
180 * Imports the content of the stream as is. This | |
181 * method expects a reference to a Container to which the | |
182 * sequences will be appended. | |
183 * | |
184 * \param stream an open stream (file or string) containing the | |
185 * data. | |
186 * | |
187 * \param container a Container instance, empty or not. | |
188 * | |
189 * \param importGroupLabels if set to true, scan automatically | |
190 * for groups. The format is @ followed by an integer, placed | |
191 * at the end of the header string (sequences without labels | |
192 * will be treated as \@0). | |
193 * | |
194 * \return Nothing: the new sequences are appended to the | |
195 * Container passed as argument. | |
196 * | |
197 */ | |
198 static void parse(std::istream& stream, Container& container, bool importGroupLabels=false); | |
199 | |
200 | |
201 /** \brief Exports sequences as fasta to a file | |
202 * | |
203 * \param fname the name of the file where to place the result. | |
204 * | |
205 * \param container Container object to export. | |
206 * | |
207 * \param exportGroupLabels if set to true, exports group | |
208 * indices as a \@x at the end of the sequence name, where x is | |
209 * the group index. Otherwise, this information is discarded. | |
210 * | |
211 * \param lineLength the number of characters to place on a | |
212 * single line. If zero, no newlines are inserted within | |
213 * sequences. | |
214 * | |
215 */ | |
216 static void formatf(const char* fname, const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50); | |
217 | |
218 | |
219 /** \brief Exports sequences as fasta to an open stream | |
220 * | |
221 * \param file an open stream. | |
222 * | |
223 * \param container Container object to export. | |
224 * | |
225 * \param exportGroupLabels if set to true, exports group | |
226 * indices as a \@x at the end of the sequence name, where x is | |
227 * the group index. Otherwise, this information is discarded. | |
228 * | |
229 * \param lineLength the number of characters to place on a | |
230 * single line. If zero, no newlines are inserted within | |
231 * sequences. | |
232 * | |
233 */ | |
234 static void format(std::ostream& file, const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50); | |
235 | |
236 | |
237 /** \brief Exports sequences as fasta to a string | |
238 * | |
239 * This method internally creates an ostringstream, calls the | |
240 * stream-based format() method and returns the | |
241 * resulting string. | |
242 * | |
243 * \param container Container object to export. | |
244 * | |
245 * \param exportGroupLabels if set to true, exports group | |
246 * indices as a \@x at the end of the sequence name, where x is | |
247 * the group index. Otherwise, this information is discarded. | |
248 * | |
249 * \param lineLength the number of characters to place on a | |
250 * single line. If zero, no newlines are inserted within | |
251 * sequences. | |
252 * | |
253 * \return The formatted string. | |
254 * | |
255 */ | |
256 static std::string format(const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50); | |
257 | |
258 | |
259 | |
260 protected: | |
261 | |
262 /// This class cannot be instantiated | |
263 Fasta() { } | |
264 | |
265 /// This class cannot be copied | |
266 Fasta(const Fasta& source) { } | |
267 | |
268 /// This class cannot be assigned or copied | |
269 Fasta& operator=(const Fasta& source) { return *this; } | |
270 | |
271 /// This class cannot be instantiated | |
272 virtual ~Fasta() { } | |
273 | |
274 | |
275 }; | |
276 } | |
277 | |
278 #endif |