Mercurial > repos > dereeper > sniplay
comparison egglib/egglib-2.1.5/include/egglib-cpp/Align.hpp @ 9:98c37a5d67f4 draft
Uploaded
author | dereeper |
---|---|
date | Wed, 07 Feb 2018 22:08:47 -0500 |
parents | 420b57c3c185 |
children |
comparison
equal
deleted
inserted
replaced
8:6bf69b40365c | 9:98c37a5d67f4 |
---|---|
1 /* | |
2 Copyright 2008-2009 Stéphane De Mita, Mathieu Siol | |
3 | |
4 This file is part of the EggLib library. | |
5 | |
6 EggLib is free software: you can redistribute it and/or modify | |
7 it under the terms of the GNU General Public License as published by | |
8 the Free Software Foundation, either version 3 of the License, or | |
9 (at your option) any later version. | |
10 | |
11 EggLib is distributed in the hope that it will be useful, | |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 GNU General Public License for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>. | |
18 */ | |
19 | |
20 | |
21 #ifndef EGGLIB_ALIGN_HPP | |
22 #define EGGLIB_ALIGN_HPP | |
23 | |
24 #include "Container.hpp" | |
25 #include "CharMatrix.hpp" | |
26 #include <vector> | |
27 | |
28 /** \mainpage Summary | |
29 * | |
30 * This is the automatically-generated reference manual of the C++ | |
31 * egglib-cpp library. The library is presented as several modules, but | |
32 * note that they are only used to structure the documentation. | |
33 * | |
34 * There is a single namespace (egglib) in which all classes are | |
35 * defined. See an example of programming with egglib-cpp in the | |
36 * EggLib package main documentation. Use "Modules" or "Classes" above | |
37 * to navigate in the library reference manual. | |
38 * | |
39 */ | |
40 | |
41 | |
42 /** \defgroup core core | |
43 * | |
44 * \brief Central core of the C++ library of Egglib | |
45 * | |
46 * Data storage classes, parsers/formatters and tools, plus exception | |
47 * types. | |
48 * | |
49 */ | |
50 | |
51 namespace egglib { | |
52 | |
53 | |
54 /** \brief Handles a sequence alignment | |
55 * | |
56 * \ingroup core | |
57 * | |
58 * Creation from a file or string stream should be performed using | |
59 * the class Fasta. Align objects can be created by deep copy from | |
60 * both Align and Container type. In the latter case, the lengths are | |
61 * artificially equalized by "?" characters. Align objects can be | |
62 * created from a DataMatrix object (and the other way around) using | |
63 * the specific class DMAConverter. | |
64 * | |
65 * Sequences are represented by two strings (name and sequence) and | |
66 * an integer (group) that can be accessed or modified by index. The | |
67 * order of sequences is guaranteed to be conserved, as if Align was | |
68 * a list of triplets (name, sequence, group). | |
69 * | |
70 * The data matrix is implemented as a continuous array (char**) and | |
71 * allows efficient access and modification of data. For very large | |
72 * data matrices you might claim immediately the required memory | |
73 * using the constructor Align(unsigned int, unsigned int). | |
74 * | |
75 */ | |
76 class Align : public Container, public CharMatrix { | |
77 public: | |
78 | |
79 /** \brief Creates an empty alignment | |
80 * | |
81 */ | |
82 Align(); | |
83 | |
84 | |
85 /** \brief Creates an alignment from a data matrix. | |
86 * | |
87 * Allows you to create an object from data stored in a char* | |
88 * array. The array's dimensions must be passed to the | |
89 * constructor, and as a result there is no need to | |
90 * terminate each sequence by a NULL character. | |
91 * | |
92 * \param number_of_sequences the number of sequences (the | |
93 * length of the first dimension of the array). | |
94 * | |
95 * \param alignment_length the length of sequences (the | |
96 * length of all lines of the array). | |
97 * | |
98 * \param cstring_array the pointer to the data matrix. | |
99 * | |
100 */ | |
101 Align(unsigned int number_of_sequences, unsigned int alignment_length, char const * const * const cstring_array); | |
102 | |
103 | |
104 /** \brief Creates an alignment with given dimensions | |
105 * | |
106 * Allows you to allocate directly a data matrix of a given | |
107 * size. Names are empty strings, groups 0, and all | |
108 * characters are ?. | |
109 * | |
110 * \param number_of_sequences the number of sequences (the | |
111 * length of the first dimension of the array). | |
112 * | |
113 * \param alignment_length the length of sequences (the | |
114 * length of all lines of the array). | |
115 * | |
116 */ | |
117 Align(unsigned int number_of_sequences, unsigned int alignment_length); | |
118 | |
119 | |
120 /** \brief Copy constructor | |
121 * | |
122 */ | |
123 Align(const Align& align); | |
124 | |
125 | |
126 /** \brief Copy constructor accepting a Container object | |
127 * | |
128 * All but the longest sequences are padded with ? to match | |
129 * the longest sequence's length. | |
130 * | |
131 */ | |
132 Align(const Container& container); | |
133 | |
134 | |
135 /** \brief Copy assignment operator | |
136 * | |
137 */ | |
138 Align& operator=(const Align& align); | |
139 | |
140 | |
141 /** \brief Copy assignment operator accepting a Container object | |
142 * | |
143 * All but the longest sequences are padded with ? to match | |
144 * the longest sequence's length. | |
145 * | |
146 */ | |
147 Align& operator=(const Container& container); | |
148 | |
149 | |
150 /** \brief Destructor | |
151 * | |
152 */ | |
153 virtual ~Align(); | |
154 | |
155 | |
156 /** \brief Adds a sequence | |
157 * | |
158 * If the object already contains at least one sequence, the | |
159 * new sequence must have the same length. Otherwise, an | |
160 * EggUnalignedError is raised. | |
161 * | |
162 * \param name the name of the sequence. | |
163 * \param sequence the sequence string. | |
164 * \param group the group index of the sequence. | |
165 * \return The new number of sequences. | |
166 * | |
167 */ | |
168 virtual unsigned int append(const char* name, const char* sequence, unsigned int group=0); | |
169 | |
170 | |
171 /** \brief Removes a position (column) of the alignment | |
172 * | |
173 * \param pos the position to remove in the alignment. | |
174 * \return The new length of the alignment. | |
175 * | |
176 */ | |
177 virtual unsigned int removePosition(unsigned int pos); | |
178 | |
179 | |
180 /** \brief Removes a sequence from the alignment | |
181 * | |
182 * \param pos the index of the sequence to remove. | |
183 * \return The new number of sequences. | |
184 * | |
185 */ | |
186 virtual unsigned int remove(unsigned int pos); | |
187 | |
188 | |
189 /** \brief Replace a sequence string | |
190 * | |
191 * The new sequence must have the same length as the | |
192 * alignment. Otherwise, an EggUnalignedError is raised. | |
193 * | |
194 * \param seq the index of the sequence to change. | |
195 * \param sequence the new sequence. | |
196 * | |
197 */ | |
198 virtual void sequence(unsigned int seq, const char* sequence); | |
199 | |
200 | |
201 /** \brief Gets the sequence string of a given sequence | |
202 * | |
203 * \param pos the index of the sequence. | |
204 * | |
205 * \return The sequence string for that particular sequence | |
206 * (the value of Container::sequence(pos)). | |
207 */ | |
208 virtual inline const char* sequence(unsigned int pos) const { return Container::sequence(pos); } | |
209 | |
210 | |
211 /** \brief Alignment length | |
212 * | |
213 * Returns 0 if the alignment is empty. | |
214 * | |
215 */ | |
216 virtual unsigned int ls() const; | |
217 | |
218 | |
219 /** \brief Length of a given sequence | |
220 * | |
221 * Calling this function is exactly the same as calling ls() | |
222 * (without arguments), regardless of the index provided, | |
223 * except that an exception is thrown if the index is out of | |
224 * bounds. Provided for compatibility with Container. | |
225 * | |
226 * \param pos the index of the sequence. | |
227 * \return the length of the alignment. | |
228 * | |
229 */ | |
230 virtual unsigned int ls(unsigned int pos) const; | |
231 | |
232 | |
233 /** \brief Fast and unchecked accessor | |
234 * | |
235 * This accessor doesn't perform out-of-bound checking! | |
236 * | |
237 * \param s the index of the sequence (line). | |
238 * \param p the position in the alignment (column). | |
239 * \return The character at the given position. | |
240 * | |
241 */ | |
242 inline char character(unsigned int s, unsigned int p) const { return sequences[s][p]; } | |
243 | |
244 | |
245 /** \brief Gets the character at a matrix position | |
246 * | |
247 * This accessor does perform out-of-bound checking. | |
248 * The specified position must exist. | |
249 * | |
250 * \param sequence the index of the sequence (line). | |
251 * \param position the position in the alignment (column). | |
252 * \return the character at the given position. | |
253 * | |
254 */ | |
255 virtual char get(unsigned int sequence, unsigned int position) const; | |
256 | |
257 | |
258 /** \brief Sets a matrix position to a new character | |
259 * | |
260 * This modifier does perform out-of-bound checking. | |
261 * The specified position must exist. | |
262 * | |
263 * \param sequence the index of the sequence (line). | |
264 * \param position the position in the alignment (column). | |
265 * \param ch the new character value. | |
266 */ | |
267 virtual void set(unsigned int sequence, unsigned position, char ch); | |
268 | |
269 | |
270 /** \brief Reverses a given column in binary data | |
271 * | |
272 * The specified column must contain only "0" and "1" characters. | |
273 * "0" is replaced by "1" and vice versa. | |
274 * | |
275 */ | |
276 void binSwitch(unsigned int pos); | |
277 | |
278 | |
279 /** \brief Extracts specified positions (columns) of the alignment | |
280 * | |
281 * All the specified sites are extracted in the specified | |
282 * order. This function is suitable for bootstrap (resample | |
283 * allowing redrawing the same site) and permutations. | |
284 * | |
285 * This function doesn't perform out-of-bound checking. | |
286 * | |
287 * \param list_of_sites a vector containing alignment | |
288 * positions. | |
289 * | |
290 * \return A copy of the object containing the specified | |
291 * set of positions. | |
292 * | |
293 */ | |
294 Align vslice(std::vector<unsigned int> list_of_sites); | |
295 | |
296 | |
297 /** \brief Extracts a range of positions (columns) | |
298 * | |
299 * \param a the first position. | |
300 * | |
301 * \param b the index immediately past the last position to | |
302 * extract. | |
303 * | |
304 * \return A copy of the object containing the specified | |
305 * range of positions. | |
306 * | |
307 * Positions a to b-1 are extracted, provided that the | |
308 * indices fit in the current length of sequences. To extract | |
309 * all positions, use align.vslice(0, align.ls()). | |
310 * | |
311 * Note: invalid ranges will be silently supported. If | |
312 * a>=ls or b<=a, an empty object is returned. If b>ls, ls | |
313 * is presumably substituted for b (TODO confirm against the implementation). | |
314 */ | |
315 Align vslice(unsigned int a, unsigned int b); | |
316 | |
317 | |
318 /** \brief Deletes all the content of the object | |
319 * | |
320 */ | |
321 virtual void clear(); | |
322 | |
323 | |
324 /** \brief Same as ns() | |
325 * | |
326 */ | |
327 inline unsigned int numberOfSequences() const { | |
328 return _ns; | |
329 } | |
330 | |
331 | |
332 /** \brief Same as ls() | |
333 * | |
334 */ | |
335 inline unsigned int numberOfSites() const { | |
336 return _ls; | |
337 } | |
338 | |
339 | |
340 /** \brief Gets a group label (insecure) | |
341 * | |
342 */ | |
343 inline unsigned int populationLabel(unsigned int sequenceIndex) const { | |
344 return groups[sequenceIndex]; | |
345 } | |
346 | |
347 | |
348 /** \brief Just returns the passed value, converted to double | |
349 * | |
350 */ | |
351 inline double sitePosition(unsigned int position) const { | |
352 return (double) position; | |
353 } | |
354 | |
355 | |
356 protected: | |
357 | |
358 /// This function is not available for alignments (this override does nothing) | |
359 virtual void appendSequence(unsigned int pos, const char* sequence) {} | |
360 | |
361 // Initializer (creates a valid empty alignment) | |
362 virtual void init(); | |
363 | |
364 // Makes a deep copy of the specified data matrix - if cstring_array is NULL, then ignores it and pads with ?'s | |
365 virtual void setFromSource(unsigned int number_of_sequences, unsigned int alignment_length, const char* const * const cstring_array); | |
366 | |
367 // Copies from a Container | |
368 virtual void copyObject(const Container&); | |
369 | |
370 // Copies from an Align | |
371 virtual void copyObject(const Align&); | |
372 | |
373 // Alignment length | |
374 unsigned int _ls; | |
375 }; | |
376 } | |
377 | |
378 #endif |