comparison egglib/egglib-2.1.5/include/egglib-cpp/Align.hpp @ 9:98c37a5d67f4 draft

Uploaded
author dereeper
date Wed, 07 Feb 2018 22:08:47 -0500
parents 420b57c3c185
children
comparison
equal deleted inserted replaced
8:6bf69b40365c 9:98c37a5d67f4
1 /*
2 Copyright 2008-2009 Stéphane De Mita, Mathieu Siol
3
4 This file is part of the EggLib library.
5
6 EggLib is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 EggLib is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20
21 #ifndef EGGLIB_ALIGN_HPP
22 #define EGGLIB_ALIGN_HPP
23
24 #include "Container.hpp"
25 #include "CharMatrix.hpp"
26 #include <vector>
27
28 /** \mainpage Summary
29 *
30 * This is the automatically-generated reference manual of the C++
31 * egglib-cpp library. The library is presented as several modules, but
32 * note that they are only used to structure the documentation.
33 *
34 * There is a single namespace (egglib) in which all classes are
35 * defined. See an example of programming with egglib-cpp in the
36 * EggLib package main documentation. Use "Modules" or "Classes" above
37 * to navigate in the library reference manual.
38 *
39 */
40
41
42 /** \defgroup core core
43 *
44 * \brief Central core of the C++ library of Egglib
45 *
46 * Data storage classes, parsers/formatters and tools, plus exception
47 * types.
48 *
49 */
50
51 namespace egglib {
52
53
54 /** \brief Handles a sequence alignment
55 *
56 * \ingroup core
57 *
58 * Creation from a file or string stream should be performed using
59 * the class Fasta. Align objects can be created by deep copy from
60 * both Align and Container type. In the latter case, the length are
61 * artificially equalized by "?" characters. Align objects can be
62 * created from a DataMatrix object (and all the way arround) using
63 * the specific class DMAConverter.
64 *
65 * Sequences are represented by two strings (name and sequence) and
66 * an integer (group) that can be accessed or modified by index.The
67 * order of sequences is guaranteed to be conserved, as if Align was
68 * a list of triplets (name, sequence, group).
69 *
70 * The data matrix is implemented as continuous array (char**) and
71 * allows efficient access and modification of data. For very large
72 * data matrices you might claim immediately the required memory
73 * using the constructor Align(unsigned int, char**).
74 *
75 */
76 class Align : public Container, public CharMatrix {
77 public:
78
79 /** \brief Creates an empty alignment
80 *
81 */
82 Align();
83
84
85 /** \brief Creates an alignment from a data matrix.
86 *
87 * Allows you to create an object from data stored in a char*
88 * array. The array's dimensions must be passed to the
89 * constructor, and as a result there is not need to
90 * terminate each sequence by a NULL character.
91 *
92 * \param number_of_sequences the number of sequences (the
93 * length of the first dimension of the array).
94 *
95 * \param alignment_length the length of sequences (the
96 * length of all lines of the array).
97 *
98 * \param cstring_array the pointer to the data matrix.
99 *
100 */
101 Align(unsigned int number_of_sequences, unsigned int alignment_length, char const * const * const cstring_array);
102
103
104 /** \brief Creates an alignment with given dimensions
105 *
106 * Allows you to allocate directly a data matrix of a given
107 * size. Names are empty strings, groups 0, and all
108 * characters are ?.
109 *
110 * \param number_of_sequences the number of sequences (the
111 * length of the first dimension of the array).
112 *
113 * \param alignment_length the length of sequences (the
114 * length of all lines of the array).
115 *
116 */
117 Align(unsigned int number_of_sequences, unsigned int alignment_length);
118
119
120 /** \brief Copy constructor
121 *
122 */
123 Align(const Align& align);
124
125
126 /** \brief Copy constructor accepting a Container object
127 *
128 * All but the longest sequences are padded with ? to match
129 * the longest sequence's length.
130 *
131 */
132 Align(const Container& container);
133
134
135 /** \brief Copy operator
136 *
137 */
138 Align& operator=(const Align& align);
139
140
141 /** \brief Copy operator accepting a Container object
142 *
143 * All but the longest sequences are padded with ? to match
144 * the longest sequence's length.
145 *
146 */
147 Align& operator=(const Container& container);
148
149
150 /** \brief Destructor
151 *
152 */
153 virtual ~Align();
154
155
156 /** \brief Adds a sequence
157 *
158 * If the object already contains at least one sequence, the
159 * new sequence must have the same length. Otherwise, a
160 * EggUnalignedError is raised.
161 *
162 * \param name the name of the sequence.
163 * \param sequence the sequence string.
164 * \param group the group index of the sequence.
165 * \return The new number of sequences.
166 *
167 */
168 virtual unsigned int append(const char* name, const char* sequence, unsigned int group=0);
169
170
171 /** \brief Removes a position (column) of the alignment
172 *
173 * \param pos the position to remove in the alignment.
174 * \return The new length of the alignment.
175 *
176 */
177 virtual unsigned int removePosition(unsigned int pos);
178
179
180 /** \brief Removes a sequence from the alignment
181 *
182 * \param pos the index of the sequence to remove.
183 * \return The new number of sequences.
184 *
185 */
186 virtual unsigned int remove(unsigned int pos);
187
188
189 /** \brief Replace a sequence string
190 *
191 * The new sequence must have the same length than the
192 * alignment. Otherwise, a EggUnalignedError is raised.
193 *
194 * \param seq the index of the sequence to change.
195 * \param sequence the new sequence.
196 *
197 */
198 virtual void sequence(unsigned int seq, const char* sequence);
199
200
201 /** \brief Gets the name of a given sequence
202 *
203 * \param pos the index of the sequence.
204 *
205 * \return The sequence string for that particular sequence.
206 *
207 */
208 virtual inline const char* sequence(unsigned int pos) const { return Container::sequence(pos); }
209
210
211 /** \brief Alignment length
212 *
213 * Returns 0 if the alignment is empty.
214 *
215 */
216 virtual unsigned int ls() const;
217
218
219 /** \brief Length of a given sequence
220 *
221 * Calling this function is exactly the same as calling ls()
222 * (without arguments), regardless of the index provided,
223 * except that an exception is thrown if the index is out of
224 * bounds. Provided for compatibility with Container.
225 *
226 * \param pos the index of the sequence.
227 * \return the length of the alignment.
228 *
229 */
230 virtual unsigned int ls(unsigned int pos) const;
231
232
233 /** \brief Fast and unsecure accessor
234 *
235 * This accessor doesn't perform out-of-bound checking!
236 *
237 * \param s the index of the sequence (line).
238 * \param p the position in the alignment (column).
239 * \return The character at the given position.
240 *
241 */
242 inline char character(unsigned int s, unsigned int p) const { return sequences[s][p]; }
243
244
245 /** \brief Gets a nucleotide
246 *
247 * This modifier does perform out-of-bound checking.
248 * The specified position must exist.
249 *
250 * \param sequence the index of the sequence (line).
251 * \param position the position in the alignment (column).
252 * \return the character at the given position.
253 *
254 */
255 virtual char get(unsigned int sequence, unsigned int position) const;
256
257
258 /** \brief Sets a matrix position to a new character
259 *
260 * This modifier does perform out-of-bound checking.
261 * The specified position must exist.
262 *
263 * \param sequence the index of the sequence (line).
264 * \param position the position in the alignment (column).
265 * \param ch the new character value.
266 */
267 virtual void set(unsigned int sequence, unsigned position, char ch);
268
269
270 /** \brief Reverse a given column in binary data
271 *
272 * The specified column must contain only "0" ans "1" characters.
273 * "0" is replaced by "1" and all the way around
274 *
275 */
276 void binSwitch(unsigned int pos);
277
278
279 /** \brief Extracts specified positions (columns) of the alignment
280 *
281 * All the specified sites are extracted in the specified
282 * order. This function is suitable for bootstrap (resample
283 * allowing redrawing the same site) and permutations.
284 *
285 * This function doesn't perform out-of-bound checking.
286 *
287 * \param list_of_sites a vector containing alignment
288 * positions.
289 *
290 * \return A copy of the object containing the specified
291 * set of positions.
292 *
293 */
294 Align vslice(std::vector<unsigned int> list_of_sites);
295
296
297 /** \brief Extracts a range of positions (columns)
298 *
299 * \param a the first position.
300 *
301 * \param b the index immediately passed the last sequence to
302 * extract.
303 *
304 * \return A copy of the object containing the specified
305 * range of sequences.
306 *
307 * Positions a to b-1 are extracted, provided that the
308 * indices fit in the current length of sequences. To extract
309 * all sequences, use align.vslice(0, align.ls()).
310 *
311 * Note: invalid ranges will be silently supported. If
312 * a>=ls or b<=a, an empty object is returned. If b>ns,
313 * ls will be substituted to a.
314 */
315 Align vslice(unsigned int a, unsigned int b);
316
317
318 /** \brief Deletes all the content of the object
319 *
320 */
321 virtual void clear();
322
323
324 /** \brief Same as ns()
325 *
326 */
327 inline unsigned int numberOfSequences() const {
328 return _ns;
329 }
330
331
332 /** \brief Same as ls()
333 *
334 */
335 inline unsigned int numberOfSites() const {
336 return _ls;
337 }
338
339
340 /** \brief Gets a group label (insecure)
341 *
342 */
343 inline unsigned int populationLabel(unsigned int sequenceIndex) const {
344 return groups[sequenceIndex];
345 }
346
347
348 /** \brief Just return the passed value
349 *
350 */
351 inline double sitePosition(unsigned int position) const {
352 return (double) position;
353 }
354
355
356 protected:
357
358 /// This function is not available for alignments
359 virtual void appendSequence(unsigned int pos, const char* sequence) {}
360
361 // Initializer (creates a valid empty alignment)
362 virtual void init();
363
364 // Makes a deep copy of the specified data matrix - if cstring_array is NULL, then ignores it and pads with ?'s
365 virtual void setFromSource(unsigned int number_of_sequences, unsigned int alignment_length, const char* const * const cstring_array);
366
367 // Copies from a Container
368 virtual void copyObject(const Container&);
369
370 // Copies from an Align
371 virtual void copyObject(const Align&);
372
373 // Alignment length
374 unsigned int _ls;
375 };
376 }
377
378 #endif