Mercurial > repos > dereeper > sniplay
comparison egglib/egglib-2.1.5/include/egglib-cpp/Container.hpp @ 9:98c37a5d67f4 draft
Uploaded
author | dereeper |
---|---|
date | Wed, 07 Feb 2018 22:08:47 -0500 |
parents | 420b57c3c185 |
children |
comparison
equal
deleted
inserted
replaced
8:6bf69b40365c | 9:98c37a5d67f4 |
---|---|
1 /* | |
2 Copyright 2008-2009 Stéphane De Mita, Mathieu Siol | |
3 | |
4 This file is part of the EggLib library. | |
5 | |
6 EggLib is free software: you can redistribute it and/or modify | |
7 it under the terms of the GNU General Public License as published by | |
8 the Free Software Foundation, either version 3 of the License, or | |
9 (at your option) any later version. | |
10 | |
11 EggLib is distributed in the hope that it will be useful, | |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 GNU General Public License for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>. | |
18 */ | |
19 | |
20 | |
21 #ifndef EGGLIB_CONTAINER_HPP | |
22 #define EGGLIB_CONTAINER_HPP | |
23 | |
24 | |
25 namespace egglib { | |
26 | |
27 /** \brief Handles a set of sequences (aligned or not) | |
28 * | |
29 * \ingroup core | |
30 * | |
31 * Creation from a file or string stream should be performed using | |
32 * the class Fasta. | |
33 * | |
34 * Sequences are represented by two strings (name and sequence) and | |
35 * an integer (group) that can be accessed or modified by index. The | |
36 * order of sequences is guaranteed to be conserved, as if Container | |
37 * was a list of triplets (name, sequence, group). | |
38 * | |
39 * The data matrix is implemented as continuous arrays (char**) and | |
40 * allows efficient access and modification of data. For very large | |
41 * data matrices you might claim immediately the required memory | |
42 * using the constructor Container(unsigned int, char const* const*). | |
43 * | |
44 */ | |
45 class Container { | |
46 | |
47 public: | |
48 | |
49 /** \brief Creates an empty object | |
50 * | |
51 */ | |
52 Container(); | |
53 | |
54 | |
55 /** \brief Copy constructor | |
56 * | |
57 */ | |
58 Container(const Container& source); | |
59 | |
60 | |
61 /** \brief Assignment operator | |
62 * | |
63 */ | |
64 Container& operator= (const Container& source); | |
65 | |
66 | |
67 /** \brief Creates an object from a data matrix | |
68 * | |
69 * Allows you to create an object from data stored in a char* | |
70 * array. The array's size must be passed to the constructor. | |
71 * Since sequences can have different lengths, you need to | |
72 * terminate each sequence by a null character ('\0'). This constructor | |
73 * is dedicated to very performance-critical tasks. For usual | |
74 * tasks, using the default constructor and subsequently adding | |
75 * sequences with append() should be enough. | |
76 * | |
77 * \param number_of_sequences the number of sequences (the length | |
78 * of the first dimension of the array). | |
79 * | |
80 * \param cstring_array the pointer to the data matrix. | |
81 * | |
82 */ | |
83 Container(unsigned int number_of_sequences, char const* const* const cstring_array); | |
84 | |
85 | |
86 /** \brief Destructor | |
87 * | |
88 */ | |
89 virtual ~Container(); | |
90 | |
91 | |
92 /** \brief Clears all content of the object | |
93 * | |
94 */ | |
95 virtual void clear(); | |
96 | |
97 | |
98 /** \brief Adds a sequence to the object | |
99 * | |
100 * \param name the name of the sequence, as a c-string. | |
101 * \param sequence the sequence string, as a c-string. | |
102 * \param group the group index of the sequence. | |
103 * | |
104 * \return The new number of sequences. | |
105 * | |
106 */ | |
107 virtual unsigned int append(const char* name, const char* sequence, unsigned int group=0); | |
108 | |
109 | |
110 /** \brief Removes a sequence from the object | |
111 * | |
112 * \param pos the index of the sequence to remove. | |
113 * | |
114 * \return The new number of sequences. | |
115 */ | |
116 virtual unsigned int remove(unsigned int pos); | |
117 | |
118 | |
119 /** \brief Changes the name of a given sequence | |
120 * | |
121 * \param pos the sequence index. | |
122 * \param name the new name as a C-like string. | |
123 * | |
124 */ | |
125 virtual void name(unsigned int pos, const char* name); | |
126 | |
127 | |
128 /** \brief Changes the sequence string of a given sequence | |
129 * | |
130 * \param pos the sequence index. | |
131 * \param sequence the new sequence as a C-like string. | |
132 * | |
133 */ | |
134 virtual void sequence(unsigned int pos, const char* sequence); | |
135 | |
136 | |
137 /** \brief Appends a string to a given sequence | |
138 * | |
139 * \param pos the sequence index. | |
140 * \param sequence the sequence to append at the end of the | |
141 * current one. | |
142 * | |
143 */ | |
144 virtual void appendSequence(unsigned int pos, const char* sequence); | |
145 | |
146 | |
147 /** \brief Changes a character | |
148 * | |
149 * \param sequence the sequence index. | |
150 * \param position the character index. | |
151 * \param ch the new character value. | |
152 * | |
153 * The positions must fit in the current ranges. | |
154 * | |
155 */ | |
156 virtual void set(unsigned int sequence, unsigned position, char ch); | |
157 | |
158 | |
159 /** \brief Gets a given character | |
160 * | |
161 * \param s the sequence index. | |
162 * \param p the character index. | |
163 * | |
164 * \return the character value. | |
165 * | |
166 * The positions must fit in the current ranges. | |
167 * | |
168 */ | |
169 virtual char get(unsigned int s, unsigned int p) const; | |
170 | |
171 | |
172 /** \brief Changes the group index of a given sequence | |
173 * | |
174 * \param pos the sequence index. | |
175 * \param group the new group index value. | |
176 * | |
177 */ | |
178 virtual void group(unsigned int pos, unsigned int group); | |
179 | |
180 | |
181 /** \brief Extracts a range of sequences | |
182 * | |
183 * \param a the index of the first sequence. | |
184 * | |
185 * \param b the index immediately past the last sequence to | |
186 * extract. | |
187 * | |
188 * \return A copy of the object containing the specified | |
189 * range of sequences. | |
190 * | |
191 * Sequences a to b-1 are extracted, provided that the | |
192 * indices fit in the current number of sequences. To extract | |
193 * all sequences, use container.hslice(0, container.ns()). | |
194 * | |
195 * Note: invalid ranges will be silently accepted. If | |
196 * a>=ns or b<=a, an empty object is returned. If b>ns, | |
197 * ns will be substituted for b. NOTE(review): the original text read | |
198 * "a>=ls" and "ls will be substituted to a"; confirm against the implementation. | |
199 */ | |
200 Container hslice(unsigned int a, unsigned int b) const; | |
201 | |
202 | |
203 /** \brief Gets the number of sequences | |
204 * \return The number of sequences. | |
205 */ | |
206 unsigned int ns() const; | |
207 | |
208 | |
209 /** \brief Gets the length of a given sequence | |
210 * | |
211 * \param pos the index of the sequence. | |
212 * | |
213 * \return The length of that particular sequence. | |
214 * | |
215 */ | |
216 virtual unsigned int ls(unsigned int pos) const ; | |
217 | |
218 | |
219 /** \brief Gets the name of a given sequence | |
220 * | |
221 * \param pos the index of the sequence. | |
222 * | |
223 * \return The name of that particular sequence. | |
224 * | |
225 */ | |
226 virtual const char* name(unsigned int pos) const; | |
227 | |
228 | |
229 /** \brief Gets the sequence string of a given sequence | |
230 * | |
231 * \param pos the index of the sequence. | |
232 * | |
233 * \return The sequence string for that particular sequence. | |
234 * | |
235 */ | |
236 virtual const char* sequence(unsigned int pos) const; | |
237 | |
238 | |
239 | |
240 /** \brief Gets the group index of a given sequence | |
241 * | |
242 * \param pos the index of the sequence. | |
243 * | |
244 * \return The group index of that particular sequence. | |
245 * | |
246 */ | |
247 virtual unsigned int group(unsigned int pos) const; | |
248 | |
249 | |
250 /** \brief Checks if all lengths are equal | |
251 * | |
252 * Returns true if the lengths of all sequences are equal or | |
253 * if there are fewer than two sequences. | |
254 * | |
255 */ | |
256 bool isEqual() const; | |
257 | |
258 | |
259 /** \brief Equalizes sequence lengths | |
260 * | |
261 * Extends sequences as needed to ensure that all sequences | |
262 * have the same length. | |
263 * | |
264 * \param ch the character to use for padding. | |
265 * | |
266 * \return The final length obtained, which is the length of | |
267 * the longest sequence before the operation. | |
268 * | |
269 */ | |
270 unsigned int equalize(char ch='?'); | |
271 | |
272 | |
273 /** \brief Finds a sequence by its name | |
274 * | |
275 * Gets the position of the first sequence with the specified | |
276 * name. | |
277 * | |
278 * \param string a sequence name. | |
279 * | |
280 * \param strict if true, seeks an exact match. If false, | |
281 * compares only until the end of the requested name (for | |
282 * example: ATCFF will match ATCFF_01 if strict is false). | |
283 * | |
284 * \return The lowest index where the name matches, -1 if no | |
285 * sequence has such name. | |
286 * | |
287 */ | |
288 int find(const char* string, bool strict=true) const; | |
289 | |
290 | |
291 protected: | |
292 // The number of sequences | |
293 unsigned int _ns; | |
294 | |
295 // The array of name lengths | |
296 unsigned int* lnames; | |
297 | |
298 // The array of names | |
299 char** names; | |
300 | |
301 // The array of sequences (as c-strings) | |
302 char** sequences; | |
303 | |
304 // The array of groups | |
305 unsigned int* groups; | |
306 | |
307 // Imports an array of c-strings | |
308 virtual void setFromSource(unsigned int number_of_sequences, const char* const* const cstring_array); | |
309 | |
310 // Constructor helper | |
311 virtual void copyObject(const Container&); | |
312 | |
313 // Constructor partial helper | |
314 virtual void getNamesAndGroups(const Container&); | |
315 | |
316 private: | |
317 | |
318 // The array of sequence lengths | |
319 unsigned int* lsequences; | |
320 | |
321 // Sets up a valid empty object | |
322 virtual void init(); | |
323 }; | |
324 } | |
325 | |
326 #endif |