Mercurial > repos > dereeper > sniplay
comparison egglib/egglib-2.1.5/include/egglib-cpp/Convert.hpp @ 9:98c37a5d67f4 draft
Uploaded
author | dereeper |
---|---|
date | Wed, 07 Feb 2018 22:08:47 -0500 |
parents | 420b57c3c185 |
children |
comparison
equal
deleted
inserted
replaced
8:6bf69b40365c | 9:98c37a5d67f4 |
---|---|
1 /* | |
2 Copyright 2009 Stéphane De Mita, Mathieu Siol | |
3 | |
4 This file is part of the EggLib library. | |
5 | |
6 EggLib is free software: you can redistribute it and/or modify | |
7 it under the terms of the GNU General Public License as published by | |
8 the Free Software Foundation, either version 3 of the License, or | |
9 (at your option) any later version. | |
10 | |
11 EggLib is distributed in the hope that it will be useful, | |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 GNU General Public License for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>. | |
18 */ | |
19 | |
20 | |
21 #ifndef EGGLIB_CONVERT_HPP | |
22 #define EGGLIB_CONVERT_HPP | |
23 | |
24 | |
25 #include "DataMatrix.hpp" | |
26 #include "Align.hpp" | |
27 #include "EggException.hpp" | |
28 #include "Random.hpp" | |
29 #include <string> | |
30 | |
31 #include "config.h" | |
32 | |
33 #ifdef HAVE_LIBBPP_SEQ | |
34 #include <Bpp/Seq/Alphabet.all> | |
35 #include <Bpp/Seq/Sequence.h> | |
36 #include <Bpp/Seq/Container.all> | |
37 #endif | |
38 | |
39 | |
40 | |
41 namespace egglib { | |
42 | |
43 | |
44 /** \brief Performs conversion between sequence holder types | |
45 * | |
46 * \ingroup core | |
47 * | |
48 * Static methods of this class allow conversion between sequence | |
49 * holder types, with parametrizable modifications. | |
50 * | |
51 */ | |
52 class Convert { | |
53 | |
54 public: | |
55 | |
56 /** \brief DataMatrix to Align conversion | |
57 * | |
58 * By default, this method generates an Align instance | |
59 * containing only the polymorphic sites. The integers of | |
60 * the DataMatrix will be converted as follows: 0 to A, 1 to | |
61 * C, 2 to G and 3 to T. This behaviour can be largely | |
62 * modified using options. | |
63 * | |
64 * \param dataMatrix DataMatrix instance. | |
65 * | |
66 * \param length length of the desired alignment. Non-varying | |
67 * stretches of data will be introduced to reach the | |
68 * specified length. By default the positions of segregating | |
69 * sites will be determined from the positions given by the | |
70 * DataMatrix object. Those positions are expressed in a | |
71 * continuous range, and will be discretized. Mutations | |
72 * falling on the same site will be moved by one position | |
73 * left or right (always preserving the order of mutation | |
74 * sites). If positions are all zero (the default of the | |
75 * DataMatrix class) and if length is larger than the number | |
76 * of segregating sites, then all segregating sites will | |
77 * cluster on the left-hand side of the alignment. | |
78 * | |
79 * \param random the address of a Random object used to | |
80 * draw random numbers (for randomizing positions and/or | |
81 * non-varying states). If an address is provided but no | |
82 * random numbers are required, it is ignored. If no address | |
83 * is provided and random numbers are required, a Random | |
84 * instance is built internally. | |
85 * | |
86 * \param randomizePositions if true, the positions specified | |
87 * in the DataMatrix object are ignored and the positions of | |
88 * mutations are drawn randomly along the interval (only if | |
89 * the specified length is larger than the number of | |
90 * segregating sites). If randomizePositions is false and | |
91 * positions are not set (all zero), see the length parameter. | |
92 * | |
93 * \param enforceLength specifies whether an | |
94 * EggRuntimeError should be thrown when the number of | |
95 * polymorphic sites is larger than the specified length. If | |
96 * false (the default) and in cases where the specified | |
97 * length is too short to harbor all polymorphic sites, the | |
98 * alignment length will be increased as needed. | |
99 * | |
100 * \param randomizeNonVaryingStates if true, the stretches of | |
101 * conserved positions (between segregating sites) will be | |
102 * randomly drawn from the current symbol mapping. Otherwise, | |
103 * the symbol given by nonVaryingState will be used. | |
104 * | |
105 * \param randomizeAlleles if true, alleles will be drawn | |
106 * randomly from the mapped characters. Note that if a | |
107 * genotype value is larger than the size of the mapping, it | |
108 * will be replaced by the character given by unknown, | |
109 * without randomization. In other words, with the mapping | |
110 * "ACGT", alleles 0, 1, 2 and 3 will be randomly assigned | |
111 * to these four characters, but larger and negative alleles | |
112 * will be assigned to the unknown character. | |
113 * | |
114 * \param mapping a string giving the characters to assign to | |
115 * the different integer values read from the DataMatrix. If | |
116 * the read value is 0, the first character of the string | |
117 * will be used; if the value is 1, the second character will | |
118 * be used, and so on. If the integer read is out of range | |
119 * (in particular, for any negative value), then the | |
120 * character given by unknown will be used. An empty string | |
121 * will always lead to alignments containing only the | |
122 * character given by unknown. The string "01" is suitable | |
123 * for binary data. | |
124 * | |
125 * \param unknown the character to use if an integer genotype | |
126 * value is not mapped in the mapping string (that is, if | |
127 * the mapping string is too short). | |
128 * | |
129 * \param nonVaryingState character to use for conserved | |
130 * stretches of data. It doesn't have to be included in the | |
131 * mapping. If randomizeNonVaryingStates is true, this | |
132 * argument is ignored. | |
133 * | |
134 * \return The resulting Align object. | |
135 * | |
136 */ | |
137 static Align align( | |
138 DataMatrix& dataMatrix, | |
139 unsigned int length=0, | |
140 Random* random=NULL, | |
141 bool randomizePositions=false, | |
142 bool randomizeNonVaryingStates=false, | |
143 bool randomizeAlleles=false, | |
144 bool enforceLength=false, | |
145 std::string mapping="ACGT", | |
146 char unknown='?', | |
147 char nonVaryingState='A' | |
148 ); | |
149 | |
150 | |
151 #ifdef HAVE_LIBBPP_SEQ | |
152 | |
153 /** \brief Converts an alignment to the equivalent Bio++ type | |
154 * | |
155 * During conversion, name information is lost (arbitrary | |
156 * names are generated in order to prevent duplicate names). | |
157 * The object is attached to an alphabet matching the passed | |
158 * integer. The names are bare rank integers (starting at the | |
159 * value given by *offset*). | |
160 * | |
161 * \param align the source alignment object. | |
162 * | |
163 * \param alphabetID an integer indicating which alphabet to | |
164 * use: | |
165 * - 1 for DNA | |
166 * - 2 for RNA | |
167 * - 3 for proteins | |
168 * - 4 for standard codon | |
169 * - 5 for vertebrate mitochondrial codon | |
170 * - 6 for invertebrate mitochondrial codon | |
171 * - 7 for echinoderm mitochondrial codon | |
172 * . | |
173 * Other values will result in an exception. | |
174 * | |
175 * \param outgroupFlag an integer indicating whether to | |
176 * include outgroup sequences: | |
177 * - 0 use all sequences | |
178 * - 1 use only sequences without 999 label (ingroup) | |
179 * - 2 use only sequences with 999 label (outgroup) | |
180 * . | |
181 * Other values will result in an exception. | |
182 * | |
183 * \param offset enter an integer to shift the names of the | |
184 * resulting alignment (useful to merge alignments and ensure | |
185 * that names are not duplicated). | |
186 * | |
187 * \return A Bio++ alignment. | |
188 * | |
189 */ | |
190 static bpp::AlignedSequenceContainer egglib2bpp(Align& align, unsigned int alphabetID, unsigned int outgroupFlag, unsigned int offset=0); | |
191 | |
192 #endif | |
193 | |
194 | |
195 | |
196 protected: | |
197 | |
198 /** \brief Protected default constructor: this class cannot be instantiated | |
199 * | |
200 */ | |
201 Convert() { } | |
202 | |
203 | |
204 /** \brief Protected copy constructor: this class cannot be instantiated | |
205 * | |
206 */ | |
207 Convert(const Convert& source) { } | |
208 | |
209 | |
210 /** \brief Protected assignment operator: this class cannot be instantiated | |
211 * | |
212 */ | |
213 Convert& operator=(const Convert& source) { return *this; } | |
214 | |
215 | |
216 /** \brief Protected destructor: this class cannot be instantiated | |
217 * | |
218 */ | |
219 virtual ~Convert() { } | |
220 | |
221 #ifdef HAVE_LIBBPP_SEQ | |
222 static bpp::DNA dnaAlphabet; | |
223 static bpp::RNA rnaAlphabet; | |
224 static bpp::ProteicAlphabet proteicAlphabet; | |
225 static bpp::StandardCodonAlphabet standardCodonAlphabet; | |
226 static bpp::VertebrateMitochondrialCodonAlphabet vertebrateMitochondrialCodonAlphabet; | |
227 static bpp::InvertebrateMitochondrialCodonAlphabet invertebrateMitochondrialCodonAlphabet; | |
228 static bpp::EchinodermMitochondrialCodonAlphabet echinodermMitochondrialCodonAlphabet; | |
229 #endif | |
230 | |
231 }; | |
232 } | |
233 | |
234 #endif |