Mercurial > repos > dereeper > sniplay
comparison egglib/egglib-2.1.5/include/egglib-cpp/Mutator.hpp @ 1:420b57c3c185 draft
Uploaded
author | dereeper |
---|---|
date | Fri, 10 Jul 2015 04:39:30 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:3e19d0dfcf3e | 1:420b57c3c185 |
---|---|
1 /* | |
2 Copyright 2009, 2010, 2012 Stéphane De Mita, Mathieu Siol | |
3 | |
4 This file is part of the EggLib library. | |
5 | |
6 EggLib is free software: you can redistribute it and/or modify | |
7 it under the terms of the GNU General Public License as published by | |
8 the Free Software Foundation, either version 3 of the License, or | |
9 (at your option) any later version. | |
10 | |
11 EggLib is distributed in the hope that it will be useful, | |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 GNU General Public License for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>. | |
18 */ | |
19 | |
20 #ifndef EGGLIB_MUTATOR_HPP | |
21 #define EGGLIB_MUTATOR_HPP | |
22 | |
23 | |
24 #include "DataMatrix.hpp" | |
25 #include "Random.hpp" | |
26 #include "Arg.hpp" | |
27 #include "Mutation.hpp" | |
28 | |
29 | |
30 namespace egglib { | |
31 | |
32 | |
33 /** \brief Implements mutation models | |
34 * | |
35 * \ingroup coalesce | |
36 * | |
37 * Works with a previously built Ancestral Reconbination Graph. The | |
38 * user must sets options using the setter-based interface. After | |
39 * that he or she can call the method mute() that will generates | |
40 * a DataMatrix object. | |
41 * | |
42 * Genotype data are represented by integer numbers. Regardless of | |
43 * the mutation model, the ancestral state is always 0. The user can | |
44 * set the rate of mutation (or, alternatively, fix the number of | |
45 * mutations that occurred - which is the number of segregating sites | |
46 * only with an infinite site model). | |
47 * | |
48 * Other options fall into two separate groups: the positions of the | |
49 * mutated sites and the process of mutation (how new alleles are | |
50 * generated). | |
51 * | |
52 * Concerning allele generation, several mutation models are available | |
53 * (coded by single letters): | |
54 * - F: fixed number of alleles. Among other markers, this model is | |
55 * appropriate for simulating nucleotides. The user is able | |
56 * to choose the number of alleles (where 2 is the standard | |
57 * for an infinite site model and 4 for a finite site model). | |
58 * Mutator allows assigning independent weights between all | |
59 * different transition types and can draw randomly the | |
60 * ancestral states, providing a way to emulate evolution of | |
61 * nucleotides with multiple mutations at the same site and | |
62 * reversion. | |
63 * - I: infinite number of alleles. At a given site, each mutation | |
64 * raises a new allele. The value of the alleles is therefore | |
65 * irrelevant (it only denotes its order of appearance). This | |
66 * model does not permit homoplasy. | |
67 * - S: stepwise mutation model. In this model the value of the | |
68 * alleles are interpreted as a size (typically for simulating | |
69 * a microsatellite marker). Each mutation either increases | |
70 * or decreases the allele size by an increment of one. | |
71 * - T: two-phase mutation model. This model is a generalization | |
72 * of the stepwise mutation model (S). For a mutation, the | |
73 * increment (either increase or decrease) is 1 with the | |
74 * probability given by the parameter (1-TPMproba). With | |
75 * probability TPMproba, the increment is drawn from a | |
76 * geometric distribution of parameter given by the other | |
77 * parameter (TPMparam). | |
78 * | |
79 * By default, the program will assume an infinite site model (ISM). | |
80 * Each mutation will occur to a new position drawn from the interval | |
81 * [0,1]. It is possible to set any mutation model with an ISM | |
82 * (including microsatellite-like models I, S and T). Alternatively, | |
83 * the user can specify a finite number of sites available for | |
84 * mutation. For a microsatellite marker, the user will want to | |
85 * specify a single site. It is possible to set a finite number of | |
86 * sites for all mutation models. In all cases, the mutations will | |
87 * be forced to target these sites. It is possible to apply weights | |
88 * independently to all sites. The higher the weight value | |
89 * (comparatively to the other sites), the higher the probability | |
90 * that this site mutes. The weights needs not to be relative. In | |
91 * addition, the user can set the positions of the different sites. | |
92 * Nothings forces him or her to place them in order. Note that this | |
93 * does not affect the mutation process, but on the amount of | |
94 * recombination that will be allowed between sites. | |
95 * | |
96 */ | |
97 class Mutator { | |
98 | |
99 public: | |
100 | |
101 /** \brief Initializes with default values | |
102 * | |
103 * List of default values: | |
104 * - theta = 0 | |
105 * - fixedNumberOfMutations = 0 | |
106 * - model = F (fixed number of alleles) | |
107 * - fixed number of alleles = 2 | |
108 * - infinite site model | |
109 * - TPM parameters are both preset to 0.5 | |
110 * | |
111 */ | |
112 Mutator(); | |
113 | |
114 | |
115 /** \brief Destroys the object | |
116 * | |
117 */ | |
118 ~Mutator(); | |
119 | |
120 | |
121 /** \brief Copy constructor | |
122 * | |
123 */ | |
124 Mutator(const Mutator&); | |
125 | |
126 | |
127 /** \brief Assignement operator | |
128 * | |
129 */ | |
130 Mutator& operator=(const Mutator&); | |
131 | |
132 | |
133 /** \brief Restores default values of all parameters | |
134 * | |
135 */ | |
136 void reset(); | |
137 | |
138 | |
139 /** \brief Gets the fixed number of mutations | |
140 * | |
141 */ | |
142 unsigned int fixedNumberOfMutations() const; | |
143 | |
144 | |
145 /** \brief Sets the fixed number of mutations | |
146 * | |
147 * The value can be 0. It is not allowed to set both the | |
148 * fixed number of mutations and the mutation rate at | |
149 * non-zero value | |
150 * | |
151 */ | |
152 void fixedNumberOfMutations(unsigned int); | |
153 | |
154 | |
155 /** \brief Gets the mutation rate | |
156 * | |
157 */ | |
158 double mutationRate() const; | |
159 | |
160 | |
161 /** \brief Sets the mutation rate | |
162 * | |
163 * The value cannot be negative. The value can be 0. It is | |
164 * not allowed to set both the fixed number of mutations and | |
165 * the mutation rate at non-zero value | |
166 * | |
167 */ | |
168 void mutationRate(double); | |
169 | |
170 | |
171 /** \brief Gets the mutation model | |
172 * | |
173 * See the class documentation for the signification of the | |
174 * different one-letter codes. | |
175 * | |
176 */ | |
177 char mutationModel() const; | |
178 | |
179 | |
180 /** \brief Sets the mutation model | |
181 * | |
182 * The passed character must be one of F, I, S and T. See the | |
183 * class documentation for their signification. | |
184 * | |
185 */ | |
186 void mutationModel(char); | |
187 | |
188 | |
189 /** \brief Gets the fixed number of possible alleles | |
190 * | |
191 */ | |
192 unsigned int numberOfAlleles() const; | |
193 | |
194 | |
195 /** \brief Sets the fixed number of possible alleles | |
196 * | |
197 * The value must be larger than 1. This parameter is | |
198 * meaningful only for the fixed number allele model of | |
199 * mutation, and ignored otherwise. | |
200 * | |
201 */ | |
202 void numberOfAlleles(unsigned int); | |
203 | |
204 | |
205 /** \brief Sets a transition weight | |
206 * | |
207 * \param i row (previous allele index). | |
208 * \param j column (next allele index). | |
209 * \param value weight to apply. | |
210 * | |
211 * Indices i and j must be different. Weights can be any | |
212 * strictly positive value. | |
213 * | |
214 */ | |
215 void transitionWeight(unsigned int i, unsigned int j, double value); | |
216 | |
217 | |
218 /** \brief Gets a transition weight | |
219 * | |
220 * \param i row (previous allele index). | |
221 * \param j column (next allele index). | |
222 * | |
223 * Indices i and j must be different. | |
224 * | |
225 */ | |
226 double transitionWeight(unsigned int i, unsigned int j); | |
227 | |
228 | |
229 /** \brief Set to true to draw ancestral alleles randomly | |
230 * | |
231 * By default, the ancestral allele is always 0. If this | |
232 * variable is set to true, the ancestral allele will be | |
233 * randomly drawn from the defined number of alleles. This | |
234 * option is always ignored unless in combination with the | |
235 * Fixed Allele Number model. | |
236 * | |
237 */ | |
238 void randomAncestralAllele(bool flag); | |
239 | |
240 | |
241 /** \brief true if ancestral alleles must be drawn randomly | |
242 * | |
243 */ | |
244 bool randomAncestralAllele() const; | |
245 | |
246 | |
247 /** \brief Gets the TPM probability parameter | |
248 * | |
249 */ | |
250 double TPMproba() const; | |
251 | |
252 | |
253 /** \brief Sets the TPM probability parameter | |
254 * | |
255 * This parameter is considered only if the mutation model | |
256 * is T (two-phase mutation model). It gives the probability | |
257 * that a mutation step is not fixed to be 1. If TPMproba is | |
258 * 0, the mutation model is SMM. | |
259 * | |
260 * The value must be >=0. and <=1. | |
261 * | |
262 */ | |
263 void TPMproba(double value); | |
264 | |
265 | |
266 /** \brief Gets the TPM distribution parameter | |
267 * | |
268 */ | |
269 double TPMparam() const; | |
270 | |
271 | |
272 /** \brief Sets the TPM distribution parameter | |
273 * | |
274 * This parameter is considered only if the mutation model | |
275 * is T (two-phase mutation model). It gives the parameter | |
276 * of the geometric distribution which is used to generate | |
277 * the mutation step (if it is not one). | |
278 * | |
279 * The value must be >=0. and <=1. | |
280 * | |
281 */ | |
282 void TPMparam(double value); | |
283 | |
284 | |
285 /** \brief Gets the number of mutable sites | |
286 * | |
287 * A value a zero must be interpreted as the infinite site | |
288 * model. Note that after all calls all data from the tables | |
289 * sitePositions and siteWeights will be reset. | |
290 * | |
291 */ | |
292 unsigned int numberOfSites() const; | |
293 | |
294 | |
295 /** \brief Sets the number of mutable sites | |
296 * | |
297 * The value of zero is accepted and imposed the infinite | |
298 * site model. | |
299 * | |
300 */ | |
301 void numberOfSites(unsigned int); | |
302 | |
303 | |
304 /** \brief Gets the position of a given site | |
305 * | |
306 */ | |
307 double sitePosition(unsigned int siteIndex) const; | |
308 | |
309 | |
310 /** \brief Set the position of a given site | |
311 * | |
312 * The position must be >=0 and <=1 | |
313 * | |
314 */ | |
315 void sitePosition(unsigned int siteIndex, double position); | |
316 | |
317 | |
318 /** \brief Gets the mutation weight of a given site | |
319 * | |
320 */ | |
321 double siteWeight(unsigned int siteIndex) const; | |
322 | |
323 | |
324 /** \brief Set the site weight of a given site | |
325 * | |
326 * The weight must be strictly positive. | |
327 * | |
328 */ | |
329 void siteWeight(unsigned int siteIndex, double weight); | |
330 | |
331 | |
332 /** \brief Performs mutation | |
333 * | |
334 * \param arg Ancestral recombination graph instance. If the | |
335 * ARG is partially built or not a all, or improperly so, | |
336 * the behaviour of this method is not defined. | |
337 * | |
338 * \param random The address of a Random instance to be | |
339 * used for generating random numbers. | |
340 * | |
341 * \return A DataMatrix instance containing simulated data. | |
342 * | |
343 */ | |
344 DataMatrix mute(Arg* arg, Random* random); | |
345 | |
346 | |
347 /** \brief Gets the last number of mutations | |
348 * | |
349 * Returns the number of mutations of the last call of mute( ). | |
350 * By default, this method returns 0. | |
351 * | |
352 */ | |
353 unsigned int numberOfMutations() const; | |
354 | |
355 | |
356 private: | |
357 | |
358 void clear(); | |
359 void init(); | |
360 void copy(const Mutator&); | |
361 | |
362 //int nextAllele(int allele, Random* random); | |
363 int TPMstep(double inTPMproba, Random* random); | |
364 void apply_mutation(unsigned int matrixIndex, | |
365 unsigned int actualSite, DataMatrix& data, | |
366 const Edge* edge, int allele, | |
367 unsigned int segment, Random* random); | |
368 | |
369 | |
370 char _model; | |
371 double _mutationRate; | |
372 unsigned int _fixedNumberOfMutations; | |
373 unsigned int _numberOfAlleles; | |
374 double** _transitionWeights; | |
375 bool _randomAncestralAllele; | |
376 unsigned int _numberOfSites; | |
377 double* _sitePositions; | |
378 double* _siteWeights; | |
379 double _TPMproba; | |
380 double _TPMparam; | |
381 int maxAllele; | |
382 unsigned int _numberOfMutations; | |
383 std::vector<Mutation> _cache_mutations; | |
384 unsigned int _cache_mutations_reserved; | |
385 | |
386 }; | |
387 | |
388 | |
389 bool compare(Mutation mutation1, Mutation mutation2); // returns True if mutation1 is older | |
390 | |
391 } | |
392 | |
393 | |
394 | |
395 | |
396 #endif | |
397 |