annotate egglib/egglib-2.1.5/include/egglib-cpp/NucleotideDiversity.hpp @ 6:ebb0ac9b6fa9 draft

planemo upload
author gandres
date Mon, 23 May 2016 17:49:17 -0400
parents 420b57c3c185
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
1 /*
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
2 Copyright 2008-2009 Stéphane De Mita, Mathieu Siol
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
4 This file is part of the EggLib library.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
5
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
6 EggLib is free software: you can redistribute it and/or modify
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
7 it under the terms of the GNU General Public License as published by
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
8 the Free Software Foundation, either version 3 of the License, or
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
9 (at your option) any later version.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
10
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
11 EggLib is distributed in the hope that it will be useful,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
14 GNU General Public License for more details.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
15
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
16 You should have received a copy of the GNU General Public License
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
18 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
19
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
20
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
21 #ifndef EGGLIB_NUCLEOTIDEDIVERSITY_HPP
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
22 #define EGGLIB_NUCLEOTIDEDIVERSITY_HPP
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
23
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
24
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
25 #include "BaseDiversity.hpp"
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
26 #include <string>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
27 #include <vector>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
28
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
29
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
30
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
31 namespace egglib {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
32
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
33
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
34 /** \brief Performs analyses of population genetics
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
35 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
36 * \ingroup polymorphism
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
37 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
38 * This class computes several summary statistics based on
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
39 * nucleotide analysis. Note that it is possible to use the same
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
40 * object to analyze different data sets. Calling the load() method
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
41 * erases all data previously computed (if any). Calling the load()
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
42 * method is absolutely required to compute any statistics. Some
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
43 * statistics are not computed by default, but are if the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
44 * corresponding accessor is used (only load() is required).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
45 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
46 * Note that "unsecure" accessors don't perform out-of-bound checks.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
47 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
48 * S is the number of varying sites (only in sites that were not
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
49 * rejected).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
50 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
51 * eta is the minimum number of mutations, that is the sum of the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
52 * number of alleles minus 1 for each varying site. eta = S if all
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
53 * sites have no variant or 2 alleles. eta is computed independently
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
54 * of the option multiple and IS NOT computed over lseff sites.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
55 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
56 * Pi is the average number of pairwise differences between sequences
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
57 * (expressed here per site) or (as computed here) the mean per site
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
58 * (unbiased) heterozygosity. Pi is zero if no polymorphic sites.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
59 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
60 * D is the Tajima's test of neutrality
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
61 * Ref. Tajima F.: Statistical method for testing the neutral
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
62 * mutation hypothesis by DNA polymorphism. Genetics 1989, 123:585-595.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
63 * It is arbitrarily set to 0 if there are no polymorphic sites.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
64 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
65 * tW: thetaW: estimator of theta based on polymorphic sites (ref.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
66 * e.g. Watterson 1975 Theor. Pop. Biol.).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
67 * Both D and thetaW are computed assuming that rounded nseff samples
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
68 * have been sampled.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
69 * The variance of D is computed using rounded nseff instead of ns.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
70 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
71 * H is the Fay and Wu's test of neutrality.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
72 * Z is the standardized version and E a similar test.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
73 * Ref. Fay J. C., Wu C.-I.: Hitchhiking under positive Darwinian
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
74 * selection. Genetics 2000, 155:1405-1413. and Zeng K., Fu Y. X.,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
75 * Shi S., Wu C.-I.: Statistical tests for detecting positive
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
76 * selection by utilizing high-frequency variants. Genetics 2006,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
77 * 174:1431-9. Both are arbitrarily set to 0 if no polymorphic or
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
78 * orientable sites.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
79 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
80 * tH and tL: theta H: estimators of theta based on derived
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
81 * polymorphic sites (ref in Fay and Wu and Zeng et al.). The variance
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
82 * of H/Z is computed assuming that rounded nseff samples have
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
83 * been sampled.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
84 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
85 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
86 class NucleotideDiversity : public BaseDiversity {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
87
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
88 public:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
89
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
90 /** \brief Builds an object
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
91 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
92 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
93 NucleotideDiversity();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
94
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
95
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
96 /** \brief Destroys an object
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
97 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
98 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
99 virtual ~NucleotideDiversity();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
100
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
101
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
102 /** \brief Identifies polymorphic sites and computes basic
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
103 * statistics
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
104 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
105 * \param data an alignment object (subclass of CharMatrix).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
106 * The presence of outgroup or of different populations will
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
107 * be detected based on the populationLabel members of the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
108 * passed object. The populationLabel 999 will be interpreted
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
109 * as outgroups. If several outgroups are passed, sites where
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
110 * the outgroups are not consistent will be treated as "non-
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
111 * orientable".
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
112 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
113 * \param allowMultipleMutations if true, sites with more
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
114 * than two alleles will not be ignored. The sum of the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
115 * frequencies of all alleles not matching the outgroup will
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
116 * be treated as the derived allele frequency (for orientable
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
117 * sites).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
118 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
119 * \param minimumExploitableData sites where the non-missing
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
120 * data (as defined by characterMapping) are at a frequency
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
121 * smaller than this value will be removed from the analysis.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
122 * Use 1. to take only 'complete' sites into account and 0.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
123 * to use all sites. (The outgroup is not considered in this
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
124 * computation.)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
125 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
126 * \param ignoreFrequency removes sites that are polymorphic
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
127 * because of an allele at absolute frequency smaller than or
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
128 * equal to this value. If ignoreFrequency=0, no sites are
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
129 * removed; if ignoreFrequency=1, singleton sites are
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
130 * ignored. Such sites are completely removed from the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
131 * analysis (not counted in lseff). Note that if more than
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
132 * one mutation is allowed, the site is removed only if all
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
133 * the alleles but one are smaller than or equal to this
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
134 * value. For example, an alignment column AAAAAAGAAT is
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
135 * ignored with an ignoreFrequency of 1, but AAAAAAGGAT is
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
136 * conserved (including the third allele T which is a
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
137 * singleton).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
138 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
139 * \param characterMapping a string giving the list of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
140 * characters that should be considered as valid data. If a
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
141 * space is present in the string, the characters left of the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
142 * space will be treated as valid data and the characters
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
143 * right of the space will be treated as missing data, that
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
144 * is tolerated but ignored. All characters not in the string
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
145 * will cause an EggInvalidCharacterError to be raised.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
146 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
147 * \param useZeroAsAncestral if true, all outgroups (if
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
148 * present) will be ignored and the character "0" will be
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
149 * considered as ancestral for all sites, whatever the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
150 * character mapping.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
151 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
152 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
153 virtual void load(
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
154 CharMatrix& data,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
155 bool allowMultipleMutations=false,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
156 double minimumExploitableData=1.,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
157 unsigned int ignoreFrequency=0,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
158 std::string characterMapping=dnaMapping,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
159 bool useZeroAsAncestral=false
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
160 );
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
161
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
162
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
163 // accessors for the "site analysis" section
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
164
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
165 /// Number of polymorphic sites
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
166 unsigned int S() const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
167
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
168 /// Number of polymorphic orientable sites
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
169 unsigned int So() const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
170
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
171 /// Minimum number of mutations
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
172 unsigned int eta() const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
173
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
174 /// Average of per-site number of sequences effectively used
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
175 double nseff() const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
176
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
177 /// Number of sites effectively used
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
178 unsigned int lseff() const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
179
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
180 /// Average of number of sequences effectively used at orientable sites
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
181 double nseffo() const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
182
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
183 /// Number of orientable sites
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
184 unsigned int lseffo() const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
185
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
186 /// Number of detected populations
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
187 unsigned int npop() const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
188
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
189 /// Label of the population with given index (unsecure)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
190 unsigned int popLabel(unsigned int popIndex) const; // no check!
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
191
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
192
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
193 // accessors for the "diversity" section
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
194
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
195 /// Nucleotide diversity
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
196 double Pi();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
197
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
198 /// Watterson estimator of theta
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
199 double thetaW();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
200
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
201 /// Average of Pi over populations
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
202 double average_Pi();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
203
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
204 /// Pi of a given population (unsecure)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
205 double pop_Pi(unsigned int popIndex); // no check!
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
206
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
207 /// Tajima's D
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
208 double D();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
209
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
210 // accessors for the "outgroup diversity" section
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
211
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
212 /// Fay and Wu estimator of theta
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
213 double thetaH();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
214
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
215 /// Zeng et al. estimator of theta
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
216 double thetaL();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
217
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
218 /// Fay and Wu's H
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
219 double H();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
220
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
221 /// Standardized H
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
222 double Z();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
223
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
224 /// Zeng et al.'s E
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
225 double E();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
226
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
227 // accessors for the "differentiation" section
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
228
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
229 /// Number of sites with at least one fixed difference
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
230 unsigned int FixedDifferences();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
231
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
232 /// Number of sites with at least one allele shared among at least two populations
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
233 unsigned int CommonAlleles();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
234
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
235 /// Number of sites with at least one non-fixed allele shared among at least two populations
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
236 unsigned int SharedAlleles();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
237
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
238 /// Number of sites with at least one allele specific to one population
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
239 unsigned int SpecificAlleles();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
240
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
241 /// Number of sites with at least one derived allele specific to one population
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
242 unsigned int SpecificDerivedAlleles();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
243
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
244 /// Number of polymorphisms in a given population (unsecure)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
245 unsigned int Polymorphisms(unsigned int pop);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
246
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
247 /// Number of specific alleles for a given population (unsecure)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
248 unsigned int SpecificAlleles(unsigned int pop);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
249
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
250 /// Number of specific derived alleles for a given population (unsecure)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
251 unsigned int SpecificDerivedAlleles(unsigned int pop);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
252
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
253 /// Number of fixed differences between a given pair of populations (unsecure; pop2 must be larger than pop1)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
254 unsigned int FixedDifferences(unsigned int pop1, unsigned int pop2);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
255
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
256 /// Number of common alleles between a given pair of populations (unsecure; pop2 must be larger than pop1)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
257 unsigned int CommonAlleles(unsigned int pop1, unsigned int pop2);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
258
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
259 /// Number of shared non-fixed alleles between a given pair of populations (unsecure; pop2 must be larger than pop1)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
260 unsigned int SharedAlleles(unsigned int pop1, unsigned int pop2);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
261
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
262
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
263 // accessor for the "triConfigurations" section
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
264
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
265 /** \brief Number falling into one of the possible site configurations
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
266 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
267 * The statistics are limited to three populations.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
268 * Assuming an unrooted A/G polymorphism (A and G can be
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
269 * swapped), the site configurations are:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
270 * - 0: A&G A A specific 1
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
271 * - 1: A&G A G specific 1 + fixed 2-3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
272 * - 2: A A&G A specific 2
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
273 * - 3: A A&G G specific 2 + fixed 1-3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
274 * - 4: A A A&G specific 3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
275 * - 5: A G A&G specific 3 + fixed 1-2
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
276 * - 6: A&G A&G A shared 1-2
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
277 * - 7: A&G A A&G shared 1-3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
278 * - 8: A A&G A&G shared 2-3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
279 * - 9: A&G A&G A&G shared 1-2-3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
280 * - 10: A G G fixed 1
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
281 * - 11: A G A fixed 2
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
282 * - 12: A A G fixed 3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
283 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
284 * \param index must be an index from 0 to 12.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
285 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
286 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
287 unsigned int triConfiguration(unsigned int index);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
288
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
289
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
290 /// Builds and returns the vector of positions of all polymorphic sites
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
291 std::vector<unsigned int> polymorphic_positions() const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
292
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
293
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
294 /** \brief Builds and returns the vector of positions of all singleton sites
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
295 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
296 * A site is a singleton when it is polymorphic according to
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
297 * parameter of the diversity analysis, when it has exactly two
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
298 * alleles and one of them is at absolute frequency 1 (one
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
299 * copy) disregarding the outgroup.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
300 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
301 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
302 std::vector<unsigned int> singleton_positions() const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
303
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
304
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
305 protected:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
306
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
307 /** \brief This class cannot be copied
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
308 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
309 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
310 NucleotideDiversity(const NucleotideDiversity& source) { }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
311
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
312
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
313 /** \brief This class cannot be copied
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
314 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
315 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
316 NucleotideDiversity& operator=(const NucleotideDiversity& source) { return *this; }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
317
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
318
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
319 void init(); // initializes values
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
320 void clear(); // frees memory but doesn't initialize
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
321
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
322 // diversity (without outgroup)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
323 void diversity();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
324
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
325 // diversity with outgroup
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
326 void outgroupDiversity();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
327
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
328 // site patterns
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
329 void differentiation();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
330
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
331 // triconfigurations
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
332 void triConfigurations();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
333
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
334
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
335 // holders for statistics, with booleans flagging groups of stats
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
336
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
337 bool b_analysisSites;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
338
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
339 bool b_diversity;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
340
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
341 double v_Pi; // nucleotide diversity
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
342 double v_thetaW; // theta (Watterson estimator)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
343 double v_average_Pi; // average diversity across populations
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
344 double *v_pop_Pi; // diversity per population
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
345 double v_D; // Tajima's D
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
346
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
347 bool b_outgroupDiversity;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
348
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
349 double v_thetaH; // theta (Fay and Wu estimator)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
350 double v_thetaL; // theta (Zeng estimator)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
351 double v_H; // Fay and Wu's H
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
352 double v_Z; // normalized Fay and Wu's H
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
353 double v_E; // Zeng et al.'s E
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
354
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
355 bool b_differentiation;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
356
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
357 unsigned int *v_pairwiseFixedDifferences;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
358 unsigned int *v_pairwiseCommonAlleles;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
359 unsigned int *v_pairwiseSharedAlleles;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
360 unsigned int *v_popPolymorphic;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
361 unsigned int *v_popSpecific;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
362 unsigned int *v_popSpecificDerived;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
363 unsigned int v_countFixedDifferences;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
364 unsigned int v_countCommonAlleles;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
365 unsigned int v_countSharedAlleles;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
366 unsigned int v_countSpecificAlleles;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
367 unsigned int v_countSpecificDerivedAlleles;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
368
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
369
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
370 bool b_triConfigurations;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
371
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
372 unsigned int *v_triConfigurations;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
373
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
374 };
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
375 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
376
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
377 #endif