annotate egglib/egglib-2.1.5/include/egglib-cpp/HFStatistics.hpp @ 9:98c37a5d67f4 draft

Uploaded
author dereeper
date Wed, 07 Feb 2018 22:08:47 -0500
parents 420b57c3c185
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
1 /*
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
2 Copyright 2010 Stéphane De Mita, Mathieu Siol
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
4 This file is part of the EggLib library.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
5
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
6 EggLib is free software: you can redistribute it and/or modify
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
7 it under the terms of the GNU General Public License as published by
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
8 the Free Software Foundation, either version 3 of the License, or
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
9 (at your option) any later version.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
10
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
11 EggLib is distributed in the hope that it will be useful,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
14 GNU General Public License for more details.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
15
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
16 You should have received a copy of the GNU General Public License
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
18 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
19
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
20 #ifndef EGGLIB_HFSTATISTICS_HPP
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
21 #define EGGLIB_HFSTATISTICS_HPP
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
22
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
23
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
24
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
25 namespace egglib {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
26
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
27
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
28 /** \brief Computes Fst (Weir-Cockerham theta) from haploid data
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
29 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
30 * The class requires loading data. Data are loaded per haploid individual
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
31 * (one genotype per individual). The analyses are cached: they are
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
32 * performed upon the first call to statistics accessors. The cache
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
33 * is emptied whenever a datum is loaded.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
34 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
35 * The computations are performed after Weir and Cockerham. The
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
36 * statistic theta is generalized for multiple alleles. To allow
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
37 * computation of multi-locus statistics, variance components are
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
38 * also available. The two components of the variance are T1 and T2
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
39 * and theta is T1/T2 (from Weir 1996 "Genetic Data Analysis II",
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
40 * Sinauer associates, Sunderland MA).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
41 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
42 * \ingroup polymorphism
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
43 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
44 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
45 class HFStatistics {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
46
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
47 public:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
48
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
49 /** \brief Constructor
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
50 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
51 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
52 HFStatistics();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
53
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
54
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
55 /** \brief Destructor
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
56 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
57 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
58 virtual ~HFStatistics();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
59
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
60
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
61 /** \brief Reserve sufficient memory for a given number of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
62 * individuals.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
63 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
64 * This method makes the load function faster by allocating
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
65 * all required memory at once.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
66 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
67 * \param numberOfIndividuals a strictly positive integer.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
68 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
69 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
70 void reserve(unsigned int numberOfIndividuals);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
71
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
72
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
73 /** \brief Loads the data for one individual
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
74 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
75 * \param genotype an integer giving the allele.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
76 * \param populationLabel an integer indicating membership in
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
77 * a population.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
78 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
79 * Genotypes and population labels are not required to be
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
80 * consecutive (both are labels, not indices). They are
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
81 * internally mapped to indices (the mapping can be obtained
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
82 * by accessors populationLabel and alleleValue).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
83 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
84 * All genotypes are considered to be valid (no missing data).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
85 * If statistics were computed prior to a call to this
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
86 * function, all data will be erased.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
87 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
88 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
89 void loadIndividual(unsigned int genotype, unsigned int populationLabel);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
90
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
91
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
92 /** \brief Label of a population
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
93 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
94 * The index corresponds to the local mapping of populations
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
95 * regardless of the ranking of population labels. (No out
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
96 * of bound checking.)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
97 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
98 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
99 unsigned int populationLabel(unsigned int populationIndex);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
100
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
101
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
102 /** \brief Value of an allele
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
103 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
104 * The index corresponds to the local mapping of alleles
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
105 * regardless of the ranking of allele values. (No out of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
106 * bound checking.)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
107 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
108 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
109 unsigned int alleleValue(unsigned int alleleIndex);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
110
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
111
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
112 /// Allele of a given individual (no checking)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
113 unsigned int allele(unsigned int individualIndex) const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
114
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
115 /// Population label of a given individual (no checking)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
116 unsigned int individualLabel(unsigned int individualIndex) const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
117
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
118
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
119 /** \brief Number of alleles
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
120 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
121 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
122 unsigned int numberOfAlleles();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
123
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
124
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
125 /** \brief Number of populations
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
126 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
127 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
128 unsigned int numberOfPopulations();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
129
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
130
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
131 /** \brief Number of loaded genotypes
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
132 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
133 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
134 unsigned int numberOfGenotypes() const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
135
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
136
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
137 /** \brief Absolute total allele frequency
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
138 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
139 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
140 unsigned int alleleFrequencyTotal(unsigned int alleleIndex);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
141
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
142
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
143 /** \brief Absolute allele frequency in a population
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
144 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
145 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
146 unsigned int alleleFrequencyPerPopulation(unsigned int populationIndex, unsigned int alleleIndex);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
147
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
148
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
149 /** \brief Sample size of a population
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
150 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
151 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
152 unsigned int populationFrequency(unsigned int populationIndex);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
153
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
154
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
155 /** \brief Weir-Cockerham theta-statistic
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
156 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
157 * Note: equivalent to Fst.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
158 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
159 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
160 double theta();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
161
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
162
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
163 /** \brief Between-population component of variance
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
164 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
165 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
166 double T1();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
167
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
168
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
169 /** \brief Total variance
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
170 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
171 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
172 double T2();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
173
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
174
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
175 protected:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
176
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
177 bool d_flag;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
178 void d_init();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
179 void d_clear();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
180 unsigned int d_reserved;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
181 unsigned int d_numberOfGenotypes;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
182 unsigned int *d_genotypes;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
183 unsigned int *d_populationLabels;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
184
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
185 bool s_flag;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
186 void s_init();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
187 void s_clear();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
188 void s_compute();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
189 void processPopulations();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
190 void processAlleles();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
191 unsigned int getPopulationIndex(unsigned int) const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
192 unsigned int getAlleleIndex(unsigned int) const;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
193 unsigned int s_numberOfAlleles;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
194 unsigned int *s_alleleValueMapping;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
195 unsigned int s_numberOfPopulations;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
196 unsigned int *s_populationLabelMapping;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
197 unsigned int *s_populationFrequencies;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
198 unsigned int *s_alleleFrequenciesTotal;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
199 unsigned int **s_alleleFrequenciesPerPopulation;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
200
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
201 bool w_flag;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
202 void w_init();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
203 void w_clear();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
204 void w_compute();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
205 double w_T;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
206 double *w_T1;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
207 double *w_T2;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
208 double w_nbar;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
209 double w_nc;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
210 double *w_pbar;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
211 double *w_ssquare;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
212 double w_sum_T1;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
213 double w_sum_T2;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
214
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
215
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
216 private:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
217
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
218 HFStatistics(const HFStatistics& source) { }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
219
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
220 HFStatistics& operator=(const HFStatistics& source) {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
221 return *this;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
222 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
223
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
224 };
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
225 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
226
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
227 #endif