comparison egglib/egglib-2.1.5/include/egglib-cpp/HFStatistics.hpp @ 9:98c37a5d67f4 draft

Uploaded
author dereeper
date Wed, 07 Feb 2018 22:08:47 -0500
parents 420b57c3c185
children
comparison
equal deleted inserted replaced
8:6bf69b40365c 9:98c37a5d67f4
1 /*
2 Copyright 2010 Stéphane De Mita, Mathieu Siol
3
4 This file is part of the EggLib library.
5
6 EggLib is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 EggLib is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #ifndef EGGLIB_HFSTATISTICS_HPP
21 #define EGGLIB_HFSTATISTICS_HPP
22
23
24
25 namespace egglib {
26
27
28 /** \brief Computes Fst and Fit from haploid data
29 *
30 * The class requires loading data. Data are loaded by haploid
31 * (one genotype per individual). The analyses are cached: they are
32 * performed upon the first call to statistics accessors. The cache
33 * is emptied whenever a datum is loaded.
34 *
35 * The computations are performed after Weir and Cockerham. The
36 * statistic theta is generalized for multiple alleles. To allow
37 * computation of multi-locus statistics, variance components are
38 * also available. The two components of the variance are T1 and T2
39 * and theta is T1/T2 (from Weir 1996 "Genetic Data Analysis II",
40 * Sinauer associates, Sunderland MA).
41 *
42 * \ingroup polymorphism
43 *
44 */
45 class HFStatistics {
46
47 public:
48
49 /** \brief Constructor
50 *
51 */
52 HFStatistics();
53
54
55 /** \brief Destructor
56 *
57 */
58 virtual ~HFStatistics();
59
60
61 /** \brief Reserve sufficient memory for a given number of
62 * individuals.
63 *
64 * This method makes the load function faster by allocating
65 * all required memory at once.
66 *
67 * \param numberOfIndividuals a strictly positive integer.
68 *
69 */
70 void reserve(unsigned int numberOfIndividuals);
71
72
73 /** \brief Loads the data for one individual
74 *
75 * \param genotype an integer giving the allele.
76 * \param populationLabel an integer indication belonging to
77 * a population.
78 *
79 * Genotypes and population labels are not required to be
80 * consecutive (both are labels, not indices). They are
81 * internally mapped to indices (the mapping can be obtained
82 * by accessors populationLabel and allele).
83 *
84 * All genotypes are considered to be valid (no missing data).
85 * If statistics were computed previous to call to this
86 * function, all data will be erased.
87 *
88 */
89 void loadIndividual(unsigned int genotype, unsigned int populationLabel);
90
91
92 /** \brief Label of a population
93 *
94 * The index corresponds to the local mapping of populations
95 * regardless of the ranking of population labels. (No out
96 * of bound checking.)
97 *
98 */
99 unsigned int populationLabel(unsigned int populationIndex);
100
101
102 /** \brief Value of an allele
103 *
104 * The index corresponds to the local mapping of alleles
105 * regardless of the ranking of allele values. (No out of
106 * bound checking.)
107 *
108 */
109 unsigned int alleleValue(unsigned int alleleIndex);
110
111
112 /// Allele of a given individual (no checking)
113 unsigned int allele(unsigned int individualIndex) const;
114
115 /// Population label of a given individual (no checking)
116 unsigned int individualLabel(unsigned int individualIndex) const;
117
118
119 /** \brief Number of alleles
120 *
121 */
122 unsigned int numberOfAlleles();
123
124
125 /** \brief Number of populations
126 *
127 */
128 unsigned int numberOfPopulations();
129
130
131 /** \brief Number of loaded genotypes
132 *
133 */
134 unsigned int numberOfGenotypes() const;
135
136
137 /** \brief Absolute total allele frequency
138 *
139 */
140 unsigned int alleleFrequencyTotal(unsigned int alleleIndex);
141
142
143 /** \brief Absolute allele frequency in a population
144 *
145 */
146 unsigned int alleleFrequencyPerPopulation(unsigned int populationIndex, unsigned int alleleIndex);
147
148
149 /** \brief Sample size of a population
150 *
151 */
152 unsigned int populationFrequency(unsigned int populationIndex);
153
154
155 /** \brief Weir-Cockerham theta-statistic
156 *
157 * Note: equivalent to Fst.
158 *
159 */
160 double theta();
161
162
163 /** \brief Between-population component of variance
164 *
165 */
166 double T1();
167
168
169 /** \brief Total variance
170 *
171 */
172 double T2();
173
174
175 protected:
176
177 bool d_flag;
178 void d_init();
179 void d_clear();
180 unsigned int d_reserved;
181 unsigned int d_numberOfGenotypes;
182 unsigned int *d_genotypes;
183 unsigned int *d_populationLabels;
184
185 bool s_flag;
186 void s_init();
187 void s_clear();
188 void s_compute();
189 void processPopulations();
190 void processAlleles();
191 unsigned int getPopulationIndex(unsigned int) const;
192 unsigned int getAlleleIndex(unsigned int) const;
193 unsigned int s_numberOfAlleles;
194 unsigned int *s_alleleValueMapping;
195 unsigned int s_numberOfPopulations;
196 unsigned int *s_populationLabelMapping;
197 unsigned int *s_populationFrequencies;
198 unsigned int *s_alleleFrequenciesTotal;
199 unsigned int **s_alleleFrequenciesPerPopulation;
200
201 bool w_flag;
202 void w_init();
203 void w_clear();
204 void w_compute();
205 double w_T;
206 double *w_T1;
207 double *w_T2;
208 double w_nbar;
209 double w_nc;
210 double *w_pbar;
211 double *w_ssquare;
212 double w_sum_T1;
213 double w_sum_T2;
214
215
216 private:
217
218 HFStatistics(const HFStatistics& source) { }
219
220 HFStatistics& operator=(const HFStatistics& source) {
221 return *this;
222 }
223
224 };
225 }
226
227 #endif