Mercurial > repos > dereeper > sniplay
comparison egglib/egglib-2.1.5/include/egglib-cpp/HFStatistics.hpp @ 9:98c37a5d67f4 draft
Uploaded
author | dereeper |
---|---|
date | Wed, 07 Feb 2018 22:08:47 -0500 |
parents | 420b57c3c185 |
children |
comparison
equal
deleted
inserted
replaced
8:6bf69b40365c | 9:98c37a5d67f4 |
---|---|
1 /* | |
2 Copyright 2010 Stéphane De Mita, Mathieu Siol | |
3 | |
4 This file is part of the EggLib library. | |
5 | |
6 EggLib is free software: you can redistribute it and/or modify | |
7 it under the terms of the GNU General Public License as published by | |
8 the Free Software Foundation, either version 3 of the License, or | |
9 (at your option) any later version. | |
10 | |
11 EggLib is distributed in the hope that it will be useful, | |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 GNU General Public License for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>. | |
18 */ | |
19 | |
20 #ifndef EGGLIB_HFSTATISTICS_HPP | |
21 #define EGGLIB_HFSTATISTICS_HPP | |
22 | |
23 | |
24 | |
25 namespace egglib { | |
26 | |
27 | |
/** \brief Computes Fst (Weir-Cockerham theta) from haploid data
 *
 * The class requires loading data. Data are loaded one haploid
 * individual at a time (one genotype per individual). The analyses
 * are cached: they are performed upon the first call to statistics
 * accessors. The cache is emptied whenever a datum is loaded.
 *
 * The computations are performed after Weir and Cockerham. The
 * statistic theta is generalized for multiple alleles. To allow
 * computation of multi-locus statistics, variance components are
 * also available. The two components of the variance are T1 and T2
 * and theta is T1/T2 (from Weir 1996 "Genetic Data Analysis II",
 * Sinauer associates, Sunderland MA).
 *
 * Instances cannot be copied or assigned (see the private section).
 *
 * \ingroup polymorphism
 *
 */
class HFStatistics {

    public:

        /** \brief Constructor
         *
         */
        HFStatistics();


        /** \brief Destructor
         *
         */
        virtual ~HFStatistics();


        /** \brief Reserve sufficient memory for a given number of
         * individuals.
         *
         * This method makes the load function faster by allocating
         * all required memory at once.
         *
         * \param numberOfIndividuals a strictly positive integer.
         *
         */
        void reserve(unsigned int numberOfIndividuals);


        /** \brief Loads the data for one individual
         *
         * \param genotype an integer giving the allele.
         * \param populationLabel an integer indicating membership
         * of a population.
         *
         * Genotypes and population labels are not required to be
         * consecutive (both are labels, not indices). They are
         * internally mapped to indices (the mapping can be obtained
         * through the accessors populationLabel() and alleleValue()).
         *
         * All genotypes are considered to be valid (no missing data).
         * If statistics were computed before a call to this function,
         * they are erased (the cache is emptied whenever a datum is
         * loaded).
         *
         */
        void loadIndividual(unsigned int genotype, unsigned int populationLabel);


        /** \brief Label of a population
         *
         * The index corresponds to the local mapping of populations
         * regardless of the ranking of population labels. (No out
         * of bound checking.)
         *
         * \param populationIndex index of the population in the
         * local mapping.
         *
         */
        unsigned int populationLabel(unsigned int populationIndex);


        /** \brief Value of an allele
         *
         * The index corresponds to the local mapping of alleles
         * regardless of the ranking of allele values. (No out of
         * bound checking.)
         *
         * \param alleleIndex index of the allele in the local
         * mapping.
         *
         */
        unsigned int alleleValue(unsigned int alleleIndex);


        /// Allele of a given individual (no checking)
        unsigned int allele(unsigned int individualIndex) const;

        /// Population label of a given individual (no checking)
        unsigned int individualLabel(unsigned int individualIndex) const;


        /** \brief Number of alleles
         *
         */
        unsigned int numberOfAlleles();


        /** \brief Number of populations
         *
         */
        unsigned int numberOfPopulations();


        /** \brief Number of loaded genotypes
         *
         */
        unsigned int numberOfGenotypes() const;


        /** \brief Absolute total allele frequency
         *
         * \param alleleIndex index of the allele in the local
         * mapping.
         *
         */
        unsigned int alleleFrequencyTotal(unsigned int alleleIndex);


        /** \brief Absolute allele frequency in a population
         *
         * \param populationIndex index of the population in the
         * local mapping.
         * \param alleleIndex index of the allele in the local
         * mapping.
         *
         */
        unsigned int alleleFrequencyPerPopulation(unsigned int populationIndex, unsigned int alleleIndex);


        /** \brief Sample size of a population
         *
         * \param populationIndex index of the population in the
         * local mapping.
         *
         */
        unsigned int populationFrequency(unsigned int populationIndex);


        /** \brief Weir-Cockerham theta-statistic
         *
         * Note: equivalent to Fst.
         *
         */
        double theta();


        /** \brief Between-population component of variance
         *
         */
        double T1();


        /** \brief Total variance
         *
         */
        double T2();


    protected:

        // NOTE(review): the three groups below are distinguished by
        // their prefix (d_, s_, w_). The *_flag members presumably
        // record the state of each group (initialised / computed);
        // confirm against HFStatistics.cpp. The order of the data
        // members is kept unchanged on purpose: it determines the
        // object layout that the compiled implementation relies on.

        // d_ group: the loaded individuals (genotype and population
        // label arrays, plus the reserved and used counts).
        bool d_flag;
        void d_init();
        void d_clear();
        unsigned int d_reserved;
        unsigned int d_numberOfGenotypes;
        unsigned int *d_genotypes;
        unsigned int *d_populationLabels;

        // s_ group: label/value-to-index mappings and the frequency
        // tables (per population, per allele, and per population
        // and allele).
        bool s_flag;
        void s_init();
        void s_clear();
        void s_compute();
        void processPopulations();
        void processAlleles();
        unsigned int getPopulationIndex(unsigned int) const;
        unsigned int getAlleleIndex(unsigned int) const;
        unsigned int s_numberOfAlleles;
        unsigned int *s_alleleValueMapping;
        unsigned int s_numberOfPopulations;
        unsigned int *s_populationLabelMapping;
        unsigned int *s_populationFrequencies;
        unsigned int *s_alleleFrequenciesTotal;
        unsigned int **s_alleleFrequenciesPerPopulation;

        // w_ group: intermediate and final values of the variance
        // component computation (presumably the Weir-Cockerham
        // quantities named in the class description; confirm
        // against HFStatistics.cpp).
        bool w_flag;
        void w_init();
        void w_clear();
        void w_compute();
        double w_T;
        double *w_T1;
        double *w_T2;
        double w_nbar;
        double w_nc;
        double *w_pbar;
        double *w_ssquare;
        double w_sum_T1;
        double w_sum_T2;


    private:

        /** \brief Copy constructor is not available
         *
         * Declared private and deliberately left without a
         * definition. The class holds raw pointer members, and an
         * empty-bodied copy constructor would leave every one of
         * them uninitialised in the new object. Without a
         * definition, an accidental copy from inside the class
         * fails at link time instead of compiling silently.
         *
         */
        HFStatistics(const HFStatistics& source);

        /** \brief Assignment operator is not available
         *
         * Declared private and deliberately left without a
         * definition, for the same reason as the copy constructor.
         *
         */
        HFStatistics& operator=(const HFStatistics& source);

};
225 } | |
226 | |
227 #endif |