Mercurial > repos > dereeper > sniplay
comparison egglib/egglib-2.1.5/include/egglib-cpp/FStatistics.hpp @ 1:420b57c3c185 draft
Uploaded
| author | dereeper |
|---|---|
| date | Fri, 10 Jul 2015 04:39:30 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:3e19d0dfcf3e | 1:420b57c3c185 |
|---|---|
| 1 /* | |
| 2 Copyright 2009 Stéphane De Mita, Mathieu Siol | |
| 3 | |
| 4 This file is part of the EggLib library. | |
| 5 | |
| 6 EggLib is free software: you can redistribute it and/or modify | |
| 7 it under the terms of the GNU General Public License as published by | |
| 8 the Free Software Foundation, either version 3 of the License, or | |
| 9 (at your option) any later version. | |
| 10 | |
| 11 EggLib is distributed in the hope that it will be useful, | |
| 12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 14 GNU General Public License for more details. | |
| 15 | |
| 16 You should have received a copy of the GNU General Public License | |
| 17 along with EggLib. If not, see <http://www.gnu.org/licenses/>. | |
| 18 */ | |
| 19 | |
| 20 #ifndef EGGLIB_FSTATISTICS_HPP | |
| 21 #define EGGLIB_FSTATISTICS_HPP | |
| 22 | |
| 23 | |
| 24 | |
| 25 namespace egglib { | |
| 26 | |
| 27 | |
| 28 /** \brief Computes Fis, Fst and Fit from diploid data | |
| 29 * | |
| 30 * The class requires loading data. Data are loaded by individual | |
| 31 * (two genotypes per individual). The analyses are cached: they are | |
| 32 * performed upon the first call to statistics accessors. The cache | |
| 33 * is emptied whenever a datum is loaded. | |
| 34 * | |
| 35 * The computations are performed after Weir and Cockerham. The | |
| 36 * statistics F, theta and f are generalized for multiple alleles. | |
| 37 * To allow computation of multi-locus statistics, variance | |
| 38 * components are also available. The three components of the | |
| 39 * variance are Vpopulation (between-population), Vindividual | |
| 40 * (within-population, between-individual) and Vallele (within- | |
| 41 * individual). The formulas to compute the F-statistics are as | |
| 42 * follows: | |
| 43 * - 1-F = Vallele/(Vpopulation+Vindividual+Vallele) | |
| 44 * - theta = Vpopulation/(Vpopulation+Vindividual+Vallele) | |
| 45 * - 1-f = Vallele/(Vindividual+Vallele). | |
| 46 * | |
| 47 * \ingroup polymorphism | |
| 48 * | |
| 49 */ | |
| 50 class FStatistics { | |
| 51 | |
| 52 public: | |
| 53 | |
| 54 /** \brief Constructor | |
| 55 * | |
| 56 */ | |
| 57 FStatistics(); | |
| 58 | |
| 59 | |
| 60 /** \brief Destructor | |
| 61 * | |
| 62 */ | |
| 63 virtual ~FStatistics(); | |
| 64 | |
| 65 | |
| 66 /** \brief Reserve sufficient memory for a given number of | |
| 67 * individuals. | |
| 68 * | |
| 69 * This method makes the load function faster by allocating | |
| 70 * all required memory at once. | |
| 71 * | |
| 72 * \param numberOfIndividuals a strictly positive integer. | |
| 73 * | |
| 74 */ | |
| 75 void reserve(unsigned int numberOfIndividuals); | |
| 76 | |
| 77 | |
| 78 /** \brief Loads the data for one individual | |
| 79 * | |
| 80 * \param genotype1 an integer giving the first allele. | |
| 81 * \param genotype2 an integer giving the second allele. | |
| 82 * \param populationLabel an integer indicating membership in | |
| 83 * a population. | |
| 84 * | |
| 85 * Genotypes and population labels are not required to be | |
| 86 * consecutive (both are labels, not indices). They are | |
| 87 * internally mapped to indices (the mapping can be obtained | |
| 88 * by accessors populationLabel and alleleValue). | |
| 89 * | |
| 90 * All genotypes are considered to be valid (no missing data). | |
| 91 * If statistics were computed prior to the call to this | |
| 92 * function, all data will be erased. | |
| 93 * | |
| 94 */ | |
| 95 void loadIndividual(unsigned int genotype1, | |
| 96 unsigned int genotype2, unsigned int populationLabel); | |
| 97 | |
| 98 | |
| 99 /** \brief Label of a population | |
| 100 * | |
| 101 * The index corresponds to the local mapping of populations | |
| 102 * regardless of the ranking of population labels. (No out | |
| 103 * of bound checking.) | |
| 104 * \param populationIndex index of the population in the local mapping. | |
| 105 */ | |
| 106 unsigned int populationLabel(unsigned int populationIndex); | |
| 107 | |
| 108 | |
| 109 /** \brief Value of an allele | |
| 110 * | |
| 111 * The index corresponds to the local mapping of alleles | |
| 112 * regardless of the ranking of allele values. (No out of | |
| 113 * bound checking.) | |
| 114 * \param alleleIndex index of the allele in the local mapping. | |
| 115 */ | |
| 116 unsigned int alleleValue(unsigned int alleleIndex); | |
| 117 | |
| 118 | |
| 119 /// First allele of a given individual (no checking) | |
| 120 unsigned int firstAllele(unsigned int individualIndex) const; | |
| 121 | |
| 122 /// Second allele of a given individual (no checking) | |
| 123 unsigned int secondAllele(unsigned int individualIndex) const; | |
| 124 | |
| 125 /// Population label of a given individual (no checking) | |
| 126 unsigned int individualLabel(unsigned int individualIndex) const; | |
| 127 | |
| 128 | |
| 129 /** \brief Number of alleles | |
| 130 * | |
| 131 */ | |
| 132 unsigned int numberOfAlleles(); | |
| 133 | |
| 134 | |
| 135 /** \brief Number of populations | |
| 136 * | |
| 137 */ | |
| 138 unsigned int numberOfPopulations(); | |
| 139 | |
| 140 | |
| 141 /** \brief Number of loaded genotypes | |
| 142 * | |
| 143 */ | |
| 144 unsigned int numberOfGenotypes() const; | |
| 145 | |
| 146 | |
| 147 /** \brief Absolute total allele frequency | |
| 148 * | |
| 149 */ | |
| 150 unsigned int alleleFrequencyTotal(unsigned int alleleIndex); | |
| 151 | |
| 152 | |
| 153 /** \brief Absolute allele frequency in a population | |
| 154 * | |
| 155 */ | |
| 156 unsigned int alleleFrequencyPerPopulation(unsigned int populationIndex, unsigned int alleleIndex); | |
| 157 | |
| 158 | |
| 159 /** \brief Absolute genotype frequency | |
| 160 * | |
| 161 * Note that genotype AB is considered different from BA (this | |
| 162 * means that values can be accessed on both sides of the | |
| 163 * diagonal). | |
| 164 * | |
| 165 */ | |
| 166 unsigned int genotypeFrequencyTotal(unsigned int alleleIndex1, unsigned int alleleIndex2); | |
| 167 | |
| 168 | |
| 169 /** \brief Absolute genotype frequency in a population | |
| 170 * | |
| 171 * Note that genotype AB is considered different from BA (this | |
| 172 * means that values can be accessed on both sides of the | |
| 173 * diagonal). | |
| 174 * | |
| 175 */ | |
| 176 unsigned int genotypeFrequencyPerPopulation(unsigned int populationIndex, unsigned int alleleIndex1, unsigned int alleleIndex2); | |
| 177 | |
| 178 | |
| 179 /** \brief Sample size of a population | |
| 180 * | |
| 181 */ | |
| 182 unsigned int populationFrequency(unsigned int populationIndex); | |
| 183 | |
| 184 | |
| 185 /** \brief Weir-Cockerham F-statistic | |
| 186 * | |
| 187 * Note: equivalent to Fit. | |
| 188 * Per the class description: 1-F = Vallele/(Vpopulation+Vindividual+Vallele). | |
| 189 */ | |
| 190 double F(); | |
| 191 | |
| 192 | |
| 193 /** \brief Weir-Cockerham theta-statistic | |
| 194 * | |
| 195 * Note: equivalent to Fst. | |
| 196 * Per the class description: theta = Vpopulation/(Vpopulation+Vindividual+Vallele). | |
| 197 */ | |
| 198 double theta(); | |
| 199 | |
| 200 | |
| 201 /** \brief Weir-Cockerham f-statistic | |
| 202 * | |
| 203 * Note: equivalent to Fis. | |
| 204 * Per the class description: 1-f = Vallele/(Vindividual+Vallele). | |
| 205 */ | |
| 206 double f(); | |
| 207 | |
| 208 | |
| 209 /** \brief Between-population component of variance | |
| 210 * | |
| 211 */ | |
| 212 double Vpopulation(); | |
| 213 | |
| 214 | |
| 215 /** \brief Within-population, between-individual component of variance | |
| 216 * | |
| 217 */ | |
| 218 double Vindividual(); | |
| 219 | |
| 220 | |
| 221 /** \brief Within-individual component of variance | |
| 222 * | |
| 223 */ | |
| 224 double Vallele(); | |
| 225 | |
| 226 | |
| 227 protected: | |
| 228 /* d_ members: presumably the raw loaded data (genotypes and population labels) - TODO confirm against the implementation file. */ | |
| 229 bool d_flag; | |
| 230 void d_init(); | |
| 231 void d_clear(); | |
| 232 unsigned int d_reserved; | |
| 233 unsigned int d_numberOfGenotypes; | |
| 234 unsigned int *d_genotypes; | |
| 235 unsigned int *d_populationLabels; | |
| 236 /* s_ members: presumably the allele/population mappings and frequency tables - TODO confirm against the implementation file. */ | |
| 237 bool s_flag; | |
| 238 void s_init(); | |
| 239 void s_clear(); | |
| 240 void s_compute(); | |
| 241 void processPopulations(); | |
| 242 void processAlleles(); | |
| 243 unsigned int getPopulationIndex(unsigned int) const; | |
| 244 unsigned int getAlleleIndex(unsigned int) const; | |
| 245 unsigned int s_numberOfAlleles; | |
| 246 unsigned int *s_alleleValueMapping; | |
| 247 unsigned int s_numberOfPopulations; | |
| 248 unsigned int *s_populationLabelMapping; | |
| 249 unsigned int *s_populationFrequencies; | |
| 250 unsigned int *s_alleleFrequenciesTotal; | |
| 251 unsigned int **s_alleleFrequenciesPerPopulation; | |
| 252 unsigned int **s_genotypeFrequenciesTotal; | |
| 253 unsigned int ***s_genotypeFrequenciesPerPopulation; | |
| 254 /* w_ members: presumably the Weir-Cockerham statistics and their intermediate terms - TODO confirm against the implementation file. */ | |
| 255 bool w_flag; | |
| 256 void w_init(); | |
| 257 void w_clear(); | |
| 258 void w_compute(); | |
| 259 double w_F; | |
| 260 double w_T; | |
| 261 double w_f; | |
| 262 double *w_a; | |
| 263 double *w_b; | |
| 264 double *w_c; | |
| 265 double w_nbar; | |
| 266 double w_nc; | |
| 267 double *w_pbar; | |
| 268 double *w_ssquare; | |
| 269 double *w_hbar; | |
| 270 double w_sum_a; | |
| 271 double w_sum_b; | |
| 272 double w_sum_c; | |
| 273 double w_sum_abc; | |
| 274 double w_sum_bc; | |
| 275 | |
| 276 | |
| 277 private: | |
| 278 /* Copy constructor is private with an empty body: it copies no member and is not accessible to code outside the class. */ | |
| 279 FStatistics(const FStatistics& source) { } | |
| 280 /* Assignment operator is private and copies nothing from source: it only returns the current object. */ | |
| 281 FStatistics& operator=(const FStatistics& source) { | |
| 282 return *this; | |
| 283 } | |
| 284 | |
| 285 }; | |
| 286 } | |
| 287 | |
| 288 #endif |
