/*
    Copyright 2008-2009 Stéphane De Mita, Mathieu Siol

    This file is part of the EggLib library.

    EggLib is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    EggLib is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with EggLib.  If not, see <http://www.gnu.org/licenses/>.
*/


#ifndef EGGLIB_SITEPOLYMORPHISM_HPP
#define EGGLIB_SITEPOLYMORPHISM_HPP



namespace egglib {


   /** \brief Implements diversity analysis at the site level
    *
    * \ingroup polymorphism
    *
    * Data are loaded along with a population index. It is necessary to
    * set the number of populations prior to use.
    *
    * Outgroup sequences must be loaded separately. There can be any
    * number of outgroups, but they must all be consistent, otherwise
    * the site will be considered as not orientable.
    *
    * Methods flagged as "unsecure" perform no bounds checking on the
    * indices they receive; passing a valid index is the caller's
    * responsibility.
    *
    */
    class SitePolymorphism {

        public:

           /** \brief Builds an object
            *
            * The number of populations must be set with
            * numberOfPopulations() before data are loaded.
            *
            */
            SitePolymorphism();


           /** \brief Builds an object
            *
            * \param npop number of populations
            *
            * \note This constructor is not declared explicit, so an
            * unsigned int converts implicitly to a SitePolymorphism.
            * This is kept as is for backward compatibility.
            *
            */
            SitePolymorphism(unsigned int npop);


           /** \brief Destroys an object
            *
            */
            virtual ~SitePolymorphism();


           /** \brief Copy constructor
            *
            * \param source the object to copy.
            *
            */
            SitePolymorphism(const SitePolymorphism& source);


           /** \brief Assignment operator
            *
            * \param source the object to copy.
            *
            * \return a reference to the assigned object.
            *
            */
            SitePolymorphism& operator=(const SitePolymorphism& source);


           /** \brief Sets the number of populations
            *
            * NOTE THAT all previous data is lost.
            *
            * \param npop number of populations
            *
            */
            void numberOfPopulations(unsigned int npop);


           /** \brief Adds a character
            *
            * \param populationIndex the index of the population from
            * which this character is sampled (do not use "population
            * label").
            *
            * \param character the character value (it is assumed that
            * it represents a valid character).
            *
            */
            void load(unsigned int populationIndex, char character);


           /** \brief Loads outgroup state
            *
            * There can be any number of outgroup states. Only
            * characters that are considered as valid (whatever the list
            * is) should be loaded.
            *
            * \param state the outgroup character value.
            *
            */
            void outgroup(char state);


           /** \brief Number of different alleles
            *
            */
            unsigned int numberOfAlleles() const;


           /** \brief Gets an allele (unsecure)
            *
            * Assumes that the index provided lies in the valid range.
            *
            * \param index allele index.
            *
            */
            char allele(unsigned int index) const;


           /** \brief Gets a frequency (unsecure)
            *
            * The sum of the frequencies of the allele over populations
            * is computed. No out-of-bounds check is performed.
            *
            * \param alleleIndex allele index.
            *
            */
            unsigned int alleleFrequency(unsigned int alleleIndex) const;


           /** \brief Gets the frequency of an allele in one pop (unsecure)
            *
            * The frequency of the allele in the given population is
            * returned. No out-of-bounds check is performed.
            *
            * \param popIndex population index.
            *
            * \param alleleIndex allele index.
            *
            */
            unsigned int alleleFrequency(unsigned int popIndex, unsigned int alleleIndex) const;


           /** \brief Sums the frequency of derived allele(s)
            *
            * This method assumes that the site is orientable. It will
            * use as outgroup the first outgroup character entered,
            * assuming at least one was entered and that all (if more
            * than one) were identical.
            *
            */
            unsigned int derivedAlleleFrequency() const;


           /** \brief Number of sequences that were analyzed
            *
            */
            unsigned int ns() const;


           /** \brief Gets the number of analyzed sequences for a population
            *
            * No out-of-bounds check is performed.
            *
            * \param popIndex population index.
            *
            */
            unsigned int ns(unsigned int popIndex) const;


           /** \brief Checks if the site can be oriented
            *
            * Returns true if at least one outgroup datum has been
            * loaded, if all outgroup data are identical (regardless of
            * their value) and if the outgroup allele is one of the
            * alleles in the sample.
            *
            */
            bool isOrientable() const;


           /** \brief Checks polymorphism within one population
            *
            * NOTE(review): undocumented in the original header.
            * Presumably returns true when more than one allele is
            * present in the given population, and presumably performs
            * no bounds check on popIndex - confirm against the
            * implementation.
            *
            * \param popIndex population index.
            *
            */
            bool isPolymorphic(unsigned int popIndex) const;


           /** \brief Checks for an allele specific to one population
            *
            * NOTE(review): undocumented in the original header.
            * Presumably returns true when the given population carries
            * at least one allele that is absent from all other
            * populations, and restrictToDerived presumably limits the
            * test to derived alleles (which would require an orientable
            * site) - confirm against the implementation.
            *
            * \param popIndex population index.
            *
            * \param restrictToDerived see the note above.
            *
            */
            bool hasSpecificAllele(unsigned int popIndex, bool restrictToDerived) const;


           /** \brief Checks for a fixed difference between two populations
            *
            * NOTE(review): undocumented in the original header. The
            * exact criterion is defined in the implementation file -
            * confirm before relying on it.
            *
            * \param pop1 index of the first population.
            *
            * \param pop2 index of the second population.
            *
            */
            bool haveFixedDifference(unsigned int pop1, unsigned int pop2) const;


           /** \brief Checks for a common allele between two populations
            *
            * NOTE(review): undocumented in the original header. How a
            * "common" allele differs from a "shared" allele (see
            * haveSharedAllele()) is not visible from this header -
            * confirm against the implementation.
            *
            * \param pop1 index of the first population.
            *
            * \param pop2 index of the second population.
            *
            */
            bool haveCommonAllele(unsigned int pop1, unsigned int pop2) const;


           /** \brief Checks for a shared allele between two populations
            *
            * NOTE(review): undocumented in the original header. How a
            * "shared" allele differs from a "common" allele (see
            * haveCommonAllele()) is not visible from this header -
            * confirm against the implementation.
            *
            * \param pop1 index of the first population.
            *
            * \param pop2 index of the second population.
            *
            */
            bool haveSharedAllele(unsigned int pop1, unsigned int pop2) const;




        protected:

            // helpers
            // NOTE(review): defined in the implementation file;
            // presumably init() sets members to an empty state, clear()
            // releases the arrays below and copy() duplicates another
            // instance - confirm.
            void init();
            void clear();
            void copy(const SitePolymorphism& site);


            // data
            // NOTE(review): the roles below are inferred from the member
            // names and the public accessors; the raw pointers are
            // presumably arrays managed by the helpers above - confirm.
            unsigned int m_numberOfPopulations;  // number of populations
            unsigned int m_numberOfStates;       // presumably number of distinct alleles
            char * m_states;                     // presumably the allele characters
            unsigned int ** m_frequencies;       // presumably allele counts per population (index order: confirm)
            unsigned int m_numberOfOutgroups;    // presumably number of outgroup states loaded
            char * m_outgroups;                  // presumably the outgroup characters
            unsigned int m_ns;                   // presumably total number of sequences loaded
            unsigned int * m_pop_ns;             // presumably number of sequences per population

            bool m_cache_orientable;             // presumably a cached value backing isOrientable()

    };
}

#endif