/*
    Copyright 2008-2009 Stéphane De Mita, Mathieu Siol

    This file is part of the EggLib library.

    EggLib is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    EggLib is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with EggLib.  If not, see <http://www.gnu.org/licenses/>.
*/


    21 #ifndef EGGLIB_SITEPOLYMORPHISM_HPP
 | 
| 
 | 
    22 #define EGGLIB_SITEPOLYMORPHISM_HPP
 | 
| 
 | 
    23 
 | 
| 
 | 
    24 
 | 
| 
 | 
    25 
 | 
| 
 | 
    26 namespace egglib {
 | 
| 
 | 
    27 
 | 
| 
 | 
    28 
 | 
| 
 | 
    29    /** \brief Implements diversity analysis at the site level
 | 
| 
 | 
    30     *
 | 
| 
 | 
    31     * \ingroup polymorphism
 | 
| 
 | 
    32     * 
 | 
| 
 | 
    33     * Data are loaded along with a population index. It is necessary to
 | 
| 
 | 
    34     * set the number of populations prior to use.
 | 
| 
 | 
    35     * 
 | 
| 
 | 
    36     * Outgroup sequence must be loaded separetedly. There can be any
 | 
| 
 | 
    37     * number of outgroups, but they must be all consistent otherwise the
 | 
| 
 | 
    38     * site will be considered as not orientable.
 | 
| 
 | 
    39     * 
 | 
| 
 | 
    40     */
 | 
| 
 | 
    41     class SitePolymorphism {
 | 
| 
 | 
    42 
 | 
| 
 | 
    43         public:
 | 
| 
 | 
    44 
 | 
| 
 | 
    45            /** \brief Builds an object
 | 
| 
 | 
    46             * 
 | 
| 
 | 
    47             */
 | 
| 
 | 
    48             SitePolymorphism();
 | 
| 
 | 
    49 
 | 
| 
 | 
    50 
 | 
| 
 | 
    51            /** \brief Builds an object
 | 
| 
 | 
    52             * 
 | 
| 
 | 
    53             * \param npop number of populations
 | 
| 
 | 
    54             * 
 | 
| 
 | 
    55             */
 | 
| 
 | 
    56             SitePolymorphism(unsigned int npop);
 | 
| 
 | 
    57 
 | 
| 
 | 
    58 
 | 
| 
 | 
    59            /** \brief Destroys an object
 | 
| 
 | 
    60             * 
 | 
| 
 | 
    61             */
 | 
| 
 | 
    62             virtual ~SitePolymorphism();
 | 
| 
 | 
    63 
 | 
| 
 | 
    64 
 | 
| 
 | 
    65            /** \brief Copy constructor
 | 
| 
 | 
    66             * 
 | 
| 
 | 
    67             */
 | 
| 
 | 
    68             SitePolymorphism(const SitePolymorphism& source);
 | 
| 
 | 
    69 
 | 
| 
 | 
    70 
 | 
| 
 | 
    71            /** \brief Assignment operator
 | 
| 
 | 
    72             * 
 | 
| 
 | 
    73             */
 | 
| 
 | 
    74             SitePolymorphism& operator=(const SitePolymorphism& source);
 | 
| 
 | 
    75 
 | 
| 
 | 
    76 
 | 
| 
 | 
    77            /** \brief Sets the number of populations
 | 
| 
 | 
    78             * 
 | 
| 
 | 
    79             * NOTE THAT all previous data is lost.
 | 
| 
 | 
    80             * 
 | 
| 
 | 
    81             */
 | 
| 
 | 
    82             void numberOfPopulations(unsigned int npop);
 | 
| 
 | 
    83 
 | 
| 
 | 
    84 
 | 
| 
 | 
    85            /** \brief Adds a character
 | 
| 
 | 
    86             * 
 | 
| 
 | 
    87             * \param populationIndex the index of the population from
 | 
| 
 | 
    88             * which is sampled this character (do not use "population
 | 
| 
 | 
    89             * label").
 | 
| 
 | 
    90             * 
 | 
| 
 | 
    91             * \param character the character value (it is assumed it
 | 
| 
 | 
    92             * represents a valid character.
 | 
| 
 | 
    93             * 
 | 
| 
 | 
    94             */
 | 
| 
 | 
    95             void load(unsigned int populationIndex, char character);
 | 
| 
 | 
    96 
 | 
| 
 | 
    97 
 | 
| 
 | 
    98            /** \brief Loads outgroup state
 | 
| 
 | 
    99             * 
 | 
| 
 | 
   100             * There can be any number of outgroup states. Only
 | 
| 
 | 
   101             * characters that are considered as valid (whatever the list
 | 
| 
 | 
   102             * is) should be loaded.
 | 
| 
 | 
   103             * 
 | 
| 
 | 
   104             */
 | 
| 
 | 
   105             void outgroup(char state);
 | 
| 
 | 
   106 
 | 
| 
 | 
   107 
 | 
| 
 | 
   108            /** \brief Number of different alleles
 | 
| 
 | 
   109             * 
 | 
| 
 | 
   110             */
 | 
| 
 | 
   111             unsigned int numberOfAlleles() const;
 | 
| 
 | 
   112             
 | 
| 
 | 
   113             
 | 
| 
 | 
   114            /** \brief Gets an allele (unsecure)
 | 
| 
 | 
   115             * 
 | 
| 
 | 
   116             * Assumes that the index provided lies in the valid range
 | 
| 
 | 
   117             * 
 | 
| 
 | 
   118             */
 | 
| 
 | 
   119             char allele(unsigned int index) const;
 | 
| 
 | 
   120 
 | 
| 
 | 
   121 
 | 
| 
 | 
   122            /** \brief Gets a frequency (unsecure)
 | 
| 
 | 
   123             * 
 | 
| 
 | 
   124             * The sum of of frequencies of the allele over populations
 | 
| 
 | 
   125             * is computed. Not out-of-bounds check is performed.
 | 
| 
 | 
   126             * 
 | 
| 
 | 
   127             */
 | 
| 
 | 
   128             unsigned int alleleFrequency(unsigned int alleleIndex) const;
 | 
| 
 | 
   129 
 | 
| 
 | 
   130 
 | 
| 
 | 
   131            /** \brief Gets the frequency of an allele in one pop (unsecure)
 | 
| 
 | 
   132             * 
 | 
| 
 | 
   133             * The frequency of the allele in the given population is
 | 
| 
 | 
   134             * returned. Not out-of-bounds check is performed.
 | 
| 
 | 
   135             * 
 | 
| 
 | 
   136             */
 | 
| 
 | 
   137             unsigned int alleleFrequency(unsigned int popIndex, unsigned int alleleIndex) const;
 | 
| 
 | 
   138 
 | 
| 
 | 
   139 
 | 
| 
 | 
   140            /** \brief Sums the frequency of derived allele(s)
 | 
| 
 | 
   141             * 
 | 
| 
 | 
   142             * This method assumes that the site is orientable. It will
 | 
| 
 | 
   143             * use as outgroup the first outgroup character entered,
 | 
| 
 | 
   144             * assuming at least one was entered and that all (if more
 | 
| 
 | 
   145             * than one) were identical.
 | 
| 
 | 
   146             * 
 | 
| 
 | 
   147             */
 | 
| 
 | 
   148             unsigned int derivedAlleleFrequency() const;
 | 
| 
 | 
   149 
 | 
| 
 | 
   150 
 | 
| 
 | 
   151            /** \brief Number of sequences that were analyzed
 | 
| 
 | 
   152             * 
 | 
| 
 | 
   153             */
 | 
| 
 | 
   154             unsigned int ns() const;
 | 
| 
 | 
   155 
 | 
| 
 | 
   156 
 | 
| 
 | 
   157            /** \brief Gets the number of analyzed sequences for a population
 | 
| 
 | 
   158             * 
 | 
| 
 | 
   159             * No out-of-bound check is performed
 | 
| 
 | 
   160             * 
 | 
| 
 | 
   161             */
 | 
| 
 | 
   162             unsigned int ns(unsigned int popIndex) const;
 | 
| 
 | 
   163 
 | 
| 
 | 
   164 
 | 
| 
 | 
   165            /** \brief Checks if the site can be oriented
 | 
| 
 | 
   166             * 
 | 
| 
 | 
   167             * Returns true if at least one outgroup datum has been
 | 
| 
 | 
   168             * loaded, if all outgroup data are identical (regardless of
 | 
| 
 | 
   169             * their value) and if the outgroup allele is one of the
 | 
| 
 | 
   170             * allele in the sample.
 | 
| 
 | 
   171             * 
 | 
| 
 | 
   172             */
 | 
| 
 | 
   173             bool isOrientable() const;
 | 
| 
 | 
   174 
 | 
| 
 | 
   175             bool isPolymorphic(unsigned int popIndex) const;
 | 
| 
 | 
   176             bool hasSpecificAllele(unsigned int popIndex, bool restrictToDerived) const;
 | 
| 
 | 
   177             bool haveFixedDifference(unsigned int pop1, unsigned int pop2) const;
 | 
| 
 | 
   178             bool haveCommonAllele(unsigned int pop1, unsigned int pop2) const;
 | 
| 
 | 
   179             bool haveSharedAllele(unsigned int pop1, unsigned int pop2) const;
 | 
| 
 | 
   180 
 | 
| 
 | 
   181 
 | 
| 
 | 
   182 
 | 
| 
 | 
   183 
 | 
| 
 | 
   184         protected:
 | 
| 
 | 
   185 
 | 
| 
 | 
   186             // helpers
 | 
| 
 | 
   187             void init();
 | 
| 
 | 
   188             void clear();
 | 
| 
 | 
   189             void copy(const SitePolymorphism& site);
 | 
| 
 | 
   190 
 | 
| 
 | 
   191 
 | 
| 
 | 
   192             // data
 | 
| 
 | 
   193             unsigned int m_numberOfPopulations;
 | 
| 
 | 
   194             unsigned int m_numberOfStates;
 | 
| 
 | 
   195             char * m_states;
 | 
| 
 | 
   196             unsigned int ** m_frequencies;
 | 
| 
 | 
   197             unsigned int m_numberOfOutgroups;
 | 
| 
 | 
   198             char * m_outgroups;
 | 
| 
 | 
   199             unsigned int m_ns;
 | 
| 
 | 
   200             unsigned int * m_pop_ns;
 | 
| 
 | 
   201             
 | 
| 
 | 
   202             bool m_cache_orientable;
 | 
| 
 | 
   203 
 | 
| 
 | 
   204     };
 | 
| 
 | 
   205 }
 | 
| 
 | 
   206 
 | 
| 
 | 
   207 #endif
 |