Mercurial > repos > dereeper > sniplay
view egglib/egglib-2.1.5/include/egglib-cpp/BaseDiversity.hpp @ 9:98c37a5d67f4 draft
Uploaded
author | dereeper |
---|---|
date | Wed, 07 Feb 2018 22:08:47 -0500 |
parents | 420b57c3c185 |
children |
line wrap: on
line source
/* Copyright 2009 Stéphane De Mita, Mathieu Siol This file is part of the EggLib library. EggLib is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. EggLib is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with EggLib. If not, see <http://www.gnu.org/licenses/>. */ #ifndef EGGLIB_BASEDIVERSITY_HPP #define EGGLIB_BASEDIVERSITY_HPP #include "CharMatrix.hpp" #include "SitePolymorphism.hpp" #include <string> /** \defgroup polymorphism polymorphism * * \brief Diversity analyses * * Two classes are contained in this module: NucleotideDiversity, that * performs site-centered polymorphism analyses, and HaplotypeDiversity, * that performs haplotype-centered analyses. The detection of * polymorphic sites is common to both, through the base class * BaseDiversity. However this phase must be repeated when stats from * the two classes are needed. To reduce the computational burden, the * function reserve() can be use, that directly allocates needed memory * when the eventual number of polymorphic sites is known prior to * analysis (even if not precisely). For both classes, a set of * statistics are computed immediately upon load of a data set. For * NucleotideDiversity, additional statistics are computed per group * upon use of the corresponding accessors. This number of operations * performed several times is strictly limited. This is particularly * useful when different statistics are needed for a given alignment. * However, this system allows not computing unnecessary statistics to * a certain extend. * */ namespace egglib { /** \brief Base class of diversity classes * * Mutualizes the analysis of polymorphic sites through the method * importSites() and related accessors. * * \ingroup polymorphism * */ class BaseDiversity { public: /** \brief Constructor * */ BaseDiversity(); /** \brief Destructor * */ virtual ~BaseDiversity(); /** \brief Reserve sufficient memory for a given number of * polymorphic sites. * * This method makes importSite function faster when you * already know how many polymorphic sites to expect, since * the necessary memory will be allocated prior the screening * of data. It is possible to use reserve() even if with a * number of sites that is not matching what importSites() * will find. * * \param numberOfSites a strictly positive integer. * */ virtual void reserve(unsigned int numberOfSites); /// Gets a site const SitePolymorphism* get_site(unsigned int index) const; /// Gets a site position unsigned int get_position(unsigned int index) const; /** \brief Predefined mapping string for DNA data * */ static const std::string dnaMapping; /** \brief Predefined mapping string for RNA data * */ static const std::string rnaMapping; /** \brief Predefined mapping string for amino acid data * */ static const std::string aaMapping; /// Clears and re-initializes object virtual void reset(); protected: virtual void init(); virtual void clear(); // void importSites(CharMatrix& data, bool allowMultipleMutations, double minimumExploitableData, unsigned int ignoreFrequency, std::string characterMapping, bool useZeroAsAncestral, bool ignoreOutgroup); // void analyzeSite(CharMatrix& data, unsigned int index, double maxMissingData, bool ignoreOutgroup); // analyzes a site, adds a Site to the Site container if the site is polymorphic unsigned int getPopIndex(unsigned int label) const; // returns v_npop if not found SitePolymorphism** v_sites; // holder of polymorphic site addresses bool* v_orientables; // stores whether the sites are orientable or not unsigned int* v_sitePositions; // stores position of sites unsigned int v_reserved; unsigned int v_ns; // maximum number of sequences analyzed (max of sites' ns) unsigned int v_S; // number of polymorphic sites unsigned int v_So; // number of orientable sites unsigned int v_eta; // number of mutation (whatever multiple) double v_nseff; // average number of analyzed sequence unsigned int v_lseff; // number of analyzed sites double v_nseffo; // average number of analyzed sequences for analyzes with outgroup unsigned int v_lseffo; // number of analyzed sites for analyzes with outgroup unsigned int v_npop; // number of populations unsigned int *v_popLabel; // label of each pop // options bool p_allowMultipleMutations; double p_minimumExploitableData; std::string p_characterMapping; unsigned int p_pos_sep_mapping; bool p_useZeroAsAncestral; unsigned int p_ignoreFrequency; private: BaseDiversity(const BaseDiversity& source) { } BaseDiversity& operator=(const BaseDiversity& source) { return *this; } }; } #endif