comparison egglib/egglib-2.1.5/include/egglib-cpp/BaseDiversity.hpp @ 9:98c37a5d67f4 draft

Uploaded
author dereeper
date Wed, 07 Feb 2018 22:08:47 -0500
parents 420b57c3c185
children
comparison
equal deleted inserted replaced
8:6bf69b40365c 9:98c37a5d67f4
1 /*
2 Copyright 2009 Stéphane De Mita, Mathieu Siol
3
4 This file is part of the EggLib library.
5
6 EggLib is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 EggLib is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #ifndef EGGLIB_BASEDIVERSITY_HPP
21 #define EGGLIB_BASEDIVERSITY_HPP
22
23 #include "CharMatrix.hpp"
24 #include "SitePolymorphism.hpp"
25 #include <string>
26
27 /** \defgroup polymorphism polymorphism
28 *
29 * \brief Diversity analyses
30 *
31 * Two classes are contained in this module: NucleotideDiversity, that
32 * performs site-centered polymorphism analyses, and HaplotypeDiversity,
33 * that performs haplotype-centered analyses. The detection of
34 * polymorphic sites is common to both, through the base class
35 * BaseDiversity. However this phase must be repeated when stats from
36 * the two classes are needed. To reduce the computational burden, the
37 * function reserve() can be used, that directly allocates needed memory
38 * when the eventual number of polymorphic sites is known prior to
39 * analysis (even if not precisely). For both classes, a set of
40 * statistics are computed immediately upon load of a data set. For
41 * NucleotideDiversity, additional statistics are computed per group
42 * upon use of the corresponding accessors. This number of operations
43 * performed several times is strictly limited. This is particularly
44 * useful when different statistics are needed for a given alignment.
45 * However, this system allows not computing unnecessary statistics to
46 * a certain extent.
47 *
48 */
49
50 namespace egglib {
51
52 /** \brief Base class of diversity classes
53 *
54 * Factors out the analysis of polymorphic sites, shared by the
55 * diversity classes, through the method importSites() and related accessors.
56 *
57 * \ingroup polymorphism
58 *
59 */
60 class BaseDiversity {
61
62 public:
63
64 /** \brief Constructor
65 *
66 */
67 BaseDiversity();
68
69 /** \brief Destructor
70 *
71 */
72 virtual ~BaseDiversity();
73
74 /** \brief Reserve sufficient memory for a given number of
75 * polymorphic sites.
76 *
77 * This method makes importSite function faster when you
78 * already know how many polymorphic sites to expect, since
79 * the necessary memory will be allocated prior the screening
80 * of data. It is possible to use reserve() even if with a
81 * number of sites that is not matching what importSites()
82 * will find.
83 *
84 * \param numberOfSites a strictly positive integer.
85 *
86 */
87 virtual void reserve(unsigned int numberOfSites);
88
89 /// Gets a site
90 const SitePolymorphism* get_site(unsigned int index) const;
91
92 /// Gets a site position
93 unsigned int get_position(unsigned int index) const;
94
95 /** \brief Predefined mapping string for DNA data
96 *
97 */
98 static const std::string dnaMapping;
99
100
101 /** \brief Predefined mapping string for RNA data
102 *
103 */
104 static const std::string rnaMapping;
105
106
107 /** \brief Predefined mapping string for amino acid data
108 *
109 */
110 static const std::string aaMapping;
111
112
113 /// Clears and re-initializes object
114 virtual void reset();
115
116
117 protected:
118
119 virtual void init();
120 virtual void clear();
121
122 //
123 void importSites(CharMatrix& data, bool allowMultipleMutations,
124 double minimumExploitableData, unsigned int ignoreFrequency,
125 std::string characterMapping, bool useZeroAsAncestral,
126 bool ignoreOutgroup);
127
128 //
129 void analyzeSite(CharMatrix& data, unsigned int index, double maxMissingData, bool ignoreOutgroup); // analyzes a site, adds a Site to the Site container if the site is polymorphic
130 unsigned int getPopIndex(unsigned int label) const; // returns v_npop if not found
131
132 SitePolymorphism** v_sites; // holder of polymorphic site addresses
133 bool* v_orientables; // stores whether the sites are orientable or not
134 unsigned int* v_sitePositions; // stores position of sites
135
136 unsigned int v_reserved;
137 unsigned int v_ns; // maximum number of sequences analyzed (max of sites' ns)
138 unsigned int v_S; // number of polymorphic sites
139 unsigned int v_So; // number of orientable sites
140 unsigned int v_eta; // number of mutation (whatever multiple)
141 double v_nseff; // average number of analyzed sequence
142 unsigned int v_lseff; // number of analyzed sites
143 double v_nseffo; // average number of analyzed sequences for analyzes with outgroup
144 unsigned int v_lseffo; // number of analyzed sites for analyzes with outgroup
145 unsigned int v_npop; // number of populations
146 unsigned int *v_popLabel; // label of each pop
147
148 // options
149 bool p_allowMultipleMutations;
150 double p_minimumExploitableData;
151 std::string p_characterMapping;
152 unsigned int p_pos_sep_mapping;
153 bool p_useZeroAsAncestral;
154 unsigned int p_ignoreFrequency;
155
156
157
158 private:
159
160 BaseDiversity(const BaseDiversity& source) { }
161
162 BaseDiversity& operator=(const BaseDiversity& source) {
163 return *this;
164 }
165
166 };
167 }
168
169 #endif