Mercurial > repos > dereeper > sniplay
comparison egglib/egglib-2.1.5/include/egglib-cpp/LinkageDisequilibrium.hpp @ 9:98c37a5d67f4 draft
Uploaded
author | dereeper |
---|---|
date | Wed, 07 Feb 2018 22:08:47 -0500 |
parents | 420b57c3c185 |
children |
comparison
equal
deleted
inserted
replaced
8:6bf69b40365c | 9:98c37a5d67f4 |
---|---|
1 /* | |
2 Copyright 2009 Stéphane De Mita, Mathieu Siol | |
3 | |
4 This file is part of the EggLib library. | |
5 | |
6 EggLib is free software: you can redistribute it and/or modify | |
7 it under the terms of the GNU General Public License as published by | |
8 the Free Software Foundation, either version 3 of the License, or | |
9 (at your option) any later version. | |
10 | |
11 EggLib is distributed in the hope that it will be useful, | |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 GNU General Public License for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>. | |
18 */ | |
19 | |
20 #ifndef EGGLIB_LINKAGEDISEQUILIBRUM_HPP | |
21 #define EGGLIB_LINKAGEDISEQUILIBRUM_HPP | |
22 | |
23 | |
24 #include "BaseDiversity.hpp" | |
25 #include "EggException.hpp" | |
26 | |
27 | |
28 namespace egglib { | |
29 | |
30 /** \brief Analyzes linkage disequilibrium per pair of polymorphic sites | |
31 * | |
32 * \ingroup polymorphism | |
33 * | |
34 * The class considers an alignment and detects polymorphic sites | |
35 * using the BaseDiversity functionality (shared with other classes | |
36 * of the module). Only sites with exactly two alleles are | |
37 * considered. Statistics of pairwise linkage disequilibrium can | |
38 * be accessed by pair index (note that out-of-range errors are not | |
39 * checked). Population labels are ignored (but outgroups are | |
40 * excluded from the analysis). | |
41 * | |
42 */ | |
43 class LinkageDisequilibrium : public BaseDiversity { | |
44 | |
45 public: | |
46 | |
47 /// Default constructor | |
48 LinkageDisequilibrium(); | |
49 | |
50 /// Destructor | |
51 virtual ~LinkageDisequilibrium(); | |
52 | |
53 /** \brief Analyzes polymorphic sites of an alignment | |
54 * | |
55 * \param data an alignment object (subclass of CharMatrix). | |
56 * The presence of outgroup or of different populations will | |
57 * be detected based on the populationLabel members of the | |
58 * passed object. The populationLabel 999 will be interpreted | |
59 * as outgroups. If several outgroups are passed, sites were | |
60 * the outgroups are not consistent will be treated as "non- | |
61 * orientable". | |
62 * | |
63 * \param minimumExploitableData site where the non-missing | |
64 * data (as defined by characterMapping) are at a frequency | |
65 * larger than this value will be removed from the analysis. | |
66 * Use 1. to take only 'complete' sites into account and 0. | |
67 * to use all sites. | |
68 * | |
69 * \param ignoreFrequency removes sites that are polymorphic | |
70 * because of an allele at absolute frequency smaller than or | |
71 * equal to this value. If ignoreFrequency=1, no sites are | |
72 * removed, if ignoreFrequency=1, singleton sites are | |
73 * ignored. Such sites are completely removed from the | |
74 * analysis (not counted in lseff). Note that if more than | |
75 * one mutation is allowed, the site is removed only if all | |
76 * the alleles but one are smaller than or equal to this | |
77 * value. For example, an alignment column AAAAAAGAAT is | |
78 * ignored with an ignoreFrequency of 1, but AAAAAAGGAT is | |
79 * conserved (including the third allele T which is a | |
80 * singleton). | |
81 * | |
82 * \param characterMapping a string giving the list of | |
83 * characters that should be considered as valid data. If a | |
84 * space is present in the string, the characters left of the | |
85 * space will be treated as valid data and the characters | |
86 * right of the space will be treated as missing data, that | |
87 * is tolerated but ignored. All characters not in the string | |
88 * will cause an EggInvalidCharacterError to be raised. | |
89 */ | |
90 void load(CharMatrix& data, | |
91 double minimumExploitableData=1., | |
92 unsigned int ignoreFrequency=0, | |
93 std::string characterMapping=dnaMapping); | |
94 | |
95 | |
96 /// Number of pairs contained in the instance | |
97 unsigned int numberOfPairs() const; | |
98 | |
99 /// Alignment distance between a given pair | |
100 int d(unsigned int pair_index); | |
101 | |
102 /// D statistic for a given pair | |
103 double D(unsigned int pair_index); | |
104 | |
105 /// D' statistic for a given pair | |
106 double Dp(unsigned int pair_index); | |
107 | |
108 /// r statistic for a given pair | |
109 double r(unsigned int pair_index); | |
110 | |
111 /// r2 statistic for a given pair | |
112 double r2(unsigned int pair_index); | |
113 | |
114 /// position of the first site for a given pair | |
115 unsigned int site1(unsigned int pair_index); | |
116 | |
117 /// position of the second site for a given pair | |
118 unsigned int site2(unsigned int pair_index); | |
119 | |
120 /// correlation coefficient between r2 and distance | |
121 double correl() const; | |
122 | |
123 /** \brief Computes the minimal number of recombination events | |
124 * | |
125 * The computation is performed as described in Hudson, RR and | |
126 * NL Kaplan. 1985. Statistical properties of the number of | |
127 * recombination events in the history of a sample of DNA | |
128 * sequences. Genetics 111: 147-164. The returned parameter is | |
129 * the minimal number of recombination events, given by the | |
130 * number of non-overlapping pairs of segregating sites violating | |
131 * the rule of the four gamete. Only sites with two alleles are | |
132 * considered. Note that homoplasy (multiple mutations) mimicks | |
133 * recombination. The result of this function is not stored | |
134 * in this instance, and re-computed at each call. | |
135 * | |
136 * \param data the same CharMatrix instance as passed to the load | |
137 * method. The instance must not have been modified. | |
138 * | |
139 */ | |
140 unsigned int Rmin(CharMatrix& data) const; | |
141 | |
142 | |
143 | |
144 protected: | |
145 | |
146 // adds a pair of polymorphic sites | |
147 // assume position2>position1, | |
148 // sites are polymorphic with exactly 2 alleles | |
149 void add(CharMatrix& data, unsigned int position1, unsigned int position2); | |
150 | |
151 // Constructor help | |
152 void init(); | |
153 | |
154 // Destructor helper | |
155 void clear(); | |
156 | |
157 // Resizes arrays | |
158 void reset(); | |
159 | |
160 // Small helper | |
161 inline double min(double a, double b) { return (a>b)?a:b;} | |
162 | |
163 // Small helper | |
164 inline double max(double a, double b) { return (a>b)?b:a;} | |
165 | |
166 // Small helper | |
167 inline void check(unsigned int pos) { if (pos>=_n) throw EggRuntimeError("tried to access an invalid index"); } | |
168 | |
169 /* Performs correlation | |
170 * | |
171 * This function works independently from the rest of the class. | |
172 * | |
173 * \param n length of data arrays. | |
174 * \param x first data vector. | |
175 * \param y second data vector. | |
176 * \param r variable to receive the correlation coefficient. | |
177 * \param a variable to receive the regression slope. | |
178 */ | |
179 static void _correl(unsigned int n, const int* x, const double* y, double& r, double& a); | |
180 | |
181 // Distance between pairs | |
182 int* _d; | |
183 | |
184 // D (classical) measure of LD | |
185 double *_D; | |
186 | |
187 // D' | |
188 double *_Dp; | |
189 | |
190 // r, correlation coefficient | |
191 double *_r; | |
192 | |
193 // square r | |
194 double *_r2; | |
195 | |
196 // Data array (not managed by the instance) | |
197 unsigned int *_site1; | |
198 | |
199 // Data array (not managed by the instance) | |
200 unsigned int *_site2; | |
201 | |
202 // Number of pairs | |
203 unsigned int _n; | |
204 | |
205 private: | |
206 | |
207 /// Copy constructor not available | |
208 LinkageDisequilibrium(const LinkageDisequilibrium&) { } | |
209 | |
210 /// Assignment operator not available | |
211 LinkageDisequilibrium& operator=(const LinkageDisequilibrium&) { | |
212 return *this; | |
213 } | |
214 | |
215 | |
216 class Interval { | |
217 public: | |
218 Interval(unsigned int, unsigned int); | |
219 unsigned int a() const; | |
220 unsigned int b() const; | |
221 bool good() const; | |
222 void set_false(); | |
223 private: | |
224 unsigned int _a; | |
225 unsigned int _b; | |
226 unsigned int _good; | |
227 }; | |
228 | |
229 | |
230 }; | |
231 } | |
232 | |
233 #endif |