annotate egglib/egglib-2.1.5/include/egglib-cpp/Ms.hpp @ 6:ebb0ac9b6fa9 draft

planemo upload
author gandres
date Mon, 23 May 2016 17:49:17 -0400
parents 420b57c3c185
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
1 /*
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
2 Copyright 2008,2009,2011 Stéphane De Mita and Mathieu Siol
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
4 This file is part of the EggLib library.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
5
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
6 EggLib is free software: you can redistribute it and/or modify
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
7 it under the terms of the GNU General Public License as published by
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
8 the Free Software Foundation, either version 3 of the License, or
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
9 (at your option) any later version.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
10
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
11 EggLib is distributed in the hope that it will be useful,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
14 GNU General Public License for more details.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
15
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
16 You should have received a copy of the GNU General Public License
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
18 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
19
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
20 #ifndef EGGLIB_GMS_HPP
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
21 #define EGGLIB_GMS_HPP
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
22
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
23 #include "DataMatrix.hpp"
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
24 #include <string>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
25 #include <istream>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
26
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
27 namespace egglib {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
28
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
29 /** \brief ms-like sequence format parser
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
30 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
31 * The class provides parsing (input) and formatting (output)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
32 * operations in ms format, that is the format used by Richard
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
33 * Hudson's program ms for outputting genotypes and by the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
34 * associated program samplestat for reading them. Both types of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
35 * operations are available through static methods using either
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
36 * a string or a stream (which can be a stream to or from a file
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
37 * or a string). In either case, types from the STL are used.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
38 * Although ms deals only with data coded with 0 and 1, the class Ms
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
39 * offers the possibility of both importing and exporting data coded
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
40 * with any integer. All methods have an option named "separated". If
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
41 * this option is true, the parser or formatter introduces a slight
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
42 * modification of the format: individual genotype data are
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
43 * separated by a white space ("1 0 1 1" instead of "1011", allowing
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
44 * genotype values larger than 9: "1 0 11 1").
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
45 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
46 * \ingroup core
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
47 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
48 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
49 class Ms {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
50
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
51 public:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
52
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
53 /** \brief Imports a sequence alignment
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
54 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
55 * Creates an istringstream from the string and calls the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
56 * overloaded method.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
57 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
58 * \param str the string to parse.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
59 * \param ns the expected number of sequences.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
60 * \param separated true if a white space separator is placed
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
61 * between genotype at each site.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
62 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
63 * \return A sequence alignment as a data matrix.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
64 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
65 static DataMatrix get(std::string, unsigned int ns, bool separated=false);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
66
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
67
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
68 /** \brief Imports a sequence alignment
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
69 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
70 * Attempts to generate a DataMatrix object from the stream.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
71 * Reads only one simulation and throws a SeqlibFormatError
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
72 * exception in case of format error.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
73 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
74 * Allows any number of white lines before the //, but no other
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
75 * data. Supports \r at the end of lines (before the \n).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
76 * Accepted symbols are all integers (0-9).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
77 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
78 * \param stream the stream to parse.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
79 * \param ns the expected number of sequences.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
80 * \param separated true if a white space separator is placed
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
81 * between genotype at each site.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
82 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
83 * \return A sequence alignment as a data matrix.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
84 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
85 static DataMatrix get(std::istream& stream, unsigned int ns, bool separated=false);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
86
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
87
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
88 /** \brief Exports a sequence alignment
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
89 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
90 * Internally creates a stringstream, calls the overloaded method
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
91 * and returns the outcome.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
92 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
93 * \param dataMatrix the alignment object to write.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
94 * \param separated true if a white space separator must be placed
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
95 * between the genotype at each site.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
96 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
97 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
98 static std::string format(DataMatrix& dataMatrix, bool separated=false);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
99
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
100
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
101 /** \brief Exports a sequence alignment
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
102 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
103 * Writes the formatted string to the stream 'on the fly'. The
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
104 * formatted string is guaranteed to start with a // line and
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
105 * end with an empty line. The client is expected to take care
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
106 * of writing any header and add an additional white line between
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
107 * simulations if needed. The method throws a SeqlibRuntimeError
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
108 * if the stream is not writable. The data matrix should contain
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
109 * only data within range 0-9 if separated is false (default) and
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
110 * any positive (>=0) integer if separated is true. Note that
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
111 * output generated with separated=true is never compatible with
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
112 * the original ms format, and that output generated with
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
113 * separated=false is compatible with the original ms format only
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
114 * if all alleles are 0 or 1 (which is not checked by this
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
115 * formatter).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
116 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
117 * \param stream the stream (file or string stream) where to
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
118 * write the output.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
119 * \param dataMatrix the alignment object to write.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
120 * \param separated true if a white space separator must be placed
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
121 * between the genotype at each site.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
122 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
123 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
124 static void format(std::ostream& stream, DataMatrix& dataMatrix, bool separated=false);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
125
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
126
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
127 /** \brief Returns the last tMRCA read by any Ms instance
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
128 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
129 * If a tMRCA value was present in the last simulation read by
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
130 * any Ms instance, it will be returned by this method. A value
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
131 * of -1. is returned if no simulation was read, or if the last
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
132 * simulation didn't contain a tMRCA value or if the last
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
133 * simulation provoked an exception before reaching the tMRCA
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
134 * line.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
135 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
136 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
137 static double tMRCA();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
138
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
139
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
140 /** \brief Returns the last "prob" read by any Ms instance
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
141 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
142 * "prob" is returned by ms when a fixed number of segregating
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
143 * sites is used in conjunction with a theta value. If a "prob"
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
144 * value was present in the last simulation read by any Ms
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
145 * instance, it will be returned by this method. A value of -1
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
146 * is returned if no simulation was read, or if the last
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
147 * simulation didn't contain a "prob" value or if the last
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
148 * simulation provoked an exception before reaching the "prob"
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
149 * line.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
150 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
151 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
152 static double prob();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
153
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
154
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
155 /** \brief Returns the tree string found in the last simulation read by any Ms instance
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
156 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
157 * If one or more trees were present in the last simulation read
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
158 * by any Ms instance, they will be returned as a unique string
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
159 * by this method. An empty string is returned if no simulation
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
160 * was read, or if the last simulation
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
161 * didn't contain any tree value or if the last simulation
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
162 * provoked an exception before reaching the tree line.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
163 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
164 * Note: the trees are returned as a single line.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
165 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
166 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
167 static std::string trees();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
168
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
169
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
170 private:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
171 // Line parser (the last \n is extracted and discarded - no error upon EOF)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
172 std::string next_line(std::istream& stream);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
173
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
174 /// tMRCA (-1 if not found in ms output)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
175 static double _tMRCA;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
176
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
177 /// probability (-1 if not found in ms output)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
178 static double _prob;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
179
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
180 /// tree string (may contain several trees) (empty string if not found in ms output)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
181 static std::string _trees;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
182
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
183
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
184 /// No instantiation allowed
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
185 Ms() { }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
186
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
187 /// A fortiori no destruction allowed
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
188 ~Ms() { }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
189
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
190 /// No copy allowed
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
191 Ms(const Ms&) { }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
192
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
193 /// No copy allowed
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
194 Ms& operator=(const Ms&) { return *this; }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
195
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
196 };
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
197 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
198
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
199 #endif