annotate egglib/egglib-2.1.5/include/egglib-cpp/Staden.hpp @ 1:420b57c3c185 draft

Uploaded
author dereeper
date Fri, 10 Jul 2015 04:39:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
1 /*
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
2 Copyright 2008-2009 Stéphane De Mita, Mathieu Siol
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
3
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
4 This file is part of EggLib.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
5
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
6 EggLib is free software: you can redistribute it and/or modify
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
7 it under the terms of the GNU General Public License as published by
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
8 the Free Software Foundation, either version 3 of the License, or
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
9 (at your option) any later version.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
10
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
11 EggLib is distributed in the hope that it will be useful,
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
14 GNU General Public License for more details.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
15
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
16 You should have received a copy of the GNU General Public License
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
17 along with EggLib. If not, see <http://www.gnu.org/licenses/>.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
18 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
19
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
20 #ifndef EGGLIB_STADEN_HPP
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
21 #define EGGLIB_STADEN_HPP
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
22
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
23 #include <string>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
24 #include <istream>
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
25 #include "Align.hpp"
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
26
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
27 namespace egglib {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
28
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
29 /** \brief Parser of Staden output format
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
30 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
31 * \ingroup core
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
32 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
33 * The parser is available as a static method. It takes either a
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
34 * stream or a string containing data formatted by the program GAP4
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
35 * of the Staden package (command 'dump contig to file').
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
36 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
37 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
38 class Staden {
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
39
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
40 public:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
41
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
42
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
43 /** \brief Parses a string
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
44 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
45 * \param string a string containing an alignment formatted
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
46 * by the program GAP4 of the Staden package.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
47 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
48 * \param deleteConsensus if true, the sequence named
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
49 * "CONSENSUS" is deleted from the file (if it is present).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
50 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
51 * \return An Align instance containing the data found in
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
52 * the Staden file, after recoding the characters following
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
53 * the standard codes.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
54 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
55 * This method opens a stream to the string and calls the
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
56 * overloaded method.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
57 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
58 * The character replacement rules assume Staden default
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
59 * convention, as follows:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
60 * - "-" codes for an unknown base and is replaced by "N".
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
61 * - "*" codes for an alignment gap and is replaced by "-".
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
62 * - A white space represents missing data and is replaced
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
63 * by "?".
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
64 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
65 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
66 static Align parse(const std::string& string, bool deleteConsensus=true);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
67
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
68
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
69 /** \brief Parses an open stream
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
70 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
71 * \param stream the open stream containing an alignment formatted
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
72 * by the program GAP4 of the Staden package.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
73 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
74 * \param deleteConsensus if true, the sequence named
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
75 * "CONSENSUS" is deleted from the file (if it is present).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
76 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
77 * \return An Align instance containing the data found in
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
78 * the Staden file, after recoding the characters following
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
79 * the standard codes.
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
80 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
81 * The character replacement rules assume Staden default
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
82 * convention, as follows:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
83 * - "-" codes for an unknown base and is replaced by "N".
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
84 * - "*" codes for an alignment gap and is replaced by "-".
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
85 * - A white space represents missing data and is replaced
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
86 * by "?".
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
87 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
88 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
89 static Align parse(std::istream& stream, bool deleteConsensus=true);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
90
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
91
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
92 private:
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
93
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
94 /// Not allowed to instantiate this class
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
95 Staden() { }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
96
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
97 /// Not allowed to instantiate this class
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
98 Staden(const Staden& source) { }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
99
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
100 /// Not allowed to instantiate this class
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
101 ~Staden() { }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
102
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
103
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
104 /* Gets the start position of sequences
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
105 *
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
106 * The function gives the total number of characters before the start of sequences
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
107 * and reads through until the next backspace (ignores the first line).
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
108 */
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
109 static void getShift();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
110
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
111 // Translates according to the Staden format
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
112 static char transforme(char);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
113
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
114 // Imports one sequence
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
115 static bool readOneSequence();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
116
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
117 // Imports and concatenates one sequence
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
118 static bool readAppendOneSequence();
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
119
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
120 // Replaces dots by the matching character from CONSENSUS
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
121 static void undot(bool delete_consensus=true);
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
122
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
123 // The number of characters before the start of sequences
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
124 static int shift;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
125
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
126 // The dynamically filled container (will result in an alignment)
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
127 static Container container;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
128
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
129 // The current position
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
130 static int currpos;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
131
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
132 // The reading stream
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
133 static std::istream* stream;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
134
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
135 // Stores the unique 8-character identifiers discriminating readings
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
136 static std::vector<std::string> ID;
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
137 };
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
138 }
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
139
420b57c3c185 Uploaded
dereeper
parents:
diff changeset
140 #endif