Mercurial > repos > davidmurphy > codonlogo
comparison corebio/ssearch_io/blastxml.py @ 0:c55bdc2fb9fa
Uploaded
author | davidmurphy |
---|---|
date | Thu, 27 Oct 2011 12:09:09 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c55bdc2fb9fa |
---|---|
1 | |
2 # Copyright (c) 2006 John Gilman | |
3 # | |
4 # This software is distributed under the MIT Open Source License. | |
5 # <http://www.opensource.org/licenses/mit-license.html> | |
6 # | |
7 # Permission is hereby granted, free of charge, to any person obtaining a | |
8 # copy of this software and associated documentation files (the "Software"), | |
9 # to deal in the Software without restriction, including without limitation | |
10 # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
11 # and/or sell copies of the Software, and to permit persons to whom the | |
12 # Software is furnished to do so, subject to the following conditions: | |
13 # | |
14 # The above copyright notice and this permission notice shall be included | |
15 # in all copies or substantial portions of the Software. | |
16 # | |
17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
23 # THE SOFTWARE. | |
24 | |
25 | |
26 """Read BLAST XML output. | |
27 | |
28 The DTD is available at | |
29 http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.mod.dtd | |
30 | |
31 """ | |
32 | |
33 # See also | |
34 # | |
35 # http://bugzilla.open-bio.org/show_bug.cgi?id=1933 | |
36 #http://portal.open-bio.org/pipermail/biojava-dev/2004-December/002513.html | |
37 | |
38 | |
39 from corebio.ssearch_io import Report, Result, Hit, Annotation, Alignment | |
40 | |
41 import xml.sax | |
42 from xml.sax.handler import ContentHandler | |
43 | |
# Public API of this module.  This must be a sequence of names: a bare
# string would be iterated character by character by "from ... import *".
__all__ = ['read']


def read(fin):
    """Read BLAST XML output and return the parsed Report.

    Arguments:
    fin -- the input source; it is handed unchanged to the SAX parser's
           parse() method.

    Returns:
    The ``report`` attribute of the content handler, i.e. the Report
    object that the handler creates at document start and fills with
    one Result per <Iteration> element.

    Raises:
    ValueError -- if the SAX parser reports a parse error.
    """
    parser = xml.sax.make_parser()
    handler = _BlastHandler()
    parser.setContentHandler(handler)

    # Switch off validation and external entity loading.  Without this
    # the parser tries to fetch the DTD named in the DOCTYPE and fails
    # with "ValueError: unknown url type: NCBI_BlastOutput.dtd".
    parser.setFeature(xml.sax.handler.feature_validation, 0)
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    parser.setFeature(xml.sax.handler.feature_external_pes, 0)
    parser.setFeature(xml.sax.handler.feature_external_ges, 0)

    try:
        parser.parse(fin)
    except xml.sax.SAXParseException as e:
        # Callers only need to know the file is unusable, so re-raise
        # as a plain ValueError carrying the parser's message.
        raise ValueError("Cannot parse file; " + str(e))
    return handler.report
64 | |
class _BlastHandler(ContentHandler):
    """SAX content handler that assembles a Report from BLAST XML events.

    A Report is created at document start, a Result for every
    <Iteration>, a Hit for every <Hit> and an Alignment for every <Hsp>.
    Their fields are filled in as the corresponding leaf elements close.
    The object under construction is available as the ``report``
    attribute.

    The tables below map an element name to the attribute (or dictionary
    key) it populates and to the converter applied to the element text.
    A converter of None stores the whitespace-stripped text unchanged.
    Element names that appear nowhere in this class are ignored.
    """

    # Element name -> attribute of the Report.
    _REPORT_FIELDS = {
        'BlastOutput_program': 'algorithm',
        'BlastOutput_reference': 'algorithm_reference',
        'BlastOutput_db': 'database_name',
    }

    # Element name -> key in report.parameters (values are kept as text).
    _PARAMETER_KEYS = {
        'Parameters_matrix': 'matrix',
        'Parameters_expect': 'expect',
        'Parameters_include': 'include',
        'Parameters_sc-match': 'sc-match',
        'Parameters_sc-mismatch': 'sc-mismatch',
        'Parameters_gap-open': 'gap-open',
        'Parameters_gap-extend': 'gap-extend',
        'Parameters_filter': 'filter',
        'Parameters_pattern': 'pattern',
        'Parameters_entrez-query': 'entrez-query',
    }

    # Element name -> (attribute of result.query, converter).
    _QUERY_FIELDS = {
        'Iteration_query-ID': ('name', None),
        'Iteration_query-def': ('description', None),
        'Iteration_query-len': ('length', int),
    }

    # Element name -> (key in result.statistics, converter).
    _STATISTICS_KEYS = {
        'Statistics_db-num': ('db-num', int),
        'Statistics_db-len': ('db-len', int),
        'Statistics_hsp-len': ('hsp-len', int),
        'Statistics_eff-space': ('eff-space', float),
        'Statistics_kappa': ('kappa', float),
        'Statistics_lambda': ('lambda', float),
        'Statistics_entropy': ('entropy', float),
    }

    # Element name -> (attribute of hit.target, converter).
    _TARGET_FIELDS = {
        'Hit_id': ('name', None),
        'Hit_def': ('description', None),
        'Hit_accession': ('accession', None),
        'Hit_len': ('length', int),
    }

    # Element name -> (attribute of the current Alignment, converter).
    # Hsp_query-from, Hsp_hit-from and Hsp_midline need special treatment
    # and are handled explicitly in endElement.  The end coordinates
    # (Hsp_query-to, Hsp_hit-to) are not stored.
    _HSP_FIELDS = {
        'Hsp_bit-score': ('bit_score', float),
        'Hsp_score': ('raw_score', float),
        'Hsp_evalue': ('significance', float),
        'Hsp_query-frame': ('query_frame', int),
        'Hsp_hit-frame': ('target_frame', int),
        'Hsp_identity': ('identical', int),
        'Hsp_positive': ('similar', int),
        'Hsp_gaps': ('gaps', int),
        'Hsp_align-len': ('length', int),
        'Hsp_qseq': ('query_seq', None),
        'Hsp_hseq': ('target_seq', None),
    }

    def __init__(self):
        """Create a handler with no report and an empty text buffer."""
        ContentHandler.__init__(self)
        self._content = []    # character data chunks of the open element
        self.report = None    # Report being built (set in startDocument)
        self._result = None   # Result of the <Iteration> being read
        self._hit = None      # Hit currently open, or None
        self._hsp = None      # Alignment currently open, or None

    def characters(self, ch):
        """Buffer a chunk of character data.

        The parser may deliver the text of one element in several
        chunks, so they are collected and joined in endElement.
        """
        self._content.append(ch)

    def startDocument(self):
        """Begin a fresh Report for the document."""
        self.report = Report()

    def endDocument(self):
        """Nothing to finalise; the report is complete at this point."""
        pass

    def startElement(self, name, attr):
        """Open a new Result, Hit or Alignment for container elements.

        Attributes are not used; all data is read from element text.
        """
        # Discard whitespace seen between elements so that the buffer
        # holds exactly the text of the element that is now opening.
        self._content = []

        if name == 'Iteration':
            self._result = Result()
            self.report.results.append(self._result)
        elif name == 'Hit':
            self._hit = Hit()
            self._result.hits.append(self._hit)
        elif name == 'Hsp':
            self._hsp = Alignment()
            self._hit.alignments.append(self._hsp)

    def endElement(self, name):
        """Store the text of the element that just closed.

        Raises ValueError if the text of a numeric element cannot be
        converted by int() or float().
        """
        raw = ''.join(self._content)
        self._content = []
        content = raw.strip()

        if name == 'Hit':
            self._hit = None
        elif name == 'Hsp':
            self._hsp = None
        elif name == 'BlastOutput_version':
            # The code expects "<program> <version> ..." and keeps the
            # second token; fall back to the whole text if there is
            # only one token instead of raising IndexError.
            tokens = content.split()
            if len(tokens) > 1:
                self.report.algorithm_version = tokens[1]
            else:
                self.report.algorithm_version = content
        elif name == 'Hsp_query-from':
            # Stored one less than the value given in the XML.
            self._hsp.query_start = int(content) - 1
        elif name == 'Hsp_hit-from':
            self._hsp.target_start = int(content) - 1
        elif name == 'Hsp_midline':
            # Leading and trailing blanks are alignment columns, so only
            # line breaks are trimmed; stripping all whitespace would
            # shift the midline relative to the query and target rows.
            self._hsp.mid_seq = raw.strip('\r\n')
        elif name in self._REPORT_FIELDS:
            setattr(self.report, self._REPORT_FIELDS[name], content)
        elif name in self._PARAMETER_KEYS:
            self.report.parameters[self._PARAMETER_KEYS[name]] = content
        elif name in self._QUERY_FIELDS:
            field, convert = self._QUERY_FIELDS[name]
            setattr(self._result.query, field,
                    self._convert(content, convert))
        elif name in self._STATISTICS_KEYS:
            key, convert = self._STATISTICS_KEYS[name]
            self._result.statistics[key] = self._convert(content, convert)
        elif name in self._TARGET_FIELDS:
            field, convert = self._TARGET_FIELDS[name]
            setattr(self._hit.target, field,
                    self._convert(content, convert))
        elif name in self._HSP_FIELDS:
            field, convert = self._HSP_FIELDS[name]
            setattr(self._hsp, field, self._convert(content, convert))

    def _convert(self, text, convert):
        """Apply *convert* to *text*, or return *text* if it is None."""
        if convert is None:
            return text
        return convert(text)
247 | |
248 | |
249 |