Mercurial > repos > yufei-luo > s_mart
comparison commons/core/parsing/BlatParser.py @ 38:2c0c0a89fad7
Uploaded
author | m-zytnicki |
---|---|
date | Thu, 02 May 2013 09:56:47 -0400 |
parents | 769e306b7933 |
children |
comparison
equal
deleted
inserted
replaced
37:d22fadc825e3 | 38:2c0c0a89fad7 |
---|---|
1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
2 # http://www.inra.fr | |
3 # http://urgi.versailles.inra.fr | |
4 # | |
5 # This software is governed by the CeCILL license under French law and | |
6 # abiding by the rules of distribution of free software. You can use, | |
7 # modify and/ or redistribute the software under the terms of the CeCILL | |
8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
9 # "http://www.cecill.info". | |
10 # | |
11 # As a counterpart to the access to the source code and rights to copy, | |
12 # modify and redistribute granted by the license, users are provided only | |
13 # with a limited warranty and the software's author, the holder of the | |
14 # economic rights, and the successive licensors have only limited | |
15 # liability. | |
16 # | |
17 # In this respect, the user's attention is drawn to the risks associated | |
18 # with loading, using, modifying and/or developing or reproducing the | |
19 # software by the user in light of its specific status of free software, | |
20 # that may mean that it is complicated to manipulate, and that also | |
21 # therefore means that it is reserved for developers and experienced | |
22 # professionals having in-depth computer knowledge. Users are therefore | |
23 # encouraged to load and test the software's suitability as regards their | |
24 # requirements in conditions enabling the security of their systems and/or | |
25 # data to be ensured and, more generally, to use and operate it in the | |
26 # same conditions as regards security. | |
27 # | |
28 # The fact that you are presently reading this means that you have had | |
29 # knowledge of the CeCILL license and that you accept its terms. | |
30 | |
31 import sys | |
32 | |
33 ## this class can parse a Blat results output file | |
34 # | |
class BlatParser(object):
    """Record holding the 21 columns of one line of a Blat results file.

    Values are stored exactly as supplied (strings when they come from
    setAttributesFromString); no numeric conversion is performed.  Every
    field defaults to the empty string, which this class also uses as its
    "null" value.

    Two records compare equal when their TName, TSize, TStart and TEnd
    fields are equal; the other fields are ignored by the comparison.
    """

    # (attribute name, field label) in column order.  The label is the name
    # printed in warnings and, prefixed with "set", the name of the setter.
    _FIELDS = (
        ('_match', 'Match'),
        ('_mismatch', 'Mismatch'),
        ('_repMatch', 'RepMatch'),
        ('_N', 'N'),
        ('_QGapCount', 'QGapCount'),
        ('_QGapBases', 'QGapBases'),
        ('_TGapCount', 'TGapCount'),
        ('_TGapBases', 'TGapBases'),
        ('_strand', 'Strand'),
        ('_QName', 'QName'),
        ('_QSize', 'QSize'),
        ('_QStart', 'QStart'),
        ('_QEnd', 'QEnd'),
        ('_TName', 'TName'),
        ('_TSize', 'TSize'),
        ('_TStart', 'TStart'),
        ('_TEnd', 'TEnd'),
        ('_blockCount', 'BlockCount'),
        ('_blockSizes', 'BlockSizes'),
        ('_qStarts', 'QStarts'),
        ('_tStarts', 'TStarts'),
    )

    def __init__(self, match='', mismatch='', repMatch='', N='', QGapCount='', QGapBases='', TGapCount='', TGapBases='', strand='', QName='', QSize='', QStart='', QEnd='', TName='', TSize='', TStart='', TEnd='', blockCount='', blockSizes='', qStarts='', tStarts=''):
        """Store each argument in the attribute of the same name."""
        self._match = match
        self._mismatch = mismatch
        self._repMatch = repMatch
        self._N = N
        self._QGapCount = QGapCount
        self._QGapBases = QGapBases
        self._TGapCount = TGapCount
        self._TGapBases = TGapBases
        self._strand = strand
        self._QName = QName
        self._QSize = QSize
        self._QStart = QStart
        self._QEnd = QEnd
        self._TName = TName
        self._TSize = TSize
        self._TStart = TStart
        self._TEnd = TEnd
        self._blockCount = blockCount
        self._blockSizes = blockSizes
        self._qStarts = qStarts
        self._tStarts = tStarts

    def __eq__(self, o):
        """Compare the target fields (TName, TSize, TStart, TEnd) only."""
        # Let Python fall back to its default comparison for foreign types
        # instead of failing on a missing attribute.
        if not isinstance(o, BlatParser):
            return NotImplemented
        return self._TName == o._TName and self._TSize == o._TSize and self._TStart == o._TStart and self._TEnd == o._TEnd

    def __ne__(self, o):
        """Negation of __eq__ (Python 2 does not derive it automatically)."""
        result = self.__eq__(o)
        if result is NotImplemented:
            return result
        return not result

    # ---- setters -------------------------------------------------------

    def setMatch(self, match):
        self._match = match

    def setMismatch(self, mismatch):
        self._mismatch = mismatch

    def setRepMatch(self, repMatch):
        self._repMatch = repMatch

    def setN(self, N):
        self._N = N

    def setQGapCount(self, QGapCount):
        self._QGapCount = QGapCount

    def setQGapBases(self, QGapBases):
        self._QGapBases = QGapBases

    def setTGapCount(self, TGapCount):
        self._TGapCount = TGapCount

    def setTGapBases(self, TGapBases):
        self._TGapBases = TGapBases

    def setStrand(self, strand):
        self._strand = strand

    def setQName(self, QName):
        self._QName = QName

    def setQSize(self, QSize):
        self._QSize = QSize

    def setQStart(self, QStart):
        self._QStart = QStart

    def setQEnd(self, QEnd):
        self._QEnd = QEnd

    def setTName(self, TName):
        self._TName = TName

    def setTSize(self, TSize):
        self._TSize = TSize

    def setTStart(self, TStart):
        self._TStart = TStart

    def setTEnd(self, TEnd):
        self._TEnd = TEnd

    def setBlockCount(self, blockCount):
        self._blockCount = blockCount

    def setBlockSizes(self, blockSizes):
        self._blockSizes = blockSizes

    def setQStarts(self, qStarts):
        self._qStarts = qStarts

    def setTStarts(self, tStarts):
        self._tStarts = tStarts

    # ---- getters -------------------------------------------------------

    def getMatch(self):
        return self._match

    def getMismatch(self):
        return self._mismatch

    def getRepMatch(self):
        return self._repMatch

    def getN(self):
        return self._N

    def getQGapCount(self):
        return self._QGapCount

    def getQGapBases(self):
        return self._QGapBases

    def getTGapCount(self):
        return self._TGapCount

    def getTGapBases(self):
        return self._TGapBases

    def getStrand(self):
        return self._strand

    def getQName(self):
        return self._QName

    def getQSize(self):
        return self._QSize

    def getQStart(self):
        return self._QStart

    def getQEnd(self):
        return self._QEnd

    def getTName(self):
        return self._TName

    def getTSize(self):
        return self._TSize

    def getTStart(self):
        return self._TStart

    def getTEnd(self):
        return self._TEnd

    def getBlockCount(self):
        return self._blockCount

    def getBlockSizes(self):
        return self._blockSizes

    def getQStarts(self):
        return self._qStarts

    def getTStarts(self):
        return self._tStarts

    # ---- parsing -------------------------------------------------------

    def setAttributes(self, lResults, iCurrentLineNumber):
        """Fill the record from a sequence of column values.

        lResults           -- sequence holding the 21 column values in
                              column order; extra items are ignored.
        iCurrentLineNumber -- line number, used only in warning messages.

        Each non-empty value is passed to the matching setter.  For every
        empty value a warning is written to stderr, and if at least one
        value was empty the whole record is reset to empty strings.  A
        sequence shorter than 21 items is reported on stderr and also
        resets the record, so the object is never left half-filled.
        """
        if len(lResults) < len(self._FIELDS):
            sys.stderr.write("WARNING: The line %s is not valid blat line (%s columns -> 21 columns needed)\n" % (iCurrentLineNumber, len(lResults)))
            self._setAllToNull()
            return

        error = False
        for (_, label), value in zip(self._FIELDS, lResults):
            if value != '':
                # Go through the public setter so overriding subclasses
                # keep control over how a value is stored.
                getattr(self, 'set' + label)(value)
            else:
                sys.stderr.write("WARNING: The field %s is empty in blat file in line %s\n" % (label, iCurrentLineNumber))
                error = True

        if error:
            self._setAllToNull()

    def setAttributesFromString(self, blatLine, iCurrentLineNumber ="", fieldSeparator ="\t"):
        """Split one text line into columns and fill the record from it.

        blatLine           -- the line to parse; trailing whitespace is
                              stripped before splitting.
        iCurrentLineNumber -- line number, used only in warning messages.
        fieldSeparator     -- column separator (tab by default).

        A line that does not split into exactly 21 columns is reported on
        stderr and leaves the record unchanged.
        """
        # NOTE(review): rstrip() also removes trailing tabs, so a line whose
        # last column is empty is reported as a column-count problem rather
        # than as an empty field.
        blatLine = blatLine.rstrip()
        lBlatLineItem = blatLine.split(fieldSeparator)
        if len(lBlatLineItem) != 21:
            sys.stderr.write("WARNING: The line %s is not valid blat line (%s columns -> 21 columns needed)\n" % (iCurrentLineNumber, len(lBlatLineItem)))
        else:
            self.setAttributes(lBlatLineItem, iCurrentLineNumber)

    def _setAllToNull(self):
        """Reset every field to the empty string."""
        for attribute, _ in self._FIELDS:
            setattr(self, attribute, '')