comparison commons/core/parsing/BlatParser.py @ 38:2c0c0a89fad7

Uploaded
author m-zytnicki
date Thu, 02 May 2013 09:56:47 -0400
parents 769e306b7933
children
comparison
equal deleted inserted replaced
37:d22fadc825e3 38:2c0c0a89fad7
1 # Copyright INRA (Institut National de la Recherche Agronomique)
2 # http://www.inra.fr
3 # http://urgi.versailles.inra.fr
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30
31 import sys
32
## This class holds the fields of one line of a Blat results output file
#  and can fill itself from such a line.
#
class BlatParser(object):
    """Container for the 21 fields of one line of a Blat results file.

    Every field is stored as given (setAttributesFromString() stores the
    raw string pieces of the line; no numeric conversion is performed).
    An empty string is the "not set" value of every field.

    Two instances compare equal when their TName, TSize, TStart and TEnd
    fields are equal; the other fields are ignored by the comparison.
    """

    # Ordered description of the columns of a blat line:
    # (label used in warning messages and in the setter name, attribute name).
    # The position in this tuple is the column index in the blat line.
    _FIELDS = (
        ('Match', '_match'),
        ('Mismatch', '_mismatch'),
        ('RepMatch', '_repMatch'),
        ('N', '_N'),
        ('QGapCount', '_QGapCount'),
        ('QGapBases', '_QGapBases'),
        ('TGapCount', '_TGapCount'),
        ('TGapBases', '_TGapBases'),
        ('Strand', '_strand'),
        ('QName', '_QName'),
        ('QSize', '_QSize'),
        ('QStart', '_QStart'),
        ('QEnd', '_QEnd'),
        ('TName', '_TName'),
        ('TSize', '_TSize'),
        ('TStart', '_TStart'),
        ('TEnd', '_TEnd'),
        ('BlockCount', '_blockCount'),
        ('BlockSizes', '_blockSizes'),
        ('QStarts', '_qStarts'),
        ('TStarts', '_tStarts'),
    )

    def __init__(self, match='', mismatch='', repMatch='', N='', QGapCount='', QGapBases='', TGapCount='', TGapBases='', strand='', QName='', QSize='', QStart='', QEnd='', TName='', TSize='', TStart='', TEnd='', blockCount='', blockSizes='', qStarts='', tStarts=''):
        """Build an instance; every field defaults to the empty string."""
        self._match = match
        self._mismatch = mismatch
        self._repMatch = repMatch
        self._N = N
        self._QGapCount = QGapCount
        self._QGapBases = QGapBases
        self._TGapCount = TGapCount
        self._TGapBases = TGapBases
        self._strand = strand
        self._QName = QName
        self._QSize = QSize
        self._QStart = QStart
        self._QEnd = QEnd
        self._TName = TName
        self._TSize = TSize
        self._TStart = TStart
        self._TEnd = TEnd
        self._blockCount = blockCount
        self._blockSizes = blockSizes
        self._qStarts = qStarts
        self._tStarts = tStarts

    def __eq__(self, o):
        """Return True when both objects describe the same target region.

        Only TName, TSize, TStart and TEnd are compared. NotImplemented is
        returned when ``o`` does not carry these attributes, so comparing
        with an unrelated object yields False instead of raising.
        """
        try:
            return (self._TName == o._TName
                    and self._TSize == o._TSize
                    and self._TStart == o._TStart
                    and self._TEnd == o._TEnd)
        except AttributeError:
            # Duck typing is kept (any object exposing the four attributes
            # is comparable); anything else is "not comparable".
            return NotImplemented

    def __ne__(self, o):
        """Return the negation of __eq__ (Python 2 does not derive it)."""
        result = self.__eq__(o)
        if result is NotImplemented:
            return result
        return not result

    # ------------------------------------------------------------------
    # Setters: each one stores its argument unchanged in the matching field.
    # ------------------------------------------------------------------

    def setMatch(self, match):
        self._match = match

    def setMismatch(self, mismatch):
        self._mismatch = mismatch

    def setRepMatch(self, repMatch):
        self._repMatch = repMatch

    def setN(self, N):
        self._N = N

    def setQGapCount(self, QGapCount):
        self._QGapCount = QGapCount

    def setQGapBases(self, QGapBases):
        self._QGapBases = QGapBases

    def setTGapCount(self, TGapCount):
        self._TGapCount = TGapCount

    def setTGapBases(self, TGapBases):
        self._TGapBases = TGapBases

    def setStrand(self, strand):
        self._strand = strand

    def setQName(self, QName):
        self._QName = QName

    def setQSize(self, QSize):
        self._QSize = QSize

    def setQStart(self, QStart):
        self._QStart = QStart

    def setQEnd(self, QEnd):
        self._QEnd = QEnd

    def setTName(self, TName):
        self._TName = TName

    def setTSize(self, TSize):
        self._TSize = TSize

    def setTStart(self, TStart):
        self._TStart = TStart

    def setTEnd(self, TEnd):
        self._TEnd = TEnd

    def setBlockCount(self, blockCount):
        self._blockCount = blockCount

    def setBlockSizes(self, blockSizes):
        self._blockSizes = blockSizes

    def setQStarts(self, qStarts):
        self._qStarts = qStarts

    def setTStarts(self, tStarts):
        self._tStarts = tStarts

    # ------------------------------------------------------------------
    # Getters: each one returns the stored value of the matching field.
    # ------------------------------------------------------------------

    def getMatch(self):
        return self._match

    def getMismatch(self):
        return self._mismatch

    def getRepMatch(self):
        return self._repMatch

    def getN(self):
        return self._N

    def getQGapCount(self):
        return self._QGapCount

    def getQGapBases(self):
        return self._QGapBases

    def getTGapCount(self):
        return self._TGapCount

    def getTGapBases(self):
        return self._TGapBases

    def getStrand(self):
        return self._strand

    def getQName(self):
        return self._QName

    def getQSize(self):
        return self._QSize

    def getQStart(self):
        return self._QStart

    def getQEnd(self):
        return self._QEnd

    def getTName(self):
        return self._TName

    def getTSize(self):
        return self._TSize

    def getTStart(self):
        return self._TStart

    def getTEnd(self):
        return self._TEnd

    def getBlockCount(self):
        return self._blockCount

    def getBlockSizes(self):
        return self._blockSizes

    def getQStarts(self):
        return self._qStarts

    def getTStarts(self):
        return self._tStarts

    def setAttributes(self, lResults, iCurrentLineNumber):
        """Fill every field from a list of values given in column order.

        @param lResults: values of one blat line, in the order of _FIELDS;
               values beyond the 21st are ignored
        @param iCurrentLineNumber: line number, only used in warnings

        For each empty value a warning is written on stderr. If at least
        one value is empty, ALL fields are reset to the empty string, so
        an instance never holds a partially filled line. A value missing
        because the list is too short is handled like an empty value.
        """
        error = False
        nbValues = len(lResults)

        for index, (label, _attribute) in enumerate(self._FIELDS):
            value = lResults[index] if index < nbValues else ''
            if value != '':
                # Go through the public setter so that a subclass overriding
                # it is still honoured.
                getattr(self, 'set' + label)(value)
            else:
                sys.stderr.write("WARNING: The field %s is empty in blat file in line %s\n" % (label, iCurrentLineNumber))
                error = True

        if error:
            self._setAllToNull()

    def setAttributesFromString(self, blatLine, iCurrentLineNumber ="", fieldSeparator ="\t"):
        """Fill every field from one raw line of a blat file.

        @param blatLine: the line to parse
        @param iCurrentLineNumber: line number, only used in warnings
        @param fieldSeparator: string separating the columns

        Trailing whitespace is stripped before splitting, so with the
        default tab separator trailing empty columns are dropped before
        the columns are counted. When the line does not contain exactly
        21 columns, a warning is written on stderr and the fields are
        left untouched.
        """
        lBlatLineItem = blatLine.rstrip().split(fieldSeparator)
        if len(lBlatLineItem) != len(self._FIELDS):
            sys.stderr.write("WARNING: The line %s is not valid blat line (%s columns -> 21 columns needed)\n" % (iCurrentLineNumber, len(lBlatLineItem)))
        else:
            self.setAttributes(lBlatLineItem, iCurrentLineNumber)

    def _setAllToNull(self):
        """Reset every field to the empty string (setters are bypassed)."""
        for _label, attribute in self._FIELDS:
            setattr(self, attribute, '')