# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.

31 import sys
|
|
32
|
|
## Hold the fields of one line of a Blat results output file
#
# Each of the 21 columns is kept in its own attribute, exactly as it was
# given (no type conversion is done by this class).
#
class BlatParser(object):

    # One entry per column, in file order: (name used in the warning messages
    # and in the "set<Name>" setter, name of the attribute holding the value).
    _FIELDS = (
        ("Match", "_match"),
        ("Mismatch", "_mismatch"),
        ("RepMatch", "_repMatch"),
        ("N", "_N"),
        ("QGapCount", "_QGapCount"),
        ("QGapBases", "_QGapBases"),
        ("TGapCount", "_TGapCount"),
        ("TGapBases", "_TGapBases"),
        ("Strand", "_strand"),
        ("QName", "_QName"),
        ("QSize", "_QSize"),
        ("QStart", "_QStart"),
        ("QEnd", "_QEnd"),
        ("TName", "_TName"),
        ("TSize", "_TSize"),
        ("TStart", "_TStart"),
        ("TEnd", "_TEnd"),
        ("BlockCount", "_blockCount"),
        ("BlockSizes", "_blockSizes"),
        ("QStarts", "_qStarts"),
        ("TStarts", "_tStarts"),
    )

    ## Constructor
    #
    # Every parameter is stored unchanged in the attribute of the same name
    # (prefixed with an underscore); all of them default to the empty string.
    #
    def __init__(self, match='', mismatch='', repMatch='', N='',
                 QGapCount='', QGapBases='', TGapCount='', TGapBases='',
                 strand='', QName='', QSize='', QStart='', QEnd='',
                 TName='', TSize='', TStart='', TEnd='',
                 blockCount='', blockSizes='', qStarts='', tStarts=''):
        self._match = match
        self._mismatch = mismatch
        self._repMatch = repMatch
        self._N = N
        self._QGapCount = QGapCount
        self._QGapBases = QGapBases
        self._TGapCount = TGapCount
        self._TGapBases = TGapBases
        self._strand = strand
        self._QName = QName
        self._QSize = QSize
        self._QStart = QStart
        self._QEnd = QEnd
        self._TName = TName
        self._TSize = TSize
        self._TStart = TStart
        self._TEnd = TEnd
        self._blockCount = blockCount
        self._blockSizes = blockSizes
        self._qStarts = qStarts
        self._tStarts = tStarts

    ## Equality: two instances are equal when TName, TSize, TStart and TEnd match
    #
    # @param o the object to compare with
    # @return True or False, or NotImplemented when o does not carry the
    #         compared attributes (so that "parser == None" is simply False
    #         instead of raising AttributeError)
    #
    def __eq__(self, o):
        try:
            otherName = o._TName
            otherSize = o._TSize
            otherStart = o._TStart
            otherEnd = o._TEnd
        except AttributeError:
            return NotImplemented
        return self._TName == otherName and self._TSize == otherSize and self._TStart == otherStart and self._TEnd == otherEnd

    ## Inequality, kept consistent with __eq__
    #
    # Python 2 does not derive "!=" from __eq__, so it is defined explicitly.
    #
    def __ne__(self, o):
        isEqual = self.__eq__(o)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    ## Set the Match field
    def setMatch(self, match):
        self._match = match

    ## Set the Mismatch field
    def setMismatch(self, mismatch):
        self._mismatch = mismatch

    ## Set the RepMatch field
    def setRepMatch(self, repMatch):
        self._repMatch = repMatch

    ## Set the N field
    def setN(self, N):
        self._N = N

    ## Set the QGapCount field
    def setQGapCount(self, QGapCount):
        self._QGapCount = QGapCount

    ## Set the QGapBases field
    def setQGapBases(self, QGapBases):
        self._QGapBases = QGapBases

    ## Set the TGapCount field
    def setTGapCount(self, TGapCount):
        self._TGapCount = TGapCount

    ## Set the TGapBases field
    def setTGapBases(self, TGapBases):
        self._TGapBases = TGapBases

    ## Set the Strand field
    def setStrand(self, strand):
        self._strand = strand

    ## Set the QName field
    def setQName(self, QName):
        self._QName = QName

    ## Set the QSize field
    def setQSize(self, QSize):
        self._QSize = QSize

    ## Set the QStart field
    def setQStart(self, QStart):
        self._QStart = QStart

    ## Set the QEnd field
    def setQEnd(self, QEnd):
        self._QEnd = QEnd

    ## Set the TName field
    def setTName(self, TName):
        self._TName = TName

    ## Set the TSize field
    def setTSize(self, TSize):
        self._TSize = TSize

    ## Set the TStart field
    def setTStart(self, TStart):
        self._TStart = TStart

    ## Set the TEnd field
    def setTEnd(self, TEnd):
        self._TEnd = TEnd

    ## Set the BlockCount field
    def setBlockCount(self, blockCount):
        self._blockCount = blockCount

    ## Set the BlockSizes field
    def setBlockSizes(self, blockSizes):
        self._blockSizes = blockSizes

    ## Set the QStarts field
    def setQStarts(self, qStarts):
        self._qStarts = qStarts

    ## Set the TStarts field
    def setTStarts(self, tStarts):
        self._tStarts = tStarts

    ## Get the Match field
    def getMatch(self):
        return self._match

    ## Get the Mismatch field
    def getMismatch(self):
        return self._mismatch

    ## Get the RepMatch field
    def getRepMatch(self):
        return self._repMatch

    ## Get the N field
    def getN(self):
        return self._N

    ## Get the QGapCount field
    def getQGapCount(self):
        return self._QGapCount

    ## Get the QGapBases field
    def getQGapBases(self):
        return self._QGapBases

    ## Get the TGapCount field
    def getTGapCount(self):
        return self._TGapCount

    ## Get the TGapBases field
    def getTGapBases(self):
        return self._TGapBases

    ## Get the Strand field
    def getStrand(self):
        return self._strand

    ## Get the QName field
    def getQName(self):
        return self._QName

    ## Get the QSize field
    def getQSize(self):
        return self._QSize

    ## Get the QStart field
    def getQStart(self):
        return self._QStart

    ## Get the QEnd field
    def getQEnd(self):
        return self._QEnd

    ## Get the TName field
    def getTName(self):
        return self._TName

    ## Get the TSize field
    def getTSize(self):
        return self._TSize

    ## Get the TStart field
    def getTStart(self):
        return self._TStart

    ## Get the TEnd field
    def getTEnd(self):
        return self._TEnd

    ## Get the BlockCount field
    def getBlockCount(self):
        return self._blockCount

    ## Get the BlockSizes field
    def getBlockSizes(self):
        return self._blockSizes

    ## Get the QStarts field
    def getQStarts(self):
        return self._qStarts

    ## Get the TStarts field
    def getTStarts(self):
        return self._tStarts

    ## Fill all the fields from a list of column values
    #
    # A warning is written on stderr for each empty value; if at least one
    # value is empty, every attribute is reset to the empty string.
    #
    # @param lResults list of the field values, in column order (a list
    #        shorter than the number of fields raises IndexError)
    # @param iCurrentLineNumber line number reported in the warnings
    #
    def setAttributes(self, lResults, iCurrentLineNumber):
        error = False

        for index, (fieldName, _attributeName) in enumerate(self._FIELDS):
            value = lResults[index]
            if value != '':
                # go through the public setter so that an overridden setter
                # in a subclass is still honoured
                getattr(self, "set" + fieldName)(value)
            else:
                sys.stderr.write("WARNING: The field %s is empty in blat file in line %s\n" % (fieldName, iCurrentLineNumber))
                error = True

        if error:
            self._setAllToNull()

    ## Fill all the fields from one line of a Blat results file
    #
    # Trailing whitespace is stripped before splitting. A line that does not
    # split into exactly 21 columns only triggers a warning on stderr and
    # leaves the attributes untouched.
    #
    # @param blatLine string, the line to parse
    # @param iCurrentLineNumber line number reported in the warnings
    # @param fieldSeparator string separating the columns (tabulation by default)
    #
    def setAttributesFromString(self, blatLine, iCurrentLineNumber ="", fieldSeparator ="\t"):
        lBlatLineItem = blatLine.rstrip().split(fieldSeparator)
        nbExpectedColumns = len(self._FIELDS)
        if len(lBlatLineItem) != nbExpectedColumns:
            sys.stderr.write("WARNING: The line %s is not valid blat line (%s columns -> %s columns needed)\n" % (iCurrentLineNumber, len(lBlatLineItem), nbExpectedColumns))
        else:
            self.setAttributes(lBlatLineItem, iCurrentLineNumber)

    ## Reset every field attribute to the empty string
    def _setAllToNull(self):
        for _fieldName, attributeName in self._FIELDS:
            setattr(self, attributeName, '')