Mercurial > repos > yufei-luo > s_mart
comparison commons/core/parsing/BlatParser.py @ 38:2c0c0a89fad7
Uploaded
| author | m-zytnicki |
|---|---|
| date | Thu, 02 May 2013 09:56:47 -0400 |
| parents | 769e306b7933 |
| children |
comparison
equal
deleted
inserted
replaced
| 37:d22fadc825e3 | 38:2c0c0a89fad7 |
|---|---|
| 1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
| 2 # http://www.inra.fr | |
| 3 # http://urgi.versailles.inra.fr | |
| 4 # | |
| 5 # This software is governed by the CeCILL license under French law and | |
| 6 # abiding by the rules of distribution of free software. You can use, | |
| 7 # modify and/ or redistribute the software under the terms of the CeCILL | |
| 8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
| 9 # "http://www.cecill.info". | |
| 10 # | |
| 11 # As a counterpart to the access to the source code and rights to copy, | |
| 12 # modify and redistribute granted by the license, users are provided only | |
| 13 # with a limited warranty and the software's author, the holder of the | |
| 14 # economic rights, and the successive licensors have only limited | |
| 15 # liability. | |
| 16 # | |
| 17 # In this respect, the user's attention is drawn to the risks associated | |
| 18 # with loading, using, modifying and/or developing or reproducing the | |
| 19 # software by the user in light of its specific status of free software, | |
| 20 # that may mean that it is complicated to manipulate, and that also | |
| 21 # therefore means that it is reserved for developers and experienced | |
| 22 # professionals having in-depth computer knowledge. Users are therefore | |
| 23 # encouraged to load and test the software's suitability as regards their | |
| 24 # requirements in conditions enabling the security of their systems and/or | |
| 25 # data to be ensured and, more generally, to use and operate it in the | |
| 26 # same conditions as regards security. | |
| 27 # | |
| 28 # The fact that you are presently reading this means that you have had | |
| 29 # knowledge of the CeCILL license and that you accept its terms. | |
| 30 | |
| 31 import sys | |
| 32 | |
| 33 ## this class can parse a Blat results output file | |
| 34 # | |
class BlatParser(object):
    """Hold the 21 columns of one line of a Blat results file.

    Every column is stored as given (a string when read from a file) and is
    exposed through a ``getXxx``/``setXxx`` pair. An instance is filled either
    from an already split list (``setAttributes``) or from a raw line
    (``setAttributesFromString``). If any column is empty, the whole record
    is reset to empty strings.
    """

    # (label, attribute) for each column, in file order. The label is used
    # both in warning messages and to find the matching ``set<label>`` method.
    _FIELDS = (
        ('Match', '_match'),
        ('Mismatch', '_mismatch'),
        ('RepMatch', '_repMatch'),
        ('N', '_N'),
        ('QGapCount', '_QGapCount'),
        ('QGapBases', '_QGapBases'),
        ('TGapCount', '_TGapCount'),
        ('TGapBases', '_TGapBases'),
        ('Strand', '_strand'),
        ('QName', '_QName'),
        ('QSize', '_QSize'),
        ('QStart', '_QStart'),
        ('QEnd', '_QEnd'),
        ('TName', '_TName'),
        ('TSize', '_TSize'),
        ('TStart', '_TStart'),
        ('TEnd', '_TEnd'),
        ('BlockCount', '_blockCount'),
        ('BlockSizes', '_blockSizes'),
        ('QStarts', '_qStarts'),
        ('TStarts', '_tStarts'),
    )

    def __init__(self, match='', mismatch='', repMatch='', N='', QGapCount='', QGapBases='', TGapCount='', TGapBases='', strand='', QName='', QSize='', QStart='', QEnd='', TName='', TSize='', TStart='', TEnd='', blockCount='', blockSizes='', qStarts='', tStarts=''):
        """Create a record; every column defaults to the empty string."""
        self._match = match
        self._mismatch = mismatch
        self._repMatch = repMatch
        self._N = N
        self._QGapCount = QGapCount
        self._QGapBases = QGapBases
        self._TGapCount = TGapCount
        self._TGapBases = TGapBases
        self._strand = strand
        self._QName = QName
        self._QSize = QSize
        self._QStart = QStart
        self._QEnd = QEnd
        self._TName = TName
        self._TSize = TSize
        self._TStart = TStart
        self._TEnd = TEnd
        self._blockCount = blockCount
        self._blockSizes = blockSizes
        self._qStarts = qStarts
        self._tStarts = tStarts

    def __eq__(self, o):
        """Compare two records on TName, TSize, TStart and TEnd only.

        Returns NotImplemented (so that ``==`` evaluates to False) when *o*
        does not carry these attributes, instead of raising AttributeError.
        """
        try:
            return (self._TName == o._TName
                    and self._TSize == o._TSize
                    and self._TStart == o._TStart
                    and self._TEnd == o._TEnd)
        except AttributeError:
            return NotImplemented

    def __ne__(self, o):
        """Inverse of ``__eq__``.

        Defined explicitly because Python 2 does not derive ``!=`` from
        ``__eq__``.
        """
        result = self.__eq__(o)
        if result is NotImplemented:
            return result
        return not result

    def setMatch(self, match):
        self._match = match

    def setMismatch(self, mismatch):
        self._mismatch = mismatch

    def setRepMatch(self, repMatch):
        self._repMatch = repMatch

    def setN(self, N):
        self._N = N

    def setQGapCount(self, QGapCount):
        self._QGapCount = QGapCount

    def setQGapBases(self, QGapBases):
        self._QGapBases = QGapBases

    def setTGapCount(self, TGapCount):
        self._TGapCount = TGapCount

    def setTGapBases(self, TGapBases):
        self._TGapBases = TGapBases

    def setStrand(self, strand):
        self._strand = strand

    def setQName(self, QName):
        self._QName = QName

    def setQSize(self, QSize):
        self._QSize = QSize

    def setQStart(self, QStart):
        self._QStart = QStart

    def setQEnd(self, QEnd):
        self._QEnd = QEnd

    def setTName(self, TName):
        self._TName = TName

    def setTSize(self, TSize):
        self._TSize = TSize

    def setTStart(self, TStart):
        self._TStart = TStart

    def setTEnd(self, TEnd):
        self._TEnd = TEnd

    def setBlockCount(self, blockCount):
        self._blockCount = blockCount

    def setBlockSizes(self, blockSizes):
        self._blockSizes = blockSizes

    def setQStarts(self, qStarts):
        self._qStarts = qStarts

    def setTStarts(self, tStarts):
        self._tStarts = tStarts

    def getMatch(self):
        return self._match

    def getMismatch(self):
        return self._mismatch

    def getRepMatch(self):
        return self._repMatch

    def getN(self):
        return self._N

    def getQGapCount(self):
        return self._QGapCount

    def getQGapBases(self):
        return self._QGapBases

    def getTGapCount(self):
        return self._TGapCount

    def getTGapBases(self):
        return self._TGapBases

    def getStrand(self):
        return self._strand

    def getQName(self):
        return self._QName

    def getQSize(self):
        return self._QSize

    def getQStart(self):
        return self._QStart

    def getQEnd(self):
        return self._QEnd

    def getTName(self):
        return self._TName

    def getTSize(self):
        return self._TSize

    def getTStart(self):
        return self._TStart

    def getTEnd(self):
        return self._TEnd

    def getBlockCount(self):
        return self._blockCount

    def getBlockSizes(self):
        return self._blockSizes

    def getQStarts(self):
        return self._qStarts

    def getTStarts(self):
        return self._tStarts

    def setAttributes(self, lResults, iCurrentLineNumber):
        """Fill the record from a list of column values.

        Each non-empty value is passed to the matching setter, in column
        order. For every empty value a warning is written to stderr; if at
        least one value was empty, all attributes are reset to ''.

        @param lResults: column values in file order (21 expected). A value
            missing because the list is too short is handled like an empty
            one.
        @param iCurrentLineNumber: line number, used in warning messages only
        """
        error = False
        nbValues = len(lResults)

        for index, (label, _attribute) in enumerate(self._FIELDS):
            # A short list must not raise IndexError half-way through and
            # leave the record partially filled.
            value = lResults[index] if index < nbValues else ''
            if value != '':
                getattr(self, 'set' + label)(value)
            else:
                sys.stderr.write("WARNING: The field %s is empty in blat file in line %s\n" % (label, iCurrentLineNumber))
                error = True

        if error:
            self._setAllToNull()

    def setAttributesFromString(self, blatLine, iCurrentLineNumber ="", fieldSeparator ="\t"):
        """Fill the record from one raw line of a Blat file.

        Trailing whitespace is stripped before the line is split. A line
        that does not split into exactly 21 columns produces a warning on
        stderr and leaves the record untouched.

        @param blatLine: the line to parse
        @param iCurrentLineNumber: line number, used in warning messages only
        @param fieldSeparator: column separator (tabulation by default)
        """
        lBlatLineItem = blatLine.rstrip().split(fieldSeparator)
        if len(lBlatLineItem) != len(self._FIELDS):
            sys.stderr.write("WARNING: The line %s is not valid blat line (%s columns -> 21 columns needed)\n" % (iCurrentLineNumber, len(lBlatLineItem)))
        else:
            self.setAttributes(lBlatLineItem, iCurrentLineNumber)

    def _setAllToNull(self):
        """Reset every column attribute to the empty string."""
        for _label, attribute in self._FIELDS:
            setattr(self, attribute, '')
