Mercurial > repos > yufei-luo > s_mart
diff commons/core/parsing/BlatToGff.py @ 36:44d5973c188c
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 15:02:29 -0400 |
parents | 769e306b7933 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/core/parsing/BlatToGff.py Tue Apr 30 15:02:29 2013 -0400 @@ -0,0 +1,116 @@ +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
import optparse
import os
from commons.core.parsing.BlatParser import BlatParser


class BlatToGff(object):
    """Convert a tabular BLAT result file into a GFF3 file.

    Typical use (see the ``__main__`` guard at the bottom of the file):
    ``setAttributesFromCmdLine()`` followed by ``run()``.
    """

    # Number of leading lines of the BLAT file that run() skips before it
    # starts converting hits (presumably the PSL header -- confirm against
    # the files actually fed to this script).
    NB_HEADER_LINES = 5

    def __init__(self):
        # Parsed command-line options (set by setAttributesFromCmdLine).
        self._options = None
        # Validated paths and settings (set by checkOptions).
        self._inputFileBlat = None
        self._outputFileGFF = None
        self._methodName = None

    def setAttributesFromCmdLine(self, args=None):
        """Parse the command line and store the result in ``self._options``.

        :param args: optional list of argument strings; when ``None``
            (the default) optparse falls back to ``sys.argv[1:]``.
        """
        usage = ('\n'
                 'This Script Launch BlatToGff.\n\n'
                 'Example 1: python BlatToGff.py -i blatResultsFile.tab -o outputFile.gff3\n\n')
        # The version string used to read "CovertSamToFastq.py v1.0", a
        # copy-paste leftover from another script.
        parser = optparse.OptionParser(usage=usage, version="BlatToGff.py v1.0")
        parser.add_option('-i', '--input', dest='inputBLAT', help='Blat Input File Name [Format: tabular]', default=None)
        parser.add_option('-o', '--output', dest='output', help='Output File Name [Format: GFF3]', default=None)
        parser.add_option('-n', '--methodname', dest='methodName', help='Method name in col. 3 [Default: None]', default=None)
        options, _ = parser.parse_args(args)
        self._options = options

    def checkOptions(self):
        """Validate the parsed options and copy them onto the instance.

        :raises Exception: if no input file was given, if the input file
            does not exist, or if no output file was given.
        """
        # Both options default to None, so test for "missing" with
        # truthiness: comparing against '' alone let None through and
        # crashed later with a TypeError instead of a readable message.
        if not self._options.inputBLAT:
            raise Exception("ERROR: No Blat file specified for -i !")
        if not os.path.exists(self._options.inputBLAT):
            raise Exception("ERROR: Blat Input File doesn't exist !")
        self._inputFileBlat = self._options.inputBLAT

        if not self._options.output:
            raise Exception("ERROR: No Output file specified for -o !")
        self._outputFileGFF = self._options.output

        self._methodName = self._options.methodName

    def run(self):
        """Check the options, then convert every BLAT hit to a GFF3 line.

        The first ``NB_HEADER_LINES`` lines of the input are skipped; each
        following line is converted and appended to the output file. The
        line number passed to the converter is the 1-based line number in
        the input file (so the first converted line is number 6).
        """
        self.checkOptions()
        self._createGFFOutputFile()
        # 'with' guarantees the input handle is closed, even if a line
        # fails to parse (the handle used to be leaked).
        with open(self._inputFileBlat, 'r') as blatFile:
            for numberLine, blatLine in enumerate(blatFile, 1):
                if numberLine <= self.NB_HEADER_LINES:
                    continue
                gffLine = self.convertBlatObjectToGffLine(blatLine, numberLine)
                self._printGFFLinesToOutputFile(gffLine)

    def convertBlatObjectToGffLine(self, blatLine, numberLine):
        """Build one tab-separated, newline-terminated GFF line from a BLAT line.

        :param blatLine: one raw line of the BLAT file.
        :param numberLine: line number forwarded to
            ``BlatParser.setAttributesFromString``.
        :return: the nine-column GFF line as a string.
        """
        iBlatHit = BlatParser()
        iBlatHit.setAttributesFromString(blatLine, numberLine)

        queryName = iBlatHit.getQName()
        targetStart = iBlatHit.getTStart()
        targetEnd = iBlatHit.getTEnd()

        # Column 3 is 'BES', optionally prefixed by the method name.
        if not self._methodName:
            featureType = 'BES'
        else:
            featureType = '%s:BES' % self._methodName

        attributes = 'ID=%s;Name=%s;bes_start=%s;bes_end=%s;bes_size=%s' % (
            queryName, queryName, targetStart, targetEnd, iBlatHit.getTSize())

        # NOTE(review): the strand (column 7) is hard-coded to '+', whatever
        # the BLAT hit says -- confirm this is intended.
        columns = (
            iBlatHit.getTName(),  # col1: target name
            'BlatToGff',          # col2: source
            featureType,          # col3
            targetStart,          # col4
            targetEnd,            # col5
            '.',                  # col6: score
            '+',                  # col7: strand
            '.',                  # col8: phase
            attributes,           # col9
        )
        return '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % columns

    def _createGFFOutputFile(self):
        """Create (or truncate) the output file and write the GFF3 header."""
        with open(self._outputFileGFF, 'w') as gffFile:
            gffFile.write("##gff-version 3\n")

    def _printGFFLinesToOutputFile(self, line):
        """Append ``line`` to the output file."""
        with open(self._outputFileGFF, 'a') as gffFile:
            gffFile.write(line)


if __name__ == '__main__':
    iBlatToGff = BlatToGff()
    iBlatToGff.setAttributesFromCmdLine()
    iBlatToGff.run()