Mercurial > repos > yufei-luo > s_mart
view commons/core/parsing/BlatToGff.py @ 60:90f4b29d884f
Uploaded
author | m-zytnicki |
---|---|
date | Fri, 21 Feb 2014 08:32:36 -0500 |
parents | 769e306b7933 |
children |
line wrap: on
line source
# Copyright INRA (Institut National de la Recherche Agronomique) # http://www.inra.fr # http://urgi.versailles.inra.fr # # This software is governed by the CeCILL license under French law and # abiding by the rules of distribution of free software. You can use, # modify and/ or redistribute the software under the terms of the CeCILL # license as circulated by CEA, CNRS and INRIA at the following URL # "http://www.cecill.info". # # As a counterpart to the access to the source code and rights to copy, # modify and redistribute granted by the license, users are provided only # with a limited warranty and the software's author, the holder of the # economic rights, and the successive licensors have only limited # liability. # # In this respect, the user's attention is drawn to the risks associated # with loading, using, modifying and/or developing or reproducing the # software by the user in light of its specific status of free software, # that may mean that it is complicated to manipulate, and that also # therefore means that it is reserved for developers and experienced # professionals having in-depth computer knowledge. Users are therefore # encouraged to load and test the software's suitability as regards their # requirements in conditions enabling the security of their systems and/or # data to be ensured and, more generally, to use and operate it in the # same conditions as regards security. # # The fact that you are presently reading this means that you have had # knowledge of the CeCILL license and that you accept its terms. 
import optparse
import os

from commons.core.parsing.BlatParser import BlatParser


class BlatToGff(object):
    """Convert a tabular BLAT result file into a GFF3 file.

    Each BLAT hit found after the header lines of the input file becomes one
    GFF3 line whose source column is 'BlatToGff' and whose type column is
    'BES' (or '<methodName>:BES' when a method name is given).

    Typical use:
        iBlatToGff = BlatToGff()
        iBlatToGff.setAttributesFromCmdLine()
        iBlatToGff.run()
    """

    # Number of lines skipped at the top of the BLAT file before the first
    # hit; the first converted line is therefore reported as line 6.
    _NB_HEADER_LINES = 5

    def __init__(self):
        # Defined up front so that every attribute exists even when a method
        # is called before setAttributesFromCmdLine() / checkOptions().
        self._options = None
        self._inputFileBlat = None
        self._outputFileGFF = None
        self._methodName = None

    def setAttributesFromCmdLine(self):
        """Parse the command line and store the parsed options in self._options."""
        usage = ('\nThis Script Launch BlatToGff.\n\n'
                 'Example 1: python BlatToGff.py -i blatResultsFile.tab -o outputFile.gff3\n\n')
        parser = optparse.OptionParser(usage=usage, version="BlatToGff.py v1.0")
        parser.add_option('-i', '--input', dest='inputBLAT',
                          help='Blat Input File Name [Format: tabular]', default=None)
        parser.add_option('-o', '--output', dest='output',
                          help='Output File Name [Format: GFF3]', default=None)
        parser.add_option('-n', '--methodname', dest='methodName',
                          help='Method name in col. 3 [Default: None]', default=None)
        options, _ = parser.parse_args()
        self._options = options

    def checkOptions(self):
        """Validate the parsed options and copy them to instance attributes.

        Raises:
            Exception: if no input file is given, if the input file does not
                exist, or if no output file is given.
        """
        inputBlat = self._options.inputBLAT
        # The option defaults are None, so both None and '' must be treated
        # as "not specified".
        if not inputBlat:
            raise Exception("ERROR: No Blat file specified for -i !")
        if not os.path.exists(inputBlat):
            raise Exception("ERROR: Blat Input File doesn't exist !")
        self._inputFileBlat = inputBlat

        if not self._options.output:
            raise Exception("ERROR: No Output file specified for -o !")
        self._outputFileGFF = self._options.output

        self._methodName = self._options.methodName

    def run(self):
        """Check the options, then convert every BLAT hit into a GFF3 line.

        The output file is (re)created with its GFF3 header first, then one
        line is appended for each input line located after the header lines.
        """
        self.checkOptions()
        self._createGFFOutputFile()
        # Both files are opened once and are closed even if a line fails to
        # convert.
        with open(self._inputFileBlat, 'r') as blatFile:
            with open(self._outputFileGFF, 'a') as gffFile:
                for numberLine, blatLine in enumerate(blatFile, 1):
                    if numberLine <= self._NB_HEADER_LINES:
                        continue
                    gffFile.write(self.convertBlatObjectToGffLine(blatLine, numberLine))

    def convertBlatObjectToGffLine(self, blatLine, numberLine):
        """Return the GFF3 line (newline-terminated) describing one BLAT hit.

        Args:
            blatLine: one raw line of the BLAT file.
            numberLine: line number of blatLine in the BLAT file; it is
                handed to the BLAT parser together with the line.

        Returns:
            A string made of nine tab-separated columns followed by '\\n'.
        """
        iBlatHit = BlatParser()
        iBlatHit.setAttributesFromString(blatLine, numberLine)

        queryName = iBlatHit.getQName()
        targetStart = iBlatHit.getTStart()
        targetEnd = iBlatHit.getTEnd()

        if self._methodName:
            featureType = '%s:BES' % self._methodName
        else:
            featureType = 'BES'

        # NOTE(review): start/end are copied as returned by the parser and the
        # strand is always '+'; confirm this matches the coordinate system and
        # orientation expected by the consumers of the GFF3 file.
        attributes = 'ID=%s;Name=%s;bes_start=%s;bes_end=%s;bes_size=%s' % (
            queryName, queryName, targetStart, targetEnd, iBlatHit.getTSize())
        return '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (
            iBlatHit.getTName(), 'BlatToGff', featureType,
            targetStart, targetEnd, '.', '+', '.', attributes)

    def _createGFFOutputFile(self):
        """Create (or truncate) the output file and write the GFF3 header."""
        with open(self._outputFileGFF, 'w') as gffFile:
            gffFile.write("##gff-version 3\n")

    def _printGFFLinesToOutputFile(self, line):
        """Append line to the output file."""
        with open(self._outputFileGFF, 'a') as gffFile:
            gffFile.write(line)


if __name__ == '__main__':
    iBlatToGff = BlatToGff()
    iBlatToGff.setAttributesFromCmdLine()
    iBlatToGff.run()