Mercurial > repos > yufei-luo > s_mart
view commons/core/parsing/CrossSsrAndBesMappedByBlatToGff.py @ 64:783e6ed4eb66
Minor bug correction.
Added casts to str in Galaxy XML files. Also closed the writer in the Python script "changeTagName."
author | m-zytnicki |
---|---|
date | Mon, 19 Oct 2015 14:16:44 +0200 |
parents | 769e306b7933 |
children |
line wrap: on
line source
# Copyright INRA (Institut National de la Recherche Agronomique) # http://www.inra.fr # http://urgi.versailles.inra.fr # # This software is governed by the CeCILL license under French law and # abiding by the rules of distribution of free software. You can use, # modify and/ or redistribute the software under the terms of the CeCILL # license as circulated by CEA, CNRS and INRIA at the following URL # "http://www.cecill.info". # # As a counterpart to the access to the source code and rights to copy, # modify and redistribute granted by the license, users are provided only # with a limited warranty and the software's author, the holder of the # economic rights, and the successive licensors have only limited # liability. # # In this respect, the user's attention is drawn to the risks associated # with loading, using, modifying and/or developing or reproducing the # software by the user in light of its specific status of free software, # that may mean that it is complicated to manipulate, and that also # therefore means that it is reserved for developers and experienced # professionals having in-depth computer knowledge. Users are therefore # encouraged to load and test the software's suitability as regards their # requirements in conditions enabling the security of their systems and/or # data to be ensured and, more generally, to use and operate it in the # same conditions as regards security. # # The fact that you are presently reading this means that you have had # knowledge of the CeCILL license and that you accept its terms. 
import os
import optparse

from commons.core.parsing.SsrParser import SsrParser
from commons.core.parsing.BlatParser import BlatParser


class CrossSsrAndBesMappedByBlatToGff(object):
    """Cross SSR results with BLAT hits of BES and write them as GFF3.

    The SSR tabular file is loaded into a dictionary keyed by BES name.
    The BLAT tabular file is then read hit by hit; every hit whose query
    name is a key of that dictionary produces one GFF3 line per SSR of
    the BES, with the SSR coordinates converted to target coordinates.
    """

    # Number of leading lines of the BLAT file that are skipped as header.
    _BLAT_HEADER_LINE_COUNT = 5
    # Number of leading lines of the SSR file that are skipped as header.
    _SSR_HEADER_LINE_COUNT = 1

    def __init__(self):
        self._inputFileSSR = ''
        self._inputFileBlat = ''
        self._outputFileGFF = ''
        # Set for real by checkOptions(); initialised here so that the
        # attributes always exist on a fresh instance.
        self._methodName = None
        self.options = None

    def setAttributesFromCmdLine(self):
        """Parse the command line and store the result in ``self.options``."""
        usage = (
            '\nThis Script Launch CrossSsrAndBesMappedByBlatToGff.\n\n'
            'Example 1: python CrossSsrAndBesMappedByBlatToGff.py -s ssrResultsFile.tab -b blatResultsFile.tab -o outputFile.gff3\n'
            'Example 2: python CrossSsrAndBesMappedByBlatToGff.py -s ssrResultsFile.tab -b blatResultsFile.tab -o outputFile.gff3 -n muscadine:filtre1\n\n'
        )
        # The version string used to name another script (copy-paste slip).
        parser = optparse.OptionParser(usage=usage, version="CrossSsrAndBesMappedByBlatToGff.py v1.0")
        parser.add_option('-s', '--ssr', dest='inputSSR', help='SSR Input File Name [Format: tabular]', default=None)
        parser.add_option('-b', '--blat', dest='inputBLAT', help='Blat Input File Name [Format: tabular]', default=None)
        parser.add_option('-o', '--output', dest='output', help='Output File Name [Format: GFF3]', default=None)
        parser.add_option('-n', '--methodName', dest='methodName', help='Method name in col. 3 [Default: None]', default=None)
        (options, args) = parser.parse_args()
        self.options = options

    def checkOptions(self):
        """Validate ``self.options`` and copy them to the instance attributes.

        Raises:
            Exception: if an input file is not specified or does not exist,
                or if no output file is specified.
        """
        options = self.options

        # Options default to None, so test for "missing" with truthiness:
        # comparing against '' let None through to os.path.exists()/open().
        if not options.inputSSR:
            raise Exception("ERROR: No SSR file specified for -s !")
        if not os.path.exists(options.inputSSR):
            raise Exception("ERROR: SSR Input File doesn't exist !")
        self._inputFileSSR = options.inputSSR

        if not options.inputBLAT:
            raise Exception("ERROR: No Blat file specified for -b !")
        if not os.path.exists(options.inputBLAT):
            raise Exception("ERROR: Blat Input File doesn't exist !")
        self._inputFileBlat = options.inputBLAT

        if not options.output:
            raise Exception("ERROR: No Output file specified for -o !")
        self._outputFileGFF = options.output

        self._methodName = options.methodName

    def run(self):
        """Check the options, then write one GFF3 line per (BLAT hit, SSR) pair.

        Reading of the BLAT file stops at the first empty line found after
        the header, or at end of file.
        """
        self.checkOptions()
        self._createGFFOutputFile()
        ssrHitsByBesName = self.createDictOfSsrParser()

        with open(self._inputFileBlat, 'r') as blatFile:
            for lineNumber, blatLine in enumerate(blatFile, 1):
                if lineNumber <= self._BLAT_HEADER_LINE_COUNT:
                    continue
                if blatLine == '\n':
                    break
                blatHit = BlatParser()
                blatHit.setAttributesFromString(blatLine, lineNumber)
                if blatHit.getQName() in ssrHitsByBesName:
                    gffLines = self.createListOfGFFLinesForThisBesWithSSR(blatHit, ssrHitsByBesName)
                    self._printGFFLinesToOutputFile(gffLines)

    def createDictOfSsrParser(self, dictSsrParser=None):
        """Read the SSR file and group its hits by BES name.

        Args:
            dictSsrParser: ignored; a new dictionary is always built. The
                parameter is kept so that existing callers keep working.

        Returns:
            dict mapping each BES name to the list of SsrParser objects
            read for it, in file order.
        """
        ssrHitsByBesName = {}
        with open(self._inputFileSSR, 'r') as ssrFile:
            for lineNumber, line in enumerate(ssrFile, 1):
                if lineNumber <= self._SSR_HEADER_LINE_COUNT:
                    continue
                # An empty line marks the end of the data.
                if line == '\n':
                    break
                ssrHit = SsrParser()
                ssrHit.setAttributesFromString(line, lineNumber)
                ssrHitsByBesName.setdefault(ssrHit.getBesName(), []).append(ssrHit)
        return ssrHitsByBesName

    def createListOfGFFLinesForThisBesWithSSR(self, BlatHitObject, dictSsrParser):
        """Build the GFF3 lines of every SSR carried by the BES of a BLAT hit.

        Args:
            BlatHitObject: BLAT hit whose query name is a key of
                ``dictSsrParser``.
            dictSsrParser: dictionary returned by createDictOfSsrParser().

        Returns:
            list of newline-terminated, tab-separated GFF3 lines.
        """
        besNameToKeep = BlatHitObject.getQName()
        # These values are the same for every SSR of the hit.
        targetName = BlatHitObject.getTName()
        targetStart = BlatHitObject.getTStart()
        targetEnd = BlatHitObject.getTEnd()
        strand = BlatHitObject.getStrand()
        if self._methodName:
            featureType = '%s:SSR' % self._methodName
        else:
            featureType = 'SSR'

        listGffLines = []
        for SSRHitObject in dictSsrParser[besNameToKeep]:
            convertedStart = self.convertSSRPositionsToChromPositions(SSRHitObject.getSsrStart(), targetStart, targetEnd, strand)
            convertedEnd = self.convertSSRPositionsToChromPositions(SSRHitObject.getSsrEnd(), targetStart, targetEnd, strand)
            # GFF3 requires start <= end whatever the strand; on the '-'
            # strand the conversion mirrors the positions, so the converted
            # start is the larger one and the two must be put in order.
            gffStart, gffEnd = sorted((convertedStart, convertedEnd))

            ssrSeq = self.getSsrSeq(SSRHitObject.getSsrMotif(), SSRHitObject.getSsrMotifNumber())
            # NOTE(review): assumes getBesRedundancy() is a plain getter, so
            # calling it once instead of three times is equivalent.
            besRedundancy = SSRHitObject.getBesRedundancy()
            featureId = 'SSR_%s_%s' % (besNameToKeep, besRedundancy)
            attributes = ';'.join('%s=%s' % pair for pair in (
                ('ID', featureId),
                ('Name', featureId),
                ('bes_name', besNameToKeep),
                ('bes_size', BlatHitObject.getQSize()),
                ('bes_matchstart', BlatHitObject.getQStart()),
                ('bes_matchend', BlatHitObject.getQEnd()),
                ('bes_redundancy', besRedundancy),
                ('ssr_type', SSRHitObject.getSsrNbNucleotides()),
                ('ssr_motif', SSRHitObject.getSsrMotif()),
                ('ssr_motif_number', SSRHitObject.getSsrMotifNumber()),
                ('ssr_start', SSRHitObject.getSsrStart()),
                ('ssr_end', SSRHitObject.getSsrEnd()),
                ('muscadine_seq', ssrSeq),
            ))
            gffLine = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (
                targetName, 'CrossSsrAndBesAlignedByBlat', featureType,
                gffStart, gffEnd, '.', strand, '.', attributes)
            listGffLines.append(gffLine)
        return listGffLines

    def convertSSRPositionsToChromPositions(self, ssrPos, chromPosStart, chromPosEnd, strand):
        """Convert a position on the BES into a position on the target.

        Args:
            ssrPos: position of the SSR on the BES (int or numeric string).
            chromPosStart: start of the hit on the target.
            chromPosEnd: end of the hit on the target.
            strand: '+' or '-'.

        Returns:
            int: ``chromPosStart + ssrPos - 1`` on the '+' strand,
            ``chromPosEnd - ssrPos + 1`` on the '-' strand.

        Raises:
            ValueError: if ``strand`` is neither '+' nor '-' (this used to
                fail with an UnboundLocalError).
        """
        if strand == '+':
            return int(chromPosStart) + int(ssrPos) - 1
        if strand == '-':
            return int(chromPosEnd) - int(ssrPos) + 1
        raise ValueError("ERROR: unexpected strand '%s' (expected '+' or '-') !" % strand)

    def getSsrSeq(self, motif, nbMotif):
        """Return ``motif`` repeated ``nbMotif`` times."""
        return motif * int(nbMotif)

    def _createGFFOutputFile(self):
        """Create (or truncate) the output file and write the GFF3 header."""
        with open(self._outputFileGFF, 'w') as gffFile:
            gffFile.write("##gff-version 3\n")

    def _printGFFLinesToOutputFile(self, lLinesToPrint):
        """Append the given lines to the output file."""
        with open(self._outputFileGFF, 'a') as gffFile:
            gffFile.writelines(lLinesToPrint)


if __name__ == '__main__':
    iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()
    iCrossSsrAndBesMappedByBlatToGff.setAttributesFromCmdLine()
    iCrossSsrAndBesMappedByBlatToGff.run()