Mercurial > repos > yufei-luo > s_mart
diff commons/launcher/LaunchTRF.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/launcher/LaunchTRF.py Tue Apr 30 14:33:21 2013 -0400 @@ -0,0 +1,155 @@ +#!/usr/bin/env python + +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 

from commons.core.LoggerFactory import LoggerFactory
from commons.core.utils.RepetOptionParser import RepetOptionParser
from commons.core.checker.ConfigChecker import ConfigRules
from commons.core.checker.ConfigChecker import ConfigChecker
import subprocess
import glob
import os

LOG_DEPTH = "repet.launchers"


class LaunchTRF(object):
    """Run the external ``trf`` program on a fasta file and convert its
    ``.dat`` report into a tab-separated "set" file.

    Public attributes:
      inFileName -- path of the input fasta file
      maxPeriod  -- maximum period size passed to ``trf``
    """

    def __init__(self, inFileName = "", outFileName = "", maxPeriod=15, doClean = False, verbosity = 0):
        """Store the launcher settings and create the logger.

        inFileName  -- input fasta file name
        outFileName -- output file name; "" selects "<inFileName>.TRF.set"
        maxPeriod   -- maximum period size to report
        doClean     -- if True, remove the TRF ``.dat`` file after parsing
        verbosity   -- logger verbosity level
        """
        self.inFileName = inFileName
        self.setOutFileName(outFileName)
        self.maxPeriod = maxPeriod
        self._doClean = doClean
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    def setAttributesFromCmdLine(self):
        """Parse the command line and copy the option values onto this object."""
        description = "Launch TRF to detect micro-satellites in sequences."
        epilog = "\nExample 1: launch without verbosity and keep temporary files.\n"
        # The example used to name an unrelated script (LaunchTemplate.py).
        epilog += "\t$ python LaunchTRF.py -i file.fa -v 0"
        epilog += "\n\t"
        parser = RepetOptionParser(description = description, epilog = epilog)
        parser.add_option("-i", "--in", dest = "inFileName", action = "store", type = "string", help = "input file name [compulsory] [format: fasta]", default = "")
        parser.add_option("-o", "--out", dest = "outFileName", action = "store", type = "string", help = "output file name [default: <input>.TRF.set]", default = "")
        parser.add_option("-m", "--maxPeriod", dest = "maxPeriod", action = "store", type = "int", help = " maximum period size to report [default: 15]", default = 15)
        parser.add_option("-c", "--clean", dest = "doClean", action = "store_true", help = "clean temporary files [optional] [default: False]", default = False)
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """Copy parsed option values onto this object.

        The input name is set before the output name because the default
        output name is derived from the input name.
        """
        self.setInFileName(options.inFileName)
        self.setOutFileName(options.outFileName)
        self.maxPeriod = options.maxPeriod
        self.setDoClean(options.doClean)
        self.setVerbosity(options.verbosity)

    def setInFileName(self, inFileName):
        """Set the input file name.

        When no explicit output name was given, the derived default
        "<inFileName>.TRF.set" is refreshed so it never keeps pointing at a
        previous (possibly empty) input name.
        """
        self.inFileName = inFileName
        if getattr(self, "_hasDefaultOutFileName", False):
            self._outFileName = "%s.TRF.set" % inFileName

    def setOutFileName(self, outFileName):
        """Set the output file name; "" selects "<inFileName>.TRF.set"."""
        self._hasDefaultOutFileName = (outFileName == "")
        if self._hasDefaultOutFileName:
            self._outFileName = "%s.TRF.set" % self.inFileName
        else:
            self._outFileName = outFileName

    def setDoClean(self, doClean):
        """Set whether the TRF ``.dat`` file is removed after parsing."""
        self._doClean = doClean

    def setVerbosity(self, verbosity):
        """Set the verbosity level applied to the logger in run()."""
        self._verbosity = verbosity

    def _checkOptions(self):
        """Raise an Exception if the compulsory input file name is missing."""
        if self.inFileName == "":
            self._logAndRaise("ERROR: Missing input file name")

    def _logAndRaise(self, errorMsg):
        """Log errorMsg at error level, then raise Exception(errorMsg)."""
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def _getDatFileName(self):
        """Return the name of the ``.dat`` report that ``trf`` is expected to write."""
        # The numeric suffix mirrors the parameters passed in _launchTRF().
        # NOTE(review): trf appears to write this file in the current working
        # directory using the input's base name; an input path containing a
        # directory may therefore not match -- confirm against the TRF docs.
        return "%s.2.3.5.80.10.20.%d.dat" % (self.inFileName, self.maxPeriod)

    def _launchTRF(self):
        """Run ``trf`` on the input file and log what it printed.

        Raises OSError if the ``trf`` executable cannot be started.
        """
        # Build the argument vector directly instead of splitting a command
        # string on spaces, so input paths containing spaces stay one argument.
        # Positional values follow the TRF usage line (Match Mismatch Delta
        # PM PI Minscore MaxPeriod); -h suppresses HTML output and -d requests
        # the .dat file parsed by _parseTRF().
        args = ["trf", self.inFileName, "2", "3", "5", "80", "10", "20",
                "%d" % self.maxPeriod, "-h", "-d"]
        self._log.debug("Running : %s" % " ".join(args))
        try:
            process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                       universal_newlines=True)
        except OSError:
            self._log.error("ERROR: could not execute 'trf'; is it installed and in the PATH?")
            raise
        stdoutData, stderrData = process.communicate()
        self._log.debug("Output:\n%s" % stdoutData)
        if stderrData:
            self._log.debug("Error output:\n%s" % stderrData)
        # NOTE(review): the exit status is deliberately not checked, as in the
        # original code; trf is reported to exit non-zero even on success --
        # confirm before adding a check.

    def _parseTRF(self):
        """Convert the TRF ``.dat`` report into the tab-separated output file.

        One line is written per repeat:
        <running number> TAB (<consensus>)<rounded copy number> TAB
        <sequence name> TAB <start> TAB <end>
        """
        self._log.debug("Parsing TRF output")
        with open(self._getDatFileName(), 'r') as inFile:
            with open(self._outFileName, 'w') as outFile:
                nbPatterns = 0
                # Defined up-front so a data row met before any "Sequence:"
                # line cannot raise a NameError.
                seqName = ""
                for line in inFile:
                    # split() without argument also discards the trailing
                    # newline, so no token needs manual trimming.
                    data = line.split()
                    if len(data) > 1 and "Sequence:" in data[0]:
                        # First word after the tag is the name; any header
                        # description that follows is ignored.
                        seqName = data[1]
                        continue
                    if len(data) < 14:
                        continue
                    if not data[0].isdigit():
                        # Not a repeat row: those begin with the start coordinate.
                        continue
                    nbPatterns += 1
                    consensus = data[13]
                    copyNb = int( float(data[3]) + 0.5 )
                    start = data[0]
                    end = data[1]
                    outFile.write( "%i\t(%s)%i\t%s\t%s\t%s\n" % ( nbPatterns, consensus, copyNb, seqName, start, end ) )
        self._log.debug("Finished Parsing TRF output")

    def _clean(self):
        """Remove the TRF ``.dat`` file; a failure to remove it is not fatal."""
        datFileName = self._getDatFileName()
        try:
            os.remove(datFileName)
        except OSError as e:
            # Best-effort cleanup: report instead of silently swallowing.
            self._log.debug("Could not remove %s: %s" % (datFileName, e))

    def run(self):
        """
        Launch TRF to detect micro-satellites in sequences.
        """
        LoggerFactory.setLevel(self._log, self._verbosity)
        self._checkOptions()
        self._log.info("START Launch")
        self._log.debug("Input file name: %s" % self.inFileName)

        self._launchTRF()
        self._parseTRF()

        if self._doClean:
            self._log.warning("Files will be cleaned")
            self._clean()
        self._log.info("END Launch")


if __name__ == "__main__":
    iLaunchTRF = LaunchTRF()
    iLaunchTRF.setAttributesFromCmdLine()
    iLaunchTRF.run()