Mercurial > repos > yufei-luo > s_mart
diff commons/launcher/LaunchNucmer.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/launcher/LaunchNucmer.py Mon Apr 29 03:20:15 2013 -0400 @@ -0,0 +1,158 @@ +#! /usr/bin/env python + +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
import os
import subprocess

from commons.core.checker.CheckerUtils import CheckerUtils
from commons.core.utils.FileUtils import FileUtils
from commons.core.utils.RepetOptionParser import RepetOptionParser
from commons.core.LoggerFactory import LoggerFactory

LOG_DEPTH = "repet.tools"

# Output prefix nucmer uses when no --prefix is given (see the MUMmer manual).
# NOTE(review): confirm against the installed MUMmer version.
_DEFAULT_NUCMER_PREFIX = "out"


class LaunchNucmer(object):
    """Run nucmer (MUMmer package) and, optionally, show-coords on its output.

    The launcher can be configured either through the constructor or from
    the command line with setAttributesFromCmdLine(); run() then validates
    the inputs and executes the external programs.
    """

    def __init__(self, queryFileName="", refFileName="", prefix=None,
                 genCoords=False, showCoords=False, mum=False, maxGaps=90,
                 minMatch=20, nooptimize=False, mincluster=65,
                 minIdentity=50, minLength=100, verbosity=0):
        """Store the run parameters and create the logger.

        queryFileName / refFileName: input files passed to nucmer.
        prefix: output prefix given to nucmer (--prefix); None lets nucmer
            use its own default.
        genCoords: if True, pass -o to nucmer.
        showCoords: if True, run show-coords after nucmer.
        mum: if True, pass --mum to nucmer.
        maxGaps / minMatch / mincluster: values for nucmer -g / -l / -c.
        nooptimize: if True, pass --nooptimize instead of --optimize.
        minIdentity / minLength: values for show-coords -I / -L.
        verbosity: level handed to the logger.
        """
        self._queryFileName = queryFileName
        self._refFileName = refFileName
        self._prefix = prefix
        self._genCoords = genCoords
        self._showCoords = showCoords
        self._mum = mum
        self._maxgaps = maxGaps
        self._minMatch = minMatch
        self._nooptimize = nooptimize
        self._mincluster = mincluster
        self._minIdentity = minIdentity
        self._minLength = minLength
        self.verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)

    def setMincluster(self, value):
        """Set the value passed to nucmer as -c."""
        self._mincluster = value

    def getMincluster(self):
        """Return the value passed to nucmer as -c."""
        return self._mincluster

    mincluster = property(getMincluster, setMincluster)

    def setAttributesFromCmdLine(self):
        """Parse the command line and copy the options onto this instance."""
        description = "LaunchNucmer runs the Nucmer program (part of the mummer package) ."
        parser = RepetOptionParser(description = description)
        parser.add_option("-q", "--query", dest="queryFileName", default = "", action="store", type="string", help="input query file [compulsory] [format: fasta]")
        parser.add_option("-r", "--ref", dest="refFileName", default = "", action="store", type="string", help="input ref file [compulsory] [format: fasta]")
        parser.add_option("-p", "--prefix", dest="prefix", default = None, action="store", type="string", help="prefix name [optional]")
        parser.add_option("-o","--gencoords", dest="genCoords",action="store_true", help="generate coords file with minimal option (show-coords -r) [optional] ")
        parser.add_option("-s","--showcoords", dest="showCoords",action="store_true", help="generate coords file with: show-coords -r -c -l -d -I 50 -L 100 -T [optional] ")
        parser.add_option("-m", "--mum", dest="mum", action="store_true", help="Use anchor matches that are unique in both the reference and query [optional] ")
        parser.add_option("-g", "--maxgaps", dest="maxgaps", default = 90, action="store", type="int", help="Maximum gap between two adjacent matches in a cluster (default 90) [optional] ")
        parser.add_option("-l", "--minmatch", dest="minMatch", default = 20, action="store", type="int", help="Minimum length of an maximal exact match (default 20) [optional] ")
        parser.add_option("-n", "--nooptimize", dest="nooptimize", action="store_true", help="nooptimize (default --optimize) [optional] ")
        parser.add_option("-j", "--mincluster", dest="mincluster", default = 65, action="store", type="int", help="Minimum length of a cluster of matches (default 65) [optional] ")

        parser.add_option("-i", "--minIdentity", dest="minIdentity", default = 50, action="store", type="int", help="Minimum identity for show_coords (default 50) [optional] ")
        parser.add_option("-u", "--minLength", dest="minLength", default = 100, action="store", type="int", help="Minimum alignment length for show_coords (default 100) [optional] ")
        parser.add_option("-v", "--verbosity", dest="verbosity", default = 0, action="store", type="int", help="verbosity [optional] ")

        (self._options, args) = parser.parse_args()
        self._setAttributesFromOptions(self._options)

    def _setAttributesFromOptions(self, options):
        """Copy every parsed option value onto the matching attribute."""
        self._queryFileName = options.queryFileName
        self._refFileName = options.refFileName
        self._prefix = options.prefix
        self._genCoords = options.genCoords
        self._showCoords = options.showCoords
        self._mum = options.mum
        self._maxgaps = options.maxgaps
        self._minMatch = options.minMatch
        self._nooptimize = options.nooptimize
        self._mincluster = options.mincluster

        self._minIdentity = options.minIdentity
        self._minLength = options.minLength

        self.verbosity = options.verbosity

    def _logAndRaise(self, errorMsg):
        """Log errorMsg at error level, then raise it as an Exception."""
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def checkOptions(self):
        """Check that the query and reference files are set and exist.

        Raises Exception (through _logAndRaise) when a file name is empty or
        when FileUtils.isRessourceExists() reports the file as missing.
        """
        if self._queryFileName != "":
            if not FileUtils.isRessourceExists(self._queryFileName):
                self._logAndRaise("ERROR: Query file: %s does not exist!" % self._queryFileName)
        else:
            self._logAndRaise("ERROR: No specified --query option!")

        if self._refFileName != "":
            if not FileUtils.isRessourceExists(self._refFileName):
                # The message needs a %s placeholder: without it the "%"
                # operator raises TypeError before the real error is reported.
                self._logAndRaise("ERROR: Ref file: %s does not exist!" % self._refFileName)
        else:
            self._logAndRaise("ERROR: No specified --ref option!")

    def _buildNucmerCommand(self):
        """Return the nucmer command line as an argument list."""
        # An argument list (rather than a string split on whitespace) keeps
        # file names and prefixes containing spaces intact.
        cmd = ["nucmer", self._refFileName, self._queryFileName]
        if self._prefix is not None:
            cmd.append("--prefix=%s" % self._prefix)
        if self._genCoords:
            cmd.append("-o")
        if self._mum:
            cmd.append("--mum")
        cmd.append("-g=%d" % self._maxgaps)
        cmd.append("-l=%d" % self._minMatch)
        if self._nooptimize:
            cmd.append("--nooptimize")
        else:
            cmd.append("--optimize")
        cmd.append("-c=%d" % self._mincluster)
        return cmd

    def _runShowCoords(self):
        """Run show-coords on the delta file and write <prefix>.coords."""
        # Without an explicit prefix, fall back to nucmer's default so that
        # the delta file nucmer actually wrote is the one that gets read.
        prefix = self._prefix
        if prefix is None:
            prefix = _DEFAULT_NUCMER_PREFIX
        deltaFileName = "%s.delta" % prefix
        coordsFileName = "%s.coords" % prefix

        cmd = ["show-coords", "-r", "-c", "-l", "-d",
               "-I", "%d" % self._minIdentity,
               "-L", "%d" % self._minLength,
               "-T", deltaFileName]
        self._log.debug("Running show-coords with following commands : %s > %s" % (" ".join(cmd), coordsFileName))

        # Redirect through a file handle instead of a shell ">" so that no
        # shell is involved and the prefix cannot be interpreted by one.
        coordsFile = open(coordsFileName, "w")
        try:
            try:
                subprocess.call(cmd, stdout=coordsFile)
            except OSError as error:
                # Stay best-effort, as the shell-based call was: report the
                # failure but do not abort the run.
                self._log.error("ERROR: could not run show-coords: %s" % error)
        finally:
            coordsFile.close()

    def run(self):
        """Validate the options, run nucmer and optionally show-coords.

        Returns the exit status of the nucmer process.
        Raises Exception when nucmer is not found in the user path or when
        checkOptions() rejects the input files.
        """
        LoggerFactory.setLevel(self._log, self.verbosity)
        if not CheckerUtils.isExecutableInUserPath("nucmer"):
            self._logAndRaise("ERROR: nucmer must be in your path")
        self.checkOptions()

        cmd = self._buildNucmerCommand()
        self._log.debug("Running nucmer with following commands : %s" % " ".join(cmd))
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains both pipes while waiting; wait() alone can
        # deadlock once the child fills an OS pipe buffer.
        stdoutData, stderrData = process.communicate()
        if process.returncode != 0:
            self._log.error("ERROR: nucmer exited with status %d: %s" % (process.returncode, stderrData.decode("utf-8", "replace").strip()))

        if self._showCoords:
            self._runShowCoords()

        return process.returncode


if __name__ == "__main__":
    iLaunchNucmer = LaunchNucmer()
    iLaunchNucmer.setAttributesFromCmdLine()
    iLaunchNucmer.run()