diff commons/launcher/LaunchNucmer.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchNucmer.py	Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,158 @@
+#! /usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from commons.core.checker.CheckerUtils import CheckerUtils
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+import subprocess
+from commons.core.LoggerFactory import LoggerFactory
+import os
+  
+# Logger-name prefix; LaunchNucmer joins it with its class name when creating its logger.
+LOG_DEPTH = "repet.tools"  
+  
+class LaunchNucmer(object):
+    """Launch the ``nucmer`` program and, optionally, ``show-coords``.
+
+    The instance can be configured either through the constructor or from
+    the command line (see setAttributesFromCmdLine), then executed with run().
+    """
+
+    def __init__(self,queryFileName="", refFileName ="", prefix = None, genCoords=False, showCoords = False, mum=False, maxGaps=90, minMatch=20, nooptimize=False,mincluster=65, minIdentity=50, minLength=100, verbosity=0):
+        """Store the launcher settings and create the logger.
+
+        @param queryFileName: query file name, passed as 2nd positional argument to nucmer
+        @param refFileName: reference file name, passed as 1st positional argument to nucmer
+        @param prefix: value of nucmer '--prefix' (option omitted when None)
+        @param genCoords: if true, add '-o' to the nucmer command line
+        @param showCoords: if true, run show-coords after nucmer
+        @param mum: if true, add '--mum' to the nucmer command line
+        @param maxGaps: value of nucmer '-g'
+        @param minMatch: value of nucmer '-l'
+        @param nooptimize: if true use '--nooptimize', otherwise '--optimize'
+        @param mincluster: value of nucmer '-c'
+        @param minIdentity: value of show-coords '-I'
+        @param minLength: value of show-coords '-L'
+        @param verbosity: verbosity level given to the logger
+        """
+        self._queryFileName = queryFileName
+        self._refFileName = refFileName
+        self._prefix = prefix
+        self._genCoords = genCoords
+        self._showCoords = showCoords
+        self._mum = mum
+        self._maxgaps = maxGaps
+        self._minMatch = minMatch
+        self._nooptimize = nooptimize
+        self._mincluster = mincluster
+        self._minIdentity = minIdentity
+        self._minLength = minLength
+        self.verbosity = verbosity
+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)
+
+    def setMincluster(self, value):
+        """Set the value given to nucmer '-c'."""
+        self._mincluster = value
+
+    def getMincluster(self):
+        """Return the value given to nucmer '-c'."""
+        return self._mincluster
+
+    mincluster = property(getMincluster, setMincluster)
+
+    def setAttributesFromCmdLine(self):
+        """Parse the command line and copy the parsed options onto the instance.
+
+        The parsed options object is also kept in self._options.
+        """
+        description = "LaunchNucmer runs the Nucmer program (part of the mummer package) ."    
+        parser = RepetOptionParser(description = description)
+        parser.add_option("-q", "--query", dest="queryFileName", default = "",  action="store", type="string", help="input query file [compulsory] [format: fasta]")
+        parser.add_option("-r", "--ref", dest="refFileName", default = "",  action="store", type="string", help="input ref file [compulsory] [format: fasta]")
+        parser.add_option("-p", "--prefix", dest="prefix", default = None,  action="store", type="string", help="prefix name [optional]")
+        parser.add_option("-o","--gencoords", dest="genCoords",action="store_true", help="generate coords file with minimal option (show-coords -r) [optional] ")
+        parser.add_option("-s","--showcoords", dest="showCoords",action="store_true", help="generate coords file with: show-coords -r -c -l -d -I 50 -L 100 -T [optional] ")        
+        parser.add_option("-m", "--mum", dest="mum",  action="store_true", help="Use anchor matches that are unique in both the reference and query [optional] ")      
+        parser.add_option("-g", "--maxgaps", dest="maxgaps", default = 90,  action="store", type="int", help="Maximum gap between two adjacent matches in a cluster (default 90) [optional] ")
+        parser.add_option("-l", "--minmatch", dest="minMatch", default = 20,  action="store", type="int", help="Minimum length of an maximal exact match (default 20) [optional] ")
+        parser.add_option("-n", "--nooptimize", dest="nooptimize", action="store_true", help="nooptimize (default --optimize) [optional] ")  
+        parser.add_option("-j", "--mincluster", dest="mincluster", default = 65,  action="store", type="int", help="Minimum length of a cluster of matches (default 65) [optional] ")
+
+        parser.add_option("-i", "--minIdentity", dest="minIdentity", default = 50,  action="store", type="int", help="Minimum identity for show_coords (default 50) [optional] ")
+        parser.add_option("-u", "--minLength", dest="minLength", default = 100,  action="store", type="int", help="Minimum alignment length for show_coords (default 100) [optional] ")
+        parser.add_option("-v", "--verbosity", dest="verbosity", default = 0,  action="store", type="int", help="verbosity [optional] ")
+
+        (self._options, args) = parser.parse_args()
+        self._setAttributesFromOptions(self._options)
+
+    def _setAttributesFromOptions(self, options):
+        """Copy every parsed option onto the matching instance attribute."""
+        self._queryFileName = options.queryFileName
+        self._refFileName = options.refFileName
+        self._prefix = options.prefix
+        self._genCoords = options.genCoords
+        self._showCoords = options.showCoords
+        self._mum = options.mum
+        self._maxgaps = options.maxgaps
+        self._minMatch = options.minMatch
+        self._nooptimize = options.nooptimize
+        self._mincluster = options.mincluster
+
+        self._minIdentity = options.minIdentity
+        self._minLength = options.minLength
+
+        self.verbosity = options.verbosity
+
+    def _logAndRaise(self, errorMsg):
+        """Log errorMsg at error level, then raise Exception(errorMsg)."""
+        self._log.error(errorMsg)
+        raise Exception(errorMsg)
+
+    def checkOptions(self):
+        """Check that the query and reference files are specified and exist.
+
+        @raise Exception: if a file name is empty, or if
+            FileUtils.isRessourceExists reports the file as missing
+        """
+        if self._queryFileName == "":
+            self._logAndRaise("ERROR: No specified --query option!")
+        if not FileUtils.isRessourceExists(self._queryFileName):
+            self._logAndRaise("ERROR: Query file: %s does not exist!" % self._queryFileName)
+
+        if self._refFileName == "":
+            self._logAndRaise("ERROR: No specified --ref option!")
+        if not FileUtils.isRessourceExists(self._refFileName):
+            # The message needs a %s placeholder: without it the '%' operator
+            # raises TypeError instead of reporting the missing file.
+            self._logAndRaise("ERROR: Ref file: %s does not exist!" % self._refFileName)
+
+    def run(self):
+        """Run nucmer, then show-coords when requested.
+
+        @return: the return code of the nucmer process
+        @raise Exception: if nucmer is not found in the user path or if
+            checkOptions() rejects the settings
+        """
+        LoggerFactory.setLevel(self._log, self.verbosity)
+        if not CheckerUtils.isExecutableInUserPath("nucmer") :
+            self._logAndRaise("ERROR: nucmer must be in your path")
+        self.checkOptions()
+
+        # Build the argument list directly (same order as before) rather than
+        # splitting a formatted string, so that file names or a prefix
+        # containing whitespace stay single arguments.
+        cmd = ["nucmer", self._refFileName, self._queryFileName]
+        if self._prefix is not None:
+            cmd.append("--prefix=%s" % self._prefix)
+        if self._genCoords:
+            cmd.append("-o")
+        if self._mum:
+            cmd.append("--mum")
+        cmd.append("-g=%d" % self._maxgaps)
+        cmd.append("-l=%d" % self._minMatch)
+        if self._nooptimize:
+            cmd.append("--nooptimize")
+        else:
+            cmd.append("--optimize")
+        cmd.append("-c=%d" % self._mincluster)
+        self._log.debug("Running nucmer with following commands : %s" % " ".join(cmd))
+        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        # communicate() drains both pipes while waiting; a bare wait() can
+        # block forever once the child fills a pipe buffer.
+        process.communicate()
+
+        if self._showCoords:
+            # Without --prefix, nucmer writes its delta file with its own
+            # default prefix ("out"), so show-coords must read that file
+            # instead of "None.delta".
+            prefix = self._prefix
+            if prefix is None:
+                prefix = "out"
+            #use of os.system because redirect on process is broken in python < 3.0
+            cmd = "show-coords -r -c -l -d -I %d -L %d -T %s.delta > %s.coords" % (self._minIdentity, self._minLength, prefix, prefix)
+            self._log.debug("Running show-coords with following commands : %s" %cmd)
+            os.system(cmd)
+
+        return process.returncode
+            
+if __name__ == "__main__":
+    # Script entry point: configure a launcher from the command line and run it.
+    launcher = LaunchNucmer()
+    launcher.setAttributesFromCmdLine()
+    launcher.run()
\ No newline at end of file