diff commons/tools/LaunchBlaster.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/LaunchBlaster.py	Mon Apr 29 03:20:15 2013 -0400
@@ -0,0 +1,204 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from commons.core.LoggerFactory import LoggerFactory
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+import subprocess
+
+LOG_DEPTH = "repet.tools"
+
+##Launch BLASTER
+#
+class LaunchBlaster(object):
+    """Build the ``blaster`` command line from the stored options (constructor, set* methods or command line) and run it."""
+
+    # Blast type -> (blaster flag, template of the Blast option giving the CPU count).
+    _BLAST_TYPE_TO_ARGS = {"ncbi": ("-N", "-a %s"),
+                           "wu": ("-W", "-cpus=%s"),
+                           "blastplus": ("-X", "-num_threads %s")}
+
+    def __init__(self, queryFileName = "", subjectFileName = "", evalue = 1e-300, identity = 90, length = 100, doAllByall = False, type = "ncbi", nbCPU = 1, program="blastn",extraParams="", doClean = False, verbosity = 0):
+        """Store the options and create the logger; an empty subjectFileName selects the query file."""
+        self._queryFileName = queryFileName
+        self.setSubjectFileName(subjectFileName)
+        self._eValue = evalue
+        self._identity = identity
+        self._length = length
+        self._doAllByall = doAllByall
+        self._blastType = type
+        self._program = program
+        self._extraParams = extraParams
+        self._nbCPU = nbCPU
+        self._doClean = doClean
+        self._verbosity = verbosity
+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)
+
+    def setAttributesFromCmdLine(self):
+        """Declare the command-line options, parse the command line and copy the result into this instance."""
+        description = "Launch Blaster."
+        epilog = "\nExample 1: launch without verbosity and keep temporary files.\n"
+        epilog += "\t$ python LaunchBlaster.py -q query.fa -v 0"
+        epilog += "\n\t"
+        epilog += "\nExample 2: launch with verbosity to have errors (level 1) and basic information (level 2), and delete temporary files.\n"
+        epilog += "\t$ python LaunchBlaster.py -q query.fa -s nr.fa -c -v 2"
+        parser = RepetOptionParser(description = description, epilog = epilog)
+        parser.add_option("-q", "--query",      dest = "query",         action = "store",       type = "string", help = "query fasta file name [compulsory] [format: fasta]",       default = "")
+        parser.add_option("-s", "--subject",    dest = "subject",       action = "store",       type = "string", help = "subject fasta file name [default: query] [format: fasta]", default = "")
+        parser.add_option("-e", "--evalue",     dest = "evalue",        action = "store",       type = "string", help = "Blast e-value [default: 1e-300]",                           default = "1e-300")
+        parser.add_option("-d", "--id",         dest = "identity",      action = "store",       type = "int",    help = "Blast identity [default: 90]",                             default = 90)
+        parser.add_option("-l", "--length",     dest = "length",        action = "store",       type = "int",    help = "Minimal hit length [default: 100]",                        default = 100)
+        parser.add_option("-a", "--aba",        dest = "doAllByall",    action = "store_true",                   help = "all-by-all Blast [default: False]",                        default = False)
+        parser.add_option("-t", "--type",       dest = "type",          action = "store",       type = "string", help = "Blast type [ncbi, wu, blastplus] [default: ncbi]",         default = "ncbi")
+        parser.add_option("-u", "--program",    dest = "program",       action = "store",       type = "string", help = "Blast program type [blastn, blastx, blastx] [default: blastn]",         default = "blastn")  # NOTE(review): help lists "blastx" twice - confirm the intended list
+        parser.add_option("-x", "--extraParams",dest = "extraParams",   action = "store",       type = "string", help = "Additional blast program parameters[default: '']",         default = "")
+        parser.add_option("-n", "--ncpu",       dest = "cpu",           action = "store",       type = "int",    help = "Number of CPUs to use [default: 1]",                       default = 1)
+        parser.add_option("-c", "--clean",      dest = "doClean",       action = "store_true",                   help = "clean temporary files [default: False]",                   default = False)
+        parser.add_option("-v", "--verbosity",  dest = "verbosity",     action = "store",       type = "int",    help = "verbosity [default: 1]",                                   default = 1)
+        options = parser.parse_args()[0]
+        self._setAttributesFromOptions(options)
+
+    def _setAttributesFromOptions(self, options):
+        """Copy the parsed options into the instance; the query is set before the subject, which defaults to it."""
+        self.setQueryFileName(options.query)
+        self.setSubjectFileName(options.subject)
+        self.setEvalue(options.evalue)
+        self.setIdentity(options.identity)
+        self.setLength(options.length)
+        self.setDoAllByall(options.doAllByall)
+        self.setType(options.type)
+        self.setProgram(options.program)
+        self.setExtraParams(options.extraParams)
+        self.setCPU(options.cpu)
+        self.setDoClean(options.doClean)
+        self.setVerbosity(options.verbosity)
+
+    def setQueryFileName(self, queryFileName):
+        self._queryFileName = queryFileName
+
+    def setSubjectFileName(self, subjectFileName):
+        """Set the subject file name; "" falls back to the query file name known at call time."""
+        if subjectFileName == "":
+            self._subjectFileName = self._queryFileName
+        else:
+            self._subjectFileName = subjectFileName
+
+    def setEvalue(self, evalue):
+        self._eValue = evalue
+
+    def setIdentity(self, identity):
+        self._identity = identity
+
+    def setLength(self, length):
+        self._length = length
+
+    def setDoAllByall(self, doAllByall):
+        self._doAllByall = doAllByall
+
+    def setType(self, blastType):
+        self._blastType = blastType
+
+    def setProgram(self, program):
+        self._program = program
+
+    def setExtraParams(self, extraParams):
+        self._extraParams = extraParams
+
+    def setCPU(self, cpu):
+        self._nbCPU = cpu
+
+    def setDoClean(self, doClean):
+        self._doClean = doClean
+
+    def setVerbosity(self, verbosity):
+        self._verbosity = verbosity
+
+    def _checkOptions(self):
+        """Validate the options; log and raise an Exception if the query is missing or the Blast type is unknown."""
+        if self._queryFileName == "":
+            self._logAndRaise("ERROR: Missing input fasta file name")
+        # The subject may have been defaulted while the query was still unset: resolve the default again.
+        self.setSubjectFileName(self._subjectFileName)
+        lBlastType = ["ncbi", "wu", "blastplus"]
+        if self._blastType.lower() not in lBlastType:
+            self._logAndRaise("ERROR: unknown Blast type '%s' - correct values are %s" % (self._blastType, lBlastType))
+
+    def _logAndRaise(self, errorMsg):
+        """Log errorMsg at error level, then raise it as an Exception."""
+        self._log.error(errorMsg)
+        raise Exception(errorMsg)
+
+    def _getBlasterCmd(self):
+        """Return the complete blaster command as a single shell string."""
+        lArgs = ["-n %s" % self._program, "-q %s" % self._queryFileName,
+                 "-s %s" % self._subjectFileName, "-B %s" % self._queryFileName]
+        if self._doAllByall:
+            lArgs.append("-a")
+        lArgs += ["-E %s" % self._eValue, "-L %s" % self._length, "-I %s" % self._identity]
+        # Compare case-insensitively, as _checkOptions does, so that e.g. "NCBI" keeps its flags.
+        blastArgs = self._BLAST_TYPE_TO_ARGS.get(self._blastType.lower())
+        if blastArgs is not None:
+            blastFlag, cpuOption = blastArgs
+            lArgs += [blastFlag, "-p '%s %s'" % (cpuOption % self._nbCPU, self._extraParams)]
+        # TODO: check the check option at the beginning of step 2 to allow megablast ("-n megablast") for blast (-N) and blast+ (-X)
+        if self._doClean:
+            lArgs.append("-c")
+        # blaster is given a verbosity one level below the launcher's own.
+        lArgs.append("-v %i" % (self._verbosity - 1))
+        return self._getSystemCommand("blaster", lArgs)
+
+    def _getSystemCommand(self, prg, lArgs):
+        """Return prg followed by every argument of lArgs, separated by single spaces."""
+        return " ".join([prg] + list(lArgs))
+
+    def run(self):
+        """Check the options and run blaster; log and raise an Exception if the options are invalid or blaster exits non-zero."""
+        LoggerFactory.setLevel(self._log, self._verbosity)
+        self._checkOptions()
+        self._log.info("START LaunchBlaster")
+        self._log.debug("Query file name: %s" % self._queryFileName)
+        self._log.debug("Subject file name: %s" % self._subjectFileName)
+        if self._doClean:
+            self._log.warning("Files will be cleaned")
+        cmd = self._getBlasterCmd()
+        self._log.debug("Running : %s" % cmd)
+        # NOTE: the command goes through the shell (the quoted -p argument relies on it);
+        # file names and extra parameters are not escaped, so they must come from a trusted source.
+        process = subprocess.Popen(cmd, shell = True)
+        process.communicate()
+        if process.returncode != 0:
+            self._logAndRaise("ERROR when launching '%s'" % cmd)
+        self._log.info("END LaunchBlaster")
+
+if __name__ == "__main__":  # run as a script: the options are read from the command line
+    launcher = LaunchBlaster()
+    launcher.setAttributesFromCmdLine()
+    launcher.run()
\ No newline at end of file