view commons/launcher/LaunchNucmer.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
line wrap: on
line source

#! /usr/bin/env python

# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software.  You can  use, 
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info". 
#
# As a counterpart to the access to the source code and  rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty  and the software's author,  the holder of the
# economic rights,  and the successive licensors  have only  limited
# liability. 
#
# In this respect, the user's attention is drawn to the risks associated
# with loading,  using,  modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean  that it is complicated to manipulate,  and  that  also
# therefore means  that it is reserved for developers  and  experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or 
# data to be ensured and,  more generally, to use and operate it in the 
# same conditions as regards security. 
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.

from commons.core.checker.CheckerUtils import CheckerUtils
from commons.core.utils.FileUtils import FileUtils
from commons.core.utils.RepetOptionParser import RepetOptionParser
import subprocess
from commons.core.LoggerFactory import LoggerFactory
import os
  
# Logger-name prefix; LaunchNucmer.__init__ joins it with the class name
# ("<LOG_DEPTH>.<ClassName>") when asking LoggerFactory for a logger.
LOG_DEPTH = "repet.tools"
  
class LaunchNucmer(object):
    """Run the nucmer aligner and, optionally, show-coords on its output.

    The instance can be configured either through the constructor or from
    the command line (setAttributesFromCmdLine); run() then checks the
    inputs, launches nucmer and returns its exit status.
    """

    # Output prefix nucmer uses when no --prefix is given (MUMmer default).
    DEFAULT_PREFIX = "out"

    def __init__(self, queryFileName="", refFileName="", prefix=None,
                 genCoords=False, showCoords=False, mum=False, maxGaps=90,
                 minMatch=20, nooptimize=False, mincluster=65,
                 minIdentity=50, minLength=100, verbosity=0):
        """Store the run parameters and create the logger.

        queryFileName / refFileName: input files handed to nucmer.
        prefix: nucmer output prefix, or None to let nucmer pick its default.
        genCoords: pass "-o" to nucmer.
        showCoords: run show-coords on "<prefix>.delta" after nucmer.
        mum: pass "--mum" to nucmer.
        maxGaps, minMatch, mincluster: values for nucmer -g, -l and -c.
        nooptimize: pass "--nooptimize" instead of "--optimize".
        minIdentity, minLength: values for show-coords -I and -L.
        verbosity: level given to the logger.
        """
        self._queryFileName = queryFileName
        self._refFileName = refFileName
        self._prefix = prefix
        self._genCoords = genCoords
        self._showCoords = showCoords
        self._mum = mum
        self._maxgaps = maxGaps
        self._minMatch = minMatch
        self._nooptimize = nooptimize
        self._mincluster = mincluster
        self._minIdentity = minIdentity
        self._minLength = minLength
        self.verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)

    def setMincluster(self, value):
        """Set the value passed to nucmer -c."""
        self._mincluster = value

    def getMincluster(self):
        """Return the value passed to nucmer -c."""
        return self._mincluster

    mincluster = property(getMincluster, setMincluster)

    def setAttributesFromCmdLine(self):
        """Parse the process command line and copy the options onto self."""
        description = "LaunchNucmer runs the Nucmer program (part of the mummer package) ."
        parser = RepetOptionParser(description = description)
        parser.add_option("-q", "--query", dest="queryFileName", default = "",  action="store", type="string", help="input query file [compulsory] [format: fasta]")
        parser.add_option("-r", "--ref", dest="refFileName", default = "",  action="store", type="string", help="input ref file [compulsory] [format: fasta]")
        parser.add_option("-p", "--prefix", dest="prefix", default = None,  action="store", type="string", help="prefix name [optional]")
        parser.add_option("-o","--gencoords", dest="genCoords",action="store_true", help="generate coords file with minimal option (show-coords -r) [optional] ")
        parser.add_option("-s","--showcoords", dest="showCoords",action="store_true", help="generate coords file with: show-coords -r -c -l -d -I 50 -L 100 -T [optional] ")
        parser.add_option("-m", "--mum", dest="mum",  action="store_true", help="Use anchor matches that are unique in both the reference and query [optional] ")
        parser.add_option("-g", "--maxgaps", dest="maxgaps", default = 90,  action="store", type="int", help="Maximum gap between two adjacent matches in a cluster (default 90) [optional] ")
        parser.add_option("-l", "--minmatch", dest="minMatch", default = 20,  action="store", type="int", help="Minimum length of an maximal exact match (default 20) [optional] ")
        parser.add_option("-n", "--nooptimize", dest="nooptimize", action="store_true", help="nooptimize (default --optimize) [optional] ")
        parser.add_option("-j", "--mincluster", dest="mincluster", default = 65,  action="store", type="int", help="Minimum length of a cluster of matches (default 65) [optional] ")

        parser.add_option("-i", "--minIdentity", dest="minIdentity", default = 50,  action="store", type="int", help="Minimum identity for show_coords (default 50) [optional] ")
        parser.add_option("-u", "--minLength", dest="minLength", default = 100,  action="store", type="int", help="Minimum alignment length for show_coords (default 100) [optional] ")
        parser.add_option("-v", "--verbosity", dest="verbosity", default = 0,  action="store", type="int", help="verbosity [optional] ")

        (self._options, args) = parser.parse_args()
        self._setAttributesFromOptions(self._options)

    def _setAttributesFromOptions(self, options):
        """Copy every parsed option onto the matching instance attribute."""
        self._queryFileName = options.queryFileName
        self._refFileName = options.refFileName
        self._prefix = options.prefix
        self._genCoords = options.genCoords
        self._showCoords = options.showCoords
        self._mum = options.mum
        self._maxgaps = options.maxgaps
        self._minMatch = options.minMatch
        self._nooptimize = options.nooptimize
        self._mincluster = options.mincluster

        self._minIdentity = options.minIdentity
        self._minLength = options.minLength

        self.verbosity = options.verbosity

    def _logAndRaise(self, errorMsg):
        """Log errorMsg at error level, then raise it as an Exception."""
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def checkOptions(self):
        """Check that query and ref files were given and exist.

        Raises Exception (through _logAndRaise) on the first problem found.
        """
        if self._queryFileName == "":
            self._logAndRaise("ERROR: No specified --query option!")
        if not FileUtils.isRessourceExists(self._queryFileName):
            self._logAndRaise("ERROR: Query file: %s does not exist!" % self._queryFileName)

        if self._refFileName == "":
            self._logAndRaise("ERROR: No specified --ref option!")
        if not FileUtils.isRessourceExists(self._refFileName):
            # The message needs a %s placeholder: formatting a string without
            # one raises TypeError instead of reporting the missing file.
            self._logAndRaise("ERROR: Ref file: %s does not exist!" % self._refFileName)

    def _getOutputPrefix(self):
        """Return the prefix of the files nucmer writes for this run."""
        if self._prefix is None:
            return self.DEFAULT_PREFIX
        return self._prefix

    def _buildNucmerCmd(self):
        """Return the nucmer command as an argument list.

        A list (rather than a string that is split on whitespace) keeps file
        names and prefixes that contain spaces in one piece.
        """
        cmd = ["nucmer", self._refFileName, self._queryFileName]
        if self._prefix is not None:
            cmd.append("--prefix=%s" % self._prefix)
        if self._genCoords:
            cmd.append("-o")
        if self._mum:
            cmd.append("--mum")
        cmd.append("-g=%d" % self._maxgaps)
        cmd.append("-l=%d" % self._minMatch)
        if self._nooptimize:
            cmd.append("--nooptimize")
        else:
            cmd.append("--optimize")
        cmd.append("-c=%d" % self._mincluster)
        return cmd

    def _buildShowCoordsCmd(self):
        """Return the show-coords command (without redirection) as a list."""
        return ["show-coords", "-r", "-c", "-l", "-d",
                "-I", "%d" % self._minIdentity,
                "-L", "%d" % self._minLength,
                "-T", "%s.delta" % self._getOutputPrefix()]

    def _runShowCoords(self):
        """Run show-coords and write its standard output to <prefix>.coords.

        Failures are logged but not raised, so run() still returns the
        nucmer exit status.
        """
        cmd = self._buildShowCoordsCmd()
        coordsFileName = "%s.coords" % self._getOutputPrefix()
        self._log.debug("Running show-coords with following commands : %s > %s" % (" ".join(cmd), coordsFileName))
        try:
            # Redirect through a file handle instead of a shell ">" so the
            # prefix never goes through shell parsing.
            with open(coordsFileName, "w") as coordsFile:
                subprocess.call(cmd, stdout=coordsFile)
        except (IOError, OSError) as error:
            self._log.error("ERROR: show-coords could not be run: %s" % error)

    def run(self):
        """Check the inputs, run nucmer (then show-coords if requested).

        Returns the exit status of the nucmer process. Raises Exception when
        nucmer is not in the path or when checkOptions() rejects the inputs.
        """
        LoggerFactory.setLevel(self._log, self.verbosity)
        if not CheckerUtils.isExecutableInUserPath("nucmer"):
            self._logAndRaise("ERROR: nucmer must be in your path")
        self.checkOptions()

        cmd = self._buildNucmerCmd()
        self._log.debug("Running nucmer with following commands : %s" % " ".join(cmd))
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains both pipes while waiting; wait() alone can
        # deadlock once the child fills a pipe buffer.
        stderrData = process.communicate()[1]
        if process.returncode != 0:
            self._log.error("nucmer exited with code %d: %s" % (process.returncode, stderrData.decode("utf-8", "replace").strip()))

        if self._showCoords:
            self._runShowCoords()

        return process.returncode
            
if __name__ == "__main__":
    import sys

    # Configure the launcher from the command line, run it, and hand the
    # value returned by run() to sys.exit so a failed nucmer run is visible
    # to the calling shell instead of always exiting with status 0.
    iLaunchNucmer = LaunchNucmer()
    iLaunchNucmer.setAttributesFromCmdLine()
    sys.exit(iLaunchNucmer.run())