diff commons/launcher/RepeatMaskerProgramLauncher.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/RepeatMaskerProgramLauncher.py	Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,286 @@
+#!/usr/bin/env python
+
+##@file
+# Launch RepeatMasker (pairwise alignment for repeat detection).
+
+
+import os
+import sys
+
+from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
+
+
+class RepeatMaskerProgramLauncher( AbstractProgramLauncher ):
+    """
+    Launch RepeatMasker (pairwise alignment for repeat detection).
+    """
+    
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        AbstractProgramLauncher.__init__( self )
+        self._prgName = "RepeatMasker"
+        self._formatInFile = "fasta"
+        self._sbjFile = ""
+        self._nbProc = 1
+        self._calcGc = False
+        self._skipIs = False
+        self._maskSsr = True
+        self._onlySsr = False
+        self._cmdLineSpecificOptions = "s:n:gblmo:"
+        
+        
+    def getSpecificHelpAsString( self ):
+        """
+        Return the specific help as a string.
+        """
+        string = ""
+        string += "\nspecific options:"
+        string += "\n     -s: name of the subject file (repeats, format='fasta')"
+        string += "\n     -n: nb of processors to use in parallel (default='%i')" % ( self.getNbProcessors() )
+        string += "\n     -g: calculate the GC content"
+        string += "\n     -b: skip bacterial insertion element check"
+        string += "\n     -l: does not mask low-complexity DNA or simple repeats"
+        string += "\n     -m: only masks low complex/simple repeats (no interspersed repeats)"
+        string += "\n     -o: name of the output file"
+        string += "\n         with -s: format='align', default=inFile+'.cat.align')"
+        string += "\n         with -m: format='path', default=inFile+'.cat.path')"
+        return string
+    
+    
+    def setASpecificAttributeFromCmdLine( self, o, a="" ):
+        """
+        Set a specific attribute from the command-line arguments.
+        """
+        if o =="-s":
+            self.setSubjectFile( a )
+        elif o == "-n":
+            self.setNbProcessors( a )
+        elif o == "-g":
+            self.setCalculateGCcontent()
+        elif o == "-b":
+            self.setSkipBacterialIsCheck()
+        elif o == "-l":
+            self.unsetMaskSsr()
+        elif o == "-m":
+            self.setOnlySsr()
+        elif o == "-o":
+            self.setOutputFile( a )
+            
+            
+    def setSubjectFile( self, arg ):
+        self._sbjFile = arg
+        
+        
+    def setNbProcessors( self, arg ):
+        self._nbProc = int(arg)
+        
+        
+    def setCalculateGCcontent( self ):
+        self._calcGc = True
+        
+        
+    def setSkipBacterialIsCheck( self ):
+        self._skipIs = True
+        
+        
+    def unsetMaskSsr( self ):
+        self._maskSsr = False
+        
+        
+    def setOnlySsr( self ):
+        self._onlySsr = True
+        
+        
+    def getSubjectFile( self ):
+        return self._sbjFile
+    
+    
+    def getNbProcessors( self ):
+        return self._nbProc
+    
+    
+    def getCalculateGCcontent( self ):
+        return self._calcGc
+    
+    
+    def getSkipBacterialIsCheck( self ):
+        return self._skipIs
+    
+    
+    def getMaskSsr( self ):
+        return self._maskSsr
+    
+    
+    def getOnlySsr( self ):
+        return self._onlySsr
+    
+    
+    def checkSpecificAttributes( self ):
+        """
+        Check the specific attributes before running the program.
+        """
+        if ( self.getSubjectFile() == "" and not self.getOnlySsr() ) \
+               or ( self.getSubjectFile() != "" and self.getOnlySsr() ):
+            string = "ERROR: need to specify -s or -m"
+            print string
+            print self.getHelpAsString()
+            sys.exit(1)
+        if self.getOutputFile() == "":
+            if not self.getOnlySsr():
+                self.setOutputFile( "%s.cat.align" % ( self.getInputFile() ) )
+            else:
+                self.setOutputFile( "%s.cat.path" % ( self.getInputFile() ) )
+                
+                
+    def setWrapperCommandLine( self ):
+        """
+        Set the command-line of the wrapper.
+        Required for RepeatMaskerClusterLauncher.
+        """
+        self._wrpCmdLine = self.getWrapperName()
+        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
+        if self.getSubjectFile() != "":
+            self._wrpCmdLine += " -s %s" % ( self.getSubjectFile() )
+        self._wrpCmdLine += " -n %i"  %( self.getNbProcessors() )
+        if self.getCalculateGCcontent():
+            self._wrpCmdLine += " -g"
+        if self.getSkipBacterialIsCheck():
+            self._wrpCmdLine += " -b"
+        if not self.getMaskSsr():
+            self._wrpCmdLine += " -l"
+        if self.getOnlySsr():
+            self._wrpCmdLine += " -m"
+        if self.getOutputFile() != "":
+            self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
+        if self.getClean():
+            self._wrpCmdLine += " -c"
+        if self.getVerbosityLevel() != 0:
+            self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )
+            
+            
+    def setProgramCommandLine( self ):
+        """
+        Set the command-line of the program.
+        """
+        self._prgCmdLine = self.getProgramName()
+        self._prgCmdLine += " -dir ."
+        self._prgCmdLine += " -pa %i" % ( self.getNbProcessors() )
+        if self.getCalculateGCcontent():
+            self._prgCmdLine += " -gccalc"
+        if self.getSkipBacterialIsCheck():
+            self._prgCmdLine += " -no_is"
+        if self.getMaskSsr():
+            self._prgCmdLine += " -nolow"
+        if self.getOnlySsr():
+            self._prgCmdLine += " -int"
+        if self.getSubjectFile() != "":
+            self._prgCmdLine += " -lib %s" % ( self.getSubjectFile() )
+        self._prgCmdLine += " %s" % ( self.getInputFile() )
+        
+        
+    def setListFilesToKeep( self ):
+        """
+        Set the list of files to keep.
+        """
+        if self.getOutputFile() == "":
+            if not self.getOnlySsr():
+                self.setOutputFile( "%s.cat.align" % ( self.getInputFile() ) )
+            else:
+                self.setOutputFile( "%s.cat.path" % ( self.getInputFile() ) )
+        self.appendFileToKeep( self.getOutputFile() )
+        self.appendFileToKeep( "%s.cat" % ( self.getInputFile() ) )
+        
+        
+    def setListFilesToRemove( self ):
+        """
+        Set the list of files to remove.
+        """
+        self.appendFileToRemove( "%s.stderr" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.tbl" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.ori.out" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.masked" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.out" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.log" % ( self.getInputFile() ) )
+        self.appendFileToRemove( "%s.ref" % ( self.getInputFile() ) )
+        
+        
+    def convertCatIntoAlign( self ):
+        """
+        Convert a 'cat' file into the 'align' format.
+        """
+        cmd = os.environ["REPET_PATH"] + "/bin/RMcat2align.py"
+        cmd += " -i %s.cat" % ( self.getInputFile() )
+        cmd += " -o %s.cat.align" % ( self.getInputFile() )
+        exitStatus = os.system( cmd )
+        if exitStatus != 0:
+            string = "ERROR while converting 'cat' file into 'align' format"
+            print string
+            sys.exit(1)
+            
+            
+    def convertCatIntoPath( self ):
+        """
+        Convert a 'cat' file into the 'path' format.
+        """
+        cmd = os.environ["REPET_PATH"] + "/bin/RMcat2path.py"
+        cmd += " -i %s.cat" % ( self.getInputFile() )
+        cmd += " -o %s.cat.path" % ( self.getInputFile() )
+        exitStatus = os.system( cmd )
+        if exitStatus != 0:
+            string = "ERROR while converting 'cat' file into 'path' format"
+            print string
+            sys.exit(1)
+            
+            
+    def setSummary( self ):
+        self._summary = "input file: %s" % ( self.getInputFile() )
+        if self.getSubjectFile() != "":
+            self._summary += "\nsubject file: %s" % ( self.getSubjectFile() )
+        self._summary += "\nnb processors: %i" % ( self.getNbProcessors() )
+        if self.getCalculateGCcontent():
+            self._summary += "\ncalculate the GC content"
+        if self.getSkipBacterialIsCheck():
+            self._summary += "\nskip bacterial insertion element check"
+        if self.getMaskSsr():
+            self._summary += "\nmask low-complexity DNA or simple repeats"
+        if self.getOnlySsr():
+            self._summary = "\nonly masks low complex/simple repeats (no interspersed repeats)"
+        if self.getOutputFile() == "":
+            if not self.getMaskSsr():
+                self.setOutputFile( "%s.cat.align" % ( self.getInputFile() ) )
+            else:
+                self.setOutputFile( "%s.cat.path" % ( self.getInputFile() ) )
+        self._summary += "\noutput file: %s" % ( self.getOutputFile() )
+        
+        
+    def run( self ):
+        """
+        Run the program.
+        """
+        self.start()
+        
+        self.setProgramCommandLine()
+        cmd = self.getProgramCommandLine()
+        if self.getVerbosityLevel() > 0:
+            print "LAUNCH: %s" % ( cmd )
+            sys.stdout.flush()
+        exitStatus = os.system( cmd )
+        if exitStatus != 0:
+            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
+            print string
+            sys.exit(1)
+            
+        if not self.getOnlySsr():
+            self.convertCatIntoAlign()
+        else:
+            self.convertCatIntoPath()
+            
+        self.end()
+        
+        
+if __name__ == "__main__":
+    i = RepeatMaskerProgramLauncher()
+    i.setAttributesFromCmdLine()
+    i.run()