Mercurial > repos > yufei-luo > s_mart
diff commons/launcher/RepeatMaskerProgramLauncher.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line diff
#!/usr/bin/env python

##@file
# Launch RepeatMasker (pairwise alignment for repeat detection).


import os
import sys

from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher


class RepeatMaskerProgramLauncher( AbstractProgramLauncher ):
    """
    Launch RepeatMasker (pairwise alignment for repeat detection).

    Two mutually exclusive modes are supported:
      - with a subject file (-s): the '.cat' output is converted into the
        'align' format;
      - SSR-only (-m): the '.cat' output is converted into the 'path' format.

    Generic behaviour (input/output file handling, verbosity, clean-up,
    command-line parsing) is expected from AbstractProgramLauncher, which is
    not visible from this file.
    """

    def __init__( self ):
        """
        Constructor.
        """
        AbstractProgramLauncher.__init__( self )
        self._prgName = "RepeatMasker"
        self._formatInFile = "fasta"
        self._sbjFile = ""       # subject file (-s); empty means "not set"
        self._nbProc = 1         # number of processors (-n)
        self._calcGc = False     # calculate the GC content (-g)
        self._skipIs = False     # skip bacterial insertion element check (-b)
        self._maskSsr = True     # mask low-complexity/simple repeats (unset by -l)
        self._onlySsr = False    # only mask low-complexity/simple repeats (-m)
        # options handled by setASpecificAttributeFromCmdLine(); a trailing
        # ':' presumably marks an option taking an argument (getopt-like)
        self._cmdLineSpecificOptions = "s:n:gblmo:"


    def getSpecificHelpAsString( self ):
        """
        Return the specific help as a string.
        """
        string = ""
        string += "\nspecific options:"
        string += "\n -s: name of the subject file (repeats, format='fasta')"
        string += "\n -n: nb of processors to use in parallel (default='%i')" % ( self.getNbProcessors() )
        string += "\n -g: calculate the GC content"
        string += "\n -b: skip bacterial insertion element check"
        string += "\n -l: does not mask low-complexity DNA or simple repeats"
        string += "\n -m: only masks low complex/simple repeats (no interspersed repeats)"
        string += "\n -o: name of the output file"
        string += "\n with -s: format='align', default=inFile+'.cat.align')"
        string += "\n with -m: format='path', default=inFile+'.cat.path')"
        return string


    def setASpecificAttributeFromCmdLine( self, o, a="" ):
        """
        Set a specific attribute from the command-line arguments.

        @param o: option flag (e.g. '-s')
        @param a: option argument, if any
        Unknown flags are silently ignored.
        """
        if o == "-s":
            self.setSubjectFile( a )
        elif o == "-n":
            self.setNbProcessors( a )
        elif o == "-g":
            self.setCalculateGCcontent()
        elif o == "-b":
            self.setSkipBacterialIsCheck()
        elif o == "-l":
            self.unsetMaskSsr()
        elif o == "-m":
            self.setOnlySsr()
        elif o == "-o":
            self.setOutputFile( a )


    def setSubjectFile( self, arg ):
        """
        Set the name of the subject file (repeats library).
        """
        self._sbjFile = arg


    def setNbProcessors( self, arg ):
        """
        Set the number of processors; 'arg' is converted with int().
        """
        self._nbProc = int(arg)


    def setCalculateGCcontent( self ):
        """
        Ask for the GC content to be calculated.
        """
        self._calcGc = True


    def setSkipBacterialIsCheck( self ):
        """
        Ask for the bacterial insertion element check to be skipped.
        """
        self._skipIs = True


    def unsetMaskSsr( self ):
        """
        Ask not to mask low-complexity DNA or simple repeats.
        """
        self._maskSsr = False


    def setOnlySsr( self ):
        """
        Ask to mask only low-complexity DNA or simple repeats.
        """
        self._onlySsr = True


    def getSubjectFile( self ):
        """
        Return the name of the subject file ('' if not set).
        """
        return self._sbjFile


    def getNbProcessors( self ):
        """
        Return the number of processors to use.
        """
        return self._nbProc


    def getCalculateGCcontent( self ):
        """
        Return True if the GC content has to be calculated.
        """
        return self._calcGc


    def getSkipBacterialIsCheck( self ):
        """
        Return True if the bacterial insertion element check is skipped.
        """
        return self._skipIs


    def getMaskSsr( self ):
        """
        Return True if low-complexity DNA and simple repeats are masked.
        """
        return self._maskSsr


    def getOnlySsr( self ):
        """
        Return True if only low-complexity DNA and simple repeats are masked.
        """
        return self._onlySsr


    def _setDefaultOutputFileIfNeeded( self ):
        """
        Give the output file its default name when none was specified.

        The default is inFile+'.cat.path' in SSR-only mode (-m) and
        inFile+'.cat.align' otherwise, as advertised in the help.
        """
        if self.getOutputFile() != "":
            return
        if self.getOnlySsr():
            formatName = "path"
        else:
            formatName = "align"
        self.setOutputFile( "%s.cat.%s" % ( self.getInputFile(), formatName ) )


    def checkSpecificAttributes( self ):
        """
        Check the specific attributes before running the program.

        Exactly one of -s (subject file) and -m (SSR only) must be given;
        otherwise the help is printed and the process exits with status 1.
        """
        hasSubjectFile = self.getSubjectFile() != ""
        # both given, or none given: the two modes are mutually exclusive
        if hasSubjectFile == bool( self.getOnlySsr() ):
            string = "ERROR: need to specify -s or -m"
            print( string )
            print( self.getHelpAsString() )
            sys.exit(1)
        self._setDefaultOutputFileIfNeeded()


    def setWrapperCommandLine( self ):
        """
        Set the command-line of the wrapper.
        Required for RepeatMaskerClusterLauncher.
        """
        self._wrpCmdLine = self.getWrapperName()
        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
        if self.getSubjectFile() != "":
            self._wrpCmdLine += " -s %s" % ( self.getSubjectFile() )
        self._wrpCmdLine += " -n %i" % ( self.getNbProcessors() )
        if self.getCalculateGCcontent():
            self._wrpCmdLine += " -g"
        if self.getSkipBacterialIsCheck():
            self._wrpCmdLine += " -b"
        if not self.getMaskSsr():
            self._wrpCmdLine += " -l"
        if self.getOnlySsr():
            self._wrpCmdLine += " -m"
        if self.getOutputFile() != "":
            self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
        if self.getClean():
            self._wrpCmdLine += " -c"
        if self.getVerbosityLevel() != 0:
            self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )


    def setProgramCommandLine( self ):
        """
        Set the command-line of the program.
        """
        self._prgCmdLine = self.getProgramName()
        self._prgCmdLine += " -dir ."
        self._prgCmdLine += " -pa %i" % ( self.getNbProcessors() )
        if self.getCalculateGCcontent():
            self._prgCmdLine += " -gccalc"
        if self.getSkipBacterialIsCheck():
            self._prgCmdLine += " -no_is"
        # '-nolow' tells RepeatMasker NOT to mask low-complexity DNA or
        # simple repeats, hence it is added only when masking is unset (-l)
        if not self.getMaskSsr():
            self._prgCmdLine += " -nolow"
        if self.getOnlySsr():
            self._prgCmdLine += " -int"
        if self.getSubjectFile() != "":
            self._prgCmdLine += " -lib %s" % ( self.getSubjectFile() )
        self._prgCmdLine += " %s" % ( self.getInputFile() )


    def setListFilesToKeep( self ):
        """
        Set the list of files to keep.
        """
        self._setDefaultOutputFileIfNeeded()
        self.appendFileToKeep( self.getOutputFile() )
        self.appendFileToKeep( "%s.cat" % ( self.getInputFile() ) )


    def setListFilesToRemove( self ):
        """
        Set the list of files to remove.
        """
        for suffix in ( "stderr", "tbl", "ori.out", "masked", "out", "log", "ref" ):
            self.appendFileToRemove( "%s.%s" % ( self.getInputFile(), suffix ) )


    def _convertCat( self, scriptName, formatName ):
        """
        Convert inFile+'.cat' into inFile+'.cat.<formatName>'.

        @param scriptName: converter script located in $REPET_PATH/bin
        @param formatName: target format ('align' or 'path')
        Exit with status 1 if REPET_PATH is not set or if the converter
        returns a non-zero status.
        """
        repetPath = os.environ.get( "REPET_PATH" )
        if repetPath is None:
            string = "ERROR: environment variable 'REPET_PATH' is not set"
            print( string )
            sys.exit(1)
        # NOTE(review): the converted file always gets the default name, even
        # when another output file was given with -o; whether the base class
        # handles that afterwards cannot be seen from here -- confirm.
        cmd = repetPath + "/bin/" + scriptName
        cmd += " -i %s.cat" % ( self.getInputFile() )
        cmd += " -o %s.cat.%s" % ( self.getInputFile(), formatName )
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            string = "ERROR while converting 'cat' file into '%s' format" % ( formatName )
            print( string )
            sys.exit(1)


    def convertCatIntoAlign( self ):
        """
        Convert a 'cat' file into the 'align' format.
        """
        self._convertCat( "RMcat2align.py", "align" )


    def convertCatIntoPath( self ):
        """
        Convert a 'cat' file into the 'path' format.
        """
        self._convertCat( "RMcat2path.py", "path" )


    def setSummary( self ):
        """
        Set the summary of the run (input, options and output file).
        """
        self._summary = "input file: %s" % ( self.getInputFile() )
        if self.getSubjectFile() != "":
            self._summary += "\nsubject file: %s" % ( self.getSubjectFile() )
        self._summary += "\nnb processors: %i" % ( self.getNbProcessors() )
        if self.getCalculateGCcontent():
            self._summary += "\ncalculate the GC content"
        if self.getSkipBacterialIsCheck():
            self._summary += "\nskip bacterial insertion element check"
        if self.getMaskSsr():
            self._summary += "\nmask low-complexity DNA or simple repeats"
        if self.getOnlySsr():
            # append: a plain assignment here would discard the lines above
            self._summary += "\nonly masks low complex/simple repeats (no interspersed repeats)"
        self._setDefaultOutputFileIfNeeded()
        self._summary += "\noutput file: %s" % ( self.getOutputFile() )


    def run( self ):
        """
        Run the program, then convert its 'cat' output.

        Exit with status 1 if the program returns a non-zero status.
        """
        self.start()

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            print( "LAUNCH: %s" % ( cmd ) )
            sys.stdout.flush()
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
            print( string )
            sys.exit(1)

        if not self.getOnlySsr():
            self.convertCatIntoAlign()
        else:
            self.convertCatIntoPath()

        self.end()


if __name__ == "__main__":
    i = RepeatMaskerProgramLauncher()
    i.setAttributesFromCmdLine()
    i.run()