Mercurial > repos > yufei-luo > s_mart
view commons/launcher/RepeatMaskerProgramLauncher.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line source
#!/usr/bin/env python

##@file
# Launch RepeatMasker (pairwise alignment for repeat detection).

import os
import sys

from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher


class RepeatMaskerProgramLauncher(AbstractProgramLauncher):
    """
    Launch RepeatMasker (pairwise alignment for repeat detection).

    Exactly one of two modes must be selected:
      - '-s <file>': search the input against a subject library; the 'cat'
        output of RepeatMasker is then converted into the 'align' format.
      - '-m': only mask low-complexity/simple repeats; the 'cat' output is
        then converted into the 'path' format.
    """

    def __init__(self):
        """
        Constructor.
        """
        AbstractProgramLauncher.__init__(self)
        self._prgName = "RepeatMasker"
        self._formatInFile = "fasta"
        self._sbjFile = ""      # subject library (option -s)
        self._nbProc = 1        # number of processors (option -n)
        self._calcGc = False    # option -g
        self._skipIs = False    # option -b
        self._maskSsr = True    # unset by option -l
        self._onlySsr = False   # option -m
        self._cmdLineSpecificOptions = "s:n:gblmo:"

    def getSpecificHelpAsString(self):
        """
        Return the specific help as a string.
        """
        lines = [
            "",
            "\nspecific options:",
            "\n -s: name of the subject file (repeats, format='fasta')",
            "\n -n: nb of processors to use in parallel (default='%i')" % (self.getNbProcessors()),
            "\n -g: calculate the GC content",
            "\n -b: skip bacterial insertion element check",
            "\n -l: does not mask low-complexity DNA or simple repeats",
            "\n -m: only masks low complex/simple repeats (no interspersed repeats)",
            "\n -o: name of the output file",
            "\n with -s: format='align', default=inFile+'.cat.align')",
            "\n with -m: format='path', default=inFile+'.cat.path')",
        ]
        return "".join(lines)

    def setASpecificAttributeFromCmdLine(self, o, a=""):
        """
        Set a specific attribute from the command-line arguments.

        @param o: option flag (e.g. '-s')
        @param a: option argument, if the flag takes one
        """
        if o == "-s":
            self.setSubjectFile(a)
        elif o == "-n":
            self.setNbProcessors(a)
        elif o == "-g":
            self.setCalculateGCcontent()
        elif o == "-b":
            self.setSkipBacterialIsCheck()
        elif o == "-l":
            self.unsetMaskSsr()
        elif o == "-m":
            self.setOnlySsr()
        elif o == "-o":
            self.setOutputFile(a)

    def setSubjectFile(self, arg):
        self._sbjFile = arg

    def setNbProcessors(self, arg):
        # The value comes from the command line as a string.
        self._nbProc = int(arg)

    def setCalculateGCcontent(self):
        self._calcGc = True

    def setSkipBacterialIsCheck(self):
        self._skipIs = True

    def unsetMaskSsr(self):
        self._maskSsr = False

    def setOnlySsr(self):
        self._onlySsr = True

    def getSubjectFile(self):
        return self._sbjFile

    def getNbProcessors(self):
        return self._nbProc

    def getCalculateGCcontent(self):
        return self._calcGc

    def getSkipBacterialIsCheck(self):
        return self._skipIs

    def getMaskSsr(self):
        return self._maskSsr

    def getOnlySsr(self):
        return self._onlySsr

    def _getDefaultOutputFile(self):
        """
        Return the default output file name: inFile+'.cat.path' with -m,
        inFile+'.cat.align' otherwise.
        """
        if self.getOnlySsr():
            return "%s.cat.path" % (self.getInputFile())
        return "%s.cat.align" % (self.getInputFile())

    def _setDefaultOutputFileIfMissing(self):
        """
        Give the output file its default name when none was specified.
        """
        if self.getOutputFile() == "":
            self.setOutputFile(self._getDefaultOutputFile())

    def checkSpecificAttributes(self):
        """
        Check the specific attributes before running the program.

        Print the help and exit with status 1 unless exactly one of
        -s (subject file) and -m (only SSR) was given.
        """
        hasSubject = self.getSubjectFile() != ""
        onlySsr = self.getOnlySsr()
        if (not hasSubject and not onlySsr) or (hasSubject and onlySsr):
            print("ERROR: need to specify -s or -m")
            print(self.getHelpAsString())
            sys.exit(1)
        self._setDefaultOutputFileIfMissing()

    def setWrapperCommandLine(self):
        """
        Set the command-line of the wrapper.
        Required for RepeatMaskerClusterLauncher.
        """
        cmd = self.getWrapperName()
        cmd += " -i %s" % (self.getInputFile())
        if self.getSubjectFile() != "":
            cmd += " -s %s" % (self.getSubjectFile())
        cmd += " -n %i" % (self.getNbProcessors())
        if self.getCalculateGCcontent():
            cmd += " -g"
        if self.getSkipBacterialIsCheck():
            cmd += " -b"
        if not self.getMaskSsr():
            cmd += " -l"
        if self.getOnlySsr():
            cmd += " -m"
        if self.getOutputFile() != "":
            cmd += " -o %s" % (self.getOutputFile())
        if self.getClean():
            cmd += " -c"
        if self.getVerbosityLevel() != 0:
            cmd += " -v %i" % (self.getVerbosityLevel())
        self._wrpCmdLine = cmd

    def setProgramCommandLine(self):
        """
        Set the command-line of the program.
        """
        cmd = self.getProgramName()
        cmd += " -dir ."
        cmd += " -pa %i" % (self.getNbProcessors())
        if self.getCalculateGCcontent():
            cmd += " -gccalc"
        if self.getSkipBacterialIsCheck():
            cmd += " -no_is"
        # RepeatMasker's '-nolow' disables the masking of low-complexity DNA
        # and simple repeats, so it must be passed only when that masking
        # was turned off (wrapper option -l).
        if not self.getMaskSsr():
            cmd += " -nolow"
        if self.getOnlySsr():
            cmd += " -int"
        if self.getSubjectFile() != "":
            cmd += " -lib %s" % (self.getSubjectFile())
        cmd += " %s" % (self.getInputFile())
        self._prgCmdLine = cmd

    def setListFilesToKeep(self):
        """
        Set the list of files to keep.
        """
        self._setDefaultOutputFileIfMissing()
        self.appendFileToKeep(self.getOutputFile())
        self.appendFileToKeep("%s.cat" % (self.getInputFile()))

    def setListFilesToRemove(self):
        """
        Set the list of files to remove.
        """
        for extension in ("stderr", "tbl", "ori.out", "masked", "out", "log", "ref"):
            self.appendFileToRemove("%s.%s" % (self.getInputFile(), extension))

    def _convertCat(self, converter, outFormat):
        """
        Convert the 'cat' file of the input into another format.

        The result is written to the output file if one is set, otherwise
        to inFile+'.cat.'+outFormat. Exit with status 1 if the converter
        returns a non-zero status.

        @param converter: name of the script in $REPET_PATH/bin
        @param outFormat: 'align' or 'path'
        """
        outFile = self.getOutputFile()
        if outFile == "":
            outFile = "%s.cat.%s" % (self.getInputFile(), outFormat)
        cmd = os.environ["REPET_PATH"] + "/bin/" + converter
        cmd += " -i %s.cat" % (self.getInputFile())
        cmd += " -o %s" % (outFile)
        exitStatus = os.system(cmd)
        if exitStatus != 0:
            print("ERROR while converting 'cat' file into '%s' format" % (outFormat))
            sys.exit(1)

    def convertCatIntoAlign(self):
        """
        Convert a 'cat' file into the 'align' format.
        """
        self._convertCat("RMcat2align.py", "align")

    def convertCatIntoPath(self):
        """
        Convert a 'cat' file into the 'path' format.
        """
        self._convertCat("RMcat2path.py", "path")

    def setSummary(self):
        """
        Set the summary of the run (input, options and output file).
        """
        summary = "input file: %s" % (self.getInputFile())
        if self.getSubjectFile() != "":
            summary += "\nsubject file: %s" % (self.getSubjectFile())
        summary += "\nnb processors: %i" % (self.getNbProcessors())
        if self.getCalculateGCcontent():
            summary += "\ncalculate the GC content"
        if self.getSkipBacterialIsCheck():
            summary += "\nskip bacterial insertion element check"
        if self.getMaskSsr():
            summary += "\nmask low-complexity DNA or simple repeats"
        if self.getOnlySsr():
            # Append (not assign) so the lines above are not discarded.
            summary += "\nonly masks low complex/simple repeats (no interspersed repeats)"
        self._summary = summary
        # Same default as in checkSpecificAttributes() and setListFilesToKeep().
        self._setDefaultOutputFileIfMissing()
        self._summary += "\noutput file: %s" % (self.getOutputFile())

    def run(self):
        """
        Run the program, then convert its 'cat' output into the 'align'
        format (or into the 'path' format with -m).

        Exit with status 1 if RepeatMasker returns a non-zero status.
        """
        self.start()

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            print("LAUNCH: %s" % (cmd))
            sys.stdout.flush()
        exitStatus = os.system(cmd)
        if exitStatus != 0:
            print("ERROR: program '%s' returned exit status '%i'" % (self.getProgramName(), exitStatus))
            sys.exit(1)

        if self.getOnlySsr():
            self.convertCatIntoPath()
        else:
            self.convertCatIntoAlign()

        self.end()


if __name__ == "__main__":
    launcher = RepeatMaskerProgramLauncher()
    launcher.setAttributesFromCmdLine()
    launcher.run()