Mercurial > repos > yufei-luo > s_mart
diff commons/launcher/MapProgramLauncher.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/launcher/MapProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400 @@ -0,0 +1,251 @@ +#!/usr/bin/env python + +##@file +# Launch Map (multiple alignment). +# +# options: +# -h: this help +# -i: name of the input file (format='fasta') +# -s: size above which a gap is not penalized anymore (default='50') +# -m: penalty for a mismatch (default='-8') +# -O: penalty for a gap opening (default='16') +# -e: penalty for a gap extension (default='4') +# -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln') +# -c: clean +# -v: verbosity level (default=0/1) + + +import sys +import os + +from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher +from commons.core.seq.FastaUtils import FastaUtils +from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB +from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders + + +class MapProgramLauncher( AbstractProgramLauncher ): + """ + Launch Map (multiple alignment). + """ + + + def __init__( self ): + """ + Constructor. + """ + AbstractProgramLauncher.__init__( self ) + self._prgName = "rpt_map" + self._formatInFile = "fasta" + self._cmdLineSpecificOptions = "s:m:O:e:o:" + self._gapSize = 50 + self._mismatchPenalty = -8 + self._gapOpenPenalty = 16 + self._gapExtendPenalty = 4 + self._outFile = "" + + def getSpecificHelpAsString( self ): + """ + Return the specific help as a string. + """ + string = "" + string += "\nspecific options:" + string += "\n -s: size above which a gap is not penalized anymore (default='%i')" % ( self.getGapSize() ) + string += "\n -m: penalty for a mismatch (default='%i', match=10)" % ( self.getMismatchPenalty() ) + string += "\n -O: penalty for a gap opening (default='%i')" % ( self.getGapOpenPenalty() ) + string += "\n -e: penalty for a gap extension (default='%i')" % ( self.getGapExtendPenalty() ) + string += "\n -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')" + return string + + + def setASpecificAttributeFromCmdLine( self, o, a="" ): + """ + Set a specific attribute from the command-line arguments. + """ + if o == "-s": + self.setGapSize( a ) + elif o == "-m": + self.setMismatchPenalty( a ) + elif o == "-O": + self.setGapOpenPenalty( a ) + elif o == "-e": + self.setGapExtendPenalty( a ) + elif o == "-o": + self.setOutputFile( a ) + + + def setGapSize( self, arg ): + self._gapSize = int(arg) + + + def setMismatchPenalty( self, arg ): + self._mismatchPenalty = int(arg) + + + def setGapOpenPenalty( self, arg ): + self._gapOpenPenalty = int(arg) + + + def setGapExtendPenalty( self, arg ): + self._gapExtendPenalty = int(arg) + + + def getGapSize( self ): + return self._gapSize + + + def getMismatchPenalty( self ): + return self._mismatchPenalty + + + def getGapOpenPenalty( self ): + return self._gapOpenPenalty + + + def getGapExtendPenalty( self ): + return self._gapExtendPenalty + + + def checkSpecificAttributes( self ): + """ + Check the specific attributes before running the program. + """ + if self.getGapSize() <= 0: + string = "ERROR: gap size should be > 0" + print string + print self.getHelpAsString() + sys.exit(1) + if self.getMismatchPenalty() >= 0: + string = "ERROR: mismatch penalty should be < 0" + print string + print self.getHelpAsString() + sys.exit(1) + if self.getGapOpenPenalty() < 0: + string = "ERROR: gap opening penalty should be >= 0" + print string + print self.getHelpAsString() + sys.exit(1) + if self.getGapExtendPenalty() < 0: + string = "ERROR: gap extension penalty should be >= 0" + print string + print self.getHelpAsString() + sys.exit(1) + if self.getOutputFile() == "": + self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) ) + + + def setWrapperCommandLine( self ): + """ + Set the command-line of the wrapper. + Required for MapClusterLauncher. + """ + self._wrpCmdLine = self.getWrapperName() + self._wrpCmdLine += " -i %s" % ( self.getInputFile() ) + self._wrpCmdLine += " -s %i" % ( self.getGapSize() ) + self._wrpCmdLine += " -m %i" % ( self.getMismatchPenalty() ) + self._wrpCmdLine += " -O %i" % ( self.getGapOpenPenalty() ) + self._wrpCmdLine += " -e %i" % ( self.getGapExtendPenalty() ) + if self.getOutputFile() == "": + self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) ) + self._wrpCmdLine += " -o %s" % ( self.getOutputFile() ) + if self.getClean(): + self._wrpCmdLine += " -c" + self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() ) + + + def setProgramCommandLine( self ): + """ + Set the command-line of the program. + """ + self._prgCmdLine = self.getProgramName() + self._prgCmdLine += " %s.shortH" % ( self.getInputFile() ) + self._prgCmdLine += " %i" % ( self.getGapSize() ) + self._prgCmdLine += " %i" % ( self.getMismatchPenalty() ) + self._prgCmdLine += " %i" % ( self.getGapOpenPenalty() ) + self._prgCmdLine += " %i" % ( self.getGapExtendPenalty() ) + self._prgCmdLine += " > %s.shortH.fa_aln" % ( self.getInputFile() ) + + + def setListFilesToKeep( self ): + """ + Set the list of files to keep. + """ + if self.getOutputFile() == "": + self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) ) + self.appendFileToKeep( self.getOutputFile() ) + + + def setListFilesToRemove( self ): + """ + Set the list of files to remove. + """ + self.appendFileToRemove( "%s.shortH" % ( self.getInputFile() ) ) + self.appendFileToRemove( "%s.shortH.fa_aln" % ( self.getInputFile() ) ) + self.appendFileToRemove( "%s.shortHlink" % ( self.getInputFile() ) ) + + + def setSummary( self ): + self._summary = "input file: %s" % ( self.getInputFile() ) + self._summary += "\ngap size: %i" % ( self.getGapSize() ) + self._summary += "\nmismatch penalty: %i" % ( self.getMismatchPenalty() ) + self._summary += "\ngap openning penalty: %i" % ( self.getGapOpenPenalty() ) + self._summary += "\ngap extension penalty: %i" % ( self.getGapExtendPenalty() ) + if self.getOutputFile() == "": + self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) ) + self._summary += "\noutput file: %s" % ( self.getOutputFile() ) + + + def run( self ): + """ + Run the program. + """ + self.start() + + lInitHeaders = FastaUtils.dbHeaders( self.getInputFile(), self.getVerbosityLevel()-1 ) + + csh = ChangeSequenceHeaders() + csh.setInputFile( self.getInputFile() ) + csh.setFormat( "fasta" ) + csh.setStep( 1 ) + csh.setPrefix( "seq" ) + csh.setLinkFile( "%s.shortHlink" % ( self.getInputFile() ) ) + csh.setOutputFile( "%s.shortH" % ( self.getInputFile() ) ) + csh.setVerbosityLevel( self.getVerbosityLevel() - 1 ) + csh.run() + + self.setProgramCommandLine() + cmd = self.getProgramCommandLine() + if self.getVerbosityLevel() > 0: + print "LAUNCH: %s" % ( cmd ) + sys.stdout.flush() + returnStatus = os.system( cmd ) + if returnStatus != 0: + string = "ERROR: program '%s' returned status '%i'" % ( self.getProgramName(), returnStatus ) + print string + sys.exit(1) + + csh.setInputFile( "%s.shortH.fa_aln" % ( self.getInputFile() ) ) + csh.setFormat( "fasta" ) + csh.setStep( 2 ) + csh.setLinkFile( "%s.shortHlink" % ( self.getInputFile() ) ) + csh.setOutputFile( "%s.shortH.fa_aln.initH" % ( self.getInputFile() ) ) + csh.setVerbosityLevel( self.getVerbosityLevel() - 1 ) + csh.run() + + absDB = AlignedBioseqDB( "%s.shortH.fa_aln.initH" % ( self.getInputFile() ) ) + outFileHandler = open( self.getOutputFile(), "w" ) + for header in lInitHeaders: + bs = absDB.fetch( header ) + bs.upCase() + bs.write( outFileHandler ) + outFileHandler.close() + if self.getClean(): + os.remove( "%s.shortH.fa_aln.initH" % ( self.getInputFile() ) ) + + self.end() + + +if __name__ == "__main__": + i = MapProgramLauncher() + i.setAttributesFromCmdLine() + i.run()