Mercurial > repos > yufei-luo > s_mart
view commons/launcher/MafftProgramLauncher.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line source
#!/usr/bin/env python

##@file
# Launch Mafft (multiple alignment).
#
# options:
#      -h: this help
#      -i: name of the input file (format='fasta')
#      -p: parameters for 'mafft' (default='--auto')
#      -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')
#      -c: clean
#      -v: verbosity level (default=0/1)

import os
import sys
import getopt
import exceptions

from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
from pyRepet.seq.fastaDB import *
from commons.core.seq.FastaUtils import FastaUtils
from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders


class MafftProgramLauncher( AbstractProgramLauncher ):
    """
    Launch Mafft (multiple alignment).

    The input fasta file is copied with temporary headers before being given
    to the program, and the alignment is written to the output file with the
    headers of the input file (see run()).
    """

    def __init__( self ):
        """
        Constructor.

        Sets the program name ('mafft'), the expected input format ('fasta'),
        the default program parameters ('--auto') and the getopt string of
        the options specific to this launcher ('-p' and '-o', both taking
        an argument).
        """
        AbstractProgramLauncher.__init__( self )
        self._prgName = "mafft"
        self._formatInFile = "fasta"
        self._prgParam = "--auto"
        self._cmdLineSpecificOptions = "p:o:"


    def _setDefaultOutputFileIfNeeded( self ):
        """
        Set the output file to '<input file>.fa_aln' if none was given.

        Several methods need the output file name and can be reached before
        any of the others had a chance to set the default, hence this
        shared helper.
        """
        if self.getOutputFile() == "":
            self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) )


    def getSpecificHelpAsString( self ):
        """
        Return the specific help as a string.
        """
        string = ""
        string += "\nspecific options:"
        string += "\n     -p: parameters for '%s' (default='--auto')" % ( self.getProgramName() )
        string += "\n     -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')"
        return string


    def setASpecificAttributeFromCmdLine( self, o, a="" ):
        """
        Set a specific attribute from the command-line arguments.

        @param o: option flag ('-p' or '-o'; any other value is ignored)
        @param a: value given with the option
        """
        if o == "-p":
            self.setProgramParameters( a )
        elif o == "-o":
            self.setOutputFile( a )


    def checkSpecificAttributes( self ):
        """
        Check the specific attributes before running the program.

        The only check is that an output file name is defined; the default
        is used otherwise.
        """
        self._setDefaultOutputFileIfNeeded()


    def setWrapperCommandLine( self ):
        """
        Set the command-line of the wrapper.
        Required for MafftClusterLauncher.
        """
        self._wrpCmdLine = self.getWrapperName()
        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
        # the parameters may hold several options, hence the single quotes
        self._wrpCmdLine += " -p '%s'" % ( self.getProgramParameters() )
        self._setDefaultOutputFileIfNeeded()
        self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
        if self.getClean():
            self._wrpCmdLine += " -c"
        self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )


    def setProgramCommandLine( self ):
        """
        Set the command-line of the program.

        The program reads '<input file>.shortH' and its standard output is
        redirected to '<input file>.shortH.fa_aln'; both names are the ones
        used by run().
        """
        self._prgCmdLine = self.getProgramName()
        self._prgCmdLine += " %s" % ( self.getProgramParameters() )
        # do not add '--quiet' twice if the user already gave it via '-p'
        if self.getVerbosityLevel() == 0 and "--quiet" not in self._prgCmdLine:
            self._prgCmdLine += " --quiet"
        self._prgCmdLine += " %s.shortH" % ( self.getInputFile() )
        self._prgCmdLine += " > %s.shortH.fa_aln" % ( self.getInputFile() )
        # NOTE(review): this reads self._verbose directly whereas the rest of
        # the class uses getVerbosityLevel(); presumably both give the same
        # value -- confirm against AbstractProgramLauncher.
        if self._verbose < 2:
            self._prgCmdLine += " 2> /dev/null"


    def setListFilesToKeep( self ):
        """
        Set the list of files to keep.

        Only the output file is kept.
        """
        self._setDefaultOutputFileIfNeeded()
        self.appendFileToKeep( self.getOutputFile() )


    def setListFilesToRemove( self ):
        """
        Set the list of files to remove.

        These are the temporary files created by run(): the copy of the
        input with temporary headers, the raw alignment and the link file.
        """
        self.appendFileToRemove( "%s.shortH" % ( self.getInputFile() ) )
        self.appendFileToRemove( "%s.shortH.fa_aln" % ( self.getInputFile() ) )
        self.appendFileToRemove( "%s.shortHlink" % ( self.getInputFile() ) )


    def setSummary( self ):
        """
        Set the summary (input file, parameters and output file).
        """
        self._summary = "input file: %s" % ( self.getInputFile() )
        self._summary += "\nparameters: %s" % ( self.getProgramParameters() )
        self._setDefaultOutputFileIfNeeded()
        self._summary += "\noutput file: %s" % ( self.getOutputFile() )


    def run( self ):
        """
        Run the program.

        Steps:
          1. record the headers of the input file;
          2. write '<input>.shortH', a copy of the input produced by
             ChangeSequenceHeaders (step 1, prefix 'seq'), together with the
             link file '<input>.shortHlink';
          3. convert the sequences of '<input>.shortH' to upper case;
          4. launch the program, which writes '<input>.shortH.fa_aln';
          5. run ChangeSequenceHeaders (step 2) on the alignment with the
             same link file, giving '<input>.shortH.fa_aln.initH';
          6. write the aligned sequences to the output file by fetching them
             with the headers recorded at step 1, then remove the '.initH'
             file.

        Exits with status 1 if the program fails.
        """
        self.start()

        inFile = self.getInputFile()
        shortHFile = "%s.shortH" % ( inFile )
        linkFile = "%s.shortHlink" % ( inFile )
        alignedShortHFile = "%s.shortH.fa_aln" % ( inFile )
        alignedInitHFile = "%s.shortH.fa_aln.initH" % ( inFile )

        # sub-tools are run one verbosity level below this launcher
        subVerbosity = self.getVerbosityLevel() - 1

        lInitHeaders = FastaUtils.dbHeaders( inFile, subVerbosity )

        # presumably step 1 replaces the headers by short ones ('seq' +
        # number) and saves the correspondence in the link file -- confirm
        # against ChangeSequenceHeaders
        csh = ChangeSequenceHeaders()
        csh.setInputFile( inFile )
        csh.setFormat( "fasta" )
        csh.setStep( 1 )
        csh.setPrefix( "seq" )
        csh.setLinkFile( linkFile )
        csh.setOutputFile( shortHFile )
        csh.setVerbosityLevel( subVerbosity )
        csh.run()

        # upper-case the sequences via a temporary file, then put it in
        # place of the '.shortH' file
        bsDB = BioseqDB( shortHFile )
        bsDB.upCase()
        bsDB.save( "%s.shortHtmp" % ( inFile ) )
        del bsDB
        os.rename( "%s.shortHtmp" % ( inFile ), shortHFile )

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            # single-argument parenthesised form: same output with the
            # Python 2 print statement
            print( "LAUNCH: %s" % ( cmd ) )
            sys.stdout.flush()
        # NOTE: on Unix, os.system() returns the encoded wait status (e.g.
        # 256 for an exit code of 1); the message below reports it as is.
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
            print( string )
            sys.exit(1)

        # presumably step 2 restores the initial headers thanks to the link
        # file -- the fetch by initial header below relies on it
        csh.setInputFile( alignedShortHFile )
        csh.setFormat( "fasta" )
        csh.setStep( 2 )
        csh.setLinkFile( linkFile )
        csh.setOutputFile( alignedInitHFile )
        csh.setVerbosityLevel( subVerbosity )
        csh.run()

        absDB = AlignedBioseqDB( alignedInitHFile )
        outFileHandler = open( self.getOutputFile(), "w" )
        try:
            # iterate over the recorded headers rather than over the
            # alignment so that the output follows the header list
            for header in lInitHeaders:
                bs = absDB.fetch( header )
                bs.upCase()
                bs.write( outFileHandler )
        finally:
            # close the handle even if a sequence cannot be fetched/written
            outFileHandler.close()
        os.remove( alignedInitHFile )

        self.end()


if __name__ == "__main__":
    i = MafftProgramLauncher()
    i.setAttributesFromCmdLine()
    i.run()