18
|
#!/usr/bin/env python

##@file
# Launch Mafft (multiple alignment).
#
# options:
#      -h: this help
#      -i: name of the input file (format='fasta')
#      -p: parameters for 'mafft' (default='--auto')
#      -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')
#      -c: clean
#      -v: verbosity level (default=0/1)
|
|
13
|
|
14
|
|
15 import os
|
|
16 import sys
|
|
17 import getopt
|
|
18 import exceptions
|
|
19
|
|
20 from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
|
|
21 from pyRepet.seq.fastaDB import *
|
|
22 from commons.core.seq.FastaUtils import FastaUtils
|
|
23 from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
|
|
24 from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
|
|
25
|
|
26
|
|
class MafftProgramLauncher( AbstractProgramLauncher ):
    """
    Launch Mafft (multiple alignment).

    The input fasta file is copied to a temporary file with replacement
    headers (prefix 'seq') and upper-cased sequences, 'mafft' is run on that
    copy through the shell, and the alignment is finally written to the
    output file, one sequence per header of the input file, in the order of
    the input file.
    """


    def __init__( self ):
        """
        Constructor.

        Set the program name ('mafft'), the input format ('fasta'), the
        default program parameters ('--auto') and the getopt string of the
        options specific to this launcher ('-p' and '-o', both with a value).
        """
        AbstractProgramLauncher.__init__( self )
        self._prgName = "mafft"
        self._formatInFile = "fasta"
        self._prgParam = "--auto"
        self._cmdLineSpecificOptions = "p:o:"


    def _setDefaultOutputFileIfNeeded( self ):
        """
        Give the output file its default name (inFile+'.fa_aln') if it is
        still empty. Do nothing when an output file is already set.
        """
        if self.getOutputFile() == "":
            self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) )


    def _getShortHeaderFile( self ):
        """
        Return the name of the temporary fasta file with replaced headers.
        """
        return "%s.shortH" % ( self.getInputFile() )


    def _getLinkFile( self ):
        """
        Return the name of the temporary link file given to
        ChangeSequenceHeaders.
        """
        return "%s.shortHlink" % ( self.getInputFile() )


    def _getShortHeaderAlignFile( self ):
        """
        Return the name of the temporary file receiving the standard output
        of the program.
        """
        return "%s.shortH.fa_aln" % ( self.getInputFile() )


    def getSpecificHelpAsString( self ):
        """
        Return the specific help as a string.

        @return help text describing the '-p' and '-o' options
        """
        lLines = [ "",
                   "specific options:",
                   "     -p: parameters for '%s' (default='--auto')" % ( self.getProgramName() ),
                   "     -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')" ]
        return "\n".join( lLines )


    def setASpecificAttributeFromCmdLine( self, o, a="" ):
        """
        Set a specific attribute from the command-line arguments.

        @param o option flag ('-p' or '-o'); any other flag is ignored
        @param a value given with the option
        """
        if o == "-p":
            self.setProgramParameters( a )
        elif o == "-o":
            self.setOutputFile( a )


    def checkSpecificAttributes( self ):
        """
        Check the specific attributes before running the program.

        The only check is that an output file is set: when it is empty, it
        defaults to inFile+'.fa_aln'.
        """
        self._setDefaultOutputFileIfNeeded()


    def setWrapperCommandLine( self ):
        """
        Set the command-line of the wrapper.
        Required for MafftClusterLauncher.

        As a side effect, the output file gets its default name if it was
        still empty.
        """
        self._wrpCmdLine = self.getWrapperName()
        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
        self._wrpCmdLine += " -p '%s'" % ( self.getProgramParameters() )
        self._setDefaultOutputFileIfNeeded()
        self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
        if self.getClean():
            self._wrpCmdLine += " -c"
        self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )


    def setProgramCommandLine( self ):
        """
        Set the command-line of the program.

        The program reads the temporary file with replaced headers and its
        standard output is redirected to inFile+'.shortH.fa_aln'.
        """
        self._prgCmdLine = self.getProgramName()
        self._prgCmdLine += " %s" % ( self.getProgramParameters() )
        # add '--quiet' at verbosity 0, unless the command line built so far
        # already contains it
        if self.getVerbosityLevel() == 0 and "--quiet" not in self._prgCmdLine:
            self._prgCmdLine += " --quiet"
        self._prgCmdLine += " %s" % ( self._getShortHeaderFile() )
        self._prgCmdLine += " > %s" % ( self._getShortHeaderAlignFile() )
        # NOTE(review): this reads the private '_verbose' attribute whereas
        # the rest of the class calls getVerbosityLevel(); presumably both
        # give the same value -- confirm against AbstractProgramLauncher.
        if self._verbose < 2:
            self._prgCmdLine += " 2> /dev/null"


    def setListFilesToKeep( self ):
        """
        Set the list of files to keep.

        Only the output file is kept; it gets its default name first if it
        was still empty.
        """
        self._setDefaultOutputFileIfNeeded()
        self.appendFileToKeep( self.getOutputFile() )


    def setListFilesToRemove( self ):
        """
        Set the list of files to remove.

        These are the temporary files written next to the input file: the
        copy with replaced headers, the raw output of the program and the
        link file.
        """
        self.appendFileToRemove( self._getShortHeaderFile() )
        self.appendFileToRemove( self._getShortHeaderAlignFile() )
        self.appendFileToRemove( self._getLinkFile() )


    def setSummary( self ):
        """
        Set the summary: input file, program parameters and output file.

        As a side effect, the output file gets its default name if it was
        still empty.
        """
        self._summary = "input file: %s" % ( self.getInputFile() )
        self._summary += "\nparameters: %s" % ( self.getProgramParameters() )
        self._setDefaultOutputFileIfNeeded()
        self._summary += "\noutput file: %s" % ( self.getOutputFile() )


    def run( self ):
        """
        Run the program.

        Exit the interpreter with status 1 if the program returns a
        non-zero exit status.
        """
        self.start()

        shortHFile = self._getShortHeaderFile()
        linkFile = self._getLinkFile()
        shortHAlignFile = self._getShortHeaderAlignFile()
        initHAlignFile = "%s.initH" % ( shortHAlignFile )
        subVerbosity = self.getVerbosityLevel() - 1

        # headers of the input file, used at the end to write the aligned
        # sequences in the same order
        lInitHeaders = FastaUtils.dbHeaders( self.getInputFile(), subVerbosity )

        # step 1 of ChangeSequenceHeaders: presumably replaces each header by
        # a short one ('seq' prefix) and records the mapping in the link file
        csh = ChangeSequenceHeaders()
        csh.setInputFile( self.getInputFile() )
        csh.setFormat( "fasta" )
        csh.setStep( 1 )
        csh.setPrefix( "seq" )
        csh.setLinkFile( linkFile )
        csh.setOutputFile( shortHFile )
        csh.setVerbosityLevel( subVerbosity )
        csh.run()

        # upper-case the sequences: save to a side file, then rename it over
        # the short-header file
        tmpFile = "%s.shortHtmp" % ( self.getInputFile() )
        bsDB = BioseqDB( shortHFile )
        bsDB.upCase()
        bsDB.save( tmpFile )
        del bsDB
        os.rename( tmpFile, shortHFile )

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            # single-argument form: same output with the Python 2 print
            # statement and with the Python 3 print function
            print( "LAUNCH: %s" % ( cmd ) )
            sys.stdout.flush()
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
            print( string )
            sys.exit(1)

        # step 2 of ChangeSequenceHeaders: presumably puts the initial
        # headers back, using the link file written at step 1
        csh.setInputFile( shortHAlignFile )
        csh.setFormat( "fasta" )
        csh.setStep( 2 )
        csh.setLinkFile( linkFile )
        csh.setOutputFile( initHAlignFile )
        csh.setVerbosityLevel( subVerbosity )
        csh.run()

        absDB = AlignedBioseqDB( initHAlignFile )
        # make sure there is a file name to open even if the output file was
        # never given nor defaulted before this point
        self._setDefaultOutputFileIfNeeded()
        outFileHandler = open( self.getOutputFile(), "w" )
        try:
            for header in lInitHeaders:
                bs = absDB.fetch( header )
                bs.upCase()
                bs.write( outFileHandler )
        finally:
            # close the handle even when fetching or writing fails
            outFileHandler.close()
        os.remove( initHAlignFile )

        self.end()
|
|
183
|
|
184
|
|
if __name__ == "__main__":
    # script entry point: build the launcher, configure it from the
    # command-line arguments, then run it
    launcher = MafftProgramLauncher()
    launcher.setAttributesFromCmdLine()
    launcher.run()
|