18
|
1 #!/usr/bin/env python
|
|
2
|
|
##@file
# Launch Map (multiple alignment).
#
# options:
#      -h: this help
#      -i: name of the input file (format='fasta')
#      -s: size above which a gap is not penalized anymore (default='50')
#      -m: penalty for a mismatch (default='-8', match=10)
#      -O: penalty for a gap opening (default='16')
#      -e: penalty for a gap extension (default='4')
#      -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')
#      -c: clean
#      -v: verbosity level (default=0/1)
|
|
16
|
|
17
|
|
18 import sys
|
|
19 import os
|
|
20
|
|
21 from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
|
|
22 from commons.core.seq.FastaUtils import FastaUtils
|
|
23 from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
|
|
24 from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
|
|
25
|
|
26
|
|
class MapProgramLauncher( AbstractProgramLauncher ):
    """
    Launch Map (multiple alignment).

    The launcher shortens the headers of the input fasta file, runs the
    external program 'rpt_map' on it, restores the initial headers in the
    resulting alignment and writes the aligned sequences, upper-cased and in
    the order of the input file, into the output file.

    NOTE(review): getInputFile, getOutputFile, setOutputFile, getClean,
    getVerbosityLevel, getProgramName, getWrapperName, getHelpAsString,
    getProgramCommandLine, appendFileToKeep, appendFileToRemove, start and
    end are not defined in this class; presumably they are inherited from
    AbstractProgramLauncher -- confirm against the base class.
    """


    def __init__( self ):
        """
        Constructor.

        Set the program name, the input format, the getopt string of the
        specific options and the default value of each specific parameter.
        """
        AbstractProgramLauncher.__init__( self )
        self._prgName = "rpt_map"
        self._formatInFile = "fasta"
        # each specific option (-s, -m, -O, -e, -o) expects an argument
        self._cmdLineSpecificOptions = "s:m:O:e:o:"
        self._gapSize = 50
        self._mismatchPenalty = -8
        self._gapOpenPenalty = 16
        self._gapExtendPenalty = 4
        # empty means "not set yet": replaced by inFile+'.fa_aln' when needed
        self._outFile = ""


    def getSpecificHelpAsString( self ):
        """
        Return the specific help as a string.

        The default values shown are the current values of the attributes.
        """
        string = ""
        string += "\nspecific options:"
        string += "\n     -s: size above which a gap is not penalized anymore (default='%i')" % ( self.getGapSize() )
        string += "\n     -m: penalty for a mismatch (default='%i', match=10)" % ( self.getMismatchPenalty() )
        string += "\n     -O: penalty for a gap opening (default='%i')" % ( self.getGapOpenPenalty() )
        string += "\n     -e: penalty for a gap extension (default='%i')" % ( self.getGapExtendPenalty() )
        string += "\n     -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')"
        return string


    def setASpecificAttributeFromCmdLine( self, o, a="" ):
        """
        Set a specific attribute from the command-line arguments.

        @param o: option flag (e.g. '-s')
        @param a: value given with the option
        Unknown options are silently ignored.
        """
        if o == "-s":
            self.setGapSize( a )
        elif o == "-m":
            self.setMismatchPenalty( a )
        elif o == "-O":
            self.setGapOpenPenalty( a )
        elif o == "-e":
            self.setGapExtendPenalty( a )
        elif o == "-o":
            self.setOutputFile( a )


    def setGapSize( self, arg ):
        """
        Set the gap size; 'arg' is converted with int() (ValueError if invalid).
        """
        self._gapSize = int(arg)


    def setMismatchPenalty( self, arg ):
        """
        Set the mismatch penalty; 'arg' is converted with int().
        """
        self._mismatchPenalty = int(arg)


    def setGapOpenPenalty( self, arg ):
        """
        Set the gap opening penalty; 'arg' is converted with int().
        """
        self._gapOpenPenalty = int(arg)


    def setGapExtendPenalty( self, arg ):
        """
        Set the gap extension penalty; 'arg' is converted with int().
        """
        self._gapExtendPenalty = int(arg)


    def getGapSize( self ):
        """
        Return the size above which a gap is not penalized anymore.
        """
        return self._gapSize


    def getMismatchPenalty( self ):
        """
        Return the penalty for a mismatch.
        """
        return self._mismatchPenalty


    def getGapOpenPenalty( self ):
        """
        Return the penalty for a gap opening.
        """
        return self._gapOpenPenalty


    def getGapExtendPenalty( self ):
        """
        Return the penalty for a gap extension.
        """
        return self._gapExtendPenalty


    def _setDefaultOutputFileIfEmpty( self ):
        """
        Set the output file to inFile+'.fa_aln' if none was given.
        """
        if self.getOutputFile() == "":
            self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) )


    def _exitWithError( self, string ):
        """
        Print the error message followed by the help, then exit with status 1.
        """
        print( string )
        print( self.getHelpAsString() )
        sys.exit(1)


    def checkSpecificAttributes( self ):
        """
        Check the specific attributes before running the program.

        Exit with status 1 (after printing the error and the help) on the
        first invalid value. Set the default output file if none was given.
        """
        if self.getGapSize() <= 0:
            self._exitWithError( "ERROR: gap size should be > 0" )
        if self.getMismatchPenalty() >= 0:
            self._exitWithError( "ERROR: mismatch penalty should be < 0" )
        if self.getGapOpenPenalty() < 0:
            self._exitWithError( "ERROR: gap opening penalty should be >= 0" )
        if self.getGapExtendPenalty() < 0:
            self._exitWithError( "ERROR: gap extension penalty should be >= 0" )
        self._setDefaultOutputFileIfEmpty()


    def setWrapperCommandLine( self ):
        """
        Set the command-line of the wrapper.
        Required for MapClusterLauncher.
        """
        self._wrpCmdLine = self.getWrapperName()
        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
        self._wrpCmdLine += " -s %i" % ( self.getGapSize() )
        self._wrpCmdLine += " -m %i" % ( self.getMismatchPenalty() )
        self._wrpCmdLine += " -O %i" % ( self.getGapOpenPenalty() )
        self._wrpCmdLine += " -e %i" % ( self.getGapExtendPenalty() )
        self._setDefaultOutputFileIfEmpty()
        self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
        if self.getClean():
            self._wrpCmdLine += " -c"
        self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )


    def setProgramCommandLine( self ):
        """
        Set the command-line of the program.

        The program reads the file with shortened headers (inFile+'.shortH')
        and its standard output is redirected to inFile+'.shortH.fa_aln'.
        """
        self._prgCmdLine = self.getProgramName()
        self._prgCmdLine += " %s.shortH" % ( self.getInputFile() )
        self._prgCmdLine += " %i" % ( self.getGapSize() )
        self._prgCmdLine += " %i" % ( self.getMismatchPenalty() )
        self._prgCmdLine += " %i" % ( self.getGapOpenPenalty() )
        self._prgCmdLine += " %i" % ( self.getGapExtendPenalty() )
        self._prgCmdLine += " > %s.shortH.fa_aln" % ( self.getInputFile() )


    def setListFilesToKeep( self ):
        """
        Set the list of files to keep.
        """
        self._setDefaultOutputFileIfEmpty()
        self.appendFileToKeep( self.getOutputFile() )


    def setListFilesToRemove( self ):
        """
        Set the list of files to remove.
        """
        self.appendFileToRemove( "%s.shortH" % ( self.getInputFile() ) )
        self.appendFileToRemove( "%s.shortH.fa_aln" % ( self.getInputFile() ) )
        self.appendFileToRemove( "%s.shortHlink" % ( self.getInputFile() ) )


    def setSummary( self ):
        """
        Set the summary of the run (input file, parameters, output file).
        """
        self._summary = "input file: %s" % ( self.getInputFile() )
        self._summary += "\ngap size: %i" % ( self.getGapSize() )
        self._summary += "\nmismatch penalty: %i" % ( self.getMismatchPenalty() )
        self._summary += "\ngap openning penalty: %i" % ( self.getGapOpenPenalty() )
        self._summary += "\ngap extension penalty: %i" % ( self.getGapExtendPenalty() )
        self._setDefaultOutputFileIfEmpty()
        self._summary += "\noutput file: %s" % ( self.getOutputFile() )


    def run( self ):
        """
        Run the program.

        Steps: record the initial headers, shorten them, launch the program
        via the shell, restore the initial headers in the alignment, then
        write the aligned sequences in upper case in the input order.
        Exit with status 1 if the program returns a non-zero status.
        """
        self.start()

        inFile = self.getInputFile()
        linkFile = "%s.shortHlink" % ( inFile )
        initHeadersAlnFile = "%s.shortH.fa_aln.initH" % ( inFile )

        # headers are recorded first so that the output follows the input order
        lInitHeaders = FastaUtils.dbHeaders( inFile, self.getVerbosityLevel()-1 )

        # step 1: replace the headers by short ones ('seq' prefix)
        csh = ChangeSequenceHeaders()
        csh.setInputFile( inFile )
        csh.setFormat( "fasta" )
        csh.setStep( 1 )
        csh.setPrefix( "seq" )
        csh.setLinkFile( linkFile )
        csh.setOutputFile( "%s.shortH" % ( inFile ) )
        csh.setVerbosityLevel( self.getVerbosityLevel() - 1 )
        csh.run()

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            print( "LAUNCH: %s" % ( cmd ) )
            sys.stdout.flush()
        returnStatus = os.system( cmd )
        if returnStatus != 0:
            string = "ERROR: program '%s' returned status '%i'" % ( self.getProgramName(), returnStatus )
            print( string )
            sys.exit(1)

        # step 2: put the initial headers back in the alignment
        csh.setInputFile( "%s.shortH.fa_aln" % ( inFile ) )
        csh.setFormat( "fasta" )
        csh.setStep( 2 )
        csh.setLinkFile( linkFile )
        csh.setOutputFile( initHeadersAlnFile )
        csh.setVerbosityLevel( self.getVerbosityLevel() - 1 )
        csh.run()

        absDB = AlignedBioseqDB( initHeadersAlnFile )
        outFileHandler = open( self.getOutputFile(), "w" )
        try:
            for header in lInitHeaders:
                bs = absDB.fetch( header )
                bs.upCase()
                bs.write( outFileHandler )
        finally:
            # close the handler even if fetching or writing a sequence fails
            outFileHandler.close()
        if self.getClean():
            os.remove( initHeadersAlnFile )

        self.end()
|
|
246
|
|
247
|
|
if __name__ == "__main__":
    # Script entry point: build a launcher, configure it from the
    # command-line arguments, then run the alignment.
    launcher = MapProgramLauncher()
    launcher.setAttributesFromCmdLine()
    launcher.run()
|