comparison commons/launcher/RepeatMaskerProgramLauncher.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
1 #!/usr/bin/env python
2
3 ##@file
4 # Launch RepeatMasker (pairwise alignment for repeat detection).
5
6
7 import os
8 import sys
9
10 from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
11
12
class RepeatMaskerProgramLauncher( AbstractProgramLauncher ):
    """
    Launch RepeatMasker (pairwise alignment for repeat detection).

    Exactly one of two modes must be selected:
      - subject mode (-s): a library of repeats is given to RepeatMasker
        and the resulting 'cat' file is converted into the 'align' format;
      - SSR-only mode (-m): only low-complexity/simple repeats are masked
        and the resulting 'cat' file is converted into the 'path' format.

    Methods used but not defined in this class (getInputFile, getOutputFile,
    setOutputFile, getClean, getVerbosityLevel, getProgramName, ...) are
    presumably inherited from AbstractProgramLauncher.
    """

    def __init__( self ):
        """
        Constructor.
        """
        AbstractProgramLauncher.__init__( self )
        self._prgName = "RepeatMasker"
        self._formatInFile = "fasta"
        self._sbjFile = ""       # library of repeats (-s); empty means "not set"
        self._nbProc = 1         # number of processors (-n)
        self._calcGc = False     # calculate the GC content (-g)
        self._skipIs = False     # skip bacterial insertion element check (-b)
        self._maskSsr = True     # mask low-complexity DNA / simple repeats (unset by -l)
        self._onlySsr = False    # only mask low-complexity DNA / simple repeats (-m)
        # getopt-style description of the options handled by this class
        self._cmdLineSpecificOptions = "s:n:gblmo:"


    def getSpecificHelpAsString( self ):
        """
        Return the specific help as a string.

        @return: multi-line string starting with a newline, one option per line
        """
        lines = [
            "",
            "specific options:",
            " -s: name of the subject file (repeats, format='fasta')",
            " -n: nb of processors to use in parallel (default='%i')" % ( self.getNbProcessors() ),
            " -g: calculate the GC content",
            " -b: skip bacterial insertion element check",
            " -l: does not mask low-complexity DNA or simple repeats",
            " -m: only masks low complex/simple repeats (no interspersed repeats)",
            " -o: name of the output file",
            " with -s: format='align', default=inFile+'.cat.align')",
            " with -m: format='path', default=inFile+'.cat.path')",
        ]
        return "\n".join( lines )


    def setASpecificAttributeFromCmdLine( self, o, a="" ):
        """
        Set a specific attribute from the command-line arguments.

        @param o: option flag (e.g. '-s'); unknown flags are silently ignored
        @param a: value of the option, if it takes one
        """
        if o == "-s":
            self.setSubjectFile( a )
        elif o == "-n":
            self.setNbProcessors( a )
        elif o == "-g":
            self.setCalculateGCcontent()
        elif o == "-b":
            self.setSkipBacterialIsCheck()
        elif o == "-l":
            self.unsetMaskSsr()
        elif o == "-m":
            self.setOnlySsr()
        elif o == "-o":
            self.setOutputFile( a )


    def setSubjectFile( self, arg ):
        """
        Set the name of the subject file (library of repeats).
        """
        self._sbjFile = arg


    def setNbProcessors( self, arg ):
        """
        Set the number of processors to use in parallel.

        @param arg: anything accepted by int() (a ValueError is raised otherwise)
        """
        self._nbProc = int( arg )


    def setCalculateGCcontent( self ):
        """
        Ask for the GC content to be calculated.
        """
        self._calcGc = True


    def setSkipBacterialIsCheck( self ):
        """
        Ask for the bacterial insertion element check to be skipped.
        """
        self._skipIs = True


    def unsetMaskSsr( self ):
        """
        Ask for low-complexity DNA and simple repeats not to be masked.
        """
        self._maskSsr = False


    def setOnlySsr( self ):
        """
        Ask for only low-complexity DNA and simple repeats to be masked.
        """
        self._onlySsr = True


    def getSubjectFile( self ):
        """
        Return the name of the subject file ('' if not set).
        """
        return self._sbjFile


    def getNbProcessors( self ):
        """
        Return the number of processors to use in parallel.
        """
        return self._nbProc


    def getCalculateGCcontent( self ):
        """
        Return True if the GC content has to be calculated.
        """
        return self._calcGc


    def getSkipBacterialIsCheck( self ):
        """
        Return True if the bacterial insertion element check has to be skipped.
        """
        return self._skipIs


    def getMaskSsr( self ):
        """
        Return True if low-complexity DNA and simple repeats have to be masked.
        """
        return self._maskSsr


    def getOnlySsr( self ):
        """
        Return True if only low-complexity DNA and simple repeats have to be masked.
        """
        return self._onlySsr


    def _setDefaultOutputFile( self ):
        """
        Give the output file its default name if none was specified.

        The default is inFile+'.cat.path' in SSR-only mode and
        inFile+'.cat.align' otherwise; an already-set name is left untouched.
        """
        if self.getOutputFile() != "":
            return
        if self.getOnlySsr():
            suffix = "cat.path"
        else:
            suffix = "cat.align"
        self.setOutputFile( "%s.%s" % ( self.getInputFile(), suffix ) )


    def checkSpecificAttributes( self ):
        """
        Check the specific attributes before running the program.

        Print an error with the help and exit with status 1 unless exactly
        one of the subject file (-s) and the SSR-only mode (-m) is given.
        Then set the default output file name if needed.
        """
        hasSubject = self.getSubjectFile() != ""
        # error if neither or both of -s and -m were given
        if hasSubject == bool( self.getOnlySsr() ):
            string = "ERROR: need to specify -s or -m"
            print( string )
            print( self.getHelpAsString() )
            sys.exit(1)
        self._setDefaultOutputFile()


    def setWrapperCommandLine( self ):
        """
        Set the command-line of the wrapper.
        Required for RepeatMaskerClusterLauncher.
        """
        parts = [ self.getWrapperName() ]
        parts.append( "-i %s" % ( self.getInputFile() ) )
        if self.getSubjectFile() != "":
            parts.append( "-s %s" % ( self.getSubjectFile() ) )
        parts.append( "-n %i" % ( self.getNbProcessors() ) )
        if self.getCalculateGCcontent():
            parts.append( "-g" )
        if self.getSkipBacterialIsCheck():
            parts.append( "-b" )
        if not self.getMaskSsr():
            parts.append( "-l" )
        if self.getOnlySsr():
            parts.append( "-m" )
        if self.getOutputFile() != "":
            parts.append( "-o %s" % ( self.getOutputFile() ) )
        if self.getClean():
            parts.append( "-c" )
        if self.getVerbosityLevel() != 0:
            parts.append( "-v %i" % ( self.getVerbosityLevel() ) )
        self._wrpCmdLine = " ".join( parts )


    def setProgramCommandLine( self ):
        """
        Set the command-line of the program.
        """
        parts = [ self.getProgramName() ]
        parts.append( "-dir ." )
        parts.append( "-pa %i" % ( self.getNbProcessors() ) )
        if self.getCalculateGCcontent():
            parts.append( "-gccalc" )
        if self.getSkipBacterialIsCheck():
            parts.append( "-no_is" )
        # '-nolow' tells RepeatMasker NOT to mask low-complexity DNA or
        # simple repeats, so it is passed only when masking was unset (-l)
        if not self.getMaskSsr():
            parts.append( "-nolow" )
        if self.getOnlySsr():
            parts.append( "-int" )
        if self.getSubjectFile() != "":
            parts.append( "-lib %s" % ( self.getSubjectFile() ) )
        parts.append( "%s" % ( self.getInputFile() ) )
        self._prgCmdLine = " ".join( parts )


    def setListFilesToKeep( self ):
        """
        Set the list of files to keep.

        The output file (default name set if needed) and the raw 'cat' file
        are kept.
        """
        self._setDefaultOutputFile()
        self.appendFileToKeep( self.getOutputFile() )
        self.appendFileToKeep( "%s.cat" % ( self.getInputFile() ) )


    def setListFilesToRemove( self ):
        """
        Set the list of files to remove.
        """
        for extension in [ "stderr", "tbl", "ori.out", "masked", "out", "log", "ref" ]:
            self.appendFileToRemove( "%s.%s" % ( self.getInputFile(), extension ) )


    def _convertCat( self, outFormat ):
        """
        Convert the 'cat' file of the input file into the given format.

        Run $REPET_PATH/bin/RMcat2<outFormat>.py on inFile+'.cat', writing
        inFile+'.cat.<outFormat>'. A KeyError is raised if the REPET_PATH
        environment variable is not set; the process exits with status 1
        if the conversion script returns a non-zero status.

        @param outFormat: 'align' or 'path'
        """
        cmd = os.environ["REPET_PATH"] + "/bin/RMcat2%s.py" % ( outFormat )
        cmd += " -i %s.cat" % ( self.getInputFile() )
        cmd += " -o %s.cat.%s" % ( self.getInputFile(), outFormat )
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            string = "ERROR while converting 'cat' file into '%s' format" % ( outFormat )
            print( string )
            sys.exit(1)


    def convertCatIntoAlign( self ):
        """
        Convert a 'cat' file into the 'align' format.
        """
        self._convertCat( "align" )


    def convertCatIntoPath( self ):
        """
        Convert a 'cat' file into the 'path' format.
        """
        self._convertCat( "path" )


    def setSummary( self ):
        """
        Set the summary of the run (input, options in use and output file).
        """
        self._summary = "input file: %s" % ( self.getInputFile() )
        if self.getSubjectFile() != "":
            self._summary += "\nsubject file: %s" % ( self.getSubjectFile() )
        self._summary += "\nnb processors: %i" % ( self.getNbProcessors() )
        if self.getCalculateGCcontent():
            self._summary += "\ncalculate the GC content"
        if self.getSkipBacterialIsCheck():
            self._summary += "\nskip bacterial insertion element check"
        if self.getMaskSsr():
            self._summary += "\nmask low-complexity DNA or simple repeats"
        if self.getOnlySsr():
            # append (not assign), otherwise the lines above are lost
            self._summary += "\nonly masks low complex/simple repeats (no interspersed repeats)"
        # same default as in checkSpecificAttributes() and setListFilesToKeep()
        self._setDefaultOutputFile()
        self._summary += "\noutput file: %s" % ( self.getOutputFile() )


    def run( self ):
        """
        Run the program.

        Launch RepeatMasker, exit with status 1 if it returns a non-zero
        status, then convert its 'cat' output into the 'align' format
        (subject mode) or the 'path' format (SSR-only mode).
        """
        self.start()

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            print( "LAUNCH: %s" % ( cmd ) )
            sys.stdout.flush()
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
            print( string )
            sys.exit(1)

        if not self.getOnlySsr():
            self.convertCatIntoAlign()
        else:
            self.convertCatIntoPath()

        self.end()
281
282
# Script entry point: build a launcher, configure it from the command-line
# arguments, then run RepeatMasker.
if __name__ == "__main__":
    launcher = RepeatMaskerProgramLauncher()
    launcher.setAttributesFromCmdLine()
    launcher.run()