18
|
1 #!/usr/bin/env python
|
|
2
|
|
3 ##@file
|
|
4 # Launch RepeatMasker (pairwise alignment for repeat detection).
|
|
5
|
|
6
|
|
7 import os
|
|
8 import sys
|
|
9
|
|
10 from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
|
|
11
|
|
12
|
|
class RepeatMaskerProgramLauncher( AbstractProgramLauncher ):
    """
    Launch RepeatMasker (pairwise alignment for repeat detection).

    Exactly one of two modes must be selected (see checkSpecificAttributes):
      - a subject file is given (-s): after the run, the 'cat' file is
        converted into the 'align' format;
      - only SSRs are masked (-m): after the run, the 'cat' file is
        converted into the 'path' format.

    Methods such as getInputFile(), getOutputFile(), setOutputFile(),
    getClean(), getVerbosityLevel(), start() and end() are not defined here;
    they are inherited from the base class.
    """

    def __init__( self ):
        """
        Constructor.
        """
        AbstractProgramLauncher.__init__( self )
        self._prgName = "RepeatMasker"
        self._formatInFile = "fasta"
        self._sbjFile = ""       # subject (repeat library) file, "" means not set
        self._nbProc = 1         # number of processors to use in parallel
        self._calcGc = False     # option -g
        self._skipIs = False     # option -b
        self._maskSsr = True     # turned off by option -l
        self._onlySsr = False    # option -m
        # getopt-style description of the options specific to this launcher
        self._cmdLineSpecificOptions = "s:n:gblmo:"


    def getSpecificHelpAsString( self ):
        """
        Return the specific help as a string.
        """
        helpLines = [
            "\nspecific options:",
            "\n -s: name of the subject file (repeats, format='fasta')",
            "\n -n: nb of processors to use in parallel (default='%i')" % ( self.getNbProcessors() ),
            "\n -g: calculate the GC content",
            "\n -b: skip bacterial insertion element check",
            "\n -l: does not mask low-complexity DNA or simple repeats",
            "\n -m: only masks low complex/simple repeats (no interspersed repeats)",
            "\n -o: name of the output file",
            "\n with -s: format='align', default=inFile+'.cat.align')",
            "\n with -m: format='path', default=inFile+'.cat.path')",
        ]
        return "".join( helpLines )


    def setASpecificAttributeFromCmdLine( self, o, a="" ):
        """
        Set a specific attribute from the command-line arguments.

        @param o: option flag (e.g. "-s")
        @param a: value attached to the option, if any
        Unknown flags are silently ignored.
        """
        if o == "-s":
            self.setSubjectFile( a )
        elif o == "-n":
            self.setNbProcessors( a )
        elif o == "-g":
            self.setCalculateGCcontent()
        elif o == "-b":
            self.setSkipBacterialIsCheck()
        elif o == "-l":
            self.unsetMaskSsr()
        elif o == "-m":
            self.setOnlySsr()
        elif o == "-o":
            self.setOutputFile( a )


    def setSubjectFile( self, arg ):
        """
        Set the name of the subject file (option -s).
        """
        self._sbjFile = arg


    def setNbProcessors( self, arg ):
        """
        Set the number of processors (option -n).

        The value is converted with int(), so a non-numeric string raises
        ValueError.
        """
        self._nbProc = int(arg)


    def setCalculateGCcontent( self ):
        """
        Ask for the GC content to be calculated (option -g).
        """
        self._calcGc = True


    def setSkipBacterialIsCheck( self ):
        """
        Skip the bacterial insertion element check (option -b).
        """
        self._skipIs = True


    def unsetMaskSsr( self ):
        """
        Do not mask low-complexity DNA or simple repeats (option -l).
        """
        self._maskSsr = False


    def setOnlySsr( self ):
        """
        Only mask low complex/simple repeats (option -m).
        """
        self._onlySsr = True


    def getSubjectFile( self ):
        """
        Return the name of the subject file ("" if not set).
        """
        return self._sbjFile


    def getNbProcessors( self ):
        """
        Return the number of processors to use.
        """
        return self._nbProc


    def getCalculateGCcontent( self ):
        """
        Return True if the GC content has to be calculated.
        """
        return self._calcGc


    def getSkipBacterialIsCheck( self ):
        """
        Return True if the bacterial insertion element check is skipped.
        """
        return self._skipIs


    def getMaskSsr( self ):
        """
        Return True if low-complexity DNA and simple repeats are masked.
        """
        return self._maskSsr


    def getOnlySsr( self ):
        """
        Return True if only low complex/simple repeats are masked.
        """
        return self._onlySsr


    def _getDefaultOutputFile( self ):
        """
        Return the default output file name for the current mode:
        inFile+'.cat.path' with -m, inFile+'.cat.align' otherwise.
        """
        if self.getOnlySsr():
            return "%s.cat.path" % ( self.getInputFile() )
        return "%s.cat.align" % ( self.getInputFile() )


    def _setDefaultOutputFileIfMissing( self ):
        """
        Give the output file its default name if none was specified.
        """
        if self.getOutputFile() == "":
            self.setOutputFile( self._getDefaultOutputFile() )


    def checkSpecificAttributes( self ):
        """
        Check the specific attributes before running the program.

        Exactly one of -s (subject file) and -m (only SSR) is required;
        otherwise an error and the help are printed and the process exits
        with status 1. The output file gets its default name if needed.
        """
        hasSubject = self.getSubjectFile() != ""
        onlySsr = bool( self.getOnlySsr() )
        # invalid when neither or both of the two modes are requested
        if hasSubject == onlySsr:
            print( "ERROR: need to specify -s or -m" )
            print( self.getHelpAsString() )
            sys.exit(1)
        self._setDefaultOutputFileIfMissing()


    def setWrapperCommandLine( self ):
        """
        Set the command-line of the wrapper.
        Required for RepeatMaskerClusterLauncher.
        """
        self._wrpCmdLine = self.getWrapperName()
        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
        if self.getSubjectFile() != "":
            self._wrpCmdLine += " -s %s" % ( self.getSubjectFile() )
        self._wrpCmdLine += " -n %i" %( self.getNbProcessors() )
        if self.getCalculateGCcontent():
            self._wrpCmdLine += " -g"
        if self.getSkipBacterialIsCheck():
            self._wrpCmdLine += " -b"
        if not self.getMaskSsr():
            self._wrpCmdLine += " -l"
        if self.getOnlySsr():
            self._wrpCmdLine += " -m"
        if self.getOutputFile() != "":
            self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
        if self.getClean():
            self._wrpCmdLine += " -c"
        if self.getVerbosityLevel() != 0:
            self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )


    def setProgramCommandLine( self ):
        """
        Set the command-line of the program.
        """
        self._prgCmdLine = self.getProgramName()
        self._prgCmdLine += " -dir ."
        self._prgCmdLine += " -pa %i" % ( self.getNbProcessors() )
        if self.getCalculateGCcontent():
            self._prgCmdLine += " -gccalc"
        if self.getSkipBacterialIsCheck():
            self._prgCmdLine += " -no_is"
        # '-nolow' disables the masking of low-complexity DNA / simple
        # repeats, so it must only be passed when that masking was turned
        # off (option -l), mirroring the '-l' handling of the wrapper.
        if not self.getMaskSsr():
            self._prgCmdLine += " -nolow"
        if self.getOnlySsr():
            self._prgCmdLine += " -int"
        if self.getSubjectFile() != "":
            self._prgCmdLine += " -lib %s" % ( self.getSubjectFile() )
        self._prgCmdLine += " %s" % ( self.getInputFile() )


    def setListFilesToKeep( self ):
        """
        Set the list of files to keep: the output file and the 'cat' file.
        """
        self._setDefaultOutputFileIfMissing()
        self.appendFileToKeep( self.getOutputFile() )
        self.appendFileToKeep( "%s.cat" % ( self.getInputFile() ) )


    def setListFilesToRemove( self ):
        """
        Set the list of files to remove.
        """
        for extension in ( "stderr", "tbl", "ori.out", "masked", "out", "log", "ref" ):
            self.appendFileToRemove( "%s.%s" % ( self.getInputFile(), extension ) )


    def _convertCatFile( self, scriptName, formatName ):
        """
        Convert inFile+'.cat' into inFile+'.cat.'+formatName by running
        the given script from $REPET_PATH/bin. Exit with status 1 on failure.

        @param scriptName: name of the conversion script
        @param formatName: target format, also used as file extension
        Raises KeyError if the REPET_PATH environment variable is not set.
        """
        # NOTE(review): the converted file is always written to
        # inFile+'.cat.'+formatName, even when -o names a different output
        # file -- confirm whether the base class handles that case.
        cmd = os.environ["REPET_PATH"] + "/bin/" + scriptName
        cmd += " -i %s.cat" % ( self.getInputFile() )
        cmd += " -o %s.cat.%s" % ( self.getInputFile(), formatName )
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            print( "ERROR while converting 'cat' file into '%s' format" % ( formatName ) )
            sys.exit(1)


    def convertCatIntoAlign( self ):
        """
        Convert a 'cat' file into the 'align' format.
        """
        self._convertCatFile( "RMcat2align.py", "align" )


    def convertCatIntoPath( self ):
        """
        Convert a 'cat' file into the 'path' format.
        """
        self._convertCatFile( "RMcat2path.py", "path" )


    def setSummary( self ):
        """
        Build the summary of the parameters used for the run.
        """
        self._summary = "input file: %s" % ( self.getInputFile() )
        if self.getSubjectFile() != "":
            self._summary += "\nsubject file: %s" % ( self.getSubjectFile() )
        self._summary += "\nnb processors: %i" % ( self.getNbProcessors() )
        if self.getCalculateGCcontent():
            self._summary += "\ncalculate the GC content"
        if self.getSkipBacterialIsCheck():
            self._summary += "\nskip bacterial insertion element check"
        if self.getMaskSsr():
            self._summary += "\nmask low-complexity DNA or simple repeats"
        if self.getOnlySsr():
            # append (not assign), otherwise the lines above are lost
            self._summary += "\nonly masks low complex/simple repeats (no interspersed repeats)"
        # same default as in checkSpecificAttributes and setListFilesToKeep
        self._setDefaultOutputFileIfMissing()
        self._summary += "\noutput file: %s" % ( self.getOutputFile() )


    def run( self ):
        """
        Run the program, then convert its 'cat' output.

        Exit with status 1 if the program returns a non-zero exit status.
        """
        self.start()

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            print( "LAUNCH: %s" % ( cmd ) )
            sys.stdout.flush()
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            print( "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus ) )
            sys.exit(1)

        # 'path' format in SSR-only mode, 'align' format otherwise
        if self.getOnlySsr():
            self.convertCatIntoPath()
        else:
            self.convertCatIntoAlign()

        self.end()
|
|
281
|
|
282
|
|
if __name__ == "__main__":
    # Script entry point: build the launcher, configure it from the
    # command-line arguments, then run RepeatMasker.
    launcher = RepeatMaskerProgramLauncher()
    launcher.setAttributesFromCmdLine()
    launcher.run()
|