Mercurial > repos > yufei-luo > s_mart
comparison commons/launcher/RepeatMaskerProgramLauncher.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 ##@file | |
4 # Launch RepeatMasker (pairwise alignment for repeat detection). | |
5 | |
6 | |
7 import os | |
8 import sys | |
9 | |
10 from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher | |
11 | |
12 | |
class RepeatMaskerProgramLauncher(AbstractProgramLauncher):
    """
    Launch RepeatMasker (pairwise alignment for repeat detection).

    Exactly one of two modes must be selected (see checkSpecificAttributes):
      - with a subject file (-s): the '.cat' file produced by RepeatMasker
        is converted into the 'align' format;
      - with -m (only low-complexity/simple repeats): the '.cat' file is
        converted into the 'path' format.

    Methods such as getInputFile(), getOutputFile(), setOutputFile(),
    getClean(), getVerbosityLevel(), start() and end() are not defined
    here; they are presumably inherited from AbstractProgramLauncher.
    """

    def __init__(self):
        """
        Constructor: set the default value of each specific attribute.
        """
        AbstractProgramLauncher.__init__(self)
        self._prgName = "RepeatMasker"
        self._formatInFile = "fasta"
        self._sbjFile = ""
        self._nbProc = 1
        self._calcGc = False
        self._skipIs = False
        self._maskSsr = True
        self._onlySsr = False
        # Option letters handled by setASpecificAttributeFromCmdLine()
        # (presumably a getopt-style string where ':' means "takes a value").
        self._cmdLineSpecificOptions = "s:n:gblmo:"

    def getSpecificHelpAsString(self):
        """
        Return the specific help as a string.
        """
        string = ""
        string += "\nspecific options:"
        string += "\n -s: name of the subject file (repeats, format='fasta')"
        string += "\n -n: nb of processors to use in parallel (default='%i')" % (self.getNbProcessors())
        string += "\n -g: calculate the GC content"
        string += "\n -b: skip bacterial insertion element check"
        string += "\n -l: does not mask low-complexity DNA or simple repeats"
        string += "\n -m: only masks low complex/simple repeats (no interspersed repeats)"
        string += "\n -o: name of the output file"
        string += "\n with -s: format='align', default=inFile+'.cat.align')"
        string += "\n with -m: format='path', default=inFile+'.cat.path')"
        return string

    def setASpecificAttributeFromCmdLine(self, o, a=""):
        """
        Set a specific attribute from the command-line arguments.

        @param o: option flag (e.g. "-s")
        @param a: value of the option, if it takes one
        Unknown flags are silently ignored.
        """
        if o == "-s":
            self.setSubjectFile(a)
        elif o == "-n":
            self.setNbProcessors(a)
        elif o == "-g":
            self.setCalculateGCcontent()
        elif o == "-b":
            self.setSkipBacterialIsCheck()
        elif o == "-l":
            self.unsetMaskSsr()
        elif o == "-m":
            self.setOnlySsr()
        elif o == "-o":
            self.setOutputFile(a)

    def setSubjectFile(self, arg):
        self._sbjFile = arg

    def setNbProcessors(self, arg):
        # The value comes from the command line as a string.
        self._nbProc = int(arg)

    def setCalculateGCcontent(self):
        self._calcGc = True

    def setSkipBacterialIsCheck(self):
        self._skipIs = True

    def unsetMaskSsr(self):
        self._maskSsr = False

    def setOnlySsr(self):
        self._onlySsr = True

    def getSubjectFile(self):
        return self._sbjFile

    def getNbProcessors(self):
        return self._nbProc

    def getCalculateGCcontent(self):
        return self._calcGc

    def getSkipBacterialIsCheck(self):
        return self._skipIs

    def getMaskSsr(self):
        return self._maskSsr

    def getOnlySsr(self):
        return self._onlySsr

    def _getDefaultOutputFile(self):
        """
        Return the default output file name: inFile+'.cat.path' when only
        simple repeats are searched (-m), inFile+'.cat.align' otherwise.
        """
        if self.getOnlySsr():
            return "%s.cat.path" % (self.getInputFile())
        return "%s.cat.align" % (self.getInputFile())

    def _setDefaultOutputFileIfMissing(self):
        """
        Give the output file its default name if none was specified.
        """
        if self.getOutputFile() == "":
            self.setOutputFile(self._getDefaultOutputFile())

    def checkSpecificAttributes(self):
        """
        Check the specific attributes before running the program.

        Exactly one of -s (subject file) and -m (only SSR) must be given;
        otherwise an error and the help are printed and the process exits
        with status 1.
        """
        hasSubject = self.getSubjectFile() != ""
        if hasSubject == bool(self.getOnlySsr()):
            string = "ERROR: need to specify -s or -m"
            print(string)
            print(self.getHelpAsString())
            sys.exit(1)
        self._setDefaultOutputFileIfMissing()

    def setWrapperCommandLine(self):
        """
        Set the command-line of the wrapper.
        Required for RepeatMaskerClusterLauncher.
        """
        self._wrpCmdLine = self.getWrapperName()
        self._wrpCmdLine += " -i %s" % (self.getInputFile())
        if self.getSubjectFile() != "":
            self._wrpCmdLine += " -s %s" % (self.getSubjectFile())
        self._wrpCmdLine += " -n %i" % (self.getNbProcessors())
        if self.getCalculateGCcontent():
            self._wrpCmdLine += " -g"
        if self.getSkipBacterialIsCheck():
            self._wrpCmdLine += " -b"
        if not self.getMaskSsr():
            self._wrpCmdLine += " -l"
        if self.getOnlySsr():
            self._wrpCmdLine += " -m"
        if self.getOutputFile() != "":
            self._wrpCmdLine += " -o %s" % (self.getOutputFile())
        if self.getClean():
            self._wrpCmdLine += " -c"
        if self.getVerbosityLevel() != 0:
            self._wrpCmdLine += " -v %i" % (self.getVerbosityLevel())

    def setProgramCommandLine(self):
        """
        Set the command-line of the program.
        """
        self._prgCmdLine = self.getProgramName()
        self._prgCmdLine += " -dir ."
        self._prgCmdLine += " -pa %i" % (self.getNbProcessors())
        if self.getCalculateGCcontent():
            self._prgCmdLine += " -gccalc"
        if self.getSkipBacterialIsCheck():
            self._prgCmdLine += " -no_is"
        # '-nolow' tells RepeatMasker NOT to mask low-complexity DNA or
        # simple repeats, so it must only be passed when masking has been
        # switched off with -l (same condition as in setWrapperCommandLine).
        if not self.getMaskSsr():
            self._prgCmdLine += " -nolow"
        if self.getOnlySsr():
            self._prgCmdLine += " -int"
        if self.getSubjectFile() != "":
            self._prgCmdLine += " -lib %s" % (self.getSubjectFile())
        self._prgCmdLine += " %s" % (self.getInputFile())

    def setListFilesToKeep(self):
        """
        Set the list of files to keep: the output file (given its default
        name if still unset) and the raw '.cat' file.
        """
        self._setDefaultOutputFileIfMissing()
        self.appendFileToKeep(self.getOutputFile())
        self.appendFileToKeep("%s.cat" % (self.getInputFile()))

    def setListFilesToRemove(self):
        """
        Set the list of files to remove.
        """
        for suffix in ("stderr", "tbl", "ori.out", "masked", "out", "log", "ref"):
            self.appendFileToRemove("%s.%s" % (self.getInputFile(), suffix))

    def _convertCat(self, script, outFormat):
        """
        Run the given converter from $REPET_PATH/bin on inFile+'.cat' and
        exit with status 1 if it fails.

        @param script: name of the converter script (e.g. "RMcat2align.py")
        @param outFormat: target format, also used as output extension
        NOTE(review): the result is always written to inFile+'.cat.'+format,
        whatever was given with -o, while setListFilesToKeep() keeps
        getOutputFile() -- confirm whether -o is meant to be honoured here.
        """
        cmd = os.environ["REPET_PATH"] + "/bin/" + script
        cmd += " -i %s.cat" % (self.getInputFile())
        cmd += " -o %s.cat.%s" % (self.getInputFile(), outFormat)
        exitStatus = os.system(cmd)
        if exitStatus != 0:
            string = "ERROR while converting 'cat' file into '%s' format" % (outFormat)
            print(string)
            sys.exit(1)

    def convertCatIntoAlign(self):
        """
        Convert a 'cat' file into the 'align' format.
        """
        self._convertCat("RMcat2align.py", "align")

    def convertCatIntoPath(self):
        """
        Convert a 'cat' file into the 'path' format.
        """
        self._convertCat("RMcat2path.py", "path")

    def setSummary(self):
        """
        Build the summary of the run (input, options, output file).
        """
        self._summary = "input file: %s" % (self.getInputFile())
        if self.getSubjectFile() != "":
            self._summary += "\nsubject file: %s" % (self.getSubjectFile())
        self._summary += "\nnb processors: %i" % (self.getNbProcessors())
        if self.getCalculateGCcontent():
            self._summary += "\ncalculate the GC content"
        if self.getSkipBacterialIsCheck():
            self._summary += "\nskip bacterial insertion element check"
        if self.getMaskSsr():
            self._summary += "\nmask low-complexity DNA or simple repeats"
        if self.getOnlySsr():
            # Append (not assign), otherwise the lines above are lost.
            self._summary += "\nonly masks low complex/simple repeats (no interspersed repeats)"
        # Default name depends on -m, as in checkSpecificAttributes().
        self._setDefaultOutputFileIfMissing()
        self._summary += "\noutput file: %s" % (self.getOutputFile())

    def run(self):
        """
        Run the program, then convert its '.cat' output into 'align'
        format (or 'path' format with -m).

        Exits with status 1 if RepeatMasker returns a non-zero status.
        """
        self.start()

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            print("LAUNCH: %s" % (cmd))
            sys.stdout.flush()
        exitStatus = os.system(cmd)
        if exitStatus != 0:
            string = "ERROR: program '%s' returned exit status '%i'" % (self.getProgramName(), exitStatus)
            print(string)
            sys.exit(1)

        if not self.getOnlySsr():
            self.convertCatIntoAlign()
        else:
            self.convertCatIntoPath()

        self.end()
282 | |
if __name__ == "__main__":
    # Script entry point: build a launcher, configure it from the
    # command-line arguments, then run RepeatMasker.
    launcher = RepeatMaskerProgramLauncher()
    launcher.setAttributesFromCmdLine()
    launcher.run()