Mercurial > repos > yufei-luo > s_mart
comparison commons/launcher/RepeatMaskerProgramLauncher.py @ 31:0ab839023fe4
Uploaded
| author | m-zytnicki |
|---|---|
| date | Tue, 30 Apr 2013 14:33:21 -0400 |
| parents | 94ab73e8a190 |
| children |
comparison
equal
deleted
inserted
replaced
| 30:5677346472b5 | 31:0ab839023fe4 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 ##@file | |
| 4 # Launch RepeatMasker (pairwise alignment for repeat detection). | |
| 5 | |
| 6 | |
| 7 import os | |
| 8 import sys | |
| 9 | |
| 10 from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher | |
| 11 | |
| 12 | |
class RepeatMaskerProgramLauncher( AbstractProgramLauncher ):
    """
    Launch RepeatMasker (pairwise alignment for repeat detection).

    Exactly one of two modes must be selected (see checkSpecificAttributes):
      - a subject file of repeats is given (-s): the 'cat' output is
        converted into the 'align' format;
      - only low-complexity/simple repeats are searched (-m): the 'cat'
        output is converted into the 'path' format.
    """

    def __init__( self ):
        """
        Constructor: initialise the RepeatMasker-specific attributes.
        """
        AbstractProgramLauncher.__init__( self )
        self._prgName = "RepeatMasker"
        self._formatInFile = "fasta"
        self._sbjFile = ""      # library of repeats (option -s)
        self._nbProc = 1        # number of processors (option -n)
        self._calcGc = False    # option -g
        self._skipIs = False    # option -b
        self._maskSsr = True    # switched off by option -l
        self._onlySsr = False   # option -m
        self._cmdLineSpecificOptions = "s:n:gblmo:"


    def getSpecificHelpAsString( self ):
        """
        Return the help about the specific options as a string.
        """
        lines = [
            "",
            "\nspecific options:",
            "\n -s: name of the subject file (repeats, format='fasta')",
            "\n -n: nb of processors to use in parallel (default='%i')" % ( self.getNbProcessors() ),
            "\n -g: calculate the GC content",
            "\n -b: skip bacterial insertion element check",
            "\n -l: does not mask low-complexity DNA or simple repeats",
            "\n -m: only masks low complex/simple repeats (no interspersed repeats)",
            "\n -o: name of the output file",
            "\n with -s: format='align', default=inFile+'.cat.align')",
            "\n with -m: format='path', default=inFile+'.cat.path')",
        ]
        return "".join( lines )


    def setASpecificAttributeFromCmdLine( self, o, a="" ):
        """
        Set a specific attribute from the command-line arguments.

        @param o: option flag (e.g. '-s')
        @param a: value of the option, if it takes one
        """
        if o == "-s":
            self.setSubjectFile( a )
        elif o == "-n":
            self.setNbProcessors( a )
        elif o == "-g":
            self.setCalculateGCcontent()
        elif o == "-b":
            self.setSkipBacterialIsCheck()
        elif o == "-l":
            self.unsetMaskSsr()
        elif o == "-m":
            self.setOnlySsr()
        elif o == "-o":
            self.setOutputFile( a )


    def setSubjectFile( self, arg ):
        """Set the name of the subject file (library of repeats)."""
        self._sbjFile = arg


    def setNbProcessors( self, arg ):
        """Set the number of processors; 'arg' is converted with int()."""
        self._nbProc = int( arg )


    def setCalculateGCcontent( self ):
        """Ask for the GC content to be calculated."""
        self._calcGc = True


    def setSkipBacterialIsCheck( self ):
        """Ask for the bacterial insertion element check to be skipped."""
        self._skipIs = True


    def unsetMaskSsr( self ):
        """Ask not to mask low-complexity DNA or simple repeats."""
        self._maskSsr = False


    def setOnlySsr( self ):
        """Ask to mask only low-complexity DNA or simple repeats."""
        self._onlySsr = True


    def getSubjectFile( self ):
        """Return the name of the subject file ('' if not set)."""
        return self._sbjFile


    def getNbProcessors( self ):
        """Return the number of processors."""
        return self._nbProc


    def getCalculateGCcontent( self ):
        """Return True if the GC content has to be calculated."""
        return self._calcGc


    def getSkipBacterialIsCheck( self ):
        """Return True if the bacterial IS check has to be skipped."""
        return self._skipIs


    def getMaskSsr( self ):
        """Return True if low-complexity DNA/simple repeats are masked."""
        return self._maskSsr


    def getOnlySsr( self ):
        """Return True if only low-complexity DNA/simple repeats are masked."""
        return self._onlySsr


    def _setDefaultOutputFile( self ):
        """
        Give the output file its default name if none was specified:
        inFile+'.cat.path' with -m, inFile+'.cat.align' otherwise.
        """
        if self.getOutputFile() != "":
            return
        if self.getOnlySsr():
            suffix = "cat.path"
        else:
            suffix = "cat.align"
        self.setOutputFile( "%s.%s" % ( self.getInputFile(), suffix ) )


    def checkSpecificAttributes( self ):
        """
        Check the specific attributes before running the program.

        Print an error with the help and exit with status 1 unless exactly
        one of the subject file (-s) and the only-SSR mode (-m) is given.
        """
        hasSubject = self.getSubjectFile() != ""
        # -s and -m are mutually exclusive, and one of them is required
        if hasSubject == bool( self.getOnlySsr() ):
            string = "ERROR: need to specify -s or -m"
            print( string )
            print( self.getHelpAsString() )
            sys.exit(1)
        self._setDefaultOutputFile()


    def setWrapperCommandLine( self ):
        """
        Set the command-line of the wrapper.
        Required for RepeatMaskerClusterLauncher.
        """
        cmd = self.getWrapperName()
        cmd += " -i %s" % ( self.getInputFile() )
        if self.getSubjectFile() != "":
            cmd += " -s %s" % ( self.getSubjectFile() )
        cmd += " -n %i" % ( self.getNbProcessors() )
        if self.getCalculateGCcontent():
            cmd += " -g"
        if self.getSkipBacterialIsCheck():
            cmd += " -b"
        if not self.getMaskSsr():
            cmd += " -l"
        if self.getOnlySsr():
            cmd += " -m"
        if self.getOutputFile() != "":
            cmd += " -o %s" % ( self.getOutputFile() )
        if self.getClean():
            cmd += " -c"
        if self.getVerbosityLevel() != 0:
            cmd += " -v %i" % ( self.getVerbosityLevel() )
        self._wrpCmdLine = cmd


    def setProgramCommandLine( self ):
        """
        Set the command-line of the program.
        """
        cmd = self.getProgramName()
        cmd += " -dir ."
        cmd += " -pa %i" % ( self.getNbProcessors() )
        if self.getCalculateGCcontent():
            cmd += " -gccalc"
        if self.getSkipBacterialIsCheck():
            cmd += " -no_is"
        # '-nolow' disables the masking of low-complexity DNA and simple
        # repeats, hence it is passed only when that masking was unset (-l).
        if not self.getMaskSsr():
            cmd += " -nolow"
        if self.getOnlySsr():
            cmd += " -int"
        if self.getSubjectFile() != "":
            cmd += " -lib %s" % ( self.getSubjectFile() )
        cmd += " %s" % ( self.getInputFile() )
        self._prgCmdLine = cmd


    def setListFilesToKeep( self ):
        """
        Set the list of files to keep: the output file and the 'cat' file.
        """
        self._setDefaultOutputFile()
        self.appendFileToKeep( self.getOutputFile() )
        self.appendFileToKeep( "%s.cat" % ( self.getInputFile() ) )


    def setListFilesToRemove( self ):
        """
        Set the list of files to remove.
        """
        for extension in ( "stderr", "tbl", "ori.out", "masked", "out", "log", "ref" ):
            self.appendFileToRemove( "%s.%s" % ( self.getInputFile(), extension ) )


    def _convertCat( self, converter, outFormat ):
        """
        Run a converter script from $REPET_PATH/bin on the 'cat' file.
        Print an error and exit with status 1 if the script fails.

        @param converter: name of the script (e.g. 'RMcat2align.py')
        @param outFormat: target format, also used as extension of the result
        """
        # NOTE(review): the result is always written to inFile+'.cat.<format>',
        # even when another name was given via -o -- confirm whether the
        # inherited end() handles that case.
        cmd = os.environ["REPET_PATH"] + "/bin/%s" % ( converter )
        cmd += " -i %s.cat" % ( self.getInputFile() )
        cmd += " -o %s.cat.%s" % ( self.getInputFile(), outFormat )
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            string = "ERROR while converting 'cat' file into '%s' format" % ( outFormat )
            print( string )
            sys.exit(1)


    def convertCatIntoAlign( self ):
        """
        Convert a 'cat' file into the 'align' format.
        """
        self._convertCat( "RMcat2align.py", "align" )


    def convertCatIntoPath( self ):
        """
        Convert a 'cat' file into the 'path' format.
        """
        self._convertCat( "RMcat2path.py", "path" )


    def setSummary( self ):
        """
        Set the summary describing the parameters of the run.
        """
        self._summary = "input file: %s" % ( self.getInputFile() )
        if self.getSubjectFile() != "":
            self._summary += "\nsubject file: %s" % ( self.getSubjectFile() )
        self._summary += "\nnb processors: %i" % ( self.getNbProcessors() )
        if self.getCalculateGCcontent():
            self._summary += "\ncalculate the GC content"
        if self.getSkipBacterialIsCheck():
            self._summary += "\nskip bacterial insertion element check"
        if self.getMaskSsr():
            self._summary += "\nmask low-complexity DNA or simple repeats"
        if self.getOnlySsr():
            # append (not assign) so that the lines above are not lost
            self._summary += "\nonly masks low complex/simple repeats (no interspersed repeats)"
        # same default as in checkSpecificAttributes: it depends on -m
        self._setDefaultOutputFile()
        self._summary += "\noutput file: %s" % ( self.getOutputFile() )


    def run( self ):
        """
        Run the program, then convert its 'cat' output into the 'align'
        format (or into the 'path' format in only-SSR mode).
        Exit with status 1 if the program returns a non-zero status.
        """
        self.start()

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            print( "LAUNCH: %s" % ( cmd ) )
            sys.stdout.flush()
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
            print( string )
            sys.exit(1)

        if self.getOnlySsr():
            self.convertCatIntoPath()
        else:
            self.convertCatIntoAlign()

        self.end()
| 281 | |
| 282 | |
if __name__ == "__main__":
    # Script entry point: configure the launcher from the command line, then run it.
    launcher = RepeatMaskerProgramLauncher()
    launcher.setAttributesFromCmdLine()
    launcher.run()
