Mercurial > repos > yufei-luo > s_mart
comparison commons/pyRepetUnit/blastnForClassifierStep1/RepbaseBLRnForClassifierStep1.py @ 18:94ab73e8a190
Uploaded
| author | m-zytnicki |
|---|---|
| date | Mon, 29 Apr 2013 03:20:15 -0400 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 17:b0e8584489e6 | 18:94ab73e8a190 |
|---|---|
| 1 """ | |
| 2 Launch Blaster and then Matcher to compare the input sequences with known TEs via blastn and record the results into a MySQL table. | |
| 3 """ | |
| 4 | |
| 5 import os | |
| 6 import ConfigParser | |
| 7 from commons.core.utils.FileUtils import FileUtils | |
| 8 from commons.core.LoggerFactory import LoggerFactory | |
| 9 | |
# Prefix of the logger name: RepbaseBLRnForClassifierStep1 joins it with its
# own class name ("<LOG_DEPTH>.<ClassName>") when it creates its logger.
LOG_DEPTH = "repet.tools"
| 11 | |
class RepbaseBLRnForClassifierStep1( object ):
    """
    Launch Blaster and then Matcher to compare the input sequences with known TEs via blastn and record the results into a MySQL table.

    The bank of known TEs is read from option 'TE_nucl_bank' of section
    'detect_features' in the configuration file.
    """

    def __init__(self, inFileName, launch_1, launch_2, cDir, tmpDir, configFileName, verbose, pL, project):
        """
        Constructor

        @param inFileName: name of the input fasta file
        @type inFileName: string

        @param launch_1: generic command at the beginning of a specific command
        @type launch_1: string

        @param launch_2: generic command at the end of a specific command
        @type launch_2: string

        @param cDir: current directory (where to retrieve the result files)
        @type cDir: string

        @param tmpDir: temporary directory (where the job will run)
        @type tmpDir: string

        @param configFileName: configuration file name
        @type configFileName: string

        @param verbose: verbose(0/1/2)
        @type verbose: int

        @param pL: program launcher
        @type pL: programLauncher Instance

        @param project: project name
        @type project: string
        """
        self._inFileName = inFileName
        self._launch_1 = launch_1
        self._launch_2 = launch_2
        self._cDir = cDir
        self._tmpDir = tmpDir
        self._verbose = verbose
        self._pL = pL
        self._project = project
        self._fileUtils = FileUtils()
        self._config = ConfigParser.ConfigParser()
        self._configFileName = configFileName
        # Close the configuration file as soon as it is parsed instead of
        # leaving the handle open until garbage collection.
        with open(self._configFileName) as configFile:
            self._config.readfp( configFile )
        self._bank = self._config.get("detect_features","TE_nucl_bank")
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbose)

    def formatRepbase_ntIfNecessary( self ):
        """
        Format Repbase (make 'cut' files).

        Nothing is done when '<bank>_cut' already exists. Otherwise blaster
        is launched on the bank with option -P (and -W when 'wublast' is
        'yes' in the configuration), then the '<bank>-blastn-*.param' files
        are deleted.
        """
        if os.path.exists( "%s_cut" % ( self._bank ) ):
            return
        self._log.debug("prepare bank '%s'..." % ( self._bank ))
        prg = os.environ["REPET_PATH"] + "/bin/blaster"
        cmd = prg
        cmd += " -s %s" % ( self._bank )
        cmd += " -n blastn"
        if self._config.get("detect_features","wublast") == "yes":
            cmd += " -W"
        cmd += " -r"
        cmd += " -P"
        self._pL.launch( prg, cmd )
        os.system( "rm -f %s-blastn-*.param" % ( self._bank ) )

    @staticmethod
    def _cmdMoveIfAbsent( fileName, destDir ):
        """
        Return the generated statements moving fileName into destDir unless destDir already holds it.
        """
        return "if not os.path.exists( \"%s/%s\" ):\n\tos.system( \"mv %s %s\" )\n" % ( destDir, fileName, fileName, destDir )

    @staticmethod
    def _cmdRemoveIfExists( fileName ):
        """
        Return the generated statements deleting fileName when it exists.
        """
        return "if os.path.exists( \"%s\" ):\n\tos.remove( \"%s\" )\n" % ( fileName, fileName )

    def createCmdToLaunch( self ):
        """
        Build the text of the job: a blaster run, clean-up statements, a matcher run and more clean-up statements.

        Each program command line is surrounded by launch_1 and launch_2.
        The clean-up parts are Python statements (os.path.exists, os.system,
        os.remove) written as text; they are not executed here.
        NOTE(review): launch_1/launch_2 presumably wrap the command line into
        a Python call so that the whole text is a valid script - confirm
        against the caller.

        @return: all the commands to run the job
        @rtype: string
        """
        inFile = self._inFileName
        bank = self._bank
        cDir = self._cDir
        binDir = os.environ["REPET_PATH"] + "/bin"
        # Common prefix of every blaster/matcher output file of this job.
        blrn = "%s_BLRn_%s" % ( inFile, bank )
        match = "%s.align.clean_match" % ( blrn )

        # --- blaster ---
        parts = [ self._launch_1, binDir + "/blaster" ]
        parts.append( " -q %s" % ( inFile ) )
        parts.append( " -s %s/%s" % ( cDir, bank ) )
        parts.append( " -B %s" % ( blrn ) )
        parts.append( " -n blastn" )
        if self._config.get("detect_features","wublast") == "yes":
            parts.append( " -W" )
        parts.append( " -r" )
        parts.append( " -v 1" )
        parts.append( self._launch_2 )

        # Keep the blaster parameter file, drop its other by-products.
        parts.append( self._cmdMoveIfAbsent( "%s.param" % ( blrn ), cDir ) )
        parts.append( "if os.path.exists( \"%s_cut\" ):\n" % ( inFile ) )
        parts.append( "\tos.system( \"rm -f %s_cut*\" )\n" % ( inFile ) )
        parts.append( self._cmdRemoveIfExists( "%s.Nstretch.map" % ( inFile ) ) )
        parts.append( self._cmdRemoveIfExists( "%s.raw" % ( blrn ) ) )
        parts.append( self._cmdRemoveIfExists( "%s.seq_treated" % ( blrn ) ) )

        # --- matcher ---
        parts.append( self._launch_1 )
        parts.append( binDir + "/matcher" )
        parts.append( " -m %s.align" % ( blrn ) )
        parts.append( " -q %s" % ( inFile ) )
        parts.append( " -s %s/%s" % ( cDir, bank ) )
        parts.append( " -j" )
        parts.append( " -v 1" )
        parts.append( self._launch_2 )

        # Keep the matcher 'path' and 'param' files, drop everything else.
        parts.append( self._cmdMoveIfAbsent( "%s.path" % ( match ), cDir ) )
        parts.append( self._cmdMoveIfAbsent( "%s.param" % ( match ), cDir ) )
        parts.append( self._cmdRemoveIfExists( "%s.align" % ( blrn ) ) )
        parts.append( self._cmdRemoveIfExists( "%s.fa" % ( match ) ) )
        parts.append( self._cmdRemoveIfExists( "%s.map" % ( match ) ) )
        parts.append( self._cmdRemoveIfExists( "%s.tab" % ( match ) ) )

        # The bank file is only deleted by the job when the job does not run
        # in the current directory.
        if self._tmpDir != self._cDir:
            parts.append( self._cmdRemoveIfExists( bank ) )

        return "".join( parts )

    def collectRepbaseBLRn( self ):
        """
        Concatenate the outputs of blastn, adapt the ID and load the results into a table.

        The temporary files are removed afterwards. Only the base name of the
        bank is used to build the file names.
        """
        bank = os.path.basename( self._bank )
        self._concatPathFile(bank)
        self._adaptIDInPathFile(bank)
        self._loadPathFileInTable(bank)
        self._findAndRemoveUselessFiles(bank)

    def _concatPathFile(self, bank):
        """
        Concatenate the '../batch_*.fa_BLRn_<bank>.align.clean_match.path' files into '<project>_BLRn_<bank>.align.clean_match.path.tmp'.
        """
        FileUtils.catFilesByPattern("../batch_*.fa_BLRn_%s.align.clean_match.path" % bank,
                                    "%s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank))

    def _adaptIDInPathFile(self, bank):
        """
        Launch pathnum2id to turn the '.path.tmp' file into the final '.path' file.

        '$REPET_PATH/bin/pathnum2id' is used when it exists, with verbosity
        'verbose - 1'; otherwise 'pathnum2id.py' is launched without any
        verbosity option.
        """
        prg = os.environ["REPET_PATH"] + "/bin/pathnum2id"
        withVerbosity = os.path.exists(prg)
        if not withVerbosity:
            prg += ".py"
        cmd = prg
        cmd += " -i %s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank)
        cmd += " -o %s_BLRn_%s.align.clean_match.path" % (self._project, bank)
        if withVerbosity:
            cmd += " -v %i" % (self._verbose - 1)
        self._pL.launch(prg, cmd)

    def _loadPathFileInTable(self, bank):
        """
        Launch srptCreateTable.py to load the '.path' file into table '<project>_TE_BLRn_path'.

        The configuration file is given relative to the parent directory.
        """
        prg = os.environ["REPET_PATH"] + "/bin/srptCreateTable.py"
        cmd = prg
        cmd += " -f %s_BLRn_%s.align.clean_match.path" % (self._project, bank)
        cmd += " -n %s_TE_BLRn_path" % (self._project)
        cmd += " -t path"
        cmd += " -c ../%s" % (self._configFileName)
        self._pL.launch(prg, cmd)

    def _findAndRemoveUselessFiles(self, bank):
        """
        Remove the per-batch result files found under the parent directory and the concatenated '.path.tmp' file.
        """
        prg = "find"
        cmd = prg
        # '\\;' yields the two characters '\;' that terminate find's -exec.
        cmd += " .. -name \"batch_*.fa_BLRn_%s.*\" -exec rm {} \\;" % (bank)
        self._pL.launch(prg, cmd)
        prg = "rm"
        cmd = prg
        cmd += " %s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank)
        self._pL.launch(prg, cmd)
