18
|
1 """
|
|
2 Launch Blaster and then Matcher to compare the input sequences with known TEs via blastn and record the results into a MySQL table.
|
|
3 """
|
|
4
|
|
5 import os
|
|
6 import ConfigParser
|
|
7 from commons.core.utils.FileUtils import FileUtils
|
|
8 from commons.core.LoggerFactory import LoggerFactory
|
|
9
|
|
10 LOG_DEPTH = "repet.tools"
|
|
11
|
|
class RepbaseBLRnForClassifierStep1( object ):
    """
    Launch Blaster and then Matcher to compare the input sequences with known TEs via blastn and record the results into a MySQL table.

    @param inFileName: name of the input fasta file
    @type inFileName: string

    @param launch_1: generic command at the beginning of a specific command
    @type launch_1: string

    @param launch_2: generic command at the end of a specific command
    @type launch_2: string

    @param cDir: current directory (where to retrieve the result files)
    @type cDir: string

    @param tmpDir: temporary directory (where the job will run)
    @type tmpDir: string

    @param configFileName: configuration file name
    @type configFileName: string

    @param verbose: verbose(0/1/2)
    @type verbose: int

    @param pL: program launcher
    @type pL: programLauncher Instance

    @param project: project name
    @type project: string
    """

    def __init__(self, inFileName, launch_1, launch_2, cDir, tmpDir, configFileName, verbose, pL, project):
        """
        Constructor: store the parameters, parse the configuration file and
        read the name of the TE nucleotide bank from its 'detect_features'
        section (option 'TE_nucl_bank').

        The configuration file must be readable, otherwise opening it raises
        an error.
        """
        self._inFileName = inFileName
        self._launch_1 = launch_1
        self._launch_2 = launch_2
        self._cDir = cDir
        self._tmpDir = tmpDir
        self._verbose = verbose
        self._pL = pL
        self._project = project
        self._fileUtils = FileUtils()
        self._config = ConfigParser.ConfigParser()
        self._configFileName = configFileName
        # Use a context manager so the file handle is closed as soon as the
        # configuration has been parsed (it used to be left open).
        with open(self._configFileName) as configFile:
            self._config.readfp( configFile )
        self._bank = self._config.get("detect_features","TE_nucl_bank")
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbose)

    @staticmethod
    def _getProgramPath( programName ):
        """
        Return the path of a program located in the 'bin' directory of REPET.

        The environment variable REPET_PATH must be set (KeyError otherwise).
        """
        return os.environ["REPET_PATH"] + "/bin/" + programName

    def _useWuBlast( self ):
        """
        Return True if option 'wublast' of section 'detect_features' is "yes".
        """
        return self._config.get("detect_features","wublast") == "yes"

    @staticmethod
    def _moveIfAbsentSnippet( fileName, destDir ):
        """
        Return the Python statements (as text) moving 'fileName' into
        'destDir' unless a file with that name is already there.
        """
        snippet = "if not os.path.exists( \"%s/%s\" ):\n" % ( destDir, fileName )
        snippet += "\tos.system( \"mv %s %s\" )\n" % ( fileName, destDir )
        return snippet

    @staticmethod
    def _removeIfPresentSnippet( fileName ):
        """
        Return the Python statements (as text) removing 'fileName' if it exists.
        """
        snippet = "if os.path.exists( \"%s\" ):\n" % ( fileName )
        snippet += "\tos.remove( \"%s\" )\n" % ( fileName )
        return snippet

    def formatRepbase_ntIfNecessary( self ):
        """
        Format Repbase (make 'cut' files).

        Nothing is done if '<bank>_cut' already exists. Otherwise blaster is
        launched on the bank and the '<bank>-blastn-*.param' files are removed.
        """
        if os.path.exists( "%s_cut" % ( self._bank ) ):
            return
        self._log.debug("prepare bank '%s'..." % ( self._bank ))
        prg = self._getProgramPath( "blaster" )
        cmd = prg
        cmd += " -s %s" % ( self._bank )
        cmd += " -n blastn"
        if self._useWuBlast():
            cmd += " -W"
        cmd += " -r"
        cmd += " -P"
        self._pL.launch( prg, cmd )
        os.system( "rm -f %s-blastn-*.param" % ( self._bank ) )

    def createCmdToLaunch( self ):
        """
        Build the text of the job comparing the input file with the bank.

        The text is made of, in this order: the blaster command, Python
        statements handling the blaster output files, the matcher command and
        Python statements handling the matcher output files. Each program
        command is surrounded by 'launch_1' and 'launch_2'. When the temporary
        directory differs from the current directory, statements removing the
        bank file are appended.

        @return: all the commands to run the job
        @rtype: string
        """
        inFile = self._inFileName
        bank = self._bank
        # Common prefix of every file produced by blaster and matcher
        blasterPrefix = "%s_BLRn_%s" % ( inFile, bank )
        matcherPrefix = blasterPrefix + ".align.clean_match"

        # blaster command
        parts = [ self._launch_1,
                  self._getProgramPath( "blaster" ),
                  " -q %s" % ( inFile ),
                  " -s %s/%s" % ( self._cDir, bank ),
                  " -B %s" % ( blasterPrefix ),
                  " -n blastn" ]
        if self._useWuBlast():
            parts.append( " -W" )
        parts.append( " -r" )
        parts.append( " -v 1" )
        parts.append( self._launch_2 )

        # keep the blaster parameter file, drop the intermediate files
        parts.append( self._moveIfAbsentSnippet( blasterPrefix + ".param", self._cDir ) )
        parts.append( "if os.path.exists( \"%s_cut\" ):\n" % ( inFile ) )
        parts.append( "\tos.system( \"rm -f %s_cut*\" )\n" % ( inFile ) )
        parts.append( self._removeIfPresentSnippet( inFile + ".Nstretch.map" ) )
        parts.append( self._removeIfPresentSnippet( blasterPrefix + ".raw" ) )
        parts.append( self._removeIfPresentSnippet( blasterPrefix + ".seq_treated" ) )

        # matcher command
        parts.append( self._launch_1 )
        parts.append( self._getProgramPath( "matcher" ) )
        parts.append( " -m %s.align" % ( blasterPrefix ) )
        parts.append( " -q %s" % ( inFile ) )
        parts.append( " -s %s/%s" % ( self._cDir, bank ) )
        parts.append( " -j" )
        parts.append( " -v 1" )
        parts.append( self._launch_2 )

        # keep the matcher 'path' and 'param' files, drop the other outputs
        parts.append( self._moveIfAbsentSnippet( matcherPrefix + ".path", self._cDir ) )
        parts.append( self._moveIfAbsentSnippet( matcherPrefix + ".param", self._cDir ) )
        parts.append( self._removeIfPresentSnippet( blasterPrefix + ".align" ) )
        for extension in ( ".fa", ".map", ".tab" ):
            parts.append( self._removeIfPresentSnippet( matcherPrefix + extension ) )

        # the bank is removed only when the job runs outside the current directory
        if self._tmpDir != self._cDir:
            parts.append( self._removeIfPresentSnippet( bank ) )

        return "".join( parts )

    def collectRepbaseBLRn( self ):
        """
        Concatenate the outputs of blastn, adapt the ID and load the results into a table.
        """
        # Only the file name of the bank is used to build the result file names
        bank = os.path.basename( self._bank )
        self._concatPathFile(bank)
        self._adaptIDInPathFile(bank)
        self._loadPathFileInTable(bank)
        self._findAndRemoveUselessFiles(bank)

    def _concatPathFile(self, bank):
        """
        Concatenate the 'path' files of all batches (looked for in the parent
        directory) into '<project>_BLRn_<bank>.align.clean_match.path.tmp'.
        """
        FileUtils.catFilesByPattern("../batch_*.fa_BLRn_%s.align.clean_match.path" % bank,
                                    "%s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank))

    def _adaptIDInPathFile(self, bank):
        """
        Launch 'pathnum2id' on the temporary 'path' file to write the final
        'path' file.

        The program 'pathnum2id' is used when it exists in the 'bin' directory
        of REPET (with verbosity 'verbose - 1'); otherwise 'pathnum2id.py' is
        launched, without any verbosity option.
        """
        pathFile = "%s_BLRn_%s.align.clean_match.path" % (self._project, bank)
        args = " -i %s.tmp" % (pathFile)
        args += " -o %s" % (pathFile)
        prg = self._getProgramPath("pathnum2id")
        if os.path.exists(prg):
            args += " -v %i" % (self._verbose - 1)
        else:
            prg += ".py"
        self._pL.launch(prg, prg + args)

    def _loadPathFileInTable(self, bank):
        """
        Launch 'srptCreateTable.py' to load the final 'path' file into the
        table '<project>_TE_BLRn_path', using the configuration file looked
        for in the parent directory.
        """
        prg = self._getProgramPath("srptCreateTable.py")
        cmd = prg
        cmd += " -f %s_BLRn_%s.align.clean_match.path" % (self._project, bank)
        cmd += " -n %s_TE_BLRn_path" % (self._project)
        cmd += " -t path"
        cmd += " -c ../%s" % (self._configFileName)
        self._pL.launch(prg, cmd)

    def _findAndRemoveUselessFiles(self, bank):
        """
        Remove the result files of all batches from the parent directory, and
        the temporary concatenated 'path' file.
        """
        prg = "find"
        cmd = prg
        # "\\;" gives the same text as the former "\;" without relying on an
        # unrecognized escape sequence.
        cmd += " .. -name \"batch_*.fa_BLRn_%s.*\" -exec rm {} \\;" % (bank)
        self._pL.launch(prg, cmd)
        prg = "rm"
        cmd = prg
        cmd += " %s_BLRn_%s.align.clean_match.path.tmp" % (self._project, bank)
        self._pL.launch(prg, cmd)
|