18
|
1 #!/usr/bin/env python
|
|
2
|
|
3 ##@file
|
|
4 # Compare two fasta files of TEs to assess how reference sequences are recovered by de novo consensus.
|
|
5
|
|
6
|
|
7 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
8 # http://www.inra.fr
|
|
9 # http://urgi.versailles.inra.fr
|
|
10 #
|
|
11 # This software is governed by the CeCILL license under French law and
|
|
12 # abiding by the rules of distribution of free software. You can use,
|
|
13 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
14 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
15 # "http://www.cecill.info".
|
|
16 #
|
|
17 # As a counterpart to the access to the source code and rights to copy,
|
|
18 # modify and redistribute granted by the license, users are provided only
|
|
19 # with a limited warranty and the software's author, the holder of the
|
|
20 # economic rights, and the successive licensors have only limited
|
|
21 # liability.
|
|
22 #
|
|
23 # In this respect, the user's attention is drawn to the risks associated
|
|
24 # with loading, using, modifying and/or developing or reproducing the
|
|
25 # software by the user in light of its specific status of free software,
|
|
26 # that may mean that it is complicated to manipulate, and that also
|
|
27 # therefore means that it is reserved for developers and experienced
|
|
28 # professionals having in-depth computer knowledge. Users are therefore
|
|
29 # encouraged to load and test the software's suitability as regards their
|
|
30 # requirements in conditions enabling the security of their systems and/or
|
|
31 # data to be ensured and, more generally, to use and operate it in the
|
|
32 # same conditions as regards security.
|
|
33 #
|
|
34 # The fact that you are presently reading this means that you have had
|
|
35 # knowledge of the CeCILL license and that you accept its terms.
|
|
36
|
|
37
|
|
38 import os
|
|
39 import sys
|
|
40 import getopt
|
|
41 import shutil
|
|
42 import glob
|
|
43
|
|
44
|
|
45 import pyRepet.launcher.programLauncher
|
|
46 from commons.core.coord.AlignUtils import AlignUtils
|
|
47 from commons.core.coord.MatchUtils import MatchUtils
|
|
48 from commons.core.utils.FileUtils import FileUtils
|
|
49 from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
|
|
50 from commons.core.seq.FastaUtils import FastaUtils
|
|
51 from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
|
|
52
|
|
53
|
|
54 class BenchmarkTEconsensus( object ):
|
|
55
|
|
56 def __init__( self ):
|
|
57 self._qryFile = ""
|
|
58 self._sbjFile = ""
|
|
59 self._method = 1
|
|
60 self._keepConflictSbj = False
|
|
61 self._thresholdCoverage = 95
|
|
62 self._thresholdIdentity = 80
|
|
63 self._thresholdEvalue = 1e-10
|
|
64 self._thresholdCoverageMatch = 90
|
|
65 self._useCluster = False
|
|
66 self._queue = ""
|
|
67 self._configFileName = ""
|
|
68 self._clean = False
|
|
69 self._verbose = 0
|
|
70 self._pL = pyRepet.launcher.programLauncher.programLauncher()
|
|
71
|
|
72
|
|
73 def help( self ):
|
|
74 print
|
|
75 print "usage: BenchmarkTEconsensus.py [ options ]"
|
|
76 print "options:"
|
|
77 print " -h: this help"
|
|
78 print " -q: name of the query file (de novo consensus, format='fasta')"
|
|
79 print " -s: name of the subject file (reference sequences, format='fasta')"
|
|
80 print " -m: method"
|
|
81 print " 1: Blaster + Matcher (default)"
|
|
82 print " 2: Blaster + merge + Matcher (not with '-Q')"
|
|
83 print " 3: Orienter + Mafft + Matcher"
|
|
84 print " 4: Yass + Matcher"
|
|
85 print " -a: keep all conflicting subjects in Matcher"
|
|
86 print " -t: coverage threshold over which the match is 'complete' (in %% of the seq length, default=%i)" % self._thresholdCoverage
|
|
87 print " -I: identity threshold for 'CC' matches (default=%i)" % self._thresholdIdentity
|
|
88 print " -E: E-value threshold for 'CC' matches (default=1e-10)"
|
|
89 print " -T: coverage threshold for match length on query compare to subject length (default=%i)" % self._thresholdCoverageMatch
|
|
90 print " -Q: queue name to run in parallel"
|
|
91 print " -C: name of the configuration file (compulsory with '-Q')"
|
|
92 print " -c: clean"
|
|
93 print " -v: verbosity level (default=0/1/2)"
|
|
94 print
|
|
95
|
|
96
|
|
97 def setAttributesFromCmdLine( self ):
|
|
98 try:
|
|
99 opts, args = getopt.getopt( sys.argv[1:], "hq:s:m:at:I:E:T:Q:C:cv:" )
|
|
100 except getopt.GetoptError, err:
|
|
101 sys.stderr.write( "%s\n" % ( str(err) ) )
|
|
102 self.help()
|
|
103 sys.exit(1)
|
|
104 for o,a in opts:
|
|
105 if o == "-h":
|
|
106 self.help();
|
|
107 sys.exit(0)
|
|
108 elif o == "-q":
|
|
109 self._qryFile = a
|
|
110 elif o == "-s":
|
|
111 self._sbjFile = a
|
|
112 elif o == "-m":
|
|
113 self._method = int(a)
|
|
114 elif o == "-a":
|
|
115 self._keepConflictSbj = True
|
|
116 elif o == "-t":
|
|
117 self._thresholdCoverage = int(a)
|
|
118 elif o == "-I":
|
|
119 self._thresholdIdentity = int(a)
|
|
120 elif o == "-E":
|
|
121 self._thresholdEvalue = float(a)
|
|
122 elif o == "-T":
|
|
123 self._thresholdCoverageMatch = int(a)
|
|
124 elif o == "-Q":
|
|
125 self._useCluster = True
|
|
126 self._queue = a
|
|
127 elif o == "-C":
|
|
128 self._configFile = a
|
|
129 elif o == "-c":
|
|
130 self._clean = True
|
|
131 elif o == "-v":
|
|
132 self._verbose = int(a)
|
|
133
|
|
134
|
|
135 def checkAttributes( self ):
|
|
136 if self._qryFile == "":
|
|
137 msg = "ERROR: missing query file (-q)"
|
|
138 sys.stderr.write( "%s\n" % ( msg ) )
|
|
139 self.help()
|
|
140 sys.exit(1)
|
|
141 if not os.path.exists( self._qryFile ):
|
|
142 msg = "ERROR: can't find file '%s'" % ( self._qryFile )
|
|
143 sys.stderr.write( "%s\n" % ( msg ) )
|
|
144 self.help()
|
|
145 sys.exit(1)
|
|
146 if self._sbjFile == "":
|
|
147 msg = "ERROR: missing subject file (-s)"
|
|
148 sys.stderr.write( "%s\n" % ( msg ) )
|
|
149 self.help()
|
|
150 sys.exit(1)
|
|
151 if not os.path.exists( self._sbjFile ):
|
|
152 msg = "ERROR: can't find file '%s'" % ( self._sbjFile )
|
|
153 sys.stderr.write( "%s\n" % ( msg ) )
|
|
154 self.help()
|
|
155 sys.exit(1)
|
|
156 if self._useCluster:
|
|
157 if self._configFile == "":
|
|
158 msg = "ERROR: missing configuration file (-C)"
|
|
159 sys.stderr.write( "%s\n" % ( msg ) )
|
|
160 self.help()
|
|
161 sys.exit(1)
|
|
162 if not os.path.exists( self._configFile ):
|
|
163 msg = "ERROR: can't find file '%s'" % ( self._configFile )
|
|
164 sys.stderr.write( "%s\n" % ( msg ) )
|
|
165 self.help()
|
|
166 sys.exit(1)
|
|
167 if self._method == 2:
|
|
168 msg = "ERROR: can't launch method 2 in parallel"
|
|
169 sys.stderr.write( "%s\n" % ( msg ) )
|
|
170 self.help()
|
|
171 sys.exit(1)
|
|
172 nbSeqQry = FastaUtils.dbSize( self._qryFile )
|
|
173 if nbSeqQry == 0:
|
|
174 print "WARNING: query file is empty"
|
|
175 sys.exit(0)
|
|
176 nbSeqSbj = FastaUtils.dbSize( self._sbjFile )
|
|
177 if nbSeqSbj == 0:
|
|
178 print "WARNING: subject file is empty"
|
|
179 sys.exit(0)
|
|
180
|
|
181
|
|
182 def preprocess( self ):
|
|
183 tmpDir = "tmp%s_t%i_m%i_I%i" % ( os.getpid(),
|
|
184 self._thresholdCoverage,
|
|
185 self._method,
|
|
186 self._thresholdIdentity )
|
|
187 if os.path.exists( tmpDir ):
|
|
188 shutil.rmtree( tmpDir )
|
|
189 os.mkdir( tmpDir )
|
|
190 os.chdir( tmpDir )
|
|
191
|
|
192 os.symlink( "../%s" % self._qryFile, self._qryFile )
|
|
193 csh = ChangeSequenceHeaders()
|
|
194 csh.setInputFile( self._qryFile )
|
|
195 csh.setFormat( "fasta" )
|
|
196 csh.setStep( 1 )
|
|
197 csh.setPrefix( "query" )
|
|
198 csh.setOutputFile( "%s.newH" % ( self._qryFile ) )
|
|
199 csh.setVerbosityLevel( self._verbose )
|
|
200 csh.run()
|
|
201 self._qryFile += ".newH"
|
|
202
|
|
203 if not os.path.exists( self._sbjFile ):
|
|
204 os.symlink( "../%s" % self._sbjFile, self._sbjFile )
|
|
205
|
|
206
|
|
207 def compareFastaViaBlasterMatcher( self, merged=False ):
|
|
208 """
|
|
209 Blaster (+ merged) + Matcher
|
|
210 """
|
|
211 if self._keepConflictSbj:
|
|
212 s = "match"
|
|
213 else:
|
|
214 s = "clean_match"
|
|
215 matchFile = "%s_vs_%s.m%i.align" % ( self._qryFile,
|
|
216 self._sbjFile,
|
|
217 self._method )
|
|
218 pathFile = "%s_vs_%s.m%i.align" % ( self._qryFile,
|
|
219 self._sbjFile,
|
|
220 self._method )
|
|
221 if merged:
|
|
222 matchFile += ".merged"
|
|
223 pathFile += ".merged"
|
|
224 matchFile += ".%s.tab" % ( s )
|
|
225 pathFile += ".%s.path" % ( s )
|
|
226
|
|
227 if not self._useCluster:
|
|
228 prg = "blaster"
|
|
229 cmd = prg
|
|
230 cmd += " -q %s" % ( self._qryFile )
|
|
231 cmd += " -s %s" % ( self._sbjFile )
|
|
232 cmd += " -B %s_vs_%s.m%i" % ( self._qryFile,
|
|
233 self._sbjFile,
|
|
234 self._method )
|
|
235 cmd += " -v %i" % ( self._verbose )
|
|
236 self._pL.launch( prg, cmd )
|
|
237
|
|
238 if merged:
|
|
239 tmpFile = "%s_vs_%s.m%i.align.merged" % ( self._qryFile,
|
|
240 self._sbjFile,
|
|
241 self._method )
|
|
242 AlignUtils.mergeFile( "%s_vs_%s.m%i.align" % ( self._qryFile,
|
|
243 self._sbjFile,
|
|
244 self._method ),
|
|
245 tmpFile )
|
|
246 else:
|
|
247 tmpFile = "%s_vs_%s.m%i.align" % ( self._qryFile,
|
|
248 self._sbjFile,
|
|
249 self._method )
|
|
250
|
|
251 prg = "matcher"
|
|
252 cmd = prg
|
|
253 cmd += " -m %s" % ( tmpFile )
|
|
254 cmd += " -q %s" % ( self._qryFile )
|
|
255 cmd += " -s %s" % ( self._sbjFile )
|
|
256 cmd += " -j"
|
|
257 if self._keepConflictSbj:
|
|
258 cmd += " -a"
|
|
259 cmd += " -v %i" % ( self._verbose )
|
|
260 self._pL.launch( prg, cmd )
|
|
261
|
|
262 else:
|
|
263 os.symlink( "../%s" % self._configFile, self._configFileName )
|
|
264
|
|
265 prg = os.environ["REPET_PATH"] + "/bin/launchBlasterMatcherPerQuery.py"
|
|
266 cmd = prg
|
|
267 cmd += " -q %s" % ( self._qryFile )
|
|
268 cmd += " -s %s" % ( self._sbjFile )
|
|
269 cmd += " -Q %s" % ( self._queue )
|
|
270 cmd += " -C %s" % ( self._configFile )
|
|
271 cmd += " -n %i" % ( 10 )
|
|
272 if self._keepConflictSbj:
|
|
273 cmd += " -M \"%s\"" % ( "-j -a" )
|
|
274 else:
|
|
275 cmd += " -M \"%s\"" % ( "-j" )
|
|
276 cmd += " -Z tab"
|
|
277 if self._clean:
|
|
278 cmd += " -c"
|
|
279 cmd += " -v %i" % ( self._verbose - 1 )
|
|
280 self._pL.launch( prg, cmd )
|
|
281
|
|
282 csh = ChangeSequenceHeaders()
|
|
283 csh.setInputFile( matchFile )
|
|
284 csh.setFormat( "tab" )
|
|
285 csh.setStep( 2 )
|
|
286 csh.setLinkFile( "%slink" % ( self._qryFile ) )
|
|
287 csh.setOutputFile( matchFile.replace(".newH","") )
|
|
288 csh.run()
|
|
289
|
|
290 csh.setInputFile( pathFile )
|
|
291 csh.setFormat( "path" )
|
|
292 csh.setStep( 2 )
|
|
293 csh.setOutputFile( pathFile.replace(".newH","") )
|
|
294 csh.run()
|
|
295
|
|
296 return matchFile.replace(".newH",""), pathFile.replace(".newH","")
|
|
297
|
|
298
|
|
299 def compareFastaViaMafft( self ):
|
|
300 """
|
|
301 Orienter, Mafft, Matcher
|
|
302 """
|
|
303 FastaUtils.dbSplit( self._qryFile, 1, False, False, "query" )
|
|
304 FastaUtils.dbSplit( self._sbjFile, 1, False, False, "subject" )
|
|
305 lQueries = glob.glob( "query_*.fa" )
|
|
306 lSubjects = glob.glob( "subject_*.fa" )
|
|
307
|
|
308 if self._keepConflictSbj:
|
|
309 s = "match"
|
|
310 else:
|
|
311 s = "clean_match"
|
|
312 matchFile = "%s_vs_%s.m%i.align.%s.tab" % ( self._qryFile,
|
|
313 self._sbjFile,
|
|
314 self._method,
|
|
315 s )
|
|
316 os.system( "touch %s" % matchFile )
|
|
317 pathFile = "%s_vs_%s.m%i.align.%s.path" % ( self._qryFile,
|
|
318 self._sbjFile,
|
|
319 self._method,
|
|
320 s )
|
|
321 os.system( "touch %s" % pathFile )
|
|
322
|
|
323 countQueries = 0
|
|
324 for query in lQueries:
|
|
325 countQueries += 1
|
|
326 queryHeader = FastaUtils.dbHeaders( query )[0]
|
|
327 queryLength = FastaUtils.dbLengths( query )[0]
|
|
328 countSubjects = 0
|
|
329 for subject in lSubjects:
|
|
330 countSubjects += 1
|
|
331 subjectHeader = FastaUtils.dbHeaders( subject )[0]
|
|
332 subjectLength = FastaUtils.dbLengths( subject )[0]
|
|
333 if self._verbose > 0:
|
|
334 print "compare '%s' (%i bp, %i/%i) and '%s' (%i bp, %i/%i)" % ( queryHeader, queryLength,
|
|
335 countQueries, len(lQueries),
|
|
336 subjectHeader, subjectLength,
|
|
337 countSubjects, len(lSubjects) )
|
|
338 sys.stdout.flush()
|
|
339 qsLengthRatio = 100 * queryLength / float(subjectLength)
|
|
340 if qsLengthRatio < self._thresholdCoverage - 2 or qsLengthRatio > 100 + (100-self._thresholdCoverage) + 2:
|
|
341 if self._verbose > 0:
|
|
342 print "skip (q/s=%.2f%%)" % ( qsLengthRatio )
|
|
343 continue
|
|
344
|
|
345 tmpFile = "%s_vs_%s" % ( query, subject )
|
|
346 FileUtils.catFilesFromList( [ query, subject ],
|
|
347 tmpFile )
|
|
348 prg = "OrientSequences.py"
|
|
349 cmd = prg
|
|
350 cmd += " -i %s" % ( tmpFile )
|
|
351 cmd += " -p mummer"
|
|
352 cmd += " -c"
|
|
353 cmd += " -v %i" % ( self._verbose - 1 )
|
|
354 self._pL.launch( prg, cmd )
|
|
355 prg = "MafftProgramLauncher.py"
|
|
356 cmd = prg
|
|
357 cmd += " -i %s.oriented" % ( tmpFile )
|
|
358 cmd += " -c"
|
|
359 cmd += " -v %i" % ( self._verbose - 1 )
|
|
360 self._pL.launch( prg, cmd )
|
|
361
|
|
362 absDB = AlignedBioseqDB( "%s.oriented.fa_aln" % tmpFile )
|
|
363 lHeaders = absDB.getHeaderList()
|
|
364 lAligns = absDB.getAlignList( lHeaders[0], lHeaders[1] )
|
|
365 for i in lAligns:
|
|
366 if "re-oriented" in i.getQueryName():
|
|
367 i.setQueryName( queryHeader )
|
|
368 start = i.getQueryStart()
|
|
369 end = i.getQueryEnd()
|
|
370 i.setQueryStart( queryLength - end + 1 )
|
|
371 i.setQueryEnd( queryLength - start + 1 )
|
|
372 if "re-oriented" in i.getSubjectName():
|
|
373 i.setSubjectName( subjectHeader )
|
|
374 start = i.getSubjectStart()
|
|
375 end = i.getSubjectEnd()
|
|
376 i.setSubjectEnd( subjectLength - end + 1 )
|
|
377 i.setSubjectStart( subjectLength - start + 1 )
|
|
378 if not i.isQueryOnDirectStrand():
|
|
379 i.reverse()
|
|
380 AlignUtils.writeListInFile( lAligns,
|
|
381 "%s.oriented.fa_aln.align" % tmpFile )
|
|
382
|
|
383 prg = os.environ["REPET_PATH"] + "/bin/matcher"
|
|
384 cmd = prg
|
|
385 cmd += " -m %s.oriented.fa_aln.align" % ( tmpFile )
|
|
386 cmd += " -q %s" % ( query )
|
|
387 cmd += " -s %s" % ( subject )
|
|
388 cmd += " -j"
|
|
389 if self._keepConflictSbj:
|
|
390 cmd += " -a"
|
|
391 cmd += " -v %i" % ( self._verbose - 1 )
|
|
392 self._pL.launch( prg, cmd )
|
|
393
|
|
394 FileUtils.appendFileContent( "%s.oriented.fa_aln.align.%s.path" % ( tmpFile, s ), pathFile )
|
|
395 lMatches = MatchUtils.getMatchListFromFile( "%s.oriented.fa_aln.align.%s.tab" % ( tmpFile, s ) )
|
|
396 MatchUtils.writeListInFile( lMatches, matchFile, "a" )
|
|
397
|
|
398 for f in [ tmpFile,
|
|
399 "%s.oriented" % ( tmpFile ),
|
|
400 # "%s.oriented.fa_aln" % ( tmpFile ),
|
|
401 "%s.oriented.fa_aln.align" % ( tmpFile ),
|
|
402 "%s.oriented.fa_aln.align.match.fa" % ( tmpFile ),
|
|
403 "%s.oriented.fa_aln.align.match.map" % ( tmpFile ),
|
|
404 "%s.oriented.fa_aln.align.match.param" % ( tmpFile ),
|
|
405 "%s.oriented.fa_aln.align.match.path" % ( tmpFile ),
|
|
406 "%s.oriented.fa_aln.align.match.tab" % ( tmpFile ),
|
|
407 ]:
|
|
408 os.remove( f )
|
|
409
|
|
410 if not FileUtils.isEmpty( matchFile ):
|
|
411 csh = ChangeSequenceHeaders()
|
|
412 csh.setInputFile( matchFile )
|
|
413 csh.setFormat( "tab" )
|
|
414 csh.setStep( 2 )
|
|
415 csh.setLinkFile( "%slink" % ( self._qryFile ) )
|
|
416 csh.setOutputFile( matchFile.replace(".newH","") )
|
|
417 csh.run()
|
|
418 csh.setInputFile( pathFile )
|
|
419 csh.setFormat( "path" )
|
|
420 csh.setStep( 2 )
|
|
421 csh.setOutputFile( pathFile.replace(".newH","") )
|
|
422 csh.run()
|
|
423 return matchFile.replace(".newH",""), pathFile.replace(".newH","")
|
|
424 else:
|
|
425 return "", ""
|
|
426
|
|
427
|
|
428 def compareFastaViaYassMatcher( self, merged=False ):
|
|
429 """
|
|
430 Yass + Matcher
|
|
431 """
|
|
432 if self._keepConflictSbj:
|
|
433 s = "match"
|
|
434 else:
|
|
435 s = "clean_match"
|
|
436 matchFile = "%s_vs_%s.m%i.align" % ( self._qryFile,
|
|
437 self._sbjFile,
|
|
438 self._method )
|
|
439 pathFile = "%s_vs_%s.m%i.align" % ( self._qryFile,
|
|
440 self._sbjFile,
|
|
441 self._method )
|
|
442 if merged:
|
|
443 matchFile += ".merged"
|
|
444 pathFile += ".merged"
|
|
445 matchFile += ".%s.tab" % ( s )
|
|
446 pathFile += ".%s.path" % ( s )
|
|
447
|
|
448 if not self._useCluster:
|
|
449 prg = os.environ["REPET_PATH"] + "/bin/YassProgramLauncher.py"
|
|
450 cmd = prg
|
|
451 cmd += " -i %s" % ( self._qryFile )
|
|
452 cmd += " -s %s" % ( self._sbjFile )
|
|
453 cmd += " -c"
|
|
454 # cmd += " -p '-i 12'"
|
|
455 cmd += " -o %s_vs_%s.m%i.align" % ( self._qryFile,
|
|
456 self._sbjFile,
|
|
457 self._method )
|
|
458 cmd += " -v %i" % ( self._verbose )
|
|
459 self._pL.launch( prg, cmd )
|
|
460
|
|
461 if merged:
|
|
462 tmpFile = "%s_vs_%s.m%i.align.merged" % ( self._qryFile,
|
|
463 self._sbjFile,
|
|
464 self._method )
|
|
465 AlignUtils.mergeFile( "%s_vs_%s.m%i.align" % ( self._qryFile,
|
|
466 self._sbjFile,
|
|
467 self._method ),
|
|
468 tmpFile )
|
|
469 else:
|
|
470 tmpFile = "%s_vs_%s.m%i.align" % ( self._qryFile,
|
|
471 self._sbjFile,
|
|
472 self._method )
|
|
473
|
|
474 prg = os.environ["REPET_PATH"] + "/bin/matcher"
|
|
475 cmd = prg
|
|
476 cmd += " -m %s" % ( tmpFile )
|
|
477 cmd += " -q %s" % ( self._qryFile )
|
|
478 cmd += " -s %s" % ( self._sbjFile )
|
|
479 cmd += " -j"
|
|
480 if self._keepConflictSbj:
|
|
481 cmd += " -a"
|
|
482 cmd += " -v %i" % ( self._verbose )
|
|
483 self._pL.launch( prg, cmd )
|
|
484
|
|
485 csh = ChangeSequenceHeaders()
|
|
486 csh.setInputFile( matchFile )
|
|
487 csh.setFormat( "tab" )
|
|
488 csh.setStep( 2 )
|
|
489 csh.setLinkFile( "%slink" % ( self._qryFile ) )
|
|
490 csh.setOutputFile( matchFile.replace(".newH","") )
|
|
491 csh.run()
|
|
492
|
|
493 csh.setInputFile( pathFile )
|
|
494 csh.setFormat( "path" )
|
|
495 csh.setStep( 2 )
|
|
496 csh.setOutputFile( pathFile.replace(".newH","") )
|
|
497 csh.run()
|
|
498
|
|
499 return matchFile.replace(".newH",""), pathFile.replace(".newH","")
|
|
500
|
|
501
|
|
502 def analyzeMatchFile( self, matchFile, pathFile ):
|
|
503 if matchFile != "":
|
|
504 if self._verbose > 0:
|
|
505 print "analyze the 'tab' file..."
|
|
506 sys.stdout.flush()
|
|
507 prg = os.environ["REPET_PATH"] + "/bin/tabFileReader.py"
|
|
508 cmd = prg
|
|
509 cmd += " -m %s" % ( matchFile )
|
|
510 cmd += " -q %s" % ( self._qryFile.replace(".newH","") )
|
|
511 cmd += " -s %s" % ( self._sbjFile.replace(".newH","") )
|
|
512 cmd += " -t %i" % ( self._thresholdCoverage )
|
|
513 cmd += " -I %i" % ( self._thresholdIdentity )
|
|
514 cmd += " -E %g" % ( self._thresholdEvalue )
|
|
515 cmd += " -T %i" % ( self._thresholdCoverageMatch )
|
|
516 cmd += " -v %i" % ( self._verbose - 1 )
|
|
517 self._pL.launch( prg, cmd )
|
|
518 for f in [ matchFile, pathFile,
|
|
519 "%s_tabFileReader.txt" % matchFile,
|
|
520 "%s_qryCategories.txt" % matchFile,
|
|
521 "%s_sbjCategories.txt" % matchFile ]:
|
|
522 shutil.copy( f, ".." )
|
|
523 os.chdir( ".." )
|
|
524
|
|
525
|
|
526 def start( self ):
|
|
527 self.checkAttributes()
|
|
528 if self._verbose > 0:
|
|
529 print "START BenchmarkTEconsensus.py"
|
|
530 sys.stdout.flush()
|
|
531
|
|
532
|
|
533 def end( self ):
|
|
534 if self._clean:
|
|
535 tmpDir = "tmp%s_t%i_m%i_I%i" % ( os.getpid(),
|
|
536 self._thresholdCoverage,
|
|
537 self._method,
|
|
538 self._thresholdIdentity )
|
|
539 shutil.rmtree( tmpDir )
|
|
540 if self._verbose > 0:
|
|
541 print "END BenchmarkTEconsensus.py"
|
|
542 sys.stdout.flush()
|
|
543
|
|
544
|
|
545 def run( self ):
|
|
546 self.start()
|
|
547
|
|
548 self.preprocess()
|
|
549
|
|
550 if self._method == 1:
|
|
551 matchFile, pathFile = self.compareFastaViaBlasterMatcher()
|
|
552 elif self._method == 2:
|
|
553 matchFile, pathFile = self.compareFastaViaBlasterMatcher( merged=True )
|
|
554 elif self._method == 3:
|
|
555 matchFile, pathFile = self.compareFastaViaMafft()
|
|
556 elif self._method == 4:
|
|
557 matchFile, pathFile = self.compareFastaViaYassMatcher()
|
|
558
|
|
559 self.analyzeMatchFile( matchFile, pathFile )
|
|
560
|
|
561 self.end()
|
|
562
|
|
563
|
|
564 if __name__ == "__main__":
|
|
565 i = BenchmarkTEconsensus()
|
|
566 i.setAttributesFromCmdLine()
|
|
567 i.run()
|