annotate commons/tools/BenchmarkTEconsensus.py @ 19:9bcfa7936eec

Deleted selected files
author m-zytnicki
date Mon, 29 Apr 2013 03:23:29 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 ##@file
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 # Compare two fasta files of TEs to assess how reference sequences are recovered by de novo consensus.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 # Copyright INRA (Institut National de la Recherche Agronomique)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 # http://www.inra.fr
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 # http://urgi.versailles.inra.fr
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 # This software is governed by the CeCILL license under French law and
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 # abiding by the rules of distribution of free software. You can use,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13 # modify and/ or redistribute the software under the terms of the CeCILL
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14 # license as circulated by CEA, CNRS and INRIA at the following URL
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15 # "http://www.cecill.info".
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
17 # As a counterpart to the access to the source code and rights to copy,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18 # modify and redistribute granted by the license, users are provided only
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19 # with a limited warranty and the software's author, the holder of the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20 # economic rights, and the successive licensors have only limited
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21 # liability.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23 # In this respect, the user's attention is drawn to the risks associated
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
24 # with loading, using, modifying and/or developing or reproducing the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
25 # software by the user in light of its specific status of free software,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
26 # that may mean that it is complicated to manipulate, and that also
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
27 # therefore means that it is reserved for developers and experienced
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
28 # professionals having in-depth computer knowledge. Users are therefore
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
29 # encouraged to load and test the software's suitability as regards their
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
30 # requirements in conditions enabling the security of their systems and/or
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
31 # data to be ensured and, more generally, to use and operate it in the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
32 # same conditions as regards security.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
33 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
34 # The fact that you are presently reading this means that you have had
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
35 # knowledge of the CeCILL license and that you accept its terms.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
36
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
37
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
38 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
39 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
40 import getopt
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
41 import shutil
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
42 import glob
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
43
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
44
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
45 import pyRepet.launcher.programLauncher
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
46 from commons.core.coord.AlignUtils import AlignUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
47 from commons.core.coord.MatchUtils import MatchUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
48 from commons.core.utils.FileUtils import FileUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
49 from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
50 from commons.core.seq.FastaUtils import FastaUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
51 from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
52
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
53
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
54 class BenchmarkTEconsensus( object ):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
55
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def __init__(self):
    """
    Set every option to its default value and create the program launcher.
    """
    # input fasta files: query = de novo consensus, subject = reference sequences
    self._qryFile = ""
    self._sbjFile = ""
    # comparison method (1 to 4, see help())
    self._method = 1
    self._keepConflictSbj = False
    # thresholds, all overridable from the command line
    self._thresholdCoverage = 95
    self._thresholdIdentity = 80
    self._thresholdEvalue = 1e-10
    self._thresholdCoverageMatch = 90
    # parallel run
    self._useCluster = False
    self._queue = ""
    # '-C' is stored in '_configFile' and checkAttributes() reads it under
    # that name, so it has to exist even when '-C' is not given
    self._configFile = ""
    self._configFileName = ""
    self._clean = False
    self._verbose = 0
    self._pL = pyRepet.launcher.programLauncher.programLauncher()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
71
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
72
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def help(self):
    """
    Print the usage message on the standard output.

    The values shown as defaults for '-t', '-I', '-E' and '-T' are read from
    the corresponding attributes, so they always agree with __init__().
    """
    lines = [
        "",
        "usage: BenchmarkTEconsensus.py [ options ]",
        "options:",
        " -h: this help",
        " -q: name of the query file (de novo consensus, format='fasta')",
        " -s: name of the subject file (reference sequences, format='fasta')",
        " -m: method",
        " 1: Blaster + Matcher (default)",
        " 2: Blaster + merge + Matcher (not with '-Q')",
        " 3: Orienter + Mafft + Matcher",
        " 4: Yass + Matcher",
        " -a: keep all conflicting subjects in Matcher",
        " -t: coverage threshold over which the match is 'complete' (in %% of the seq length, default=%i)" % self._thresholdCoverage,
        " -I: identity threshold for 'CC' matches (default=%i)" % self._thresholdIdentity,
        # '%g' renders the default 1e-10 as '1e-10', i.e. the text that was
        # previously hard-coded here
        " -E: E-value threshold for 'CC' matches (default=%g)" % self._thresholdEvalue,
        " -T: coverage threshold for match length on query compare to subject length (default=%i)" % self._thresholdCoverageMatch,
        " -Q: queue name to run in parallel",
        " -C: name of the configuration file (compulsory with '-Q')",
        " -c: clean",
        " -v: verbosity level (default=0/1/2)",
        "",
    ]
    # a single call with a single argument behaves the same with the
    # Python 2 print statement and the Python 3 print function
    print("\n".join(lines))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
95
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
96
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setAttributesFromCmdLine(self):
    """
    Fill the attributes from the options found in sys.argv.

    Exits with status 1, after printing the problem on stderr and the help,
    when an option is unknown, lacks its argument, or has a value that
    cannot be converted to the expected number. Exits with status 0 after
    printing the help when '-h' is given. Positional arguments are ignored.
    """
    try:
        opts, _ = getopt.getopt(sys.argv[1:], "hq:s:m:at:I:E:T:Q:C:cv:")
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n" % (str(err)))
        self.help()
        sys.exit(1)

    for o, a in opts:
        try:
            if o == "-h":
                self.help()
                sys.exit(0)
            elif o == "-q":
                self._qryFile = a
            elif o == "-s":
                self._sbjFile = a
            elif o == "-m":
                self._method = int(a)
            elif o == "-a":
                self._keepConflictSbj = True
            elif o == "-t":
                self._thresholdCoverage = int(a)
            elif o == "-I":
                self._thresholdIdentity = int(a)
            elif o == "-E":
                self._thresholdEvalue = float(a)
            elif o == "-T":
                self._thresholdCoverageMatch = int(a)
            elif o == "-Q":
                self._useCluster = True
                self._queue = a
            elif o == "-C":
                self._configFile = a
                # kept in sync: compareFastaViaBlasterMatcher() uses
                # '_configFileName' as the name of the symbolic link
                self._configFileName = a
            elif o == "-c":
                self._clean = True
            elif o == "-v":
                self._verbose = int(a)
        except ValueError:
            # int()/float() rejected the value: report it like any other
            # command-line error instead of dying with a traceback
            sys.stderr.write("ERROR: invalid value '%s' for option '%s'\n" % (a, o))
            self.help()
            sys.exit(1)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
133
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
134
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def checkAttributes(self):
    """
    Validate the options; exit on the first problem found.

    Exits with status 1, after printing the message on stderr and the help,
    when:
      - the query or the subject file is not given or does not exist,
      - '-Q' is used without an existing configuration file ('-C'),
      - '-Q' is used together with method 2.
    Exits with status 0, after printing a warning, when FastaUtils.dbSize()
    returns 0 for the query or the subject file.
    """
    def abort(msg):
        # common tail of every fatal check
        sys.stderr.write("%s\n" % (msg))
        self.help()
        sys.exit(1)

    # query first, then subject: same order of checks as the messages below
    for fileName, label, option in ((self._qryFile, "query", "-q"),
                                    (self._sbjFile, "subject", "-s")):
        if fileName == "":
            abort("ERROR: missing %s file (%s)" % (label, option))
        if not os.path.exists(fileName):
            abort("ERROR: can't find file '%s'" % (fileName))

    if self._useCluster:
        # '_configFile' only exists once '-C' has been parsed; treat its
        # absence as "not given" rather than failing with AttributeError
        configFile = getattr(self, "_configFile", "")
        if configFile == "":
            abort("ERROR: missing configuration file (-C)")
        if not os.path.exists(configFile):
            abort("ERROR: can't find file '%s'" % (configFile))
        if self._method == 2:
            abort("ERROR: can't launch method 2 in parallel")

    # nothing to compare: leave quietly, this is not an error
    if FastaUtils.dbSize(self._qryFile) == 0:
        print("WARNING: query file is empty")
        sys.exit(0)
    if FastaUtils.dbSize(self._sbjFile) == 0:
        print("WARNING: subject file is empty")
        sys.exit(0)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
180
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
181
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def preprocess(self):
    """
    Create a fresh working directory, move into it and prepare the inputs.

    The directory is named after the process id, the coverage threshold,
    the method and the identity threshold; an existing directory with the
    same name is removed first. The query file is linked into it and passed
    through ChangeSequenceHeaders (step 1, prefix 'query'); from then on
    '_qryFile' designates the resulting '<query>.newH' file. The subject
    file is linked too, unless that name already exists in the directory.
    """
    workDir = "tmp%s_t%i_m%i_I%i" % (os.getpid(),
                                     self._thresholdCoverage,
                                     self._method,
                                     self._thresholdIdentity)
    if os.path.exists(workDir):
        shutil.rmtree(workDir)
    os.mkdir(workDir)
    os.chdir(workDir)

    # NOTE(review): both links point to '../<name>' and are themselves
    # called '<name>', which only fits plain file names located in the
    # directory the script was started from - confirm before passing paths
    os.symlink("../%s" % self._qryFile, self._qryFile)

    renamedQry = "%s.newH" % (self._qryFile)
    headerChanger = ChangeSequenceHeaders()
    headerChanger.setInputFile(self._qryFile)
    headerChanger.setFormat("fasta")
    headerChanger.setStep(1)
    headerChanger.setPrefix("query")
    headerChanger.setOutputFile(renamedQry)
    headerChanger.setVerbosityLevel(self._verbose)
    headerChanger.run()
    self._qryFile = renamedQry

    # the name is already present when query and subject are the same file
    if not os.path.exists(self._sbjFile):
        os.symlink("../%s" % self._sbjFile, self._sbjFile)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
205
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
206
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def compareFastaViaBlasterMatcher(self, merged=False):
    """
    Blaster (+ merged) + Matcher

    Without '-Q' the programs 'blaster' and 'matcher' are launched one after
    the other, with an optional AlignUtils.mergeFile() in between. With '-Q'
    the whole work is delegated to 'launchBlasterMatcherPerQuery.py', found
    under $REPET_PATH/bin. In both cases the resulting 'tab' and 'path'
    files then go through ChangeSequenceHeaders (step 2, using the link file
    '<query>link') and are written under names without '.newH'.

    merged: if True, Matcher is run on the merged alignments (method 2).
    Returns the tuple (match file name, path file name), '.newH' removed.
    Raises KeyError in cluster mode when REPET_PATH is not set.
    """
    suffix = "match" if self._keepConflictSbj else "clean_match"

    # every file name derives from the same '<query>_vs_<subject>.m<method>'
    prefix = "%s_vs_%s.m%i" % (self._qryFile, self._sbjFile, self._method)
    alignFile = "%s.align" % (prefix)
    matcherInput = alignFile
    if merged:
        matcherInput += ".merged"
    matchFile = "%s.%s.tab" % (matcherInput, suffix)
    pathFile = "%s.%s.path" % (matcherInput, suffix)

    if not self._useCluster:
        prg = "blaster"
        cmd = prg
        cmd += " -q %s" % (self._qryFile)
        cmd += " -s %s" % (self._sbjFile)
        cmd += " -B %s" % (prefix)
        cmd += " -v %i" % (self._verbose)
        self._pL.launch(prg, cmd)

        if merged:
            AlignUtils.mergeFile(alignFile, matcherInput)

        prg = "matcher"
        cmd = prg
        cmd += " -m %s" % (matcherInput)
        cmd += " -q %s" % (self._qryFile)
        cmd += " -s %s" % (self._sbjFile)
        cmd += " -j"
        if self._keepConflictSbj:
            cmd += " -a"
        cmd += " -v %i" % (self._verbose)
        self._pL.launch(prg, cmd)

    else:
        # '_configFileName' is initialised to "" and '-C' is stored in
        # '_configFile'; an empty link name makes os.symlink() fail, so
        # fall back on the name given with '-C'
        linkName = self._configFileName or self._configFile
        if not os.path.lexists(linkName):
            os.symlink("../%s" % self._configFile, linkName)

        prg = os.environ["REPET_PATH"] + "/bin/launchBlasterMatcherPerQuery.py"
        cmd = prg
        cmd += " -q %s" % (self._qryFile)
        cmd += " -s %s" % (self._sbjFile)
        cmd += " -Q %s" % (self._queue)
        cmd += " -C %s" % (self._configFile)
        cmd += " -n %i" % (10)
        matcherOptions = "-j -a" if self._keepConflictSbj else "-j"
        cmd += " -M \"%s\"" % (matcherOptions)
        cmd += " -Z tab"
        if self._clean:
            cmd += " -c"
        cmd += " -v %i" % (self._verbose - 1)
        self._pL.launch(prg, cmd)

    finalMatchFile = matchFile.replace(".newH", "")
    finalPathFile = pathFile.replace(".newH", "")

    # step 2 of ChangeSequenceHeaders on the 'tab' file, then on the 'path'
    # file with the same object (the link file set once is reused)
    csh = ChangeSequenceHeaders()
    csh.setInputFile(matchFile)
    csh.setFormat("tab")
    csh.setStep(2)
    csh.setLinkFile("%slink" % (self._qryFile))
    csh.setOutputFile(finalMatchFile)
    csh.run()

    csh.setInputFile(pathFile)
    csh.setFormat("path")
    csh.setStep(2)
    csh.setOutputFile(finalPathFile)
    csh.run()

    return finalMatchFile, finalPathFile
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
297
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
298
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
299 def compareFastaViaMafft( self ):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
300 """
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
301 Orienter, Mafft, Matcher
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
302 """
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
303 FastaUtils.dbSplit( self._qryFile, 1, False, False, "query" )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
304 FastaUtils.dbSplit( self._sbjFile, 1, False, False, "subject" )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
305 lQueries = glob.glob( "query_*.fa" )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
306 lSubjects = glob.glob( "subject_*.fa" )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
307
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
308 if self._keepConflictSbj:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
309 s = "match"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
310 else:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
311 s = "clean_match"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
312 matchFile = "%s_vs_%s.m%i.align.%s.tab" % ( self._qryFile,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
313 self._sbjFile,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
314 self._method,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
315 s )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
316 os.system( "touch %s" % matchFile )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
317 pathFile = "%s_vs_%s.m%i.align.%s.path" % ( self._qryFile,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
318 self._sbjFile,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
319 self._method,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
320 s )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
321 os.system( "touch %s" % pathFile )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
322
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
323 countQueries = 0
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
324 for query in lQueries:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
325 countQueries += 1
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
326 queryHeader = FastaUtils.dbHeaders( query )[0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
327 queryLength = FastaUtils.dbLengths( query )[0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
328 countSubjects = 0
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
329 for subject in lSubjects:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
330 countSubjects += 1
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
331 subjectHeader = FastaUtils.dbHeaders( subject )[0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
332 subjectLength = FastaUtils.dbLengths( subject )[0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
333 if self._verbose > 0:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
334 print "compare '%s' (%i bp, %i/%i) and '%s' (%i bp, %i/%i)" % ( queryHeader, queryLength,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
335 countQueries, len(lQueries),
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
336 subjectHeader, subjectLength,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
337 countSubjects, len(lSubjects) )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
338 sys.stdout.flush()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
339 qsLengthRatio = 100 * queryLength / float(subjectLength)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
340 if qsLengthRatio < self._thresholdCoverage - 2 or qsLengthRatio > 100 + (100-self._thresholdCoverage) + 2:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
341 if self._verbose > 0:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
342 print "skip (q/s=%.2f%%)" % ( qsLengthRatio )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
343 continue
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
344
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
345 tmpFile = "%s_vs_%s" % ( query, subject )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
346 FileUtils.catFilesFromList( [ query, subject ],
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
347 tmpFile )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
348 prg = "OrientSequences.py"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
349 cmd = prg
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
350 cmd += " -i %s" % ( tmpFile )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
351 cmd += " -p mummer"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
352 cmd += " -c"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
353 cmd += " -v %i" % ( self._verbose - 1 )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
354 self._pL.launch( prg, cmd )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
355 prg = "MafftProgramLauncher.py"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
356 cmd = prg
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
357 cmd += " -i %s.oriented" % ( tmpFile )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
358 cmd += " -c"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
359 cmd += " -v %i" % ( self._verbose - 1 )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
360 self._pL.launch( prg, cmd )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
361
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
362 absDB = AlignedBioseqDB( "%s.oriented.fa_aln" % tmpFile )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
363 lHeaders = absDB.getHeaderList()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
364 lAligns = absDB.getAlignList( lHeaders[0], lHeaders[1] )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
365 for i in lAligns:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
366 if "re-oriented" in i.getQueryName():
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
367 i.setQueryName( queryHeader )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
368 start = i.getQueryStart()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
369 end = i.getQueryEnd()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
370 i.setQueryStart( queryLength - end + 1 )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
371 i.setQueryEnd( queryLength - start + 1 )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
372 if "re-oriented" in i.getSubjectName():
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
373 i.setSubjectName( subjectHeader )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
374 start = i.getSubjectStart()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
375 end = i.getSubjectEnd()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
376 i.setSubjectEnd( subjectLength - end + 1 )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
377 i.setSubjectStart( subjectLength - start + 1 )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
378 if not i.isQueryOnDirectStrand():
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
379 i.reverse()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
380 AlignUtils.writeListInFile( lAligns,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
381 "%s.oriented.fa_aln.align" % tmpFile )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
382
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
383 prg = os.environ["REPET_PATH"] + "/bin/matcher"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
384 cmd = prg
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
385 cmd += " -m %s.oriented.fa_aln.align" % ( tmpFile )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
386 cmd += " -q %s" % ( query )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
387 cmd += " -s %s" % ( subject )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
388 cmd += " -j"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
389 if self._keepConflictSbj:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
390 cmd += " -a"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
391 cmd += " -v %i" % ( self._verbose - 1 )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
392 self._pL.launch( prg, cmd )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
393
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
394 FileUtils.appendFileContent( "%s.oriented.fa_aln.align.%s.path" % ( tmpFile, s ), pathFile )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
395 lMatches = MatchUtils.getMatchListFromFile( "%s.oriented.fa_aln.align.%s.tab" % ( tmpFile, s ) )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
396 MatchUtils.writeListInFile( lMatches, matchFile, "a" )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
397
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
398 for f in [ tmpFile,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
399 "%s.oriented" % ( tmpFile ),
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
400 # "%s.oriented.fa_aln" % ( tmpFile ),
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
401 "%s.oriented.fa_aln.align" % ( tmpFile ),
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
402 "%s.oriented.fa_aln.align.match.fa" % ( tmpFile ),
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
403 "%s.oriented.fa_aln.align.match.map" % ( tmpFile ),
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
404 "%s.oriented.fa_aln.align.match.param" % ( tmpFile ),
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
405 "%s.oriented.fa_aln.align.match.path" % ( tmpFile ),
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
406 "%s.oriented.fa_aln.align.match.tab" % ( tmpFile ),
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
407 ]:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
408 os.remove( f )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
409
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
410 if not FileUtils.isEmpty( matchFile ):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
411 csh = ChangeSequenceHeaders()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
412 csh.setInputFile( matchFile )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
413 csh.setFormat( "tab" )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
414 csh.setStep( 2 )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
415 csh.setLinkFile( "%slink" % ( self._qryFile ) )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
416 csh.setOutputFile( matchFile.replace(".newH","") )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
417 csh.run()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
418 csh.setInputFile( pathFile )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
419 csh.setFormat( "path" )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
420 csh.setStep( 2 )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
421 csh.setOutputFile( pathFile.replace(".newH","") )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
422 csh.run()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
423 return matchFile.replace(".newH",""), pathFile.replace(".newH","")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
424 else:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
425 return "", ""
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
426
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
427
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def compareFastaViaYassMatcher(self, merged=False):
    """
    Yass + Matcher

    Launch YassProgramLauncher.py on the query and subject files (skipped
    when '_useCluster' is set), optionally merge the resulting 'align' file
    with AlignUtils.mergeFile(), launch matcher on it, then run
    ChangeSequenceHeaders (step 2) on the 'tab' file and on the 'path' file.

    'merged': when true, matcher is given the '.merged' copy of the 'align'
    file and the 'tab'/'path' file names carry the '.merged' infix.

    Returns the names of the 'tab' and 'path' output files, i.e. the input
    names with every '.newH' removed.

    NOTE(review): assumes only the Yass launch is conditional on
    '_useCluster' and that the merge, matcher and header steps always run --
    confirm against the original indentation.
    """
    matchKind = "match" if self._keepConflictSbj else "clean_match"

    # Every file handled here derives from this single alignment file name.
    alignFile = "%s_vs_%s.m%i.align" % (self._qryFile,
                                        self._sbjFile,
                                        self._method)
    matcherInput = alignFile
    if merged:
        matcherInput += ".merged"
    matchFile = "%s.%s.tab" % (matcherInput, matchKind)
    pathFile = "%s.%s.path" % (matcherInput, matchKind)

    if not self._useCluster:
        prg = os.environ["REPET_PATH"] + "/bin/YassProgramLauncher.py"
        yassArgs = [prg,
                    "-i %s" % (self._qryFile),
                    "-s %s" % (self._sbjFile),
                    "-c",
                    "-o %s" % (alignFile),
                    "-v %i" % (self._verbose)]
        self._pL.launch(prg, " ".join(yassArgs))

    if merged:
        AlignUtils.mergeFile(alignFile, matcherInput)

    prg = os.environ["REPET_PATH"] + "/bin/matcher"
    matcherArgs = [prg,
                   "-m %s" % (matcherInput),
                   "-q %s" % (self._qryFile),
                   "-s %s" % (self._sbjFile),
                   "-j"]
    if self._keepConflictSbj:
        matcherArgs.append("-a")
    matcherArgs.append("-v %i" % (self._verbose))
    self._pL.launch(prg, " ".join(matcherArgs))

    tabOutput = matchFile.replace(".newH", "")
    pathOutput = pathFile.replace(".newH", "")

    headerChanger = ChangeSequenceHeaders()

    # 'tab' file first; the link file is set once and reused for 'path'.
    headerChanger.setInputFile(matchFile)
    headerChanger.setFormat("tab")
    headerChanger.setStep(2)
    headerChanger.setLinkFile("%slink" % (self._qryFile))
    headerChanger.setOutputFile(tabOutput)
    headerChanger.run()

    headerChanger.setInputFile(pathFile)
    headerChanger.setFormat("path")
    headerChanger.setStep(2)
    headerChanger.setOutputFile(pathOutput)
    headerChanger.run()

    return tabOutput, pathOutput
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
500
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
501
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def analyzeMatchFile(self, matchFile, pathFile):
    """
    Analyze the 'tab' file with tabFileReader.py and export the results.

    If 'matchFile' is not an empty string, tabFileReader.py is launched on it
    with the query and subject file names (any '.newH' removed) and the
    coverage, identity, E-value and match-coverage thresholds. Then
    'matchFile', 'pathFile' and the three files named
    '<matchFile>_tabFileReader.txt', '<matchFile>_qryCategories.txt' and
    '<matchFile>_sbjCategories.txt' are copied into the parent directory.
    Finally the current working directory is moved to the parent directory.

    'matchFile': name of the 'tab' file, or "" when there is nothing to analyze
    'pathFile': name of the 'path' file, copied along with the 'tab' file

    NOTE(review): assumes the final change of directory happens whether or
    not a match file was given -- confirm against the original indentation.
    """
    if matchFile != "":
        if self._verbose > 0:
            # Call form with a single argument prints the same text under
            # Python 2 and Python 3.
            print("analyze the 'tab' file...")
            sys.stdout.flush()

        prg = os.environ["REPET_PATH"] + "/bin/tabFileReader.py"
        cmd = " ".join([prg,
                        "-m %s" % (matchFile),
                        "-q %s" % (self._qryFile.replace(".newH", "")),
                        "-s %s" % (self._sbjFile.replace(".newH", "")),
                        "-t %i" % (self._thresholdCoverage),
                        "-I %i" % (self._thresholdIdentity),
                        "-E %g" % (self._thresholdEvalue),
                        "-T %i" % (self._thresholdCoverageMatch),
                        "-v %i" % (self._verbose - 1)])
        self._pL.launch(prg, cmd)

        lFilesToExport = [matchFile,
                          pathFile,
                          "%s_tabFileReader.txt" % matchFile,
                          "%s_qryCategories.txt" % matchFile,
                          "%s_sbjCategories.txt" % matchFile]
        for f in lFilesToExport:
            shutil.copy(f, "..")

    os.chdir("..")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
524
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
525
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def start(self):
    """
    Check the attributes, then announce the start of the program on stdout
    when the verbosity level is above 0.
    """
    self.checkAttributes()
    if self._verbose > 0:
        # Call form with a single argument prints the same text under
        # Python 2 and Python 3.
        print("START BenchmarkTEconsensus.py")
        sys.stdout.flush()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
531
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
532
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def end(self):
    """
    Remove the temporary directory when cleaning is enabled, then announce
    the end of the program on stdout when the verbosity level is above 0.

    The directory name is rebuilt from the current process id and the
    coverage threshold, method and identity threshold; it is resolved
    relative to the current working directory.
    """
    if self._clean:
        tmpDir = "tmp%s_t%i_m%i_I%i" % (os.getpid(),
                                        self._thresholdCoverage,
                                        self._method,
                                        self._thresholdIdentity)
        shutil.rmtree(tmpDir)
    if self._verbose > 0:
        # Call form with a single argument prints the same text under
        # Python 2 and Python 3.
        print("END BenchmarkTEconsensus.py")
        sys.stdout.flush()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
543
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
544
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def run(self):
    """
    Run the whole benchmark: start(), preprocess(), the comparison selected
    by '_method', analyzeMatchFile() on its result, then end().

    '_method' selects the comparison:
      1: compareFastaViaBlasterMatcher()
      2: compareFastaViaBlasterMatcher( merged=True )
      3: compareFastaViaMafft()
      4: compareFastaViaYassMatcher()

    Raises ValueError if '_method' is none of the values above.
    """
    self.start()

    self.preprocess()

    if self._method == 1:
        matchFile, pathFile = self.compareFastaViaBlasterMatcher()
    elif self._method == 2:
        matchFile, pathFile = self.compareFastaViaBlasterMatcher(merged=True)
    elif self._method == 3:
        matchFile, pathFile = self.compareFastaViaMafft()
    elif self._method == 4:
        matchFile, pathFile = self.compareFastaViaYassMatcher()
    else:
        # Without this branch 'matchFile' and 'pathFile' would be unbound
        # below and the failure would surface as an UnboundLocalError.
        raise ValueError("unknown method '%s' (expected 1, 2, 3 or 4)"
                         % (self._method,))

    self.analyzeMatchFile(matchFile, pathFile)

    self.end()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
562
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
563
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: build the benchmark, let it read its settings
    # through setAttributesFromCmdLine(), then run it.
    benchmark = BenchmarkTEconsensus()
    benchmark.setAttributesFromCmdLine()
    benchmark.run()