annotate TEisotools-1.0/commons/core/coord/Align.py @ 6:20ec0d14798e draft

Uploaded
author urgi-team
date Wed, 20 Jul 2016 05:00:24 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
1 # Copyright INRA (Institut National de la Recherche Agronomique)
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
2 # http://www.inra.fr
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
3 # http://urgi.versailles.inra.fr
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
4 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
9 # "http://www.cecill.info".
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
10 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
15 # liability.
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
16 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
26 # same conditions as regards security.
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
27 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
30
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
31 import time
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
32 from commons.core.coord.Map import Map
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
33 from commons.core.coord.Range import Range
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
34
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
35 ## Handle a match between two sequences, query and subject (pair of coordinates with E-value, score and identity)
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
36 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
37 class Align( object ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
38
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
39 __slots__ = ("range_query", "range_subject", "e_value", "score", "identity", '__dict__')
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
40
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
41 ## Constructor
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
42 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
43 # @param range_q: a Range instance for the query
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
44 # @param range_s: a Range instance for the subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
45 # @param e_value: E-value of the match
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
46 # @param identity: identity percentage of the match
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
47 # @param score: score of the match
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
48 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
49 def __init__(self, range_q=Range(), range_s=Range(), e_value=0, score=0, identity=0):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
50 self.range_query = range_q
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
51 self.range_subject = range_s
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
52 self.e_value = float(e_value)
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
53 self.score = float(score)
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
54 self.identity = float(identity)
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
55
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
56 ## Return True if the instance is empty, False otherwise
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
57 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
58 def isEmpty(self):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
59 return self.range_query.isEmpty() or self.range_subject.isEmpty()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
60
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
61 ## Equal operator
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
62 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
63 def __eq__(self, o):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
64 if type(o) is not type(self):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
65 return False
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
66 else:
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
67 return self.range_query==o.range_query and self.range_subject==o.range_subject and \
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
68 self.e_value==o.e_value and self.score==o.score and self.identity==o.identity
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
69
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
70 ## Unequal operator
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
71 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
72 # @param o a Range instance
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
73 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
74 def __ne__(self, o):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
75 return not self.__eq__(o)
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
76
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
77 ## Convert the object into a string
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
78 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
79 # @note used in 'print myObject'
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
80 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
81 def __str__( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
82 return self.toString()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
83
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
84 ## Read attributes from an Align file
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
85 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
86 # @param fileHandler: file handler of the file being read
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
87 # @return: 1 on success, 0 at the end of the file
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
88 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
89 def read(self, fileHandler):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
90 self.reset()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
91 line = fileHandler.readline()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
92 if line == "":
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
93 return 0
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
94 tokens = line.split("\t")
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
95 if len(tokens) < 5:
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
96 return 0
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
97 self.setFromTuple(tokens)
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
98 return 1
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
99
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
100 ## Set attributes from tuple
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
101 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
102 # @param tuple a tuple with (queryName,queryStart,queryEnd,subjectName,subjectStar,subjectEnd,E-value,score,identity)
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
103 # @note data are loaded such that the query is always on the direct strand
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
104 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
105 def setFromTuple( self, tuple ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
106 #TODO: we need to create Range instances because of __eq__() and isEmpty() tests, but WHY ???
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
107 self.range_query = Range()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
108 self.range_subject = Range()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
109 if int(tuple[1]) < int(tuple[2]):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
110 self.range_query.setFromTuple( ( tuple[0], tuple[1], tuple[2] ) )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
111 self.range_subject.setFromTuple( ( tuple[3], tuple[4], tuple[5] ) )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
112 else:
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
113 self.range_query.setFromTuple( ( tuple[0], tuple[2], tuple[1] ) )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
114 self.range_subject.setFromTuple( ( tuple[3], tuple[5], tuple[4] ) )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
115 self.e_value = float(tuple[6])
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
116 self.score = float(tuple[7])
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
117 self.identity = float(tuple[8])
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
118
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
119 ## Reset
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
120 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
121 def reset( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
122 self.range_query.reset()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
123 self.range_subject.reset()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
124 self.e_value = 0
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
125 self.score = 0
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
126 self.identity = 0
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
127
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
128 ## Return the attributes as a formatted string
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
129 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
130 def toString(self):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
131 string = "%s" % ( self.range_query.toString() )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
132 string += "\t%s" % ( self.range_subject.toString() )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
133 string += "\t%g\t%i\t%f" % ( self.e_value, self.score, self.identity )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
134 return string
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
135
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
136
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
137 ## Return the attributes as a GFF-formatted string
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
138 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
139 def toStringAsGff( self, source="REPET", type="match", phase=".", ID="", Parent="" ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
140 if not self.isSubjectOnDirectStrand():
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
141 self.reverse()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
142 string = "%s" % ( self.getQueryName() )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
143 string += "\t%s" % ( source )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
144 string += "\t%s" % ( type )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
145 string += "\t%s" % ( self.getQueryMin() )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
146 string += "\t%s" % ( self.getQueryMax() )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
147 string += "\t%g" % ( self.e_value )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
148 string += "\t%s" % ( self.getQueryStrand() )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
149 string += "\t%s" % ( phase )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
150 attributes = ""
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
151 if ID != "":
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
152 attributes += "ID=%s" % ( ID )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
153 else:
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
154 attributes += "ID=%i" % ( str(time.time())[-8:-1].replace(".","") )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
155 if Parent != "":
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
156 attributes += ";Parent=%s" % ( Parent )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
157 attributes += ";Target=%s %i %i" % ( self.getSubjectName(), self.getSubjectStart(), self.getSubjectEnd() )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
158 string += "\t%s" % ( attributes )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
159 return string
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
160
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
161
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
162 ## Reverse query and subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
163 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
164 def reverse(self):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
165 self.range_query.reverse()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
166 self.range_subject.reverse()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
167
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
168 ## Show the attributes
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
169 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
170 def show(self):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
171 print self.toString()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
172
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
173 ## Write attributes into an Align file
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
174 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
175 # @param fileHandler: file handler of the file being filled
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
176 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
177 def write(self, fileHandler):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
178 fileHandler.write("%s\n" % (self.toString()))
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
179
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
180 ## Save attributes into an Align file
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
181 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
182 # @param file: name of the file being filled
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
183 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
184 def save(self, file):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
185 fileHandler = open( file, "a" )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
186 self.write( fileHandler )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
187 fileHandler.close()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
188
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
189 ## Return the score
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
190 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
191 def getScore(self):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
192 return self.score
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
193
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
194 ## Return the identity
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
195 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
196 def getIdentity(self):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
197 return self.identity
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
198
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
199 def getEvalue(self):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
200 return self.e_value
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
201
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
202 ## Return the length on the query
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
203 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
204 def getLengthOnQuery(self):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
205 return self.range_query.getLength()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
206
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
207 ## Return the name of the query
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
208 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
209 def getQueryName( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
210 return self.range_query.seqname
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
211
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
212 ## Return the start of the query
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
213 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
214 def getQueryStart( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
215 return self.range_query.start
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
216
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
217 ## Return the end of the query
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
218 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
219 def getQueryEnd( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
220 return self.range_query.end
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
221
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
222 ## Return the min of the query
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
223 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
224 def getQueryMin( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
225 return self.range_query.getMin()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
226
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
227 ## Return the max of the query
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
228 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
229 def getQueryMax( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
230 return self.range_query.getMax()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
231
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
232 ## Return the strand of the query
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
233 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
234 def getQueryStrand( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
235 return self.range_query.getStrand()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
236
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
237 ## Return the length on the subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
238 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
239 def getLengthOnSubject(self):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
240 return self.range_subject.getLength()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
241
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
242 ## Return the name of the subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
243 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
244 def getSubjectName( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
245 return self.range_subject.seqname
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
246
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
247 ## Return the start of the subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
248 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
249 def getSubjectStart( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
250 return self.range_subject.start
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
251
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
252 ## Return the end of the subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
253 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
254 def getSubjectEnd( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
255 return self.range_subject.end
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
256
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
257 ## Return the min of the subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
258 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
259 def getSubjectMin( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
260 return self.range_subject.getMin()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
261
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
262 ## Return the max of the subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
263 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
264 def getSubjectMax( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
265 return self.range_subject.getMax()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
266
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
267 ## Return the strand of the subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
268 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
269 def getSubjectStrand( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
270 return self.range_subject.getStrand()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
271
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
272 ## Return the query as a Range instance
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
273 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
274 def getQueryAsRange( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
275 return self.range_query
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
276
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
277 ## Return the subject as a Range instance
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
278 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
279 def getSubjectAsRange( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
280 return self.range_subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
281
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
282 ## Set the name of the query
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
283 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
284 def setQueryName( self, name ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
285 self.range_query.seqname = name
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
286
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
287 ## Set the start of the query
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
288 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
289 def setQueryStart( self, start ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
290 self.range_query.start = start
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
291
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
292 ## Set the end of the query
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
293 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
294 def setQueryEnd( self, end ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
295 self.range_query.end = end
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
296
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
297 ## Set the name of the subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
298 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
299 def setSubjectName( self, name ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
300 self.range_subject.seqname = name
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
301
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
302 ## Set the start of the subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
303 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
304 def setSubjectStart( self, start ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
305 self.range_subject.start = start
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
306
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
307 ## Set the end of the subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
308 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
309 def setSubjectEnd( self, end ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
310 self.range_subject.end = end
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
311
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
312 ## Merge the instance with another Align instance
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
313 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
314 # @param o an Align instance
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
315 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
316 def merge(self, o):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
317 if self.range_query.seqname != o.range_query.seqname \
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
318 or self.range_subject.seqname != o.range_subject.seqname:
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
319 return
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
320 self.range_query.merge(o.range_query)
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
321 self.range_subject.merge(o.range_subject)
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
322 self.score = max(self.score,o.score)
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
323 self.e_value = min(self.e_value,o.e_value)
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
324 self.identity = max(self.identity,o.identity)
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
325
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
326 ## Return a Map instance with the subject mapped on the query
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
327 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
328 def getSubjectAsMapOfQuery(self):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
329 iMap = Map()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
330 iMap.name = self.range_subject.seqname
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
331 iMap.seqname = self.range_query.seqname
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
332 if self.range_subject.isOnDirectStrand():
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
333 iMap.start = self.range_query.start
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
334 iMap.end = self.range_query.end
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
335 else:
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
336 iMap.start = self.range_query.end
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
337 iMap.end = self.range_query.start
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
338 return iMap
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
339
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
340 ## Return True if query is on direct strand
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
341 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
342 def isQueryOnDirectStrand( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
343 return self.range_query.isOnDirectStrand()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
344
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
345 ## Return True if subject is on direct strand
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
346 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
347 def isSubjectOnDirectStrand( self ):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
348 return self.range_subject.isOnDirectStrand()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
349
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
350 ## Return True if query and subject are on the same strand, False otherwise
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
351 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
352 def areQrySbjOnSameStrand(self):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
353 return self.isQueryOnDirectStrand() == self.isSubjectOnDirectStrand()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
354
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
355 ## Return False if query and subject are on the same strand, True otherwise
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
356 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
357 def areQrySbjOnOppositeStrands(self):
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
358 return not self.areQrySbjOnSameStrand()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
359
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
360 ## Set attributes from string
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
361 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
362 # @param string a string formatted like queryName queryStart queryEnd subjectName subjectStart subjectEnd E-value score identity
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
363 # @param sep field separator
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
364 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
def setFromString(self, string, sep="\t"):
    """Set attributes from a delimited line of text.

    A single trailing newline, if present, is removed; the remaining text
    is split on ``sep`` and the resulting list is handed to setFromTuple().

    @param string the line to parse
    @param sep field separator (tab by default)
    """
    # endswith() is safe on an empty string, whereas indexing string[-1]
    # would raise IndexError before setFromTuple() is ever reached.
    if string.endswith("\n"):
        string = string[:-1]
    self.setFromTuple(string.split(sep))
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
369
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
370 ## Return a first Map instance for the query and a second for the subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
371 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
def getMapsOfQueryAndSubject(self):
    """Build one Map from the query range and one from the subject range.

    Both Map instances are created with the name "repet" and take their
    seqname, start and end from the corresponding range.

    @return the pair (query Map, subject Map)
    """
    qryRange = self.range_query
    iMapQuery = Map( name="repet", seqname=qryRange.seqname,
                     start=qryRange.start, end=qryRange.end )
    sbjRange = self.range_subject
    iMapSubject = Map( name="repet", seqname=sbjRange.seqname,
                       start=sbjRange.start, end=sbjRange.end )
    return iMapQuery, iMapSubject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
382
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
383 ## Write query coordinates as Map in a file
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
384 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
385 # @param fileHandler: file handler of the file being filled
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
386 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
def writeSubjectAsMapOfQuery( self, fileHandler ):
    """Write the object returned by getSubjectAsMapOfQuery() via its write().

    @param fileHandler handler passed unchanged to that object's write() method
    """
    self.getSubjectAsMapOfQuery().write( fileHandler )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
390
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
391 ## Return a bin for fast database access
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
392 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
def getBin(self):
    """Return the value of getBin() called on the query range."""
    queryRange = self.range_query
    return queryRange.getBin()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
395
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
396 ## Switch query and subject
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
397 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
def switchQuerySubject( self ):
    """Exchange the query and subject ranges.

    After the exchange, reverse() is called when isQueryOnDirectStrand()
    is false for the new query.
    """
    self.range_query, self.range_subject = self.range_subject, self.range_query
    if self.isQueryOnDirectStrand():
        return
    self.reverse()
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
404
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
405 ## Return True if the query overlaps with the query of another Align instance, False otherwise
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
406 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
def isQueryOverlapping( self, iAlign ):
    """Test this instance's query range against another instance's query range.

    @param iAlign object providing getQueryAsRange()
    @return the result of isOverlapping() called on this instance's query range
    """
    ownQuery = self.getQueryAsRange()
    otherQuery = iAlign.getQueryAsRange()
    return ownQuery.isOverlapping( otherQuery )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
409
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
410 ## Return True if the subject overlaps with the subject of another Align instance, False otherwise
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
411 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
def isSubjectOverlapping( self, iAlign ):
    """Test this instance's subject range against another instance's subject range.

    @param iAlign object providing getSubjectAsRange()
    @return the result of isOverlapping() called on this instance's subject range
    """
    ownSubject = self.getSubjectAsRange()
    otherSubject = iAlign.getSubjectAsRange()
    return ownSubject.isOverlapping( otherSubject )
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
414
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
415 ## Return True if the Align instance overlaps with another Align instance, False otherwise
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
416 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
def isOverlapping( self, iAlign ):
    """Tell whether both the queries and the subjects overlap.

    The subject test is only evaluated when the query test succeeds.

    @param iAlign the other instance, forwarded to isQueryOverlapping()
                  and isSubjectOverlapping()
    @return True when both tests are truthy, False otherwise
    """
    if not self.isQueryOverlapping( iAlign ):
        return False
    if not self.isSubjectOverlapping( iAlign ):
        return False
    return True
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
422
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
423 ## Update the score
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
424 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
425 # @note the new score is the length on the query times the percentage of identity
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
426 #
20ec0d14798e Uploaded
urgi-team
parents:
diff changeset
def updateScore( self ):
    """Recompute the score from the query length and the identity.

    The score attribute is set to getLengthOnQuery() multiplied by
    getIdentity(), divided by 100.0.
    """
    lengthOnQuery = self.getLengthOnQuery()
    identity = self.getIdentity()
    self.score = lengthOnQuery * identity / 100.0