6
|
1 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
2 # http://www.inra.fr
|
|
3 # http://urgi.versailles.inra.fr
|
|
4 #
|
|
5 # This software is governed by the CeCILL license under French law and
|
|
6 # abiding by the rules of distribution of free software. You can use,
|
|
7 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
8 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
9 # "http://www.cecill.info".
|
|
10 #
|
|
11 # As a counterpart to the access to the source code and rights to copy,
|
|
12 # modify and redistribute granted by the license, users are provided only
|
|
13 # with a limited warranty and the software's author, the holder of the
|
|
14 # economic rights, and the successive licensors have only limited
|
|
15 # liability.
|
|
16 #
|
|
17 # In this respect, the user's attention is drawn to the risks associated
|
|
18 # with loading, using, modifying and/or developing or reproducing the
|
|
19 # software by the user in light of its specific status of free software,
|
|
20 # that may mean that it is complicated to manipulate, and that also
|
|
21 # therefore means that it is reserved for developers and experienced
|
|
22 # professionals having in-depth computer knowledge. Users are therefore
|
|
23 # encouraged to load and test the software's suitability as regards their
|
|
24 # requirements in conditions enabling the security of their systems and/or
|
|
25 # data to be ensured and, more generally, to use and operate it in the
|
|
26 # same conditions as regards security.
|
|
27 #
|
|
28 # The fact that you are presently reading this means that you have had
|
|
29 # knowledge of the CeCILL license and that you accept its terms.
|
|
30
|
|
31
|
|
32 from commons.core.coord.Path import Path
|
|
33 from commons.core.coord.PathUtils import PathUtils
|
|
34 from commons.core.sql.TableAdaptator import TableAdaptator
|
|
35 from commons.core.sql.ITablePathAdaptator import ITablePathAdaptator
|
|
36
|
|
37
|
|
38 ## Adaptator for a Path table
|
|
39 #
|
|
40 class TablePathAdaptator( TableAdaptator, ITablePathAdaptator ):
|
|
41
|
|
42 ## Give a list of Path instances having the same identifier
|
|
43 #
|
|
44 # @param id integer identifier number
|
|
45 # @return lPath a list of Path instances
|
|
46 #
|
|
47 def getPathListFromId( self, id ):
|
|
48 sqlCmd = "SELECT * FROM %s WHERE path='%d';" % ( self._table, id )
|
|
49 lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
|
|
50 return lPath
|
|
51
|
|
52 ## Give a list of Path instances according to the given list of identifier numbers
|
|
53 #
|
|
54 # @param lId integer list
|
|
55 # @return lPath a list of Path instances
|
|
56 #
|
|
57 def getPathListFromIdList( self, lId ):
|
|
58 lPath=[]
|
|
59 if lId == []:
|
|
60 return lPath
|
|
61 sqlCmd = "select * from %s where path=%d" % (self._table, lId[0])
|
|
62 for i in lId[1:]:
|
|
63 sqlCmd += " or path=%d" % (i)
|
|
64 sqlCmd += ";"
|
|
65 lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
|
|
66 return lPath
|
|
67
|
|
68 ## Give a list of Path instances having the same given query name
|
|
69 #
|
|
70 # @param query string name of the query
|
|
71 # @return lPath a list of Path instances
|
|
72 #
|
|
73 def getPathListFromQuery( self, query ):
|
|
74 lPath = self._getPathListFromTypeName("query", query)
|
|
75 return lPath
|
|
76
|
|
77 ## Give a list of Path instances having the same given subject name
|
|
78 #
|
|
79 # @param subject string name of the subject
|
|
80 # @return lPath a list of Path instances
|
|
81 #
|
|
82 def getPathListFromSubject( self, subject ):
|
|
83 lPath = self._getPathListFromTypeName("subject", subject)
|
|
84 return lPath
|
|
85
|
|
86 ## Give a list of the distinct subject names present in the table
|
|
87 #
|
|
88 # @return lDistinctSubjectNames string list
|
|
89 #
|
|
90 def getSubjectList(self):
|
|
91 lDistinctSubjectNames = self._getDistinctTypeNamesList("subject")
|
|
92 return lDistinctSubjectNames
|
|
93
|
|
94 ## Give a list of the distinct query names present in the table
|
|
95 #
|
|
96 # @return lDistinctQueryNames string list
|
|
97 #
|
|
98 def getQueryList(self):
|
|
99 lDistinctQueryNames = self._getDistinctTypeNamesList("query")
|
|
100 return lDistinctQueryNames
|
|
101
|
|
102 ## Give a list of the distinct query names present in the table
|
|
103 # @note method to have correspondence with getSeqNameList() in TableSetAdaptator (for srptAutoPromote.py)
|
|
104 #
|
|
105 # @return lDistinctContigNames string list
|
|
106 #
|
|
107 def getSeqNameList(self):
|
|
108 return self.getQueryList()
|
|
109
|
|
110 ## Give a list with all the distinct identifiers corresponding to the query
|
|
111 #
|
|
112 # @param query string name of the subject
|
|
113 # @return lId a list of integer
|
|
114 #
|
|
115 def getIdListFromQuery( self, query ):
|
|
116 lId = self._getIdListFromTypeName("query", query)
|
|
117 return lId
|
|
118
|
|
119 ## Give a list with all the distinct identifiers corresponding to the subject
|
|
120 #
|
|
121 # @param subject string name of the subject
|
|
122 # @return lId a list of integer
|
|
123 #
|
|
124 def getIdListFromSubject( self, subject ):
|
|
125 lId = self._getIdListFromTypeName("subject", subject)
|
|
126 return lId
|
|
127
|
|
128 ## Give a list of identifiers contained in the table
|
|
129 #
|
|
130 # @return lId integer list
|
|
131 #
|
|
132 def getIdList(self):
|
|
133 sqlCmd = "SELECT DISTINCT path from %s;" % (self._table)
|
|
134 lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd )
|
|
135 return lId
|
|
136
|
|
137 ## Give a list of the distinct subject names present in the table given a query name
|
|
138 #
|
|
139 # @param queryName string
|
|
140 # @return lDistinctSubjectNamesPerQuery string list
|
|
141 #
|
|
142 def getSubjectListFromQuery( self, queryName ):
|
|
143 sqlCmd = "SELECT DISTINCT subject_name FROM %s WHERE query_name='%s'" % ( self._table, queryName )
|
|
144 lDistinctSubjectNamesPerQuery = self._iDb.getStringListWithSQLCmd(sqlCmd)
|
|
145 return lDistinctSubjectNamesPerQuery
|
|
146
|
|
147 ## Give the data contained in the table as a list of Paths instances
|
|
148 #
|
|
149 # @return lPaths list of paths instances
|
|
150 #
|
|
151 def getListOfAllPaths( self ):
|
|
152 return self.getListOfAllCoordObject()
|
|
153
|
|
154 ## Give a list of Path instances with the given query and subject, both on direct strand
|
|
155 #
|
|
156 # @param query string query name
|
|
157 # @param subject string subject name
|
|
158 # @return lPaths list of path instances
|
|
159 #
|
|
160 def getPathListWithDirectQueryDirectSubjectFromQuerySubject( self, query, subject ):
|
|
161 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND subject_name='%s' AND query_start<query_end AND subject_start<subject_end ORDER BY query_name, subject_name, query_start;" % ( self._table, query, subject )
|
|
162 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
|
|
163 return lPaths
|
|
164
|
|
165 ## Give a list of Path instances with the given query on direct strand and the given subject on reverse strand
|
|
166 #
|
|
167 # @param query string query name
|
|
168 # @param subject string subject name
|
|
169 # @return lPaths list of path instances
|
|
170 #
|
|
171 def getPathListWithDirectQueryReverseSubjectFromQuerySubject( self, query, subject ):
|
|
172 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND subject_name='%s' AND query_start<query_end AND subject_start>subject_end ORDER BY query_name, subject_name, query_start;" % ( self._table, query, subject )
|
|
173 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
|
|
174 return lPaths
|
|
175
|
|
176 ## Give the number of Path instances with the given query name
|
|
177 #
|
|
178 # @param query string query name
|
|
179 # @return pathNb integer the number of Path instances
|
|
180 #
|
|
181 def getNbPathsFromQuery( self, query ):
|
|
182 pathNb = self._getPathsNbFromTypeName("query", query)
|
|
183 return pathNb
|
|
184
|
|
185 ## Give the number of Path instances with the given subject name
|
|
186 #
|
|
187 # @param subject string subject name
|
|
188 # @return pathNb integer the number of Path instances
|
|
189 #
|
|
190 def getNbPathsFromSubject( self, subject ):
|
|
191 pathNb = self._getPathsNbFromTypeName("subject", subject)
|
|
192 return pathNb
|
|
193
|
|
194 ## Give the number of distinct path identifiers
|
|
195 #
|
|
196 # @return idNb integer the number of Path instances
|
|
197 #
|
|
198 def getNbIds( self ):
|
|
199 sqlCmd = "SELECT COUNT( DISTINCT path ) FROM %s" % ( self._table )
|
|
200 idNb = self._iDb.getIntegerWithSQLCmd( sqlCmd )
|
|
201 return idNb
|
|
202
|
|
203 ## Give the number of distinct path identifiers for a given subject
|
|
204 #
|
|
205 # @param subjectName string subject name
|
|
206 # @return idNb integer the number of Path instances
|
|
207 #
|
|
208 def getNbIdsFromSubject( self, subjectName ):
|
|
209 idNb = self._getIdNbFromTypeName("subject", subjectName)
|
|
210 return idNb
|
|
211
|
|
212 ## Give the number of distinct path identifiers for a given query
|
|
213 #
|
|
214 # @param queryName string query name
|
|
215 # @return idNb integer the number of Path instances
|
|
216 #
|
|
217 def getNbIdsFromQuery( self, queryName ):
|
|
218 idNb = self._getIdNbFromTypeName("query", queryName)
|
|
219 return idNb
|
|
220
|
|
221 ## Give a list of Path instances included in a given query region
|
|
222 #
|
|
223 # @param query string query name
|
|
224 # @param start integer start coordinate
|
|
225 # @param end integer end coordinate
|
|
226 # @return lPaths list of Path instances
|
|
227 #
|
|
228 def getPathListIncludedInQueryCoord( self, query, start, end ):
|
|
229 if( start > end ):
|
|
230 tmp = start
|
|
231 start = end
|
|
232 end = tmp
|
|
233 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND query_start>=%i AND query_end<=%i" % ( self._table, query, start, end )
|
|
234 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
|
|
235 return lPaths
|
|
236
|
|
237 ## Give a list of Path instances overlapping a given region
|
|
238 #
|
|
239 # @param query string query name
|
|
240 # @param start integer start coordinate
|
|
241 # @param end integer end coordinate
|
|
242 # @return lPath list of Path instances
|
|
243 #
|
|
244 def getPathListOverlappingQueryCoord( self, query, start, end ):
|
|
245 if( start > end ):
|
|
246 tmp = start
|
|
247 start = end
|
|
248 end = tmp
|
|
249 sqlCmd = "SELECT * FROM %s WHERE query_name='%s'" % ( self._table, query )
|
|
250 sqlCmd += " AND ( ( query_start < %i AND query_end >= %i AND query_end <= %i )" % ( start, start, end )
|
|
251 sqlCmd += " OR ( query_start >= %i AND query_end <= %i )" % ( start, end )
|
|
252 sqlCmd += " OR ( query_start >= %i AND query_start <= %i AND query_end > %i )" % ( start, end, end )
|
|
253 sqlCmd += " OR ( query_start < %i AND query_end > %i ) )" % ( start, end )
|
|
254 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
|
|
255 return lPaths
|
|
256
|
|
257 ## Give a list of Path instances overlapping a given region
|
|
258 #
|
|
259 # @note whole chains are returned, even if only a fragment overlap with the given region
|
|
260 # @param query string query name
|
|
261 # @param start integer start coordinate
|
|
262 # @param end integer end coordinate
|
|
263 # @return lPath list of Path instances
|
|
264 #
|
|
265 def getChainListOverlappingQueryCoord( self, query, start, end ):
|
|
266 if( start > end ):
|
|
267 tmp = start
|
|
268 start = end
|
|
269 end = tmp
|
|
270 sqlCmd = "SELECT DISTINCT path FROM %s WHERE query_name='%s'" % ( self._table, query )
|
|
271 sqlCmd += " AND ( ( query_start <= %i AND query_end >= %i AND query_end <= %i )" % ( start, start, end )
|
|
272 sqlCmd += " OR ( query_start >= %i AND query_end <= %i )" % ( start, end )
|
|
273 sqlCmd += " OR ( query_start >= %i AND query_start <= %i AND query_end >= %i )" % ( start, end, end )
|
|
274 sqlCmd += " OR ( query_start <= %i AND query_end >= %i ) )" % ( start, end )
|
|
275 lIdentifiers = self._iDb.getIntegerListWithSQLCmd( sqlCmd )
|
|
276 lPaths = self.getPathListFromIdList( lIdentifiers )
|
|
277 return lPaths
|
|
278
|
|
279 ## Give a list of Set instances overlapping a given region
|
|
280 #
|
|
281 # @param query string query name
|
|
282 # @param start integer start coordinate
|
|
283 # @param end integer end coordinate
|
|
284 # @return lSet list of Set instances
|
|
285 #
|
|
286 def getSetListOverlappingQueryCoord(self, query, start, end):
|
|
287 lPath = self.getPathListOverlappingQueryCoord(query, start, end)
|
|
288 lSet = PathUtils.getSetListFromQueries(lPath)
|
|
289 return lSet
|
|
290
|
|
291 ## Give a list of Set instances included in a given region
|
|
292 #
|
|
293 # @param query string query name
|
|
294 # @param start integer start coordinate
|
|
295 # @param end integer end coordinate
|
|
296 # @return lSet list of Set instances
|
|
297 #
|
|
298 def getSetListIncludedInQueryCoord(self, query, start, end):
|
|
299 lPath=self.getPathListIncludedInQueryCoord(query, start, end)
|
|
300 lSet = PathUtils.getSetListFromQueries(lPath)
|
|
301 return lSet
|
|
302
|
|
303 ## Give a a list of Path instances sorted by query coordinates
|
|
304 #
|
|
305 # @return lPaths list of Path instances
|
|
306 #
|
|
307 def getPathListSortedByQueryCoord( self ):
|
|
308 sqlCmd = "SELECT * FROM %s ORDER BY query_name, LEAST(query_start,query_end)" % ( self._table )
|
|
309 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
|
|
310 return lPaths
|
|
311
|
|
312 ## Give a a list of Path instances sorted by query coordinates for a given query
|
|
313 #
|
|
314 # @return lPaths list of Path instances
|
|
315 #
|
|
316 def getPathListSortedByQueryCoordFromQuery( self, queryName ):
|
|
317 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY LEAST(query_start,query_end)" % ( self._table, queryName )
|
|
318 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
|
|
319 return lPaths
|
|
320
|
|
321 ## Give a a list of Path instances sorted by query coordinates and score for a given query
|
|
322 #
|
|
323 # @return lPaths list of Path instances
|
|
324 #
|
|
325 def getPathListSortedByQueryCoordAndScoreFromQuery(self, queryName):
|
|
326 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY query_start, query_end, score" % (self._table, queryName)
|
|
327 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
|
|
328 return lPaths
|
|
329
|
|
330 ## Give a cumulative length of all paths (fragments) for a given subject name
|
|
331 #
|
|
332 # @param subjectName string subject name
|
|
333 # @return nb Cumulative length for all path
|
|
334 #
|
|
335 # @warning doesn't take into account the overlaps !!
|
|
336 #
|
|
337 def getCumulLengthFromSubject( self, subjectName ):
|
|
338 sqlCmd = "SELECT SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s'" % ( self._table, subjectName )
|
|
339 nb = self._iDb.getIntegerWithSQLCmd(sqlCmd)
|
|
340 return nb
|
|
341
|
|
342 ## Give a list of the length of all chains of paths for a given subject name
|
|
343 #
|
|
344 # @param subjectName string name of the subject
|
|
345 # @return lChainLengths list of lengths per chain of paths
|
|
346 #
|
|
347 # @warning doesn't take into account the overlaps !!
|
|
348 #
|
|
349 def getChainLengthListFromSubject( self, subjectName ):
|
|
350 sqlCmd = "SELECT SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s' GROUP BY PATH" % ( self._table, subjectName )
|
|
351 lChainLengths = self._iDb.getIntegerListWithSQLCmd(sqlCmd)
|
|
352 return lChainLengths
|
|
353
|
|
354 ## Give a list of identity of all chains of paths for a given subject name
|
|
355 #
|
|
356 # @param subjectName string name of the subject
|
|
357 # @return lChainIdentities list of identities per chain of paths
|
|
358 #
|
|
359 # @warning doesn't take into account the overlaps !!
|
|
360 #
|
|
361 def getChainIdentityListFromSubject( self, subjectName ):
|
|
362 lChainIdentities = []
|
|
363 sqlCmd = "SELECT SUM(identity*(ABS(query_start-query_end)+1)) / SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s' GROUP BY PATH" % ( self._table, subjectName )
|
|
364 self._iDb.execute( sqlCmd )
|
|
365 res = self._iDb.fetchall()
|
|
366 for i in res:
|
|
367 if i[0] != None:
|
|
368 lChainIdentities.append( round( float( i[0] ), 2 ) )
|
|
369 return lChainIdentities
|
|
370
|
|
371 ## Give a list of the length of all paths for a given subject name
|
|
372 #
|
|
373 # @param subjectName string name of the subject
|
|
374 # @return lPathLengths list of lengths per path
|
|
375 #
|
|
376 # @warning doesn't take into account the overlaps !!
|
|
377 #
|
|
378 def getPathLengthListFromSubject( self, subjectName ):
|
|
379 sqlCmd = "SELECT ABS(query_end-query_start)+1 FROM %s WHERE subject_name='%s'" % ( self._table, subjectName )
|
|
380 lPathLengths = self._iDb.getIntegerListWithSQLCmd(sqlCmd)
|
|
381 return lPathLengths
|
|
382
|
|
383 ## Give a a list with all distinct identifiers for a given subject sorted in decreasing order according to the length of the chains
|
|
384 #
|
|
385 # @param subjectName string subject name
|
|
386 # @return lPathNums a list of paths Id
|
|
387 #
|
|
388 def getIdListSortedByDecreasingChainLengthFromSubject( self, subjectName ):
|
|
389 sqlCmd = "SELECT DISTINCT path, SUM( ABS(query_end - query_start) + 1 ) AS length"
|
|
390 sqlCmd += " FROM %s" % ( self._table )
|
|
391 sqlCmd += " WHERE subject_name='%s'" % ( subjectName )
|
|
392 sqlCmd += " GROUP BY path"
|
|
393 sqlCmd += " ORDER BY length DESC";
|
|
394 lPathNums = self._iDb.getIntegerListWithSQLCmd(sqlCmd)
|
|
395 return lPathNums
|
|
396
|
|
397 ## Give a a list with all distinct identifiers for a given subject where the chain lengths is above a given threshold
|
|
398 #
|
|
399 # @param subjectName string subject name
|
|
400 # @lengthThreshold length threshold below which chains are filtered
|
|
401 # @return lPathNums a list of paths Id
|
|
402 #
|
|
403 def getIdListFromSubjectWhereChainsLongerThanThreshold( self, subjectName, lengthThreshold ):
|
|
404 lPathNums = []
|
|
405 sqlCmd = "SELECT DISTINCT path, SUM( ABS(query_end - query_start) + 1 ) AS length"
|
|
406 sqlCmd += " FROM %s" % ( self._table )
|
|
407 sqlCmd += " WHERE subject_name='%s'" % ( subjectName )
|
|
408 sqlCmd += " GROUP BY path"
|
|
409 sqlCmd += " ORDER BY length DESC";
|
|
410 self._iDb.execute( sqlCmd )
|
|
411 res = self._iDb.fetchall()
|
|
412 for i in res:
|
|
413 if int(i[1]) >= int(lengthThreshold):
|
|
414 lPathNums.append( i[0] )
|
|
415 return lPathNums
|
|
416
|
|
417 ## Give a Set instances list of a query annotation
|
|
418 #
|
|
419 # @param query string query name
|
|
420 # @return lSets list of set instance
|
|
421 #
|
|
422 def getSetListFromQuery(self, query):
|
|
423 lpath = self.getPathListFromQuery(query)
|
|
424 lSets = PathUtils.getSetListFromQueries(lpath)
|
|
425 return lSets
|
|
426
|
|
427 ## Give a Set instances list of a query annotation
|
|
428 # @note method to have correspondence with getSetListFromSeqName() in TableSetAdaptator (for srptAutoPromote.py)
|
|
429 #
|
|
430 # @param query string query name
|
|
431 # @return lSets list of set instance
|
|
432 #
|
|
433 def getSetListFromSeqName(self, query):
|
|
434 return self.getSetListFromQuery(query)
|
|
435
|
|
436 ## Delete path corresponding to a given identifier number
|
|
437 #
|
|
438 # @param id integer identifier number
|
|
439 #
|
|
440 def deleteFromId(self,id):
|
|
441 sqlCmd = "delete from %s where path=%d;" % (self._table, id)
|
|
442 self._iDb.execute(sqlCmd)
|
|
443
|
|
444 ## Delete path corresponding to a given object path line
|
|
445 #
|
|
446 # @param path object
|
|
447 #
|
|
448 def deleteFromPath(self,path):
|
|
449 sqlCmd = "delete from %s where path=%d and query_name='%s' and query_start=%s and query_end=%s and subject_name='%s' and subject_start=%s and subject_end=%s and E_value=%s and score=%s" % (self._table, path.getIdentifier(), path.getQueryName(), path.getQueryStart(), path.getQueryEnd(), path.getSubjectName(), path.getSubjectStart(), path.getSubjectEnd(), path.getEvalue(), int(path.getScore()))
|
|
450 self._iDb.execute(sqlCmd)
|
|
451
|
|
452 ## Delete path corresponding to a given list of identifier number
|
|
453 #
|
|
454 # @param lId list of identifier number
|
|
455 #
|
|
456 def deleteFromIdList(self,lId):
|
|
457 if lId == []:
|
|
458 return
|
|
459 sqlCmd = "delete from %s where path=%d" % (self._table, lId[0])
|
|
460 for id in lId[1:]:
|
|
461 sqlCmd += " or path=%d" %(id)
|
|
462 sqlCmd += ";"
|
|
463 self._iDb.execute(sqlCmd)
|
|
464
|
|
465 ## Get a new id number
|
|
466 #
|
|
467 # @return newId integer new id
|
|
468 #
|
|
469 def getNewId(self):
|
|
470 sqlCmd = 'select max(path) from %s;' % (self._table)
|
|
471 maxId = self._iDb.getIntegerWithSQLCmd(sqlCmd)
|
|
472 newId = int(maxId)+1
|
|
473 return newId
|
|
474
|
|
475 ## Join two path by changing id number of id1 and id2 path to the least of id1 and id2
|
|
476 #
|
|
477 # @param id1 integer id path number
|
|
478 # @param id2 integer id path number
|
|
479 # @return newId integer minimum of id1 id2
|
|
480 # @note this method modify the ID even if this one not existing in the path table
|
|
481 #
|
|
482 def joinTwoPaths(self, id1, id2):
|
|
483 if id1 < id2:
|
|
484 newId = id1
|
|
485 oldId = id2
|
|
486 else:
|
|
487 newId = id2
|
|
488 oldId = id1
|
|
489 sqlCmd = "UPDATE %s SET path=%d WHERE path=%d"\
|
|
490 % (self._table, newId, oldId)
|
|
491 self._iDb.execute(sqlCmd)
|
|
492 return newId
|
|
493
|
|
494 ## Create a 'pathRange' table from a 'path' table.
|
|
495 # The output table summarizes the information per identifier.
|
|
496 # The min and max value are taken.
|
|
497 # The identity is averaged over the fragments.
|
|
498 # It may overwrite an existing table.
|
|
499 #
|
|
500 # @param outTable string name of the output table
|
|
501 # @return outTable string Table which summarizes the information per identifier
|
|
502 #
|
|
503 def path2PathRange( self, outTable="" ):
|
|
504 return self._path2PathRangeOrPath2PathRangeQuery(outTable)
|
|
505
|
|
506 ## Create a 'pathrange' table from a 'path' table for the given query name
|
|
507 # The output table summarizes the information per identifier
|
|
508 # The min and max value are taken
|
|
509 # The identity is averaged over the fragments, weighted by the length of the of the query
|
|
510 # It may overwrite an existing table
|
|
511 #
|
|
512 # @param outTable string name of the output table
|
|
513 # @param query string query name
|
|
514 # @return outTable string Table which summarizes the information per identifier
|
|
515 #
|
|
516 def _path2PathRangeFromQuery( self, queryName, outTable="" ):
|
|
517 return self._path2PathRangeOrPath2PathRangeQuery(outTable, queryName)
|
|
518
|
|
519 def _path2PathRangeOrPath2PathRangeQuery(self, outTable, queryName=""):
|
|
520 self._iDb.createIndex( self._table, "path" )
|
|
521 if outTable == "":
|
|
522 outTable = "%s_range" % ( self._table )
|
|
523 self._iDb.dropTable( outTable )
|
|
524
|
|
525 tmpTable = "%s_tmp" % ( self._table )
|
|
526 self._iDb.dropTable( tmpTable )
|
|
527
|
|
528 sqlCmd = self._genSqlCmdForTmpTableAccordingToQueryName(queryName, tmpTable)
|
|
529 self._iDb.execute(sqlCmd)
|
|
530
|
|
531 sqlCmd = "CREATE TABLE %s SELECT path, query_name, MIN(query_start) AS query_start, MAX(query_end) AS query_end, subject_name, MIN(subject_start) AS subject_start, MAX(subject_end) AS subject_end, MIN(e_value) AS e_value, SUM(score) AS score, TRUNCATE(SUM(identity)/SUM(ABS(query_end-query_start)+1),2) AS identity FROM %s WHERE query_start<query_end AND subject_start<subject_end GROUP BY path;" % ( outTable, tmpTable )
|
|
532 self._iDb.execute( sqlCmd )
|
|
533
|
|
534 sqlCmd = "INSERT into %s SELECT path, query_name, MIN(query_start) AS query_start, MAX(query_end) AS query_end, subject_name, MAX(subject_start) AS subject_start, MIN(subject_end) AS subject_end, MIN(e_value) AS e_value, SUM(score) AS score, TRUNCATE(SUM(identity)/SUM(ABS(query_end-query_start)+1),2) AS identity FROM %s WHERE query_start<query_end AND subject_start>subject_end GROUP BY path;" % ( outTable, tmpTable )
|
|
535 self._iDb.execute( sqlCmd )
|
|
536
|
|
537 self._iDb.createIndex( outTable, "path" )
|
|
538 self._iDb.dropTable( tmpTable )
|
|
539 return outTable
|
|
540
|
|
541 ## Give a list of Path lists sorted by weighted identity.
|
|
542 #
|
|
543 # @return lChains list of chains
|
|
544 #
|
|
545 def getListOfChainsSortedByAscIdentityFromQuery( self, qry ):
|
|
546 lChains = []
|
|
547 tmpTable = self._path2PathRangeFromQuery( qry )
|
|
548 sqlCmd = "SELECT path FROM %s ORDER BY identity" % ( tmpTable )
|
|
549 self._iDb.execute( sqlCmd )
|
|
550 lPathnums = self._iDb.fetchall()
|
|
551 self._iDb.dropTable( tmpTable )
|
|
552 for pathnum in lPathnums:
|
|
553 lChains.append( self.getPathListFromId( int(pathnum[0]) ) )
|
|
554 return lChains
|
|
555
|
|
556 ## Give a list of path instances sorted by increasing E-value
|
|
557 #
|
|
558 # @return lPaths list of path instances
|
|
559 #
|
|
560 def getPathListSortedByIncreasingEvalueFromQuery( self, queryName ):
|
|
561 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY E_value ASC" % ( self._table, queryName )
|
|
562 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
|
|
563 return lPaths
|
|
564
|
|
565
|
|
566 ## Return the number of times a given instance is present in the table
|
|
567 # The identifier is not considered,
|
|
568 # only coordinates, score, E-value and identity.
|
|
569 #
|
|
570 # @return nbOcc integer
|
|
571 #
|
|
572 def getNbOccurrences( self, iPath ):
|
|
573 sqlCmd = "SELECT COUNT(*) FROM %s WHERE" % ( self._table )
|
|
574 sqlCmd += " query_name='%s'" % ( iPath.range_query.seqname )
|
|
575 sqlCmd += " AND query_start='%s'" % ( iPath.range_query.start )
|
|
576 sqlCmd += " AND query_end='%s'" % ( iPath.range_query.end )
|
|
577 sqlCmd += " AND subject_name='%s'" % ( iPath.range_subject.seqname )
|
|
578 sqlCmd += " AND subject_start='%s'" % ( iPath.range_subject.start )
|
|
579 sqlCmd += " AND subject_end='%s'" % ( iPath.range_subject.end )
|
|
580 sqlCmd += " AND score='%s'" % ( iPath.score )
|
|
581 sqlCmd += " AND e_value='%s'" % ( iPath.e_value )
|
|
582 sqlCmd += " AND identity='%s'" % ( iPath.identity )
|
|
583 nbOcc = self._iDb.getIntegerWithSQLCmd( sqlCmd )
|
|
584 return nbOcc
|
|
585
|
|
586
|
|
587 def _getPathListFromTypeName( self, type, typeName ):
|
|
588 sqlCmd = "SELECT * FROM %s WHERE %s_name='%s';" % ( self._table, type, typeName )
|
|
589 lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
|
|
590 return lPath
|
|
591
|
|
592 def _getDistinctTypeNamesList( self, type ):
|
|
593 sqlCmd = "SELECT DISTINCT %s_name FROM %s" % ( type, self._table )
|
|
594 lDistinctTypeNames = self._iDb.getStringListWithSQLCmd(sqlCmd)
|
|
595 return lDistinctTypeNames
|
|
596
|
|
597 def _getPathsNbFromTypeName( self, type, typeName ):
|
|
598 sqlCmd = "SELECT COUNT(*) FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName )
|
|
599 pathNb = self._iDb.getIntegerWithSQLCmd( sqlCmd )
|
|
600 return pathNb
|
|
601
|
|
602 def _getIdListFromTypeName( self, type, typeName ):
|
|
603 sqlCmd = "SELECT DISTINCT path FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName )
|
|
604 lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd )
|
|
605 return lId
|
|
606
|
|
607 def _getIdNbFromTypeName( self, type, typeName ):
|
|
608 sqlCmd = "SELECT COUNT( DISTINCT path ) FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName )
|
|
609 idNb = self._iDb.getIntegerWithSQLCmd( sqlCmd )
|
|
610 return idNb
|
|
611
|
|
612 def _getTypeAndAttr2Insert(self, path):
|
|
613 type2Insert = ("'%d'", "'%s'", "'%d'", "'%d'", "'%s'", "'%d'", "'%d'", "'%g'", "'%d'", "'%f'")
|
|
614 if path.range_query.isOnDirectStrand():
|
|
615 queryStart = path.range_query.start
|
|
616 queryEnd = path.range_query.end
|
|
617 subjectStart = path.range_subject.start
|
|
618 subjectEnd = path.range_subject.end
|
|
619 else:
|
|
620 queryStart = path.range_query.end
|
|
621 queryEnd = path.range_query.start
|
|
622 subjectStart = path.range_subject.end
|
|
623 subjectEnd = path.range_subject.start
|
|
624 attr2Insert = ( path.id,\
|
|
625 path.range_query.seqname,\
|
|
626 queryStart,\
|
|
627 queryEnd,\
|
|
628 path.range_subject.seqname,\
|
|
629 subjectStart,\
|
|
630 subjectEnd,\
|
|
631 path.e_value,\
|
|
632 path.score,\
|
|
633 path.identity\
|
|
634 )
|
|
635 return type2Insert, attr2Insert
|
|
636
|
|
637 def _getInstanceToAdapt(self):
|
|
638 iPath = Path()
|
|
639 return iPath
|
|
640
|
|
641 def _escapeAntislash(self, obj):
|
|
642 obj.range_query.seqname = obj.range_query.seqname.replace("\\", "\\\\")
|
|
643 obj.range_subject.seqname = obj.range_subject.seqname.replace("\\", "\\\\")
|
|
644
|
|
645 def _genSqlCmdForTmpTableAccordingToQueryName(self, queryName, tmpTable):
|
|
646 sqlCmd = ""
|
|
647 if queryName == "":
|
|
648 sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s" % (tmpTable, self._table)
|
|
649 else:
|
|
650 sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s WHERE query_name='%s'" % (tmpTable, self._table, queryName)
|
|
651 return sqlCmd
|
|
652
|
|
653 ## return a filtered list with only one unique occurrence of path of a given list
|
|
654 #
|
|
655 # @param lPath a list of Path instances
|
|
656 # @return lUniquePath a list of Path instances
|
|
657 #
|
|
658 def getListOfUniqueOccPath(self, lPath):
|
|
659 if len(lPath) < 2 :
|
|
660 return lPath
|
|
661
|
|
662 sortedListPath = sorted(lPath, key=lambda iPath: ( iPath.range_query.getSeqname(), iPath.range_query.getStart(), iPath.range_query.getEnd(), iPath.range_subject.getSeqname(), iPath.range_subject.getStart(), iPath.range_subject.getEnd()))
|
|
663 lUniquePath = []
|
|
664 for i in xrange(1, len(sortedListPath)):
|
|
665 previousPath = sortedListPath [i-1]
|
|
666 currentPath = sortedListPath [i]
|
|
667 if previousPath != currentPath:
|
|
668 lUniquePath.append(previousPath)
|
|
669
|
|
670 if previousPath != currentPath:
|
|
671 lUniquePath.append(currentPath)
|
|
672
|
|
673 return lUniquePath |