Mercurial > repos > yufei-luo > s_mart
comparison commons/core/sql/TablePathAdaptator.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:ea3082881bf8 | 6:769e306b7933 |
---|---|
1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
2 # http://www.inra.fr | |
3 # http://urgi.versailles.inra.fr | |
4 # | |
5 # This software is governed by the CeCILL license under French law and | |
6 # abiding by the rules of distribution of free software. You can use, | |
7 # modify and/ or redistribute the software under the terms of the CeCILL | |
8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
9 # "http://www.cecill.info". | |
10 # | |
11 # As a counterpart to the access to the source code and rights to copy, | |
12 # modify and redistribute granted by the license, users are provided only | |
13 # with a limited warranty and the software's author, the holder of the | |
14 # economic rights, and the successive licensors have only limited | |
15 # liability. | |
16 # | |
17 # In this respect, the user's attention is drawn to the risks associated | |
18 # with loading, using, modifying and/or developing or reproducing the | |
19 # software by the user in light of its specific status of free software, | |
20 # that may mean that it is complicated to manipulate, and that also | |
21 # therefore means that it is reserved for developers and experienced | |
22 # professionals having in-depth computer knowledge. Users are therefore | |
23 # encouraged to load and test the software's suitability as regards their | |
24 # requirements in conditions enabling the security of their systems and/or | |
25 # data to be ensured and, more generally, to use and operate it in the | |
26 # same conditions as regards security. | |
27 # | |
28 # The fact that you are presently reading this means that you have had | |
29 # knowledge of the CeCILL license and that you accept its terms. | |
30 | |
31 | |
32 from commons.core.coord.Path import Path | |
33 from commons.core.coord.PathUtils import PathUtils | |
34 from commons.core.sql.TableAdaptator import TableAdaptator | |
35 from commons.core.sql.ITablePathAdaptator import ITablePathAdaptator | |
36 | |
37 | |
38 ## Adaptator for a Path table | |
39 # | |
40 class TablePathAdaptator( TableAdaptator, ITablePathAdaptator ): | |
41 | |
42 ## Give a list of Path instances having the same identifier | |
43 # | |
44 # @param id integer identifier number | |
45 # @return lPath a list of Path instances | |
46 # | |
47 def getPathListFromId( self, id ): | |
48 sqlCmd = "SELECT * FROM %s WHERE path='%d';" % ( self._table, id ) | |
49 lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
50 return lPath | |
51 | |
52 ## Give a list of Path instances according to the given list of identifier numbers | |
53 # | |
54 # @param lId integer list | |
55 # @return lPath a list of Path instances | |
56 # | |
57 def getPathListFromIdList( self, lId ): | |
58 lPath=[] | |
59 if lId == []: | |
60 return lPath | |
61 sqlCmd = "select * from %s where path=%d" % (self._table, lId[0]) | |
62 for i in lId[1:]: | |
63 sqlCmd += " or path=%d" % (i) | |
64 sqlCmd += ";" | |
65 lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
66 return lPath | |
67 | |
68 ## Give a list of Path instances having the same given query name | |
69 # | |
70 # @param query string name of the query | |
71 # @return lPath a list of Path instances | |
72 # | |
73 def getPathListFromQuery( self, query ): | |
74 lPath = self._getPathListFromTypeName("query", query) | |
75 return lPath | |
76 | |
77 ## Give a list of Path instances having the same given subject name | |
78 # | |
79 # @param subject string name of the subject | |
80 # @return lPath a list of Path instances | |
81 # | |
82 def getPathListFromSubject( self, subject ): | |
83 lPath = self._getPathListFromTypeName("subject", subject) | |
84 return lPath | |
85 | |
86 ## Give a list of the distinct subject names present in the table | |
87 # | |
88 # @return lDistinctSubjectNames string list | |
89 # | |
90 def getSubjectList(self): | |
91 lDistinctSubjectNames = self._getDistinctTypeNamesList("subject") | |
92 return lDistinctSubjectNames | |
93 | |
94 ## Give a list of the distinct query names present in the table | |
95 # | |
96 # @return lDistinctQueryNames string list | |
97 # | |
98 def getQueryList(self): | |
99 lDistinctQueryNames = self._getDistinctTypeNamesList("query") | |
100 return lDistinctQueryNames | |
101 | |
102 ## Give a list of the distinct query names present in the table | |
103 # @note method to have correspondence with getSeqNameList() in TableSetAdaptator (for srptAutoPromote.py) | |
104 # | |
105 # @return lDistinctContigNames string list | |
106 # | |
107 def getSeqNameList(self): | |
108 return self.getQueryList() | |
109 | |
110 ## Give a list with all the distinct identifiers corresponding to the query | |
111 # | |
112 # @param query string name of the subject | |
113 # @return lId a list of integer | |
114 # | |
115 def getIdListFromQuery( self, query ): | |
116 lId = self._getIdListFromTypeName("query", query) | |
117 return lId | |
118 | |
119 ## Give a list with all the distinct identifiers corresponding to the subject | |
120 # | |
121 # @param subject string name of the subject | |
122 # @return lId a list of integer | |
123 # | |
124 def getIdListFromSubject( self, subject ): | |
125 lId = self._getIdListFromTypeName("subject", subject) | |
126 return lId | |
127 | |
128 ## Give a list of identifiers contained in the table | |
129 # | |
130 # @return lId integer list | |
131 # | |
132 def getIdList(self): | |
133 sqlCmd = "SELECT DISTINCT path from %s;" % (self._table) | |
134 lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd ) | |
135 return lId | |
136 | |
137 ## Give a list of the distinct subject names present in the table given a query name | |
138 # | |
139 # @param queryName string | |
140 # @return lDistinctSubjectNamesPerQuery string list | |
141 # | |
142 def getSubjectListFromQuery( self, queryName ): | |
143 sqlCmd = "SELECT DISTINCT subject_name FROM %s WHERE query_name='%s'" % ( self._table, queryName ) | |
144 lDistinctSubjectNamesPerQuery = self._iDb.getStringListWithSQLCmd(sqlCmd) | |
145 return lDistinctSubjectNamesPerQuery | |
146 | |
147 ## Give the data contained in the table as a list of Paths instances | |
148 # | |
149 # @return lPaths list of paths instances | |
150 # | |
151 def getListOfAllPaths( self ): | |
152 return self.getListOfAllCoordObject() | |
153 | |
154 ## Give a list of Path instances with the given query and subject, both on direct strand | |
155 # | |
156 # @param query string query name | |
157 # @param subject string subject name | |
158 # @return lPaths list of path instances | |
159 # | |
160 def getPathListWithDirectQueryDirectSubjectFromQuerySubject( self, query, subject ): | |
161 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND subject_name='%s' AND query_start<query_end AND subject_start<subject_end ORDER BY query_name, subject_name, query_start;" % ( self._table, query, subject ) | |
162 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
163 return lPaths | |
164 | |
165 ## Give a list of Path instances with the given query on direct strand and the given subject on reverse strand | |
166 # | |
167 # @param query string query name | |
168 # @param subject string subject name | |
169 # @return lPaths list of path instances | |
170 # | |
171 def getPathListWithDirectQueryReverseSubjectFromQuerySubject( self, query, subject ): | |
172 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND subject_name='%s' AND query_start<query_end AND subject_start>subject_end ORDER BY query_name, subject_name, query_start;" % ( self._table, query, subject ) | |
173 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
174 return lPaths | |
175 | |
176 ## Give the number of Path instances with the given query name | |
177 # | |
178 # @param query string query name | |
179 # @return pathNb integer the number of Path instances | |
180 # | |
181 def getNbPathsFromQuery( self, query ): | |
182 pathNb = self._getPathsNbFromTypeName("query", query) | |
183 return pathNb | |
184 | |
185 ## Give the number of Path instances with the given subject name | |
186 # | |
187 # @param subject string subject name | |
188 # @return pathNb integer the number of Path instances | |
189 # | |
190 def getNbPathsFromSubject( self, subject ): | |
191 pathNb = self._getPathsNbFromTypeName("subject", subject) | |
192 return pathNb | |
193 | |
194 ## Give the number of distinct path identifiers | |
195 # | |
196 # @return idNb integer the number of Path instances | |
197 # | |
198 def getNbIds( self ): | |
199 sqlCmd = "SELECT COUNT( DISTINCT path ) FROM %s" % ( self._table ) | |
200 idNb = self._iDb.getIntegerWithSQLCmd( sqlCmd ) | |
201 return idNb | |
202 | |
203 ## Give the number of distinct path identifiers for a given subject | |
204 # | |
205 # @param subjectName string subject name | |
206 # @return idNb integer the number of Path instances | |
207 # | |
208 def getNbIdsFromSubject( self, subjectName ): | |
209 idNb = self._getIdNbFromTypeName("subject", subjectName) | |
210 return idNb | |
211 | |
212 ## Give the number of distinct path identifiers for a given query | |
213 # | |
214 # @param queryName string query name | |
215 # @return idNb integer the number of Path instances | |
216 # | |
217 def getNbIdsFromQuery( self, queryName ): | |
218 idNb = self._getIdNbFromTypeName("query", queryName) | |
219 return idNb | |
220 | |
221 ## Give a list of Path instances included in a given query region | |
222 # | |
223 # @param query string query name | |
224 # @param start integer start coordinate | |
225 # @param end integer end coordinate | |
226 # @return lPaths list of Path instances | |
227 # | |
228 def getPathListIncludedInQueryCoord( self, query, start, end ): | |
229 if( start > end ): | |
230 tmp = start | |
231 start = end | |
232 end = tmp | |
233 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND query_start>=%i AND query_end<=%i" % ( self._table, query, start, end ) | |
234 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
235 return lPaths | |
236 | |
237 ## Give a list of Path instances overlapping a given region | |
238 # | |
239 # @param query string query name | |
240 # @param start integer start coordinate | |
241 # @param end integer end coordinate | |
242 # @return lPath list of Path instances | |
243 # | |
244 def getPathListOverlappingQueryCoord( self, query, start, end ): | |
245 if( start > end ): | |
246 tmp = start | |
247 start = end | |
248 end = tmp | |
249 sqlCmd = "SELECT * FROM %s WHERE query_name='%s'" % ( self._table, query ) | |
250 sqlCmd += " AND ( ( query_start < %i AND query_end >= %i AND query_end <= %i )" % ( start, start, end ) | |
251 sqlCmd += " OR ( query_start >= %i AND query_end <= %i )" % ( start, end ) | |
252 sqlCmd += " OR ( query_start >= %i AND query_start <= %i AND query_end > %i )" % ( start, end, end ) | |
253 sqlCmd += " OR ( query_start < %i AND query_end > %i ) )" % ( start, end ) | |
254 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
255 return lPaths | |
256 | |
257 ## Give a list of Path instances overlapping a given region | |
258 # | |
259 # @note whole chains are returned, even if only a fragment overlap with the given region | |
260 # @param query string query name | |
261 # @param start integer start coordinate | |
262 # @param end integer end coordinate | |
263 # @return lPath list of Path instances | |
264 # | |
265 def getChainListOverlappingQueryCoord( self, query, start, end ): | |
266 if( start > end ): | |
267 tmp = start | |
268 start = end | |
269 end = tmp | |
270 sqlCmd = "SELECT DISTINCT path FROM %s WHERE query_name='%s'" % ( self._table, query ) | |
271 sqlCmd += " AND ( ( query_start <= %i AND query_end >= %i AND query_end <= %i )" % ( start, start, end ) | |
272 sqlCmd += " OR ( query_start >= %i AND query_end <= %i )" % ( start, end ) | |
273 sqlCmd += " OR ( query_start >= %i AND query_start <= %i AND query_end >= %i )" % ( start, end, end ) | |
274 sqlCmd += " OR ( query_start <= %i AND query_end >= %i ) )" % ( start, end ) | |
275 lIdentifiers = self._iDb.getIntegerListWithSQLCmd( sqlCmd ) | |
276 lPaths = self.getPathListFromIdList( lIdentifiers ) | |
277 return lPaths | |
278 | |
279 ## Give a list of Set instances overlapping a given region | |
280 # | |
281 # @param query string query name | |
282 # @param start integer start coordinate | |
283 # @param end integer end coordinate | |
284 # @return lSet list of Set instances | |
285 # | |
286 def getSetListOverlappingQueryCoord(self, query, start, end): | |
287 lPath = self.getPathListOverlappingQueryCoord(query, start, end) | |
288 lSet = PathUtils.getSetListFromQueries(lPath) | |
289 return lSet | |
290 | |
291 ## Give a list of Set instances included in a given region | |
292 # | |
293 # @param query string query name | |
294 # @param start integer start coordinate | |
295 # @param end integer end coordinate | |
296 # @return lSet list of Set instances | |
297 # | |
298 def getSetListIncludedInQueryCoord(self, query, start, end): | |
299 lPath=self.getPathListIncludedInQueryCoord(query, start, end) | |
300 lSet = PathUtils.getSetListFromQueries(lPath) | |
301 return lSet | |
302 | |
303 ## Give a a list of Path instances sorted by query coordinates | |
304 # | |
305 # @return lPaths list of Path instances | |
306 # | |
307 def getPathListSortedByQueryCoord( self ): | |
308 sqlCmd = "SELECT * FROM %s ORDER BY query_name, LEAST(query_start,query_end)" % ( self._table ) | |
309 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
310 return lPaths | |
311 | |
312 ## Give a a list of Path instances sorted by query coordinates for a given query | |
313 # | |
314 # @return lPaths list of Path instances | |
315 # | |
316 def getPathListSortedByQueryCoordFromQuery( self, queryName ): | |
317 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY LEAST(query_start,query_end)" % ( self._table, queryName ) | |
318 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
319 return lPaths | |
320 | |
321 ## Give a a list of Path instances sorted by query coordinates and score for a given query | |
322 # | |
323 # @return lPaths list of Path instances | |
324 # | |
325 def getPathListSortedByQueryCoordAndScoreFromQuery(self, queryName): | |
326 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY query_start, query_end, score" % (self._table, queryName) | |
327 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
328 return lPaths | |
329 | |
330 ## Give a cumulative length of all paths (fragments) for a given subject name | |
331 # | |
332 # @param subjectName string subject name | |
333 # @return nb Cumulative length for all path | |
334 # | |
335 # @warning doesn't take into account the overlaps !! | |
336 # | |
337 def getCumulLengthFromSubject( self, subjectName ): | |
338 sqlCmd = "SELECT SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s'" % ( self._table, subjectName ) | |
339 nb = self._iDb.getIntegerWithSQLCmd(sqlCmd) | |
340 return nb | |
341 | |
342 ## Give a list of the length of all chains of paths for a given subject name | |
343 # | |
344 # @param subjectName string name of the subject | |
345 # @return lChainLengths list of lengths per chain of paths | |
346 # | |
347 # @warning doesn't take into account the overlaps !! | |
348 # | |
349 def getChainLengthListFromSubject( self, subjectName ): | |
350 sqlCmd = "SELECT SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s' GROUP BY PATH" % ( self._table, subjectName ) | |
351 lChainLengths = self._iDb.getIntegerListWithSQLCmd(sqlCmd) | |
352 return lChainLengths | |
353 | |
354 ## Give a list of identity of all chains of paths for a given subject name | |
355 # | |
356 # @param subjectName string name of the subject | |
357 # @return lChainIdentities list of identities per chain of paths | |
358 # | |
359 # @warning doesn't take into account the overlaps !! | |
360 # | |
361 def getChainIdentityListFromSubject( self, subjectName ): | |
362 lChainIdentities = [] | |
363 sqlCmd = "SELECT SUM(identity*(ABS(query_start-query_end)+1)) / SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s' GROUP BY PATH" % ( self._table, subjectName ) | |
364 self._iDb.execute( sqlCmd ) | |
365 res = self._iDb.fetchall() | |
366 for i in res: | |
367 if i[0] != None: | |
368 lChainIdentities.append( round( float( i[0] ), 2 ) ) | |
369 return lChainIdentities | |
370 | |
371 ## Give a list of the length of all paths for a given subject name | |
372 # | |
373 # @param subjectName string name of the subject | |
374 # @return lPathLengths list of lengths per path | |
375 # | |
376 # @warning doesn't take into account the overlaps !! | |
377 # | |
378 def getPathLengthListFromSubject( self, subjectName ): | |
379 sqlCmd = "SELECT ABS(query_end-query_start)+1 FROM %s WHERE subject_name='%s'" % ( self._table, subjectName ) | |
380 lPathLengths = self._iDb.getIntegerListWithSQLCmd(sqlCmd) | |
381 return lPathLengths | |
382 | |
383 ## Give a a list with all distinct identifiers for a given subject sorted in decreasing order according to the length of the chains | |
384 # | |
385 # @param subjectName string subject name | |
386 # @return lPathNums a list of paths Id | |
387 # | |
388 def getIdListSortedByDecreasingChainLengthFromSubject( self, subjectName ): | |
389 sqlCmd = "SELECT DISTINCT path, SUM( ABS(query_end - query_start) + 1 ) AS length" | |
390 sqlCmd += " FROM %s" % ( self._table ) | |
391 sqlCmd += " WHERE subject_name='%s'" % ( subjectName ) | |
392 sqlCmd += " GROUP BY path" | |
393 sqlCmd += " ORDER BY length DESC"; | |
394 lPathNums = self._iDb.getIntegerListWithSQLCmd(sqlCmd) | |
395 return lPathNums | |
396 | |
397 ## Give a a list with all distinct identifiers for a given subject where the chain lengths is above a given threshold | |
398 # | |
399 # @param subjectName string subject name | |
400 # @lengthThreshold length threshold below which chains are filtered | |
401 # @return lPathNums a list of paths Id | |
402 # | |
403 def getIdListFromSubjectWhereChainsLongerThanThreshold( self, subjectName, lengthThreshold ): | |
404 lPathNums = [] | |
405 sqlCmd = "SELECT DISTINCT path, SUM( ABS(query_end - query_start) + 1 ) AS length" | |
406 sqlCmd += " FROM %s" % ( self._table ) | |
407 sqlCmd += " WHERE subject_name='%s'" % ( subjectName ) | |
408 sqlCmd += " GROUP BY path" | |
409 sqlCmd += " ORDER BY length DESC"; | |
410 self._iDb.execute( sqlCmd ) | |
411 res = self._iDb.fetchall() | |
412 for i in res: | |
413 if int(i[1]) >= int(lengthThreshold): | |
414 lPathNums.append( i[0] ) | |
415 return lPathNums | |
416 | |
417 ## Give a Set instances list of a query annotation | |
418 # | |
419 # @param query string query name | |
420 # @return lSets list of set instance | |
421 # | |
422 def getSetListFromQuery(self, query): | |
423 lpath = self.getPathListFromQuery(query) | |
424 lSets = PathUtils.getSetListFromQueries(lpath) | |
425 return lSets | |
426 | |
427 ## Give a Set instances list of a query annotation | |
428 # @note method to have correspondence with getSetListFromSeqName() in TableSetAdaptator (for srptAutoPromote.py) | |
429 # | |
430 # @param query string query name | |
431 # @return lSets list of set instance | |
432 # | |
433 def getSetListFromSeqName(self, query): | |
434 return self.getSetListFromQuery(query) | |
435 | |
436 ## Delete path corresponding to a given identifier number | |
437 # | |
438 # @param id integer identifier number | |
439 # | |
440 def deleteFromId(self,id): | |
441 sqlCmd = "delete from %s where path=%d;" % (self._table, id) | |
442 self._iDb.execute(sqlCmd) | |
443 | |
444 ## Delete path corresponding to a given object path line | |
445 # | |
446 # @param path object | |
447 # | |
448 def deleteFromPath(self,path): | |
449 sqlCmd = "delete from %s where path=%d and query_name='%s' and query_start=%s and query_end=%s and subject_name='%s' and subject_start=%s and subject_end=%s and E_value=%s and score=%s" % (self._table, path.getIdentifier(), path.getQueryName(), path.getQueryStart(), path.getQueryEnd(), path.getSubjectName(), path.getSubjectStart(), path.getSubjectEnd(), path.getEvalue(), int(path.getScore())) | |
450 self._iDb.execute(sqlCmd) | |
451 | |
452 ## Delete path corresponding to a given list of identifier number | |
453 # | |
454 # @param lId list of identifier number | |
455 # | |
456 def deleteFromIdList(self,lId): | |
457 if lId == []: | |
458 return | |
459 sqlCmd = "delete from %s where path=%d" % (self._table, lId[0]) | |
460 for id in lId[1:]: | |
461 sqlCmd += " or path=%d" %(id) | |
462 sqlCmd += ";" | |
463 self._iDb.execute(sqlCmd) | |
464 | |
465 ## Get a new id number | |
466 # | |
467 # @return newId integer new id | |
468 # | |
469 def getNewId(self): | |
470 sqlCmd = 'select max(path) from %s;' % (self._table) | |
471 maxId = self._iDb.getIntegerWithSQLCmd(sqlCmd) | |
472 newId = int(maxId)+1 | |
473 return newId | |
474 | |
475 ## Join two path by changing id number of id1 and id2 path to the least of id1 and id2 | |
476 # | |
477 # @param id1 integer id path number | |
478 # @param id2 integer id path number | |
479 # @return newId integer minimum of id1 id2 | |
480 # @note this method modify the ID even if this one not existing in the path table | |
481 # | |
482 def joinTwoPaths(self, id1, id2): | |
483 if id1 < id2: | |
484 newId = id1 | |
485 oldId = id2 | |
486 else: | |
487 newId = id2 | |
488 oldId = id1 | |
489 sqlCmd = "UPDATE %s SET path=%d WHERE path=%d"\ | |
490 % (self._table, newId, oldId) | |
491 self._iDb.execute(sqlCmd) | |
492 return newId | |
493 | |
494 ## Create a 'pathRange' table from a 'path' table. | |
495 # The output table summarizes the information per identifier. | |
496 # The min and max value are taken. | |
497 # The identity is averaged over the fragments. | |
498 # It may overwrite an existing table. | |
499 # | |
500 # @param outTable string name of the output table | |
501 # @return outTable string Table which summarizes the information per identifier | |
502 # | |
503 def path2PathRange( self, outTable="" ): | |
504 return self._path2PathRangeOrPath2PathRangeQuery(outTable) | |
505 | |
506 ## Create a 'pathrange' table from a 'path' table for the given query name | |
507 # The output table summarizes the information per identifier | |
508 # The min and max value are taken | |
509 # The identity is averaged over the fragments, weighted by the length of the of the query | |
510 # It may overwrite an existing table | |
511 # | |
512 # @param outTable string name of the output table | |
513 # @param query string query name | |
514 # @return outTable string Table which summarizes the information per identifier | |
515 # | |
516 def _path2PathRangeFromQuery( self, queryName, outTable="" ): | |
517 return self._path2PathRangeOrPath2PathRangeQuery(outTable, queryName) | |
518 | |
519 def _path2PathRangeOrPath2PathRangeQuery(self, outTable, queryName=""): | |
520 self._iDb.createIndex( self._table, "path" ) | |
521 if outTable == "": | |
522 outTable = "%s_range" % ( self._table ) | |
523 self._iDb.dropTable( outTable ) | |
524 | |
525 tmpTable = "%s_tmp" % ( self._table ) | |
526 self._iDb.dropTable( tmpTable ) | |
527 | |
528 sqlCmd = self._genSqlCmdForTmpTableAccordingToQueryName(queryName, tmpTable) | |
529 self._iDb.execute(sqlCmd) | |
530 | |
531 sqlCmd = "CREATE TABLE %s SELECT path, query_name, MIN(query_start) AS query_start, MAX(query_end) AS query_end, subject_name, MIN(subject_start) AS subject_start, MAX(subject_end) AS subject_end, MIN(e_value) AS e_value, SUM(score) AS score, TRUNCATE(SUM(identity)/SUM(ABS(query_end-query_start)+1),2) AS identity FROM %s WHERE query_start<query_end AND subject_start<subject_end GROUP BY path;" % ( outTable, tmpTable ) | |
532 self._iDb.execute( sqlCmd ) | |
533 | |
534 sqlCmd = "INSERT into %s SELECT path, query_name, MIN(query_start) AS query_start, MAX(query_end) AS query_end, subject_name, MAX(subject_start) AS subject_start, MIN(subject_end) AS subject_end, MIN(e_value) AS e_value, SUM(score) AS score, TRUNCATE(SUM(identity)/SUM(ABS(query_end-query_start)+1),2) AS identity FROM %s WHERE query_start<query_end AND subject_start>subject_end GROUP BY path;" % ( outTable, tmpTable ) | |
535 self._iDb.execute( sqlCmd ) | |
536 | |
537 self._iDb.createIndex( outTable, "path" ) | |
538 self._iDb.dropTable( tmpTable ) | |
539 return outTable | |
540 | |
541 ## Give a list of Path lists sorted by weighted identity. | |
542 # | |
543 # @return lChains list of chains | |
544 # | |
545 def getListOfChainsSortedByAscIdentityFromQuery( self, qry ): | |
546 lChains = [] | |
547 tmpTable = self._path2PathRangeFromQuery( qry ) | |
548 sqlCmd = "SELECT path FROM %s ORDER BY identity" % ( tmpTable ) | |
549 self._iDb.execute( sqlCmd ) | |
550 lPathnums = self._iDb.fetchall() | |
551 self._iDb.dropTable( tmpTable ) | |
552 for pathnum in lPathnums: | |
553 lChains.append( self.getPathListFromId( int(pathnum[0]) ) ) | |
554 return lChains | |
555 | |
556 ## Give a list of path instances sorted by increasing E-value | |
557 # | |
558 # @return lPaths list of path instances | |
559 # | |
560 def getPathListSortedByIncreasingEvalueFromQuery( self, queryName ): | |
561 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY E_value ASC" % ( self._table, queryName ) | |
562 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
563 return lPaths | |
564 | |
565 | |
566 ## Return the number of times a given instance is present in the table | |
567 # The identifier is not considered, | |
568 # only coordinates, score, E-value and identity. | |
569 # | |
570 # @return nbOcc integer | |
571 # | |
572 def getNbOccurrences( self, iPath ): | |
573 sqlCmd = "SELECT COUNT(*) FROM %s WHERE" % ( self._table ) | |
574 sqlCmd += " query_name='%s'" % ( iPath.range_query.seqname ) | |
575 sqlCmd += " AND query_start='%s'" % ( iPath.range_query.start ) | |
576 sqlCmd += " AND query_end='%s'" % ( iPath.range_query.end ) | |
577 sqlCmd += " AND subject_name='%s'" % ( iPath.range_subject.seqname ) | |
578 sqlCmd += " AND subject_start='%s'" % ( iPath.range_subject.start ) | |
579 sqlCmd += " AND subject_end='%s'" % ( iPath.range_subject.end ) | |
580 sqlCmd += " AND score='%s'" % ( iPath.score ) | |
581 sqlCmd += " AND e_value='%s'" % ( iPath.e_value ) | |
582 sqlCmd += " AND identity='%s'" % ( iPath.identity ) | |
583 nbOcc = self._iDb.getIntegerWithSQLCmd( sqlCmd ) | |
584 return nbOcc | |
585 | |
586 | |
587 def _getPathListFromTypeName( self, type, typeName ): | |
588 sqlCmd = "SELECT * FROM %s WHERE %s_name='%s';" % ( self._table, type, typeName ) | |
589 lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
590 return lPath | |
591 | |
592 def _getDistinctTypeNamesList( self, type ): | |
593 sqlCmd = "SELECT DISTINCT %s_name FROM %s" % ( type, self._table ) | |
594 lDistinctTypeNames = self._iDb.getStringListWithSQLCmd(sqlCmd) | |
595 return lDistinctTypeNames | |
596 | |
597 def _getPathsNbFromTypeName( self, type, typeName ): | |
598 sqlCmd = "SELECT COUNT(*) FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName ) | |
599 pathNb = self._iDb.getIntegerWithSQLCmd( sqlCmd ) | |
600 return pathNb | |
601 | |
602 def _getIdListFromTypeName( self, type, typeName ): | |
603 sqlCmd = "SELECT DISTINCT path FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName ) | |
604 lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd ) | |
605 return lId | |
606 | |
607 def _getIdNbFromTypeName( self, type, typeName ): | |
608 sqlCmd = "SELECT COUNT( DISTINCT path ) FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName ) | |
609 idNb = self._iDb.getIntegerWithSQLCmd( sqlCmd ) | |
610 return idNb | |
611 | |
612 def _getTypeAndAttr2Insert(self, path): | |
613 type2Insert = ("'%d'", "'%s'", "'%d'", "'%d'", "'%s'", "'%d'", "'%d'", "'%g'", "'%d'", "'%f'") | |
614 if path.range_query.isOnDirectStrand(): | |
615 queryStart = path.range_query.start | |
616 queryEnd = path.range_query.end | |
617 subjectStart = path.range_subject.start | |
618 subjectEnd = path.range_subject.end | |
619 else: | |
620 queryStart = path.range_query.end | |
621 queryEnd = path.range_query.start | |
622 subjectStart = path.range_subject.end | |
623 subjectEnd = path.range_subject.start | |
624 attr2Insert = ( path.id,\ | |
625 path.range_query.seqname,\ | |
626 queryStart,\ | |
627 queryEnd,\ | |
628 path.range_subject.seqname,\ | |
629 subjectStart,\ | |
630 subjectEnd,\ | |
631 path.e_value,\ | |
632 path.score,\ | |
633 path.identity\ | |
634 ) | |
635 return type2Insert, attr2Insert | |
636 | |
637 def _getInstanceToAdapt(self): | |
638 iPath = Path() | |
639 return iPath | |
640 | |
641 def _escapeAntislash(self, obj): | |
642 obj.range_query.seqname = obj.range_query.seqname.replace("\\", "\\\\") | |
643 obj.range_subject.seqname = obj.range_subject.seqname.replace("\\", "\\\\") | |
644 | |
645 def _genSqlCmdForTmpTableAccordingToQueryName(self, queryName, tmpTable): | |
646 sqlCmd = "" | |
647 if queryName == "": | |
648 sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s" % (tmpTable, self._table) | |
649 else: | |
650 sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s WHERE query_name='%s'" % (tmpTable, self._table, queryName) | |
651 return sqlCmd | |
652 | |
653 ## return a filtered list with only one unique occurrence of path of a given list | |
654 # | |
655 # @param lPath a list of Path instances | |
656 # @return lUniquePath a list of Path instances | |
657 # | |
658 def getListOfUniqueOccPath(self, lPath): | |
659 if len(lPath) < 2 : | |
660 return lPath | |
661 | |
662 sortedListPath = sorted(lPath, key=lambda iPath: ( iPath.range_query.getSeqname(), iPath.range_query.getStart(), iPath.range_query.getEnd(), iPath.range_subject.getSeqname(), iPath.range_subject.getStart(), iPath.range_subject.getEnd())) | |
663 lUniquePath = [] | |
664 for i in xrange(1, len(sortedListPath)): | |
665 previousPath = sortedListPath [i-1] | |
666 currentPath = sortedListPath [i] | |
667 if previousPath != currentPath: | |
668 lUniquePath.append(previousPath) | |
669 | |
670 if previousPath != currentPath: | |
671 lUniquePath.append(currentPath) | |
672 | |
673 return lUniquePath |