comparison commons/core/sql/TablePathAdaptator.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
comparison
equal deleted inserted replaced
5:ea3082881bf8 6:769e306b7933
1 # Copyright INRA (Institut National de la Recherche Agronomique)
2 # http://www.inra.fr
3 # http://urgi.versailles.inra.fr
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30
31
32 from commons.core.coord.Path import Path
33 from commons.core.coord.PathUtils import PathUtils
34 from commons.core.sql.TableAdaptator import TableAdaptator
35 from commons.core.sql.ITablePathAdaptator import ITablePathAdaptator
36
37
38 ## Adaptator for a Path table
39 #
40 class TablePathAdaptator( TableAdaptator, ITablePathAdaptator ):
41
42 ## Give a list of Path instances having the same identifier
43 #
44 # @param id integer identifier number
45 # @return lPath a list of Path instances
46 #
47 def getPathListFromId( self, id ):
48 sqlCmd = "SELECT * FROM %s WHERE path='%d';" % ( self._table, id )
49 lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
50 return lPath
51
52 ## Give a list of Path instances according to the given list of identifier numbers
53 #
54 # @param lId integer list
55 # @return lPath a list of Path instances
56 #
57 def getPathListFromIdList( self, lId ):
58 lPath=[]
59 if lId == []:
60 return lPath
61 sqlCmd = "select * from %s where path=%d" % (self._table, lId[0])
62 for i in lId[1:]:
63 sqlCmd += " or path=%d" % (i)
64 sqlCmd += ";"
65 lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
66 return lPath
67
68 ## Give a list of Path instances having the same given query name
69 #
70 # @param query string name of the query
71 # @return lPath a list of Path instances
72 #
73 def getPathListFromQuery( self, query ):
74 lPath = self._getPathListFromTypeName("query", query)
75 return lPath
76
77 ## Give a list of Path instances having the same given subject name
78 #
79 # @param subject string name of the subject
80 # @return lPath a list of Path instances
81 #
82 def getPathListFromSubject( self, subject ):
83 lPath = self._getPathListFromTypeName("subject", subject)
84 return lPath
85
86 ## Give a list of the distinct subject names present in the table
87 #
88 # @return lDistinctSubjectNames string list
89 #
90 def getSubjectList(self):
91 lDistinctSubjectNames = self._getDistinctTypeNamesList("subject")
92 return lDistinctSubjectNames
93
94 ## Give a list of the distinct query names present in the table
95 #
96 # @return lDistinctQueryNames string list
97 #
98 def getQueryList(self):
99 lDistinctQueryNames = self._getDistinctTypeNamesList("query")
100 return lDistinctQueryNames
101
102 ## Give a list of the distinct query names present in the table
103 # @note method to have correspondence with getSeqNameList() in TableSetAdaptator (for srptAutoPromote.py)
104 #
105 # @return lDistinctContigNames string list
106 #
107 def getSeqNameList(self):
108 return self.getQueryList()
109
110 ## Give a list with all the distinct identifiers corresponding to the query
111 #
112 # @param query string name of the subject
113 # @return lId a list of integer
114 #
115 def getIdListFromQuery( self, query ):
116 lId = self._getIdListFromTypeName("query", query)
117 return lId
118
119 ## Give a list with all the distinct identifiers corresponding to the subject
120 #
121 # @param subject string name of the subject
122 # @return lId a list of integer
123 #
124 def getIdListFromSubject( self, subject ):
125 lId = self._getIdListFromTypeName("subject", subject)
126 return lId
127
128 ## Give a list of identifiers contained in the table
129 #
130 # @return lId integer list
131 #
132 def getIdList(self):
133 sqlCmd = "SELECT DISTINCT path from %s;" % (self._table)
134 lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd )
135 return lId
136
137 ## Give a list of the distinct subject names present in the table given a query name
138 #
139 # @param queryName string
140 # @return lDistinctSubjectNamesPerQuery string list
141 #
142 def getSubjectListFromQuery( self, queryName ):
143 sqlCmd = "SELECT DISTINCT subject_name FROM %s WHERE query_name='%s'" % ( self._table, queryName )
144 lDistinctSubjectNamesPerQuery = self._iDb.getStringListWithSQLCmd(sqlCmd)
145 return lDistinctSubjectNamesPerQuery
146
147 ## Give the data contained in the table as a list of Paths instances
148 #
149 # @return lPaths list of paths instances
150 #
151 def getListOfAllPaths( self ):
152 return self.getListOfAllCoordObject()
153
154 ## Give a list of Path instances with the given query and subject, both on direct strand
155 #
156 # @param query string query name
157 # @param subject string subject name
158 # @return lPaths list of path instances
159 #
160 def getPathListWithDirectQueryDirectSubjectFromQuerySubject( self, query, subject ):
161 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND subject_name='%s' AND query_start<query_end AND subject_start<subject_end ORDER BY query_name, subject_name, query_start;" % ( self._table, query, subject )
162 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
163 return lPaths
164
165 ## Give a list of Path instances with the given query on direct strand and the given subject on reverse strand
166 #
167 # @param query string query name
168 # @param subject string subject name
169 # @return lPaths list of path instances
170 #
171 def getPathListWithDirectQueryReverseSubjectFromQuerySubject( self, query, subject ):
172 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND subject_name='%s' AND query_start<query_end AND subject_start>subject_end ORDER BY query_name, subject_name, query_start;" % ( self._table, query, subject )
173 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
174 return lPaths
175
176 ## Give the number of Path instances with the given query name
177 #
178 # @param query string query name
179 # @return pathNb integer the number of Path instances
180 #
181 def getNbPathsFromQuery( self, query ):
182 pathNb = self._getPathsNbFromTypeName("query", query)
183 return pathNb
184
185 ## Give the number of Path instances with the given subject name
186 #
187 # @param subject string subject name
188 # @return pathNb integer the number of Path instances
189 #
190 def getNbPathsFromSubject( self, subject ):
191 pathNb = self._getPathsNbFromTypeName("subject", subject)
192 return pathNb
193
194 ## Give the number of distinct path identifiers
195 #
196 # @return idNb integer the number of Path instances
197 #
198 def getNbIds( self ):
199 sqlCmd = "SELECT COUNT( DISTINCT path ) FROM %s" % ( self._table )
200 idNb = self._iDb.getIntegerWithSQLCmd( sqlCmd )
201 return idNb
202
203 ## Give the number of distinct path identifiers for a given subject
204 #
205 # @param subjectName string subject name
206 # @return idNb integer the number of Path instances
207 #
208 def getNbIdsFromSubject( self, subjectName ):
209 idNb = self._getIdNbFromTypeName("subject", subjectName)
210 return idNb
211
212 ## Give the number of distinct path identifiers for a given query
213 #
214 # @param queryName string query name
215 # @return idNb integer the number of Path instances
216 #
217 def getNbIdsFromQuery( self, queryName ):
218 idNb = self._getIdNbFromTypeName("query", queryName)
219 return idNb
220
221 ## Give a list of Path instances included in a given query region
222 #
223 # @param query string query name
224 # @param start integer start coordinate
225 # @param end integer end coordinate
226 # @return lPaths list of Path instances
227 #
228 def getPathListIncludedInQueryCoord( self, query, start, end ):
229 if( start > end ):
230 tmp = start
231 start = end
232 end = tmp
233 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND query_start>=%i AND query_end<=%i" % ( self._table, query, start, end )
234 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
235 return lPaths
236
237 ## Give a list of Path instances overlapping a given region
238 #
239 # @param query string query name
240 # @param start integer start coordinate
241 # @param end integer end coordinate
242 # @return lPath list of Path instances
243 #
244 def getPathListOverlappingQueryCoord( self, query, start, end ):
245 if( start > end ):
246 tmp = start
247 start = end
248 end = tmp
249 sqlCmd = "SELECT * FROM %s WHERE query_name='%s'" % ( self._table, query )
250 sqlCmd += " AND ( ( query_start < %i AND query_end >= %i AND query_end <= %i )" % ( start, start, end )
251 sqlCmd += " OR ( query_start >= %i AND query_end <= %i )" % ( start, end )
252 sqlCmd += " OR ( query_start >= %i AND query_start <= %i AND query_end > %i )" % ( start, end, end )
253 sqlCmd += " OR ( query_start < %i AND query_end > %i ) )" % ( start, end )
254 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
255 return lPaths
256
257 ## Give a list of Path instances overlapping a given region
258 #
259 # @note whole chains are returned, even if only a fragment overlap with the given region
260 # @param query string query name
261 # @param start integer start coordinate
262 # @param end integer end coordinate
263 # @return lPath list of Path instances
264 #
265 def getChainListOverlappingQueryCoord( self, query, start, end ):
266 if( start > end ):
267 tmp = start
268 start = end
269 end = tmp
270 sqlCmd = "SELECT DISTINCT path FROM %s WHERE query_name='%s'" % ( self._table, query )
271 sqlCmd += " AND ( ( query_start <= %i AND query_end >= %i AND query_end <= %i )" % ( start, start, end )
272 sqlCmd += " OR ( query_start >= %i AND query_end <= %i )" % ( start, end )
273 sqlCmd += " OR ( query_start >= %i AND query_start <= %i AND query_end >= %i )" % ( start, end, end )
274 sqlCmd += " OR ( query_start <= %i AND query_end >= %i ) )" % ( start, end )
275 lIdentifiers = self._iDb.getIntegerListWithSQLCmd( sqlCmd )
276 lPaths = self.getPathListFromIdList( lIdentifiers )
277 return lPaths
278
279 ## Give a list of Set instances overlapping a given region
280 #
281 # @param query string query name
282 # @param start integer start coordinate
283 # @param end integer end coordinate
284 # @return lSet list of Set instances
285 #
286 def getSetListOverlappingQueryCoord(self, query, start, end):
287 lPath = self.getPathListOverlappingQueryCoord(query, start, end)
288 lSet = PathUtils.getSetListFromQueries(lPath)
289 return lSet
290
291 ## Give a list of Set instances included in a given region
292 #
293 # @param query string query name
294 # @param start integer start coordinate
295 # @param end integer end coordinate
296 # @return lSet list of Set instances
297 #
298 def getSetListIncludedInQueryCoord(self, query, start, end):
299 lPath=self.getPathListIncludedInQueryCoord(query, start, end)
300 lSet = PathUtils.getSetListFromQueries(lPath)
301 return lSet
302
303 ## Give a a list of Path instances sorted by query coordinates
304 #
305 # @return lPaths list of Path instances
306 #
307 def getPathListSortedByQueryCoord( self ):
308 sqlCmd = "SELECT * FROM %s ORDER BY query_name, LEAST(query_start,query_end)" % ( self._table )
309 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
310 return lPaths
311
312 ## Give a a list of Path instances sorted by query coordinates for a given query
313 #
314 # @return lPaths list of Path instances
315 #
316 def getPathListSortedByQueryCoordFromQuery( self, queryName ):
317 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY LEAST(query_start,query_end)" % ( self._table, queryName )
318 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
319 return lPaths
320
321 ## Give a a list of Path instances sorted by query coordinates and score for a given query
322 #
323 # @return lPaths list of Path instances
324 #
325 def getPathListSortedByQueryCoordAndScoreFromQuery(self, queryName):
326 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY query_start, query_end, score" % (self._table, queryName)
327 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
328 return lPaths
329
330 ## Give a cumulative length of all paths (fragments) for a given subject name
331 #
332 # @param subjectName string subject name
333 # @return nb Cumulative length for all path
334 #
335 # @warning doesn't take into account the overlaps !!
336 #
337 def getCumulLengthFromSubject( self, subjectName ):
338 sqlCmd = "SELECT SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s'" % ( self._table, subjectName )
339 nb = self._iDb.getIntegerWithSQLCmd(sqlCmd)
340 return nb
341
342 ## Give a list of the length of all chains of paths for a given subject name
343 #
344 # @param subjectName string name of the subject
345 # @return lChainLengths list of lengths per chain of paths
346 #
347 # @warning doesn't take into account the overlaps !!
348 #
349 def getChainLengthListFromSubject( self, subjectName ):
350 sqlCmd = "SELECT SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s' GROUP BY PATH" % ( self._table, subjectName )
351 lChainLengths = self._iDb.getIntegerListWithSQLCmd(sqlCmd)
352 return lChainLengths
353
354 ## Give a list of identity of all chains of paths for a given subject name
355 #
356 # @param subjectName string name of the subject
357 # @return lChainIdentities list of identities per chain of paths
358 #
359 # @warning doesn't take into account the overlaps !!
360 #
361 def getChainIdentityListFromSubject( self, subjectName ):
362 lChainIdentities = []
363 sqlCmd = "SELECT SUM(identity*(ABS(query_start-query_end)+1)) / SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s' GROUP BY PATH" % ( self._table, subjectName )
364 self._iDb.execute( sqlCmd )
365 res = self._iDb.fetchall()
366 for i in res:
367 if i[0] != None:
368 lChainIdentities.append( round( float( i[0] ), 2 ) )
369 return lChainIdentities
370
371 ## Give a list of the length of all paths for a given subject name
372 #
373 # @param subjectName string name of the subject
374 # @return lPathLengths list of lengths per path
375 #
376 # @warning doesn't take into account the overlaps !!
377 #
378 def getPathLengthListFromSubject( self, subjectName ):
379 sqlCmd = "SELECT ABS(query_end-query_start)+1 FROM %s WHERE subject_name='%s'" % ( self._table, subjectName )
380 lPathLengths = self._iDb.getIntegerListWithSQLCmd(sqlCmd)
381 return lPathLengths
382
383 ## Give a a list with all distinct identifiers for a given subject sorted in decreasing order according to the length of the chains
384 #
385 # @param subjectName string subject name
386 # @return lPathNums a list of paths Id
387 #
388 def getIdListSortedByDecreasingChainLengthFromSubject( self, subjectName ):
389 sqlCmd = "SELECT DISTINCT path, SUM( ABS(query_end - query_start) + 1 ) AS length"
390 sqlCmd += " FROM %s" % ( self._table )
391 sqlCmd += " WHERE subject_name='%s'" % ( subjectName )
392 sqlCmd += " GROUP BY path"
393 sqlCmd += " ORDER BY length DESC";
394 lPathNums = self._iDb.getIntegerListWithSQLCmd(sqlCmd)
395 return lPathNums
396
397 ## Give a a list with all distinct identifiers for a given subject where the chain lengths is above a given threshold
398 #
399 # @param subjectName string subject name
400 # @lengthThreshold length threshold below which chains are filtered
401 # @return lPathNums a list of paths Id
402 #
403 def getIdListFromSubjectWhereChainsLongerThanThreshold( self, subjectName, lengthThreshold ):
404 lPathNums = []
405 sqlCmd = "SELECT DISTINCT path, SUM( ABS(query_end - query_start) + 1 ) AS length"
406 sqlCmd += " FROM %s" % ( self._table )
407 sqlCmd += " WHERE subject_name='%s'" % ( subjectName )
408 sqlCmd += " GROUP BY path"
409 sqlCmd += " ORDER BY length DESC";
410 self._iDb.execute( sqlCmd )
411 res = self._iDb.fetchall()
412 for i in res:
413 if int(i[1]) >= int(lengthThreshold):
414 lPathNums.append( i[0] )
415 return lPathNums
416
417 ## Give a Set instances list of a query annotation
418 #
419 # @param query string query name
420 # @return lSets list of set instance
421 #
422 def getSetListFromQuery(self, query):
423 lpath = self.getPathListFromQuery(query)
424 lSets = PathUtils.getSetListFromQueries(lpath)
425 return lSets
426
427 ## Give a Set instances list of a query annotation
428 # @note method to have correspondence with getSetListFromSeqName() in TableSetAdaptator (for srptAutoPromote.py)
429 #
430 # @param query string query name
431 # @return lSets list of set instance
432 #
433 def getSetListFromSeqName(self, query):
434 return self.getSetListFromQuery(query)
435
436 ## Delete path corresponding to a given identifier number
437 #
438 # @param id integer identifier number
439 #
440 def deleteFromId(self,id):
441 sqlCmd = "delete from %s where path=%d;" % (self._table, id)
442 self._iDb.execute(sqlCmd)
443
444 ## Delete path corresponding to a given object path line
445 #
446 # @param path object
447 #
448 def deleteFromPath(self,path):
449 sqlCmd = "delete from %s where path=%d and query_name='%s' and query_start=%s and query_end=%s and subject_name='%s' and subject_start=%s and subject_end=%s and E_value=%s and score=%s" % (self._table, path.getIdentifier(), path.getQueryName(), path.getQueryStart(), path.getQueryEnd(), path.getSubjectName(), path.getSubjectStart(), path.getSubjectEnd(), path.getEvalue(), int(path.getScore()))
450 self._iDb.execute(sqlCmd)
451
452 ## Delete path corresponding to a given list of identifier number
453 #
454 # @param lId list of identifier number
455 #
456 def deleteFromIdList(self,lId):
457 if lId == []:
458 return
459 sqlCmd = "delete from %s where path=%d" % (self._table, lId[0])
460 for id in lId[1:]:
461 sqlCmd += " or path=%d" %(id)
462 sqlCmd += ";"
463 self._iDb.execute(sqlCmd)
464
465 ## Get a new id number
466 #
467 # @return newId integer new id
468 #
469 def getNewId(self):
470 sqlCmd = 'select max(path) from %s;' % (self._table)
471 maxId = self._iDb.getIntegerWithSQLCmd(sqlCmd)
472 newId = int(maxId)+1
473 return newId
474
475 ## Join two path by changing id number of id1 and id2 path to the least of id1 and id2
476 #
477 # @param id1 integer id path number
478 # @param id2 integer id path number
479 # @return newId integer minimum of id1 id2
480 # @note this method modify the ID even if this one not existing in the path table
481 #
482 def joinTwoPaths(self, id1, id2):
483 if id1 < id2:
484 newId = id1
485 oldId = id2
486 else:
487 newId = id2
488 oldId = id1
489 sqlCmd = "UPDATE %s SET path=%d WHERE path=%d"\
490 % (self._table, newId, oldId)
491 self._iDb.execute(sqlCmd)
492 return newId
493
494 ## Create a 'pathRange' table from a 'path' table.
495 # The output table summarizes the information per identifier.
496 # The min and max value are taken.
497 # The identity is averaged over the fragments.
498 # It may overwrite an existing table.
499 #
500 # @param outTable string name of the output table
501 # @return outTable string Table which summarizes the information per identifier
502 #
503 def path2PathRange( self, outTable="" ):
504 return self._path2PathRangeOrPath2PathRangeQuery(outTable)
505
506 ## Create a 'pathrange' table from a 'path' table for the given query name
507 # The output table summarizes the information per identifier
508 # The min and max value are taken
509 # The identity is averaged over the fragments, weighted by the length of the of the query
510 # It may overwrite an existing table
511 #
512 # @param outTable string name of the output table
513 # @param query string query name
514 # @return outTable string Table which summarizes the information per identifier
515 #
516 def _path2PathRangeFromQuery( self, queryName, outTable="" ):
517 return self._path2PathRangeOrPath2PathRangeQuery(outTable, queryName)
518
519 def _path2PathRangeOrPath2PathRangeQuery(self, outTable, queryName=""):
520 self._iDb.createIndex( self._table, "path" )
521 if outTable == "":
522 outTable = "%s_range" % ( self._table )
523 self._iDb.dropTable( outTable )
524
525 tmpTable = "%s_tmp" % ( self._table )
526 self._iDb.dropTable( tmpTable )
527
528 sqlCmd = self._genSqlCmdForTmpTableAccordingToQueryName(queryName, tmpTable)
529 self._iDb.execute(sqlCmd)
530
531 sqlCmd = "CREATE TABLE %s SELECT path, query_name, MIN(query_start) AS query_start, MAX(query_end) AS query_end, subject_name, MIN(subject_start) AS subject_start, MAX(subject_end) AS subject_end, MIN(e_value) AS e_value, SUM(score) AS score, TRUNCATE(SUM(identity)/SUM(ABS(query_end-query_start)+1),2) AS identity FROM %s WHERE query_start<query_end AND subject_start<subject_end GROUP BY path;" % ( outTable, tmpTable )
532 self._iDb.execute( sqlCmd )
533
534 sqlCmd = "INSERT into %s SELECT path, query_name, MIN(query_start) AS query_start, MAX(query_end) AS query_end, subject_name, MAX(subject_start) AS subject_start, MIN(subject_end) AS subject_end, MIN(e_value) AS e_value, SUM(score) AS score, TRUNCATE(SUM(identity)/SUM(ABS(query_end-query_start)+1),2) AS identity FROM %s WHERE query_start<query_end AND subject_start>subject_end GROUP BY path;" % ( outTable, tmpTable )
535 self._iDb.execute( sqlCmd )
536
537 self._iDb.createIndex( outTable, "path" )
538 self._iDb.dropTable( tmpTable )
539 return outTable
540
541 ## Give a list of Path lists sorted by weighted identity.
542 #
543 # @return lChains list of chains
544 #
545 def getListOfChainsSortedByAscIdentityFromQuery( self, qry ):
546 lChains = []
547 tmpTable = self._path2PathRangeFromQuery( qry )
548 sqlCmd = "SELECT path FROM %s ORDER BY identity" % ( tmpTable )
549 self._iDb.execute( sqlCmd )
550 lPathnums = self._iDb.fetchall()
551 self._iDb.dropTable( tmpTable )
552 for pathnum in lPathnums:
553 lChains.append( self.getPathListFromId( int(pathnum[0]) ) )
554 return lChains
555
556 ## Give a list of path instances sorted by increasing E-value
557 #
558 # @return lPaths list of path instances
559 #
560 def getPathListSortedByIncreasingEvalueFromQuery( self, queryName ):
561 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY E_value ASC" % ( self._table, queryName )
562 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
563 return lPaths
564
565
566 ## Return the number of times a given instance is present in the table
567 # The identifier is not considered,
568 # only coordinates, score, E-value and identity.
569 #
570 # @return nbOcc integer
571 #
572 def getNbOccurrences( self, iPath ):
573 sqlCmd = "SELECT COUNT(*) FROM %s WHERE" % ( self._table )
574 sqlCmd += " query_name='%s'" % ( iPath.range_query.seqname )
575 sqlCmd += " AND query_start='%s'" % ( iPath.range_query.start )
576 sqlCmd += " AND query_end='%s'" % ( iPath.range_query.end )
577 sqlCmd += " AND subject_name='%s'" % ( iPath.range_subject.seqname )
578 sqlCmd += " AND subject_start='%s'" % ( iPath.range_subject.start )
579 sqlCmd += " AND subject_end='%s'" % ( iPath.range_subject.end )
580 sqlCmd += " AND score='%s'" % ( iPath.score )
581 sqlCmd += " AND e_value='%s'" % ( iPath.e_value )
582 sqlCmd += " AND identity='%s'" % ( iPath.identity )
583 nbOcc = self._iDb.getIntegerWithSQLCmd( sqlCmd )
584 return nbOcc
585
586
587 def _getPathListFromTypeName( self, type, typeName ):
588 sqlCmd = "SELECT * FROM %s WHERE %s_name='%s';" % ( self._table, type, typeName )
589 lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
590 return lPath
591
592 def _getDistinctTypeNamesList( self, type ):
593 sqlCmd = "SELECT DISTINCT %s_name FROM %s" % ( type, self._table )
594 lDistinctTypeNames = self._iDb.getStringListWithSQLCmd(sqlCmd)
595 return lDistinctTypeNames
596
597 def _getPathsNbFromTypeName( self, type, typeName ):
598 sqlCmd = "SELECT COUNT(*) FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName )
599 pathNb = self._iDb.getIntegerWithSQLCmd( sqlCmd )
600 return pathNb
601
602 def _getIdListFromTypeName( self, type, typeName ):
603 sqlCmd = "SELECT DISTINCT path FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName )
604 lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd )
605 return lId
606
607 def _getIdNbFromTypeName( self, type, typeName ):
608 sqlCmd = "SELECT COUNT( DISTINCT path ) FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName )
609 idNb = self._iDb.getIntegerWithSQLCmd( sqlCmd )
610 return idNb
611
612 def _getTypeAndAttr2Insert(self, path):
613 type2Insert = ("'%d'", "'%s'", "'%d'", "'%d'", "'%s'", "'%d'", "'%d'", "'%g'", "'%d'", "'%f'")
614 if path.range_query.isOnDirectStrand():
615 queryStart = path.range_query.start
616 queryEnd = path.range_query.end
617 subjectStart = path.range_subject.start
618 subjectEnd = path.range_subject.end
619 else:
620 queryStart = path.range_query.end
621 queryEnd = path.range_query.start
622 subjectStart = path.range_subject.end
623 subjectEnd = path.range_subject.start
624 attr2Insert = ( path.id,\
625 path.range_query.seqname,\
626 queryStart,\
627 queryEnd,\
628 path.range_subject.seqname,\
629 subjectStart,\
630 subjectEnd,\
631 path.e_value,\
632 path.score,\
633 path.identity\
634 )
635 return type2Insert, attr2Insert
636
637 def _getInstanceToAdapt(self):
638 iPath = Path()
639 return iPath
640
641 def _escapeAntislash(self, obj):
642 obj.range_query.seqname = obj.range_query.seqname.replace("\\", "\\\\")
643 obj.range_subject.seqname = obj.range_subject.seqname.replace("\\", "\\\\")
644
645 def _genSqlCmdForTmpTableAccordingToQueryName(self, queryName, tmpTable):
646 sqlCmd = ""
647 if queryName == "":
648 sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s" % (tmpTable, self._table)
649 else:
650 sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s WHERE query_name='%s'" % (tmpTable, self._table, queryName)
651 return sqlCmd
652
653 ## return a filtered list with only one unique occurrence of path of a given list
654 #
655 # @param lPath a list of Path instances
656 # @return lUniquePath a list of Path instances
657 #
658 def getListOfUniqueOccPath(self, lPath):
659 if len(lPath) < 2 :
660 return lPath
661
662 sortedListPath = sorted(lPath, key=lambda iPath: ( iPath.range_query.getSeqname(), iPath.range_query.getStart(), iPath.range_query.getEnd(), iPath.range_subject.getSeqname(), iPath.range_subject.getStart(), iPath.range_subject.getEnd()))
663 lUniquePath = []
664 for i in xrange(1, len(sortedListPath)):
665 previousPath = sortedListPath [i-1]
666 currentPath = sortedListPath [i]
667 if previousPath != currentPath:
668 lUniquePath.append(previousPath)
669
670 if previousPath != currentPath:
671 lUniquePath.append(currentPath)
672
673 return lUniquePath