Mercurial > repos > yufei-luo > s_mart
comparison commons/core/sql/TablePathAdaptator.py @ 6:769e306b7933
Change the repository level.
| author | yufei-luo |
|---|---|
| date | Fri, 18 Jan 2013 04:54:14 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 5:ea3082881bf8 | 6:769e306b7933 |
|---|---|
| 1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
| 2 # http://www.inra.fr | |
| 3 # http://urgi.versailles.inra.fr | |
| 4 # | |
| 5 # This software is governed by the CeCILL license under French law and | |
| 6 # abiding by the rules of distribution of free software. You can use, | |
| 7 # modify and/ or redistribute the software under the terms of the CeCILL | |
| 8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
| 9 # "http://www.cecill.info". | |
| 10 # | |
| 11 # As a counterpart to the access to the source code and rights to copy, | |
| 12 # modify and redistribute granted by the license, users are provided only | |
| 13 # with a limited warranty and the software's author, the holder of the | |
| 14 # economic rights, and the successive licensors have only limited | |
| 15 # liability. | |
| 16 # | |
| 17 # In this respect, the user's attention is drawn to the risks associated | |
| 18 # with loading, using, modifying and/or developing or reproducing the | |
| 19 # software by the user in light of its specific status of free software, | |
| 20 # that may mean that it is complicated to manipulate, and that also | |
| 21 # therefore means that it is reserved for developers and experienced | |
| 22 # professionals having in-depth computer knowledge. Users are therefore | |
| 23 # encouraged to load and test the software's suitability as regards their | |
| 24 # requirements in conditions enabling the security of their systems and/or | |
| 25 # data to be ensured and, more generally, to use and operate it in the | |
| 26 # same conditions as regards security. | |
| 27 # | |
| 28 # The fact that you are presently reading this means that you have had | |
| 29 # knowledge of the CeCILL license and that you accept its terms. | |
| 30 | |
| 31 | |
| 32 from commons.core.coord.Path import Path | |
| 33 from commons.core.coord.PathUtils import PathUtils | |
| 34 from commons.core.sql.TableAdaptator import TableAdaptator | |
| 35 from commons.core.sql.ITablePathAdaptator import ITablePathAdaptator | |
| 36 | |
| 37 | |
| 38 ## Adaptator for a Path table | |
| 39 # | |
| 40 class TablePathAdaptator( TableAdaptator, ITablePathAdaptator ): | |
| 41 | |
| 42 ## Give a list of Path instances having the same identifier | |
| 43 # | |
| 44 # @param id integer identifier number | |
| 45 # @return lPath a list of Path instances | |
| 46 # | |
| 47 def getPathListFromId( self, id ): | |
| 48 sqlCmd = "SELECT * FROM %s WHERE path='%d';" % ( self._table, id ) | |
| 49 lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
| 50 return lPath | |
| 51 | |
| 52 ## Give a list of Path instances according to the given list of identifier numbers | |
| 53 # | |
| 54 # @param lId integer list | |
| 55 # @return lPath a list of Path instances | |
| 56 # | |
| 57 def getPathListFromIdList( self, lId ): | |
| 58 lPath=[] | |
| 59 if lId == []: | |
| 60 return lPath | |
| 61 sqlCmd = "select * from %s where path=%d" % (self._table, lId[0]) | |
| 62 for i in lId[1:]: | |
| 63 sqlCmd += " or path=%d" % (i) | |
| 64 sqlCmd += ";" | |
| 65 lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
| 66 return lPath | |
| 67 | |
| 68 ## Give a list of Path instances having the same given query name | |
| 69 # | |
| 70 # @param query string name of the query | |
| 71 # @return lPath a list of Path instances | |
| 72 # | |
| 73 def getPathListFromQuery( self, query ): | |
| 74 lPath = self._getPathListFromTypeName("query", query) | |
| 75 return lPath | |
| 76 | |
| 77 ## Give a list of Path instances having the same given subject name | |
| 78 # | |
| 79 # @param subject string name of the subject | |
| 80 # @return lPath a list of Path instances | |
| 81 # | |
| 82 def getPathListFromSubject( self, subject ): | |
| 83 lPath = self._getPathListFromTypeName("subject", subject) | |
| 84 return lPath | |
| 85 | |
| 86 ## Give a list of the distinct subject names present in the table | |
| 87 # | |
| 88 # @return lDistinctSubjectNames string list | |
| 89 # | |
| 90 def getSubjectList(self): | |
| 91 lDistinctSubjectNames = self._getDistinctTypeNamesList("subject") | |
| 92 return lDistinctSubjectNames | |
| 93 | |
| 94 ## Give a list of the distinct query names present in the table | |
| 95 # | |
| 96 # @return lDistinctQueryNames string list | |
| 97 # | |
| 98 def getQueryList(self): | |
| 99 lDistinctQueryNames = self._getDistinctTypeNamesList("query") | |
| 100 return lDistinctQueryNames | |
| 101 | |
| 102 ## Give a list of the distinct query names present in the table | |
| 103 # @note method to have correspondence with getSeqNameList() in TableSetAdaptator (for srptAutoPromote.py) | |
| 104 # | |
| 105 # @return lDistinctContigNames string list | |
| 106 # | |
| 107 def getSeqNameList(self): | |
| 108 return self.getQueryList() | |
| 109 | |
| 110 ## Give a list with all the distinct identifiers corresponding to the query | |
| 111 # | |
| 112 # @param query string name of the subject | |
| 113 # @return lId a list of integer | |
| 114 # | |
| 115 def getIdListFromQuery( self, query ): | |
| 116 lId = self._getIdListFromTypeName("query", query) | |
| 117 return lId | |
| 118 | |
| 119 ## Give a list with all the distinct identifiers corresponding to the subject | |
| 120 # | |
| 121 # @param subject string name of the subject | |
| 122 # @return lId a list of integer | |
| 123 # | |
| 124 def getIdListFromSubject( self, subject ): | |
| 125 lId = self._getIdListFromTypeName("subject", subject) | |
| 126 return lId | |
| 127 | |
| 128 ## Give a list of identifiers contained in the table | |
| 129 # | |
| 130 # @return lId integer list | |
| 131 # | |
| 132 def getIdList(self): | |
| 133 sqlCmd = "SELECT DISTINCT path from %s;" % (self._table) | |
| 134 lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd ) | |
| 135 return lId | |
| 136 | |
| 137 ## Give a list of the distinct subject names present in the table given a query name | |
| 138 # | |
| 139 # @param queryName string | |
| 140 # @return lDistinctSubjectNamesPerQuery string list | |
| 141 # | |
| 142 def getSubjectListFromQuery( self, queryName ): | |
| 143 sqlCmd = "SELECT DISTINCT subject_name FROM %s WHERE query_name='%s'" % ( self._table, queryName ) | |
| 144 lDistinctSubjectNamesPerQuery = self._iDb.getStringListWithSQLCmd(sqlCmd) | |
| 145 return lDistinctSubjectNamesPerQuery | |
| 146 | |
| 147 ## Give the data contained in the table as a list of Paths instances | |
| 148 # | |
| 149 # @return lPaths list of paths instances | |
| 150 # | |
| 151 def getListOfAllPaths( self ): | |
| 152 return self.getListOfAllCoordObject() | |
| 153 | |
| 154 ## Give a list of Path instances with the given query and subject, both on direct strand | |
| 155 # | |
| 156 # @param query string query name | |
| 157 # @param subject string subject name | |
| 158 # @return lPaths list of path instances | |
| 159 # | |
| 160 def getPathListWithDirectQueryDirectSubjectFromQuerySubject( self, query, subject ): | |
| 161 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND subject_name='%s' AND query_start<query_end AND subject_start<subject_end ORDER BY query_name, subject_name, query_start;" % ( self._table, query, subject ) | |
| 162 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
| 163 return lPaths | |
| 164 | |
| 165 ## Give a list of Path instances with the given query on direct strand and the given subject on reverse strand | |
| 166 # | |
| 167 # @param query string query name | |
| 168 # @param subject string subject name | |
| 169 # @return lPaths list of path instances | |
| 170 # | |
| 171 def getPathListWithDirectQueryReverseSubjectFromQuerySubject( self, query, subject ): | |
| 172 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND subject_name='%s' AND query_start<query_end AND subject_start>subject_end ORDER BY query_name, subject_name, query_start;" % ( self._table, query, subject ) | |
| 173 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
| 174 return lPaths | |
| 175 | |
| 176 ## Give the number of Path instances with the given query name | |
| 177 # | |
| 178 # @param query string query name | |
| 179 # @return pathNb integer the number of Path instances | |
| 180 # | |
| 181 def getNbPathsFromQuery( self, query ): | |
| 182 pathNb = self._getPathsNbFromTypeName("query", query) | |
| 183 return pathNb | |
| 184 | |
| 185 ## Give the number of Path instances with the given subject name | |
| 186 # | |
| 187 # @param subject string subject name | |
| 188 # @return pathNb integer the number of Path instances | |
| 189 # | |
| 190 def getNbPathsFromSubject( self, subject ): | |
| 191 pathNb = self._getPathsNbFromTypeName("subject", subject) | |
| 192 return pathNb | |
| 193 | |
| 194 ## Give the number of distinct path identifiers | |
| 195 # | |
| 196 # @return idNb integer the number of Path instances | |
| 197 # | |
| 198 def getNbIds( self ): | |
| 199 sqlCmd = "SELECT COUNT( DISTINCT path ) FROM %s" % ( self._table ) | |
| 200 idNb = self._iDb.getIntegerWithSQLCmd( sqlCmd ) | |
| 201 return idNb | |
| 202 | |
| 203 ## Give the number of distinct path identifiers for a given subject | |
| 204 # | |
| 205 # @param subjectName string subject name | |
| 206 # @return idNb integer the number of Path instances | |
| 207 # | |
| 208 def getNbIdsFromSubject( self, subjectName ): | |
| 209 idNb = self._getIdNbFromTypeName("subject", subjectName) | |
| 210 return idNb | |
| 211 | |
| 212 ## Give the number of distinct path identifiers for a given query | |
| 213 # | |
| 214 # @param queryName string query name | |
| 215 # @return idNb integer the number of Path instances | |
| 216 # | |
| 217 def getNbIdsFromQuery( self, queryName ): | |
| 218 idNb = self._getIdNbFromTypeName("query", queryName) | |
| 219 return idNb | |
| 220 | |
| 221 ## Give a list of Path instances included in a given query region | |
| 222 # | |
| 223 # @param query string query name | |
| 224 # @param start integer start coordinate | |
| 225 # @param end integer end coordinate | |
| 226 # @return lPaths list of Path instances | |
| 227 # | |
| 228 def getPathListIncludedInQueryCoord( self, query, start, end ): | |
| 229 if( start > end ): | |
| 230 tmp = start | |
| 231 start = end | |
| 232 end = tmp | |
| 233 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND query_start>=%i AND query_end<=%i" % ( self._table, query, start, end ) | |
| 234 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
| 235 return lPaths | |
| 236 | |
| 237 ## Give a list of Path instances overlapping a given region | |
| 238 # | |
| 239 # @param query string query name | |
| 240 # @param start integer start coordinate | |
| 241 # @param end integer end coordinate | |
| 242 # @return lPath list of Path instances | |
| 243 # | |
| 244 def getPathListOverlappingQueryCoord( self, query, start, end ): | |
| 245 if( start > end ): | |
| 246 tmp = start | |
| 247 start = end | |
| 248 end = tmp | |
| 249 sqlCmd = "SELECT * FROM %s WHERE query_name='%s'" % ( self._table, query ) | |
| 250 sqlCmd += " AND ( ( query_start < %i AND query_end >= %i AND query_end <= %i )" % ( start, start, end ) | |
| 251 sqlCmd += " OR ( query_start >= %i AND query_end <= %i )" % ( start, end ) | |
| 252 sqlCmd += " OR ( query_start >= %i AND query_start <= %i AND query_end > %i )" % ( start, end, end ) | |
| 253 sqlCmd += " OR ( query_start < %i AND query_end > %i ) )" % ( start, end ) | |
| 254 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
| 255 return lPaths | |
| 256 | |
| 257 ## Give a list of Path instances overlapping a given region | |
| 258 # | |
| 259 # @note whole chains are returned, even if only a fragment overlap with the given region | |
| 260 # @param query string query name | |
| 261 # @param start integer start coordinate | |
| 262 # @param end integer end coordinate | |
| 263 # @return lPath list of Path instances | |
| 264 # | |
| 265 def getChainListOverlappingQueryCoord( self, query, start, end ): | |
| 266 if( start > end ): | |
| 267 tmp = start | |
| 268 start = end | |
| 269 end = tmp | |
| 270 sqlCmd = "SELECT DISTINCT path FROM %s WHERE query_name='%s'" % ( self._table, query ) | |
| 271 sqlCmd += " AND ( ( query_start <= %i AND query_end >= %i AND query_end <= %i )" % ( start, start, end ) | |
| 272 sqlCmd += " OR ( query_start >= %i AND query_end <= %i )" % ( start, end ) | |
| 273 sqlCmd += " OR ( query_start >= %i AND query_start <= %i AND query_end >= %i )" % ( start, end, end ) | |
| 274 sqlCmd += " OR ( query_start <= %i AND query_end >= %i ) )" % ( start, end ) | |
| 275 lIdentifiers = self._iDb.getIntegerListWithSQLCmd( sqlCmd ) | |
| 276 lPaths = self.getPathListFromIdList( lIdentifiers ) | |
| 277 return lPaths | |
| 278 | |
| 279 ## Give a list of Set instances overlapping a given region | |
| 280 # | |
| 281 # @param query string query name | |
| 282 # @param start integer start coordinate | |
| 283 # @param end integer end coordinate | |
| 284 # @return lSet list of Set instances | |
| 285 # | |
| 286 def getSetListOverlappingQueryCoord(self, query, start, end): | |
| 287 lPath = self.getPathListOverlappingQueryCoord(query, start, end) | |
| 288 lSet = PathUtils.getSetListFromQueries(lPath) | |
| 289 return lSet | |
| 290 | |
| 291 ## Give a list of Set instances included in a given region | |
| 292 # | |
| 293 # @param query string query name | |
| 294 # @param start integer start coordinate | |
| 295 # @param end integer end coordinate | |
| 296 # @return lSet list of Set instances | |
| 297 # | |
| 298 def getSetListIncludedInQueryCoord(self, query, start, end): | |
| 299 lPath=self.getPathListIncludedInQueryCoord(query, start, end) | |
| 300 lSet = PathUtils.getSetListFromQueries(lPath) | |
| 301 return lSet | |
| 302 | |
| 303 ## Give a a list of Path instances sorted by query coordinates | |
| 304 # | |
| 305 # @return lPaths list of Path instances | |
| 306 # | |
| 307 def getPathListSortedByQueryCoord( self ): | |
| 308 sqlCmd = "SELECT * FROM %s ORDER BY query_name, LEAST(query_start,query_end)" % ( self._table ) | |
| 309 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
| 310 return lPaths | |
| 311 | |
| 312 ## Give a a list of Path instances sorted by query coordinates for a given query | |
| 313 # | |
| 314 # @return lPaths list of Path instances | |
| 315 # | |
| 316 def getPathListSortedByQueryCoordFromQuery( self, queryName ): | |
| 317 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY LEAST(query_start,query_end)" % ( self._table, queryName ) | |
| 318 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
| 319 return lPaths | |
| 320 | |
| 321 ## Give a a list of Path instances sorted by query coordinates and score for a given query | |
| 322 # | |
| 323 # @return lPaths list of Path instances | |
| 324 # | |
| 325 def getPathListSortedByQueryCoordAndScoreFromQuery(self, queryName): | |
| 326 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY query_start, query_end, score" % (self._table, queryName) | |
| 327 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
| 328 return lPaths | |
| 329 | |
| 330 ## Give a cumulative length of all paths (fragments) for a given subject name | |
| 331 # | |
| 332 # @param subjectName string subject name | |
| 333 # @return nb Cumulative length for all path | |
| 334 # | |
| 335 # @warning doesn't take into account the overlaps !! | |
| 336 # | |
| 337 def getCumulLengthFromSubject( self, subjectName ): | |
| 338 sqlCmd = "SELECT SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s'" % ( self._table, subjectName ) | |
| 339 nb = self._iDb.getIntegerWithSQLCmd(sqlCmd) | |
| 340 return nb | |
| 341 | |
| 342 ## Give a list of the length of all chains of paths for a given subject name | |
| 343 # | |
| 344 # @param subjectName string name of the subject | |
| 345 # @return lChainLengths list of lengths per chain of paths | |
| 346 # | |
| 347 # @warning doesn't take into account the overlaps !! | |
| 348 # | |
| 349 def getChainLengthListFromSubject( self, subjectName ): | |
| 350 sqlCmd = "SELECT SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s' GROUP BY PATH" % ( self._table, subjectName ) | |
| 351 lChainLengths = self._iDb.getIntegerListWithSQLCmd(sqlCmd) | |
| 352 return lChainLengths | |
| 353 | |
| 354 ## Give a list of identity of all chains of paths for a given subject name | |
| 355 # | |
| 356 # @param subjectName string name of the subject | |
| 357 # @return lChainIdentities list of identities per chain of paths | |
| 358 # | |
| 359 # @warning doesn't take into account the overlaps !! | |
| 360 # | |
| 361 def getChainIdentityListFromSubject( self, subjectName ): | |
| 362 lChainIdentities = [] | |
| 363 sqlCmd = "SELECT SUM(identity*(ABS(query_start-query_end)+1)) / SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s' GROUP BY PATH" % ( self._table, subjectName ) | |
| 364 self._iDb.execute( sqlCmd ) | |
| 365 res = self._iDb.fetchall() | |
| 366 for i in res: | |
| 367 if i[0] != None: | |
| 368 lChainIdentities.append( round( float( i[0] ), 2 ) ) | |
| 369 return lChainIdentities | |
| 370 | |
| 371 ## Give a list of the length of all paths for a given subject name | |
| 372 # | |
| 373 # @param subjectName string name of the subject | |
| 374 # @return lPathLengths list of lengths per path | |
| 375 # | |
| 376 # @warning doesn't take into account the overlaps !! | |
| 377 # | |
| 378 def getPathLengthListFromSubject( self, subjectName ): | |
| 379 sqlCmd = "SELECT ABS(query_end-query_start)+1 FROM %s WHERE subject_name='%s'" % ( self._table, subjectName ) | |
| 380 lPathLengths = self._iDb.getIntegerListWithSQLCmd(sqlCmd) | |
| 381 return lPathLengths | |
| 382 | |
| 383 ## Give a a list with all distinct identifiers for a given subject sorted in decreasing order according to the length of the chains | |
| 384 # | |
| 385 # @param subjectName string subject name | |
| 386 # @return lPathNums a list of paths Id | |
| 387 # | |
| 388 def getIdListSortedByDecreasingChainLengthFromSubject( self, subjectName ): | |
| 389 sqlCmd = "SELECT DISTINCT path, SUM( ABS(query_end - query_start) + 1 ) AS length" | |
| 390 sqlCmd += " FROM %s" % ( self._table ) | |
| 391 sqlCmd += " WHERE subject_name='%s'" % ( subjectName ) | |
| 392 sqlCmd += " GROUP BY path" | |
| 393 sqlCmd += " ORDER BY length DESC"; | |
| 394 lPathNums = self._iDb.getIntegerListWithSQLCmd(sqlCmd) | |
| 395 return lPathNums | |
| 396 | |
| 397 ## Give a a list with all distinct identifiers for a given subject where the chain lengths is above a given threshold | |
| 398 # | |
| 399 # @param subjectName string subject name | |
| 400 # @lengthThreshold length threshold below which chains are filtered | |
| 401 # @return lPathNums a list of paths Id | |
| 402 # | |
| 403 def getIdListFromSubjectWhereChainsLongerThanThreshold( self, subjectName, lengthThreshold ): | |
| 404 lPathNums = [] | |
| 405 sqlCmd = "SELECT DISTINCT path, SUM( ABS(query_end - query_start) + 1 ) AS length" | |
| 406 sqlCmd += " FROM %s" % ( self._table ) | |
| 407 sqlCmd += " WHERE subject_name='%s'" % ( subjectName ) | |
| 408 sqlCmd += " GROUP BY path" | |
| 409 sqlCmd += " ORDER BY length DESC"; | |
| 410 self._iDb.execute( sqlCmd ) | |
| 411 res = self._iDb.fetchall() | |
| 412 for i in res: | |
| 413 if int(i[1]) >= int(lengthThreshold): | |
| 414 lPathNums.append( i[0] ) | |
| 415 return lPathNums | |
| 416 | |
| 417 ## Give a Set instances list of a query annotation | |
| 418 # | |
| 419 # @param query string query name | |
| 420 # @return lSets list of set instance | |
| 421 # | |
| 422 def getSetListFromQuery(self, query): | |
| 423 lpath = self.getPathListFromQuery(query) | |
| 424 lSets = PathUtils.getSetListFromQueries(lpath) | |
| 425 return lSets | |
| 426 | |
| 427 ## Give a Set instances list of a query annotation | |
| 428 # @note method to have correspondence with getSetListFromSeqName() in TableSetAdaptator (for srptAutoPromote.py) | |
| 429 # | |
| 430 # @param query string query name | |
| 431 # @return lSets list of set instance | |
| 432 # | |
| 433 def getSetListFromSeqName(self, query): | |
| 434 return self.getSetListFromQuery(query) | |
| 435 | |
| 436 ## Delete path corresponding to a given identifier number | |
| 437 # | |
| 438 # @param id integer identifier number | |
| 439 # | |
| 440 def deleteFromId(self,id): | |
| 441 sqlCmd = "delete from %s where path=%d;" % (self._table, id) | |
| 442 self._iDb.execute(sqlCmd) | |
| 443 | |
| 444 ## Delete path corresponding to a given object path line | |
| 445 # | |
| 446 # @param path object | |
| 447 # | |
| 448 def deleteFromPath(self,path): | |
| 449 sqlCmd = "delete from %s where path=%d and query_name='%s' and query_start=%s and query_end=%s and subject_name='%s' and subject_start=%s and subject_end=%s and E_value=%s and score=%s" % (self._table, path.getIdentifier(), path.getQueryName(), path.getQueryStart(), path.getQueryEnd(), path.getSubjectName(), path.getSubjectStart(), path.getSubjectEnd(), path.getEvalue(), int(path.getScore())) | |
| 450 self._iDb.execute(sqlCmd) | |
| 451 | |
| 452 ## Delete path corresponding to a given list of identifier number | |
| 453 # | |
| 454 # @param lId list of identifier number | |
| 455 # | |
| 456 def deleteFromIdList(self,lId): | |
| 457 if lId == []: | |
| 458 return | |
| 459 sqlCmd = "delete from %s where path=%d" % (self._table, lId[0]) | |
| 460 for id in lId[1:]: | |
| 461 sqlCmd += " or path=%d" %(id) | |
| 462 sqlCmd += ";" | |
| 463 self._iDb.execute(sqlCmd) | |
| 464 | |
| 465 ## Get a new id number | |
| 466 # | |
| 467 # @return newId integer new id | |
| 468 # | |
| 469 def getNewId(self): | |
| 470 sqlCmd = 'select max(path) from %s;' % (self._table) | |
| 471 maxId = self._iDb.getIntegerWithSQLCmd(sqlCmd) | |
| 472 newId = int(maxId)+1 | |
| 473 return newId | |
| 474 | |
| 475 ## Join two path by changing id number of id1 and id2 path to the least of id1 and id2 | |
| 476 # | |
| 477 # @param id1 integer id path number | |
| 478 # @param id2 integer id path number | |
| 479 # @return newId integer minimum of id1 id2 | |
| 480 # @note this method modify the ID even if this one not existing in the path table | |
| 481 # | |
| 482 def joinTwoPaths(self, id1, id2): | |
| 483 if id1 < id2: | |
| 484 newId = id1 | |
| 485 oldId = id2 | |
| 486 else: | |
| 487 newId = id2 | |
| 488 oldId = id1 | |
| 489 sqlCmd = "UPDATE %s SET path=%d WHERE path=%d"\ | |
| 490 % (self._table, newId, oldId) | |
| 491 self._iDb.execute(sqlCmd) | |
| 492 return newId | |
| 493 | |
| 494 ## Create a 'pathRange' table from a 'path' table. | |
| 495 # The output table summarizes the information per identifier. | |
| 496 # The min and max value are taken. | |
| 497 # The identity is averaged over the fragments. | |
| 498 # It may overwrite an existing table. | |
| 499 # | |
| 500 # @param outTable string name of the output table | |
| 501 # @return outTable string Table which summarizes the information per identifier | |
| 502 # | |
| 503 def path2PathRange( self, outTable="" ): | |
| 504 return self._path2PathRangeOrPath2PathRangeQuery(outTable) | |
| 505 | |
| 506 ## Create a 'pathrange' table from a 'path' table for the given query name | |
| 507 # The output table summarizes the information per identifier | |
| 508 # The min and max value are taken | |
| 509 # The identity is averaged over the fragments, weighted by the length of the of the query | |
| 510 # It may overwrite an existing table | |
| 511 # | |
| 512 # @param outTable string name of the output table | |
| 513 # @param query string query name | |
| 514 # @return outTable string Table which summarizes the information per identifier | |
| 515 # | |
| 516 def _path2PathRangeFromQuery( self, queryName, outTable="" ): | |
| 517 return self._path2PathRangeOrPath2PathRangeQuery(outTable, queryName) | |
| 518 | |
| 519 def _path2PathRangeOrPath2PathRangeQuery(self, outTable, queryName=""): | |
| 520 self._iDb.createIndex( self._table, "path" ) | |
| 521 if outTable == "": | |
| 522 outTable = "%s_range" % ( self._table ) | |
| 523 self._iDb.dropTable( outTable ) | |
| 524 | |
| 525 tmpTable = "%s_tmp" % ( self._table ) | |
| 526 self._iDb.dropTable( tmpTable ) | |
| 527 | |
| 528 sqlCmd = self._genSqlCmdForTmpTableAccordingToQueryName(queryName, tmpTable) | |
| 529 self._iDb.execute(sqlCmd) | |
| 530 | |
| 531 sqlCmd = "CREATE TABLE %s SELECT path, query_name, MIN(query_start) AS query_start, MAX(query_end) AS query_end, subject_name, MIN(subject_start) AS subject_start, MAX(subject_end) AS subject_end, MIN(e_value) AS e_value, SUM(score) AS score, TRUNCATE(SUM(identity)/SUM(ABS(query_end-query_start)+1),2) AS identity FROM %s WHERE query_start<query_end AND subject_start<subject_end GROUP BY path;" % ( outTable, tmpTable ) | |
| 532 self._iDb.execute( sqlCmd ) | |
| 533 | |
| 534 sqlCmd = "INSERT into %s SELECT path, query_name, MIN(query_start) AS query_start, MAX(query_end) AS query_end, subject_name, MAX(subject_start) AS subject_start, MIN(subject_end) AS subject_end, MIN(e_value) AS e_value, SUM(score) AS score, TRUNCATE(SUM(identity)/SUM(ABS(query_end-query_start)+1),2) AS identity FROM %s WHERE query_start<query_end AND subject_start>subject_end GROUP BY path;" % ( outTable, tmpTable ) | |
| 535 self._iDb.execute( sqlCmd ) | |
| 536 | |
| 537 self._iDb.createIndex( outTable, "path" ) | |
| 538 self._iDb.dropTable( tmpTable ) | |
| 539 return outTable | |
| 540 | |
| 541 ## Give a list of Path lists sorted by weighted identity. | |
| 542 # | |
| 543 # @return lChains list of chains | |
| 544 # | |
| 545 def getListOfChainsSortedByAscIdentityFromQuery( self, qry ): | |
| 546 lChains = [] | |
| 547 tmpTable = self._path2PathRangeFromQuery( qry ) | |
| 548 sqlCmd = "SELECT path FROM %s ORDER BY identity" % ( tmpTable ) | |
| 549 self._iDb.execute( sqlCmd ) | |
| 550 lPathnums = self._iDb.fetchall() | |
| 551 self._iDb.dropTable( tmpTable ) | |
| 552 for pathnum in lPathnums: | |
| 553 lChains.append( self.getPathListFromId( int(pathnum[0]) ) ) | |
| 554 return lChains | |
| 555 | |
| 556 ## Give a list of path instances sorted by increasing E-value | |
| 557 # | |
| 558 # @return lPaths list of path instances | |
| 559 # | |
| 560 def getPathListSortedByIncreasingEvalueFromQuery( self, queryName ): | |
| 561 sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY E_value ASC" % ( self._table, queryName ) | |
| 562 lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
| 563 return lPaths | |
| 564 | |
| 565 | |
| 566 ## Return the number of times a given instance is present in the table | |
| 567 # The identifier is not considered, | |
| 568 # only coordinates, score, E-value and identity. | |
| 569 # | |
| 570 # @return nbOcc integer | |
| 571 # | |
| 572 def getNbOccurrences( self, iPath ): | |
| 573 sqlCmd = "SELECT COUNT(*) FROM %s WHERE" % ( self._table ) | |
| 574 sqlCmd += " query_name='%s'" % ( iPath.range_query.seqname ) | |
| 575 sqlCmd += " AND query_start='%s'" % ( iPath.range_query.start ) | |
| 576 sqlCmd += " AND query_end='%s'" % ( iPath.range_query.end ) | |
| 577 sqlCmd += " AND subject_name='%s'" % ( iPath.range_subject.seqname ) | |
| 578 sqlCmd += " AND subject_start='%s'" % ( iPath.range_subject.start ) | |
| 579 sqlCmd += " AND subject_end='%s'" % ( iPath.range_subject.end ) | |
| 580 sqlCmd += " AND score='%s'" % ( iPath.score ) | |
| 581 sqlCmd += " AND e_value='%s'" % ( iPath.e_value ) | |
| 582 sqlCmd += " AND identity='%s'" % ( iPath.identity ) | |
| 583 nbOcc = self._iDb.getIntegerWithSQLCmd( sqlCmd ) | |
| 584 return nbOcc | |
| 585 | |
| 586 | |
| 587 def _getPathListFromTypeName( self, type, typeName ): | |
| 588 sqlCmd = "SELECT * FROM %s WHERE %s_name='%s';" % ( self._table, type, typeName ) | |
| 589 lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) | |
| 590 return lPath | |
| 591 | |
| 592 def _getDistinctTypeNamesList( self, type ): | |
| 593 sqlCmd = "SELECT DISTINCT %s_name FROM %s" % ( type, self._table ) | |
| 594 lDistinctTypeNames = self._iDb.getStringListWithSQLCmd(sqlCmd) | |
| 595 return lDistinctTypeNames | |
| 596 | |
| 597 def _getPathsNbFromTypeName( self, type, typeName ): | |
| 598 sqlCmd = "SELECT COUNT(*) FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName ) | |
| 599 pathNb = self._iDb.getIntegerWithSQLCmd( sqlCmd ) | |
| 600 return pathNb | |
| 601 | |
| 602 def _getIdListFromTypeName( self, type, typeName ): | |
| 603 sqlCmd = "SELECT DISTINCT path FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName ) | |
| 604 lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd ) | |
| 605 return lId | |
| 606 | |
| 607 def _getIdNbFromTypeName( self, type, typeName ): | |
| 608 sqlCmd = "SELECT COUNT( DISTINCT path ) FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName ) | |
| 609 idNb = self._iDb.getIntegerWithSQLCmd( sqlCmd ) | |
| 610 return idNb | |
| 611 | |
| 612 def _getTypeAndAttr2Insert(self, path): | |
| 613 type2Insert = ("'%d'", "'%s'", "'%d'", "'%d'", "'%s'", "'%d'", "'%d'", "'%g'", "'%d'", "'%f'") | |
| 614 if path.range_query.isOnDirectStrand(): | |
| 615 queryStart = path.range_query.start | |
| 616 queryEnd = path.range_query.end | |
| 617 subjectStart = path.range_subject.start | |
| 618 subjectEnd = path.range_subject.end | |
| 619 else: | |
| 620 queryStart = path.range_query.end | |
| 621 queryEnd = path.range_query.start | |
| 622 subjectStart = path.range_subject.end | |
| 623 subjectEnd = path.range_subject.start | |
| 624 attr2Insert = ( path.id,\ | |
| 625 path.range_query.seqname,\ | |
| 626 queryStart,\ | |
| 627 queryEnd,\ | |
| 628 path.range_subject.seqname,\ | |
| 629 subjectStart,\ | |
| 630 subjectEnd,\ | |
| 631 path.e_value,\ | |
| 632 path.score,\ | |
| 633 path.identity\ | |
| 634 ) | |
| 635 return type2Insert, attr2Insert | |
| 636 | |
| 637 def _getInstanceToAdapt(self): | |
| 638 iPath = Path() | |
| 639 return iPath | |
| 640 | |
| 641 def _escapeAntislash(self, obj): | |
| 642 obj.range_query.seqname = obj.range_query.seqname.replace("\\", "\\\\") | |
| 643 obj.range_subject.seqname = obj.range_subject.seqname.replace("\\", "\\\\") | |
| 644 | |
| 645 def _genSqlCmdForTmpTableAccordingToQueryName(self, queryName, tmpTable): | |
| 646 sqlCmd = "" | |
| 647 if queryName == "": | |
| 648 sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s" % (tmpTable, self._table) | |
| 649 else: | |
| 650 sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s WHERE query_name='%s'" % (tmpTable, self._table, queryName) | |
| 651 return sqlCmd | |
| 652 | |
| 653 ## return a filtered list with only one unique occurrence of path of a given list | |
| 654 # | |
| 655 # @param lPath a list of Path instances | |
| 656 # @return lUniquePath a list of Path instances | |
| 657 # | |
| 658 def getListOfUniqueOccPath(self, lPath): | |
| 659 if len(lPath) < 2 : | |
| 660 return lPath | |
| 661 | |
| 662 sortedListPath = sorted(lPath, key=lambda iPath: ( iPath.range_query.getSeqname(), iPath.range_query.getStart(), iPath.range_query.getEnd(), iPath.range_subject.getSeqname(), iPath.range_subject.getStart(), iPath.range_subject.getEnd())) | |
| 663 lUniquePath = [] | |
| 664 for i in xrange(1, len(sortedListPath)): | |
| 665 previousPath = sortedListPath [i-1] | |
| 666 currentPath = sortedListPath [i] | |
| 667 if previousPath != currentPath: | |
| 668 lUniquePath.append(previousPath) | |
| 669 | |
| 670 if previousPath != currentPath: | |
| 671 lUniquePath.append(currentPath) | |
| 672 | |
| 673 return lUniquePath |
