Mercurial > repos > yufei-luo > s_mart

diff commons/core/sql/TablePathAdaptator.py @ 6:769e306b7933
Change the repository level.
author: yufei-luo
date: Fri, 18 Jan 2013 04:54:14 -0500
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TablePathAdaptator.py	Fri Jan 18 04:54:14 2013 -0500
@@ -0,0 +1,673 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+from commons.core.coord.Path import Path
+from commons.core.coord.PathUtils import PathUtils
+from commons.core.sql.TableAdaptator import TableAdaptator
+from commons.core.sql.ITablePathAdaptator import ITablePathAdaptator
+
+
+## Adaptator for a Path table
+#
+class TablePathAdaptator( TableAdaptator, ITablePathAdaptator ):
+
+    ## Give a list of Path instances having the same identifier
+    #
+    # @param id integer identifier number
+    # @return lPath a list of Path instances
+    #
+    def getPathListFromId( self, id ):
+        sqlCmd = "SELECT * FROM %s WHERE path='%d';" % ( self._table, id )
+        lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lPath
+    
+    ## Give a list of Path instances according to the given list of identifier numbers
+    #
+    # @param lId integer list 
+    # @return lPath a list of Path instances
+    #
+    def getPathListFromIdList( self, lId ):
+        lPath=[]
+        if lId == []:
+            return lPath
+        sqlCmd = "select * from %s where path=%d" % (self._table, lId[0])
+        for i in lId[1:]:
+            sqlCmd += " or path=%d" % (i)
+        sqlCmd += ";"
+        lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lPath
+    
+    ## Give a list of Path instances having the same given query name
+    #
+    # @param query string name of the query 
+    # @return lPath a list of Path instances
+    #
+    def getPathListFromQuery( self, query ):
+        lPath = self._getPathListFromTypeName("query", query)
+        return lPath
+    
+    ## Give a list of Path instances having the same given subject name
+    #
+    # @param subject string name of the subject 
+    # @return lPath a list of Path instances
+    #
+    def getPathListFromSubject( self, subject ):
+        lPath = self._getPathListFromTypeName("subject", subject)
+        return lPath
+    
+    ## Give a list of the distinct subject names present in the table
+    #
+    # @return lDistinctSubjectNames string list
+    #
+    def getSubjectList(self):
+        lDistinctSubjectNames = self._getDistinctTypeNamesList("subject")
+        return lDistinctSubjectNames
+    
+    ## Give a list of the distinct query names present in the table
+    #
+    # @return lDistinctQueryNames string list
+    #
+    def getQueryList(self):
+        lDistinctQueryNames = self._getDistinctTypeNamesList("query")
+        return lDistinctQueryNames
+    
+    ## Give a list of the distinct query names present in the table
+    # @note method to have correspondence with getSeqNameList() in TableSetAdaptator (for srptAutoPromote.py)
+    #
+    # @return lDistinctContigNames string list
+    #
+    def getSeqNameList(self):
+        return self.getQueryList()
+    
+    ## Give a list with all the distinct identifiers corresponding to the query
+    #
+    # @param query string name of the query 
+    # @return lId a list of integer
+    #
+    def getIdListFromQuery( self, query ):
+        lId = self._getIdListFromTypeName("query", query)
+        return lId
+    
+    ## Give a list with all the distinct identifiers corresponding to the subject
+    #
+    # @param subject string name of the subject 
+    # @return lId a list of integer
+    #
+    def getIdListFromSubject( self, subject ):
+        lId = self._getIdListFromTypeName("subject", subject)
+        return lId
+    
+    ## Give a list of identifiers contained in the table
+    #
+    # @return lId integer list
+    #
+    def getIdList(self):
+        sqlCmd = "SELECT DISTINCT path from %s;" % (self._table)
+        lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd )
+        return lId
+        
+    ## Give a list of the distinct subject names present in the table given a query name
+    #
+    # @param queryName string 
+    # @return lDistinctSubjectNamesPerQuery string list
+    #
+    def getSubjectListFromQuery( self, queryName ):
+        sqlCmd = "SELECT DISTINCT subject_name FROM %s WHERE query_name='%s'" % ( self._table, queryName )
+        lDistinctSubjectNamesPerQuery = self._iDb.getStringListWithSQLCmd(sqlCmd)
+        return lDistinctSubjectNamesPerQuery
+    
+    ## Give the data contained in the table as a list of Paths instances
+    #
+    # @return lPaths list of paths instances
+    #
+    def getListOfAllPaths( self ):
+        return self.getListOfAllCoordObject()
+    
+    ## Give a list of Path instances with the given query and subject, both on direct strand
+    #
+    # @param query string query name
+    # @param subject string subject name
+    # @return lPaths list of path instances
+    #
+    def getPathListWithDirectQueryDirectSubjectFromQuerySubject( self, query, subject ):
+        sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND subject_name='%s' AND query_start<query_end AND subject_start<subject_end ORDER BY query_name, subject_name, query_start;" % ( self._table, query, subject )
+        lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lPaths
+    
+    ## Give a list of Path instances with the given query on direct strand and the given subject on reverse strand
+    #
+    # @param query string query name
+    # @param subject string subject name
+    # @return lPaths list of path instances
+    #
+    def getPathListWithDirectQueryReverseSubjectFromQuerySubject( self, query, subject ):
+        sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND subject_name='%s' AND query_start<query_end AND subject_start>subject_end ORDER BY query_name, subject_name, query_start;" % ( self._table, query, subject )
+        lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lPaths
+
+    ## Give the number of Path instances with the given query name
+    #
+    # @param query string query name
+    # @return pathNb integer the number of Path instances
+    #
+    def getNbPathsFromQuery( self, query ):
+        pathNb = self._getPathsNbFromTypeName("query", query)
+        return pathNb
+    
+    ## Give the number of Path instances with the given subject name
+    #
+    # @param subject string subject name
+    # @return pathNb integer the number of Path instances
+    #
+    def getNbPathsFromSubject( self, subject ):
+        pathNb = self._getPathsNbFromTypeName("subject", subject)
+        return pathNb
+    
+    ## Give the number of distinct path identifiers
+    #
+    # @return idNb integer the number of distinct path identifiers
+    #
+    def getNbIds( self ):
+        sqlCmd = "SELECT COUNT( DISTINCT path ) FROM %s" % ( self._table )
+        idNb = self._iDb.getIntegerWithSQLCmd( sqlCmd )
+        return idNb
+    
+    ## Give the number of distinct path identifiers for a given subject
+    #
+    # @param subjectName string subject name
+    # @return idNb integer the number of distinct path identifiers
+    #
+    def getNbIdsFromSubject( self, subjectName ):
+        idNb = self._getIdNbFromTypeName("subject", subjectName)
+        return idNb
+    
+    ## Give the number of distinct path identifiers for a given query
+    #
+    # @param queryName string query name
+    # @return idNb integer the number of distinct path identifiers
+    #
+    def getNbIdsFromQuery( self, queryName ):
+        idNb = self._getIdNbFromTypeName("query", queryName)
+        return idNb
+    
+    ## Give a list of Path instances included in a given query region
+    #
+    # @param query string query name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lPaths list of Path instances
+    #
+    def getPathListIncludedInQueryCoord( self, query, start, end ):
+        if( start > end ):
+            tmp = start
+            start = end
+            end = tmp
+        sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND query_start>=%i AND query_end<=%i" % ( self._table, query, start, end )
+        lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lPaths
+    
+    ## Give a list of Path instances overlapping a given region
+    #
+    # @param query string query name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lPath list of Path instances
+    #
+    def getPathListOverlappingQueryCoord( self, query, start, end ):
+        if( start > end ):
+            tmp = start
+            start = end
+            end = tmp
+        sqlCmd = "SELECT * FROM %s WHERE query_name='%s'" % ( self._table, query )
+        sqlCmd += " AND ( ( query_start < %i AND query_end >= %i AND query_end <= %i )" % ( start, start, end )
+        sqlCmd += " OR ( query_start >= %i AND query_end <= %i )" % ( start, end )
+        sqlCmd += " OR ( query_start >= %i AND query_start <= %i AND query_end > %i )" % ( start, end, end )
+        sqlCmd += " OR ( query_start < %i AND query_end > %i ) )" % ( start, end )
+        lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lPaths
+    
+    ## Give a list of Path instances overlapping a given region
+    #
+    # @note whole chains are returned, even if only one fragment overlaps the given region
+    # @param query string query name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lPath list of Path instances
+    #
+    def getChainListOverlappingQueryCoord( self, query, start, end ):
+        if( start > end ):
+            tmp = start
+            start = end
+            end = tmp
+        sqlCmd = "SELECT DISTINCT path FROM %s WHERE query_name='%s'" % ( self._table, query )
+        sqlCmd += " AND ( ( query_start <= %i AND query_end >= %i AND query_end <= %i )" % ( start, start, end )
+        sqlCmd += " OR ( query_start >= %i AND query_end <= %i )" % ( start, end )
+        sqlCmd += " OR ( query_start >= %i AND query_start <= %i AND query_end >= %i )" % ( start, end, end )
+        sqlCmd += " OR ( query_start <= %i AND query_end >= %i ) )" % ( start, end )
+        lIdentifiers = self._iDb.getIntegerListWithSQLCmd( sqlCmd )
+        lPaths = self.getPathListFromIdList( lIdentifiers )
+        return lPaths
+    
+    ## Give a list of Set instances overlapping a given region
+    #
+    # @param query string query name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lSet list of Set instances
+    #
+    def getSetListOverlappingQueryCoord(self, query, start, end):
+        lPath = self.getPathListOverlappingQueryCoord(query, start, end)
+        lSet = PathUtils.getSetListFromQueries(lPath)
+        return lSet
+    
+    ## Give a list of Set instances included in a given region
+    #
+    # @param query string query name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lSet list of Set instances
+    #
+    def getSetListIncludedInQueryCoord(self, query, start, end):
+        lPath=self.getPathListIncludedInQueryCoord(query, start, end)
+        lSet = PathUtils.getSetListFromQueries(lPath) 
+        return lSet
+    
+    ## Give a list of Path instances sorted by query coordinates
+    #
+    # @return lPaths list of Path instances
+    #
+    def getPathListSortedByQueryCoord( self ):
+        sqlCmd = "SELECT * FROM %s ORDER BY query_name, LEAST(query_start,query_end)" % ( self._table )
+        lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lPaths
+    
+    ## Give a list of Path instances sorted by query coordinates for a given query
+    #
+    # @return lPaths list of Path instances
+    #
+    def getPathListSortedByQueryCoordFromQuery( self, queryName ):
+        sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY LEAST(query_start,query_end)" % ( self._table, queryName )
+        lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lPaths
+    
+    ## Give a list of Path instances sorted by query coordinates and score for a given query
+    #
+    # @return lPaths list of Path instances
+    #
+    def getPathListSortedByQueryCoordAndScoreFromQuery(self, queryName):
+        sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY query_start, query_end, score" % (self._table, queryName)
+        lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lPaths
+    
+    ## Give a cumulative length of all paths (fragments) for a given subject name
+    #
+    # @param subjectName string subject name
+    # @return nb Cumulative length for all path
+    #
+    # @warning doesn't take into account the overlaps !!
+    #
+    def getCumulLengthFromSubject( self, subjectName ):
+        sqlCmd = "SELECT SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s'" % ( self._table, subjectName )
+        nb = self._iDb.getIntegerWithSQLCmd(sqlCmd)
+        return nb
+    
+    ## Give a list of the length of all chains of paths for a given subject name
+    #
+    # @param subjectName string  name of the subject
+    # @return lChainLengths list of lengths per chain of paths
+    #
+    # @warning doesn't take into account the overlaps !!
+    #
+    def getChainLengthListFromSubject( self, subjectName ):
+        sqlCmd = "SELECT SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s' GROUP BY PATH" % ( self._table, subjectName )
+        lChainLengths = self._iDb.getIntegerListWithSQLCmd(sqlCmd)
+        return lChainLengths
+    
+    ## Give a list of identity of all chains of paths for a given subject name
+    #
+    # @param subjectName string name of the subject
+    # @return lChainIdentities list of identities per chain of paths
+    #
+    # @warning doesn't take into account the overlaps !!
+    #
+    def getChainIdentityListFromSubject( self, subjectName ):
+        lChainIdentities = []
+        sqlCmd = "SELECT SUM(identity*(ABS(query_start-query_end)+1)) / SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s' GROUP BY PATH" % ( self._table, subjectName )
+        self._iDb.execute( sqlCmd )
+        res = self._iDb.fetchall()
+        for i in res:
+            if i[0] != None:
+                lChainIdentities.append( round( float( i[0] ), 2 ) )
+        return lChainIdentities
+    
+    ## Give a list of the length of all paths for a given subject name
+    #
+    # @param subjectName string name of the subject
+    # @return lPathLengths list of lengths per path
+    #
+    # @warning doesn't take into account the overlaps !!
+    #
+    def getPathLengthListFromSubject( self, subjectName ):
+        sqlCmd = "SELECT ABS(query_end-query_start)+1 FROM %s WHERE subject_name='%s'" % ( self._table, subjectName )
+        lPathLengths = self._iDb.getIntegerListWithSQLCmd(sqlCmd)
+        return lPathLengths
+
+    ## Give a list with all distinct identifiers for a given subject sorted in decreasing order according to the length of the chains
+    #    
+    # @param subjectName string subject name
+    # @return lPathNums a list of paths Id
+    #
+    def getIdListSortedByDecreasingChainLengthFromSubject( self, subjectName ):
+        sqlCmd = "SELECT DISTINCT path, SUM( ABS(query_end - query_start) + 1 ) AS length"
+        sqlCmd += " FROM %s" % ( self._table )
+        sqlCmd += " WHERE subject_name='%s'" % ( subjectName )
+        sqlCmd += " GROUP BY path"
+        sqlCmd += " ORDER BY length DESC";
+        lPathNums = self._iDb.getIntegerListWithSQLCmd(sqlCmd)
+        return lPathNums
+
+    ## Give a list with all distinct identifiers for a given subject where the chain length is greater than or equal to a given threshold
+    #    
+    # @param subjectName string subject name
+    # @param lengthThreshold integer length threshold below which chains are filtered out
+    # @return lPathNums a list of paths Id
+    #
+    def getIdListFromSubjectWhereChainsLongerThanThreshold( self, subjectName, lengthThreshold ):
+        lPathNums = []
+        sqlCmd = "SELECT DISTINCT path, SUM( ABS(query_end - query_start) + 1 ) AS length"
+        sqlCmd += " FROM %s" % ( self._table )
+        sqlCmd += " WHERE subject_name='%s'" % ( subjectName )
+        sqlCmd += " GROUP BY path"
+        sqlCmd += " ORDER BY length DESC";
+        self._iDb.execute( sqlCmd )
+        res = self._iDb.fetchall()
+        for i in res:
+            if int(i[1]) >= int(lengthThreshold):
+                lPathNums.append( i[0] )
+        return lPathNums
+    
+    ## Give a Set instances list of a query annotation
+    #
+    # @param query string query name
+    # @return lSets list of set instance 
+    #
+    def getSetListFromQuery(self, query):
+        lpath = self.getPathListFromQuery(query)
+        lSets = PathUtils.getSetListFromQueries(lpath)
+        return lSets
+    
+    ## Give a Set instances list of a query annotation
+    # @note method to have correspondence with getSetListFromSeqName() in TableSetAdaptator (for srptAutoPromote.py)
+    #
+    # @param query string query name
+    # @return lSets list of set instance 
+    #
+    def getSetListFromSeqName(self, query):
+        return self.getSetListFromQuery(query)
+    
+    ## Delete path corresponding to a given identifier number
+    #
+    # @param id integer identifier number
+    #
+    def deleteFromId(self,id):
+        sqlCmd = "delete from %s where path=%d;" % (self._table, id)
+        self._iDb.execute(sqlCmd)
+
+    ## Delete path corresponding to a given object path line
+    #
+    # @param path object 
+    #
+    def deleteFromPath(self,path):
+        sqlCmd = "delete from %s where path=%d and query_name='%s' and query_start=%s and query_end=%s and subject_name='%s' and subject_start=%s and subject_end=%s and E_value=%s and score=%s" % (self._table, path.getIdentifier(), path.getQueryName(), path.getQueryStart(), path.getQueryEnd(), path.getSubjectName(), path.getSubjectStart(), path.getSubjectEnd(), path.getEvalue(), int(path.getScore()))
+        self._iDb.execute(sqlCmd)
+
+    ## Delete path corresponding to a given list of identifier number
+    #
+    # @param lId list of identifier number
+    #
+    def deleteFromIdList(self,lId):
+        if lId == []:
+            return        
+        sqlCmd = "delete from %s where path=%d" % (self._table, lId[0])
+        for id in lId[1:]:
+            sqlCmd += " or path=%d" %(id)
+        sqlCmd += ";"
+        self._iDb.execute(sqlCmd)
+
+    ## Get a new id number
+    #
+    # @return newId integer new id
+    #
+    def getNewId(self):
+        sqlCmd = 'select max(path) from %s;' % (self._table)
+        maxId = self._iDb.getIntegerWithSQLCmd(sqlCmd)
+        newId = int(maxId)+1
+        return newId
+    
+    ##  Join two path by changing id number of id1 and id2 path to the least of id1 and id2
+    #
+    # @param id1 integer id path number
+    # @param id2 integer id path number
+    # @return newId integer minimum of id1 id2
+    # @note this method executes the update even if the identifier does not exist in the path table  
+    #     
+    def joinTwoPaths(self, id1, id2):
+        if id1 < id2:
+            newId = id1
+            oldId = id2
+        else:
+            newId = id2
+            oldId = id1
+        sqlCmd = "UPDATE %s SET path=%d WHERE path=%d"\
+                % (self._table, newId, oldId)
+        self._iDb.execute(sqlCmd)
+        return newId
+    
+    ## Create a 'pathRange' table from a 'path' table. 
+    # The output table summarizes the information per identifier. 
+    # The min and max value are taken. 
+    # The identity is averaged over the fragments. 
+    # It may overwrite an existing table.
+    #
+    # @param outTable string name of the output table
+    # @return outTable string Table which summarizes the information per identifier
+    #
+    def path2PathRange( self, outTable="" ):
+        return self._path2PathRangeOrPath2PathRangeQuery(outTable)
+  
+    ## Create a 'pathrange' table from a 'path' table for the given query name
+    #  The output table summarizes the information per identifier
+    #  The min and max value are taken
+    #  The identity is averaged over the fragments, weighted by the length of the query
+    #  It may overwrite an existing table
+    #
+    # @param outTable string name of the output table
+    # @param queryName string query name
+    # @return outTable string  Table which summarizes the information per identifier
+    #
+    def _path2PathRangeFromQuery( self, queryName, outTable="" ):
+        return self._path2PathRangeOrPath2PathRangeQuery(outTable, queryName)
+    
+    def _path2PathRangeOrPath2PathRangeQuery(self, outTable, queryName=""):
+        self._iDb.createIndex( self._table, "path" )
+        if outTable == "":
+            outTable = "%s_range" % ( self._table )
+        self._iDb.dropTable( outTable )
+        
+        tmpTable = "%s_tmp" % ( self._table )
+        self._iDb.dropTable( tmpTable )
+        
+        sqlCmd = self._genSqlCmdForTmpTableAccordingToQueryName(queryName, tmpTable)
+        self._iDb.execute(sqlCmd)
+            
+        sqlCmd = "CREATE TABLE %s SELECT path, query_name, MIN(query_start) AS query_start, MAX(query_end) AS query_end, subject_name, MIN(subject_start) AS subject_start, MAX(subject_end) AS subject_end, MIN(e_value) AS e_value, SUM(score) AS score, TRUNCATE(SUM(identity)/SUM(ABS(query_end-query_start)+1),2) AS identity FROM %s WHERE query_start<query_end AND subject_start<subject_end GROUP BY path;" % ( outTable, tmpTable )
+        self._iDb.execute( sqlCmd )
+        
+        sqlCmd = "INSERT into %s SELECT path, query_name, MIN(query_start) AS query_start, MAX(query_end) AS query_end, subject_name, MAX(subject_start) AS subject_start, MIN(subject_end) AS subject_end, MIN(e_value) AS e_value, SUM(score) AS score, TRUNCATE(SUM(identity)/SUM(ABS(query_end-query_start)+1),2) AS identity FROM %s WHERE query_start<query_end AND subject_start>subject_end GROUP BY path;" % ( outTable, tmpTable )
+        self._iDb.execute( sqlCmd )
+        
+        self._iDb.createIndex( outTable, "path" )
+        self._iDb.dropTable( tmpTable )
+        return outTable
+            
+    ## Give a list of Path lists sorted by weighted identity.
+    #
+    # @return lChains list of chains
+    #
+    def getListOfChainsSortedByAscIdentityFromQuery( self, qry ):
+        lChains = []
+        tmpTable = self._path2PathRangeFromQuery( qry )
+        sqlCmd = "SELECT path FROM %s ORDER BY identity" % ( tmpTable )
+        self._iDb.execute( sqlCmd )
+        lPathnums = self._iDb.fetchall()
+        self._iDb.dropTable( tmpTable )
+        for pathnum in lPathnums:
+            lChains.append( self.getPathListFromId( int(pathnum[0]) ) )
+        return lChains
+    
+    ## Give a list of path instances sorted by increasing E-value
+    #
+    # @return lPaths list of path instances
+    #
+    def getPathListSortedByIncreasingEvalueFromQuery( self, queryName ):
+        sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY E_value ASC" % ( self._table, queryName )
+        lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lPaths
+    
+    
+    ## Return the number of times a given instance is present in the table
+    # The identifier is not considered,
+    # only coordinates, score, E-value and identity.
+    #
+    # @return nbOcc integer
+    #
+    def getNbOccurrences( self, iPath ):
+        sqlCmd = "SELECT COUNT(*) FROM %s WHERE" % ( self._table )
+        sqlCmd += " query_name='%s'" % ( iPath.range_query.seqname )
+        sqlCmd += " AND query_start='%s'" % ( iPath.range_query.start )
+        sqlCmd += " AND query_end='%s'" % ( iPath.range_query.end )
+        sqlCmd += " AND subject_name='%s'" % ( iPath.range_subject.seqname )
+        sqlCmd += " AND subject_start='%s'" % ( iPath.range_subject.start )
+        sqlCmd += " AND subject_end='%s'" % ( iPath.range_subject.end )
+        sqlCmd += " AND score='%s'" % ( iPath.score )
+        sqlCmd += " AND e_value='%s'" % ( iPath.e_value )
+        sqlCmd += " AND identity='%s'" % ( iPath.identity )
+        nbOcc = self._iDb.getIntegerWithSQLCmd( sqlCmd )
+        return nbOcc
+    
+    
+    def _getPathListFromTypeName( self, type, typeName ):
+        sqlCmd = "SELECT * FROM %s WHERE %s_name='%s';" % ( self._table, type, typeName )
+        lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lPath
+    
+    def _getDistinctTypeNamesList( self, type ):
+        sqlCmd = "SELECT DISTINCT %s_name FROM %s" % ( type, self._table )
+        lDistinctTypeNames = self._iDb.getStringListWithSQLCmd(sqlCmd)
+        return lDistinctTypeNames
+    
+    def _getPathsNbFromTypeName( self, type, typeName ):
+        sqlCmd = "SELECT COUNT(*) FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName )
+        pathNb = self._iDb.getIntegerWithSQLCmd( sqlCmd )
+        return pathNb
+    
+    def _getIdListFromTypeName( self, type, typeName ):
+        sqlCmd = "SELECT DISTINCT path FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName )
+        lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd )
+        return lId
+    
+    def _getIdNbFromTypeName( self, type, typeName ):
+        sqlCmd = "SELECT COUNT( DISTINCT path ) FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName )
+        idNb = self._iDb.getIntegerWithSQLCmd( sqlCmd )
+        return idNb
+    
+    def _getTypeAndAttr2Insert(self, path):
+        type2Insert = ("'%d'", "'%s'", "'%d'", "'%d'", "'%s'", "'%d'", "'%d'", "'%g'", "'%d'", "'%f'")
+        if path.range_query.isOnDirectStrand():
+            queryStart = path.range_query.start
+            queryEnd = path.range_query.end
+            subjectStart = path.range_subject.start
+            subjectEnd = path.range_subject.end
+        else:
+            queryStart = path.range_query.end
+            queryEnd = path.range_query.start
+            subjectStart = path.range_subject.end
+            subjectEnd = path.range_subject.start
+        attr2Insert = ( path.id,\
+                     path.range_query.seqname,\
+                     queryStart,\
+                     queryEnd,\
+                     path.range_subject.seqname,\
+                     subjectStart,\
+                     subjectEnd,\
+                     path.e_value,\
+                     path.score,\
+                     path.identity\
+                     )
+        return type2Insert, attr2Insert
+    
+    def _getInstanceToAdapt(self):
+        iPath = Path()
+        return iPath
+    
+    def _escapeAntislash(self, obj):
+        obj.range_query.seqname = obj.range_query.seqname.replace("\\", "\\\\")
+        obj.range_subject.seqname = obj.range_subject.seqname.replace("\\", "\\\\")
+    
+    def _genSqlCmdForTmpTableAccordingToQueryName(self, queryName, tmpTable):
+        sqlCmd = ""
+        if queryName == "":
+            sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s" % (tmpTable, self._table)
+        else:
+            sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s WHERE query_name='%s'" % (tmpTable, self._table, queryName)
+        return sqlCmd
+        
+    ## return a filtered list with only one unique occurrence of path of a given list
+    #
+    # @param lPath a list of Path instances
+    # @return lUniquePath a list of Path instances
+    #
+    def getListOfUniqueOccPath(self, lPath):
+        if len(lPath) < 2 :
+            return lPath
+        
+        sortedListPath = sorted(lPath, key=lambda iPath: ( iPath.range_query.getSeqname(), iPath.range_query.getStart(), iPath.range_query.getEnd(), iPath.range_subject.getSeqname(), iPath.range_subject.getStart(), iPath.range_subject.getEnd()))
+        lUniquePath = []    
+        for i in xrange(1, len(sortedListPath)):
+            previousPath =  sortedListPath [i-1]
+            currentPath =  sortedListPath [i]
+            if previousPath != currentPath:
+                lUniquePath.append(previousPath)
+        
+        if previousPath != currentPath:
+            lUniquePath.append(currentPath)  
+                  
+        return lUniquePath       
\ No newline at end of file
author	yufei-luo
date	Fri, 18 Jan 2013 04:54:14 -0500
parents
children