Mercurial > repos > yufei-luo > s_mart
diff commons/core/sql/TablePathAdaptator.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/core/sql/TablePathAdaptator.py Fri Jan 18 04:54:14 2013 -0500 @@ -0,0 +1,673 @@ +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. + + +from commons.core.coord.Path import Path +from commons.core.coord.PathUtils import PathUtils +from commons.core.sql.TableAdaptator import TableAdaptator +from commons.core.sql.ITablePathAdaptator import ITablePathAdaptator + + +## Adaptator for a Path table +# +class TablePathAdaptator( TableAdaptator, ITablePathAdaptator ): + + ## Give a list of Path instances having the same identifier + # + # @param id integer identifier number + # @return lPath a list of Path instances + # + def getPathListFromId( self, id ): + sqlCmd = "SELECT * FROM %s WHERE path='%d';" % ( self._table, id ) + lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) + return lPath + + ## Give a list of Path instances according to the given list of identifier numbers + # + # @param lId integer list + # @return lPath a list of Path instances + # + def getPathListFromIdList( self, lId ): + lPath=[] + if lId == []: + return lPath + sqlCmd = "select * from %s where path=%d" % (self._table, lId[0]) + for i in lId[1:]: + sqlCmd += " or path=%d" % (i) + sqlCmd += ";" + lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) + return lPath + + ## Give a list of Path instances having the same given query name + # + # @param query string name of the query + # @return lPath a list of Path instances + # + def getPathListFromQuery( self, query ): + lPath = self._getPathListFromTypeName("query", query) + return lPath + + ## Give a list of Path instances having the same given subject name + # + # @param subject string name of the subject + # @return lPath a list of Path instances + # + def getPathListFromSubject( self, subject ): + lPath = self._getPathListFromTypeName("subject", subject) + return lPath + + ## Give a list of the distinct subject names present in the table + # + # @return lDistinctSubjectNames string list + # + def getSubjectList(self): + lDistinctSubjectNames = self._getDistinctTypeNamesList("subject") + return lDistinctSubjectNames + + ## Give a list of the distinct query names present in the table + # + # @return lDistinctQueryNames string list + # + def getQueryList(self): + lDistinctQueryNames = self._getDistinctTypeNamesList("query") + return lDistinctQueryNames + + ## Give a list of the distinct query names present in the table + # @note method to have correspondence with getSeqNameList() in TableSetAdaptator (for srptAutoPromote.py) + # + # @return lDistinctContigNames string list + # + def getSeqNameList(self): + return self.getQueryList() + + ## Give a list with all the distinct identifiers corresponding to the query + # + # @param query string name of the subject + # @return lId a list of integer + # + def getIdListFromQuery( self, query ): + lId = self._getIdListFromTypeName("query", query) + return lId + + ## Give a list with all the distinct identifiers corresponding to the subject + # + # @param subject string name of the subject + # @return lId a list of integer + # + def getIdListFromSubject( self, subject ): + lId = self._getIdListFromTypeName("subject", subject) + return lId + + ## Give a list of identifiers contained in the table + # + # @return lId integer list + # + def getIdList(self): + sqlCmd = "SELECT DISTINCT path from %s;" % (self._table) + lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd ) + return lId + + ## Give a list of the distinct subject names present in the table given a query name + # + # @param queryName string + # @return lDistinctSubjectNamesPerQuery string list + # + def getSubjectListFromQuery( self, queryName ): + sqlCmd = "SELECT DISTINCT subject_name FROM %s WHERE query_name='%s'" % ( self._table, queryName ) + lDistinctSubjectNamesPerQuery = self._iDb.getStringListWithSQLCmd(sqlCmd) + return lDistinctSubjectNamesPerQuery + + ## Give the data contained in the table as a list of Paths instances + # + # @return lPaths list of paths instances + # + def getListOfAllPaths( self ): + return self.getListOfAllCoordObject() + + ## Give a list of Path instances with the given query and subject, both on direct strand + # + # @param query string query name + # @param subject string subject name + # @return lPaths list of path instances + # + def getPathListWithDirectQueryDirectSubjectFromQuerySubject( self, query, subject ): + sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND subject_name='%s' AND query_start<query_end AND subject_start<subject_end ORDER BY query_name, subject_name, query_start;" % ( self._table, query, subject ) + lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) + return lPaths + + ## Give a list of Path instances with the given query on direct strand and the given subject on reverse strand + # + # @param query string query name + # @param subject string subject name + # @return lPaths list of path instances + # + def getPathListWithDirectQueryReverseSubjectFromQuerySubject( self, query, subject ): + sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND subject_name='%s' AND query_start<query_end AND subject_start>subject_end ORDER BY query_name, subject_name, query_start;" % ( self._table, query, subject ) + lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) + return lPaths + + ## Give the number of Path instances with the given query name + # + # @param query string query name + # @return pathNb integer the number of Path instances + # + def getNbPathsFromQuery( self, query ): + pathNb = self._getPathsNbFromTypeName("query", query) + return pathNb + + ## Give the number of Path instances with the given subject name + # + # @param subject string subject name + # @return pathNb integer the number of Path instances + # + def getNbPathsFromSubject( self, subject ): + pathNb = self._getPathsNbFromTypeName("subject", subject) + return pathNb + + ## Give the number of distinct path identifiers + # + # @return idNb integer the number of Path instances + # + def getNbIds( self ): + sqlCmd = "SELECT COUNT( DISTINCT path ) FROM %s" % ( self._table ) + idNb = self._iDb.getIntegerWithSQLCmd( sqlCmd ) + return idNb + + ## Give the number of distinct path identifiers for a given subject + # + # @param subjectName string subject name + # @return idNb integer the number of Path instances + # + def getNbIdsFromSubject( self, subjectName ): + idNb = self._getIdNbFromTypeName("subject", subjectName) + return idNb + + ## Give the number of distinct path identifiers for a given query + # + # @param queryName string query name + # @return idNb integer the number of Path instances + # + def getNbIdsFromQuery( self, queryName ): + idNb = self._getIdNbFromTypeName("query", queryName) + return idNb + + ## Give a list of Path instances included in a given query region + # + # @param query string query name + # @param start integer start coordinate + # @param end integer end coordinate + # @return lPaths list of Path instances + # + def getPathListIncludedInQueryCoord( self, query, start, end ): + if( start > end ): + tmp = start + start = end + end = tmp + sqlCmd = "SELECT * FROM %s WHERE query_name='%s' AND query_start>=%i AND query_end<=%i" % ( self._table, query, start, end ) + lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) + return lPaths + + ## Give a list of Path instances overlapping a given region + # + # @param query string query name + # @param start integer start coordinate + # @param end integer end coordinate + # @return lPath list of Path instances + # + def getPathListOverlappingQueryCoord( self, query, start, end ): + if( start > end ): + tmp = start + start = end + end = tmp + sqlCmd = "SELECT * FROM %s WHERE query_name='%s'" % ( self._table, query ) + sqlCmd += " AND ( ( query_start < %i AND query_end >= %i AND query_end <= %i )" % ( start, start, end ) + sqlCmd += " OR ( query_start >= %i AND query_end <= %i )" % ( start, end ) + sqlCmd += " OR ( query_start >= %i AND query_start <= %i AND query_end > %i )" % ( start, end, end ) + sqlCmd += " OR ( query_start < %i AND query_end > %i ) )" % ( start, end ) + lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) + return lPaths + + ## Give a list of Path instances overlapping a given region + # + # @note whole chains are returned, even if only a fragment overlap with the given region + # @param query string query name + # @param start integer start coordinate + # @param end integer end coordinate + # @return lPath list of Path instances + # + def getChainListOverlappingQueryCoord( self, query, start, end ): + if( start > end ): + tmp = start + start = end + end = tmp + sqlCmd = "SELECT DISTINCT path FROM %s WHERE query_name='%s'" % ( self._table, query ) + sqlCmd += " AND ( ( query_start <= %i AND query_end >= %i AND query_end <= %i )" % ( start, start, end ) + sqlCmd += " OR ( query_start >= %i AND query_end <= %i )" % ( start, end ) + sqlCmd += " OR ( query_start >= %i AND query_start <= %i AND query_end >= %i )" % ( start, end, end ) + sqlCmd += " OR ( query_start <= %i AND query_end >= %i ) )" % ( start, end ) + lIdentifiers = self._iDb.getIntegerListWithSQLCmd( sqlCmd ) + lPaths = self.getPathListFromIdList( lIdentifiers ) + return lPaths + + ## Give a list of Set instances overlapping a given region + # + # @param query string query name + # @param start integer start coordinate + # @param end integer end coordinate + # @return lSet list of Set instances + # + def getSetListOverlappingQueryCoord(self, query, start, end): + lPath = self.getPathListOverlappingQueryCoord(query, start, end) + lSet = PathUtils.getSetListFromQueries(lPath) + return lSet + + ## Give a list of Set instances included in a given region + # + # @param query string query name + # @param start integer start coordinate + # @param end integer end coordinate + # @return lSet list of Set instances + # + def getSetListIncludedInQueryCoord(self, query, start, end): + lPath=self.getPathListIncludedInQueryCoord(query, start, end) + lSet = PathUtils.getSetListFromQueries(lPath) + return lSet + + ## Give a a list of Path instances sorted by query coordinates + # + # @return lPaths list of Path instances + # + def getPathListSortedByQueryCoord( self ): + sqlCmd = "SELECT * FROM %s ORDER BY query_name, LEAST(query_start,query_end)" % ( self._table ) + lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) + return lPaths + + ## Give a a list of Path instances sorted by query coordinates for a given query + # + # @return lPaths list of Path instances + # + def getPathListSortedByQueryCoordFromQuery( self, queryName ): + sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY LEAST(query_start,query_end)" % ( self._table, queryName ) + lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) + return lPaths + + ## Give a a list of Path instances sorted by query coordinates and score for a given query + # + # @return lPaths list of Path instances + # + def getPathListSortedByQueryCoordAndScoreFromQuery(self, queryName): + sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY query_start, query_end, score" % (self._table, queryName) + lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) + return lPaths + + ## Give a cumulative length of all paths (fragments) for a given subject name + # + # @param subjectName string subject name + # @return nb Cumulative length for all path + # + # @warning doesn't take into account the overlaps !! + # + def getCumulLengthFromSubject( self, subjectName ): + sqlCmd = "SELECT SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s'" % ( self._table, subjectName ) + nb = self._iDb.getIntegerWithSQLCmd(sqlCmd) + return nb + + ## Give a list of the length of all chains of paths for a given subject name + # + # @param subjectName string name of the subject + # @return lChainLengths list of lengths per chain of paths + # + # @warning doesn't take into account the overlaps !! + # + def getChainLengthListFromSubject( self, subjectName ): + sqlCmd = "SELECT SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s' GROUP BY PATH" % ( self._table, subjectName ) + lChainLengths = self._iDb.getIntegerListWithSQLCmd(sqlCmd) + return lChainLengths + + ## Give a list of identity of all chains of paths for a given subject name + # + # @param subjectName string name of the subject + # @return lChainIdentities list of identities per chain of paths + # + # @warning doesn't take into account the overlaps !! + # + def getChainIdentityListFromSubject( self, subjectName ): + lChainIdentities = [] + sqlCmd = "SELECT SUM(identity*(ABS(query_start-query_end)+1)) / SUM(ABS(query_end-query_start)+1) FROM %s WHERE subject_name='%s' GROUP BY PATH" % ( self._table, subjectName ) + self._iDb.execute( sqlCmd ) + res = self._iDb.fetchall() + for i in res: + if i[0] != None: + lChainIdentities.append( round( float( i[0] ), 2 ) ) + return lChainIdentities + + ## Give a list of the length of all paths for a given subject name + # + # @param subjectName string name of the subject + # @return lPathLengths list of lengths per path + # + # @warning doesn't take into account the overlaps !! + # + def getPathLengthListFromSubject( self, subjectName ): + sqlCmd = "SELECT ABS(query_end-query_start)+1 FROM %s WHERE subject_name='%s'" % ( self._table, subjectName ) + lPathLengths = self._iDb.getIntegerListWithSQLCmd(sqlCmd) + return lPathLengths + + ## Give a a list with all distinct identifiers for a given subject sorted in decreasing order according to the length of the chains + # + # @param subjectName string subject name + # @return lPathNums a list of paths Id + # + def getIdListSortedByDecreasingChainLengthFromSubject( self, subjectName ): + sqlCmd = "SELECT DISTINCT path, SUM( ABS(query_end - query_start) + 1 ) AS length" + sqlCmd += " FROM %s" % ( self._table ) + sqlCmd += " WHERE subject_name='%s'" % ( subjectName ) + sqlCmd += " GROUP BY path" + sqlCmd += " ORDER BY length DESC"; + lPathNums = self._iDb.getIntegerListWithSQLCmd(sqlCmd) + return lPathNums + + ## Give a a list with all distinct identifiers for a given subject where the chain lengths is above a given threshold + # + # @param subjectName string subject name + # @lengthThreshold length threshold below which chains are filtered + # @return lPathNums a list of paths Id + # + def getIdListFromSubjectWhereChainsLongerThanThreshold( self, subjectName, lengthThreshold ): + lPathNums = [] + sqlCmd = "SELECT DISTINCT path, SUM( ABS(query_end - query_start) + 1 ) AS length" + sqlCmd += " FROM %s" % ( self._table ) + sqlCmd += " WHERE subject_name='%s'" % ( subjectName ) + sqlCmd += " GROUP BY path" + sqlCmd += " ORDER BY length DESC"; + self._iDb.execute( sqlCmd ) + res = self._iDb.fetchall() + for i in res: + if int(i[1]) >= int(lengthThreshold): + lPathNums.append( i[0] ) + return lPathNums + + ## Give a Set instances list of a query annotation + # + # @param query string query name + # @return lSets list of set instance + # + def getSetListFromQuery(self, query): + lpath = self.getPathListFromQuery(query) + lSets = PathUtils.getSetListFromQueries(lpath) + return lSets + + ## Give a Set instances list of a query annotation + # @note method to have correspondence with getSetListFromSeqName() in TableSetAdaptator (for srptAutoPromote.py) + # + # @param query string query name + # @return lSets list of set instance + # + def getSetListFromSeqName(self, query): + return self.getSetListFromQuery(query) + + ## Delete path corresponding to a given identifier number + # + # @param id integer identifier number + # + def deleteFromId(self,id): + sqlCmd = "delete from %s where path=%d;" % (self._table, id) + self._iDb.execute(sqlCmd) + + ## Delete path corresponding to a given object path line + # + # @param path object + # + def deleteFromPath(self,path): + sqlCmd = "delete from %s where path=%d and query_name='%s' and query_start=%s and query_end=%s and subject_name='%s' and subject_start=%s and subject_end=%s and E_value=%s and score=%s" % (self._table, path.getIdentifier(), path.getQueryName(), path.getQueryStart(), path.getQueryEnd(), path.getSubjectName(), path.getSubjectStart(), path.getSubjectEnd(), path.getEvalue(), int(path.getScore())) + self._iDb.execute(sqlCmd) + + ## Delete path corresponding to a given list of identifier number + # + # @param lId list of identifier number + # + def deleteFromIdList(self,lId): + if lId == []: + return + sqlCmd = "delete from %s where path=%d" % (self._table, lId[0]) + for id in lId[1:]: + sqlCmd += " or path=%d" %(id) + sqlCmd += ";" + self._iDb.execute(sqlCmd) + + ## Get a new id number + # + # @return newId integer new id + # + def getNewId(self): + sqlCmd = 'select max(path) from %s;' % (self._table) + maxId = self._iDb.getIntegerWithSQLCmd(sqlCmd) + newId = int(maxId)+1 + return newId + + ## Join two path by changing id number of id1 and id2 path to the least of id1 and id2 + # + # @param id1 integer id path number + # @param id2 integer id path number + # @return newId integer minimum of id1 id2 + # @note this method modify the ID even if this one not existing in the path table + # + def joinTwoPaths(self, id1, id2): + if id1 < id2: + newId = id1 + oldId = id2 + else: + newId = id2 + oldId = id1 + sqlCmd = "UPDATE %s SET path=%d WHERE path=%d"\ + % (self._table, newId, oldId) + self._iDb.execute(sqlCmd) + return newId + + ## Create a 'pathRange' table from a 'path' table. + # The output table summarizes the information per identifier. + # The min and max value are taken. + # The identity is averaged over the fragments. + # It may overwrite an existing table. + # + # @param outTable string name of the output table + # @return outTable string Table which summarizes the information per identifier + # + def path2PathRange( self, outTable="" ): + return self._path2PathRangeOrPath2PathRangeQuery(outTable) + + ## Create a 'pathrange' table from a 'path' table for the given query name + # The output table summarizes the information per identifier + # The min and max value are taken + # The identity is averaged over the fragments, weighted by the length of the of the query + # It may overwrite an existing table + # + # @param outTable string name of the output table + # @param query string query name + # @return outTable string Table which summarizes the information per identifier + # + def _path2PathRangeFromQuery( self, queryName, outTable="" ): + return self._path2PathRangeOrPath2PathRangeQuery(outTable, queryName) + + def _path2PathRangeOrPath2PathRangeQuery(self, outTable, queryName=""): + self._iDb.createIndex( self._table, "path" ) + if outTable == "": + outTable = "%s_range" % ( self._table ) + self._iDb.dropTable( outTable ) + + tmpTable = "%s_tmp" % ( self._table ) + self._iDb.dropTable( tmpTable ) + + sqlCmd = self._genSqlCmdForTmpTableAccordingToQueryName(queryName, tmpTable) + self._iDb.execute(sqlCmd) + + sqlCmd = "CREATE TABLE %s SELECT path, query_name, MIN(query_start) AS query_start, MAX(query_end) AS query_end, subject_name, MIN(subject_start) AS subject_start, MAX(subject_end) AS subject_end, MIN(e_value) AS e_value, SUM(score) AS score, TRUNCATE(SUM(identity)/SUM(ABS(query_end-query_start)+1),2) AS identity FROM %s WHERE query_start<query_end AND subject_start<subject_end GROUP BY path;" % ( outTable, tmpTable ) + self._iDb.execute( sqlCmd ) + + sqlCmd = "INSERT into %s SELECT path, query_name, MIN(query_start) AS query_start, MAX(query_end) AS query_end, subject_name, MAX(subject_start) AS subject_start, MIN(subject_end) AS subject_end, MIN(e_value) AS e_value, SUM(score) AS score, TRUNCATE(SUM(identity)/SUM(ABS(query_end-query_start)+1),2) AS identity FROM %s WHERE query_start<query_end AND subject_start>subject_end GROUP BY path;" % ( outTable, tmpTable ) + self._iDb.execute( sqlCmd ) + + self._iDb.createIndex( outTable, "path" ) + self._iDb.dropTable( tmpTable ) + return outTable + + ## Give a list of Path lists sorted by weighted identity. + # + # @return lChains list of chains + # + def getListOfChainsSortedByAscIdentityFromQuery( self, qry ): + lChains = [] + tmpTable = self._path2PathRangeFromQuery( qry ) + sqlCmd = "SELECT path FROM %s ORDER BY identity" % ( tmpTable ) + self._iDb.execute( sqlCmd ) + lPathnums = self._iDb.fetchall() + self._iDb.dropTable( tmpTable ) + for pathnum in lPathnums: + lChains.append( self.getPathListFromId( int(pathnum[0]) ) ) + return lChains + + ## Give a list of path instances sorted by increasing E-value + # + # @return lPaths list of path instances + # + def getPathListSortedByIncreasingEvalueFromQuery( self, queryName ): + sqlCmd = "SELECT * FROM %s WHERE query_name='%s' ORDER BY E_value ASC" % ( self._table, queryName ) + lPaths = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) + return lPaths + + + ## Return the number of times a given instance is present in the table + # The identifier is not considered, + # only coordinates, score, E-value and identity. + # + # @return nbOcc integer + # + def getNbOccurrences( self, iPath ): + sqlCmd = "SELECT COUNT(*) FROM %s WHERE" % ( self._table ) + sqlCmd += " query_name='%s'" % ( iPath.range_query.seqname ) + sqlCmd += " AND query_start='%s'" % ( iPath.range_query.start ) + sqlCmd += " AND query_end='%s'" % ( iPath.range_query.end ) + sqlCmd += " AND subject_name='%s'" % ( iPath.range_subject.seqname ) + sqlCmd += " AND subject_start='%s'" % ( iPath.range_subject.start ) + sqlCmd += " AND subject_end='%s'" % ( iPath.range_subject.end ) + sqlCmd += " AND score='%s'" % ( iPath.score ) + sqlCmd += " AND e_value='%s'" % ( iPath.e_value ) + sqlCmd += " AND identity='%s'" % ( iPath.identity ) + nbOcc = self._iDb.getIntegerWithSQLCmd( sqlCmd ) + return nbOcc + + + def _getPathListFromTypeName( self, type, typeName ): + sqlCmd = "SELECT * FROM %s WHERE %s_name='%s';" % ( self._table, type, typeName ) + lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt ) + return lPath + + def _getDistinctTypeNamesList( self, type ): + sqlCmd = "SELECT DISTINCT %s_name FROM %s" % ( type, self._table ) + lDistinctTypeNames = self._iDb.getStringListWithSQLCmd(sqlCmd) + return lDistinctTypeNames + + def _getPathsNbFromTypeName( self, type, typeName ): + sqlCmd = "SELECT COUNT(*) FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName ) + pathNb = self._iDb.getIntegerWithSQLCmd( sqlCmd ) + return pathNb + + def _getIdListFromTypeName( self, type, typeName ): + sqlCmd = "SELECT DISTINCT path FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName ) + lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd ) + return lId + + def _getIdNbFromTypeName( self, type, typeName ): + sqlCmd = "SELECT COUNT( DISTINCT path ) FROM %s WHERE %s_name='%s'" % ( self._table, type, typeName ) + idNb = self._iDb.getIntegerWithSQLCmd( sqlCmd ) + return idNb + + def _getTypeAndAttr2Insert(self, path): + type2Insert = ("'%d'", "'%s'", "'%d'", "'%d'", "'%s'", "'%d'", "'%d'", "'%g'", "'%d'", "'%f'") + if path.range_query.isOnDirectStrand(): + queryStart = path.range_query.start + queryEnd = path.range_query.end + subjectStart = path.range_subject.start + subjectEnd = path.range_subject.end + else: + queryStart = path.range_query.end + queryEnd = path.range_query.start + subjectStart = path.range_subject.end + subjectEnd = path.range_subject.start + attr2Insert = ( path.id,\ + path.range_query.seqname,\ + queryStart,\ + queryEnd,\ + path.range_subject.seqname,\ + subjectStart,\ + subjectEnd,\ + path.e_value,\ + path.score,\ + path.identity\ + ) + return type2Insert, attr2Insert + + def _getInstanceToAdapt(self): + iPath = Path() + return iPath + + def _escapeAntislash(self, obj): + obj.range_query.seqname = obj.range_query.seqname.replace("\\", "\\\\") + obj.range_subject.seqname = obj.range_subject.seqname.replace("\\", "\\\\") + + def _genSqlCmdForTmpTableAccordingToQueryName(self, queryName, tmpTable): + sqlCmd = "" + if queryName == "": + sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s" % (tmpTable, self._table) + else: + sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s WHERE query_name='%s'" % (tmpTable, self._table, queryName) + return sqlCmd + + ## return a filtered list with only one unique occurrence of path of a given list + # + # @param lPath a list of Path instances + # @return lUniquePath a list of Path instances + # + def getListOfUniqueOccPath(self, lPath): + if len(lPath) < 2 : + return lPath + + sortedListPath = sorted(lPath, key=lambda iPath: ( iPath.range_query.getSeqname(), iPath.range_query.getStart(), iPath.range_query.getEnd(), iPath.range_subject.getSeqname(), iPath.range_subject.getStart(), iPath.range_subject.getEnd())) + lUniquePath = [] + for i in xrange(1, len(sortedListPath)): + previousPath = sortedListPath [i-1] + currentPath = sortedListPath [i] + if previousPath != currentPath: + lUniquePath.append(previousPath) + + if previousPath != currentPath: + lUniquePath.append(currentPath) + + return lUniquePath \ No newline at end of file