diff commons/core/sql/TableBinPathAdaptator.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableBinPathAdaptator.py	Fri Jan 18 04:54:14 2013 -0500
@@ -0,0 +1,257 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from commons.core.coord.Range import getIdx
+from commons.core.sql.TablePathAdaptator import TablePathAdaptator
+from commons.core.coord.PathUtils import PathUtils
+
+## Bin Adaptator for a path table.
+#
+class TableBinPathAdaptator(TablePathAdaptator):
+
+    
+    ## Constructor
+    #
+    # @param db db instance
+    # @param tableName string table name (default = "")
+    #
+    def __init__(self, db, tableName = ""):
+        TablePathAdaptator.__init__(self, db, tableName)
+        self._table_idx = "%s_idx" % (self._table)
+            
+    ## Insert a path instance
+    #
+    # @param path a path instance
+    # @param delayed boolean indicating if the insert must be delayed (default = false) 
+    #        
+    def insert( self, path, delayed = False ):
+        TablePathAdaptator.insert(self, path, delayed)
+        self._escapeAntislash(path)
+        idx = path.range_query.findIdx()
+        max = path.range_query.getMax()
+        min = path.range_query.getMin()
+        strand = path.range_query.isOnDirectStrand()
+        if delayed:
+            sql_cmd = 'INSERT DELAYED INTO %s VALUES (%d,%d,"%s",%d,%d,%d)'\
+                 % (self._table_idx,\
+                   path.id,\
+                   idx,\
+                   path.range_query.seqname,\
+                   min,\
+                   max,\
+                   strand)
+        else:
+            sql_cmd = 'INSERT INTO %s VALUES (%d,%d,"%s",%d,%d,%d)'\
+                 % (self._table_idx,\
+                   path.id,\
+                   idx,\
+                   path.range_query.seqname,\
+                   min,\
+                   max,\
+                   strand)
+            
+        self._iDb.execute(sql_cmd)
+    
+    ## Return a path instances list included in a given region using the bin scheme
+    #
+    # @param contig string contig name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lOutPath a path instances list
+    #
+    def getPathListIncludedInQueryCoord(self, contig, start, end):
+        min_coord = min(start, end)
+        max_coord = max(start, end)
+        lpath = self.getChainListOverlappingQueryCoord(contig, start, end)
+        lOutPath = []
+        for i in lpath:
+            if i.range_query.getMin() > min_coord and \
+               i.range_query.getMax() < max_coord:
+                lOutPath.append(i)
+                            
+        return lOutPath
+    
+    ## Return a path instances list overlapping (and included) in a given region using the bin scheme
+    #
+    # @param contig string contig name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lOutPath a path instances list
+    #
+    def getPathListOverlappingQueryCoord(self, contig, start, end):
+        min_coord = min(start, end)
+        max_coord = max(start, end)
+        lpath = self.getChainListOverlappingQueryCoord(contig, start, end)
+        lOutPath = []
+        for i in lpath:
+            if ((i.range_query.getMin() <= min_coord and i.range_query.getMax() >= min_coord) or \
+                (i.range_query.getMin() >= min_coord and i.range_query.getMin() <= max_coord) or \
+                (i.range_query.getMin() <= min_coord and i.range_query.getMax() >= max_coord) or \
+                (i.range_query.getMin() >= min_coord and i.range_query.getMax() <= max_coord)) and \
+                (i.range_query.getSeqname() == contig):
+                    lOutPath.append(i)
+                    
+        return lOutPath
+    
+    ## Return a path instances list chain (by Id and Coord in chr) list overlapping a given region using the bin scheme
+    #
+    # @param contig string contig name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lpath a path instances list
+    #    
+    def getChainListOverlappingQueryCoord(self, contig, start, end):
+        min_coord = min(start, end)
+        max_coord = max(start, end)
+        sql_cmd = 'select distinct path from %s where contig="%s" and ('\
+                 % (self._table + "_idx", contig)
+                 
+        for bin_lvl in xrange(6, 2, -1):
+            if getIdx(start,bin_lvl) == getIdx(end, bin_lvl):
+                idx = getIdx(start, bin_lvl)
+                sql_cmd += 'idx=%d' % (idx)
+            else:
+                idx1 = getIdx(min_coord, bin_lvl)
+                idx2 = getIdx(max_coord, bin_lvl)
+                sql_cmd += 'idx between %d and %d' % (idx1, idx2)
+            if bin_lvl > 3:
+                sql_cmd += " or "
+                
+        sql_cmd += ") and min<=%d and max>=%d;" % (max_coord, min_coord)
+
+        
+        self._iDb.execute(sql_cmd)
+        res = self._iDb.fetchall()
+        lnum = []
+        for i in res:
+            lnum.append( int(i[0]) )
+        lpath = self.getPathListFromIdList(lnum)
+        return lpath
+
+    ## Delete path corresponding to a given identifier number
+    #
+    # @param num integer identifier number
+    #
+    def deleteFromId(self, num):
+        TablePathAdaptator.deleteFromId(self, num)
+        sqlCmd='delete from %s where path=%d;' % (self._table_idx, num)
+        self._iDb.execute(sqlCmd)
+    
+    ## Delete path corresponding to a given list of identifier number
+    #
+    # @param lNum list list of integer identifier number
+    #
+    def deleteFromIdList(self, lNum):
+        if lNum == []:
+            return
+        TablePathAdaptator.deleteFromIdList(self, lNum)
+        sqlCmd = 'delete from %s where path=%d' % (self._table_idx, lNum[0])
+        for i in lNum[1:]:
+            sqlCmd += " or path=%d" % (i)
+        sqlCmd += ";"
+        self._iDb.execute(sqlCmd)
+             
+    ##  Join two path by changing id number of id1 and id2 path to the least of id1 and id2
+    #
+    # @param id1 integer id path number
+    # @param id2 integer id path number
+    # @return newId integer minimum of id1 id2
+    # @note this method modify the ID even if this one not existing in the path table  
+    #     
+    def joinTwoPaths(self, id1, id2):
+        TablePathAdaptator.joinTwoPaths(self, id1, id2)
+        if id1 < id2:
+            newId = id1
+            oldId = id2
+        else:
+            newId = id2
+            oldId = id1
+        sqlCmd = 'UPDATE %s SET path=%d WHERE path=%d' % (self._table_idx, newId, oldId)
+        self._iDb.execute(sqlCmd)
+        return newId
+    
+    ## Get a new id number
+    #
+    # @return newId integer max Id in path table + 1
+    #
+    def getNewId(self):
+        sqlCmd = 'select max(path) from %s;' % (self._table_idx)
+        self._iDb.execute(sqlCmd)
+        maxId = self._iDb.fetchall()[0][0]
+        if maxId == None:
+            maxId = 0
+        newId = int(maxId) + 1
+        return newId
+    
+    ## Give a list of Set instances included in a given region
+    #
+    # @param query string query name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lSet list of Set instances
+    #
+    def getSetListIncludedInQueryCoord(self, query, start, end):
+        lPath=self.getPathListIncludedInQueryCoord(query, start, end)
+        lSet = PathUtils.getSetListFromQueries(lPath) 
+        return lSet
+    
+    ## Give a list of Set instances overlapping a given region
+    #
+    # @param query string query name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lSet list of Set instances
+    #
+    def getSetListOverlappingQueryCoord(self, query, start, end):
+        lPath = self.getPathListOverlappingQueryCoord(query, start, end)
+        lSet = PathUtils.getSetListFromQueries(lPath)
+        return lSet
+    
+    ## Give a list of identifiers contained in the table
+    #
+    # @return lId integer list
+    #
+    def getIdList(self):
+        sqlCmd = "SELECT DISTINCT path from %s;" % (self._table_idx)
+        lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd )
+        return lId
+        
+    ## Give a list of the distinct query names present in the table
+    #
+    # @return lDistinctQueryNames string list
+    #
+    def getQueryList(self):
+        lDistinctQueryNames = self._getDistinctTypeNamesList("query")
+        return lDistinctQueryNames
+    
+    def _getDistinctTypeNamesList( self, type ):
+        sqlCmd = "SELECT DISTINCT contig FROM %s" % ( self._table_idx )
+        lDistinctTypeNames = self._iDb.getStringListWithSQLCmd(sqlCmd)
+        return lDistinctTypeNames
\ No newline at end of file