| 
6
 | 
     1 # Copyright INRA (Institut National de la Recherche Agronomique)
 | 
| 
 | 
     2 # http://www.inra.fr
 | 
| 
 | 
     3 # http://urgi.versailles.inra.fr
 | 
| 
 | 
     4 #
 | 
| 
 | 
     5 # This software is governed by the CeCILL license under French law and
 | 
| 
 | 
     6 # abiding by the rules of distribution of free software.  You can  use, 
 | 
| 
 | 
     7 # modify and/ or redistribute the software under the terms of the CeCILL
 | 
| 
 | 
     8 # license as circulated by CEA, CNRS and INRIA at the following URL
 | 
| 
 | 
     9 # "http://www.cecill.info". 
 | 
| 
 | 
    10 #
 | 
| 
 | 
    11 # As a counterpart to the access to the source code and  rights to copy,
 | 
| 
 | 
    12 # modify and redistribute granted by the license, users are provided only
 | 
| 
 | 
    13 # with a limited warranty  and the software's author,  the holder of the
 | 
| 
 | 
    14 # economic rights,  and the successive licensors  have only  limited
 | 
| 
 | 
    15 # liability. 
 | 
| 
 | 
    16 #
 | 
| 
 | 
    17 # In this respect, the user's attention is drawn to the risks associated
 | 
| 
 | 
    18 # with loading,  using,  modifying and/or developing or reproducing the
 | 
| 
 | 
    19 # software by the user in light of its specific status of free software,
 | 
| 
 | 
    20 # that may mean  that it is complicated to manipulate,  and  that  also
 | 
| 
 | 
    21 # therefore means  that it is reserved for developers  and  experienced
 | 
| 
 | 
    22 # professionals having in-depth computer knowledge. Users are therefore
 | 
| 
 | 
    23 # encouraged to load and test the software's suitability as regards their
 | 
| 
 | 
    24 # requirements in conditions enabling the security of their systems and/or 
 | 
| 
 | 
    25 # data to be ensured and,  more generally, to use and operate it in the 
 | 
| 
 | 
    26 # same conditions as regards security. 
 | 
| 
 | 
    27 #
 | 
| 
 | 
    28 # The fact that you are presently reading this means that you have had
 | 
| 
 | 
    29 # knowledge of the CeCILL license and that you accept its terms.
 | 
| 
 | 
    30 
 | 
| 
 | 
    31 from commons.core.coord.Range import getIdx
 | 
| 
 | 
    32 from commons.core.sql.TablePathAdaptator import TablePathAdaptator
 | 
| 
 | 
    33 from commons.core.coord.PathUtils import PathUtils
 | 
| 
 | 
    34 
 | 
| 
 | 
    35 ## Bin Adaptator for a path table.
 | 
| 
 | 
    36 #
 | 
| 
 | 
    37 class TableBinPathAdaptator(TablePathAdaptator):
 | 
| 
 | 
    38 
 | 
| 
 | 
    39     
 | 
| 
 | 
    40     ## Constructor
 | 
| 
 | 
    41     #
 | 
| 
 | 
    42     # @param db db instance
 | 
| 
 | 
    43     # @param tableName string table name (default = "")
 | 
| 
 | 
    44     #
 | 
| 
 | 
    45     def __init__(self, db, tableName = ""):
 | 
| 
 | 
    46         TablePathAdaptator.__init__(self, db, tableName)
 | 
| 
 | 
    47         self._table_idx = "%s_idx" % (self._table)
 | 
| 
 | 
    48             
 | 
| 
 | 
    49     ## Insert a path instance
 | 
| 
 | 
    50     #
 | 
| 
 | 
    51     # @param path a path instance
 | 
| 
 | 
    52     # @param delayed boolean indicating if the insert must be delayed (default = false) 
 | 
| 
 | 
    53     #        
 | 
| 
 | 
    54     def insert( self, path, delayed = False ):
 | 
| 
 | 
    55         TablePathAdaptator.insert(self, path, delayed)
 | 
| 
 | 
    56         self._escapeAntislash(path)
 | 
| 
 | 
    57         idx = path.range_query.findIdx()
 | 
| 
 | 
    58         max = path.range_query.getMax()
 | 
| 
 | 
    59         min = path.range_query.getMin()
 | 
| 
 | 
    60         strand = path.range_query.isOnDirectStrand()
 | 
| 
 | 
    61         if delayed:
 | 
| 
 | 
    62             sql_cmd = 'INSERT DELAYED INTO %s VALUES (%d,%d,"%s",%d,%d,%d)'\
 | 
| 
 | 
    63                  % (self._table_idx,\
 | 
| 
 | 
    64                    path.id,\
 | 
| 
 | 
    65                    idx,\
 | 
| 
 | 
    66                    path.range_query.seqname,\
 | 
| 
 | 
    67                    min,\
 | 
| 
 | 
    68                    max,\
 | 
| 
 | 
    69                    strand)
 | 
| 
 | 
    70         else:
 | 
| 
 | 
    71             sql_cmd = 'INSERT INTO %s VALUES (%d,%d,"%s",%d,%d,%d)'\
 | 
| 
 | 
    72                  % (self._table_idx,\
 | 
| 
 | 
    73                    path.id,\
 | 
| 
 | 
    74                    idx,\
 | 
| 
 | 
    75                    path.range_query.seqname,\
 | 
| 
 | 
    76                    min,\
 | 
| 
 | 
    77                    max,\
 | 
| 
 | 
    78                    strand)
 | 
| 
 | 
    79             
 | 
| 
 | 
    80         self._iDb.execute(sql_cmd)
 | 
| 
 | 
    81     
 | 
| 
 | 
    82     ## Return a path instances list included in a given region using the bin scheme
 | 
| 
 | 
    83     #
 | 
| 
 | 
    84     # @param contig string contig name
 | 
| 
 | 
    85     # @param start integer start coordinate
 | 
| 
 | 
    86     # @param end integer end coordinate
 | 
| 
 | 
    87     # @return lOutPath a path instances list
 | 
| 
 | 
    88     #
 | 
| 
 | 
    89     def getPathListIncludedInQueryCoord(self, contig, start, end):
 | 
| 
 | 
    90         min_coord = min(start, end)
 | 
| 
 | 
    91         max_coord = max(start, end)
 | 
| 
 | 
    92         lpath = self.getChainListOverlappingQueryCoord(contig, start, end)
 | 
| 
 | 
    93         lOutPath = []
 | 
| 
 | 
    94         for i in lpath:
 | 
| 
 | 
    95             if i.range_query.getMin() > min_coord and \
 | 
| 
 | 
    96                i.range_query.getMax() < max_coord:
 | 
| 
 | 
    97                 lOutPath.append(i)
 | 
| 
 | 
    98                             
 | 
| 
 | 
    99         return lOutPath
 | 
| 
 | 
   100     
 | 
| 
 | 
   101     ## Return a path instances list overlapping (and included) in a given region using the bin scheme
 | 
| 
 | 
   102     #
 | 
| 
 | 
   103     # @param contig string contig name
 | 
| 
 | 
   104     # @param start integer start coordinate
 | 
| 
 | 
   105     # @param end integer end coordinate
 | 
| 
 | 
   106     # @return lOutPath a path instances list
 | 
| 
 | 
   107     #
 | 
| 
 | 
   108     def getPathListOverlappingQueryCoord(self, contig, start, end):
 | 
| 
 | 
   109         min_coord = min(start, end)
 | 
| 
 | 
   110         max_coord = max(start, end)
 | 
| 
 | 
   111         lpath = self.getChainListOverlappingQueryCoord(contig, start, end)
 | 
| 
 | 
   112         lOutPath = []
 | 
| 
 | 
   113         for i in lpath:
 | 
| 
 | 
   114             if ((i.range_query.getMin() <= min_coord and i.range_query.getMax() >= min_coord) or \
 | 
| 
 | 
   115                 (i.range_query.getMin() >= min_coord and i.range_query.getMin() <= max_coord) or \
 | 
| 
 | 
   116                 (i.range_query.getMin() <= min_coord and i.range_query.getMax() >= max_coord) or \
 | 
| 
 | 
   117                 (i.range_query.getMin() >= min_coord and i.range_query.getMax() <= max_coord)) and \
 | 
| 
 | 
   118                 (i.range_query.getSeqname() == contig):
 | 
| 
 | 
   119                     lOutPath.append(i)
 | 
| 
 | 
   120                     
 | 
| 
 | 
   121         return lOutPath
 | 
| 
 | 
   122     
 | 
| 
 | 
   123     ## Return a path instances list chain (by Id and Coord in chr) list overlapping a given region using the bin scheme
 | 
| 
 | 
   124     #
 | 
| 
 | 
   125     # @param contig string contig name
 | 
| 
 | 
   126     # @param start integer start coordinate
 | 
| 
 | 
   127     # @param end integer end coordinate
 | 
| 
 | 
   128     # @return lpath a path instances list
 | 
| 
 | 
   129     #    
 | 
| 
 | 
   130     def getChainListOverlappingQueryCoord(self, contig, start, end):
 | 
| 
 | 
   131         min_coord = min(start, end)
 | 
| 
 | 
   132         max_coord = max(start, end)
 | 
| 
 | 
   133         sql_cmd = 'select distinct path from %s where contig="%s" and ('\
 | 
| 
 | 
   134                  % (self._table + "_idx", contig)
 | 
| 
 | 
   135                  
 | 
| 
 | 
   136         for bin_lvl in xrange(6, 2, -1):
 | 
| 
 | 
   137             if getIdx(start,bin_lvl) == getIdx(end, bin_lvl):
 | 
| 
 | 
   138                 idx = getIdx(start, bin_lvl)
 | 
| 
 | 
   139                 sql_cmd += 'idx=%d' % (idx)
 | 
| 
 | 
   140             else:
 | 
| 
 | 
   141                 idx1 = getIdx(min_coord, bin_lvl)
 | 
| 
 | 
   142                 idx2 = getIdx(max_coord, bin_lvl)
 | 
| 
 | 
   143                 sql_cmd += 'idx between %d and %d' % (idx1, idx2)
 | 
| 
 | 
   144             if bin_lvl > 3:
 | 
| 
 | 
   145                 sql_cmd += " or "
 | 
| 
 | 
   146                 
 | 
| 
 | 
   147         sql_cmd += ") and min<=%d and max>=%d;" % (max_coord, min_coord)
 | 
| 
 | 
   148 
 | 
| 
 | 
   149         
 | 
| 
 | 
   150         self._iDb.execute(sql_cmd)
 | 
| 
 | 
   151         res = self._iDb.fetchall()
 | 
| 
 | 
   152         lnum = []
 | 
| 
 | 
   153         for i in res:
 | 
| 
 | 
   154             lnum.append( int(i[0]) )
 | 
| 
 | 
   155         lpath = self.getPathListFromIdList(lnum)
 | 
| 
 | 
   156         return lpath
 | 
| 
 | 
   157 
 | 
| 
 | 
   158     ## Delete path corresponding to a given identifier number
 | 
| 
 | 
   159     #
 | 
| 
 | 
   160     # @param num integer identifier number
 | 
| 
 | 
   161     #
 | 
| 
 | 
   162     def deleteFromId(self, num):
 | 
| 
 | 
   163         TablePathAdaptator.deleteFromId(self, num)
 | 
| 
 | 
   164         sqlCmd='delete from %s where path=%d;' % (self._table_idx, num)
 | 
| 
 | 
   165         self._iDb.execute(sqlCmd)
 | 
| 
 | 
   166     
 | 
| 
 | 
   167     ## Delete path corresponding to a given list of identifier number
 | 
| 
 | 
   168     #
 | 
| 
 | 
   169     # @param lNum list list of integer identifier number
 | 
| 
 | 
   170     #
 | 
| 
 | 
   171     def deleteFromIdList(self, lNum):
 | 
| 
 | 
   172         if lNum == []:
 | 
| 
 | 
   173             return
 | 
| 
 | 
   174         TablePathAdaptator.deleteFromIdList(self, lNum)
 | 
| 
 | 
   175         sqlCmd = 'delete from %s where path=%d' % (self._table_idx, lNum[0])
 | 
| 
 | 
   176         for i in lNum[1:]:
 | 
| 
 | 
   177             sqlCmd += " or path=%d" % (i)
 | 
| 
 | 
   178         sqlCmd += ";"
 | 
| 
 | 
   179         self._iDb.execute(sqlCmd)
 | 
| 
 | 
   180              
 | 
| 
 | 
   181     ##  Join two path by changing id number of id1 and id2 path to the least of id1 and id2
 | 
| 
 | 
   182     #
 | 
| 
 | 
   183     # @param id1 integer id path number
 | 
| 
 | 
   184     # @param id2 integer id path number
 | 
| 
 | 
   185     # @return newId integer minimum of id1 id2
 | 
| 
 | 
   186     # @note this method modify the ID even if this one not existing in the path table  
 | 
| 
 | 
   187     #     
 | 
| 
 | 
   188     def joinTwoPaths(self, id1, id2):
 | 
| 
 | 
   189         TablePathAdaptator.joinTwoPaths(self, id1, id2)
 | 
| 
 | 
   190         if id1 < id2:
 | 
| 
 | 
   191             newId = id1
 | 
| 
 | 
   192             oldId = id2
 | 
| 
 | 
   193         else:
 | 
| 
 | 
   194             newId = id2
 | 
| 
 | 
   195             oldId = id1
 | 
| 
 | 
   196         sqlCmd = 'UPDATE %s SET path=%d WHERE path=%d' % (self._table_idx, newId, oldId)
 | 
| 
 | 
   197         self._iDb.execute(sqlCmd)
 | 
| 
 | 
   198         return newId
 | 
| 
 | 
   199     
 | 
| 
 | 
   200     ## Get a new id number
 | 
| 
 | 
   201     #
 | 
| 
 | 
   202     # @return newId integer max Id in path table + 1
 | 
| 
 | 
   203     #
 | 
| 
 | 
   204     def getNewId(self):
 | 
| 
 | 
   205         sqlCmd = 'select max(path) from %s;' % (self._table_idx)
 | 
| 
 | 
   206         self._iDb.execute(sqlCmd)
 | 
| 
 | 
   207         maxId = self._iDb.fetchall()[0][0]
 | 
| 
 | 
   208         if maxId == None:
 | 
| 
 | 
   209             maxId = 0
 | 
| 
 | 
   210         newId = int(maxId) + 1
 | 
| 
 | 
   211         return newId
 | 
| 
 | 
   212     
 | 
| 
 | 
   213     ## Give a list of Set instances included in a given region
 | 
| 
 | 
   214     #
 | 
| 
 | 
   215     # @param query string query name
 | 
| 
 | 
   216     # @param start integer start coordinate
 | 
| 
 | 
   217     # @param end integer end coordinate
 | 
| 
 | 
   218     # @return lSet list of Set instances
 | 
| 
 | 
   219     #
 | 
| 
 | 
   220     def getSetListIncludedInQueryCoord(self, query, start, end):
 | 
| 
 | 
   221         lPath=self.getPathListIncludedInQueryCoord(query, start, end)
 | 
| 
 | 
   222         lSet = PathUtils.getSetListFromQueries(lPath) 
 | 
| 
 | 
   223         return lSet
 | 
| 
 | 
   224     
 | 
| 
 | 
   225     ## Give a list of Set instances overlapping a given region
 | 
| 
 | 
   226     #
 | 
| 
 | 
   227     # @param query string query name
 | 
| 
 | 
   228     # @param start integer start coordinate
 | 
| 
 | 
   229     # @param end integer end coordinate
 | 
| 
 | 
   230     # @return lSet list of Set instances
 | 
| 
 | 
   231     #
 | 
| 
 | 
   232     def getSetListOverlappingQueryCoord(self, query, start, end):
 | 
| 
 | 
   233         lPath = self.getPathListOverlappingQueryCoord(query, start, end)
 | 
| 
 | 
   234         lSet = PathUtils.getSetListFromQueries(lPath)
 | 
| 
 | 
   235         return lSet
 | 
| 
 | 
   236     
 | 
| 
 | 
   237     ## Give a list of identifiers contained in the table
 | 
| 
 | 
   238     #
 | 
| 
 | 
   239     # @return lId integer list
 | 
| 
 | 
   240     #
 | 
| 
 | 
   241     def getIdList(self):
 | 
| 
 | 
   242         sqlCmd = "SELECT DISTINCT path from %s;" % (self._table_idx)
 | 
| 
 | 
   243         lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd )
 | 
| 
 | 
   244         return lId
 | 
| 
 | 
   245         
 | 
| 
 | 
   246     ## Give a list of the distinct query names present in the table
 | 
| 
 | 
   247     #
 | 
| 
 | 
   248     # @return lDistinctQueryNames string list
 | 
| 
 | 
   249     #
 | 
| 
 | 
   250     def getQueryList(self):
 | 
| 
 | 
   251         lDistinctQueryNames = self._getDistinctTypeNamesList("query")
 | 
| 
 | 
   252         return lDistinctQueryNames
 | 
| 
 | 
   253     
 | 
| 
 | 
   254     def _getDistinctTypeNamesList( self, type ):
 | 
| 
 | 
   255         sqlCmd = "SELECT DISTINCT contig FROM %s" % ( self._table_idx )
 | 
| 
 | 
   256         lDistinctTypeNames = self._iDb.getStringListWithSQLCmd(sqlCmd)
 | 
| 
 | 
   257         return lDistinctTypeNames |