Mercurial > repos > yufei-luo > s_mart
view commons/core/sql/TableBinSetAdaptator.py @ 15:440ceca58672
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 22 Apr 2013 11:08:07 -0400 |
parents | 769e306b7933 |
children |
line wrap: on
line source
# Copyright INRA (Institut National de la Recherche Agronomique) # http://www.inra.fr # http://urgi.versailles.inra.fr # # This software is governed by the CeCILL license under French law and # abiding by the rules of distribution of free software. You can use, # modify and/ or redistribute the software under the terms of the CeCILL # license as circulated by CEA, CNRS and INRIA at the following URL # "http://www.cecill.info". # # As a counterpart to the access to the source code and rights to copy, # modify and redistribute granted by the license, users are provided only # with a limited warranty and the software's author, the holder of the # economic rights, and the successive licensors have only limited # liability. # # In this respect, the user's attention is drawn to the risks associated # with loading, using, modifying and/or developing or reproducing the # software by the user in light of its specific status of free software, # that may mean that it is complicated to manipulate, and that also # therefore means that it is reserved for developers and experienced # professionals having in-depth computer knowledge. Users are therefore # encouraged to load and test the software's suitability as regards their # requirements in conditions enabling the security of their systems and/or # data to be ensured and, more generally, to use and operate it in the # same conditions as regards security. # # The fact that you are presently reading this means that you have had # knowledge of the CeCILL license and that you accept its terms. from commons.core.sql.TableSetAdaptator import TableSetAdaptator from commons.core.coord.SetUtils import SetUtils ## Adaptator for Set tables with bin indexes # class TableBinSetAdaptator(TableSetAdaptator): ## constructor # # @param iDb DbMySql instance instance of DbMySql # @param tableName string table name (default = "") # def __init__(self, iDb, tableName = ""): TableSetAdaptator.__init__(self, iDb, tableName) self._table_idx = "%s_idx" % (self._table) ## Insert a set instance in a set bin table # # @param iSet set instance an instance of set object # @param delayed boolean an insert delayed or not # def insASetInSetAndBinTable(self, iSet, delayed = False): self.insert(iSet, delayed) iSet.seqname = iSet.seqname.replace("\\", "\\\\") iSet.name = iSet.name.replace("\\", "\\\\") bin = iSet.getBin() max = iSet.getMax() min = iSet.getMin() strand = iSet.isOnDirectStrand() sql_prefix = '' if delayed: sql_prefix = 'INSERT DELAYED INTO ' else: sql_prefix = 'INSERT INTO ' sql_cmd = sql_prefix + '%s VALUES (%d,%f,"%s",%d,%d,%d)'\ %(self._table_idx,\ iSet.id,\ bin,\ iSet.seqname,\ min,\ max,\ strand) self._iDb.execute(sql_cmd) ## Delete set corresponding to a given identifier number in set and bin set table # @param id integer identifier number # @note old name was delSet_from_num # def deleteFromIdFromSetAndBinTable(self, id): self.deleteFromId(id) sql_cmd = 'delete from %s where path=%d' % (self._table_idx, id) self._iDb.execute(sql_cmd) ## Delete path corresponding to a given list of identifier number # # @param lId integer list list of identifier number # @note old name was delSet_from_listnum # def deleteFromListIdFromSetAndBinTable(self, lId): if lId != []: self.deleteFromIdList(lId) sql_cmd = 'delete from %s where path=%d' % (self._table_idx, lId[0]) for i in lId[1:]: sql_cmd += " or path=%d" % (i) self._iDb.execute(sql_cmd) ## Join two set by changing id number of id1 and id2 path # to the least of id1 and id2 # # @param id1 integer id path number # @param id2 integer id path number # @return id integer new id # @note old name was joinSet # def joinTwoSetsFromSetAndBinTable(self, id1, id2): self.joinTwoSets(id1, id2) if id1 < id2: new_id = id1 old_id = id2 else: new_id = id2 old_id = id1 sql_cmd = 'UPDATE %s SET path=%d WHERE path=%d'\ % (self._table_idx, new_id, old_id) self._iDb.execute(sql_cmd) return new_id ## Get a new id number from set bin table # def getNewId(self): sql_cmd = 'select max(path) from %s;' % (self._table_idx) self._iDb.execute(sql_cmd) max_id = self._iDb.fetchall()[0][0] if max_id != None: return int(max_id)+1 else: return 1 ## Get a set list instance between start and end parameters # using the bin scheme # # @param seqName reference seq name # @param start start coordinate # @param end end coordinate # @return lSet set list # @note old name was getSetList_from_qcoord # def getSetListFromQueryCoord(self, seqName, start, end): min_coord = min(start,end) max_coord = max(start,end) sql_cmd = 'select path from %s where contig="%s" and ('\ % (self._table + "_idx", seqName) for i in xrange(8, 2, -1): bin_lvl = pow(10, i) if int(start/bin_lvl) == int(end/bin_lvl): bin = float(bin_lvl + (int(start / bin_lvl) / 1e10)) sql_cmd += 'bin=%f' % (bin) else: bin1 = float(bin_lvl + (int(start / bin_lvl) / 1e10)) bin2 = float(bin_lvl + (int(end /bin_lvl) / 1e10)) sql_cmd += 'bin between %f and %f' % (bin1, bin2) if bin_lvl != 1000: sql_cmd += " or " sql_cmd += ") and min<=%d and max>=%d" % (max_coord, min_coord); self._iDb.execute(sql_cmd) res = self._iDb.fetchall() lId = [] for i in res: lId.append(int(i[0])) lSet = self.getSetListFromIdList(lId) return lSet ## Get a set list instances strictly included between start and end parameters # using the bin scheme # # @param seqName reference seq name # @param start start coordinate # @param end end coordinate # @return lSet set list # @note old name was getInSetList_from_qcoord # @warning the implementation has been changed : I added the two first lines # def getSetListStrictlyIncludedInQueryCoord(self, contig, start, end): min_coord = min(start,end) max_coord = max(start,end) lSet = self.getSetListFromQueryCoord(contig, start, end) lSetStrictlyIncluded = [] for iSet in lSet: if iSet.getMin() > min_coord and \ iSet.getMax() < max_coord: lSetStrictlyIncluded.append(iSet) return lSetStrictlyIncluded ## Get a list of the identifier Id contained in the table bin # # @return lId list of int list of identifier # @note old name was getSet_num # def getIdList(self): sql_cmd = 'select distinct path from %s;' % (self._table_idx) self._iDb.execute(sql_cmd) res = self._iDb.fetchall() lId = [] for t in res: lId.append(int(t[0])) return lId ## Get a list of the query sequence name contained in the table bin # # @return lSeqName list of string list of query sequence name # @note old name was getContig_name # def getSeqNameList(self): sql_cmd = 'select distinct contig from %s;' % (self._table_idx) self._iDb.execute(sql_cmd) res = self._iDb.fetchall() lSeqName = [] for t in res: lSeqName.append(t[0]) return lSeqName ## Insert a Set list with the same new identifier in the table bin and set # # @note old name was insAddSetList # def insertListInSetAndBinTable(self, lSets, delayed = False): id = self.getNewId() SetUtils.changeIdInList( lSets, id ) for iSet in lSets: self.insASetInSetAndBinTable(iSet, delayed) ## Insert a set list instances In table Bin and Set and merge all overlapping sets # # @param lSets reference seq name # @note old name was insMergeSetList # def insertListInSetAndBinTableAndMergeAllSets(self, lSets): min, max = SetUtils.getListBoundaries(lSets) oldLSet = self.getSetListFromQueryCoord(lSets[0].seqname, min, max) oldQueryhash = SetUtils.getDictOfListsWithIdAsKey(oldLSet) qhash = SetUtils.getDictOfListsWithIdAsKey(lSets) for lNewSetById in qhash.values(): found = False for currentId, oldLsetById in oldQueryhash.items(): if SetUtils.areSetsOverlappingBetweenLists(lNewSetById, oldLsetById): oldLsetById.extend(lNewSetById) oldLsetById = SetUtils.mergeSetsInList(oldLsetById) self.deleteFromIdFromSetAndBinTable(currentId) found = True if not found: self.insertListInSetAndBinTable(lNewSetById) else: id = self.getNewId() SetUtils.changeIdInList(oldLsetById, id) self.insertListInSetAndBinTable(oldLsetById) ## Insert a set list instances In table Bin and Set after removing all overlaps between database and lSets # # @param lSets reference seq name # @note old name was insDiffSetList # def insertListInSetAndBinTableAndRemoveOverlaps(self, lSets): min, max = SetUtils.getListBoundaries(lSets) oldLSet = self.getSetListFromQueryCoord(lSets[0].seqname, min, max) oldQueryHash = SetUtils.getDictOfListsWithIdAsKey(oldLSet) newQueryHash = SetUtils.getDictOfListsWithIdAsKey(lSets) for lNewSetById in newQueryHash.values(): for lOldSetById in oldQueryHash.values(): if SetUtils.areSetsOverlappingBetweenLists(lNewSetById, lOldSetById): lNewSetById = SetUtils.getListOfSetWithoutOverlappingBetweenTwoListOfSet(lOldSetById, lNewSetById) self.insertListInSetAndBinTable(lNewSetById)