Mercurial > repos > urgi-team > teiso
comparison TEisotools-1.1.a/commons/core/coord/SetUtils.py @ 13:feef9a0db09d draft
Uploaded
| author | urgi-team |
|---|---|
| date | Wed, 20 Jul 2016 09:04:42 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 12:22b0494ec883 | 13:feef9a0db09d |
|---|---|
| 1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
| 2 # http://www.inra.fr | |
| 3 # http://urgi.versailles.inra.fr | |
| 4 # | |
| 5 # This software is governed by the CeCILL license under French law and | |
| 6 # abiding by the rules of distribution of free software. You can use, | |
| 7 # modify and/ or redistribute the software under the terms of the CeCILL | |
| 8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
| 9 # "http://www.cecill.info". | |
| 10 # | |
| 11 # As a counterpart to the access to the source code and rights to copy, | |
| 12 # modify and redistribute granted by the license, users are provided only | |
| 13 # with a limited warranty and the software's author, the holder of the | |
| 14 # economic rights, and the successive licensors have only limited | |
| 15 # liability. | |
| 16 # | |
| 17 # In this respect, the user's attention is drawn to the risks associated | |
| 18 # with loading, using, modifying and/or developing or reproducing the | |
| 19 # software by the user in light of its specific status of free software, | |
| 20 # that may mean that it is complicated to manipulate, and that also | |
| 21 # therefore means that it is reserved for developers and experienced | |
| 22 # professionals having in-depth computer knowledge. Users are therefore | |
| 23 # encouraged to load and test the software's suitability as regards their | |
| 24 # requirements in conditions enabling the security of their systems and/or | |
| 25 # data to be ensured and, more generally, to use and operate it in the | |
| 26 # same conditions as regards security. | |
| 27 # | |
| 28 # The fact that you are presently reading this means that you have had | |
| 29 # knowledge of the CeCILL license and that you accept its terms. | |
| 30 | |
| 31 | |
| 32 from commons.core.coord.Set import Set | |
| 33 | |
## Static methods for the manipulation of Set instances
#
# Every method is static: call it as SetUtils.methodName(...).
# The Set instances are only used through the attributes and methods that
# appear below (id, name, seqname, start, getMin(), getMax(), getLength(),
# isOverlapping(), merge(), ...).
#
class SetUtils(object):

    ## Change the identifier of each Set instance in the given list
    #
    # The instances are modified in place; nothing is returned.
    #
    # @param lSets list of Set instances
    # @param newId new identifier
    #
    @staticmethod
    def changeIdInList(lSets, newId):
        for iSet in lSets:
            iSet.id = newId

    ## Return the length of the overlap between two lists of Set instances
    #
    # Both lists are sorted by (min, max) and scanned together. For each Set
    # of the first list, the overlap lengths with the consecutive overlapping
    # Sets of the second list are added up.
    #
    # @param lSets1 list of Set instances
    # @param lSets2 list of Set instances
    # @return length of overlap
    # @warning sequence names are supposed to be identical
    #
    @staticmethod
    def getOverlapLengthBetweenLists(lSets1, lSets2):
        lSet1Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets1)
        lSet2Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets2)
        nbSets2 = len(lSet2Sorted)
        osize = 0
        j = 0
        for iSet1 in lSet1Sorted:
            # Skip the Sets of the second list that end before iSet1 starts.
            # 'j' is never reset: both lists are sorted.
            while (j != nbSets2
                   and iSet1.getMin() > lSet2Sorted[j].getMax()
                   and not iSet1.isOverlapping(lSet2Sorted[j])):
                j += 1
            # Accumulate over the run of consecutive overlapping Sets; the
            # scan stops at the first Set that does not overlap iSet1.
            jj = j
            while jj != nbSets2 and iSet1.isOverlapping(lSet2Sorted[jj]):
                osize += iSet1.getOverlapLength(lSet2Sorted[jj])
                jj += 1
        return osize

    ## Return True if the two lists of Set instances overlap, False otherwise
    #
    # @param lSets1 list of Set instances
    # @param lSets2 list of Set instances
    #
    @staticmethod
    def areSetsOverlappingBetweenLists(lSets1, lSets2):
        lSet1Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets1)
        lSet2Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets2)
        nbSets2 = len(lSet2Sorted)
        j = 0
        for iSet1 in lSet1Sorted:
            # Same forward scan as in getOverlapLengthBetweenLists().
            while (j != nbSets2
                   and iSet1.getMin() > lSet2Sorted[j].getMax()
                   and not iSet1.isOverlapping(lSet2Sorted[j])):
                j += 1
            if j != nbSets2 and iSet1.isOverlapping(lSet2Sorted[j]):
                return True
        return False

    ## Merge all overlapping Set instances between two lists of Set and give the next identifier
    #
    # Sets of both lists that belong to the same group of overlapping
    # identifiers are merged and receive the smallest identifier of the first
    # list found in the group. Sets of the first list without overlap keep
    # their identifier. Sets of the second list without overlap receive new
    # identifiers, starting at max_id.
    #
    # @param lSets1 list of Set instances
    # @param lSets2 list of Set instances
    # @param max_id start id value for inserting new Set; when 0, it is set to
    #        the highest identifier of lSets1 plus one (1 if lSets1 is empty)
    # @return a new list of the merged Set instances and the next identifier
    # @warning the Set instances given as input are modified in place
    #
    @staticmethod
    def getListOfMergedSetsAndNextId(lSets1, lSets2, max_id=0):
        lSets_merged = []
        list2merge = SetUtils.getListOfIdListOfOverlappingSets(lSets1, lSets2)
        idlist1 = SetUtils.getDictOfListsWithIdAsKey(lSets1)
        idlist2 = SetUtils.getDictOfListsWithIdAsKey(lSets2)
        if max_id == 0:
            # max() of an empty dict raises ValueError: start numbering at 1
            # when the first list is empty.
            if idlist1:
                max_id = max(idlist1.keys()) + 1
            else:
                max_id = 1
        for lIds in list2merge:
            if lIds == []:
                continue
            lToMerge = []
            # Positive ids come from lSets1, negative ids from lSets2 (see
            # getListOfIdListOfOverlappingSets). Starting from max(lIds), the
            # loop keeps the smallest positive id of the group.
            min_id = max(lIds)
            for anId in lIds:
                if anId > 0:
                    if min_id > anId:
                        min_id = anId
                    lToMerge.extend(idlist1[anId])
                    del idlist1[anId]
                else:
                    lToMerge.extend(idlist2[anId * -1])
                    del idlist2[anId * -1]
            lToMerge = SetUtils.mergeSetsInList(lToMerge)
            SetUtils.changeIdInList(lToMerge, min_id)
            lSets_merged.extend(lToMerge)
        # What is left in the dictionaries did not overlap anything.
        for lRemaining in idlist1.values():
            lSets_merged.extend(lRemaining)
        for lRemaining in idlist2.values():
            SetUtils.changeIdInList(lRemaining, max_id)
            lSets_merged.extend(lRemaining)
            max_id += 1
        return lSets_merged, max_id

#    ## Concatenate two Set instance lists and give the next identifier
#    #
#    # @param lSets1 list of Set instances
#    # @param lSets2 list of Set instances
#    # @param maxId start id value for inserting new Set
#    # @return a new list of Set instances and the next identifier
#    #
#    @staticmethod
#    def getSetsListOfTwoConcatenatedSetsListAndNextId(lSets1, lSets2, maxId = 0):
#        lOutSets = lSets1
#        dId2SetsList2 = SetUtils.getDictOfListsWithIdAsKey(lSets2)
#        if maxId == 0:
#            dId2SetsList1 = SetUtils.getDictOfListsWithIdAsKey(lSets1)
#            maxId = max(dId2SetsList1.keys())
#        for lSets in dId2SetsList2.values():
#            SetUtils.changeIdInList(lSets, maxId)
#            lOutSets.extend(lSets)
#            maxId += 1
#        return lOutSets, maxId

    ## Return the sum of the length of each Set instance in the given list
    #
    # @param lSets: list of Set instances
    # @return cumulative length (0 for an empty list)
    #
    @staticmethod
    def getCumulLength(lSets):
        return sum(iSet.getLength() for iSet in lSets)

    ## Return a tuple with min and max coordinates of Set instances in the given list
    #
    # @param lSets list of Set instances
    # @return tuple (min, max); (-1, -1) when the list is empty
    #
    @staticmethod
    def getListBoundaries(lSets):
        # -1 is used as the "not initialised yet" marker for both bounds.
        qmin = -1
        qmax = -1
        for iSet in lSets:
            if qmin == -1:
                qmin = iSet.start
            qmin = min(qmin, iSet.getMin())
            qmax = max(qmax, iSet.getMax())
        return (qmin, qmax)

    ## Show Set instances contained in the given list
    #
    # @param lSets list of Set instances
    #
    @staticmethod
    def showList(lSets):
        for iSet in lSets:
            iSet.show()

    ## Write Set instances contained in the given list
    #
    # @param lSets list of Set instances
    # @param fileName a file name
    # @param mode the open mode of the file '"w"' or '"a"'
    #
    @staticmethod
    def writeListInFile(lSets, fileName, mode="w"):
        # 'with' closes the file even if one of the write() calls fails.
        with open(fileName, mode) as fileHandler:
            for iSet in lSets:
                iSet.write(fileHandler)

    ## Split a Set list in several Set lists according to the identifier
    #
    # @param lSets list of Set instances
    # @return a dictionary which keys are identifiers and values Set lists
    #
    @staticmethod
    def getDictOfListsWithIdAsKey(lSets):
        dId2SetList = {}
        for iSet in lSets:
            dId2SetList.setdefault(iSet.id, []).append(iSet)
        return dId2SetList

    ## Split the Sets of a file in several Set lists according to the identifier
    #
    # Each line is split on tabulations and given to Set.setFromTuple().
    #
    # @param setFile string name of a Set file
    # @return a dictionary which keys are identifiers and values Set lists
    #
    @staticmethod
    def getDictOfListsWithIdAsKeyFromFile(setFile):
        dId2SetList = {}
        with open(setFile, "r") as setFileHandler:
            for line in setFileHandler:
                iSet = Set()
                # rstrip("\n") instead of [:-1]: a last line without a final
                # newline must not lose its last character.
                iSet.setFromTuple(line.rstrip("\n").split("\t"))
                dId2SetList.setdefault(iSet.id, []).append(iSet)
        return dId2SetList

    ## Return a Map list from the given Set List
    #
    # @param lSets list of Set instances
    # @return list with the result of set2map() for each Set, in the same order
    #
    @staticmethod
    def getMapListFromSetList(lSets):
        return [iSet.set2map() for iSet in lSets]

    ## Construct a Set list from a Map list
    #
    # The identifiers of the new Sets are 1, 2, 3, ... in the order of lMaps.
    #
    # @param lMaps list of Map instances
    # @return list of Set instances
    #
    @staticmethod
    def getSetListFromMapList(lMaps):
        lSets = []
        for count, iMap in enumerate(lMaps, 1):
            lSets.append(Set(count, iMap.name, iMap.seqname, iMap.start, iMap.end))
        return lSets

    ## Merge all overlapping Set instances in a list without considering the identifiers.
    #  Start by sorting Set instances by their increasing Min coordinate.
    #
    # @param lSets list of Set instances
    # @return: a new list of the merged Set instances
    # @warning the Set instances given as input are modified in place by merge()
    #
    @staticmethod
    def mergeSetsInList(lSets):
        lMerged = []
        if len(lSets) == 0:
            return lMerged

        lSortedSets = SetUtils.getSetListSortedByIncreasingMinThenInvLength(lSets)

        for position, iSet in enumerate(lSortedSets):
            # First absorb every following Set that overlaps the current one.
            for nextSet in lSortedSets[position + 1:]:
                if iSet.isOverlapping(nextSet):
                    iSet.merge(nextSet)
            # Then either fold the current Set into the already kept ones,
            # or keep it as a new entry.
            isAlreadyInList = False
            for keptSet in lMerged:
                if keptSet.isOverlapping(iSet):
                    isAlreadyInList = True
                    keptSet.merge(iSet)
                    # Kept from the original code: re-assign the slot found
                    # by index() with the merged instance.
                    lMerged[lMerged.index(keptSet)] = keptSet
            if not isAlreadyInList:
                lMerged.append(iSet)
        return lMerged

    ## Unjoin a Set list according to another
    #
    # The Sets to unjoin are sorted, then cut in sub-lists at the positions of
    # the Sets to keep.
    #
    # @param lToKeep: a list of Set instances to keep
    # @param lToUnjoin: a list of Set instances to unjoin
    # @return: lToUnjoin split in several list
    #
    @staticmethod
    def getSetListUnjoined(lToKeep, lToUnjoin):
        lSortedToKeep = SetUtils.getSetListSortedByIncreasingMinThenMax(lToKeep)
        lSortedToUnjoin = SetUtils.getSetListSortedByIncreasingMinThenMax(lToUnjoin)
        if lSortedToUnjoin == []:
            return []
        if lSortedToKeep == []:
            return [lSortedToUnjoin]

        i = 0
        resultListSet = []
        while i < len(lSortedToKeep):
            # Sets to unjoin located entirely before the current Set to keep.
            j1 = 0
            while j1 < len(lSortedToUnjoin) and lSortedToKeep[i].getMin() > lSortedToUnjoin[j1].getMax():
                j1 += 1
            if j1 == len(lSortedToUnjoin):
                break
            if j1 != 0:
                resultListSet.append(lSortedToUnjoin[:j1])
                del lSortedToUnjoin[:j1]
                j1 = 0
            if i + 1 == len(lSortedToKeep):
                break
            # Sets to unjoin located entirely before the next Set to keep.
            j2 = j1
            if j2 < len(lSortedToUnjoin) and lSortedToKeep[i + 1].getMin() > lSortedToUnjoin[j2].getMax():
                while j2 < len(lSortedToUnjoin) and lSortedToKeep[i + 1].getMin() > lSortedToUnjoin[j2].getMax():
                    j2 += 1
                resultListSet.append(lSortedToUnjoin[j1:j2])
                del lSortedToUnjoin[j1:j2]
            i += 1

        # The remaining Sets form the last sub-list.
        if resultListSet != [] or i == 0:
            resultListSet.append(lSortedToUnjoin)
        return resultListSet

    ## Return new list of Set instances with no duplicate
    #
    # Duplicates are detected with '!=' between neighbours once the list is
    # sorted by (min, max). A list with fewer than two elements is returned
    # as is (same object).
    #
    # @param lSets list of Set instances
    #
    @staticmethod
    def getSetListWithoutDuplicates(lSets):
        if len(lSets) < 2:
            return lSets
        lSortedSet = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets)
        lUniqSet = [lSortedSet[0]]
        for iSet in lSortedSet[1:]:
            if iSet != lUniqSet[-1]:
                lUniqSet.append(iSet)
        return lUniqSet

    ## Return a list of Set instances sorted in increasing order according to the Min, then the Max, and finally their initial order
    #
    # @param lSets: list of Set instances
    # @return a new list, the input list is left untouched
    #
    @staticmethod
    def getSetListSortedByIncreasingMinThenMax(lSets):
        return sorted(lSets, key=lambda iSet: (iSet.getMin(), iSet.getMax()))

    ## Return a list of Set instances sorted in increasing order according to the min, then the inverse of the length, and finally their initial order
    #
    # For a same min, the longest Set comes first.
    #
    # @param lSets: list of Set instances
    # @return a new list, the input list is left untouched
    # @warning a Set whose getLength() is 0 raises ZeroDivisionError
    #
    @staticmethod
    def getSetListSortedByIncreasingMinThenInvLength(lSets):
        return sorted(lSets, key=lambda iSet: (iSet.getMin(), 1 / float(iSet.getLength())))

    ## Return a list of Set instances sorted in increasing order according to the SeqName, then the Name, then the Min, then the Max and finally their initial order
    #
    # @param lSets: list of Set instances
    # @return a new list, the input list is left untouched
    #
    @staticmethod
    def getSetListSortedBySeqThenRegionThenMinThenMax(lSets):
        return sorted(lSets, key=lambda iSet: (iSet.getSeqname(), iSet.getName(), iSet.getMin(), iSet.getMax()))

    ## Return a list of identifier lists of overlapping Sets from the subject list, according to the reference list
    #
    # In the returned lists, identifiers of reference Sets are given as is and
    # identifiers of subject Sets are multiplied by -1. Two Sets are linked
    # only if they overlap and are on the same strand.
    #
    # @param lRef list of Set instances
    # @param lSubject list of Set instances
    # @return list of lists of identifiers
    #
    @staticmethod
    def getListOfIdListOfOverlappingSets(lRef, lSubject):
        lSortedRef = SetUtils.getSetListSortedByIncreasingMinThenMax(lRef)
        lSortedSubject = SetUtils.getSetListSortedByIncreasingMinThenMax(lSubject)
        nbSubjects = len(lSortedSubject)

        lOverlappingSet = []
        # identifier -> index of its group in lOverlappingSet
        id2LOverlappingSet_pos = {}

        def overlapOnSameStrand(iRef, iSubject):
            return (iRef.isOverlapping(iSubject)
                    and iRef.isOnDirectStrand() == iSubject.isOnDirectStrand())

        j = 0
        for iRef in lSortedRef:
            while (j != nbSubjects
                   and iRef.getMin() > lSortedSubject[j].getMax()
                   and not overlapOnSameStrand(iRef, lSortedSubject[j])):
                j += 1
            jj = j
            while jj != nbSubjects and overlapOnSameStrand(iRef, lSortedSubject[jj]):
                id1 = iRef.id
                id2 = lSortedSubject[jj].id * -1
                isRefKnown = id1 in id2LOverlappingSet_pos
                isSubjectKnown = id2 in id2LOverlappingSet_pos
                if isRefKnown and not isSubjectKnown:
                    # the subject joins the group of the reference
                    lOverlappingSet[id2LOverlappingSet_pos[id1]].append(id2)
                    id2LOverlappingSet_pos[id2] = id2LOverlappingSet_pos[id1]
                elif isSubjectKnown and not isRefKnown:
                    # the reference joins the group of the subject
                    lOverlappingSet[id2LOverlappingSet_pos[id2]].append(id1)
                    id2LOverlappingSet_pos[id1] = id2LOverlappingSet_pos[id2]
                elif not isRefKnown and not isSubjectKnown:
                    # both are new: open a new group
                    lOverlappingSet.append([id1, id2])
                    id2LOverlappingSet_pos[id1] = len(lOverlappingSet) - 1
                    id2LOverlappingSet_pos[id2] = len(lOverlappingSet) - 1
                # when both are already known, nothing is recorded
                jj += 1

        return lOverlappingSet

    ## Return a list of sets without overlapping between two lists of sets
    #
    # Each Set of lSet2 is passed through diff() with each Set of lSet1. Only
    # the non-empty Sets of at least 20 in length are returned.
    #
    # @param lSet1 list of Set instances
    # @param lSet2 list of Set instances
    # @return list of the Set instances of lSet2 kept after the diff
    # @warning lSet2 is modified: the pieces returned by diff() are appended to it
    #
    @staticmethod
    def getListOfSetWithoutOverlappingBetweenTwoListOfSet(lSet1, lSet2):
        for iSet1 in lSet1:
            # lSet2 grows while it is scanned: the appended pieces are also
            # visited, for this Set of lSet1 and for the following ones.
            # NOTE(review): diff() presumably trims the Set in place and
            # returns the remaining piece - confirm in the Set/Range class.
            for iSet2 in lSet2:
                newSet = iSet2.diff(iSet1)
                if not newSet.isEmpty() and newSet.getLength() >= 20:
                    lSet2.append(newSet)
        lSet2WithoutOverlaps = []
        for iSet2 in lSet2:
            if not iSet2.isEmpty() and iSet2.getLength() >= 20:
                lSet2WithoutOverlaps.append(iSet2)
        return lSet2WithoutOverlaps

    ## Return a Set list from a Set file
    #
    # @param setFile string name of a Set file
    # @return a list of Set instances
    #
    @staticmethod
    def getSetListFromFile(setFile):
        lSets = []
        with open(setFile, "r") as setFileHandler:
            for line in setFileHandler:
                iSet = Set()
                iSet.setFromString(line)
                lSets.append(iSet)
        return lSets

    ## Convert a Set file into a Map file
    #
    # Each line of the Set file is loaded in a Set, and the Map returned by
    # getMapInstance() is written in the Map file.
    #
    # @param setFile string name of the Set file to read
    # @param mapFile string name of the Map file to write
    #
    @staticmethod
    def convertSetFileIntoMapFile(setFile, mapFile):
        # Nested 'with': both files are closed even if a line cannot be parsed.
        with open(setFile, "r") as setFileHandler:
            with open(mapFile, "w") as mapFileHandler:
                iSet = Set()
                for line in setFileHandler:
                    iSet.setFromString(line)
                    iMap = iSet.getMapInstance()
                    iMap.write(mapFileHandler)

    ## Split a Set list in several Set lists according to the sequence name
    #
    # @param lSets list of Set instances
    # @return a dictionary which keys are sequence names and values Set lists
    #
    @staticmethod
    def getDictOfListsWithSeqnameAsKey(lSets):
        dSeqnamesToSetList = {}
        for iSet in lSets:
            dSeqnamesToSetList.setdefault(iSet.seqname, []).append(iSet)
        return dSeqnamesToSetList

    ## Return the Set instances whose length is within the given bounds
    #
    # @param lSets list of Set instances
    # @param minLength minimum length (included)
    # @param maxLength maximum length (included)
    # @return a new list; when both bounds are 0, the input list itself
    #
    @staticmethod
    def filterOnLength(lSets, minLength=0, maxLength=10000000000):
        if minLength == 0 and maxLength == 0:
            return lSets
        return [iSet for iSet in lSets if minLength <= iSet.getLength() <= maxLength]

    ## Return the list of the distinct names found in a Set file
    #
    # @param setFile string name of a Set file
    # @return list of names, in their order of first appearance
    #
    @staticmethod
    def getListOfNames(setFile):
        lNames = []
        # companion set, so that the membership test does not scan lNames
        sSeenNames = set()
        iSet = Set()
        with open(setFile, "r") as setFileHandler:
            for line in setFileHandler:
                # rstrip("\n") instead of [:-1]: see
                # getDictOfListsWithIdAsKeyFromFile()
                iSet.setFromTuple(line.rstrip("\n").split("\t"))
                if iSet.name not in sSeenNames:
                    sSeenNames.add(iSet.name)
                    lNames.append(iSet.name)
        return lNames

    ## Split the Sets of a file according to their name, then their identifier
    #
    # @param setFile string name of a Set file
    # @return a dictionary which keys are names and values are dictionaries
    #         which keys are identifiers and values Set lists
    #
    @staticmethod
    def getDictOfDictsWithNamesThenIdAsKeyFromFile(setFile):
        dNames2DictsId = {}
        with open(setFile, "r") as setFileHandler:
            for line in setFileHandler:
                iSet = Set()
                iSet.setFromTuple(line.rstrip("\n").split("\t"))
                dId2SetList = dNames2DictsId.setdefault(iSet.name, {})
                dId2SetList.setdefault(iSet.id, []).append(iSet)
        return dNames2DictsId
