Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/plugins/memory.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d30785e31577 | 1:56ad4e20f292 |
|---|---|
| 1 from rdflib.term import BNode | |
| 2 from rdflib.store import Store, NO_STORE, VALID_STORE | |
| 3 | |
# Names exported by a wildcard import of this module.
__all__ = ["Memory", "IOMemory"]

# Wildcard marker for an unbound position in a triple pattern; the stores
# below treat a pattern part equal to this value as "match anything".
# ``Any`` is kept as an alias of the same value.
ANY = None
Any = ANY
| 7 | |
| 8 | |
class Memory(Store):
    """\
    An in memory implementation of a triple store.

    This triple store uses nested dictionaries to store triples. Each
    triple is stored in three such indices as follows: spo[s][p][o] = 1,
    pos[p][o][s] = 1 and osp[o][s][p] = 1.

    The ``context`` arguments of the methods below are accepted but never
    read, and every matched triple is paired with an empty iterator of
    contexts.

    Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser
    """
    def __init__(self, configuration=None, identifier=None):
        """Create an empty store.

        ``configuration`` is forwarded to ``Store.__init__``;
        ``identifier`` is only recorded on the instance.
        """
        super(Memory, self).__init__(configuration)
        self.identifier = identifier

        # indexed by [subject][predicate][object]
        self.__spo = {}

        # indexed by [predicate][object][subject]
        self.__pos = {}

        # indexed by [object][subject][predicate]
        self.__osp = {}

        # prefix -> namespace and namespace -> prefix lookup tables
        self.__namespace = {}
        self.__prefix = {}

    def add(self, xxx_todo_changeme, context, quoted=False):
        """\
        Add a triple to the store of triples.

        The triple is given as a (subject, predicate, object) 3-tuple and is
        recorded in all three indices. ``context`` and ``quoted`` are not
        used. Adding a triple that is already present is a no-op.
        """
        (subject, predicate, obj) = xxx_todo_changeme
        # setdefault creates the missing nesting levels without resorting
        # to a bare ``except`` around the lookups.
        self.__spo.setdefault(subject, {}).setdefault(predicate, {})[obj] = 1
        self.__pos.setdefault(predicate, {}).setdefault(obj, {})[subject] = 1
        self.__osp.setdefault(obj, {}).setdefault(subject, {})[predicate] = 1

    def remove(self, xxx_todo_changeme1, context=None):
        """Remove every triple matching the given pattern.

        The pattern is a (subject, predicate, object) 3-tuple in which
        ``ANY`` (``None``) acts as a wildcard. ``context`` is not used.
        Only the innermost entries are deleted; emptied intermediate
        dictionaries are left in place.
        """
        (subject, predicate, obj) = xxx_todo_changeme1
        for (s, p, o), _ in self.triples((subject, predicate, obj)):
            del self.__spo[s][p][o]
            del self.__pos[p][o][s]
            del self.__osp[o][s][p]

    def triples(self, xxx_todo_changeme2, context=None):
        """A generator over all the triples matching the given pattern.

        The pattern is a (subject, predicate, object) 3-tuple in which
        ``ANY`` (``None``) marks an unbound part. Each result is a pair
        ``((s, p, o), contexts)`` where ``contexts`` is an empty iterator.
        ``context`` is not used.

        Keys are snapshotted with ``list(...)`` before each loop so that a
        consumer (such as ``remove``) may delete entries while iterating.
        """
        (subject, predicate, obj) = xxx_todo_changeme2
        if subject is not ANY:  # subject is given: use the spo index
            spo = self.__spo
            if subject not in spo:  # given subject not found
                return
            by_predicate = spo[subject]
            if predicate is not ANY:  # subject+predicate is given
                if predicate not in by_predicate:  # predicate not found
                    return
                objects = by_predicate[predicate]
                if obj is not ANY:  # subject+predicate+object is given
                    if obj in objects:
                        yield (subject, predicate, obj), self.__contexts()
                else:  # object unbound
                    for o in list(objects):
                        yield (subject, predicate, o), self.__contexts()
            else:  # subject given, predicate unbound
                for p in list(by_predicate):
                    if obj is not ANY:  # object is given
                        if obj in by_predicate[p]:
                            yield (subject, p, obj), self.__contexts()
                    else:  # object unbound
                        for o in list(by_predicate[p]):
                            yield (subject, p, o), self.__contexts()
        elif predicate is not ANY:  # predicate is given, subject unbound
            pos = self.__pos
            if predicate not in pos:  # given predicate not found
                return
            by_object = pos[predicate]
            if obj is not ANY:  # predicate+object is given
                if obj in by_object:
                    for s in list(by_object[obj]):
                        yield (s, predicate, obj), self.__contexts()
            else:  # predicate is given, object+subject unbound
                for o in list(by_object):
                    for s in list(by_object[o]):
                        yield (s, predicate, o), self.__contexts()
        elif obj is not ANY:  # object is given, subject+predicate unbound
            osp = self.__osp
            if obj in osp:
                by_subject = osp[obj]
                for s in list(by_subject):
                    for p in list(by_subject[s]):
                        yield (s, p, obj), self.__contexts()
        else:  # subject+predicate+object unbound
            spo = self.__spo
            for s in list(spo):
                by_predicate = spo[s]
                for p in list(by_predicate):
                    for o in list(by_predicate[p]):
                        yield (s, p, o), self.__contexts()

    def __len__(self, context=None):
        """Return the number of triples, counted by a full scan."""
        #@@ optimize
        return sum(1 for _ in self.triples((None, None, None)))

    def bind(self, prefix, namespace):
        """Associate ``prefix`` with ``namespace`` in both lookup tables."""
        self.__prefix[namespace] = prefix
        self.__namespace[prefix] = namespace

    def namespace(self, prefix):
        """Return the namespace bound to ``prefix``, or None."""
        return self.__namespace.get(prefix, None)

    def prefix(self, namespace):
        """Return the prefix bound to ``namespace``, or None."""
        return self.__prefix.get(namespace, None)

    def namespaces(self):
        """Yield every (prefix, namespace) pair that has been bound."""
        for prefix, namespace in self.__namespace.items():
            yield prefix, namespace

    def __contexts(self):
        """Return an empty generator of contexts."""
        return (c for c in [])  # TODO: best way to return empty generator
| 166 | |
| 167 | |
class IOMemory(Store):
    """\
    An integer-key-optimized context-aware in-memory store.

    Uses three dict indices (for subjects, objects and predicates) holding
    sets of triples. Context information is tracked in a separate dict, with
    the triple as key and a dict of {context: quoted} items as value. The
    context information is used to filter triple query results.

    Memory usage is low due to several optimizations. RDF nodes are not
    stored directly in the indices; instead, the indices hold integer keys
    and the actual nodes are only stored once in int-to-object and
    object-to-int mapping dictionaries. A default context is determined
    based on the first triple that is added to the store, and no context
    information is actually stored for subsequent other triples with the
    same context information.

    Most operations should be quite fast, but a triples() query with two
    bound parts requires a set intersection operation, which may be slow in
    some cases. When multiple contexts are used in the same store, filtering
    based on context has to be done after each query, which may also be
    slow.

    """
    context_aware = True
    formula_aware = True
    graph_aware = True

    # The following variable name conventions are used in this class:
    #
    # subject, predicate, object             unencoded triple parts
    # triple = (subject, predicate, object)  unencoded triple
    # context:                               unencoded context
    #
    # sid, pid, oid                          integer-encoded triple parts
    # enctriple = (sid, pid, oid)            integer-encoded triple
    # cid                                    integer-encoded context

    def __init__(self, configuration=None, identifier=None):
        """Create an empty store.

        ``configuration`` and ``identifier`` are accepted but not used here;
        ``Store.__init__`` is called without arguments.
        """
        super(IOMemory, self).__init__()
        self.__namespace = {}
        self.__prefix = {}

        # Mappings for encoding RDF nodes using integer keys, to save memory
        # in the indexes. Note that None is always mapped to itself, to make
        # it easy to test for it in either encoded or unencoded form.
        self.__int2obj = {None: None}  # maps integer keys to objects
        self.__obj2int = {None: None}  # maps objects to integer keys

        # Indexes for each triple part, and a list of contexts for each triple
        self.__subjectIndex = {}    # key: sid    val: set(enctriples)
        self.__predicateIndex = {}  # key: pid    val: set(enctriples)
        self.__objectIndex = {}     # key: oid    val: set(enctriples)
        # key: enctriple    val: {cid1: quoted, cid2: quoted ...}
        self.__tripleContexts = {}
        self.__contextTriples = {None: set()}  # key: cid val: set(enctriples)

        # all contexts used in store (unencoded)
        self.__all_contexts = set()
        # default context information for triples
        self.__defaultContexts = None

    def bind(self, prefix, namespace):
        """Associate ``prefix`` with ``namespace`` in both lookup tables."""
        self.__prefix[namespace] = prefix
        self.__namespace[prefix] = namespace

    def namespace(self, prefix):
        """Return the namespace bound to ``prefix``, or None."""
        return self.__namespace.get(prefix, None)

    def prefix(self, namespace):
        """Return the prefix bound to ``namespace``, or None."""
        return self.__prefix.get(namespace, None)

    def namespaces(self):
        """Yield every (prefix, namespace) pair that has been bound."""
        for prefix, namespace in self.__namespace.items():
            yield prefix, namespace

    def add(self, triple, context, quoted=False):
        """Add ``triple`` to the store in ``context``.

        A non-None context is also recorded in the set of known contexts.
        """
        Store.add(self, triple, context, quoted)

        if context is not None:
            self.__all_contexts.add(context)

        enctriple = self.__encodeTriple(triple)
        sid, pid, oid = enctriple

        # Must run before the indexes are updated: it consults the subject
        # index to decide whether the triple already exists in the store.
        self.__addTripleContext(enctriple, context, quoted)

        self.__subjectIndex.setdefault(sid, set()).add(enctriple)
        self.__predicateIndex.setdefault(pid, set()).add(enctriple)
        self.__objectIndex.setdefault(oid, set()).add(enctriple)

    def remove(self, triplepat, context=None):
        """Remove the triples matching ``triplepat`` from ``context``.

        With ``context=None`` matching triples are removed from every
        context. A triple left without any context is dropped from the
        three indexes as well.
        """
        req_cid = self.__obj2id(context)
        for triple, contexts in self.triples(triplepat, context):
            enctriple = self.__encodeTriple(triple)
            for cid in self.__getTripleContexts(enctriple):
                if context is not None and req_cid != cid:
                    continue
                self.__removeTripleContext(enctriple, cid)
            ctxs = self.__getTripleContexts(enctriple, skipQuoted=True)
            if None in ctxs and (context is None or len(ctxs) == 1):
                self.__removeTripleContext(enctriple, None)
            if not self.__getTripleContexts(enctriple):
                # triple has been removed from all contexts
                sid, pid, oid = enctriple
                self.__subjectIndex[sid].remove(enctriple)
                self.__predicateIndex[pid].remove(enctriple)
                self.__objectIndex[oid].remove(enctriple)

                del self.__tripleContexts[enctriple]

        if req_cid is not None and \
                req_cid in self.__contextTriples and \
                not self.__contextTriples[req_cid]:
            # all triples are removed out of this context
            # and it's not the default context so delete it
            del self.__contextTriples[req_cid]

        if triplepat == (None, None, None) and \
                context in self.__all_contexts and \
                not self.graph_aware:
            # remove the whole context
            self.__all_contexts.remove(context)

    def triples(self, triplein, context=None):
        """Return an iterator of ``(triple, contexts)`` pairs matching the
        pattern ``triplein`` within ``context``.

        ``None`` in the pattern is a wildcard. ``contexts`` is a generator
        over the non-quoted contexts the triple appears in.
        """
        if context is not None:
            if context == self:  # hmm...does this really ever happen?
                context = None

        cid = self.__obj2id(context)
        enctriple = self.__encodeTriple(triplein)
        sid, pid, oid = enctriple

        # all triples case (no triple parts given as pattern)
        if sid is None and pid is None and oid is None:
            return self.__all_triples(cid)

        # optimize "triple in graph" case (all parts given)
        if sid is not None and pid is not None and oid is not None:
            if sid in self.__subjectIndex and \
                    enctriple in self.__subjectIndex[sid] and \
                    self.__tripleHasContext(enctriple, cid):
                return ((triplein, self.__contexts(enctriple)) for _ in [0])
            return self.__emptygen()

        # remaining cases: one or two out of three given
        sets = []
        lookups = (
            (sid, self.__subjectIndex),
            (pid, self.__predicateIndex),
            (oid, self.__objectIndex),
        )
        for part_id, index in lookups:
            if part_id is None:
                continue  # this part is unbound
            if part_id not in index:
                # a bound part that was never indexed cannot match anything
                return self.__emptygen()
            sets.append(index[part_id])

        # to get the result, do an intersection of the sets (if necessary);
        # either way the result is a new set, detached from the indexes
        if len(sets) > 1:
            enctriples = sets[0].intersection(*sets[1:])
        else:
            enctriples = sets[0].copy()

        return ((self.__decodeTriple(enc), self.__contexts(enc))
                for enc in enctriples
                if self.__tripleHasContext(enc, cid))

    def contexts(self, triple=None):
        """Return an iterator over contexts.

        Without a triple, or with the all-wildcard pattern
        ``(None, None, None)``, all known contexts are produced. Otherwise
        only the non-quoted contexts containing ``triple`` are produced.
        """
        # Compare by value: an identity test against a freshly built tuple
        # never succeeds, so the wildcard pattern would be mis-handled.
        if triple is None or triple == (None, None, None):
            return (context for context in self.__all_contexts)

        enctriple = self.__encodeTriple(triple)
        sid, pid, oid = enctriple
        if sid in self.__subjectIndex and \
                enctriple in self.__subjectIndex[sid]:
            return self.__contexts(enctriple)
        return self.__emptygen()

    def __len__(self, context=None):
        """Return the number of triples recorded for ``context``."""
        cid = self.__obj2id(context)
        if cid not in self.__contextTriples:
            return 0
        return len(self.__contextTriples[cid])

    def add_graph(self, graph):
        """Register ``graph`` as a known context (when graph-aware)."""
        if not self.graph_aware:
            Store.add_graph(self, graph)
        else:
            self.__all_contexts.add(graph)

    def remove_graph(self, graph):
        """Remove all triples of ``graph`` and forget the context."""
        if not self.graph_aware:
            Store.remove_graph(self, graph)
        else:
            self.remove((None, None, None), graph)
            try:
                self.__all_contexts.remove(graph)
            except KeyError:
                pass  # we didn't know this graph, no problem

    # internal utility methods below

    def __addTripleContext(self, enctriple, context, quoted):
        """add the given context to the set of contexts for the triple"""
        cid = self.__obj2id(context)

        sid, pid, oid = enctriple
        if sid in self.__subjectIndex and \
                enctriple in self.__subjectIndex[sid]:
            # we know the triple exists somewhere in the store
            if enctriple not in self.__tripleContexts:
                # triple exists with default ctx info
                # start with a copy of the default ctx info
                self.__tripleContexts[enctriple] = \
                    self.__defaultContexts.copy()

            self.__tripleContexts[enctriple][cid] = quoted
            if not quoted:
                self.__tripleContexts[enctriple][None] = quoted
        else:
            # the triple didn't exist before in the store
            if quoted:  # this context only
                self.__tripleContexts[enctriple] = {cid: quoted}
            else:  # default context as well
                self.__tripleContexts[enctriple] = {cid: quoted, None: quoted}

        # if the triple is not quoted add it to the default context
        if not quoted:
            self.__contextTriples[None].add(enctriple)

        # always add the triple to given context, making sure it's initialized
        if cid not in self.__contextTriples:
            self.__contextTriples[cid] = set()
        self.__contextTriples[cid].add(enctriple)

        # if this is the first ever triple in the store, set default ctx info
        if self.__defaultContexts is None:
            self.__defaultContexts = self.__tripleContexts[enctriple]

        # if the context info is the same as default, no need to store it
        if self.__tripleContexts[enctriple] == self.__defaultContexts:
            del self.__tripleContexts[enctriple]

    def __getTripleContexts(self, enctriple, skipQuoted=False):
        """return a list of (encoded) contexts for the triple, skipping
           quoted contexts if skipQuoted==True"""

        # triples without an explicit entry share the default context info
        ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts)

        if not skipQuoted:
            return list(ctxs.keys())

        return [cid for cid, quoted in ctxs.items() if not quoted]

    def __tripleHasContext(self, enctriple, cid):
        """return True iff the triple exists in the given context"""
        ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts)
        return (cid in ctxs)

    def __removeTripleContext(self, enctriple, cid):
        """remove the context from the triple"""
        # work on a copy so the shared default context info is never mutated
        ctxs = self.__tripleContexts.get(
            enctriple, self.__defaultContexts).copy()
        del ctxs[cid]
        if ctxs == self.__defaultContexts:
            del self.__tripleContexts[enctriple]
        else:
            self.__tripleContexts[enctriple] = ctxs
        self.__contextTriples[cid].remove(enctriple)

    def __obj2id(self, obj):
        """encode object, storing it in the encoding map if necessary,
           and return the integer key"""
        if obj not in self.__obj2int:
            key = randid()
            # draw again until the key is not already taken
            while key in self.__int2obj:
                key = randid()
            self.__obj2int[obj] = key
            self.__int2obj[key] = obj
            return key
        return self.__obj2int[obj]

    def __encodeTriple(self, triple):
        """encode a whole triple, returning the encoded triple"""
        return tuple(map(self.__obj2id, triple))

    def __decodeTriple(self, enctriple):
        """decode a whole encoded triple, returning the original
        triple"""
        return tuple(map(self.__int2obj.get, enctriple))

    def __all_triples(self, cid):
        """return a generator which yields all the triples (unencoded)
           of the given context"""
        if cid not in self.__contextTriples:
            return
        # iterate over a copy so the set may change while being consumed
        for enctriple in self.__contextTriples[cid].copy():
            yield self.__decodeTriple(enctriple), self.__contexts(enctriple)

    def __contexts(self, enctriple):
        """return a generator for all the non-quoted contexts
           (unencoded) the encoded triple appears in"""
        return (
            self.__int2obj.get(cid)
            for cid in self.__getTripleContexts(enctriple, skipQuoted=True)
            if cid is not None
        )

    def __emptygen(self):
        """return an empty generator"""
        if False:
            yield
| 495 | |
| 496 | |
| 497 import random | |
| 498 | |
| 499 | |
def randid(randint=random.randint, choice=random.choice, signs=(-1, 1)):
    """Return a random non-zero integer key.

    The magnitude is drawn with ``randint(1, 2000000000)`` and the sign is
    picked from ``signs`` with ``choice``. The helpers are bound as default
    arguments, so the function keeps working after the module-level
    ``random`` name is deleted.
    """
    # Draw the sign first, then the magnitude, so the two random sources
    # are consumed in a fixed order.
    sign = choice(signs)
    magnitude = randint(1, 2000000000)
    return sign * magnitude
| 502 | |
| 503 del random |
