comparison env/lib/python3.7/site-packages/rdflib/plugins/memory.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 from rdflib.term import BNode
2 from rdflib.store import Store, NO_STORE, VALID_STORE
3
# Names exported by ``from rdflib.plugins.memory import *``.
__all__ = ['Memory', 'IOMemory']

# Wildcard marker for triple patterns: a pattern slot that is ANY (i.e. None)
# is treated as unbound by the ``triples()`` methods in this module.
ANY = Any = None
7
8
class Memory(Store):
    """\
    An in memory implementation of a triple store.

    This triple store uses nested dictionaries to store triples. Each
    triple is stored in three such indices as follows: spo[s][p][o] = 1,
    pos[p][o][s] = 1 and osp[o][s][p] = 1.

    The store keeps no context information: the ``context`` arguments are
    accepted but ignored, and every matched triple is paired with an empty
    context generator.

    Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser
    """
    def __init__(self, configuration=None, identifier=None):
        """Create an empty store.

        ``configuration`` is handed to ``Store.__init__``; ``identifier``
        is only recorded on the instance.
        """
        super(Memory, self).__init__(configuration)
        self.identifier = identifier

        # indexed by [subject][predicate][object]
        self.__spo = {}

        # indexed by [predicate][object][subject]
        self.__pos = {}

        # indexed by [object][subject][predicate]
        self.__osp = {}

        # prefix -> namespace and namespace -> prefix bindings
        self.__namespace = {}
        self.__prefix = {}

    def add(self, xxx_todo_changeme, context, quoted=False):
        """\
        Add a triple to the store of triples.

        ``xxx_todo_changeme`` is the (subject, predicate, object) triple.
        ``context`` and ``quoted`` are not used by this store. Adding a
        triple that is already present is a no-op.
        """
        (subject, predicate, obj) = xxx_todo_changeme
        # setdefault creates the intermediate dictionaries on demand; this
        # replaces bare ``except:`` clauses that would also have swallowed
        # unrelated errors such as KeyboardInterrupt.
        self.__spo.setdefault(subject, {}).setdefault(predicate, {})[obj] = 1
        self.__pos.setdefault(predicate, {}).setdefault(obj, {})[subject] = 1
        self.__osp.setdefault(obj, {}).setdefault(subject, {})[predicate] = 1

    def remove(self, xxx_todo_changeme1, context=None):
        """Remove every triple matching the given pattern.

        ``xxx_todo_changeme1`` is a (subject, predicate, object) pattern in
        which ANY (None) acts as a wildcard. ``context`` is not used.

        NOTE: only the innermost entries are deleted; inner dictionaries
        that become empty are left in the indices.
        """
        (subject, predicate, obj) = xxx_todo_changeme1
        # triples() snapshots the keys of each level before iterating, so
        # deleting matched entries while consuming it is safe.
        for (s, p, o), _contexts in self.triples((subject, predicate, obj)):
            del self.__spo[s][p][o]
            del self.__pos[p][o][s]
            del self.__osp[o][s][p]

    def triples(self, xxx_todo_changeme2, context=None):
        """A generator over all the triples matching the given pattern.

        ``xxx_todo_changeme2`` is a (subject, predicate, object) pattern in
        which ANY (None) leaves that position unbound. ``context`` is not
        used. Each item yielded is ``((s, p, o), contexts)`` where
        ``contexts`` is an empty generator.

        The index consulted depends on which positions are bound: spo when
        the subject is given, otherwise pos when the predicate is given,
        otherwise osp when the object is given, otherwise a full scan of spo.
        """
        (subject, predicate, obj) = xxx_todo_changeme2
        if subject is not ANY:  # subject is given
            spo = self.__spo
            if subject not in spo:  # given subject not found
                return
            predicates = spo[subject]
            if predicate is not ANY:  # subject+predicate is given
                if predicate not in predicates:  # given predicate not found
                    return
                if obj is not ANY:  # subject+predicate+object is given
                    if obj in predicates[predicate]:
                        yield (subject, predicate, obj), self.__contexts()
                else:  # subject+predicate is given, object unbound
                    for o in list(predicates[predicate]):
                        yield (subject, predicate, o), self.__contexts()
            else:  # subject given, predicate unbound
                for p in list(predicates):
                    if obj is not ANY:  # object is given
                        if obj in predicates[p]:
                            yield (subject, p, obj), self.__contexts()
                    else:  # object unbound
                        for o in list(predicates[p]):
                            yield (subject, p, o), self.__contexts()
        elif predicate is not ANY:  # predicate is given, subject unbound
            pos = self.__pos
            if predicate not in pos:  # given predicate not found
                return
            objects = pos[predicate]
            if obj is not ANY:  # predicate+object is given, subject unbound
                if obj in objects:
                    for s in list(objects[obj]):
                        yield (s, predicate, obj), self.__contexts()
            else:  # predicate is given, object+subject unbound
                for o in list(objects):
                    for s in list(objects[o]):
                        yield (s, predicate, o), self.__contexts()
        elif obj is not ANY:  # object is given, subject+predicate unbound
            osp = self.__osp
            if obj in osp:
                subjects = osp[obj]
                for s in list(subjects):
                    for p in list(subjects[s]):
                        yield (s, p, obj), self.__contexts()
        else:  # subject+predicate+object unbound
            spo = self.__spo
            for s in list(spo):
                predicates = spo[s]
                for p in list(predicates):
                    for o in list(predicates[p]):
                        yield (s, p, o), self.__contexts()

    def __len__(self, context=None):
        """Return the number of triples in the store (``context`` unused)."""
        # @@ optimize: this walks every triple in the store
        return sum(1 for _ in self.triples((None, None, None)))

    def bind(self, prefix, namespace):
        """Associate ``prefix`` with ``namespace`` in both directions."""
        self.__prefix[namespace] = prefix
        self.__namespace[prefix] = namespace

    def namespace(self, prefix):
        """Return the namespace bound to ``prefix``, or None if unbound."""
        return self.__namespace.get(prefix, None)

    def prefix(self, namespace):
        """Return the prefix bound to ``namespace``, or None if unbound."""
        return self.__prefix.get(namespace, None)

    def namespaces(self):
        """Yield every ``(prefix, namespace)`` binding."""
        for prefix, namespace in self.__namespace.items():
            yield prefix, namespace

    def __contexts(self):
        """Return an empty generator; this store records no contexts."""
        return (c for c in ())
166
167
class IOMemory(Store):
    """\
    An integer-key-optimized context-aware in-memory store.

    Uses three dict indices (for subjects, objects and predicates) holding
    sets of triples. Context information is tracked in a separate dict, with
    the triple as key and a dict of {context: quoted} items as value. The
    context information is used to filter triple query results.

    Memory usage is low due to several optimizations. RDF nodes are not
    stored directly in the indices; instead, the indices hold integer keys
    and the actual nodes are only stored once in int-to-object and
    object-to-int mapping dictionaries. A default context is determined
    based on the first triple that is added to the store, and no context
    information is actually stored for subsequent other triples with the
    same context information.

    Most operations should be quite fast, but a triples() query with two
    bound parts requires a set intersection operation, which may be slow in
    some cases. When multiple contexts are used in the same store, filtering
    based on context has to be done after each query, which may also be
    slow.

    """
    context_aware = True
    formula_aware = True
    graph_aware = True

    # The following variable name conventions are used in this class:
    #
    # subject, predicate, object             unencoded triple parts
    # triple = (subject, predicate, object)  unencoded triple
    # context:                               unencoded context
    #
    # sid, pid, oid                          integer-encoded triple parts
    # enctriple = (sid, pid, oid)            integer-encoded triple
    # cid                                    integer-encoded context

    def __init__(self, configuration=None, identifier=None):
        """Create an empty store.

        ``configuration`` and ``identifier`` are accepted for signature
        compatibility but are not used here.
        """
        super(IOMemory, self).__init__()
        self.__namespace = {}
        self.__prefix = {}

        # Mappings for encoding RDF nodes using integer keys, to save memory
        # in the indexes. Note that None is always mapped to itself, to make
        # it easy to test for it in either encoded or unencoded form.
        self.__int2obj = {None: None}  # maps integer keys to objects
        self.__obj2int = {None: None}  # maps objects to integer keys

        # Indexes for each triple part, and a list of contexts for each triple
        self.__subjectIndex = {}    # key: sid    val: set(enctriples)
        self.__predicateIndex = {}  # key: pid    val: set(enctriples)
        self.__objectIndex = {}     # key: oid    val: set(enctriples)
        # key: enctriple    val: {cid1: quoted, cid2: quoted ...}
        self.__tripleContexts = {}
        # key: cid    val: set(enctriples)
        self.__contextTriples = {None: set()}

        # all contexts used in store (unencoded)
        self.__all_contexts = set()
        # default context information for triples
        self.__defaultContexts = None

    def bind(self, prefix, namespace):
        """Associate ``prefix`` with ``namespace`` in both directions."""
        self.__prefix[namespace] = prefix
        self.__namespace[prefix] = namespace

    def namespace(self, prefix):
        """Return the namespace bound to ``prefix``, or None if unbound."""
        return self.__namespace.get(prefix, None)

    def prefix(self, namespace):
        """Return the prefix bound to ``namespace``, or None if unbound."""
        return self.__prefix.get(namespace, None)

    def namespaces(self):
        """Yield every ``(prefix, namespace)`` binding."""
        for prefix, namespace in self.__namespace.items():
            yield prefix, namespace

    def add(self, triple, context, quoted=False):
        """Add ``triple`` to the store under ``context``.

        A non-None ``context`` is recorded in the set of known contexts.
        ``quoted`` is stored with the triple's context information; a
        non-quoted triple is also added to the default (None) context.
        """
        Store.add(self, triple, context, quoted)

        if context is not None:
            self.__all_contexts.add(context)

        enctriple = self.__encodeTriple(triple)
        sid, pid, oid = enctriple

        # Must run before the indices are updated: it consults the subject
        # index to decide whether the triple already exists in the store.
        self.__addTripleContext(enctriple, context, quoted)

        self.__subjectIndex.setdefault(sid, set()).add(enctriple)
        self.__predicateIndex.setdefault(pid, set()).add(enctriple)
        self.__objectIndex.setdefault(oid, set()).add(enctriple)

    def remove(self, triplepat, context=None):
        """Remove the triples matching ``triplepat`` from ``context``.

        With ``context`` None the matching triples are removed from every
        context. A triple left without any context is dropped from the
        three part indices as well.
        """
        req_cid = self.__obj2id(context)
        for triple, contexts in self.triples(triplepat, context):
            enctriple = self.__encodeTriple(triple)
            for cid in self.__getTripleContexts(enctriple):
                if context is not None and req_cid != cid:
                    continue
                self.__removeTripleContext(enctriple, cid)
            ctxs = self.__getTripleContexts(enctriple, skipQuoted=True)
            # also drop the default-context entry when everything is being
            # removed, or when it is the only non-quoted context left
            if None in ctxs and (context is None or len(ctxs) == 1):
                self.__removeTripleContext(enctriple, None)
            if len(self.__getTripleContexts(enctriple)) == 0:
                # triple has been removed from all contexts
                sid, pid, oid = enctriple
                self.__subjectIndex[sid].remove(enctriple)
                self.__predicateIndex[pid].remove(enctriple)
                self.__objectIndex[oid].remove(enctriple)

                del self.__tripleContexts[enctriple]

        if req_cid is not None and \
                req_cid in self.__contextTriples and \
                len(self.__contextTriples[req_cid]) == 0:
            # all triples are removed out of this context
            # and it's not the default context so delete it
            del self.__contextTriples[req_cid]

        if triplepat == (None, None, None) and \
                context in self.__all_contexts and \
                not self.graph_aware:
            # remove the whole context
            self.__all_contexts.remove(context)

    def triples(self, triplein, context=None):
        """Return a generator over the triples matching ``triplein``.

        ``triplein`` is a (subject, predicate, object) pattern in which None
        leaves a position unbound. Results are restricted to triples that
        exist in ``context`` (None selects the default context). Each item
        is ``(triple, contexts)`` where ``contexts`` generates the
        non-quoted contexts the triple appears in.
        """
        if context is not None:
            if context == self:  # hmm...does this really ever happen?
                context = None

        cid = self.__obj2id(context)
        enctriple = self.__encodeTriple(triplein)
        sid, pid, oid = enctriple

        # all triples case (no triple parts given as pattern)
        if sid is None and pid is None and oid is None:
            return self.__all_triples(cid)

        # optimize "triple in graph" case (all parts given)
        if sid is not None and pid is not None and oid is not None:
            if sid in self.__subjectIndex and \
                    enctriple in self.__subjectIndex[sid] and \
                    self.__tripleHasContext(enctriple, cid):
                # single-item generator, evaluated lazily like other results
                return ((triplein, self.__contexts(enctriple)) for i in [0])
            else:
                return self.__emptygen()

        # remaining cases: one or two out of three given
        sets = []
        for part_id, index in ((sid, self.__subjectIndex),
                               (pid, self.__predicateIndex),
                               (oid, self.__objectIndex)):
            if part_id is None:
                continue
            if part_id not in index:
                # a bound part that was never indexed cannot match anything
                return self.__emptygen()
            sets.append(index[part_id])

        # to get the result, do an intersection of the sets (if necessary)
        if len(sets) > 1:
            enctriples = sets[0].intersection(*sets[1:])
        else:
            # copy so that the index may be modified while iterating
            enctriples = sets[0].copy()

        return ((self.__decodeTriple(enctriple), self.__contexts(enctriple))
                for enctriple in enctriples
                if self.__tripleHasContext(enctriple, cid))

    def contexts(self, triple=None):
        """Return a generator over contexts.

        With no triple, or the all-wildcard pattern (None, None, None), all
        contexts known to the store are generated. Otherwise only the
        non-quoted contexts containing exactly ``triple`` are generated.
        """
        # Compare by value: an identity test against a tuple literal does
        # not match an equal tuple built by the caller.
        if triple is None or triple == (None, None, None):
            return (context for context in self.__all_contexts)

        enctriple = self.__encodeTriple(triple)
        sid, pid, oid = enctriple
        if sid in self.__subjectIndex and enctriple in self.__subjectIndex[sid]:
            return self.__contexts(enctriple)
        else:
            return self.__emptygen()

    def __len__(self, context=None):
        """Return the number of triples held in ``context``."""
        cid = self.__obj2id(context)
        if cid not in self.__contextTriples:
            return 0
        return len(self.__contextTriples[cid])

    def add_graph(self, graph):
        """Register ``graph`` as a known context."""
        if not self.graph_aware:
            Store.add_graph(self, graph)
        else:
            self.__all_contexts.add(graph)

    def remove_graph(self, graph):
        """Remove all triples of ``graph`` and forget the context."""
        if not self.graph_aware:
            Store.remove_graph(self, graph)
        else:
            self.remove((None, None, None), graph)
            # we may not know this graph at all; that is no problem
            self.__all_contexts.discard(graph)

    # internal utility methods below

    def __addTripleContext(self, enctriple, context, quoted):
        """add the given context to the set of contexts for the triple"""
        cid = self.__obj2id(context)

        sid, pid, oid = enctriple
        if sid in self.__subjectIndex and enctriple in self.__subjectIndex[sid]:
            # we know the triple exists somewhere in the store
            if enctriple not in self.__tripleContexts:
                # triple exists with default ctx info
                # start with a copy of the default ctx info
                self.__tripleContexts[
                    enctriple] = self.__defaultContexts.copy()

            self.__tripleContexts[enctriple][cid] = quoted
            if not quoted:
                self.__tripleContexts[enctriple][None] = quoted
        else:
            # the triple didn't exist before in the store
            if quoted:  # this context only
                self.__tripleContexts[enctriple] = {cid: quoted}
            else:  # default context as well
                self.__tripleContexts[enctriple] = {cid: quoted, None: quoted}

        # if the triple is not quoted add it to the default context
        if not quoted:
            self.__contextTriples[None].add(enctriple)

        # always add the triple to given context, making sure it's initialized
        if cid not in self.__contextTriples:
            self.__contextTriples[cid] = set()
        self.__contextTriples[cid].add(enctriple)

        # if this is the first ever triple in the store, set default ctx info
        if self.__defaultContexts is None:
            self.__defaultContexts = self.__tripleContexts[enctriple]

        # if the context info is the same as default, no need to store it
        if self.__tripleContexts[enctriple] == self.__defaultContexts:
            del self.__tripleContexts[enctriple]

    def __getTripleContexts(self, enctriple, skipQuoted=False):
        """return a list of (encoded) contexts for the triple, skipping
           quoted contexts if skipQuoted==True"""

        # triples without an explicit entry carry the default context info
        ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts)

        if not skipQuoted:
            return list(ctxs.keys())

        return [cid for cid, quoted in ctxs.items() if not quoted]

    def __tripleHasContext(self, enctriple, cid):
        """return True iff the triple exists in the given context"""
        ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts)
        return (cid in ctxs)

    def __removeTripleContext(self, enctriple, cid):
        """remove the context from the triple"""
        # work on a copy so the shared default context info is never mutated
        ctxs = self.__tripleContexts.get(
            enctriple, self.__defaultContexts).copy()
        del ctxs[cid]
        if ctxs == self.__defaultContexts:
            del self.__tripleContexts[enctriple]
        else:
            self.__tripleContexts[enctriple] = ctxs
        self.__contextTriples[cid].remove(enctriple)

    def __obj2id(self, obj):
        """encode object, storing it in the encoding map if necessary,
           and return the integer key"""
        if obj not in self.__obj2int:
            key = randid()
            # draw again until the key is not already taken
            while key in self.__int2obj:
                key = randid()
            self.__obj2int[obj] = key
            self.__int2obj[key] = obj
            return key
        return self.__obj2int[obj]

    def __encodeTriple(self, triple):
        """encode a whole triple, returning the encoded triple"""
        return tuple(map(self.__obj2id, triple))

    def __decodeTriple(self, enctriple):
        """decode a whole encoded triple, returning the original
        triple"""
        return tuple(map(self.__int2obj.get, enctriple))

    def __all_triples(self, cid):
        """return a generator which yields all the triples (unencoded)
           of the given context"""
        if cid not in self.__contextTriples:
            return
        # iterate over a copy so the context may be modified meanwhile
        for enctriple in self.__contextTriples[cid].copy():
            yield self.__decodeTriple(enctriple), self.__contexts(enctriple)

    def __contexts(self, enctriple):
        """return a generator for all the non-quoted contexts
           (unencoded) the encoded triple appears in"""
        return (self.__int2obj.get(cid)
                for cid in self.__getTripleContexts(enctriple, skipQuoted=True)
                if cid is not None)

    def __emptygen(self):
        """return an empty generator"""
        # the unreachable yield is what makes this function a generator
        if False:
            yield
495
496
497 import random
498
499
def randid(randint=random.randint, choice=random.choice, signs=(-1, 1)):
    """Return a random non-zero integer key.

    The sign is picked from ``signs`` with ``choice`` and the magnitude is
    drawn with ``randint`` from 1 to 2000000000 inclusive. The callables
    are bound as defaults when the function is defined, so the function
    does not need the ``random`` module name at call time.
    """
    # Draw the sign first, then the magnitude, so the sequence of calls
    # into the random source stays the same.
    sign = choice(signs)
    magnitude = randint(1, 2000000000)
    return sign * magnitude
502
503 del random