Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/rdflib/plugins/memory.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author | shellac |
---|---|
date | Sat, 02 May 2020 07:14:21 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:26e78fe6e8c4 |
---|---|
1 from rdflib.term import BNode | |
2 from rdflib.store import Store, NO_STORE, VALID_STORE | |
3 | |
# Public names exported by this module.
__all__ = ['Memory', 'IOMemory']

# Wildcard marker used in triple patterns: a pattern part that is ANY (None)
# is treated as unbound by the stores' triples() methods.
ANY = Any = None
7 | |
8 | |
class Memory(Store):
    """\
    An in memory implementation of a triple store.

    This triple store uses nested dictionaries to store triples. Each
    triple is stored in three such indices as follows: spo[s][p][o] = 1,
    pos[p][o][s] = 1 and osp[o][s][p] = 1.

    This store does not track contexts: the ``context`` and ``quoted``
    arguments of its methods are accepted but ignored, and every triple
    produced by triples() is paired with an empty iterator of contexts.

    Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser
    """

    def __init__(self, configuration=None, identifier=None):
        """Create an empty store.

        ``configuration`` is forwarded to the base ``Store`` initialiser;
        ``identifier`` is kept on the instance as ``self.identifier``.
        """
        super(Memory, self).__init__(configuration)
        self.identifier = identifier

        # indexed by [subject][predicate][object]
        self.__spo = {}

        # indexed by [predicate][object][subject]
        self.__pos = {}

        # indexed by [object][subject][predicate]
        self.__osp = {}

        # prefix -> namespace and namespace -> prefix bindings
        self.__namespace = {}
        self.__prefix = {}

    def add(self, xxx_todo_changeme, context, quoted=False):
        """\
        Add a triple to the store of triples.

        ``xxx_todo_changeme`` is the (subject, predicate, object) triple.
        ``context`` and ``quoted`` are ignored by this store.
        """
        (subject, predicate, obj) = xxx_todo_changeme

        # setdefault creates the intermediate dictionaries on demand
        # without hiding unrelated errors behind a bare ``except``.
        self.__spo.setdefault(subject, {}).setdefault(predicate, {})[obj] = 1
        self.__pos.setdefault(predicate, {}).setdefault(obj, {})[subject] = 1
        self.__osp.setdefault(obj, {}).setdefault(subject, {})[predicate] = 1

    def remove(self, xxx_todo_changeme1, context=None):
        """Remove every triple matching the given pattern.

        Parts of the (subject, predicate, object) pattern that are None
        act as wildcards. ``context`` is ignored by this store.
        """
        (subject, predicate, obj) = xxx_todo_changeme1
        # triples() walks snapshots of the index keys, so entries can be
        # deleted while its generator is being consumed.
        for (s, p, o), _ in self.triples((subject, predicate, obj)):
            del self.__spo[s][p][o]
            del self.__pos[p][o][s]
            del self.__osp[o][s][p]

    def triples(self, xxx_todo_changeme2, context=None):
        """A generator over all the triples matching the given pattern.

        Parts of the (subject, predicate, object) pattern that are None
        (ANY) are unbound. Each item yielded is a pair
        ``((s, p, o), contexts)`` where ``contexts`` is an empty iterator.
        ``context`` is ignored by this store.
        """
        (subject, predicate, obj) = xxx_todo_changeme2

        if subject is not ANY:  # subject is given
            by_predicate = self.__spo.get(subject)
            if by_predicate is None:  # given subject not found
                return
            if predicate is not ANY:  # subject+predicate is given
                objects = by_predicate.get(predicate)
                if objects is None:  # given predicate not found
                    return
                if obj is not ANY:  # subject+predicate+object is given
                    if obj in objects:
                        yield (subject, predicate, obj), self.__contexts()
                else:  # subject+predicate is given, object unbound
                    for o in list(objects):
                        yield (subject, predicate, o), self.__contexts()
            else:  # subject given, predicate unbound
                for p in list(by_predicate):
                    if obj is not ANY:  # object is given
                        if obj in by_predicate[p]:
                            yield (subject, p, obj), self.__contexts()
                    else:  # object unbound
                        for o in list(by_predicate[p]):
                            yield (subject, p, o), self.__contexts()
        elif predicate is not ANY:  # predicate is given, subject unbound
            by_object = self.__pos.get(predicate)
            if by_object is None:  # given predicate not found
                return
            if obj is not ANY:  # predicate+object is given, subject unbound
                if obj in by_object:
                    for s in list(by_object[obj]):
                        yield (s, predicate, obj), self.__contexts()
            else:  # predicate is given, object+subject unbound
                for o in list(by_object):
                    for s in list(by_object[o]):
                        yield (s, predicate, o), self.__contexts()
        elif obj is not ANY:  # object is given, subject+predicate unbound
            by_subject = self.__osp.get(obj)
            if by_subject is None:  # given object not found
                return
            for s in list(by_subject):
                for p in list(by_subject[s]):
                    yield (s, p, obj), self.__contexts()
        else:  # subject+predicate+object unbound
            spo = self.__spo
            for s in list(spo):
                by_predicate = spo[s]
                for p in list(by_predicate):
                    for o in list(by_predicate[p]):
                        yield (s, p, o), self.__contexts()

    def __len__(self, context=None):
        """Return the number of triples in the store.

        Counts by iterating over every triple; ``context`` is ignored.
        """
        return sum(1 for _ in self.triples((None, None, None)))

    def bind(self, prefix, namespace):
        """Associate ``prefix`` with ``namespace`` in both directions."""
        self.__prefix[namespace] = prefix
        self.__namespace[prefix] = namespace

    def namespace(self, prefix):
        """Return the namespace bound to ``prefix``, or None if unbound."""
        return self.__namespace.get(prefix, None)

    def prefix(self, namespace):
        """Return the prefix bound to ``namespace``, or None if unbound."""
        return self.__prefix.get(namespace, None)

    def namespaces(self):
        """Generate the (prefix, namespace) pairs currently bound."""
        for prefix, namespace in self.__namespace.items():
            yield prefix, namespace

    def __contexts(self):
        """Return an empty generator: this store keeps no context info."""
        return (c for c in [])
166 | |
167 | |
class IOMemory(Store):
    """\
    An integer-key-optimized context-aware in-memory store.

    Uses three dict indices (for subjects, objects and predicates) holding
    sets of triples. Context information is tracked in a separate dict, with
    the triple as key and a dict of {context: quoted} items as value. The
    context information is used to filter triple query results.

    Memory usage is low due to several optimizations. RDF nodes are not
    stored directly in the indices; instead, the indices hold integer keys
    and the actual nodes are only stored once in int-to-object and
    object-to-int mapping dictionaries. A default context is determined
    based on the first triple that is added to the store, and no context
    information is actually stored for subsequent other triples with the
    same context information.

    Most operations should be quite fast, but a triples() query with two
    bound parts requires a set intersection operation, which may be slow in
    some cases. When multiple contexts are used in the same store, filtering
    based on context has to be done after each query, which may also be
    slow.

    """
    context_aware = True
    formula_aware = True
    graph_aware = True

    # The following variable name conventions are used in this class:
    #
    # subject, predicate, object             unencoded triple parts
    # triple = (subject, predicate, object)  unencoded triple
    # context:                               unencoded context
    #
    # sid, pid, oid                          integer-encoded triple parts
    # enctriple = (sid, pid, oid)            integer-encoded triple
    # cid                                    integer-encoded context

    def __init__(self, configuration=None, identifier=None):
        """Create an empty store.

        ``identifier`` is kept on the instance as ``self.identifier``,
        as ``Memory`` does. ``configuration`` is accepted for signature
        compatibility but is not forwarded to the base initialiser.
        """
        super(IOMemory, self).__init__()
        self.identifier = identifier
        self.__namespace = {}
        self.__prefix = {}

        # Mappings for encoding RDF nodes using integer keys, to save memory
        # in the indexes. Note that None is always mapped to itself, to make
        # it easy to test for it in either encoded or unencoded form.
        self.__int2obj = {None: None}  # maps integer keys to objects
        self.__obj2int = {None: None}  # maps objects to integer keys

        # Indexes for each triple part, and a list of contexts for each triple
        self.__subjectIndex = {}    # key: sid    val: set(enctriples)
        self.__predicateIndex = {}  # key: pid    val: set(enctriples)
        self.__objectIndex = {}     # key: oid    val: set(enctriples)
        # key: enctriple    val: {cid1: quoted, cid2: quoted ...}
        self.__tripleContexts = {}
        # key: cid    val: set(enctriples)
        self.__contextTriples = {None: set()}

        # all contexts used in store (unencoded)
        self.__all_contexts = set()
        # default context information for triples
        self.__defaultContexts = None

    def bind(self, prefix, namespace):
        """Associate ``prefix`` with ``namespace`` in both directions."""
        self.__prefix[namespace] = prefix
        self.__namespace[prefix] = namespace

    def namespace(self, prefix):
        """Return the namespace bound to ``prefix``, or None if unbound."""
        return self.__namespace.get(prefix, None)

    def prefix(self, namespace):
        """Return the prefix bound to ``namespace``, or None if unbound."""
        return self.__prefix.get(namespace, None)

    def namespaces(self):
        """Generate the (prefix, namespace) pairs currently bound."""
        for prefix, namespace in self.__namespace.items():
            yield prefix, namespace

    def add(self, triple, context, quoted=False):
        """Add ``triple`` to the store under ``context``.

        A non-None ``context`` is recorded in the set of known contexts.
        ``quoted`` is stored with the triple's context information.
        """
        Store.add(self, triple, context, quoted)

        if context is not None:
            self.__all_contexts.add(context)

        enctriple = self.__encodeTriple(triple)
        sid, pid, oid = enctriple

        # Must run before the indices are updated: it consults the subject
        # index to decide whether the triple is already in the store.
        self.__addTripleContext(enctriple, context, quoted)

        self.__subjectIndex.setdefault(sid, set()).add(enctriple)
        self.__predicateIndex.setdefault(pid, set()).add(enctriple)
        self.__objectIndex.setdefault(oid, set()).add(enctriple)

    def remove(self, triplepat, context=None):
        """Remove the triples matching ``triplepat``.

        With ``context`` None the matching triples are removed from every
        context; otherwise only from the given one. A triple left without
        any context is dropped from the indices.
        """
        req_cid = self.__obj2id(context)
        for triple, contexts in self.triples(triplepat, context):
            enctriple = self.__encodeTriple(triple)
            for cid in self.__getTripleContexts(enctriple):
                if context is not None and req_cid != cid:
                    continue
                self.__removeTripleContext(enctriple, cid)
            ctxs = self.__getTripleContexts(enctriple, skipQuoted=True)
            if None in ctxs and (context is None or len(ctxs) == 1):
                self.__removeTripleContext(enctriple, None)
            if len(self.__getTripleContexts(enctriple)) == 0:
                # triple has been removed from all contexts
                sid, pid, oid = enctriple
                self.__subjectIndex[sid].remove(enctriple)
                self.__predicateIndex[pid].remove(enctriple)
                self.__objectIndex[oid].remove(enctriple)

                del self.__tripleContexts[enctriple]

        if req_cid is not None and \
                req_cid in self.__contextTriples and \
                len(self.__contextTriples[req_cid]) == 0:
            # all triples are removed out of this context
            # and it's not the default context so delete it
            del self.__contextTriples[req_cid]

        if triplepat == (None, None, None) and \
                context in self.__all_contexts and \
                not self.graph_aware:
            # remove the whole context
            self.__all_contexts.remove(context)

    def triples(self, triplein, context=None):
        """Return an iterator over the triples matching ``triplein``.

        None parts of the pattern are unbound. Each item is a pair
        ``(triple, contexts)`` where ``contexts`` iterates over the
        non-quoted contexts the triple appears in. Results are limited
        to ``context``; None selects the default context.
        """
        if context is not None:
            if context == self:  # hmm...does this really ever happen?
                context = None

        cid = self.__obj2id(context)
        enctriple = self.__encodeTriple(triplein)
        sid, pid, oid = enctriple

        # all triples case (no triple parts given as pattern)
        if sid is None and pid is None and oid is None:
            return self.__all_triples(cid)

        # optimize "triple in graph" case (all parts given)
        if sid is not None and pid is not None and oid is not None:
            if sid in self.__subjectIndex and \
                    enctriple in self.__subjectIndex[sid] and \
                    self.__tripleHasContext(enctriple, cid):
                return ((triplein, self.__contexts(enctriple)) for i in [0])
            else:
                return self.__emptygen()

        # remaining cases: one or two out of three given
        sets = []
        if sid is not None:
            if sid in self.__subjectIndex:
                sets.append(self.__subjectIndex[sid])
            else:
                return self.__emptygen()
        if pid is not None:
            if pid in self.__predicateIndex:
                sets.append(self.__predicateIndex[pid])
            else:
                return self.__emptygen()
        if oid is not None:
            if oid in self.__objectIndex:
                sets.append(self.__objectIndex[oid])
            else:
                return self.__emptygen()

        # to get the result, do an intersection of the sets (if necessary);
        # either way the result is a new set, detached from the indices
        if len(sets) > 1:
            enctriples = sets[0].intersection(*sets[1:])
        else:
            enctriples = sets[0].copy()

        return ((self.__decodeTriple(enctriple), self.__contexts(enctriple))
                for enctriple in enctriples
                if self.__tripleHasContext(enctriple, cid))

    def contexts(self, triple=None):
        """Return an iterator over contexts.

        With no triple, or the all-wildcard pattern (None, None, None),
        iterate over every context known to the store. Otherwise iterate
        over the non-quoted contexts holding ``triple``; the iterator is
        empty if the triple is not in the store.
        """
        # ``==`` rather than ``is``: an identity test against a tuple
        # literal is never true for a tuple supplied by the caller.
        if triple is None or triple == (None, None, None):
            return (context for context in self.__all_contexts)

        enctriple = self.__encodeTriple(triple)
        sid, pid, oid = enctriple
        if sid in self.__subjectIndex and \
                enctriple in self.__subjectIndex[sid]:
            return self.__contexts(enctriple)
        else:
            return self.__emptygen()

    def __len__(self, context=None):
        """Return the number of triples recorded for ``context``.

        None selects the default context; an unknown context gives 0.
        """
        cid = self.__obj2id(context)
        if cid not in self.__contextTriples:
            return 0
        return len(self.__contextTriples[cid])

    def add_graph(self, graph):
        """Register ``graph`` as a known context.

        Delegates to ``Store.add_graph`` when the store is not graph aware.
        """
        if not self.graph_aware:
            Store.add_graph(self, graph)
        else:
            self.__all_contexts.add(graph)

    def remove_graph(self, graph):
        """Remove all triples of ``graph`` and forget the context.

        Delegates to ``Store.remove_graph`` when the store is not graph
        aware.
        """
        if not self.graph_aware:
            Store.remove_graph(self, graph)
        else:
            self.remove((None, None, None), graph)
            try:
                self.__all_contexts.remove(graph)
            except KeyError:
                pass  # we didn't know this graph, no problem

    # internal utility methods below

    def __addTripleContext(self, enctriple, context, quoted):
        """add the given context to the set of contexts for the triple"""
        cid = self.__obj2id(context)

        sid, pid, oid = enctriple
        if sid in self.__subjectIndex and \
                enctriple in self.__subjectIndex[sid]:
            # we know the triple exists somewhere in the store
            if enctriple not in self.__tripleContexts:
                # triple exists with default ctx info
                # start with a copy of the default ctx info
                self.__tripleContexts[enctriple] = \
                    self.__defaultContexts.copy()

            self.__tripleContexts[enctriple][cid] = quoted
            if not quoted:
                self.__tripleContexts[enctriple][None] = quoted
        else:
            # the triple didn't exist before in the store
            if quoted:  # this context only
                self.__tripleContexts[enctriple] = {cid: quoted}
            else:  # default context as well
                self.__tripleContexts[enctriple] = {cid: quoted, None: quoted}

        # if the triple is not quoted add it to the default context
        if not quoted:
            self.__contextTriples[None].add(enctriple)

        # always add the triple to given context, making sure it's initialized
        self.__contextTriples.setdefault(cid, set()).add(enctriple)

        # if this is the first ever triple in the store, set default ctx info
        if self.__defaultContexts is None:
            self.__defaultContexts = self.__tripleContexts[enctriple]

        # if the context info is the same as default, no need to store it
        if self.__tripleContexts[enctriple] == self.__defaultContexts:
            del self.__tripleContexts[enctriple]

    def __getTripleContexts(self, enctriple, skipQuoted=False):
        """return a list of (encoded) contexts for the triple, skipping
           quoted contexts if skipQuoted==True"""

        # triples without explicit context info use the default info
        ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts)

        if not skipQuoted:
            return list(ctxs.keys())

        return [cid for cid, quoted in ctxs.items() if not quoted]

    def __tripleHasContext(self, enctriple, cid):
        """return True iff the triple exists in the given context"""
        ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts)
        return (cid in ctxs)

    def __removeTripleContext(self, enctriple, cid):
        """remove the context from the triple"""
        # work on a copy so the shared default context info is not mutated
        ctxs = self.__tripleContexts.get(
            enctriple, self.__defaultContexts).copy()
        del ctxs[cid]
        if ctxs == self.__defaultContexts:
            del self.__tripleContexts[enctriple]
        else:
            self.__tripleContexts[enctriple] = ctxs
        self.__contextTriples[cid].remove(enctriple)

    def __obj2id(self, obj):
        """encode object, storing it in the encoding map if necessary,
           and return the integer key"""
        if obj not in self.__obj2int:
            key = randid()
            # draw again until the key is not already in use
            while key in self.__int2obj:
                key = randid()
            self.__obj2int[obj] = key
            self.__int2obj[key] = obj
            return key
        return self.__obj2int[obj]

    def __encodeTriple(self, triple):
        """encode a whole triple, returning the encoded triple"""
        return tuple(map(self.__obj2id, triple))

    def __decodeTriple(self, enctriple):
        """decode a whole encoded triple, returning the original
        triple"""
        return tuple(map(self.__int2obj.get, enctriple))

    def __all_triples(self, cid):
        """return a generator which yields all the triples (unencoded)
           of the given context"""
        if cid not in self.__contextTriples:
            return
        # iterate over a copy so the set may change while being consumed
        for enctriple in self.__contextTriples[cid].copy():
            yield self.__decodeTriple(enctriple), self.__contexts(enctriple)

    def __contexts(self, enctriple):
        """return a generator for all the non-quoted contexts
           (unencoded) the encoded triple appears in"""
        return (self.__int2obj.get(cid)
                for cid in self.__getTripleContexts(enctriple, skipQuoted=True)
                if cid is not None)

    def __emptygen(self):
        """return an empty generator"""
        if False:
            yield
496 | |
497 import random | |
498 | |
499 | |
def randid(randint=random.randint, choice=random.choice, signs=(-1, 1)):
    """Return a random integer key.

    The result is a sign picked from ``signs`` by ``choice`` multiplied by
    a magnitude obtained from ``randint(1, 2000000000)``. The ``random``
    functions are bound as default arguments when the function is defined.
    """
    sign = choice(signs)
    magnitude = randint(1, 2000000000)
    return sign * magnitude


# randid keeps its own references to the functions through its defaults,
# so the module name is removed from this namespace again.
del random