diff planemo/lib/python3.7/site-packages/rdflib/plugins/memory.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/planemo/lib/python3.7/site-packages/rdflib/plugins/memory.py	Fri Jul 31 00:32:28 2020 -0400
@@ -0,0 +1,503 @@
+from rdflib.term import BNode
+from rdflib.store import Store, NO_STORE, VALID_STORE
+
+__all__ = ['Memory', 'IOMemory']
+
+ANY = Any = None
+
+
+class Memory(Store):
+    """\
+    An in memory implementation of a triple store.
+
+    This triple store uses nested dictionaries to store triples. Each
+    triple is stored in two such indices as follows spo[s][p][o] = 1 and
+    pos[p][o][s] = 1.
+
+    Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser
+    """
+    def __init__(self, configuration=None, identifier=None):
+        super(Memory, self).__init__(configuration)
+        self.identifier = identifier
+
+        # indexed by [subject][predicate][object]
+        self.__spo = {}
+
+        # indexed by [predicate][object][subject]
+        self.__pos = {}
+
+        # indexed by [predicate][object][subject]
+        self.__osp = {}
+
+        self.__namespace = {}
+        self.__prefix = {}
+
+    def add(self, xxx_todo_changeme, context, quoted=False):
+        """\
+        Add a triple to the store of triples.
+        """
+        (subject, predicate, object) = xxx_todo_changeme
+        spo = self.__spo
+        try:
+            po = spo[subject]
+        except:
+            po = spo[subject] = {}
+        try:
+            o = po[predicate]
+        except:
+            o = po[predicate] = {}
+        o[object] = 1
+
+        pos = self.__pos
+        try:
+            os = pos[predicate]
+        except:
+            os = pos[predicate] = {}
+        try:
+            s = os[object]
+        except:
+            s = os[object] = {}
+        s[subject] = 1
+
+        osp = self.__osp
+        try:
+            sp = osp[object]
+        except:
+            sp = osp[object] = {}
+        try:
+            p = sp[subject]
+        except:
+            p = sp[subject] = {}
+        p[predicate] = 1
+
+    def remove(self, xxx_todo_changeme1, context=None):
+        (subject, predicate, object) = xxx_todo_changeme1
+        for (subject, predicate, object), c in self.triples(
+                (subject, predicate, object)):
+            del self.__spo[subject][predicate][object]
+            del self.__pos[predicate][object][subject]
+            del self.__osp[object][subject][predicate]
+
+    def triples(self, xxx_todo_changeme2, context=None):
+        """A generator over all the triples matching """
+        (subject, predicate, object) = xxx_todo_changeme2
+        if subject != ANY:  # subject is given
+            spo = self.__spo
+            if subject in spo:
+                subjectDictionary = spo[subject]
+                if predicate != ANY:  # subject+predicate is given
+                    if predicate in subjectDictionary:
+                        if object != ANY:  # subject+predicate+object is given
+                            if object in subjectDictionary[predicate]:
+                                yield (subject, predicate, object), \
+                                    self.__contexts()
+                            else:  # given object not found
+                                pass
+                        else:  # subject+predicate is given, object unbound
+                            for o in list(subjectDictionary[predicate].keys()):
+                                yield (subject, predicate, o), \
+                                    self.__contexts()
+                    else:  # given predicate not found
+                        pass
+                else:  # subject given, predicate unbound
+                    for p in list(subjectDictionary.keys()):
+                        if object != ANY:  # object is given
+                            if object in subjectDictionary[p]:
+                                yield (subject, p, object), self.__contexts()
+                            else:  # given object not found
+                                pass
+                        else:  # object unbound
+                            for o in list(subjectDictionary[p].keys()):
+                                yield (subject, p, o), self.__contexts()
+            else:  # given subject not found
+                pass
+        elif predicate != ANY:  # predicate is given, subject unbound
+            pos = self.__pos
+            if predicate in pos:
+                predicateDictionary = pos[predicate]
+                if object != ANY:  # predicate+object is given, subject unbound
+                    if object in predicateDictionary:
+                        for s in list(predicateDictionary[object].keys()):
+                            yield (s, predicate, object), self.__contexts()
+                    else:  # given object not found
+                        pass
+                else:  # predicate is given, object+subject unbound
+                    for o in list(predicateDictionary.keys()):
+                        for s in list(predicateDictionary[o].keys()):
+                            yield (s, predicate, o), self.__contexts()
+        elif object != ANY:  # object is given, subject+predicate unbound
+            osp = self.__osp
+            if object in osp:
+                objectDictionary = osp[object]
+                for s in list(objectDictionary.keys()):
+                    for p in list(objectDictionary[s].keys()):
+                        yield (s, p, object), self.__contexts()
+        else:  # subject+predicate+object unbound
+            spo = self.__spo
+            for s in list(spo.keys()):
+                subjectDictionary = spo[s]
+                for p in list(subjectDictionary.keys()):
+                    for o in list(subjectDictionary[p].keys()):
+                        yield (s, p, o), self.__contexts()
+
+    def __len__(self, context=None):
+        #@@ optimize
+        i = 0
+        for triple in self.triples((None, None, None)):
+            i += 1
+        return i
+
+    def bind(self, prefix, namespace):
+        self.__prefix[namespace] = prefix
+        self.__namespace[prefix] = namespace
+
+    def namespace(self, prefix):
+        return self.__namespace.get(prefix, None)
+
+    def prefix(self, namespace):
+        return self.__prefix.get(namespace, None)
+
+    def namespaces(self):
+        for prefix, namespace in self.__namespace.items():
+            yield prefix, namespace
+
+    def __contexts(self):
+        return (c for c in [])  # TODO: best way to return empty generator
+
+
+class IOMemory(Store):
+    """\
+    An integer-key-optimized context-aware in-memory store.
+
+    Uses three dict indices (for subjects, objects and predicates) holding
+    sets of triples. Context information is tracked in a separate dict, with
+    the triple as key and a dict of {context: quoted} items as value. The
+    context information is used to filter triple query results.
+
+    Memory usage is low due to several optimizations. RDF nodes are not
+    stored directly in the indices; instead, the indices hold integer keys
+    and the actual nodes are only stored once in int-to-object and
+    object-to-int mapping dictionaries. A default context is determined
+    based on the first triple that is added to the store, and no context
+    information is actually stored for subsequent other triples with the
+    same context information.
+
+    Most operations should be quite fast, but a triples() query with two
+    bound parts requires a set intersection operation, which may be slow in
+    some cases. When multiple contexts are used in the same store, filtering
+    based on context has to be done after each query, which may also be
+    slow.
+
+    """
+    context_aware = True
+    formula_aware = True
+    graph_aware = True
+
+    # The following variable name conventions are used in this class:
+    #
+    # subject, predicate, object             unencoded triple parts
+    # triple = (subject, predicate, object)  unencoded triple
+    # context:                               unencoded context
+    #
+    # sid, pid, oid                          integer-encoded triple parts
+    # enctriple = (sid, pid, oid)            integer-encoded triple
+    # cid                                    integer-encoded context
+
+    def __init__(self, configuration=None, identifier=None):
+        super(IOMemory, self).__init__()
+        self.__namespace = {}
+        self.__prefix = {}
+
+        # Mappings for encoding RDF nodes using integer keys, to save memory
+        # in the indexes Note that None is always mapped to itself, to make
+        # it easy to test for it in either encoded or unencoded form.
+        self.__int2obj = {None: None}  # maps integer keys to objects
+        self.__obj2int = {None: None}  # maps objects to integer keys
+
+        # Indexes for each triple part, and a list of contexts for each triple
+        self.__subjectIndex = {}    # key: sid    val: set(enctriples)
+        self.__predicateIndex = {}  # key: pid    val: set(enctriples)
+        self.__objectIndex = {}     # key: oid    val: set(enctriples)
+        self.__tripleContexts = {
+        }  # key: enctriple    val: {cid1: quoted, cid2: quoted ...}
+        self.__contextTriples = {None: set()}  # key: cid    val: set(enctriples)
+
+        # all contexts used in store (unencoded)
+        self.__all_contexts = set()
+        # default context information for triples
+        self.__defaultContexts = None
+
+    def bind(self, prefix, namespace):
+        self.__prefix[namespace] = prefix
+        self.__namespace[prefix] = namespace
+
+    def namespace(self, prefix):
+        return self.__namespace.get(prefix, None)
+
+    def prefix(self, namespace):
+        return self.__prefix.get(namespace, None)
+
+    def namespaces(self):
+        for prefix, namespace in self.__namespace.items():
+            yield prefix, namespace
+
+    def add(self, triple, context, quoted=False):
+        Store.add(self, triple, context, quoted)
+
+        if context is not None:
+            self.__all_contexts.add(context)
+
+        enctriple = self.__encodeTriple(triple)
+        sid, pid, oid = enctriple
+
+        self.__addTripleContext(enctriple, context, quoted)
+
+        if sid in self.__subjectIndex:
+            self.__subjectIndex[sid].add(enctriple)
+        else:
+            self.__subjectIndex[sid] = set([enctriple])
+
+        if pid in self.__predicateIndex:
+            self.__predicateIndex[pid].add(enctriple)
+        else:
+            self.__predicateIndex[pid] = set([enctriple])
+
+        if oid in self.__objectIndex:
+            self.__objectIndex[oid].add(enctriple)
+        else:
+            self.__objectIndex[oid] = set([enctriple])
+
+    def remove(self, triplepat, context=None):
+        req_cid = self.__obj2id(context)
+        for triple, contexts in self.triples(triplepat, context):
+            enctriple = self.__encodeTriple(triple)
+            for cid in self.__getTripleContexts(enctriple):
+                if context is not None and req_cid != cid:
+                    continue
+                self.__removeTripleContext(enctriple, cid)
+            ctxs = self.__getTripleContexts(enctriple, skipQuoted=True)
+            if None in ctxs and (context is None or len(ctxs) == 1):
+                self.__removeTripleContext(enctriple, None)
+            if len(self.__getTripleContexts(enctriple)) == 0:
+                # triple has been removed from all contexts
+                sid, pid, oid = enctriple
+                self.__subjectIndex[sid].remove(enctriple)
+                self.__predicateIndex[pid].remove(enctriple)
+                self.__objectIndex[oid].remove(enctriple)
+
+                del self.__tripleContexts[enctriple]
+
+        if not req_cid is None and \
+                req_cid in self.__contextTriples and \
+                len(self.__contextTriples[req_cid]) == 0:
+            # all triples are removed out of this context
+            # and it's not the default context so delete it
+            del self.__contextTriples[req_cid]
+
+        if triplepat == (None, None, None) and \
+                context in self.__all_contexts and \
+                not self.graph_aware:
+            # remove the whole context
+            self.__all_contexts.remove(context)
+
+    def triples(self, triplein, context=None):
+        if context is not None:
+            if context == self:  # hmm...does this really ever happen?
+                context = None
+
+        cid = self.__obj2id(context)
+        enctriple = self.__encodeTriple(triplein)
+        sid, pid, oid = enctriple
+
+        # all triples case (no triple parts given as pattern)
+        if sid is None and pid is None and oid is None:
+            return self.__all_triples(cid)
+
+        # optimize "triple in graph" case (all parts given)
+        if sid is not None and pid is not None and oid is not None:
+            if sid in self.__subjectIndex and \
+               enctriple in self.__subjectIndex[sid] and \
+               self.__tripleHasContext(enctriple, cid):
+                return ((triplein, self.__contexts(enctriple)) for i in [0])
+            else:
+                return self.__emptygen()
+
+        # remaining cases: one or two out of three given
+        sets = []
+        if sid is not None:
+            if sid in self.__subjectIndex:
+                sets.append(self.__subjectIndex[sid])
+            else:
+                return self.__emptygen()
+        if pid is not None:
+            if pid in self.__predicateIndex:
+                sets.append(self.__predicateIndex[pid])
+            else:
+                return self.__emptygen()
+        if oid is not None:
+            if oid in self.__objectIndex:
+                sets.append(self.__objectIndex[oid])
+            else:
+                return self.__emptygen()
+
+        # to get the result, do an intersection of the sets (if necessary)
+        if len(sets) > 1:
+            enctriples = sets[0].intersection(*sets[1:])
+        else:
+            enctriples = sets[0].copy()
+
+        return ((self.__decodeTriple(enctriple), self.__contexts(enctriple))
+                for enctriple in enctriples
+                if self.__tripleHasContext(enctriple, cid))
+
+    def contexts(self, triple=None):
+        if triple is None or triple is (None,None,None):
+            return (context for context in self.__all_contexts)
+
+        enctriple = self.__encodeTriple(triple)
+        sid, pid, oid = enctriple
+        if sid in self.__subjectIndex and enctriple in self.__subjectIndex[sid]:
+            return self.__contexts(enctriple)
+        else:
+            return self.__emptygen()
+
+    def __len__(self, context=None):
+        cid = self.__obj2id(context)
+        if cid not in self.__contextTriples:
+            return 0
+        return len(self.__contextTriples[cid])
+
+    def add_graph(self, graph):
+        if not self.graph_aware:
+            Store.add_graph(self, graph)
+        else:
+            self.__all_contexts.add(graph)
+
+    def remove_graph(self, graph):
+        if not self.graph_aware:
+            Store.remove_graph(self, graph)
+        else:
+            self.remove((None,None,None), graph)
+            try:
+                self.__all_contexts.remove(graph)
+            except KeyError:
+                pass # we didn't know this graph, no problem
+
+
+
+    # internal utility methods below
+
+    def __addTripleContext(self, enctriple, context, quoted):
+        """add the given context to the set of contexts for the triple"""
+        cid = self.__obj2id(context)
+
+        sid, pid, oid = enctriple
+        if sid in self.__subjectIndex and enctriple in self.__subjectIndex[sid]:
+            # we know the triple exists somewhere in the store
+            if enctriple not in self.__tripleContexts:
+                # triple exists with default ctx info
+                # start with a copy of the default ctx info
+                self.__tripleContexts[
+                    enctriple] = self.__defaultContexts.copy()
+
+            self.__tripleContexts[enctriple][cid] = quoted
+            if not quoted:
+                self.__tripleContexts[enctriple][None] = quoted
+        else:
+            # the triple didn't exist before in the store
+            if quoted:  # this context only
+                self.__tripleContexts[enctriple] = {cid: quoted}
+            else:  # default context as well
+                self.__tripleContexts[enctriple] = {cid: quoted, None: quoted}
+
+        # if the triple is not quoted add it to the default context
+        if not quoted:
+            self.__contextTriples[None].add(enctriple)
+
+        # always add the triple to given context, making sure it's initialized
+        if cid not in self.__contextTriples:
+            self.__contextTriples[cid] = set()
+        self.__contextTriples[cid].add(enctriple)
+
+        # if this is the first ever triple in the store, set default ctx info
+        if self.__defaultContexts is None:
+            self.__defaultContexts = self.__tripleContexts[enctriple]
+
+        # if the context info is the same as default, no need to store it
+        if self.__tripleContexts[enctriple] == self.__defaultContexts:
+            del self.__tripleContexts[enctriple]
+
+    def __getTripleContexts(self, enctriple, skipQuoted=False):
+        """return a list of (encoded) contexts for the triple, skipping
+           quoted contexts if skipQuoted==True"""
+
+        ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts)
+
+        if not skipQuoted:
+            return list(ctxs.keys())
+
+        return [cid for cid, quoted in ctxs.items() if not quoted]
+
+    def __tripleHasContext(self, enctriple, cid):
+        """return True iff the triple exists in the given context"""
+        ctxs = self.__tripleContexts.get(enctriple, self.__defaultContexts)
+        return (cid in ctxs)
+
+    def __removeTripleContext(self, enctriple, cid):
+        """remove the context from the triple"""
+        ctxs = self.__tripleContexts.get(
+            enctriple, self.__defaultContexts).copy()
+        del ctxs[cid]
+        if ctxs == self.__defaultContexts:
+            del self.__tripleContexts[enctriple]
+        else:
+            self.__tripleContexts[enctriple] = ctxs
+        self.__contextTriples[cid].remove(enctriple)
+
+    def __obj2id(self, obj):
+        """encode object, storing it in the encoding map if necessary,
+           and return the integer key"""
+        if obj not in self.__obj2int:
+            id = randid()
+            while id in self.__int2obj:
+                id = randid()
+            self.__obj2int[obj] = id
+            self.__int2obj[id] = obj
+            return id
+        return self.__obj2int[obj]
+
+    def __encodeTriple(self, triple):
+        """encode a whole triple, returning the encoded triple"""
+        return tuple(map(self.__obj2id, triple))
+
+    def __decodeTriple(self, enctriple):
+        """decode a whole encoded triple, returning the original
+        triple"""
+        return tuple(map(self.__int2obj.get, enctriple))
+
+    def __all_triples(self, cid):
+        """return a generator which yields all the triples (unencoded)
+           of the given context"""
+        if cid not in self.__contextTriples:
+            return
+        for enctriple in self.__contextTriples[cid].copy():
+            yield self.__decodeTriple(enctriple), self.__contexts(enctriple)
+
+    def __contexts(self, enctriple):
+        """return a generator for all the non-quoted contexts
+           (unencoded) the encoded triple appears in"""
+        return (self.__int2obj.get(cid) for cid in self.__getTripleContexts(enctriple, skipQuoted=True) if cid is not None)
+
+    def __emptygen(self):
+        """return an empty generator"""
+        if False:
+            yield
+
+
+import random
+
+
+def randid(randint=random.randint, choice=random.choice, signs=(-1, 1)):
+    return choice(signs) * randint(1, 2000000000)
+
+del random