diff planemo/lib/python3.7/site-packages/rdflib/plugins/stores/regexmatching.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/planemo/lib/python3.7/site-packages/rdflib/plugins/stores/regexmatching.py	Fri Jul 31 00:32:28 2020 -0400
@@ -0,0 +1,161 @@
+"""
+This wrapper intercepts calls through the store interface which make use of
+the REGEXTerm class to represent matches by REGEX instead of literal
+comparison.
+
+Implemented for stores that don't support this and essentially
+provides the support by replacing the REGEXTerms by wildcards (None) and
+matching against the results from the store it's wrapping.
+"""
+
+from rdflib.store import Store
+from rdflib.graph import Graph
+import re
+
+# Store is capable of doing its own REGEX matching
+NATIVE_REGEX = 0
+# Store uses Python's re module internally for REGEX matching
+PYTHON_REGEX = 1
+
+
+class REGEXTerm(str):
+    """
+    REGEXTerm can be used in any term slot and is interpreted as a request to
+    perform a REGEX match (not a string comparison) using the value
+    (pre-compiled) for checking rdf:type matches
+    """
+    def __init__(self, expr):
+        self.compiledExpr = re.compile(expr)
+
+    def __reduce__(self):
+        return (REGEXTerm, (str(''),))
+
+
+def regexCompareQuad(quad, regexQuad):
+    for index in range(4):
+        if isinstance(regexQuad[index], REGEXTerm) and not \
+                regexQuad[index].compiledExpr.match(quad[index]):
+            return False
+    return True
+
+
+class REGEXMatching(Store):
+    def __init__(self, storage):
+        self.storage = storage
+        self.context_aware = storage.context_aware
+        # NOTE: this store can't be formula_aware as it doesn't have enough
+        # info to reverse the removal of a quoted statement.
+        self.formula_aware = storage.formula_aware
+        self.transaction_aware = storage.transaction_aware
+
+    def open(self, configuration, create=True):
+        return self.storage.open(configuration, create)
+
+    def close(self, commit_pending_transaction=False):
+        self.storage.close()
+
+    def destroy(self, configuration):
+        self.storage.destroy(configuration)
+
+    def add(self, triple, context, quoted=False):
+        (subject, predicate, object_) = triple
+        self.storage.add((subject, predicate, object_), context, quoted)
+
+    def remove(self, triple, context=None):
+        (subject, predicate, object_) = triple
+        if isinstance(subject, REGEXTerm) or \
+            isinstance(predicate, REGEXTerm) or \
+            isinstance(object_, REGEXTerm) or \
+                (context is not None
+                 and isinstance(context.identifier, REGEXTerm)):
+            # One or more of the terms is a REGEX expression, so we must
+            # replace it / them with wildcard(s)and match after we query.
+            s = not isinstance(subject, REGEXTerm) and subject or None
+            p = not isinstance(predicate, REGEXTerm) and predicate or None
+            o = not isinstance(object_, REGEXTerm) and object_ or None
+            c = (context is not None
+                 and not isinstance(context.identifier, REGEXTerm)) \
+                and context \
+                or None
+
+            removeQuadList = []
+            for (s1, p1, o1), cg in self.storage.triples((s, p, o), c):
+                for ctx in cg:
+                    ctx = ctx.identifier
+                    if regexCompareQuad(
+                            (s1, p1, o1, ctx),
+                            (subject, predicate, object_, context
+                             is not None and context.identifier or context)):
+                        removeQuadList.append((s1, p1, o1, ctx))
+            for s, p, o, c in removeQuadList:
+                self.storage.remove((s, p, o), c and Graph(self, c) or c)
+        else:
+            self.storage.remove((subject, predicate, object_), context)
+
+    def triples(self, triple, context=None):
+        (subject, predicate, object_) = triple
+        if isinstance(subject, REGEXTerm) or \
+            isinstance(predicate, REGEXTerm) or \
+            isinstance(object_, REGEXTerm) or \
+                (context is not None
+                 and isinstance(context.identifier, REGEXTerm)):
+            # One or more of the terms is a REGEX expression, so we must
+            # replace it / them with wildcard(s) and match after we query.
+            s = not isinstance(subject, REGEXTerm) and subject or None
+            p = not isinstance(predicate, REGEXTerm) and predicate or None
+            o = not isinstance(object_, REGEXTerm) and object_ or None
+            c = (context is not None
+                 and not isinstance(context.identifier, REGEXTerm)) \
+                and context \
+                or None
+            for (s1, p1, o1), cg in self.storage.triples((s, p, o), c):
+                matchingCtxs = []
+                for ctx in cg:
+                    if c is None:
+                        if context is None \
+                            or context.identifier.compiledExpr.match(
+                                ctx.identifier):
+                            matchingCtxs.append(ctx)
+                    else:
+                        matchingCtxs.append(ctx)
+                if matchingCtxs \
+                    and regexCompareQuad((s1, p1, o1, None),
+                                         (subject, predicate, object_, None)):
+                    yield (s1, p1, o1), (c for c in matchingCtxs)
+        else:
+            for (s1, p1, o1), cg in self.storage.triples(
+                    (subject, predicate, object_), context):
+                yield (s1, p1, o1), cg
+
+    def __len__(self, context=None):
+        # NOTE: If the context is a REGEX this could be an expensive
+        # proposition
+        return self.storage.__len__(context)
+
+    def contexts(self, triple=None):
+        # NOTE: There is no way to control REGEX matching for this method at
+        # this level as it only returns the contexts, not the matching
+        # triples.
+        for ctx in self.storage.contexts(triple):
+            yield ctx
+
+    def remove_context(self, identifier):
+        self.storage.remove((None, None, None), identifier)
+
+    def bind(self, prefix, namespace):
+        self.storage.bind(prefix, namespace)
+
+    def prefix(self, namespace):
+        return self.storage.prefix(namespace)
+
+    def namespace(self, prefix):
+        return self.storage.namespace(prefix)
+
+    def namespaces(self):
+        return self.storage.namespaces()
+
+    def commit(self):
+        self.storage.commit()
+
+    def rollback(self):
+        self.storage.rollback()