comparison planemo/lib/python3.7/site-packages/rdflib/plugins/stores/regexmatching.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 """
2 This wrapper intercepts calls through the store interface which make use of
3 the REGEXTerm class to represent matches by REGEX instead of literal
4 comparison.
5
6 Implemented for stores that don't support this and essentially
7 provides the support by replacing the REGEXTerms by wildcards (None) and
8 matching against the results from the store it's wrapping.
9 """
10
11 from rdflib.store import Store
12 from rdflib.graph import Graph
13 import re
14
# Marker value: the store is capable of doing its own REGEX matching.
# NOTE(review): neither constant is referenced elsewhere in the visible part
# of this module; presumably they are consumed by other code -- confirm.
NATIVE_REGEX = 0
# Marker value: the store uses Python's ``re`` module internally for REGEX
# matching.
PYTHON_REGEX = 1
19
20
class REGEXTerm(str):
    """
    String subclass that marks a term slot as a REGEX match request.

    A REGEXTerm can be used in any term slot and is interpreted as a request
    to perform a REGEX match (not a string comparison). The string value is
    the pattern source; it is compiled once, at construction time, and the
    compiled pattern is kept in ``compiledExpr``.
    """

    def __init__(self, expr):
        """
        Compile ``expr`` and store the result as ``self.compiledExpr``.

        ``re.compile`` raises ``re.error`` when ``expr`` is not a valid
        pattern.
        """
        self.compiledExpr = re.compile(expr)

    def __reduce__(self):
        """
        Describe how to rebuild this term when it is pickled or copied.

        The term is reconstructed by calling ``REGEXTerm`` with its own
        pattern text, so ``__init__`` recompiles the same expression.
        Rebuilding from an empty string would silently turn every restored
        term into a pattern that matches everything.
        """
        # str(self) yields a plain ``str``, so the reduce arguments do not
        # themselves contain a REGEXTerm.
        return (REGEXTerm, (str(self),))
32
33
def regexCompareQuad(quad, regexQuad):
    """
    Check a concrete quad against a pattern quad, slot by slot.

    Only the first four positions are examined. Wherever ``regexQuad`` holds
    a REGEXTerm, its compiled expression must ``match`` the value at the same
    position of ``quad``. Positions of ``regexQuad`` that are not REGEXTerm
    instances are not compared by this function.

    Returns True when every REGEXTerm position matches, False otherwise.
    """
    return all(
        regexQuad[slot].compiledExpr.match(quad[slot]) is not None
        for slot in range(4)
        if isinstance(regexQuad[slot], REGEXTerm)
    )
40
41
class REGEXMatching(Store):
    """
    Store wrapper that adds REGEXTerm support on top of another store.

    Calls whose terms contain no REGEXTerm are forwarded to the wrapped
    store unchanged. When a REGEXTerm is present in ``remove`` or
    ``triples``, each REGEXTerm slot is replaced by the wildcard ``None``
    for the query against the wrapped store, and the results are then
    filtered with the compiled expressions.
    """

    def __init__(self, storage):
        """
        Wrap ``storage`` and mirror its capability flags.

        :param storage: the store instance that all calls are delegated to.
        """
        # Run the base Store initialiser so inherited state is set up; no
        # configuration is passed, so nothing is opened here.
        super().__init__()
        self.storage = storage
        self.context_aware = storage.context_aware
        # NOTE: this store can't be formula_aware as it doesn't have enough
        # info to reverse the removal of a quoted statement.
        # NOTE(review): despite the note above, the flag is mirrored from the
        # wrapped store rather than forced to False -- confirm the intent.
        self.formula_aware = storage.formula_aware
        self.transaction_aware = storage.transaction_aware

    @staticmethod
    def _is_regex_context(context):
        """Return True if ``context`` is given and identified by a REGEXTerm."""
        return (context is not None
                and isinstance(context.identifier, REGEXTerm))

    @classmethod
    def _uses_regex(cls, triple, context):
        """Return True if any triple term or the context id is a REGEXTerm."""
        return (any(isinstance(term, REGEXTerm) for term in triple)
                or cls._is_regex_context(context))

    @classmethod
    def _to_store_pattern(cls, triple, context):
        """
        Build the query to send to the wrapped store.

        Every REGEXTerm is replaced by the wildcard ``None``; a context whose
        identifier is a REGEXTerm is replaced by ``None`` as well. All other
        values are passed through untouched. The replacement is decided by
        type only, never by truthiness, so terms or contexts that evaluate
        as false are not widened into wildcards by accident.

        :returns: ``((s, p, o), c)``
        """
        s, p, o = (None if isinstance(term, REGEXTerm) else term
                   for term in triple)
        c = None if cls._is_regex_context(context) else context
        return (s, p, o), c

    def open(self, configuration, create=True):
        """Open the wrapped store and return whatever its ``open`` returns."""
        return self.storage.open(configuration, create)

    def close(self, commit_pending_transaction=False):
        """Close the wrapped store, forwarding ``commit_pending_transaction``."""
        self.storage.close(
            commit_pending_transaction=commit_pending_transaction)

    def destroy(self, configuration):
        """Forward ``destroy`` to the wrapped store."""
        self.storage.destroy(configuration)

    def add(self, triple, context, quoted=False):
        """Add ``triple`` to the wrapped store; no REGEX handling is done."""
        (subject, predicate, object_) = triple
        self.storage.add((subject, predicate, object_), context, quoted)

    def remove(self, triple, context=None):
        """
        Remove the triples matching ``triple`` within ``context``.

        Without any REGEXTerm the call is forwarded as is. Otherwise the
        wrapped store is queried with wildcards in the REGEXTerm slots, each
        (triple, context) result is checked with ``regexCompareQuad``, and
        every match is removed from the wrapped store individually.
        """
        (subject, predicate, object_) = triple
        if not self._uses_regex((subject, predicate, object_), context):
            self.storage.remove((subject, predicate, object_), context)
            return

        # One or more of the terms is a REGEX expression, so we must
        # replace it / them with wildcard(s) and match after we query.
        pattern, c = self._to_store_pattern(
            (subject, predicate, object_), context)
        context_term = context.identifier if context is not None else None
        regex_quad = (subject, predicate, object_, context_term)

        # Collect all matches first and delete afterwards, so removals do not
        # happen while the store's result iterator is being consumed.
        matched_quads = []
        for (s1, p1, o1), cg in self.storage.triples(pattern, c):
            for ctx in cg:
                ctx_id = ctx.identifier
                if regexCompareQuad((s1, p1, o1, ctx_id), regex_quad):
                    matched_quads.append((s1, p1, o1, ctx_id))

        for s1, p1, o1, ctx_id in matched_quads:
            target = Graph(self, ctx_id) if ctx_id is not None else None
            self.storage.remove((s1, p1, o1), target)

    def triples(self, triple, context=None):
        """
        Generate ``((s, p, o), contexts)`` pairs matching ``triple``.

        Without any REGEXTerm the wrapped store's results are yielded as is.
        Otherwise the wrapped store is queried with wildcards in the
        REGEXTerm slots; a result is yielded only if its triple satisfies the
        REGEXTerm slots and at least one of its contexts is acceptable. When
        the context identifier is a REGEXTerm, only contexts whose identifier
        matches it are kept.
        """
        (subject, predicate, object_) = triple
        if not self._uses_regex((subject, predicate, object_), context):
            for (s1, p1, o1), cg in self.storage.triples(
                    (subject, predicate, object_), context):
                yield (s1, p1, o1), cg
            return

        # One or more of the terms is a REGEX expression, so we must
        # replace it / them with wildcard(s) and match after we query.
        pattern, c = self._to_store_pattern(
            (subject, predicate, object_), context)
        context_regex = None
        if self._is_regex_context(context):
            context_regex = context.identifier.compiledExpr

        for (s1, p1, o1), cg in self.storage.triples(pattern, c):
            if context_regex is None:
                # No context given, or a concrete context that the wrapped
                # store has already filtered on: keep every context.
                matching_ctxs = list(cg)
            else:
                matching_ctxs = [ctx for ctx in cg
                                 if context_regex.match(ctx.identifier)]
            if matching_ctxs \
                    and regexCompareQuad((s1, p1, o1, None),
                                         (subject, predicate, object_, None)):
                yield (s1, p1, o1), (ctx for ctx in matching_ctxs)

    def __len__(self, context=None):
        """Return the wrapped store's length for ``context``."""
        # NOTE: If the context is a REGEX this could be an expensive
        # proposition
        return self.storage.__len__(context)

    def contexts(self, triple=None):
        """Generate the contexts reported by the wrapped store."""
        # NOTE: There is no way to control REGEX matching for this method at
        # this level as it only returns the contexts, not the matching
        # triples.
        yield from self.storage.contexts(triple)

    def remove_context(self, identifier):
        """Remove everything in ``identifier`` via a full-wildcard remove."""
        self.storage.remove((None, None, None), identifier)

    def bind(self, prefix, namespace):
        """Forward the prefix/namespace binding to the wrapped store."""
        self.storage.bind(prefix, namespace)

    def prefix(self, namespace):
        """Return the wrapped store's prefix for ``namespace``."""
        return self.storage.prefix(namespace)

    def namespace(self, prefix):
        """Return the wrapped store's namespace for ``prefix``."""
        return self.storage.namespace(prefix)

    def namespaces(self):
        """Return the wrapped store's namespace bindings."""
        return self.storage.namespaces()

    def commit(self):
        """Forward ``commit`` to the wrapped store."""
        self.storage.commit()

    def rollback(self):
        """Forward ``rollback`` to the wrapped store."""
        self.storage.rollback()