Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/paths.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 from rdflib.py3compat import PY3, format_doctest_out | |
2 | |
3 __doc__ = format_doctest_out(""" | |
4 | |
5 This module implements the SPARQL 1.1 Property path operators, as | |
6 defined in: | |
7 | |
8 http://www.w3.org/TR/sparql11-query/#propertypaths | |
9 | |
10 In SPARQL the syntax is as follows: | |
11 | |
12 +--------------------+-------------------------------------------------+ | |
13 |Syntax | Matches | | |
14 +====================+=================================================+ | |
15 |iri | An IRI. A path of length one. | | |
16 +--------------------+-------------------------------------------------+ | |
17 |^elt | Inverse path (object to subject). | | |
18 +--------------------+-------------------------------------------------+ | |
19 |elt1 / elt2 | A sequence path of elt1 followed by elt2. | | |
20 +--------------------+-------------------------------------------------+ | |
21 |elt1 | elt2 | A alternative path of elt1 or elt2 | | |
22 | | (all possibilities are tried). | | |
23 +--------------------+-------------------------------------------------+ | |
24 |elt* | A path that connects the subject and object | | |
25 | | of the path by zero or more matches of elt. | | |
26 +--------------------+-------------------------------------------------+ | |
27 |elt+ | A path that connects the subject and object | | |
28 | | of the path by one or more matches of elt. | | |
29 +--------------------+-------------------------------------------------+ | |
30 |elt? | A path that connects the subject and object | | |
31 | | of the path by zero or one matches of elt. | | |
32 +--------------------+-------------------------------------------------+ | |
33 |!iri or | Negated property set. An IRI which is not one of| | |
34 |!(iri\ :sub:`1`\ | | iri\ :sub:`1`...iri\ :sub:`n`. | | |
35 |... |iri\ :sub:`n`) | !iri is short for !(iri). | | |
36 +--------------------+-------------------------------------------------+ | |
37 |!^iri or | Negated property set where the excluded matches | | |
38 |!(^iri\ :sub:`1`\ | | are based on reversed path. That is, not one of | | |
39 |... |^iri\ :sub:`n`)| iri\ :sub:`1`...iri\ :sub:`n` as reverse paths. | | |
40 | | !^iri is short for !(^iri). | | |
41 +--------------------+-------------------------------------------------+ | |
42 |!(iri\ :sub:`1`\ | | A combination of forward and reverse | | |
43 |...|iri\ :sub:`j`\ || properties in a negated property set. | | |
44 |^iri\ :sub:`j+1`\ | | | | |
45 |... |^iri\ :sub:`n`)| | | |
46 +--------------------+-------------------------------------------------+ | |
47 |(elt) | A group path elt, brackets control precedence. | | |
48 +--------------------+-------------------------------------------------+ | |
49 | |
50 This module is used internally be the SPARQL engine, but they property paths | |
51 can also be used to query RDFLib Graphs directly. | |
52 | |
53 Where possible the SPARQL syntax is mapped to python operators, and property | |
54 path objects can be constructed from existing URIRefs. | |
55 | |
56 >>> from rdflib import Graph, Namespace | |
57 | |
58 >>> foaf=Namespace('http://xmlns.com/foaf/0.1/') | |
59 | |
60 >>> ~foaf.knows | |
61 Path(~http://xmlns.com/foaf/0.1/knows) | |
62 | |
63 >>> foaf.knows/foaf.name | |
64 Path(http://xmlns.com/foaf/0.1/knows / http://xmlns.com/foaf/0.1/name) | |
65 | |
66 >>> foaf.name|foaf.firstName | |
67 Path(http://xmlns.com/foaf/0.1/name | http://xmlns.com/foaf/0.1/firstName) | |
68 | |
69 Modifiers (?, *, +) are done using * (the multiplication operator) and | |
70 the strings '*', '?', '+', also defined as constants in this file. | |
71 | |
72 >>> foaf.knows*OneOrMore | |
73 Path(http://xmlns.com/foaf/0.1/knows+) | |
74 | |
75 The path objects can also be used with the normal graph methods. | |
76 | |
77 First some example data: | |
78 | |
79 >>> g=Graph() | |
80 | |
81 >>> g=g.parse(data=''' | |
82 ... @prefix : <ex:> . | |
83 ... | |
84 ... :a :p1 :c ; :p2 :f . | |
85 ... :c :p2 :e ; :p3 :g . | |
86 ... :g :p3 :h ; :p2 :j . | |
87 ... :h :p3 :a ; :p2 :g . | |
88 ... | |
89 ... :q :px :q . | |
90 ... | |
91 ... ''', format='n3') # doctest: +ELLIPSIS | |
92 | |
93 >>> e=Namespace('ex:') | |
94 | |
95 Graph contains: | |
96 >>> (e.a, e.p1/e.p2, e.e) in g | |
97 True | |
98 | |
99 Graph generator functions, triples, subjects, objects, etc. : | |
100 | |
101 >>> list(g.objects(e.c, (e.p3*OneOrMore)/e.p2)) # doctest: +NORMALIZE_WHITESPACE | |
102 [rdflib.term.URIRef(%(u)s'ex:j'), rdflib.term.URIRef(%(u)s'ex:g'), | |
103 rdflib.term.URIRef(%(u)s'ex:f')] | |
104 | |
105 A more complete set of tests: | |
106 | |
107 >>> list(evalPath(g, (None, e.p1/e.p2, None)))==[(e.a, e.e)] | |
108 True | |
109 >>> list(evalPath(g, (e.a, e.p1|e.p2, None)))==[(e.a,e.c), (e.a,e.f)] | |
110 True | |
111 >>> list(evalPath(g, (e.c, ~e.p1, None))) == [ (e.c, e.a) ] | |
112 True | |
113 >>> list(evalPath(g, (e.a, e.p1*ZeroOrOne, None))) == [(e.a, e.a), (e.a, e.c)] | |
114 True | |
115 >>> list(evalPath(g, (e.c, e.p3*OneOrMore, None))) == [ | |
116 ... (e.c, e.g), (e.c, e.h), (e.c, e.a)] | |
117 True | |
118 >>> list(evalPath(g, (e.c, e.p3*ZeroOrMore, None))) == [(e.c, e.c), | |
119 ... (e.c, e.g), (e.c, e.h), (e.c, e.a)] | |
120 True | |
121 >>> list(evalPath(g, (e.a, -e.p1, None))) == [(e.a, e.f)] | |
122 True | |
123 >>> list(evalPath(g, (e.a, -(e.p1|e.p2), None))) == [] | |
124 True | |
125 >>> list(evalPath(g, (e.g, -~e.p2, None))) == [(e.g, e.j)] | |
126 True | |
127 >>> list(evalPath(g, (e.e, ~(e.p1/e.p2), None))) == [(e.e, e.a)] | |
128 True | |
129 >>> list(evalPath(g, (e.a, e.p1/e.p3/e.p3, None))) == [(e.a, e.h)] | |
130 True | |
131 | |
132 >>> list(evalPath(g, (e.q, e.px*OneOrMore, None))) | |
133 [(rdflib.term.URIRef(%(u)s'ex:q'), rdflib.term.URIRef(%(u)s'ex:q'))] | |
134 | |
135 >>> list(evalPath(g, (None, e.p1|e.p2, e.c))) | |
136 [(rdflib.term.URIRef(%(u)s'ex:a'), rdflib.term.URIRef(%(u)s'ex:c'))] | |
137 | |
138 >>> list(evalPath(g, (None, ~e.p1, e.a))) == [ (e.c, e.a) ] | |
139 True | |
140 >>> list(evalPath(g, (None, e.p1*ZeroOrOne, e.c))) # doctest: +NORMALIZE_WHITESPACE | |
141 [(rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:c')), | |
142 (rdflib.term.URIRef(%(u)s'ex:a'), rdflib.term.URIRef(%(u)s'ex:c'))] | |
143 | |
144 >>> list(evalPath(g, (None, e.p3*OneOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE | |
145 [(rdflib.term.URIRef(%(u)s'ex:h'), rdflib.term.URIRef(%(u)s'ex:a')), | |
146 (rdflib.term.URIRef(%(u)s'ex:g'), rdflib.term.URIRef(%(u)s'ex:a')), | |
147 (rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:a'))] | |
148 | |
149 >>> list(evalPath(g, (None, e.p3*ZeroOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE | |
150 [(rdflib.term.URIRef(%(u)s'ex:a'), rdflib.term.URIRef(%(u)s'ex:a')), | |
151 (rdflib.term.URIRef(%(u)s'ex:h'), rdflib.term.URIRef(%(u)s'ex:a')), | |
152 (rdflib.term.URIRef(%(u)s'ex:g'), rdflib.term.URIRef(%(u)s'ex:a')), | |
153 (rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:a'))] | |
154 | |
155 >>> list(evalPath(g, (None, -e.p1, e.f))) == [(e.a, e.f)] | |
156 True | |
157 >>> list(evalPath(g, (None, -(e.p1|e.p2), e.c))) == [] | |
158 True | |
159 >>> list(evalPath(g, (None, -~e.p2, e.j))) == [(e.g, e.j)] | |
160 True | |
161 >>> list(evalPath(g, (None, ~(e.p1/e.p2), e.a))) == [(e.e, e.a)] | |
162 True | |
163 >>> list(evalPath(g, (None, e.p1/e.p3/e.p3, e.h))) == [(e.a, e.h)] | |
164 True | |
165 | |
166 >>> list(evalPath(g, (e.q, e.px*OneOrMore, None))) | |
167 [(rdflib.term.URIRef(%(u)s'ex:q'), rdflib.term.URIRef(%(u)s'ex:q'))] | |
168 | |
169 >>> list(evalPath(g, (e.c, (e.p2|e.p3)*ZeroOrMore, e.j))) | |
170 [(rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:j'))] | |
171 | |
172 No vars specified: | |
173 | |
174 >>> sorted(list(evalPath(g, (None, e.p3*OneOrMore, None)))) #doctest: +NORMALIZE_WHITESPACE | |
175 [(rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:a')), | |
176 (rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:g')), | |
177 (rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:h')), | |
178 (rdflib.term.URIRef(%(u)s'ex:g'), rdflib.term.URIRef(%(u)s'ex:a')), | |
179 (rdflib.term.URIRef(%(u)s'ex:g'), rdflib.term.URIRef(%(u)s'ex:h')), | |
180 (rdflib.term.URIRef(%(u)s'ex:h'), rdflib.term.URIRef(%(u)s'ex:a'))] | |
181 | |
182 .. versionadded:: 4.0 | |
183 | |
184 """) | |
185 | |
186 | |
187 from rdflib.term import URIRef, Node | |
188 | |
189 | |
190 # property paths | |
191 | |
192 ZeroOrMore = '*' | |
193 OneOrMore = '+' | |
194 ZeroOrOne = '?' | |
195 | |
196 | |
197 class Path(object): | |
198 def eval(self, graph, subj=None, obj=None): | |
199 raise NotImplementedError() | |
200 | |
201 def __hash__(self): | |
202 return hash(repr(self)) | |
203 | |
204 def __eq__(self, other): | |
205 return repr(self) == repr(other) | |
206 | |
207 def __lt__(self, other): | |
208 if not isinstance(other, (Path, Node)): | |
209 raise TypeError('unorderable types: %s() < %s()' % ( | |
210 repr(self), repr(other))) | |
211 return repr(self) < repr(other) | |
212 | |
213 def __le__(self, other): | |
214 if not isinstance(other, (Path, Node)): | |
215 raise TypeError('unorderable types: %s() < %s()' % ( | |
216 repr(self), repr(other))) | |
217 return repr(self) <= repr(other) | |
218 | |
219 def __ne__(self, other): | |
220 return not self == other | |
221 | |
222 def __gt__(self, other): | |
223 return not self <= other | |
224 | |
225 def __ge__(self, other): | |
226 return not self < other | |
227 | |
228 | |
229 class InvPath(Path): | |
230 | |
231 def __init__(self, arg): | |
232 self.arg = arg | |
233 | |
234 def eval(self, graph, subj=None, obj=None): | |
235 for s, o in evalPath(graph, (obj, self.arg, subj)): | |
236 yield o, s | |
237 | |
238 def __repr__(self): | |
239 return "Path(~%s)" % (self.arg,) | |
240 | |
241 def n3(self): | |
242 return '^%s' % self.arg.n3() | |
243 | |
244 | |
245 class SequencePath(Path): | |
246 def __init__(self, *args): | |
247 self.args = [] | |
248 for a in args: | |
249 if isinstance(a, SequencePath): | |
250 self.args += a.args | |
251 else: | |
252 self.args.append(a) | |
253 | |
254 def eval(self, graph, subj=None, obj=None): | |
255 def _eval_seq(paths, subj, obj): | |
256 if paths[1:]: | |
257 for s, o in evalPath(graph, (subj, paths[0], None)): | |
258 for r in _eval_seq(paths[1:], o, obj): | |
259 yield s, r[1] | |
260 | |
261 else: | |
262 for s, o in evalPath(graph, (subj, paths[0], obj)): | |
263 yield s, o | |
264 | |
265 def _eval_seq_bw(paths, subj, obj): | |
266 if paths[:-1]: | |
267 for s, o in evalPath(graph, (None, paths[-1], obj)): | |
268 for r in _eval_seq(paths[:-1], subj, s): | |
269 yield r[0], o | |
270 | |
271 else: | |
272 for s, o in evalPath(graph, (subj, paths[0], obj)): | |
273 yield s, o | |
274 | |
275 if subj: | |
276 return _eval_seq(self.args, subj, obj) | |
277 elif obj: | |
278 return _eval_seq_bw(self.args, subj, obj) | |
279 else: # no vars bound, we can start anywhere | |
280 return _eval_seq(self.args, subj, obj) | |
281 | |
282 def __repr__(self): | |
283 return "Path(%s)" % " / ".join(str(x) for x in self.args) | |
284 | |
285 def n3(self): | |
286 return '/'.join(a.n3() for a in self.args) | |
287 | |
288 | |
289 class AlternativePath(Path): | |
290 def __init__(self, *args): | |
291 self.args = [] | |
292 for a in args: | |
293 if isinstance(a, AlternativePath): | |
294 self.args += a.args | |
295 else: | |
296 self.args.append(a) | |
297 | |
298 def eval(self, graph, subj=None, obj=None): | |
299 for x in self.args: | |
300 for y in evalPath(graph, (subj, x, obj)): | |
301 yield y | |
302 | |
303 def __repr__(self): | |
304 return "Path(%s)" % " | ".join(str(x) for x in self.args) | |
305 | |
306 def n3(self): | |
307 return '|'.join(a.n3() for a in self.args) | |
308 | |
309 | |
310 | |
311 class MulPath(Path): | |
312 def __init__(self, path, mod): | |
313 self.path = path | |
314 self.mod = mod | |
315 | |
316 if mod == ZeroOrOne: | |
317 self.zero = True | |
318 self.more = False | |
319 elif mod == ZeroOrMore: | |
320 self.zero = True | |
321 self.more = True | |
322 elif mod == OneOrMore: | |
323 self.zero = False | |
324 self.more = True | |
325 else: | |
326 raise Exception('Unknown modifier %s' % mod) | |
327 | |
328 def eval(self, graph, subj=None, obj=None, first=True): | |
329 if self.zero and first: | |
330 if subj and obj: | |
331 if subj == obj: | |
332 yield (subj, obj) | |
333 elif subj: | |
334 yield (subj, subj) | |
335 elif obj: | |
336 yield (obj, obj) | |
337 | |
338 def _fwd(subj=None, obj=None, seen=None): | |
339 seen.add(subj) | |
340 | |
341 for s, o in evalPath(graph, (subj, self.path, None)): | |
342 if not obj or o == obj: | |
343 yield s, o | |
344 if self.more: | |
345 if o in seen: | |
346 continue | |
347 for s2, o2 in _fwd(o, obj, seen): | |
348 yield s, o2 | |
349 | |
350 def _bwd(subj=None, obj=None, seen=None): | |
351 seen.add(obj) | |
352 | |
353 for s, o in evalPath(graph, (None, self.path, obj)): | |
354 if not subj or subj == s: | |
355 yield s, o | |
356 if self.more: | |
357 if s in seen: | |
358 continue | |
359 | |
360 for s2, o2 in _bwd(None, s, seen): | |
361 yield s2, o | |
362 | |
363 def _fwdbwd(): | |
364 if self.zero: | |
365 seen1 = set() | |
366 # According to the spec, ALL nodes are possible solutions | |
367 # (even literals) | |
368 # we cannot do this without going through ALL triples | |
369 # unless we keep an index of all terms somehow | |
370 # but lets just hope this query doesnt happen very often... | |
371 for s, o in graph.subject_objects(None): | |
372 if s not in seen1: | |
373 seen1.add(s) | |
374 yield s, s | |
375 if o not in seen1: | |
376 seen1.add(o) | |
377 yield o, o | |
378 | |
379 for s, o in evalPath(graph, (None, self.path, None)): | |
380 if not self.more: | |
381 yield s, o | |
382 else: | |
383 seen = set() | |
384 f = list(_fwd(s, None, seen)) # cache or recompute? | |
385 for s3, o3 in _bwd(None, o, seen): | |
386 for s2, o2 in f: | |
387 yield s3, o2 # ? | |
388 | |
389 done = set() # the spec does by defn. not allow duplicates | |
390 if subj: | |
391 for x in _fwd(subj, obj, set()): | |
392 if x not in done: | |
393 done.add(x) | |
394 yield x | |
395 elif obj: | |
396 for x in _bwd(subj, obj, set()): | |
397 if x not in done: | |
398 done.add(x) | |
399 yield x | |
400 else: | |
401 for x in _fwdbwd(): | |
402 if x not in done: | |
403 done.add(x) | |
404 yield x | |
405 | |
406 def __repr__(self): | |
407 return "Path(%s%s)" % (self.path, self.mod) | |
408 | |
409 def n3(self): | |
410 return '%s%s' % (self.path, self.mod) | |
411 | |
412 | |
413 | |
414 class NegatedPath(Path): | |
415 def __init__(self, arg): | |
416 if isinstance(arg, (URIRef, InvPath)): | |
417 self.args = [arg] | |
418 elif isinstance(arg, AlternativePath): | |
419 self.args = arg.args | |
420 else: | |
421 raise Exception( | |
422 'Can only negate URIRefs, InvPaths or ' + | |
423 'AlternativePaths, not: %s' % (arg,)) | |
424 | |
425 def eval(self, graph, subj=None, obj=None): | |
426 for s, p, o in graph.triples((subj, None, obj)): | |
427 for a in self.args: | |
428 if isinstance(a, URIRef): | |
429 if p == a: | |
430 break | |
431 elif isinstance(a, InvPath): | |
432 if (o, a.arg, s) in graph: | |
433 break | |
434 else: | |
435 raise Exception('Invalid path in NegatedPath: %s' % a) | |
436 else: | |
437 yield s, o | |
438 | |
439 def __repr__(self): | |
440 return "Path(! %s)" % ",".join(str(x) for x in self.args) | |
441 | |
442 def n3(self): | |
443 return '!(%s)' % ('|'.join(self.args)) | |
444 | |
445 | |
446 class PathList(list): | |
447 pass | |
448 | |
449 | |
450 def path_alternative(self, other): | |
451 """ | |
452 alternative path | |
453 """ | |
454 if not isinstance(other, (URIRef, Path)): | |
455 raise Exception('Only URIRefs or Paths can be in paths!') | |
456 return AlternativePath(self, other) | |
457 | |
458 | |
459 def path_sequence(self, other): | |
460 """ | |
461 sequence path | |
462 """ | |
463 if not isinstance(other, (URIRef, Path)): | |
464 raise Exception('Only URIRefs or Paths can be in paths!') | |
465 return SequencePath(self, other) | |
466 | |
467 | |
468 def evalPath(graph, t): | |
469 return ((s, o) for s, p, o in graph.triples(t)) | |
470 | |
471 def mul_path(p, mul): | |
472 """ | |
473 cardinality path | |
474 """ | |
475 return MulPath(p, mul) | |
476 | |
477 | |
478 def inv_path(p): | |
479 """ | |
480 inverse path | |
481 """ | |
482 return InvPath(p) | |
483 | |
484 | |
485 def neg_path(p): | |
486 """ | |
487 negated path | |
488 """ | |
489 return NegatedPath(p) | |
490 | |
491 | |
492 | |
493 if __name__ == '__main__': | |
494 | |
495 import doctest | |
496 doctest.testmod() | |
497 else: | |
498 # monkey patch | |
499 # (these cannot be directly in terms.py | |
500 # as it would introduce circular imports) | |
501 | |
502 URIRef.__or__ = path_alternative | |
503 URIRef.__mul__ = mul_path | |
504 URIRef.__invert__ = inv_path | |
505 URIRef.__neg__ = neg_path | |
506 URIRef.__truediv__ = path_sequence | |
507 if not PY3: | |
508 URIRef.__div__ = path_sequence | |
509 | |
510 Path.__invert__ = inv_path | |
511 Path.__neg__ = neg_path | |
512 Path.__mul__ = mul_path | |
513 Path.__or__ = path_alternative | |
514 Path.__truediv__ = path_sequence | |
515 if not PY3: | |
516 Path.__div__ = path_sequence |