Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/rdflib/paths.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 from six import PY3 | |
2 | |
3 | |
4 __doc__ = """ | |
5 | |
6 This module implements the SPARQL 1.1 Property path operators, as | |
7 defined in: | |
8 | |
9 http://www.w3.org/TR/sparql11-query/#propertypaths | |
10 | |
11 In SPARQL the syntax is as follows: | |
12 | |
13 +--------------------+-------------------------------------------------+ | |
14 |Syntax | Matches | | |
15 +====================+=================================================+ | |
16 |iri | An IRI. A path of length one. | | |
17 +--------------------+-------------------------------------------------+ | |
18 |^elt | Inverse path (object to subject). | | |
19 +--------------------+-------------------------------------------------+ | |
20 |elt1 / elt2 | A sequence path of elt1 followed by elt2. | | |
21 +--------------------+-------------------------------------------------+ | |
22 |elt1 | elt2 | A alternative path of elt1 or elt2 | | |
23 | | (all possibilities are tried). | | |
24 +--------------------+-------------------------------------------------+ | |
25 |elt* | A path that connects the subject and object | | |
26 | | of the path by zero or more matches of elt. | | |
27 +--------------------+-------------------------------------------------+ | |
28 |elt+ | A path that connects the subject and object | | |
29 | | of the path by one or more matches of elt. | | |
30 +--------------------+-------------------------------------------------+ | |
31 |elt? | A path that connects the subject and object | | |
32 | | of the path by zero or one matches of elt. | | |
33 +--------------------+-------------------------------------------------+ | |
34 |!iri or | Negated property set. An IRI which is not one of| | |
35 |!(iri\ :sub:`1`\ \| | iri\ :sub:`1`...iri\ :sub:`n`. | | |
36 |... \|iri\ :sub:`n`)| !iri is short for !(iri). | | |
37 +--------------------+-------------------------------------------------+ | |
38 |!^iri or | Negated property set where the excluded matches | | |
39 |!(^iri\ :sub:`1`\ \|| are based on reversed path. That is, not one of | | |
40 |...\|^iri\ :sub:`n`)| iri\ :sub:`1`...iri\ :sub:`n` as reverse paths. | | |
41 | | !^iri is short for !(^iri). | | |
42 +--------------------+-------------------------------------------------+ | |
43 |!(iri\ :sub:`1`\ \| | A combination of forward and reverse | | |
44 |...\|iri\ :sub:`j`\ | properties in a negated property set. | | |
45 |\|^iri\ :sub:`j+1`\ | | | |
46 |\|... \|^iri\ | | | |
47 |:sub:`n`)| | | | |
48 +--------------------+-------------------------------------------------+ | |
49 |(elt) | A group path elt, brackets control precedence. | | |
50 +--------------------+-------------------------------------------------+ | |
51 | |
52 This module is used internally be the SPARQL engine, but they property paths | |
53 can also be used to query RDFLib Graphs directly. | |
54 | |
55 Where possible the SPARQL syntax is mapped to python operators, and property | |
56 path objects can be constructed from existing URIRefs. | |
57 | |
58 >>> from rdflib import Graph, Namespace | |
59 | |
60 >>> foaf=Namespace('http://xmlns.com/foaf/0.1/') | |
61 | |
62 >>> ~foaf.knows | |
63 Path(~http://xmlns.com/foaf/0.1/knows) | |
64 | |
65 >>> foaf.knows/foaf.name | |
66 Path(http://xmlns.com/foaf/0.1/knows / http://xmlns.com/foaf/0.1/name) | |
67 | |
68 >>> foaf.name|foaf.givenName | |
69 Path(http://xmlns.com/foaf/0.1/name | http://xmlns.com/foaf/0.1/givenName) | |
70 | |
71 Modifiers (?, *, +) are done using * (the multiplication operator) and | |
72 the strings '*', '?', '+', also defined as constants in this file. | |
73 | |
74 >>> foaf.knows*OneOrMore | |
75 Path(http://xmlns.com/foaf/0.1/knows+) | |
76 | |
77 The path objects can also be used with the normal graph methods. | |
78 | |
79 First some example data: | |
80 | |
81 >>> g=Graph() | |
82 | |
83 >>> g=g.parse(data=''' | |
84 ... @prefix : <ex:> . | |
85 ... | |
86 ... :a :p1 :c ; :p2 :f . | |
87 ... :c :p2 :e ; :p3 :g . | |
88 ... :g :p3 :h ; :p2 :j . | |
89 ... :h :p3 :a ; :p2 :g . | |
90 ... | |
91 ... :q :px :q . | |
92 ... | |
93 ... ''', format='n3') # doctest: +ELLIPSIS | |
94 | |
95 >>> e=Namespace('ex:') | |
96 | |
97 Graph contains: | |
98 >>> (e.a, e.p1/e.p2, e.e) in g | |
99 True | |
100 | |
101 Graph generator functions, triples, subjects, objects, etc. : | |
102 | |
103 >>> list(g.objects(e.c, (e.p3*OneOrMore)/e.p2)) # doctest: +NORMALIZE_WHITESPACE | |
104 [rdflib.term.URIRef(u'ex:j'), rdflib.term.URIRef(u'ex:g'), | |
105 rdflib.term.URIRef(u'ex:f')] | |
106 | |
107 A more complete set of tests: | |
108 | |
109 >>> list(evalPath(g, (None, e.p1/e.p2, None)))==[(e.a, e.e)] | |
110 True | |
111 >>> list(evalPath(g, (e.a, e.p1|e.p2, None)))==[(e.a,e.c), (e.a,e.f)] | |
112 True | |
113 >>> list(evalPath(g, (e.c, ~e.p1, None))) == [ (e.c, e.a) ] | |
114 True | |
115 >>> list(evalPath(g, (e.a, e.p1*ZeroOrOne, None))) == [(e.a, e.a), (e.a, e.c)] | |
116 True | |
117 >>> list(evalPath(g, (e.c, e.p3*OneOrMore, None))) == [ | |
118 ... (e.c, e.g), (e.c, e.h), (e.c, e.a)] | |
119 True | |
120 >>> list(evalPath(g, (e.c, e.p3*ZeroOrMore, None))) == [(e.c, e.c), | |
121 ... (e.c, e.g), (e.c, e.h), (e.c, e.a)] | |
122 True | |
123 >>> list(evalPath(g, (e.a, -e.p1, None))) == [(e.a, e.f)] | |
124 True | |
125 >>> list(evalPath(g, (e.a, -(e.p1|e.p2), None))) == [] | |
126 True | |
127 >>> list(evalPath(g, (e.g, -~e.p2, None))) == [(e.g, e.j)] | |
128 True | |
129 >>> list(evalPath(g, (e.e, ~(e.p1/e.p2), None))) == [(e.e, e.a)] | |
130 True | |
131 >>> list(evalPath(g, (e.a, e.p1/e.p3/e.p3, None))) == [(e.a, e.h)] | |
132 True | |
133 | |
134 >>> list(evalPath(g, (e.q, e.px*OneOrMore, None))) | |
135 [(rdflib.term.URIRef(u'ex:q'), rdflib.term.URIRef(u'ex:q'))] | |
136 | |
137 >>> list(evalPath(g, (None, e.p1|e.p2, e.c))) | |
138 [(rdflib.term.URIRef(u'ex:a'), rdflib.term.URIRef(u'ex:c'))] | |
139 | |
140 >>> list(evalPath(g, (None, ~e.p1, e.a))) == [ (e.c, e.a) ] | |
141 True | |
142 >>> list(evalPath(g, (None, e.p1*ZeroOrOne, e.c))) # doctest: +NORMALIZE_WHITESPACE | |
143 [(rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:c')), | |
144 (rdflib.term.URIRef(u'ex:a'), rdflib.term.URIRef(u'ex:c'))] | |
145 | |
146 >>> list(evalPath(g, (None, e.p3*OneOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE | |
147 [(rdflib.term.URIRef(u'ex:h'), rdflib.term.URIRef(u'ex:a')), | |
148 (rdflib.term.URIRef(u'ex:g'), rdflib.term.URIRef(u'ex:a')), | |
149 (rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:a'))] | |
150 | |
151 >>> list(evalPath(g, (None, e.p3*ZeroOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE | |
152 [(rdflib.term.URIRef(u'ex:a'), rdflib.term.URIRef(u'ex:a')), | |
153 (rdflib.term.URIRef(u'ex:h'), rdflib.term.URIRef(u'ex:a')), | |
154 (rdflib.term.URIRef(u'ex:g'), rdflib.term.URIRef(u'ex:a')), | |
155 (rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:a'))] | |
156 | |
157 >>> list(evalPath(g, (None, -e.p1, e.f))) == [(e.a, e.f)] | |
158 True | |
159 >>> list(evalPath(g, (None, -(e.p1|e.p2), e.c))) == [] | |
160 True | |
161 >>> list(evalPath(g, (None, -~e.p2, e.j))) == [(e.g, e.j)] | |
162 True | |
163 >>> list(evalPath(g, (None, ~(e.p1/e.p2), e.a))) == [(e.e, e.a)] | |
164 True | |
165 >>> list(evalPath(g, (None, e.p1/e.p3/e.p3, e.h))) == [(e.a, e.h)] | |
166 True | |
167 | |
168 >>> list(evalPath(g, (e.q, e.px*OneOrMore, None))) | |
169 [(rdflib.term.URIRef(u'ex:q'), rdflib.term.URIRef(u'ex:q'))] | |
170 | |
171 >>> list(evalPath(g, (e.c, (e.p2|e.p3)*ZeroOrMore, e.j))) | |
172 [(rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:j'))] | |
173 | |
174 No vars specified: | |
175 | |
176 >>> sorted(list(evalPath(g, (None, e.p3*OneOrMore, None)))) #doctest: +NORMALIZE_WHITESPACE | |
177 [(rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:a')), | |
178 (rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:g')), | |
179 (rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:h')), | |
180 (rdflib.term.URIRef(u'ex:g'), rdflib.term.URIRef(u'ex:a')), | |
181 (rdflib.term.URIRef(u'ex:g'), rdflib.term.URIRef(u'ex:h')), | |
182 (rdflib.term.URIRef(u'ex:h'), rdflib.term.URIRef(u'ex:a'))] | |
183 | |
184 .. versionadded:: 4.0 | |
185 | |
186 """ | |
187 | |
188 | |
189 from rdflib.term import URIRef, Node | |
190 | |
191 | |
192 # property paths | |
193 | |
194 ZeroOrMore = '*' | |
195 OneOrMore = '+' | |
196 ZeroOrOne = '?' | |
197 | |
198 | |
199 class Path(object): | |
200 def eval(self, graph, subj=None, obj=None): | |
201 raise NotImplementedError() | |
202 | |
203 def __hash__(self): | |
204 return hash(repr(self)) | |
205 | |
206 def __eq__(self, other): | |
207 return repr(self) == repr(other) | |
208 | |
209 def __lt__(self, other): | |
210 if not isinstance(other, (Path, Node)): | |
211 raise TypeError('unorderable types: %s() < %s()' % ( | |
212 repr(self), repr(other))) | |
213 return repr(self) < repr(other) | |
214 | |
215 def __le__(self, other): | |
216 if not isinstance(other, (Path, Node)): | |
217 raise TypeError('unorderable types: %s() < %s()' % ( | |
218 repr(self), repr(other))) | |
219 return repr(self) <= repr(other) | |
220 | |
221 def __ne__(self, other): | |
222 return not self == other | |
223 | |
224 def __gt__(self, other): | |
225 return not self <= other | |
226 | |
227 def __ge__(self, other): | |
228 return not self < other | |
229 | |
230 | |
231 class InvPath(Path): | |
232 | |
233 def __init__(self, arg): | |
234 self.arg = arg | |
235 | |
236 def eval(self, graph, subj=None, obj=None): | |
237 for s, o in evalPath(graph, (obj, self.arg, subj)): | |
238 yield o, s | |
239 | |
240 def __repr__(self): | |
241 return "Path(~%s)" % (self.arg,) | |
242 | |
243 def n3(self): | |
244 return '^%s' % self.arg.n3() | |
245 | |
246 | |
247 class SequencePath(Path): | |
248 def __init__(self, *args): | |
249 self.args = [] | |
250 for a in args: | |
251 if isinstance(a, SequencePath): | |
252 self.args += a.args | |
253 else: | |
254 self.args.append(a) | |
255 | |
256 def eval(self, graph, subj=None, obj=None): | |
257 def _eval_seq(paths, subj, obj): | |
258 if paths[1:]: | |
259 for s, o in evalPath(graph, (subj, paths[0], None)): | |
260 for r in _eval_seq(paths[1:], o, obj): | |
261 yield s, r[1] | |
262 | |
263 else: | |
264 for s, o in evalPath(graph, (subj, paths[0], obj)): | |
265 yield s, o | |
266 | |
267 def _eval_seq_bw(paths, subj, obj): | |
268 if paths[:-1]: | |
269 for s, o in evalPath(graph, (None, paths[-1], obj)): | |
270 for r in _eval_seq(paths[:-1], subj, s): | |
271 yield r[0], o | |
272 | |
273 else: | |
274 for s, o in evalPath(graph, (subj, paths[0], obj)): | |
275 yield s, o | |
276 | |
277 if subj: | |
278 return _eval_seq(self.args, subj, obj) | |
279 elif obj: | |
280 return _eval_seq_bw(self.args, subj, obj) | |
281 else: # no vars bound, we can start anywhere | |
282 return _eval_seq(self.args, subj, obj) | |
283 | |
284 def __repr__(self): | |
285 return "Path(%s)" % " / ".join(str(x) for x in self.args) | |
286 | |
287 def n3(self): | |
288 return '/'.join(a.n3() for a in self.args) | |
289 | |
290 | |
291 class AlternativePath(Path): | |
292 def __init__(self, *args): | |
293 self.args = [] | |
294 for a in args: | |
295 if isinstance(a, AlternativePath): | |
296 self.args += a.args | |
297 else: | |
298 self.args.append(a) | |
299 | |
300 def eval(self, graph, subj=None, obj=None): | |
301 for x in self.args: | |
302 for y in evalPath(graph, (subj, x, obj)): | |
303 yield y | |
304 | |
305 def __repr__(self): | |
306 return "Path(%s)" % " | ".join(str(x) for x in self.args) | |
307 | |
308 def n3(self): | |
309 return '|'.join(a.n3() for a in self.args) | |
310 | |
311 | |
312 class MulPath(Path): | |
313 def __init__(self, path, mod): | |
314 self.path = path | |
315 self.mod = mod | |
316 | |
317 if mod == ZeroOrOne: | |
318 self.zero = True | |
319 self.more = False | |
320 elif mod == ZeroOrMore: | |
321 self.zero = True | |
322 self.more = True | |
323 elif mod == OneOrMore: | |
324 self.zero = False | |
325 self.more = True | |
326 else: | |
327 raise Exception('Unknown modifier %s' % mod) | |
328 | |
329 def eval(self, graph, subj=None, obj=None, first=True): | |
330 if self.zero and first: | |
331 if subj and obj: | |
332 if subj == obj: | |
333 yield (subj, obj) | |
334 elif subj: | |
335 yield (subj, subj) | |
336 elif obj: | |
337 yield (obj, obj) | |
338 | |
339 def _fwd(subj=None, obj=None, seen=None): | |
340 seen.add(subj) | |
341 | |
342 for s, o in evalPath(graph, (subj, self.path, None)): | |
343 if not obj or o == obj: | |
344 yield s, o | |
345 if self.more: | |
346 if o in seen: | |
347 continue | |
348 for s2, o2 in _fwd(o, obj, seen): | |
349 yield s, o2 | |
350 | |
351 def _bwd(subj=None, obj=None, seen=None): | |
352 seen.add(obj) | |
353 | |
354 for s, o in evalPath(graph, (None, self.path, obj)): | |
355 if not subj or subj == s: | |
356 yield s, o | |
357 if self.more: | |
358 if s in seen: | |
359 continue | |
360 | |
361 for s2, o2 in _bwd(None, s, seen): | |
362 yield s2, o | |
363 | |
364 def _all_fwd_paths(): | |
365 if self.zero: | |
366 seen1 = set() | |
367 # According to the spec, ALL nodes are possible solutions | |
368 # (even literals) | |
369 # we cannot do this without going through ALL triples | |
370 # unless we keep an index of all terms somehow | |
371 # but lets just hope this query doesnt happen very often... | |
372 for s, o in graph.subject_objects(None): | |
373 if s not in seen1: | |
374 seen1.add(s) | |
375 yield s, s | |
376 if o not in seen1: | |
377 seen1.add(o) | |
378 yield o, o | |
379 | |
380 seen = set() | |
381 for s, o in evalPath(graph, (None, self.path, None)): | |
382 if not self.more: | |
383 yield s, o | |
384 else: | |
385 if s not in seen: | |
386 seen.add(s) | |
387 f = list(_fwd(s, None, set())) | |
388 for s1, o1 in f: | |
389 assert s1 == s | |
390 yield(s1, o1) | |
391 | |
392 done = set() # the spec does by defn. not allow duplicates | |
393 if subj: | |
394 for x in _fwd(subj, obj, set()): | |
395 if x not in done: | |
396 done.add(x) | |
397 yield x | |
398 elif obj: | |
399 for x in _bwd(subj, obj, set()): | |
400 if x not in done: | |
401 done.add(x) | |
402 yield x | |
403 else: | |
404 for x in _all_fwd_paths(): | |
405 if x not in done: | |
406 done.add(x) | |
407 yield x | |
408 | |
409 def __repr__(self): | |
410 return "Path(%s%s)" % (self.path, self.mod) | |
411 | |
412 def n3(self): | |
413 return '%s%s' % (self.path.n3(), self.mod) | |
414 | |
415 | |
416 class NegatedPath(Path): | |
417 def __init__(self, arg): | |
418 if isinstance(arg, (URIRef, InvPath)): | |
419 self.args = [arg] | |
420 elif isinstance(arg, AlternativePath): | |
421 self.args = arg.args | |
422 else: | |
423 raise Exception( | |
424 'Can only negate URIRefs, InvPaths or ' + | |
425 'AlternativePaths, not: %s' % (arg,)) | |
426 | |
427 def eval(self, graph, subj=None, obj=None): | |
428 for s, p, o in graph.triples((subj, None, obj)): | |
429 for a in self.args: | |
430 if isinstance(a, URIRef): | |
431 if p == a: | |
432 break | |
433 elif isinstance(a, InvPath): | |
434 if (o, a.arg, s) in graph: | |
435 break | |
436 else: | |
437 raise Exception('Invalid path in NegatedPath: %s' % a) | |
438 else: | |
439 yield s, o | |
440 | |
441 def __repr__(self): | |
442 return "Path(! %s)" % ",".join(str(x) for x in self.args) | |
443 | |
444 def n3(self): | |
445 return '!(%s)' % ('|'.join(self.args)) | |
446 | |
447 | |
448 class PathList(list): | |
449 pass | |
450 | |
451 | |
452 def path_alternative(self, other): | |
453 """ | |
454 alternative path | |
455 """ | |
456 if not isinstance(other, (URIRef, Path)): | |
457 raise Exception('Only URIRefs or Paths can be in paths!') | |
458 return AlternativePath(self, other) | |
459 | |
460 | |
461 def path_sequence(self, other): | |
462 """ | |
463 sequence path | |
464 """ | |
465 if not isinstance(other, (URIRef, Path)): | |
466 raise Exception('Only URIRefs or Paths can be in paths!') | |
467 return SequencePath(self, other) | |
468 | |
469 | |
470 def evalPath(graph, t): | |
471 return ((s, o) for s, p, o in graph.triples(t)) | |
472 | |
473 | |
474 def mul_path(p, mul): | |
475 """ | |
476 cardinality path | |
477 """ | |
478 return MulPath(p, mul) | |
479 | |
480 | |
481 def inv_path(p): | |
482 """ | |
483 inverse path | |
484 """ | |
485 return InvPath(p) | |
486 | |
487 | |
488 def neg_path(p): | |
489 """ | |
490 negated path | |
491 """ | |
492 return NegatedPath(p) | |
493 | |
494 | |
495 if __name__ == '__main__': | |
496 | |
497 import doctest | |
498 doctest.testmod() | |
499 else: | |
500 # monkey patch | |
501 # (these cannot be directly in terms.py | |
502 # as it would introduce circular imports) | |
503 | |
504 URIRef.__or__ = path_alternative | |
505 URIRef.__mul__ = mul_path | |
506 URIRef.__invert__ = inv_path | |
507 URIRef.__neg__ = neg_path | |
508 URIRef.__truediv__ = path_sequence | |
509 if not PY3: | |
510 URIRef.__div__ = path_sequence | |
511 | |
512 Path.__invert__ = inv_path | |
513 Path.__neg__ = neg_path | |
514 Path.__mul__ = mul_path | |
515 Path.__or__ = path_alternative | |
516 Path.__truediv__ = path_sequence | |
517 if not PY3: | |
518 Path.__div__ = path_sequence |