comparison env/lib/python3.7/site-packages/rdflib/paths.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 from rdflib.py3compat import PY3, format_doctest_out
2
3 __doc__ = format_doctest_out("""
4
5 This module implements the SPARQL 1.1 Property path operators, as
6 defined in:
7
8 http://www.w3.org/TR/sparql11-query/#propertypaths
9
10 In SPARQL the syntax is as follows:
11
12 +--------------------+-------------------------------------------------+
13 |Syntax | Matches |
14 +====================+=================================================+
15 |iri | An IRI. A path of length one. |
16 +--------------------+-------------------------------------------------+
17 |^elt | Inverse path (object to subject). |
18 +--------------------+-------------------------------------------------+
19 |elt1 / elt2 | A sequence path of elt1 followed by elt2. |
20 +--------------------+-------------------------------------------------+
21 |elt1 | elt2 | A alternative path of elt1 or elt2 |
22 | | (all possibilities are tried). |
23 +--------------------+-------------------------------------------------+
24 |elt* | A path that connects the subject and object |
25 | | of the path by zero or more matches of elt. |
26 +--------------------+-------------------------------------------------+
27 |elt+ | A path that connects the subject and object |
28 | | of the path by one or more matches of elt. |
29 +--------------------+-------------------------------------------------+
30 |elt? | A path that connects the subject and object |
31 | | of the path by zero or one matches of elt. |
32 +--------------------+-------------------------------------------------+
33 |!iri or | Negated property set. An IRI which is not one of|
34 |!(iri\ :sub:`1`\ | | iri\ :sub:`1`...iri\ :sub:`n`. |
35 |... |iri\ :sub:`n`) | !iri is short for !(iri). |
36 +--------------------+-------------------------------------------------+
37 |!^iri or | Negated property set where the excluded matches |
38 |!(^iri\ :sub:`1`\ | | are based on reversed path. That is, not one of |
39 |... |^iri\ :sub:`n`)| iri\ :sub:`1`...iri\ :sub:`n` as reverse paths. |
40 | | !^iri is short for !(^iri). |
41 +--------------------+-------------------------------------------------+
42 |!(iri\ :sub:`1`\ | | A combination of forward and reverse |
43 |...|iri\ :sub:`j`\ || properties in a negated property set. |
44 |^iri\ :sub:`j+1`\ | | |
45 |... |^iri\ :sub:`n`)| |
46 +--------------------+-------------------------------------------------+
47 |(elt) | A group path elt, brackets control precedence. |
48 +--------------------+-------------------------------------------------+
49
50 This module is used internally be the SPARQL engine, but they property paths
51 can also be used to query RDFLib Graphs directly.
52
53 Where possible the SPARQL syntax is mapped to python operators, and property
54 path objects can be constructed from existing URIRefs.
55
56 >>> from rdflib import Graph, Namespace
57
58 >>> foaf=Namespace('http://xmlns.com/foaf/0.1/')
59
60 >>> ~foaf.knows
61 Path(~http://xmlns.com/foaf/0.1/knows)
62
63 >>> foaf.knows/foaf.name
64 Path(http://xmlns.com/foaf/0.1/knows / http://xmlns.com/foaf/0.1/name)
65
66 >>> foaf.name|foaf.firstName
67 Path(http://xmlns.com/foaf/0.1/name | http://xmlns.com/foaf/0.1/firstName)
68
69 Modifiers (?, *, +) are done using * (the multiplication operator) and
70 the strings '*', '?', '+', also defined as constants in this file.
71
72 >>> foaf.knows*OneOrMore
73 Path(http://xmlns.com/foaf/0.1/knows+)
74
75 The path objects can also be used with the normal graph methods.
76
77 First some example data:
78
79 >>> g=Graph()
80
81 >>> g=g.parse(data='''
82 ... @prefix : <ex:> .
83 ...
84 ... :a :p1 :c ; :p2 :f .
85 ... :c :p2 :e ; :p3 :g .
86 ... :g :p3 :h ; :p2 :j .
87 ... :h :p3 :a ; :p2 :g .
88 ...
89 ... :q :px :q .
90 ...
91 ... ''', format='n3') # doctest: +ELLIPSIS
92
93 >>> e=Namespace('ex:')
94
95 Graph contains:
96 >>> (e.a, e.p1/e.p2, e.e) in g
97 True
98
99 Graph generator functions, triples, subjects, objects, etc. :
100
101 >>> list(g.objects(e.c, (e.p3*OneOrMore)/e.p2)) # doctest: +NORMALIZE_WHITESPACE
102 [rdflib.term.URIRef(%(u)s'ex:j'), rdflib.term.URIRef(%(u)s'ex:g'),
103 rdflib.term.URIRef(%(u)s'ex:f')]
104
105 A more complete set of tests:
106
107 >>> list(evalPath(g, (None, e.p1/e.p2, None)))==[(e.a, e.e)]
108 True
109 >>> list(evalPath(g, (e.a, e.p1|e.p2, None)))==[(e.a,e.c), (e.a,e.f)]
110 True
111 >>> list(evalPath(g, (e.c, ~e.p1, None))) == [ (e.c, e.a) ]
112 True
113 >>> list(evalPath(g, (e.a, e.p1*ZeroOrOne, None))) == [(e.a, e.a), (e.a, e.c)]
114 True
115 >>> list(evalPath(g, (e.c, e.p3*OneOrMore, None))) == [
116 ... (e.c, e.g), (e.c, e.h), (e.c, e.a)]
117 True
118 >>> list(evalPath(g, (e.c, e.p3*ZeroOrMore, None))) == [(e.c, e.c),
119 ... (e.c, e.g), (e.c, e.h), (e.c, e.a)]
120 True
121 >>> list(evalPath(g, (e.a, -e.p1, None))) == [(e.a, e.f)]
122 True
123 >>> list(evalPath(g, (e.a, -(e.p1|e.p2), None))) == []
124 True
125 >>> list(evalPath(g, (e.g, -~e.p2, None))) == [(e.g, e.j)]
126 True
127 >>> list(evalPath(g, (e.e, ~(e.p1/e.p2), None))) == [(e.e, e.a)]
128 True
129 >>> list(evalPath(g, (e.a, e.p1/e.p3/e.p3, None))) == [(e.a, e.h)]
130 True
131
132 >>> list(evalPath(g, (e.q, e.px*OneOrMore, None)))
133 [(rdflib.term.URIRef(%(u)s'ex:q'), rdflib.term.URIRef(%(u)s'ex:q'))]
134
135 >>> list(evalPath(g, (None, e.p1|e.p2, e.c)))
136 [(rdflib.term.URIRef(%(u)s'ex:a'), rdflib.term.URIRef(%(u)s'ex:c'))]
137
138 >>> list(evalPath(g, (None, ~e.p1, e.a))) == [ (e.c, e.a) ]
139 True
140 >>> list(evalPath(g, (None, e.p1*ZeroOrOne, e.c))) # doctest: +NORMALIZE_WHITESPACE
141 [(rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:c')),
142 (rdflib.term.URIRef(%(u)s'ex:a'), rdflib.term.URIRef(%(u)s'ex:c'))]
143
144 >>> list(evalPath(g, (None, e.p3*OneOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE
145 [(rdflib.term.URIRef(%(u)s'ex:h'), rdflib.term.URIRef(%(u)s'ex:a')),
146 (rdflib.term.URIRef(%(u)s'ex:g'), rdflib.term.URIRef(%(u)s'ex:a')),
147 (rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:a'))]
148
149 >>> list(evalPath(g, (None, e.p3*ZeroOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE
150 [(rdflib.term.URIRef(%(u)s'ex:a'), rdflib.term.URIRef(%(u)s'ex:a')),
151 (rdflib.term.URIRef(%(u)s'ex:h'), rdflib.term.URIRef(%(u)s'ex:a')),
152 (rdflib.term.URIRef(%(u)s'ex:g'), rdflib.term.URIRef(%(u)s'ex:a')),
153 (rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:a'))]
154
155 >>> list(evalPath(g, (None, -e.p1, e.f))) == [(e.a, e.f)]
156 True
157 >>> list(evalPath(g, (None, -(e.p1|e.p2), e.c))) == []
158 True
159 >>> list(evalPath(g, (None, -~e.p2, e.j))) == [(e.g, e.j)]
160 True
161 >>> list(evalPath(g, (None, ~(e.p1/e.p2), e.a))) == [(e.e, e.a)]
162 True
163 >>> list(evalPath(g, (None, e.p1/e.p3/e.p3, e.h))) == [(e.a, e.h)]
164 True
165
166 >>> list(evalPath(g, (e.q, e.px*OneOrMore, None)))
167 [(rdflib.term.URIRef(%(u)s'ex:q'), rdflib.term.URIRef(%(u)s'ex:q'))]
168
169 >>> list(evalPath(g, (e.c, (e.p2|e.p3)*ZeroOrMore, e.j)))
170 [(rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:j'))]
171
172 No vars specified:
173
174 >>> sorted(list(evalPath(g, (None, e.p3*OneOrMore, None)))) #doctest: +NORMALIZE_WHITESPACE
175 [(rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:a')),
176 (rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:g')),
177 (rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:h')),
178 (rdflib.term.URIRef(%(u)s'ex:g'), rdflib.term.URIRef(%(u)s'ex:a')),
179 (rdflib.term.URIRef(%(u)s'ex:g'), rdflib.term.URIRef(%(u)s'ex:h')),
180 (rdflib.term.URIRef(%(u)s'ex:h'), rdflib.term.URIRef(%(u)s'ex:a'))]
181
182 .. versionadded:: 4.0
183
184 """)
185
186
187 from rdflib.term import URIRef, Node
188
189
190 # property paths
191
192 ZeroOrMore = '*'
193 OneOrMore = '+'
194 ZeroOrOne = '?'
195
196
197 class Path(object):
198 def eval(self, graph, subj=None, obj=None):
199 raise NotImplementedError()
200
201 def __hash__(self):
202 return hash(repr(self))
203
204 def __eq__(self, other):
205 return repr(self) == repr(other)
206
207 def __lt__(self, other):
208 if not isinstance(other, (Path, Node)):
209 raise TypeError('unorderable types: %s() < %s()' % (
210 repr(self), repr(other)))
211 return repr(self) < repr(other)
212
213 def __le__(self, other):
214 if not isinstance(other, (Path, Node)):
215 raise TypeError('unorderable types: %s() < %s()' % (
216 repr(self), repr(other)))
217 return repr(self) <= repr(other)
218
219 def __ne__(self, other):
220 return not self == other
221
222 def __gt__(self, other):
223 return not self <= other
224
225 def __ge__(self, other):
226 return not self < other
227
228
229 class InvPath(Path):
230
231 def __init__(self, arg):
232 self.arg = arg
233
234 def eval(self, graph, subj=None, obj=None):
235 for s, o in evalPath(graph, (obj, self.arg, subj)):
236 yield o, s
237
238 def __repr__(self):
239 return "Path(~%s)" % (self.arg,)
240
241 def n3(self):
242 return '^%s' % self.arg.n3()
243
244
245 class SequencePath(Path):
246 def __init__(self, *args):
247 self.args = []
248 for a in args:
249 if isinstance(a, SequencePath):
250 self.args += a.args
251 else:
252 self.args.append(a)
253
254 def eval(self, graph, subj=None, obj=None):
255 def _eval_seq(paths, subj, obj):
256 if paths[1:]:
257 for s, o in evalPath(graph, (subj, paths[0], None)):
258 for r in _eval_seq(paths[1:], o, obj):
259 yield s, r[1]
260
261 else:
262 for s, o in evalPath(graph, (subj, paths[0], obj)):
263 yield s, o
264
265 def _eval_seq_bw(paths, subj, obj):
266 if paths[:-1]:
267 for s, o in evalPath(graph, (None, paths[-1], obj)):
268 for r in _eval_seq(paths[:-1], subj, s):
269 yield r[0], o
270
271 else:
272 for s, o in evalPath(graph, (subj, paths[0], obj)):
273 yield s, o
274
275 if subj:
276 return _eval_seq(self.args, subj, obj)
277 elif obj:
278 return _eval_seq_bw(self.args, subj, obj)
279 else: # no vars bound, we can start anywhere
280 return _eval_seq(self.args, subj, obj)
281
282 def __repr__(self):
283 return "Path(%s)" % " / ".join(str(x) for x in self.args)
284
285 def n3(self):
286 return '/'.join(a.n3() for a in self.args)
287
288
289 class AlternativePath(Path):
290 def __init__(self, *args):
291 self.args = []
292 for a in args:
293 if isinstance(a, AlternativePath):
294 self.args += a.args
295 else:
296 self.args.append(a)
297
298 def eval(self, graph, subj=None, obj=None):
299 for x in self.args:
300 for y in evalPath(graph, (subj, x, obj)):
301 yield y
302
303 def __repr__(self):
304 return "Path(%s)" % " | ".join(str(x) for x in self.args)
305
306 def n3(self):
307 return '|'.join(a.n3() for a in self.args)
308
309
310
311 class MulPath(Path):
312 def __init__(self, path, mod):
313 self.path = path
314 self.mod = mod
315
316 if mod == ZeroOrOne:
317 self.zero = True
318 self.more = False
319 elif mod == ZeroOrMore:
320 self.zero = True
321 self.more = True
322 elif mod == OneOrMore:
323 self.zero = False
324 self.more = True
325 else:
326 raise Exception('Unknown modifier %s' % mod)
327
328 def eval(self, graph, subj=None, obj=None, first=True):
329 if self.zero and first:
330 if subj and obj:
331 if subj == obj:
332 yield (subj, obj)
333 elif subj:
334 yield (subj, subj)
335 elif obj:
336 yield (obj, obj)
337
338 def _fwd(subj=None, obj=None, seen=None):
339 seen.add(subj)
340
341 for s, o in evalPath(graph, (subj, self.path, None)):
342 if not obj or o == obj:
343 yield s, o
344 if self.more:
345 if o in seen:
346 continue
347 for s2, o2 in _fwd(o, obj, seen):
348 yield s, o2
349
350 def _bwd(subj=None, obj=None, seen=None):
351 seen.add(obj)
352
353 for s, o in evalPath(graph, (None, self.path, obj)):
354 if not subj or subj == s:
355 yield s, o
356 if self.more:
357 if s in seen:
358 continue
359
360 for s2, o2 in _bwd(None, s, seen):
361 yield s2, o
362
363 def _fwdbwd():
364 if self.zero:
365 seen1 = set()
366 # According to the spec, ALL nodes are possible solutions
367 # (even literals)
368 # we cannot do this without going through ALL triples
369 # unless we keep an index of all terms somehow
370 # but lets just hope this query doesnt happen very often...
371 for s, o in graph.subject_objects(None):
372 if s not in seen1:
373 seen1.add(s)
374 yield s, s
375 if o not in seen1:
376 seen1.add(o)
377 yield o, o
378
379 for s, o in evalPath(graph, (None, self.path, None)):
380 if not self.more:
381 yield s, o
382 else:
383 seen = set()
384 f = list(_fwd(s, None, seen)) # cache or recompute?
385 for s3, o3 in _bwd(None, o, seen):
386 for s2, o2 in f:
387 yield s3, o2 # ?
388
389 done = set() # the spec does by defn. not allow duplicates
390 if subj:
391 for x in _fwd(subj, obj, set()):
392 if x not in done:
393 done.add(x)
394 yield x
395 elif obj:
396 for x in _bwd(subj, obj, set()):
397 if x not in done:
398 done.add(x)
399 yield x
400 else:
401 for x in _fwdbwd():
402 if x not in done:
403 done.add(x)
404 yield x
405
406 def __repr__(self):
407 return "Path(%s%s)" % (self.path, self.mod)
408
409 def n3(self):
410 return '%s%s' % (self.path, self.mod)
411
412
413
414 class NegatedPath(Path):
415 def __init__(self, arg):
416 if isinstance(arg, (URIRef, InvPath)):
417 self.args = [arg]
418 elif isinstance(arg, AlternativePath):
419 self.args = arg.args
420 else:
421 raise Exception(
422 'Can only negate URIRefs, InvPaths or ' +
423 'AlternativePaths, not: %s' % (arg,))
424
425 def eval(self, graph, subj=None, obj=None):
426 for s, p, o in graph.triples((subj, None, obj)):
427 for a in self.args:
428 if isinstance(a, URIRef):
429 if p == a:
430 break
431 elif isinstance(a, InvPath):
432 if (o, a.arg, s) in graph:
433 break
434 else:
435 raise Exception('Invalid path in NegatedPath: %s' % a)
436 else:
437 yield s, o
438
439 def __repr__(self):
440 return "Path(! %s)" % ",".join(str(x) for x in self.args)
441
442 def n3(self):
443 return '!(%s)' % ('|'.join(self.args))
444
445
446 class PathList(list):
447 pass
448
449
450 def path_alternative(self, other):
451 """
452 alternative path
453 """
454 if not isinstance(other, (URIRef, Path)):
455 raise Exception('Only URIRefs or Paths can be in paths!')
456 return AlternativePath(self, other)
457
458
459 def path_sequence(self, other):
460 """
461 sequence path
462 """
463 if not isinstance(other, (URIRef, Path)):
464 raise Exception('Only URIRefs or Paths can be in paths!')
465 return SequencePath(self, other)
466
467
468 def evalPath(graph, t):
469 return ((s, o) for s, p, o in graph.triples(t))
470
471 def mul_path(p, mul):
472 """
473 cardinality path
474 """
475 return MulPath(p, mul)
476
477
478 def inv_path(p):
479 """
480 inverse path
481 """
482 return InvPath(p)
483
484
485 def neg_path(p):
486 """
487 negated path
488 """
489 return NegatedPath(p)
490
491
492
493 if __name__ == '__main__':
494
495 import doctest
496 doctest.testmod()
497 else:
498 # monkey patch
499 # (these cannot be directly in terms.py
500 # as it would introduce circular imports)
501
502 URIRef.__or__ = path_alternative
503 URIRef.__mul__ = mul_path
504 URIRef.__invert__ = inv_path
505 URIRef.__neg__ = neg_path
506 URIRef.__truediv__ = path_sequence
507 if not PY3:
508 URIRef.__div__ = path_sequence
509
510 Path.__invert__ = inv_path
511 Path.__neg__ = neg_path
512 Path.__mul__ = mul_path
513 Path.__or__ = path_alternative
514 Path.__truediv__ = path_sequence
515 if not PY3:
516 Path.__div__ = path_sequence