Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/rdflib/paths.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4f3585e2f14b |
|---|---|
| 1 from six import PY3 | |
| 2 | |
| 3 | |
| 4 __doc__ = """ | |
| 5 | |
| 6 This module implements the SPARQL 1.1 Property path operators, as | |
| 7 defined in: | |
| 8 | |
| 9 http://www.w3.org/TR/sparql11-query/#propertypaths | |
| 10 | |
| 11 In SPARQL the syntax is as follows: | |
| 12 | |
| 13 +--------------------+-------------------------------------------------+ | |
| 14 |Syntax | Matches | | |
| 15 +====================+=================================================+ | |
| 16 |iri | An IRI. A path of length one. | | |
| 17 +--------------------+-------------------------------------------------+ | |
| 18 |^elt | Inverse path (object to subject). | | |
| 19 +--------------------+-------------------------------------------------+ | |
| 20 |elt1 / elt2 | A sequence path of elt1 followed by elt2. | | |
| 21 +--------------------+-------------------------------------------------+ | |
| 22 |elt1 | elt2 | A alternative path of elt1 or elt2 | | |
| 23 | | (all possibilities are tried). | | |
| 24 +--------------------+-------------------------------------------------+ | |
| 25 |elt* | A path that connects the subject and object | | |
| 26 | | of the path by zero or more matches of elt. | | |
| 27 +--------------------+-------------------------------------------------+ | |
| 28 |elt+ | A path that connects the subject and object | | |
| 29 | | of the path by one or more matches of elt. | | |
| 30 +--------------------+-------------------------------------------------+ | |
| 31 |elt? | A path that connects the subject and object | | |
| 32 | | of the path by zero or one matches of elt. | | |
| 33 +--------------------+-------------------------------------------------+ | |
| 34 |!iri or | Negated property set. An IRI which is not one of| | |
| 35 |!(iri\ :sub:`1`\ \| | iri\ :sub:`1`...iri\ :sub:`n`. | | |
| 36 |... \|iri\ :sub:`n`)| !iri is short for !(iri). | | |
| 37 +--------------------+-------------------------------------------------+ | |
| 38 |!^iri or | Negated property set where the excluded matches | | |
| 39 |!(^iri\ :sub:`1`\ \|| are based on reversed path. That is, not one of | | |
| 40 |...\|^iri\ :sub:`n`)| iri\ :sub:`1`...iri\ :sub:`n` as reverse paths. | | |
| 41 | | !^iri is short for !(^iri). | | |
| 42 +--------------------+-------------------------------------------------+ | |
| 43 |!(iri\ :sub:`1`\ \| | A combination of forward and reverse | | |
| 44 |...\|iri\ :sub:`j`\ | properties in a negated property set. | | |
| 45 |\|^iri\ :sub:`j+1`\ | | | |
| 46 |\|... \|^iri\ | | | |
| 47 |:sub:`n`)| | | | |
| 48 +--------------------+-------------------------------------------------+ | |
| 49 |(elt) | A group path elt, brackets control precedence. | | |
| 50 +--------------------+-------------------------------------------------+ | |
| 51 | |
| 52 This module is used internally be the SPARQL engine, but they property paths | |
| 53 can also be used to query RDFLib Graphs directly. | |
| 54 | |
| 55 Where possible the SPARQL syntax is mapped to python operators, and property | |
| 56 path objects can be constructed from existing URIRefs. | |
| 57 | |
| 58 >>> from rdflib import Graph, Namespace | |
| 59 | |
| 60 >>> foaf=Namespace('http://xmlns.com/foaf/0.1/') | |
| 61 | |
| 62 >>> ~foaf.knows | |
| 63 Path(~http://xmlns.com/foaf/0.1/knows) | |
| 64 | |
| 65 >>> foaf.knows/foaf.name | |
| 66 Path(http://xmlns.com/foaf/0.1/knows / http://xmlns.com/foaf/0.1/name) | |
| 67 | |
| 68 >>> foaf.name|foaf.givenName | |
| 69 Path(http://xmlns.com/foaf/0.1/name | http://xmlns.com/foaf/0.1/givenName) | |
| 70 | |
| 71 Modifiers (?, *, +) are done using * (the multiplication operator) and | |
| 72 the strings '*', '?', '+', also defined as constants in this file. | |
| 73 | |
| 74 >>> foaf.knows*OneOrMore | |
| 75 Path(http://xmlns.com/foaf/0.1/knows+) | |
| 76 | |
| 77 The path objects can also be used with the normal graph methods. | |
| 78 | |
| 79 First some example data: | |
| 80 | |
| 81 >>> g=Graph() | |
| 82 | |
| 83 >>> g=g.parse(data=''' | |
| 84 ... @prefix : <ex:> . | |
| 85 ... | |
| 86 ... :a :p1 :c ; :p2 :f . | |
| 87 ... :c :p2 :e ; :p3 :g . | |
| 88 ... :g :p3 :h ; :p2 :j . | |
| 89 ... :h :p3 :a ; :p2 :g . | |
| 90 ... | |
| 91 ... :q :px :q . | |
| 92 ... | |
| 93 ... ''', format='n3') # doctest: +ELLIPSIS | |
| 94 | |
| 95 >>> e=Namespace('ex:') | |
| 96 | |
| 97 Graph contains: | |
| 98 >>> (e.a, e.p1/e.p2, e.e) in g | |
| 99 True | |
| 100 | |
| 101 Graph generator functions, triples, subjects, objects, etc. : | |
| 102 | |
| 103 >>> list(g.objects(e.c, (e.p3*OneOrMore)/e.p2)) # doctest: +NORMALIZE_WHITESPACE | |
| 104 [rdflib.term.URIRef(u'ex:j'), rdflib.term.URIRef(u'ex:g'), | |
| 105 rdflib.term.URIRef(u'ex:f')] | |
| 106 | |
| 107 A more complete set of tests: | |
| 108 | |
| 109 >>> list(evalPath(g, (None, e.p1/e.p2, None)))==[(e.a, e.e)] | |
| 110 True | |
| 111 >>> list(evalPath(g, (e.a, e.p1|e.p2, None)))==[(e.a,e.c), (e.a,e.f)] | |
| 112 True | |
| 113 >>> list(evalPath(g, (e.c, ~e.p1, None))) == [ (e.c, e.a) ] | |
| 114 True | |
| 115 >>> list(evalPath(g, (e.a, e.p1*ZeroOrOne, None))) == [(e.a, e.a), (e.a, e.c)] | |
| 116 True | |
| 117 >>> list(evalPath(g, (e.c, e.p3*OneOrMore, None))) == [ | |
| 118 ... (e.c, e.g), (e.c, e.h), (e.c, e.a)] | |
| 119 True | |
| 120 >>> list(evalPath(g, (e.c, e.p3*ZeroOrMore, None))) == [(e.c, e.c), | |
| 121 ... (e.c, e.g), (e.c, e.h), (e.c, e.a)] | |
| 122 True | |
| 123 >>> list(evalPath(g, (e.a, -e.p1, None))) == [(e.a, e.f)] | |
| 124 True | |
| 125 >>> list(evalPath(g, (e.a, -(e.p1|e.p2), None))) == [] | |
| 126 True | |
| 127 >>> list(evalPath(g, (e.g, -~e.p2, None))) == [(e.g, e.j)] | |
| 128 True | |
| 129 >>> list(evalPath(g, (e.e, ~(e.p1/e.p2), None))) == [(e.e, e.a)] | |
| 130 True | |
| 131 >>> list(evalPath(g, (e.a, e.p1/e.p3/e.p3, None))) == [(e.a, e.h)] | |
| 132 True | |
| 133 | |
| 134 >>> list(evalPath(g, (e.q, e.px*OneOrMore, None))) | |
| 135 [(rdflib.term.URIRef(u'ex:q'), rdflib.term.URIRef(u'ex:q'))] | |
| 136 | |
| 137 >>> list(evalPath(g, (None, e.p1|e.p2, e.c))) | |
| 138 [(rdflib.term.URIRef(u'ex:a'), rdflib.term.URIRef(u'ex:c'))] | |
| 139 | |
| 140 >>> list(evalPath(g, (None, ~e.p1, e.a))) == [ (e.c, e.a) ] | |
| 141 True | |
| 142 >>> list(evalPath(g, (None, e.p1*ZeroOrOne, e.c))) # doctest: +NORMALIZE_WHITESPACE | |
| 143 [(rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:c')), | |
| 144 (rdflib.term.URIRef(u'ex:a'), rdflib.term.URIRef(u'ex:c'))] | |
| 145 | |
| 146 >>> list(evalPath(g, (None, e.p3*OneOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE | |
| 147 [(rdflib.term.URIRef(u'ex:h'), rdflib.term.URIRef(u'ex:a')), | |
| 148 (rdflib.term.URIRef(u'ex:g'), rdflib.term.URIRef(u'ex:a')), | |
| 149 (rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:a'))] | |
| 150 | |
| 151 >>> list(evalPath(g, (None, e.p3*ZeroOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE | |
| 152 [(rdflib.term.URIRef(u'ex:a'), rdflib.term.URIRef(u'ex:a')), | |
| 153 (rdflib.term.URIRef(u'ex:h'), rdflib.term.URIRef(u'ex:a')), | |
| 154 (rdflib.term.URIRef(u'ex:g'), rdflib.term.URIRef(u'ex:a')), | |
| 155 (rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:a'))] | |
| 156 | |
| 157 >>> list(evalPath(g, (None, -e.p1, e.f))) == [(e.a, e.f)] | |
| 158 True | |
| 159 >>> list(evalPath(g, (None, -(e.p1|e.p2), e.c))) == [] | |
| 160 True | |
| 161 >>> list(evalPath(g, (None, -~e.p2, e.j))) == [(e.g, e.j)] | |
| 162 True | |
| 163 >>> list(evalPath(g, (None, ~(e.p1/e.p2), e.a))) == [(e.e, e.a)] | |
| 164 True | |
| 165 >>> list(evalPath(g, (None, e.p1/e.p3/e.p3, e.h))) == [(e.a, e.h)] | |
| 166 True | |
| 167 | |
| 168 >>> list(evalPath(g, (e.q, e.px*OneOrMore, None))) | |
| 169 [(rdflib.term.URIRef(u'ex:q'), rdflib.term.URIRef(u'ex:q'))] | |
| 170 | |
| 171 >>> list(evalPath(g, (e.c, (e.p2|e.p3)*ZeroOrMore, e.j))) | |
| 172 [(rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:j'))] | |
| 173 | |
| 174 No vars specified: | |
| 175 | |
| 176 >>> sorted(list(evalPath(g, (None, e.p3*OneOrMore, None)))) #doctest: +NORMALIZE_WHITESPACE | |
| 177 [(rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:a')), | |
| 178 (rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:g')), | |
| 179 (rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:h')), | |
| 180 (rdflib.term.URIRef(u'ex:g'), rdflib.term.URIRef(u'ex:a')), | |
| 181 (rdflib.term.URIRef(u'ex:g'), rdflib.term.URIRef(u'ex:h')), | |
| 182 (rdflib.term.URIRef(u'ex:h'), rdflib.term.URIRef(u'ex:a'))] | |
| 183 | |
| 184 .. versionadded:: 4.0 | |
| 185 | |
| 186 """ | |
| 187 | |
| 188 | |
| 189 from rdflib.term import URIRef, Node | |
| 190 | |
| 191 | |
| 192 # property paths | |
| 193 | |
| 194 ZeroOrMore = '*' | |
| 195 OneOrMore = '+' | |
| 196 ZeroOrOne = '?' | |
| 197 | |
| 198 | |
| 199 class Path(object): | |
| 200 def eval(self, graph, subj=None, obj=None): | |
| 201 raise NotImplementedError() | |
| 202 | |
| 203 def __hash__(self): | |
| 204 return hash(repr(self)) | |
| 205 | |
| 206 def __eq__(self, other): | |
| 207 return repr(self) == repr(other) | |
| 208 | |
| 209 def __lt__(self, other): | |
| 210 if not isinstance(other, (Path, Node)): | |
| 211 raise TypeError('unorderable types: %s() < %s()' % ( | |
| 212 repr(self), repr(other))) | |
| 213 return repr(self) < repr(other) | |
| 214 | |
| 215 def __le__(self, other): | |
| 216 if not isinstance(other, (Path, Node)): | |
| 217 raise TypeError('unorderable types: %s() < %s()' % ( | |
| 218 repr(self), repr(other))) | |
| 219 return repr(self) <= repr(other) | |
| 220 | |
| 221 def __ne__(self, other): | |
| 222 return not self == other | |
| 223 | |
| 224 def __gt__(self, other): | |
| 225 return not self <= other | |
| 226 | |
| 227 def __ge__(self, other): | |
| 228 return not self < other | |
| 229 | |
| 230 | |
| 231 class InvPath(Path): | |
| 232 | |
| 233 def __init__(self, arg): | |
| 234 self.arg = arg | |
| 235 | |
| 236 def eval(self, graph, subj=None, obj=None): | |
| 237 for s, o in evalPath(graph, (obj, self.arg, subj)): | |
| 238 yield o, s | |
| 239 | |
| 240 def __repr__(self): | |
| 241 return "Path(~%s)" % (self.arg,) | |
| 242 | |
| 243 def n3(self): | |
| 244 return '^%s' % self.arg.n3() | |
| 245 | |
| 246 | |
| 247 class SequencePath(Path): | |
| 248 def __init__(self, *args): | |
| 249 self.args = [] | |
| 250 for a in args: | |
| 251 if isinstance(a, SequencePath): | |
| 252 self.args += a.args | |
| 253 else: | |
| 254 self.args.append(a) | |
| 255 | |
| 256 def eval(self, graph, subj=None, obj=None): | |
| 257 def _eval_seq(paths, subj, obj): | |
| 258 if paths[1:]: | |
| 259 for s, o in evalPath(graph, (subj, paths[0], None)): | |
| 260 for r in _eval_seq(paths[1:], o, obj): | |
| 261 yield s, r[1] | |
| 262 | |
| 263 else: | |
| 264 for s, o in evalPath(graph, (subj, paths[0], obj)): | |
| 265 yield s, o | |
| 266 | |
| 267 def _eval_seq_bw(paths, subj, obj): | |
| 268 if paths[:-1]: | |
| 269 for s, o in evalPath(graph, (None, paths[-1], obj)): | |
| 270 for r in _eval_seq(paths[:-1], subj, s): | |
| 271 yield r[0], o | |
| 272 | |
| 273 else: | |
| 274 for s, o in evalPath(graph, (subj, paths[0], obj)): | |
| 275 yield s, o | |
| 276 | |
| 277 if subj: | |
| 278 return _eval_seq(self.args, subj, obj) | |
| 279 elif obj: | |
| 280 return _eval_seq_bw(self.args, subj, obj) | |
| 281 else: # no vars bound, we can start anywhere | |
| 282 return _eval_seq(self.args, subj, obj) | |
| 283 | |
| 284 def __repr__(self): | |
| 285 return "Path(%s)" % " / ".join(str(x) for x in self.args) | |
| 286 | |
| 287 def n3(self): | |
| 288 return '/'.join(a.n3() for a in self.args) | |
| 289 | |
| 290 | |
| 291 class AlternativePath(Path): | |
| 292 def __init__(self, *args): | |
| 293 self.args = [] | |
| 294 for a in args: | |
| 295 if isinstance(a, AlternativePath): | |
| 296 self.args += a.args | |
| 297 else: | |
| 298 self.args.append(a) | |
| 299 | |
| 300 def eval(self, graph, subj=None, obj=None): | |
| 301 for x in self.args: | |
| 302 for y in evalPath(graph, (subj, x, obj)): | |
| 303 yield y | |
| 304 | |
| 305 def __repr__(self): | |
| 306 return "Path(%s)" % " | ".join(str(x) for x in self.args) | |
| 307 | |
| 308 def n3(self): | |
| 309 return '|'.join(a.n3() for a in self.args) | |
| 310 | |
| 311 | |
| 312 class MulPath(Path): | |
| 313 def __init__(self, path, mod): | |
| 314 self.path = path | |
| 315 self.mod = mod | |
| 316 | |
| 317 if mod == ZeroOrOne: | |
| 318 self.zero = True | |
| 319 self.more = False | |
| 320 elif mod == ZeroOrMore: | |
| 321 self.zero = True | |
| 322 self.more = True | |
| 323 elif mod == OneOrMore: | |
| 324 self.zero = False | |
| 325 self.more = True | |
| 326 else: | |
| 327 raise Exception('Unknown modifier %s' % mod) | |
| 328 | |
| 329 def eval(self, graph, subj=None, obj=None, first=True): | |
| 330 if self.zero and first: | |
| 331 if subj and obj: | |
| 332 if subj == obj: | |
| 333 yield (subj, obj) | |
| 334 elif subj: | |
| 335 yield (subj, subj) | |
| 336 elif obj: | |
| 337 yield (obj, obj) | |
| 338 | |
| 339 def _fwd(subj=None, obj=None, seen=None): | |
| 340 seen.add(subj) | |
| 341 | |
| 342 for s, o in evalPath(graph, (subj, self.path, None)): | |
| 343 if not obj or o == obj: | |
| 344 yield s, o | |
| 345 if self.more: | |
| 346 if o in seen: | |
| 347 continue | |
| 348 for s2, o2 in _fwd(o, obj, seen): | |
| 349 yield s, o2 | |
| 350 | |
| 351 def _bwd(subj=None, obj=None, seen=None): | |
| 352 seen.add(obj) | |
| 353 | |
| 354 for s, o in evalPath(graph, (None, self.path, obj)): | |
| 355 if not subj or subj == s: | |
| 356 yield s, o | |
| 357 if self.more: | |
| 358 if s in seen: | |
| 359 continue | |
| 360 | |
| 361 for s2, o2 in _bwd(None, s, seen): | |
| 362 yield s2, o | |
| 363 | |
| 364 def _all_fwd_paths(): | |
| 365 if self.zero: | |
| 366 seen1 = set() | |
| 367 # According to the spec, ALL nodes are possible solutions | |
| 368 # (even literals) | |
| 369 # we cannot do this without going through ALL triples | |
| 370 # unless we keep an index of all terms somehow | |
| 371 # but lets just hope this query doesnt happen very often... | |
| 372 for s, o in graph.subject_objects(None): | |
| 373 if s not in seen1: | |
| 374 seen1.add(s) | |
| 375 yield s, s | |
| 376 if o not in seen1: | |
| 377 seen1.add(o) | |
| 378 yield o, o | |
| 379 | |
| 380 seen = set() | |
| 381 for s, o in evalPath(graph, (None, self.path, None)): | |
| 382 if not self.more: | |
| 383 yield s, o | |
| 384 else: | |
| 385 if s not in seen: | |
| 386 seen.add(s) | |
| 387 f = list(_fwd(s, None, set())) | |
| 388 for s1, o1 in f: | |
| 389 assert s1 == s | |
| 390 yield(s1, o1) | |
| 391 | |
| 392 done = set() # the spec does by defn. not allow duplicates | |
| 393 if subj: | |
| 394 for x in _fwd(subj, obj, set()): | |
| 395 if x not in done: | |
| 396 done.add(x) | |
| 397 yield x | |
| 398 elif obj: | |
| 399 for x in _bwd(subj, obj, set()): | |
| 400 if x not in done: | |
| 401 done.add(x) | |
| 402 yield x | |
| 403 else: | |
| 404 for x in _all_fwd_paths(): | |
| 405 if x not in done: | |
| 406 done.add(x) | |
| 407 yield x | |
| 408 | |
| 409 def __repr__(self): | |
| 410 return "Path(%s%s)" % (self.path, self.mod) | |
| 411 | |
| 412 def n3(self): | |
| 413 return '%s%s' % (self.path.n3(), self.mod) | |
| 414 | |
| 415 | |
| 416 class NegatedPath(Path): | |
| 417 def __init__(self, arg): | |
| 418 if isinstance(arg, (URIRef, InvPath)): | |
| 419 self.args = [arg] | |
| 420 elif isinstance(arg, AlternativePath): | |
| 421 self.args = arg.args | |
| 422 else: | |
| 423 raise Exception( | |
| 424 'Can only negate URIRefs, InvPaths or ' + | |
| 425 'AlternativePaths, not: %s' % (arg,)) | |
| 426 | |
| 427 def eval(self, graph, subj=None, obj=None): | |
| 428 for s, p, o in graph.triples((subj, None, obj)): | |
| 429 for a in self.args: | |
| 430 if isinstance(a, URIRef): | |
| 431 if p == a: | |
| 432 break | |
| 433 elif isinstance(a, InvPath): | |
| 434 if (o, a.arg, s) in graph: | |
| 435 break | |
| 436 else: | |
| 437 raise Exception('Invalid path in NegatedPath: %s' % a) | |
| 438 else: | |
| 439 yield s, o | |
| 440 | |
| 441 def __repr__(self): | |
| 442 return "Path(! %s)" % ",".join(str(x) for x in self.args) | |
| 443 | |
| 444 def n3(self): | |
| 445 return '!(%s)' % ('|'.join(self.args)) | |
| 446 | |
| 447 | |
| 448 class PathList(list): | |
| 449 pass | |
| 450 | |
| 451 | |
| 452 def path_alternative(self, other): | |
| 453 """ | |
| 454 alternative path | |
| 455 """ | |
| 456 if not isinstance(other, (URIRef, Path)): | |
| 457 raise Exception('Only URIRefs or Paths can be in paths!') | |
| 458 return AlternativePath(self, other) | |
| 459 | |
| 460 | |
| 461 def path_sequence(self, other): | |
| 462 """ | |
| 463 sequence path | |
| 464 """ | |
| 465 if not isinstance(other, (URIRef, Path)): | |
| 466 raise Exception('Only URIRefs or Paths can be in paths!') | |
| 467 return SequencePath(self, other) | |
| 468 | |
| 469 | |
| 470 def evalPath(graph, t): | |
| 471 return ((s, o) for s, p, o in graph.triples(t)) | |
| 472 | |
| 473 | |
| 474 def mul_path(p, mul): | |
| 475 """ | |
| 476 cardinality path | |
| 477 """ | |
| 478 return MulPath(p, mul) | |
| 479 | |
| 480 | |
| 481 def inv_path(p): | |
| 482 """ | |
| 483 inverse path | |
| 484 """ | |
| 485 return InvPath(p) | |
| 486 | |
| 487 | |
| 488 def neg_path(p): | |
| 489 """ | |
| 490 negated path | |
| 491 """ | |
| 492 return NegatedPath(p) | |
| 493 | |
| 494 | |
| 495 if __name__ == '__main__': | |
| 496 | |
| 497 import doctest | |
| 498 doctest.testmod() | |
| 499 else: | |
| 500 # monkey patch | |
| 501 # (these cannot be directly in terms.py | |
| 502 # as it would introduce circular imports) | |
| 503 | |
| 504 URIRef.__or__ = path_alternative | |
| 505 URIRef.__mul__ = mul_path | |
| 506 URIRef.__invert__ = inv_path | |
| 507 URIRef.__neg__ = neg_path | |
| 508 URIRef.__truediv__ = path_sequence | |
| 509 if not PY3: | |
| 510 URIRef.__div__ = path_sequence | |
| 511 | |
| 512 Path.__invert__ = inv_path | |
| 513 Path.__neg__ = neg_path | |
| 514 Path.__mul__ = mul_path | |
| 515 Path.__or__ = path_alternative | |
| 516 Path.__truediv__ = path_sequence | |
| 517 if not PY3: | |
| 518 Path.__div__ = path_sequence |
