diff env/lib/python3.7/site-packages/rdflib/paths.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.7/site-packages/rdflib/paths.py	Sat May 02 07:14:21 2020 -0400
@@ -0,0 +1,516 @@
+from rdflib.py3compat import PY3, format_doctest_out
+
+__doc__ = format_doctest_out("""
+
+This module implements the SPARQL 1.1 Property path operators, as
+defined in:
+
+http://www.w3.org/TR/sparql11-query/#propertypaths
+
+In SPARQL the syntax is as follows:
+
++--------------------+-------------------------------------------------+
+|Syntax              | Matches                                         |
++====================+=================================================+
+|iri                 | An IRI. A path of length one.                   |
++--------------------+-------------------------------------------------+
+|^elt                | Inverse path (object to subject).               |
++--------------------+-------------------------------------------------+
+|elt1 / elt2         | A sequence path of elt1 followed by elt2.       |
++--------------------+-------------------------------------------------+
+|elt1 | elt2         | A alternative path of elt1 or elt2              |
+|                    | (all possibilities are tried).                  |
++--------------------+-------------------------------------------------+
+|elt*                | A path that connects the subject and object     |
+|                    | of the path by zero or more matches of elt.     |
++--------------------+-------------------------------------------------+
+|elt+                | A path that connects the subject and object     |
+|                    | of the path by one or more matches of elt.      |
++--------------------+-------------------------------------------------+
+|elt?                | A path that connects the subject and object     |
+|                    | of the path by zero or one matches of elt.      |
++--------------------+-------------------------------------------------+
+|!iri or             | Negated property set. An IRI which is not one of|
+|!(iri\ :sub:`1`\ |  | iri\ :sub:`1`...iri\ :sub:`n`.                  |
+|... |iri\ :sub:`n`) | !iri is short for !(iri).                       |
++--------------------+-------------------------------------------------+
+|!^iri or            | Negated property set where the excluded matches |
+|!(^iri\ :sub:`1`\ | | are based on reversed path. That is, not one of |
+|... |^iri\ :sub:`n`)| iri\ :sub:`1`...iri\ :sub:`n` as reverse paths. |
+|                    | !^iri is short for !(^iri).                     |
++--------------------+-------------------------------------------------+
+|!(iri\ :sub:`1`\ |  | A combination of forward and reverse            |
+|...|iri\ :sub:`j`\ || properties in a negated property set.           |
+|^iri\ :sub:`j+1`\ | |                                                 |
+|... |^iri\ :sub:`n`)|                                                 |
++--------------------+-------------------------------------------------+
+|(elt)               | A group path elt, brackets control precedence.  |
++--------------------+-------------------------------------------------+
+
+This module is used internally be the SPARQL engine, but they property paths
+can also be used to query RDFLib Graphs directly.
+
+Where possible the SPARQL syntax is mapped to python operators, and property
+path objects can be constructed from existing URIRefs.
+
+>>> from rdflib import Graph, Namespace
+
+>>> foaf=Namespace('http://xmlns.com/foaf/0.1/')
+
+>>> ~foaf.knows
+Path(~http://xmlns.com/foaf/0.1/knows)
+
+>>> foaf.knows/foaf.name
+Path(http://xmlns.com/foaf/0.1/knows / http://xmlns.com/foaf/0.1/name)
+
+>>> foaf.name|foaf.firstName
+Path(http://xmlns.com/foaf/0.1/name | http://xmlns.com/foaf/0.1/firstName)
+
+Modifiers (?, *, +) are done using * (the multiplication operator) and
+the strings '*', '?', '+', also defined as constants in this file.
+
+>>> foaf.knows*OneOrMore
+Path(http://xmlns.com/foaf/0.1/knows+)
+
+The path objects can also be used with the normal graph methods.
+
+First some example data:
+
+>>> g=Graph()
+
+>>> g=g.parse(data='''
+... @prefix : <ex:> .
+...
+... :a :p1 :c ; :p2 :f .
+... :c :p2 :e ; :p3 :g .
+... :g :p3 :h ; :p2 :j .
+... :h :p3 :a ; :p2 :g .
+...
+... :q :px :q .
+...
+... ''', format='n3') # doctest: +ELLIPSIS
+
+>>> e=Namespace('ex:')
+
+Graph contains:
+>>> (e.a, e.p1/e.p2, e.e) in g
+True
+
+Graph generator functions, triples, subjects, objects, etc. :
+
+>>> list(g.objects(e.c, (e.p3*OneOrMore)/e.p2)) # doctest: +NORMALIZE_WHITESPACE
+[rdflib.term.URIRef(%(u)s'ex:j'), rdflib.term.URIRef(%(u)s'ex:g'),
+    rdflib.term.URIRef(%(u)s'ex:f')]
+
+A more complete set of tests:
+
+>>> list(evalPath(g, (None, e.p1/e.p2, None)))==[(e.a, e.e)]
+True
+>>> list(evalPath(g, (e.a, e.p1|e.p2, None)))==[(e.a,e.c), (e.a,e.f)]
+True
+>>> list(evalPath(g, (e.c, ~e.p1, None))) == [ (e.c, e.a) ]
+True
+>>> list(evalPath(g, (e.a, e.p1*ZeroOrOne, None))) == [(e.a, e.a), (e.a, e.c)]
+True
+>>> list(evalPath(g, (e.c, e.p3*OneOrMore, None))) == [
+...     (e.c, e.g), (e.c, e.h), (e.c, e.a)]
+True
+>>> list(evalPath(g, (e.c, e.p3*ZeroOrMore, None))) == [(e.c, e.c),
+...     (e.c, e.g), (e.c, e.h), (e.c, e.a)]
+True
+>>> list(evalPath(g, (e.a, -e.p1, None))) == [(e.a, e.f)]
+True
+>>> list(evalPath(g, (e.a, -(e.p1|e.p2), None))) == []
+True
+>>> list(evalPath(g, (e.g, -~e.p2, None))) == [(e.g, e.j)]
+True
+>>> list(evalPath(g, (e.e, ~(e.p1/e.p2), None))) == [(e.e, e.a)]
+True
+>>> list(evalPath(g, (e.a, e.p1/e.p3/e.p3, None))) == [(e.a, e.h)]
+True
+
+>>> list(evalPath(g, (e.q, e.px*OneOrMore, None)))
+[(rdflib.term.URIRef(%(u)s'ex:q'), rdflib.term.URIRef(%(u)s'ex:q'))]
+
+>>> list(evalPath(g, (None, e.p1|e.p2, e.c)))
+[(rdflib.term.URIRef(%(u)s'ex:a'), rdflib.term.URIRef(%(u)s'ex:c'))]
+
+>>> list(evalPath(g, (None, ~e.p1, e.a))) == [ (e.c, e.a) ]
+True
+>>> list(evalPath(g, (None, e.p1*ZeroOrOne, e.c))) # doctest: +NORMALIZE_WHITESPACE
+[(rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:c')),
+ (rdflib.term.URIRef(%(u)s'ex:a'), rdflib.term.URIRef(%(u)s'ex:c'))]
+
+>>> list(evalPath(g, (None, e.p3*OneOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE
+[(rdflib.term.URIRef(%(u)s'ex:h'), rdflib.term.URIRef(%(u)s'ex:a')),
+ (rdflib.term.URIRef(%(u)s'ex:g'), rdflib.term.URIRef(%(u)s'ex:a')),
+ (rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:a'))]
+
+>>> list(evalPath(g, (None, e.p3*ZeroOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE
+[(rdflib.term.URIRef(%(u)s'ex:a'), rdflib.term.URIRef(%(u)s'ex:a')),
+ (rdflib.term.URIRef(%(u)s'ex:h'), rdflib.term.URIRef(%(u)s'ex:a')),
+ (rdflib.term.URIRef(%(u)s'ex:g'), rdflib.term.URIRef(%(u)s'ex:a')),
+ (rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:a'))]
+
+>>> list(evalPath(g, (None, -e.p1, e.f))) == [(e.a, e.f)]
+True
+>>> list(evalPath(g, (None, -(e.p1|e.p2), e.c))) == []
+True
+>>> list(evalPath(g, (None, -~e.p2, e.j))) == [(e.g, e.j)]
+True
+>>> list(evalPath(g, (None, ~(e.p1/e.p2), e.a))) == [(e.e, e.a)]
+True
+>>> list(evalPath(g, (None, e.p1/e.p3/e.p3, e.h))) == [(e.a, e.h)]
+True
+
+>>> list(evalPath(g, (e.q, e.px*OneOrMore, None)))
+[(rdflib.term.URIRef(%(u)s'ex:q'), rdflib.term.URIRef(%(u)s'ex:q'))]
+
+>>> list(evalPath(g, (e.c, (e.p2|e.p3)*ZeroOrMore, e.j)))
+[(rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:j'))]
+
+No vars specified:
+
+>>> sorted(list(evalPath(g, (None, e.p3*OneOrMore, None)))) #doctest: +NORMALIZE_WHITESPACE
+[(rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:a')),
+ (rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:g')),
+ (rdflib.term.URIRef(%(u)s'ex:c'), rdflib.term.URIRef(%(u)s'ex:h')),
+ (rdflib.term.URIRef(%(u)s'ex:g'), rdflib.term.URIRef(%(u)s'ex:a')),
+ (rdflib.term.URIRef(%(u)s'ex:g'), rdflib.term.URIRef(%(u)s'ex:h')),
+ (rdflib.term.URIRef(%(u)s'ex:h'), rdflib.term.URIRef(%(u)s'ex:a'))]
+
+.. versionadded:: 4.0
+
+""")
+
+
+from rdflib.term import URIRef, Node
+
+
+# property paths
+
+ZeroOrMore = '*'
+OneOrMore = '+'
+ZeroOrOne = '?'
+
+
+class Path(object):
+    def eval(self, graph, subj=None, obj=None):
+        raise NotImplementedError()
+
+    def __hash__(self):
+        return hash(repr(self))
+
+    def __eq__(self, other):
+        return repr(self) == repr(other)
+
+    def __lt__(self, other):
+        if not isinstance(other, (Path, Node)):
+            raise TypeError('unorderable types: %s() < %s()' % (
+                repr(self), repr(other)))
+        return repr(self) < repr(other)
+
+    def __le__(self, other):
+        if not isinstance(other, (Path, Node)):
+            raise TypeError('unorderable types: %s() < %s()' % (
+                repr(self), repr(other)))
+        return repr(self) <= repr(other)
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __gt__(self, other):
+        return not self <= other
+
+    def __ge__(self, other):
+        return not self < other
+
+
+class InvPath(Path):
+
+    def __init__(self, arg):
+        self.arg = arg
+
+    def eval(self, graph, subj=None, obj=None):
+        for s, o in evalPath(graph, (obj, self.arg, subj)):
+            yield o, s
+
+    def __repr__(self):
+        return "Path(~%s)" % (self.arg,)
+
+    def n3(self):
+        return '^%s' % self.arg.n3()
+
+
+class SequencePath(Path):
+    def __init__(self, *args):
+        self.args = []
+        for a in args:
+            if isinstance(a, SequencePath):
+                self.args += a.args
+            else:
+                self.args.append(a)
+
+    def eval(self, graph, subj=None, obj=None):
+        def _eval_seq(paths, subj, obj):
+            if paths[1:]:
+                for s, o in evalPath(graph, (subj, paths[0], None)):
+                    for r in _eval_seq(paths[1:], o, obj):
+                        yield s, r[1]
+
+            else:
+                for s, o in evalPath(graph, (subj, paths[0], obj)):
+                    yield s, o
+
+        def _eval_seq_bw(paths, subj, obj):
+            if paths[:-1]:
+                for s, o in evalPath(graph, (None, paths[-1], obj)):
+                    for r in _eval_seq(paths[:-1], subj, s):
+                        yield r[0], o
+
+            else:
+                for s, o in evalPath(graph, (subj, paths[0], obj)):
+                    yield s, o
+
+        if subj:
+            return _eval_seq(self.args, subj, obj)
+        elif obj:
+            return _eval_seq_bw(self.args, subj, obj)
+        else:  # no vars bound, we can start anywhere
+            return _eval_seq(self.args, subj, obj)
+
+    def __repr__(self):
+        return "Path(%s)" % " / ".join(str(x) for x in self.args)
+
+    def n3(self):
+        return '/'.join(a.n3() for a in self.args)
+
+
+class AlternativePath(Path):
+    def __init__(self, *args):
+        self.args = []
+        for a in args:
+            if isinstance(a, AlternativePath):
+                self.args += a.args
+            else:
+                self.args.append(a)
+
+    def eval(self, graph, subj=None, obj=None):
+        for x in self.args:
+            for y in evalPath(graph, (subj, x, obj)):
+                yield y
+
+    def __repr__(self):
+        return "Path(%s)" % " | ".join(str(x) for x in self.args)
+
+    def n3(self):
+        return '|'.join(a.n3() for a in self.args)
+
+
+
+class MulPath(Path):
+    def __init__(self, path, mod):
+        self.path = path
+        self.mod = mod
+
+        if mod == ZeroOrOne:
+            self.zero = True
+            self.more = False
+        elif mod == ZeroOrMore:
+            self.zero = True
+            self.more = True
+        elif mod == OneOrMore:
+            self.zero = False
+            self.more = True
+        else:
+            raise Exception('Unknown modifier %s' % mod)
+
+    def eval(self, graph, subj=None, obj=None, first=True):
+        if self.zero and first:
+            if subj and obj:
+                if subj == obj:
+                    yield (subj, obj)
+            elif subj:
+                yield (subj, subj)
+            elif obj:
+                yield (obj, obj)
+
+        def _fwd(subj=None, obj=None, seen=None):
+            seen.add(subj)
+
+            for s, o in evalPath(graph, (subj, self.path, None)):
+                if not obj or o == obj:
+                    yield s, o
+                if self.more:
+                    if o in seen:
+                        continue
+                    for s2, o2 in _fwd(o, obj, seen):
+                        yield s, o2
+
+        def _bwd(subj=None, obj=None, seen=None):
+            seen.add(obj)
+
+            for s, o in evalPath(graph, (None, self.path, obj)):
+                if not subj or subj == s:
+                    yield s, o
+                if self.more:
+                    if s in seen:
+                        continue
+
+                    for s2, o2 in _bwd(None, s, seen):
+                        yield s2, o
+
+        def _fwdbwd():
+            if self.zero:
+                seen1 = set()
+                # According to the spec, ALL nodes are possible solutions
+                # (even literals)
+                # we cannot do this without going through ALL triples
+                # unless we keep an index of all terms somehow
+                # but lets just hope this query doesnt happen very often...
+                for s, o in graph.subject_objects(None):
+                    if s not in seen1:
+                        seen1.add(s)
+                        yield s, s
+                    if o not in seen1:
+                        seen1.add(o)
+                        yield o, o
+
+            for s, o in evalPath(graph, (None, self.path, None)):
+                if not self.more:
+                    yield s, o
+                else:
+                    seen = set()
+                    f = list(_fwd(s, None, seen))  # cache or recompute?
+                    for s3, o3 in _bwd(None, o, seen):
+                        for s2, o2 in f:
+                            yield s3, o2  # ?
+
+        done = set()  # the spec does by defn. not allow duplicates
+        if subj:
+            for x in _fwd(subj, obj, set()):
+                if x not in done:
+                    done.add(x)
+                    yield x
+        elif obj:
+            for x in _bwd(subj, obj, set()):
+                if x not in done:
+                    done.add(x)
+                    yield x
+        else:
+            for x in _fwdbwd():
+                if x not in done:
+                    done.add(x)
+                    yield x
+
+    def __repr__(self):
+        return "Path(%s%s)" % (self.path, self.mod)
+
+    def n3(self):
+        return '%s%s' % (self.path, self.mod)
+
+
+
+class NegatedPath(Path):
+    def __init__(self, arg):
+        if isinstance(arg, (URIRef, InvPath)):
+            self.args = [arg]
+        elif isinstance(arg, AlternativePath):
+            self.args = arg.args
+        else:
+            raise Exception(
+                'Can only negate URIRefs, InvPaths or ' +
+                'AlternativePaths, not: %s' % (arg,))
+
+    def eval(self, graph, subj=None, obj=None):
+        for s, p, o in graph.triples((subj, None, obj)):
+            for a in self.args:
+                if isinstance(a, URIRef):
+                    if p == a:
+                        break
+                elif isinstance(a, InvPath):
+                    if (o, a.arg, s) in graph:
+                        break
+                else:
+                    raise Exception('Invalid path in NegatedPath: %s' % a)
+            else:
+                yield s, o
+
+    def __repr__(self):
+        return "Path(! %s)" % ",".join(str(x) for x in self.args)
+
+    def n3(self):
+        return '!(%s)' % ('|'.join(self.args))
+
+
+class PathList(list):
+    pass
+
+
+def path_alternative(self, other):
+    """
+    alternative path
+    """
+    if not isinstance(other, (URIRef, Path)):
+        raise Exception('Only URIRefs or Paths can be in paths!')
+    return AlternativePath(self, other)
+
+
+def path_sequence(self, other):
+    """
+    sequence path
+    """
+    if not isinstance(other, (URIRef, Path)):
+        raise Exception('Only URIRefs or Paths can be in paths!')
+    return SequencePath(self, other)
+
+
+def evalPath(graph, t):
+    return ((s, o) for s, p, o in graph.triples(t))
+
+def mul_path(p, mul):
+    """
+    cardinality path
+    """
+    return MulPath(p, mul)
+
+
+def inv_path(p):
+    """
+    inverse path
+    """
+    return InvPath(p)
+
+
+def neg_path(p):
+    """
+    negated path
+    """
+    return NegatedPath(p)
+
+
+
+if __name__ == '__main__':
+
+    import doctest
+    doctest.testmod()
+else:
+    # monkey patch
+    # (these cannot be directly in terms.py
+    #  as it would introduce circular imports)
+
+    URIRef.__or__ = path_alternative
+    URIRef.__mul__ = mul_path
+    URIRef.__invert__ = inv_path
+    URIRef.__neg__ = neg_path
+    URIRef.__truediv__ = path_sequence
+    if not PY3:
+        URIRef.__div__ = path_sequence
+
+    Path.__invert__ = inv_path
+    Path.__neg__ = neg_path
+    Path.__mul__ = mul_path
+    Path.__or__ = path_alternative
+    Path.__truediv__ = path_sequence
+    if not PY3:
+        Path.__div__ = path_sequence