diff env/lib/python3.7/site-packages/rdflib/plugins/sparql/algebra.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
line wrap: on
line diff
--- a/env/lib/python3.7/site-packages/rdflib/plugins/sparql/algebra.py	Thu May 14 16:47:39 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,821 +0,0 @@
-
-"""
-Converting the 'parse-tree' output of pyparsing to a SPARQL Algebra expression
-
-http://www.w3.org/TR/sparql11-query/#sparqlQuery
-
-"""
-
-import functools
-import operator
-import collections
-
-from rdflib import Literal, Variable, URIRef, BNode
-
-from rdflib.plugins.sparql.sparql import Prologue, Query
-from rdflib.plugins.sparql.parserutils import CompValue, Expr
-from rdflib.plugins.sparql.operators import (
-    and_, TrueFilter, simplify as simplifyFilters)
-from rdflib.paths import (
-    InvPath, AlternativePath, SequencePath, MulPath, NegatedPath)
-
-from pyparsing import ParseResults
-from functools import reduce
-
-
-# ---------------------------
-# Some convenience methods
-def OrderBy(p, expr):
-    return CompValue('OrderBy', p=p, expr=expr)
-
-
-def ToMultiSet(p):
-    return CompValue('ToMultiSet', p=p)
-
-
-def Union(p1, p2):
-    return CompValue('Union', p1=p1, p2=p2)
-
-
-def Join(p1, p2):
-    return CompValue('Join', p1=p1, p2=p2)
-
-
-def Minus(p1, p2):
-    return CompValue('Minus', p1=p1, p2=p2)
-
-
-def Graph(term, graph):
-    return CompValue('Graph', term=term, p=graph)
-
-
-def BGP(triples=None):
-    return CompValue('BGP', triples=triples or [])
-
-
-def LeftJoin(p1, p2, expr):
-    return CompValue('LeftJoin', p1=p1, p2=p2, expr=expr)
-
-
-def Filter(expr, p):
-    return CompValue('Filter', expr=expr, p=p)
-
-
-def Extend(p, expr, var):
-    return CompValue('Extend', p=p, expr=expr, var=var)
-
-def Values(res):
-    return CompValue('values', res=res)
-
-def Project(p, PV):
-    return CompValue('Project', p=p, PV=PV)
-
-
-def Group(p, expr=None):
-    return CompValue('Group', p=p, expr=expr)
-
-
-def _knownTerms(triple, varsknown, varscount):
-    return (len([_f for _f in (x not in varsknown and
-                              isinstance(
-                                  x, (Variable, BNode)) for x in triple) if _f]),
-            -sum(varscount.get(x, 0) for x in triple),
-            not isinstance(triple[2], Literal),
-            )
-
-
-def reorderTriples(l):
-    """
-    Reorder triple patterns so that we execute the
-    ones with most bindings first
-    """
-
-    def _addvar(term, varsknown):
-        if isinstance(term, (Variable, BNode)):
-            varsknown.add(term)
-
-    l = [(None, x) for x in l]
-    varsknown = set()
-    varscount = collections.defaultdict(int)
-    for t in l:
-        for c in t[1]:
-            if isinstance(c, (Variable, BNode)):
-                varscount[c] += 1
-    i = 0
-
-    # Done in steps, sort by number of bound terms
-    # the top block of patterns with the most bound terms is kept
-    # the rest is resorted based on the vars bound after the first
-    # block is evaluated
-
-    # we sort by decorate/undecorate, since we need the value of the sort keys
-
-    while i < len(l):
-        l[i:] = sorted((_knownTerms(x[
-                       1], varsknown, varscount), x[1]) for x in l[i:])
-        t = l[i][0][0]  # top block has this many terms bound
-        j = 0
-        while i+j < len(l) and l[i+j][0][0] == t:
-            for c in l[i+j][1]:
-                _addvar(c, varsknown)
-            j += 1
-        i += 1
-
-    return [x[1] for x in l]
-
-
-def triples(l):
-
-    l = reduce(lambda x, y: x + y, l)
-    if (len(l) % 3) != 0:
-        raise Exception('these aint triples')
-    return reorderTriples((l[x], l[x + 1], l[x + 2])
-                          for x in range(0, len(l), 3))
-
-
-def translatePName(p, prologue):
-    """
-    Expand prefixed/relative URIs
-    """
-    if isinstance(p, CompValue):
-        if p.name == 'pname':
-            return prologue.absolutize(p)
-        if p.name == 'literal':
-            return Literal(p.string, lang=p.lang,
-                           datatype=prologue.absolutize(p.datatype))
-    elif isinstance(p, URIRef):
-        return prologue.absolutize(p)
-
-
-def translatePath(p):
-
-    """
-    Translate PropertyPath expressions
-    """
-
-    if isinstance(p, CompValue):
-        if p.name == 'PathAlternative':
-            if len(p.part) == 1:
-                return p.part[0]
-            else:
-                return AlternativePath(*p.part)
-
-        elif p.name == 'PathSequence':
-            if len(p.part) == 1:
-                return p.part[0]
-            else:
-                return SequencePath(*p.part)
-
-        elif p.name == 'PathElt':
-            if not p.mod:
-                return p.part
-            else:
-                if isinstance(p.part, list):
-                    if len(p.part) != 1:
-                        raise Exception('Denkfehler!')
-
-                    return MulPath(p.part[0], p.mod)
-                else:
-                    return MulPath(p.part, p.mod)
-
-        elif p.name == 'PathEltOrInverse':
-            if isinstance(p.part, list):
-                if len(p.part) != 1:
-                    raise Exception('Denkfehler!')
-                return InvPath(p.part[0])
-            else:
-                return InvPath(p.part)
-
-        elif p.name == 'PathNegatedPropertySet':
-            if isinstance(p.part, list):
-                return NegatedPath(AlternativePath(*p.part))
-            else:
-                return NegatedPath(p.part)
-
-
-def translateExists(e):
-
-    """
-    Translate the graph pattern used by EXISTS and NOT EXISTS
-    http://www.w3.org/TR/sparql11-query/#sparqlCollectFilters
-    """
-
-    def _c(n):
-        if isinstance(n, CompValue):
-            if n.name in ('Builtin_EXISTS', 'Builtin_NOTEXISTS'):
-                n.graph = translateGroupGraphPattern(n.graph)
-                if n.graph.name == 'Filter':
-                    # filters inside (NOT) EXISTS can see vars bound outside
-                    n.graph.no_isolated_scope = True
-
-    e = traverse(e, visitPost=_c)
-
-    return e
-
-
-def collectAndRemoveFilters(parts):
-
-    """
-
-    FILTER expressions apply to the whole group graph pattern in which
-    they appear.
-
-    http://www.w3.org/TR/sparql11-query/#sparqlCollectFilters
-    """
-
-    filters = []
-
-    i = 0
-    while i < len(parts):
-        p = parts[i]
-        if p.name == 'Filter':
-            filters.append(translateExists(p.expr))
-            parts.pop(i)
-        else:
-            i += 1
-
-    if filters:
-        return and_(*filters)
-
-    return None
-
-
-def translateGroupOrUnionGraphPattern(graphPattern):
-    A = None
-
-    for g in graphPattern.graph:
-        g = translateGroupGraphPattern(g)
-        if not A:
-            A = g
-        else:
-            A = Union(A, g)
-    return A
-
-
-def translateGraphGraphPattern(graphPattern):
-    return Graph(graphPattern.term,
-                 translateGroupGraphPattern(graphPattern.graph))
-
-
-def translateInlineData(graphPattern):
-    return ToMultiSet(translateValues(graphPattern))
-
-
-def translateGroupGraphPattern(graphPattern):
-    """
-    http://www.w3.org/TR/sparql11-query/#convertGraphPattern
-    """
-
-    if graphPattern.name == 'SubSelect':
-        return ToMultiSet(translate(graphPattern)[0])
-
-    if not graphPattern.part:
-        graphPattern.part = []  # empty { }
-
-    filters = collectAndRemoveFilters(graphPattern.part)
-
-    g = []
-    for p in graphPattern.part:
-        if p.name == 'TriplesBlock':
-            # merge adjacent TripleBlocks
-            if not (g and g[-1].name == 'BGP'):
-                g.append(BGP())
-            g[-1]["triples"] += triples(p.triples)
-        else:
-            g.append(p)
-
-    G = BGP()
-    for p in g:
-        if p.name == 'OptionalGraphPattern':
-            A = translateGroupGraphPattern(p.graph)
-            if A.name == 'Filter':
-                G = LeftJoin(G, A.p, A.expr)
-            else:
-                G = LeftJoin(G, A, TrueFilter)
-        elif p.name == 'MinusGraphPattern':
-            G = Minus(p1=G, p2=translateGroupGraphPattern(p.graph))
-        elif p.name == 'GroupOrUnionGraphPattern':
-            G = Join(p1=G, p2=translateGroupOrUnionGraphPattern(p))
-        elif p.name == 'GraphGraphPattern':
-            G = Join(p1=G, p2=translateGraphGraphPattern(p))
-        elif p.name == 'InlineData':
-            G = Join(p1=G, p2=translateInlineData(p))
-        elif p.name == 'ServiceGraphPattern':
-            G = Join(p1=G, p2=p)
-        elif p.name in ('BGP', 'Extend'):
-            G = Join(p1=G, p2=p)
-        elif p.name == 'Bind':
-            G = Extend(G, p.expr, p.var)
-
-        else:
-            raise Exception('Unknown part in GroupGraphPattern: %s - %s' %
-                            (type(p), p.name))
-
-    if filters:
-        G = Filter(expr=filters, p=G)
-
-    return G
-
-
-class StopTraversal(Exception):
-    def __init__(self, rv):
-        self.rv = rv
-
-
-def _traverse(e, visitPre=lambda n: None, visitPost=lambda n: None):
-    """
-    Traverse a parse-tree, visit each node
-
-    if visit functions return a value, replace current node
-    """
-    _e = visitPre(e)
-    if _e is not None:
-        return _e
-
-    if e is None:
-        return None
-
-    if isinstance(e, (list, ParseResults)):
-        return [_traverse(x, visitPre, visitPost) for x in e]
-    elif isinstance(e, tuple):
-        return tuple([_traverse(x, visitPre, visitPost) for x in e])
-
-    elif isinstance(e, CompValue):
-        for k, val in e.items():
-            e[k] = _traverse(val, visitPre, visitPost)
-
-    _e = visitPost(e)
-    if _e is not None:
-        return _e
-
-    return e
-
-
-def _traverseAgg(e, visitor=lambda n, v: None):
-    """
-    Traverse a parse-tree, visit each node
-
-    if visit functions return a value, replace current node
-    """
-
-    res = []
-
-    if isinstance(e, (list, ParseResults, tuple)):
-        res = [_traverseAgg(x, visitor) for x in e]
-
-    elif isinstance(e, CompValue):
-        for k, val in e.items():
-            if val != None:
-                res.append(_traverseAgg(val, visitor))
-
-    return visitor(e, res)
-
-
-def traverse(
-        tree, visitPre=lambda n: None,
-        visitPost=lambda n: None, complete=None):
-    """
-    Traverse tree, visit each node with visit function
-    visit function may raise StopTraversal to stop traversal
-    if complete!=None, it is returned on complete traversal,
-    otherwise the transformed tree is returned
-    """
-    try:
-        r = _traverse(tree, visitPre, visitPost)
-        if complete is not None:
-            return complete
-        return r
-    except StopTraversal as st:
-        return st.rv
-
-
-def _hasAggregate(x):
-    """
-    Traverse parse(sub)Tree
-    return true if any aggregates are used
-    """
-
-    if isinstance(x, CompValue):
-        if x.name.startswith('Aggregate_'):
-            raise StopTraversal(True)
-
-
-def _aggs(e, A):
-    """
-    Collect Aggregates in A
-    replaces aggregates with variable references
-    """
-
-    # TODO: nested Aggregates?
-
-    if isinstance(e, CompValue) and e.name.startswith('Aggregate_'):
-        A.append(e)
-        aggvar = Variable('__agg_%d__' % len(A))
-        e["res"] = aggvar
-        return aggvar
-
-
-def _findVars(x, res):
-    """
-    Find all variables in a tree
-    """
-    if isinstance(x, Variable):
-        res.add(x)
-    if isinstance(x, CompValue):
-        if x.name == "Bind":
-            res.add(x.var)
-            return x  # stop recursion and finding vars in the expr
-        elif x.name == 'SubSelect':
-            if x.projection:
-                res.update(v.var or v.evar for v in x.projection)
-            return x
-
-
-def _addVars(x, children):
-    """
-    find which variables may be bound by this part of the query
-    """
-    if isinstance(x, Variable):
-        return set([x])
-    elif isinstance(x, CompValue):
-        if x.name == "RelationalExpression":
-            x["_vars"] = set()
-        elif x.name == "Extend":
-            # vars only used in the expr for a bind should not be included
-            x["_vars"] = reduce(operator.or_, [ child for child,part in zip(children,x) if part!='expr' ], set())
-
-        else:
-            x["_vars"] = set(reduce(operator.or_, children, set()))
-
-            if x.name == 'SubSelect':
-                if x.projection:
-                    s = set(v.var or v.evar for v in x.projection)
-                else:
-                    s = set()
-
-                return s
-
-        return x["_vars"]
-
-    return reduce(operator.or_, children, set())
-
-
-def _sample(e, v=None):
-    """
-    For each unaggregated variable V in expr
-    Replace V with Sample(V)
-    """
-    if isinstance(e, CompValue) and e.name.startswith("Aggregate_"):
-        return e  # do not replace vars in aggregates
-    if isinstance(e, Variable) and v != e:
-        return CompValue('Aggregate_Sample', vars=e)
-
-
-def _simplifyFilters(e):
-    if isinstance(e, Expr):
-        return simplifyFilters(e)
-
-
-def translateAggregates(q, M):
-    E = []
-    A = []
-
-    # collect/replace aggs in :
-    #    select expr as ?var
-    if q.projection:
-        for v in q.projection:
-            if v.evar:
-                v.expr = traverse(v.expr, functools.partial(_sample, v=v.evar))
-                v.expr = traverse(v.expr, functools.partial(_aggs, A=A))
-
-
-    # having clause
-    if traverse(q.having, _hasAggregate, complete=False):
-        q.having = traverse(q.having, _sample)
-        traverse(q.having, functools.partial(_aggs, A=A))
-
-    # order by
-    if traverse(q.orderby, _hasAggregate, complete=False):
-        q.orderby = traverse(q.orderby, _sample)
-        traverse(q.orderby, functools.partial(_aggs, A=A))
-
-    # sample all other select vars
-    # TODO: only allowed for vars in group-by?
-    if q.projection:
-        for v in q.projection:
-            if v.var:
-                rv = Variable('__agg_%d__' % (len(A) + 1))
-                A.append(CompValue('Aggregate_Sample', vars=v.var, res=rv))
-                E.append((rv, v.var))
-
-    return CompValue('AggregateJoin', A=A, p=M), E
-
-
-def translateValues(v):
-    # if len(v.var)!=len(v.value):
-    #     raise Exception("Unmatched vars and values in ValueClause: "+str(v))
-
-    res = []
-    if not v.var:
-        return res
-    if not v.value:
-        return res
-    if not isinstance(v.value[0], list):
-
-        for val in v.value:
-            res.append({v.var[0]: val})
-    else:
-        for vals in v.value:
-            res.append(dict(list(zip(v.var, vals))))
-
-    return Values(res)
-
-
-def translate(q):
-    """
-    http://www.w3.org/TR/sparql11-query/#convertSolMod
-
-    """
-
-    _traverse(q, _simplifyFilters)
-
-    q.where = traverse(q.where, visitPost=translatePath)
-
-    # TODO: Var scope test
-    VS = set()
-    traverse(q.where, functools.partial(_findVars, res=VS))
-
-    # all query types have a where part
-    M = translateGroupGraphPattern(q.where)
-
-    aggregate = False
-    if q.groupby:
-        conditions = []
-        # convert "GROUP BY (?expr as ?var)" to an Extend
-        for c in q.groupby.condition:
-            if isinstance(c, CompValue) and c.name == 'GroupAs':
-                M = Extend(M, c.expr, c.var)
-                c = c.var
-            conditions.append(c)
-
-        M = Group(p=M, expr=conditions)
-        aggregate = True
-    elif traverse(q.having, _hasAggregate, complete=False) or \
-            traverse(q.orderby, _hasAggregate, complete=False) or \
-            any(traverse(x.expr, _hasAggregate, complete=False)
-                for x in q.projection or [] if x.evar):
-        # if any aggregate is used, implicit group by
-        M = Group(p=M)
-        aggregate = True
-
-    if aggregate:
-        M, E = translateAggregates(q, M)
-    else:
-        E = []
-
-    # HAVING
-    if q.having:
-        M = Filter(expr=and_(*q.having.condition), p=M)
-
-    # VALUES
-    if q.valuesClause:
-        M = Join(p1=M, p2=ToMultiSet(translateValues(q.valuesClause)))
-
-    if not q.projection:
-        # select *
-        PV = list(VS)
-    else:
-        PV = list()
-        for v in q.projection:
-            if v.var:
-                if v not in PV:
-                    PV.append(v.var)
-            elif v.evar:
-                if v not in PV:
-                    PV.append(v.evar)
-
-                E.append((v.expr, v.evar))
-            else:
-                raise Exception("I expected a var or evar here!")
-
-    for e, v in E:
-        M = Extend(M, e, v)
-
-    # ORDER BY
-    if q.orderby:
-        M = OrderBy(M, [CompValue('OrderCondition', expr=c.expr,
-                    order=c.order) for c in q.orderby.condition])
-
-    # PROJECT
-    M = Project(M, PV)
-
-    if q.modifier:
-        if q.modifier == 'DISTINCT':
-            M = CompValue('Distinct', p=M)
-        elif q.modifier == 'REDUCED':
-            M = CompValue('Reduced', p=M)
-
-    if q.limitoffset:
-        offset = 0
-        if q.limitoffset.offset != None:
-            offset = q.limitoffset.offset.toPython()
-
-        if q.limitoffset.limit != None:
-            M = CompValue('Slice', p=M, start=offset,
-                          length=q.limitoffset.limit.toPython())
-        else:
-            M = CompValue('Slice', p=M, start=offset)
-
-    return M, PV
-
-
-def simplify(n):
-    """Remove joins to empty BGPs"""
-    if isinstance(n, CompValue):
-        if n.name == 'Join':
-            if n.p1.name == 'BGP' and len(n.p1.triples) == 0:
-                return n.p2
-            if n.p2.name == 'BGP' and len(n.p2.triples) == 0:
-                return n.p1
-        elif n.name == 'BGP':
-            n["triples"] = reorderTriples(n.triples)
-            return n
-
-
-def analyse(n, children):
-
-    """
-    Some things can be lazily joined.
-    This propegates whether they can up the tree
-    and sets lazy flags for all joins
-    """
-
-    if isinstance(n, CompValue):
-        if n.name == 'Join':
-            n["lazy"] = all(children)
-            return False
-        elif n.name in ('Slice', 'Distinct'):
-            return False
-        else:
-            return all(children)
-    else:
-        return True
-
-
-def translatePrologue(p, base, initNs=None, prologue=None):
-
-    if prologue is None:
-        prologue = Prologue()
-        prologue.base = ""
-    if base:
-        prologue.base = base
-    if initNs:
-        for k, v in initNs.items():
-            prologue.bind(k, v)
-
-    for x in p:
-        if x.name == 'Base':
-            prologue.base = x.iri
-        elif x.name == 'PrefixDecl':
-            prologue.bind(x.prefix, prologue.absolutize(x.iri))
-
-    return prologue
-
-
-def translateQuads(quads):
-    if quads.triples:
-        alltriples = triples(quads.triples)
-    else:
-        alltriples = []
-
-    allquads = collections.defaultdict(list)
-
-    if quads.quadsNotTriples:
-        for q in quads.quadsNotTriples:
-            if q.triples:
-                allquads[q.term] += triples(q.triples)
-
-    return alltriples, allquads
-
-
-def translateUpdate1(u, prologue):
-    if u.name in ('Load', 'Clear', 'Drop', 'Create'):
-        pass  # no translation needed
-    elif u.name in ('Add', 'Move', 'Copy'):
-        pass
-    elif u.name in ('InsertData', 'DeleteData', 'DeleteWhere'):
-        t, q = translateQuads(u.quads)
-        u["quads"] = q
-        u["triples"] = t
-        if u.name in ('DeleteWhere', 'DeleteData'):
-            pass  # TODO: check for bnodes in triples
-    elif u.name == 'Modify':
-        if u.delete:
-            u.delete["triples"], u.delete[
-                "quads"] = translateQuads(u.delete.quads)
-        if u.insert:
-            u.insert["triples"], u.insert[
-                "quads"] = translateQuads(u.insert.quads)
-        u["where"] = translateGroupGraphPattern(u.where)
-    else:
-        raise Exception('Unknown type of update operation: %s' % u)
-
-    u.prologue = prologue
-    return u
-
-
-def translateUpdate(q, base=None, initNs=None):
-    """
-    Returns a list of SPARQL Update Algebra expressions
-    """
-
-    res = []
-    prologue = None
-    if not q.request:
-        return res
-    for p, u in zip(q.prologue, q.request):
-        prologue = translatePrologue(p, base, initNs, prologue)
-
-        # absolutize/resolve prefixes
-        u = traverse(
-            u, visitPost=functools.partial(translatePName, prologue=prologue))
-        u = _traverse(u, _simplifyFilters)
-
-        u = traverse(u, visitPost=translatePath)
-
-        res.append(translateUpdate1(u, prologue))
-
-    return res
-
-
-def translateQuery(q, base=None, initNs=None):
-    """
-    Translate a query-parsetree to a SPARQL Algebra Expression
-
-    Return a rdflib.plugins.sparql.sparql.Query object
-    """
-
-    # We get in: (prologue, query)
-
-    prologue = translatePrologue(q[0], base, initNs)
-
-    # absolutize/resolve prefixes
-    q[1] = traverse(
-        q[1], visitPost=functools.partial(translatePName, prologue=prologue))
-
-    P, PV = translate(q[1])
-    datasetClause = q[1].datasetClause
-    if q[1].name == 'ConstructQuery':
-
-        template = triples(q[1].template) if q[1].template else None
-
-        res = CompValue(q[1].name, p=P,
-                        template=template,
-                        datasetClause=datasetClause)
-    else:
-        res = CompValue(q[1].name, p=P, datasetClause=datasetClause, PV=PV)
-
-    res = traverse(res, visitPost=simplify)
-    _traverseAgg(res, visitor=analyse)
-    _traverseAgg(res, _addVars)
-
-    return Query(prologue, res)
-
-
-def pprintAlgebra(q):
-    def pp(p, ind="    "):
-        # if isinstance(p, list):
-        #     print "[ "
-        #     for x in p: pp(x,ind)
-        #     print "%s ]"%ind
-        #     return
-        if not isinstance(p, CompValue):
-            print(p)
-            return
-        print("%s(" % (p.name, ))
-        for k in p:
-            print("%s%s =" % (ind, k,), end=' ')
-            pp(p[k], ind + "    ")
-        print("%s)" % ind)
-
-    try:
-        pp(q.algebra)
-    except AttributeError:
-        # it's update, just a list
-        for x in q:
-            pp(x)
-
-if __name__ == '__main__':
-    import sys
-    from rdflib.plugins.sparql import parser
-    import os.path
-
-    if os.path.exists(sys.argv[1]):
-        q = file(sys.argv[1])
-    else:
-        q = sys.argv[1]
-
-    pq = parser.parseQuery(q)
-    print(pq)
-    tq = translateQuery(pq)
-    print(pprintAlgebra(tq))