comparison env/lib/python3.7/site-packages/rdflib/plugins/sparql/algebra.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
comparison
equal deleted inserted replaced
4:79f47841a781 5:9b1c78e6ba9c
1
2 """
3 Converting the 'parse-tree' output of pyparsing to a SPARQL Algebra expression
4
5 http://www.w3.org/TR/sparql11-query/#sparqlQuery
6
7 """
8
9 import functools
10 import operator
11 import collections
12
13 from rdflib import Literal, Variable, URIRef, BNode
14
15 from rdflib.plugins.sparql.sparql import Prologue, Query
16 from rdflib.plugins.sparql.parserutils import CompValue, Expr
17 from rdflib.plugins.sparql.operators import (
18 and_, TrueFilter, simplify as simplifyFilters)
19 from rdflib.paths import (
20 InvPath, AlternativePath, SequencePath, MulPath, NegatedPath)
21
22 from pyparsing import ParseResults
23 from functools import reduce
24
25
26 # ---------------------------
27 # Some convenience methods
28 def OrderBy(p, expr):
29 return CompValue('OrderBy', p=p, expr=expr)
30
31
32 def ToMultiSet(p):
33 return CompValue('ToMultiSet', p=p)
34
35
36 def Union(p1, p2):
37 return CompValue('Union', p1=p1, p2=p2)
38
39
40 def Join(p1, p2):
41 return CompValue('Join', p1=p1, p2=p2)
42
43
44 def Minus(p1, p2):
45 return CompValue('Minus', p1=p1, p2=p2)
46
47
48 def Graph(term, graph):
49 return CompValue('Graph', term=term, p=graph)
50
51
52 def BGP(triples=None):
53 return CompValue('BGP', triples=triples or [])
54
55
56 def LeftJoin(p1, p2, expr):
57 return CompValue('LeftJoin', p1=p1, p2=p2, expr=expr)
58
59
60 def Filter(expr, p):
61 return CompValue('Filter', expr=expr, p=p)
62
63
64 def Extend(p, expr, var):
65 return CompValue('Extend', p=p, expr=expr, var=var)
66
67 def Values(res):
68 return CompValue('values', res=res)
69
70 def Project(p, PV):
71 return CompValue('Project', p=p, PV=PV)
72
73
74 def Group(p, expr=None):
75 return CompValue('Group', p=p, expr=expr)
76
77
78 def _knownTerms(triple, varsknown, varscount):
79 return (len([_f for _f in (x not in varsknown and
80 isinstance(
81 x, (Variable, BNode)) for x in triple) if _f]),
82 -sum(varscount.get(x, 0) for x in triple),
83 not isinstance(triple[2], Literal),
84 )
85
86
87 def reorderTriples(l):
88 """
89 Reorder triple patterns so that we execute the
90 ones with most bindings first
91 """
92
93 def _addvar(term, varsknown):
94 if isinstance(term, (Variable, BNode)):
95 varsknown.add(term)
96
97 l = [(None, x) for x in l]
98 varsknown = set()
99 varscount = collections.defaultdict(int)
100 for t in l:
101 for c in t[1]:
102 if isinstance(c, (Variable, BNode)):
103 varscount[c] += 1
104 i = 0
105
106 # Done in steps, sort by number of bound terms
107 # the top block of patterns with the most bound terms is kept
108 # the rest is resorted based on the vars bound after the first
109 # block is evaluated
110
111 # we sort by decorate/undecorate, since we need the value of the sort keys
112
113 while i < len(l):
114 l[i:] = sorted((_knownTerms(x[
115 1], varsknown, varscount), x[1]) for x in l[i:])
116 t = l[i][0][0] # top block has this many terms bound
117 j = 0
118 while i+j < len(l) and l[i+j][0][0] == t:
119 for c in l[i+j][1]:
120 _addvar(c, varsknown)
121 j += 1
122 i += 1
123
124 return [x[1] for x in l]
125
126
127 def triples(l):
128
129 l = reduce(lambda x, y: x + y, l)
130 if (len(l) % 3) != 0:
131 raise Exception('these aint triples')
132 return reorderTriples((l[x], l[x + 1], l[x + 2])
133 for x in range(0, len(l), 3))
134
135
136 def translatePName(p, prologue):
137 """
138 Expand prefixed/relative URIs
139 """
140 if isinstance(p, CompValue):
141 if p.name == 'pname':
142 return prologue.absolutize(p)
143 if p.name == 'literal':
144 return Literal(p.string, lang=p.lang,
145 datatype=prologue.absolutize(p.datatype))
146 elif isinstance(p, URIRef):
147 return prologue.absolutize(p)
148
149
150 def translatePath(p):
151
152 """
153 Translate PropertyPath expressions
154 """
155
156 if isinstance(p, CompValue):
157 if p.name == 'PathAlternative':
158 if len(p.part) == 1:
159 return p.part[0]
160 else:
161 return AlternativePath(*p.part)
162
163 elif p.name == 'PathSequence':
164 if len(p.part) == 1:
165 return p.part[0]
166 else:
167 return SequencePath(*p.part)
168
169 elif p.name == 'PathElt':
170 if not p.mod:
171 return p.part
172 else:
173 if isinstance(p.part, list):
174 if len(p.part) != 1:
175 raise Exception('Denkfehler!')
176
177 return MulPath(p.part[0], p.mod)
178 else:
179 return MulPath(p.part, p.mod)
180
181 elif p.name == 'PathEltOrInverse':
182 if isinstance(p.part, list):
183 if len(p.part) != 1:
184 raise Exception('Denkfehler!')
185 return InvPath(p.part[0])
186 else:
187 return InvPath(p.part)
188
189 elif p.name == 'PathNegatedPropertySet':
190 if isinstance(p.part, list):
191 return NegatedPath(AlternativePath(*p.part))
192 else:
193 return NegatedPath(p.part)
194
195
196 def translateExists(e):
197
198 """
199 Translate the graph pattern used by EXISTS and NOT EXISTS
200 http://www.w3.org/TR/sparql11-query/#sparqlCollectFilters
201 """
202
203 def _c(n):
204 if isinstance(n, CompValue):
205 if n.name in ('Builtin_EXISTS', 'Builtin_NOTEXISTS'):
206 n.graph = translateGroupGraphPattern(n.graph)
207 if n.graph.name == 'Filter':
208 # filters inside (NOT) EXISTS can see vars bound outside
209 n.graph.no_isolated_scope = True
210
211 e = traverse(e, visitPost=_c)
212
213 return e
214
215
216 def collectAndRemoveFilters(parts):
217
218 """
219
220 FILTER expressions apply to the whole group graph pattern in which
221 they appear.
222
223 http://www.w3.org/TR/sparql11-query/#sparqlCollectFilters
224 """
225
226 filters = []
227
228 i = 0
229 while i < len(parts):
230 p = parts[i]
231 if p.name == 'Filter':
232 filters.append(translateExists(p.expr))
233 parts.pop(i)
234 else:
235 i += 1
236
237 if filters:
238 return and_(*filters)
239
240 return None
241
242
243 def translateGroupOrUnionGraphPattern(graphPattern):
244 A = None
245
246 for g in graphPattern.graph:
247 g = translateGroupGraphPattern(g)
248 if not A:
249 A = g
250 else:
251 A = Union(A, g)
252 return A
253
254
255 def translateGraphGraphPattern(graphPattern):
256 return Graph(graphPattern.term,
257 translateGroupGraphPattern(graphPattern.graph))
258
259
260 def translateInlineData(graphPattern):
261 return ToMultiSet(translateValues(graphPattern))
262
263
264 def translateGroupGraphPattern(graphPattern):
265 """
266 http://www.w3.org/TR/sparql11-query/#convertGraphPattern
267 """
268
269 if graphPattern.name == 'SubSelect':
270 return ToMultiSet(translate(graphPattern)[0])
271
272 if not graphPattern.part:
273 graphPattern.part = [] # empty { }
274
275 filters = collectAndRemoveFilters(graphPattern.part)
276
277 g = []
278 for p in graphPattern.part:
279 if p.name == 'TriplesBlock':
280 # merge adjacent TripleBlocks
281 if not (g and g[-1].name == 'BGP'):
282 g.append(BGP())
283 g[-1]["triples"] += triples(p.triples)
284 else:
285 g.append(p)
286
287 G = BGP()
288 for p in g:
289 if p.name == 'OptionalGraphPattern':
290 A = translateGroupGraphPattern(p.graph)
291 if A.name == 'Filter':
292 G = LeftJoin(G, A.p, A.expr)
293 else:
294 G = LeftJoin(G, A, TrueFilter)
295 elif p.name == 'MinusGraphPattern':
296 G = Minus(p1=G, p2=translateGroupGraphPattern(p.graph))
297 elif p.name == 'GroupOrUnionGraphPattern':
298 G = Join(p1=G, p2=translateGroupOrUnionGraphPattern(p))
299 elif p.name == 'GraphGraphPattern':
300 G = Join(p1=G, p2=translateGraphGraphPattern(p))
301 elif p.name == 'InlineData':
302 G = Join(p1=G, p2=translateInlineData(p))
303 elif p.name == 'ServiceGraphPattern':
304 G = Join(p1=G, p2=p)
305 elif p.name in ('BGP', 'Extend'):
306 G = Join(p1=G, p2=p)
307 elif p.name == 'Bind':
308 G = Extend(G, p.expr, p.var)
309
310 else:
311 raise Exception('Unknown part in GroupGraphPattern: %s - %s' %
312 (type(p), p.name))
313
314 if filters:
315 G = Filter(expr=filters, p=G)
316
317 return G
318
319
320 class StopTraversal(Exception):
321 def __init__(self, rv):
322 self.rv = rv
323
324
325 def _traverse(e, visitPre=lambda n: None, visitPost=lambda n: None):
326 """
327 Traverse a parse-tree, visit each node
328
329 if visit functions return a value, replace current node
330 """
331 _e = visitPre(e)
332 if _e is not None:
333 return _e
334
335 if e is None:
336 return None
337
338 if isinstance(e, (list, ParseResults)):
339 return [_traverse(x, visitPre, visitPost) for x in e]
340 elif isinstance(e, tuple):
341 return tuple([_traverse(x, visitPre, visitPost) for x in e])
342
343 elif isinstance(e, CompValue):
344 for k, val in e.items():
345 e[k] = _traverse(val, visitPre, visitPost)
346
347 _e = visitPost(e)
348 if _e is not None:
349 return _e
350
351 return e
352
353
354 def _traverseAgg(e, visitor=lambda n, v: None):
355 """
356 Traverse a parse-tree, visit each node
357
358 if visit functions return a value, replace current node
359 """
360
361 res = []
362
363 if isinstance(e, (list, ParseResults, tuple)):
364 res = [_traverseAgg(x, visitor) for x in e]
365
366 elif isinstance(e, CompValue):
367 for k, val in e.items():
368 if val != None:
369 res.append(_traverseAgg(val, visitor))
370
371 return visitor(e, res)
372
373
374 def traverse(
375 tree, visitPre=lambda n: None,
376 visitPost=lambda n: None, complete=None):
377 """
378 Traverse tree, visit each node with visit function
379 visit function may raise StopTraversal to stop traversal
380 if complete!=None, it is returned on complete traversal,
381 otherwise the transformed tree is returned
382 """
383 try:
384 r = _traverse(tree, visitPre, visitPost)
385 if complete is not None:
386 return complete
387 return r
388 except StopTraversal as st:
389 return st.rv
390
391
392 def _hasAggregate(x):
393 """
394 Traverse parse(sub)Tree
395 return true if any aggregates are used
396 """
397
398 if isinstance(x, CompValue):
399 if x.name.startswith('Aggregate_'):
400 raise StopTraversal(True)
401
402
403 def _aggs(e, A):
404 """
405 Collect Aggregates in A
406 replaces aggregates with variable references
407 """
408
409 # TODO: nested Aggregates?
410
411 if isinstance(e, CompValue) and e.name.startswith('Aggregate_'):
412 A.append(e)
413 aggvar = Variable('__agg_%d__' % len(A))
414 e["res"] = aggvar
415 return aggvar
416
417
418 def _findVars(x, res):
419 """
420 Find all variables in a tree
421 """
422 if isinstance(x, Variable):
423 res.add(x)
424 if isinstance(x, CompValue):
425 if x.name == "Bind":
426 res.add(x.var)
427 return x # stop recursion and finding vars in the expr
428 elif x.name == 'SubSelect':
429 if x.projection:
430 res.update(v.var or v.evar for v in x.projection)
431 return x
432
433
434 def _addVars(x, children):
435 """
436 find which variables may be bound by this part of the query
437 """
438 if isinstance(x, Variable):
439 return set([x])
440 elif isinstance(x, CompValue):
441 if x.name == "RelationalExpression":
442 x["_vars"] = set()
443 elif x.name == "Extend":
444 # vars only used in the expr for a bind should not be included
445 x["_vars"] = reduce(operator.or_, [ child for child,part in zip(children,x) if part!='expr' ], set())
446
447 else:
448 x["_vars"] = set(reduce(operator.or_, children, set()))
449
450 if x.name == 'SubSelect':
451 if x.projection:
452 s = set(v.var or v.evar for v in x.projection)
453 else:
454 s = set()
455
456 return s
457
458 return x["_vars"]
459
460 return reduce(operator.or_, children, set())
461
462
463 def _sample(e, v=None):
464 """
465 For each unaggregated variable V in expr
466 Replace V with Sample(V)
467 """
468 if isinstance(e, CompValue) and e.name.startswith("Aggregate_"):
469 return e # do not replace vars in aggregates
470 if isinstance(e, Variable) and v != e:
471 return CompValue('Aggregate_Sample', vars=e)
472
473
474 def _simplifyFilters(e):
475 if isinstance(e, Expr):
476 return simplifyFilters(e)
477
478
479 def translateAggregates(q, M):
480 E = []
481 A = []
482
483 # collect/replace aggs in :
484 # select expr as ?var
485 if q.projection:
486 for v in q.projection:
487 if v.evar:
488 v.expr = traverse(v.expr, functools.partial(_sample, v=v.evar))
489 v.expr = traverse(v.expr, functools.partial(_aggs, A=A))
490
491
492 # having clause
493 if traverse(q.having, _hasAggregate, complete=False):
494 q.having = traverse(q.having, _sample)
495 traverse(q.having, functools.partial(_aggs, A=A))
496
497 # order by
498 if traverse(q.orderby, _hasAggregate, complete=False):
499 q.orderby = traverse(q.orderby, _sample)
500 traverse(q.orderby, functools.partial(_aggs, A=A))
501
502 # sample all other select vars
503 # TODO: only allowed for vars in group-by?
504 if q.projection:
505 for v in q.projection:
506 if v.var:
507 rv = Variable('__agg_%d__' % (len(A) + 1))
508 A.append(CompValue('Aggregate_Sample', vars=v.var, res=rv))
509 E.append((rv, v.var))
510
511 return CompValue('AggregateJoin', A=A, p=M), E
512
513
514 def translateValues(v):
515 # if len(v.var)!=len(v.value):
516 # raise Exception("Unmatched vars and values in ValueClause: "+str(v))
517
518 res = []
519 if not v.var:
520 return res
521 if not v.value:
522 return res
523 if not isinstance(v.value[0], list):
524
525 for val in v.value:
526 res.append({v.var[0]: val})
527 else:
528 for vals in v.value:
529 res.append(dict(list(zip(v.var, vals))))
530
531 return Values(res)
532
533
534 def translate(q):
535 """
536 http://www.w3.org/TR/sparql11-query/#convertSolMod
537
538 """
539
540 _traverse(q, _simplifyFilters)
541
542 q.where = traverse(q.where, visitPost=translatePath)
543
544 # TODO: Var scope test
545 VS = set()
546 traverse(q.where, functools.partial(_findVars, res=VS))
547
548 # all query types have a where part
549 M = translateGroupGraphPattern(q.where)
550
551 aggregate = False
552 if q.groupby:
553 conditions = []
554 # convert "GROUP BY (?expr as ?var)" to an Extend
555 for c in q.groupby.condition:
556 if isinstance(c, CompValue) and c.name == 'GroupAs':
557 M = Extend(M, c.expr, c.var)
558 c = c.var
559 conditions.append(c)
560
561 M = Group(p=M, expr=conditions)
562 aggregate = True
563 elif traverse(q.having, _hasAggregate, complete=False) or \
564 traverse(q.orderby, _hasAggregate, complete=False) or \
565 any(traverse(x.expr, _hasAggregate, complete=False)
566 for x in q.projection or [] if x.evar):
567 # if any aggregate is used, implicit group by
568 M = Group(p=M)
569 aggregate = True
570
571 if aggregate:
572 M, E = translateAggregates(q, M)
573 else:
574 E = []
575
576 # HAVING
577 if q.having:
578 M = Filter(expr=and_(*q.having.condition), p=M)
579
580 # VALUES
581 if q.valuesClause:
582 M = Join(p1=M, p2=ToMultiSet(translateValues(q.valuesClause)))
583
584 if not q.projection:
585 # select *
586 PV = list(VS)
587 else:
588 PV = list()
589 for v in q.projection:
590 if v.var:
591 if v not in PV:
592 PV.append(v.var)
593 elif v.evar:
594 if v not in PV:
595 PV.append(v.evar)
596
597 E.append((v.expr, v.evar))
598 else:
599 raise Exception("I expected a var or evar here!")
600
601 for e, v in E:
602 M = Extend(M, e, v)
603
604 # ORDER BY
605 if q.orderby:
606 M = OrderBy(M, [CompValue('OrderCondition', expr=c.expr,
607 order=c.order) for c in q.orderby.condition])
608
609 # PROJECT
610 M = Project(M, PV)
611
612 if q.modifier:
613 if q.modifier == 'DISTINCT':
614 M = CompValue('Distinct', p=M)
615 elif q.modifier == 'REDUCED':
616 M = CompValue('Reduced', p=M)
617
618 if q.limitoffset:
619 offset = 0
620 if q.limitoffset.offset != None:
621 offset = q.limitoffset.offset.toPython()
622
623 if q.limitoffset.limit != None:
624 M = CompValue('Slice', p=M, start=offset,
625 length=q.limitoffset.limit.toPython())
626 else:
627 M = CompValue('Slice', p=M, start=offset)
628
629 return M, PV
630
631
632 def simplify(n):
633 """Remove joins to empty BGPs"""
634 if isinstance(n, CompValue):
635 if n.name == 'Join':
636 if n.p1.name == 'BGP' and len(n.p1.triples) == 0:
637 return n.p2
638 if n.p2.name == 'BGP' and len(n.p2.triples) == 0:
639 return n.p1
640 elif n.name == 'BGP':
641 n["triples"] = reorderTriples(n.triples)
642 return n
643
644
645 def analyse(n, children):
646
647 """
648 Some things can be lazily joined.
649 This propegates whether they can up the tree
650 and sets lazy flags for all joins
651 """
652
653 if isinstance(n, CompValue):
654 if n.name == 'Join':
655 n["lazy"] = all(children)
656 return False
657 elif n.name in ('Slice', 'Distinct'):
658 return False
659 else:
660 return all(children)
661 else:
662 return True
663
664
665 def translatePrologue(p, base, initNs=None, prologue=None):
666
667 if prologue is None:
668 prologue = Prologue()
669 prologue.base = ""
670 if base:
671 prologue.base = base
672 if initNs:
673 for k, v in initNs.items():
674 prologue.bind(k, v)
675
676 for x in p:
677 if x.name == 'Base':
678 prologue.base = x.iri
679 elif x.name == 'PrefixDecl':
680 prologue.bind(x.prefix, prologue.absolutize(x.iri))
681
682 return prologue
683
684
685 def translateQuads(quads):
686 if quads.triples:
687 alltriples = triples(quads.triples)
688 else:
689 alltriples = []
690
691 allquads = collections.defaultdict(list)
692
693 if quads.quadsNotTriples:
694 for q in quads.quadsNotTriples:
695 if q.triples:
696 allquads[q.term] += triples(q.triples)
697
698 return alltriples, allquads
699
700
701 def translateUpdate1(u, prologue):
702 if u.name in ('Load', 'Clear', 'Drop', 'Create'):
703 pass # no translation needed
704 elif u.name in ('Add', 'Move', 'Copy'):
705 pass
706 elif u.name in ('InsertData', 'DeleteData', 'DeleteWhere'):
707 t, q = translateQuads(u.quads)
708 u["quads"] = q
709 u["triples"] = t
710 if u.name in ('DeleteWhere', 'DeleteData'):
711 pass # TODO: check for bnodes in triples
712 elif u.name == 'Modify':
713 if u.delete:
714 u.delete["triples"], u.delete[
715 "quads"] = translateQuads(u.delete.quads)
716 if u.insert:
717 u.insert["triples"], u.insert[
718 "quads"] = translateQuads(u.insert.quads)
719 u["where"] = translateGroupGraphPattern(u.where)
720 else:
721 raise Exception('Unknown type of update operation: %s' % u)
722
723 u.prologue = prologue
724 return u
725
726
727 def translateUpdate(q, base=None, initNs=None):
728 """
729 Returns a list of SPARQL Update Algebra expressions
730 """
731
732 res = []
733 prologue = None
734 if not q.request:
735 return res
736 for p, u in zip(q.prologue, q.request):
737 prologue = translatePrologue(p, base, initNs, prologue)
738
739 # absolutize/resolve prefixes
740 u = traverse(
741 u, visitPost=functools.partial(translatePName, prologue=prologue))
742 u = _traverse(u, _simplifyFilters)
743
744 u = traverse(u, visitPost=translatePath)
745
746 res.append(translateUpdate1(u, prologue))
747
748 return res
749
750
751 def translateQuery(q, base=None, initNs=None):
752 """
753 Translate a query-parsetree to a SPARQL Algebra Expression
754
755 Return a rdflib.plugins.sparql.sparql.Query object
756 """
757
758 # We get in: (prologue, query)
759
760 prologue = translatePrologue(q[0], base, initNs)
761
762 # absolutize/resolve prefixes
763 q[1] = traverse(
764 q[1], visitPost=functools.partial(translatePName, prologue=prologue))
765
766 P, PV = translate(q[1])
767 datasetClause = q[1].datasetClause
768 if q[1].name == 'ConstructQuery':
769
770 template = triples(q[1].template) if q[1].template else None
771
772 res = CompValue(q[1].name, p=P,
773 template=template,
774 datasetClause=datasetClause)
775 else:
776 res = CompValue(q[1].name, p=P, datasetClause=datasetClause, PV=PV)
777
778 res = traverse(res, visitPost=simplify)
779 _traverseAgg(res, visitor=analyse)
780 _traverseAgg(res, _addVars)
781
782 return Query(prologue, res)
783
784
785 def pprintAlgebra(q):
786 def pp(p, ind=" "):
787 # if isinstance(p, list):
788 # print "[ "
789 # for x in p: pp(x,ind)
790 # print "%s ]"%ind
791 # return
792 if not isinstance(p, CompValue):
793 print(p)
794 return
795 print("%s(" % (p.name, ))
796 for k in p:
797 print("%s%s =" % (ind, k,), end=' ')
798 pp(p[k], ind + " ")
799 print("%s)" % ind)
800
801 try:
802 pp(q.algebra)
803 except AttributeError:
804 # it's update, just a list
805 for x in q:
806 pp(x)
807
808 if __name__ == '__main__':
809 import sys
810 from rdflib.plugins.sparql import parser
811 import os.path
812
813 if os.path.exists(sys.argv[1]):
814 q = file(sys.argv[1])
815 else:
816 q = sys.argv[1]
817
818 pq = parser.parseQuery(q)
819 print(pq)
820 tq = translateQuery(pq)
821 print(pprintAlgebra(tq))