diff env/lib/python3.7/site-packages/rdflib/plugins/sparql/aggregates.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
line wrap: on
line diff
--- a/env/lib/python3.7/site-packages/rdflib/plugins/sparql/aggregates.py	Thu May 14 16:47:39 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,260 +0,0 @@
-from rdflib import Literal, XSD
-
-from rdflib.plugins.sparql.evalutils import _eval, NotBoundError, _val
-from rdflib.plugins.sparql.operators import numeric
-from rdflib.plugins.sparql.datatypes import type_promotion
-
-from rdflib.plugins.sparql.sparql import SPARQLTypeError
-
-from decimal import Decimal
-
-"""
-Aggregation functions
-"""
-
-class Accumulator(object):
-    """abstract base class for different aggregation functions """
-
-    def __init__(self, aggregation):
-        self.var = aggregation.res
-        self.expr = aggregation.vars
-        if not aggregation.distinct:
-            self.use_row = self.dont_care
-            self.distinct = False
-        else:
-            self.distinct = aggregation.distinct
-            self.seen = set()
-
-    def dont_care(self, row):
-        """skips distinct test """
-        return True
-
-    def use_row(self, row):
-        """tests distinct with set """
-        return _eval(self.expr, row) not in self.seen
-
-    def set_value(self, bindings):
-        """sets final value in bindings"""
-        bindings[self.var] = self.get_value()
-
-
-class Counter(Accumulator):
-
-    def __init__(self, aggregation):
-        super(Counter, self).__init__(aggregation)
-        self.value = 0
-        if self.expr == "*":
-            # cannot eval "*" => always use the full row
-            self.eval_row = self.eval_full_row
-
-    def update(self, row, aggregator):
-        try:
-            val = self.eval_row(row)
-        except NotBoundError:
-            # skip UNDEF
-            return
-        self.value += 1
-        if self.distinct:
-            self.seen.add(val)
-
-    def get_value(self):
-        return Literal(self.value)
-
-    def eval_row(self, row):
-        return _eval(self.expr, row)
-
-    def eval_full_row(self, row):
-        return row
-
-    def use_row(self, row):
-        return self.eval_row(row) not in self.seen
-
-
-def type_safe_numbers(*args):
-    types = list(map(type, args))
-    if float in types and Decimal in types:
-        return list(map(float, args))
-    return args
-
-
-class Sum(Accumulator):
-
-    def __init__(self, aggregation):
-        super(Sum, self).__init__(aggregation)
-        self.value = 0
-        self.datatype = None
-
-    def update(self, row, aggregator):
-        try:
-            value = _eval(self.expr, row)
-            dt = self.datatype
-            if dt is None:
-                dt = value.datatype
-            else:
-                dt = type_promotion(dt, value.datatype)
-            self.datatype = dt
-            self.value = sum(type_safe_numbers(self.value, numeric(value)))
-            if self.distinct:
-                self.seen.add(value)
-        except NotBoundError:
-            # skip UNDEF
-            pass
-
-    def get_value(self):
-        return Literal(self.value, datatype=self.datatype)
-
-class Average(Accumulator):
-
-    def __init__(self, aggregation):
-        super(Average, self).__init__(aggregation)
-        self.counter = 0
-        self.sum = 0
-        self.datatype = None
-
-    def update(self, row, aggregator):
-        try:
-            value = _eval(self.expr, row)
-            dt = self.datatype
-            self.sum = sum(type_safe_numbers(self.sum, numeric(value)))
-            if dt is None:
-                dt = value.datatype
-            else:
-                dt = type_promotion(dt, value.datatype)
-            self.datatype = dt
-            if self.distinct:
-                self.seen.add(value)
-            self.counter += 1
-        # skip UNDEF or BNode => SPARQLTypeError
-        except NotBoundError:
-            pass
-        except SPARQLTypeError:
-            pass
-
-    def get_value(self):
-        if self.counter == 0:
-            return Literal(0)
-        if self.datatype in (XSD.float, XSD.double):
-            return Literal(self.sum / self.counter)
-        else:
-            return Literal(Decimal(self.sum) / Decimal(self.counter))
-
-
-class Extremum(Accumulator):
-    """abstract base class for Minimum and Maximum"""
-
-    def __init__(self, aggregation):
-        super(Extremum, self).__init__(aggregation)
-        self.value = None
-        # DISTINCT would not change the value for MIN or MAX
-        self.use_row = self.dont_care
-
-    def set_value(self, bindings):
-        if self.value is not None:
-            # simply do not set if self.value is still None
-            bindings[self.var] = Literal(self.value)
-
-    def update(self, row, aggregator):
-        try:
-            if self.value is None:
-                self.value = _eval(self.expr, row)
-            else:
-                # self.compare is implemented by Minimum/Maximum
-                self.value = self.compare(self.value, _eval(self.expr, row))
-        # skip UNDEF or BNode => SPARQLTypeError
-        except NotBoundError:
-            pass
-        except SPARQLTypeError:
-            pass
-
-
-class Minimum(Extremum):
-
-    def compare(self, val1, val2):
-        return min(val1, val2, key=_val)
-
-
-class Maximum(Extremum):
-
-    def compare(self, val1, val2):
-        return max(val1, val2, key=_val)
-
-
-class Sample(Accumulator):
-    """takes the first eligable value"""
-
-    def __init__(self, aggregation):
-        super(Sample, self).__init__(aggregation)
-        # DISTINCT would not change the value
-        self.use_row = self.dont_care
-
-    def update(self, row, aggregator):
-        try:
-            # set the value now
-            aggregator.bindings[self.var] =  _eval(self.expr, row)
-            # and skip this accumulator for future rows
-            del aggregator.accumulators[self.var]
-        except NotBoundError:
-            pass
-
-    def get_value(self):
-        # set None if no value was set
-        return None
-
-class GroupConcat(Accumulator):
-
-    def __init__(self, aggregation):
-        super(GroupConcat, self).__init__(aggregation)
-        # only GROUPCONCAT needs to have a list as accumlator
-        self.value = []
-        self.separator = aggregation.separator or " "
-
-    def update(self, row, aggregator):
-        try:
-            value = _eval(self.expr, row)
-            self.value.append(value)
-            if self.distinct:
-                self.seen.add(value)
-        # skip UNDEF
-        except NotBoundError:
-            pass
-
-    def get_value(self):
-        return Literal(self.separator.join(str(v) for v in self.value))
-
-
-class Aggregator(object):
-    """combines different Accumulator objects"""
-
-    accumulator_classes = {
-        "Aggregate_Count": Counter,
-        "Aggregate_Sample": Sample,
-        "Aggregate_Sum": Sum,
-        "Aggregate_Avg": Average,
-        "Aggregate_Min": Minimum,
-        "Aggregate_Max": Maximum,
-        "Aggregate_GroupConcat": GroupConcat,
-    }
-
-    def __init__(self, aggregations):
-        self.bindings = {}
-        self.accumulators = {}
-        for a in aggregations:
-            accumulator_class = self.accumulator_classes.get(a.name)
-            if accumulator_class is None:
-                raise Exception("Unknown aggregate function " + a.name)
-            self.accumulators[a.res] = accumulator_class(a)
-
-    def update(self, row):
-        """update all own accumulators"""
-        # SAMPLE accumulators may delete themselves
-        # => iterate over list not generator
-
-        for acc in list(self.accumulators.values()):
-            if acc.use_row(row):
-                acc.update(row, self)
-
-    def get_bindings(self):
-        """calculate and set last values"""
-        for acc in self.accumulators.values():
-            acc.set_value(self.bindings)
-        return self.bindings