diff env/lib/python3.7/site-packages/rdflib/void.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
line wrap: on
line diff
--- a/env/lib/python3.7/site-packages/rdflib/void.py	Thu May 14 16:47:39 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,129 +0,0 @@
-import collections
-
-from rdflib import URIRef, Graph, Literal
-from rdflib.namespace import VOID, RDF
-
-
-def generateVoID(g, dataset=None, res=None, distinctForPartitions=True):
-    """
-    Returns a new graph with a VoID description of the passed dataset
-
-    For more info on Vocabulary of Interlinked Datasets (VoID), see:
-    http://vocab.deri.ie/void
-
-    This only makes two passes through the triples (once to detect the types
-    of things)
-
-    The tradeoff is that lots of temporary structures are built up in memory
-    meaning lots of memory may be consumed :)
-    I imagine at least a few copies of your original graph.
-
-    the distinctForPartitions parameter controls whether
-    distinctSubjects/objects are tracked for each class/propertyPartition
-    this requires more memory again
-
-    """
-
-    typeMap = collections.defaultdict(set)
-    classes = collections.defaultdict(set)
-    for e, c in g.subject_objects(RDF.type):
-        classes[c].add(e)
-        typeMap[e].add(c)
-
-    triples = 0
-    subjects = set()
-    objects = set()
-    properties = set()
-    classCount = collections.defaultdict(int)
-    propCount = collections.defaultdict(int)
-
-    classProps = collections.defaultdict(set)
-    classObjects = collections.defaultdict(set)
-    propSubjects = collections.defaultdict(set)
-    propObjects = collections.defaultdict(set)
-
-    for s, p, o in g:
-
-        triples += 1
-        subjects.add(s)
-        properties.add(p)
-        objects.add(o)
-
-        # class partitions
-        if s in typeMap:
-            for c in typeMap[s]:
-                classCount[c] += 1
-                if distinctForPartitions:
-                    classObjects[c].add(o)
-                    classProps[c].add(p)
-
-        # property partitions
-        propCount[p] += 1
-        if distinctForPartitions:
-            propObjects[p].add(o)
-            propSubjects[p].add(s)
-
-    if not dataset:
-        dataset = URIRef("http://example.org/Dataset")
-
-    if not res:
-        res = Graph()
-
-    res.add((dataset, RDF.type, VOID.Dataset))
-
-    # basic stats
-    res.add((dataset, VOID.triples, Literal(triples)))
-    res.add((dataset, VOID.classes, Literal(len(classes))))
-
-    res.add((dataset, VOID.distinctObjects, Literal(len(objects))))
-    res.add((dataset, VOID.distinctSubjects, Literal(len(subjects))))
-    res.add((dataset, VOID.properties, Literal(len(properties))))
-
-    for i, c in enumerate(classes):
-        part = URIRef(dataset + "_class%d" % i)
-        res.add((dataset, VOID.classPartition, part))
-        res.add((part, RDF.type, VOID.Dataset))
-
-        res.add((part, VOID.triples, Literal(classCount[c])))
-        res.add((part, VOID.classes, Literal(1)))
-
-        res.add((part, VOID["class"], c))
-
-        res.add((part, VOID.entities, Literal(len(classes[c]))))
-        res.add((part, VOID.distinctSubjects, Literal(len(classes[c]))))
-
-        if distinctForPartitions:
-            res.add(
-                (part, VOID.properties, Literal(len(classProps[c]))))
-            res.add((part, VOID.distinctObjects,
-                    Literal(len(classObjects[c]))))
-
-    for i, p in enumerate(properties):
-        part = URIRef(dataset + "_property%d" % i)
-        res.add((dataset, VOID.propertyPartition, part))
-        res.add((part, RDF.type, VOID.Dataset))
-
-        res.add((part, VOID.triples, Literal(propCount[p])))
-        res.add((part, VOID.properties, Literal(1)))
-
-        res.add((part, VOID.property, p))
-
-        if distinctForPartitions:
-
-            entities = 0
-            propClasses = set()
-            for s in propSubjects[p]:
-                if s in typeMap:
-                    entities += 1
-                for c in typeMap[s]:
-                    propClasses.add(c)
-
-            res.add((part, VOID.entities, Literal(entities)))
-            res.add((part, VOID.classes, Literal(len(propClasses))))
-
-            res.add((part, VOID.distinctSubjects,
-                    Literal(len(propSubjects[p]))))
-            res.add((part, VOID.distinctObjects,
-                    Literal(len(propObjects[p]))))
-
-    return res, dataset