comparison env/lib/python3.9/site-packages/rdflib/graph.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 from __future__ import absolute_import
2 from __future__ import division
3 from __future__ import print_function
4
5 from rdflib.term import Literal # required for doctests
6
7 assert Literal # avoid warning
8 from rdflib.namespace import Namespace # required for doctests
9
10 assert Namespace # avoid warning
11
12
13 __doc__ = """\
14
15 RDFLib defines the following kinds of Graphs:
16
17 * :class:`~rdflib.graph.Graph`
18 * :class:`~rdflib.graph.QuotedGraph`
19 * :class:`~rdflib.graph.ConjunctiveGraph`
20 * :class:`~rdflib.graph.Dataset`
21
22 Graph
23 -----
24
25 An RDF graph is a set of RDF triples. Graphs support the python ``in``
26 operator, as well as iteration and some operations like union,
27 difference and intersection.
28
29 see :class:`~rdflib.graph.Graph`
30
31 Conjunctive Graph
32 -----------------
33
34 A Conjunctive Graph is the most relevant collection of graphs that are
35 considered to be the boundary for closed world assumptions. This
36 boundary is equivalent to that of the store instance (which is itself
37 uniquely identified and distinct from other instances of
38 :class:`Store` that signify other Conjunctive Graphs). It is
39 equivalent to all the named graphs within it and associated with a
40 ``_default_`` graph which is automatically assigned a :class:`BNode`
41 for an identifier - if one isn't given.
42
43 see :class:`~rdflib.graph.ConjunctiveGraph`
44
45 Quoted graph
46 ------------
47
48 The notion of an RDF graph [14] is extended to include the concept of
49 a formula node. A formula node may occur wherever any other kind of
50 node can appear. Associated with a formula node is an RDF graph that
51 is completely disjoint from all other graphs; i.e. has no nodes in
52 common with any other graph. (It may contain the same labels as other
53 RDF graphs; because this is, by definition, a separate graph,
54 considerations of tidiness do not apply between the graph at a formula
55 node and any other graph.)
56
57 This is intended to map the idea of "{ N3-expression }" that is used
58 by N3 into an RDF graph upon which RDF semantics is defined.
59
60 see :class:`~rdflib.graph.QuotedGraph`
61
62 Dataset
63 -------
64
65 The RDF 1.1 Dataset, a small extension to the Conjunctive Graph. The
66 primary term is "graphs in the datasets" and not "contexts with quads"
67 so there is a separate method to set/retrieve a graph in a dataset and
68 to operate with dataset graphs. As a consequence of this approach,
69 dataset graphs cannot be identified with blank nodes, a name is always
70 required (RDFLib will automatically add a name if one is not provided
71 at creation time). This implementation includes a convenience method
72 to directly add a single quad to a dataset graph.
73
74 see :class:`~rdflib.graph.Dataset`
75
76 Working with graphs
77 ===================
78
79 Instantiating Graphs with default store (IOMemory) and default identifier
80 (a BNode):
81
82 >>> g = Graph()
83 >>> g.store.__class__
84 <class 'rdflib.plugins.memory.IOMemory'>
85 >>> g.identifier.__class__
86 <class 'rdflib.term.BNode'>
87
88 Instantiating Graphs with an IOMemory store and an identifier -
89 <http://rdflib.net>:
90
91 >>> g = Graph('IOMemory', URIRef("http://rdflib.net"))
92 >>> g.identifier
93 rdflib.term.URIRef('http://rdflib.net')
94 >>> str(g) # doctest: +NORMALIZE_WHITESPACE
95 "<http://rdflib.net> a rdfg:Graph;rdflib:storage
96 [a rdflib:Store;rdfs:label 'IOMemory']."
97
98 Creating a ConjunctiveGraph - The top level container for all named Graphs
99 in a "database":
100
101 >>> g = ConjunctiveGraph()
102 >>> str(g.default_context)
103 "[a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'IOMemory']]."
104
105 Adding / removing reified triples to Graph and iterating over it directly or
106 via triple pattern:
107
108 >>> g = Graph()
109 >>> statementId = BNode()
110 >>> print(len(g))
111 0
112 >>> g.add((statementId, RDF.type, RDF.Statement))
113 >>> g.add((statementId, RDF.subject,
114 ... URIRef("http://rdflib.net/store/ConjunctiveGraph")))
115 >>> g.add((statementId, RDF.predicate, RDFS.label))
116 >>> g.add((statementId, RDF.object, Literal("Conjunctive Graph")))
117 >>> print(len(g))
118 4
119 >>> for s, p, o in g:
120 ... print(type(s))
121 ...
122 <class 'rdflib.term.BNode'>
123 <class 'rdflib.term.BNode'>
124 <class 'rdflib.term.BNode'>
125 <class 'rdflib.term.BNode'>
126
127 >>> for s, p, o in g.triples((None, RDF.object, None)):
128 ... print(o)
129 ...
130 Conjunctive Graph
131 >>> g.remove((statementId, RDF.type, RDF.Statement))
132 >>> print(len(g))
133 3
134
135 ``None`` terms in calls to :meth:`~rdflib.graph.Graph.triples` can be
136 thought of as "open variables".
137
138 Graphs support set-theoretic operators: you can add/subtract graphs, as
139 well as take their intersection (with the multiplication operator g1*g2)
140 and xor (g1 ^ g2).
141
142 Note that BNode IDs are kept when doing set-theoretic operations; this
143 may or may not be what you want. Two named graphs within the same
144 application probably want to share BNode IDs; two graphs with data from
145 different sources probably do not. If your BNode IDs are all generated
146 by RDFLib they are UUIDs and unique.
147
148 >>> g1 = Graph()
149 >>> g2 = Graph()
150 >>> u = URIRef("http://example.com/foo")
151 >>> g1.add([u, RDFS.label, Literal("foo")])
152 >>> g1.add([u, RDFS.label, Literal("bar")])
153 >>> g2.add([u, RDFS.label, Literal("foo")])
154 >>> g2.add([u, RDFS.label, Literal("bing")])
155 >>> len(g1 + g2) # adds bing as label
156 3
157 >>> len(g1 - g2) # removes foo
158 1
159 >>> len(g1 * g2) # only foo
160 1
161 >>> g1 += g2 # now g1 contains everything
162
163
164 Graph Aggregation - ConjunctiveGraphs and ReadOnlyGraphAggregate within
165 the same store:
166
167 >>> store = plugin.get("IOMemory", Store)()
168 >>> g1 = Graph(store)
169 >>> g2 = Graph(store)
170 >>> g3 = Graph(store)
171 >>> stmt1 = BNode()
172 >>> stmt2 = BNode()
173 >>> stmt3 = BNode()
174 >>> g1.add((stmt1, RDF.type, RDF.Statement))
175 >>> g1.add((stmt1, RDF.subject,
176 ... URIRef('http://rdflib.net/store/ConjunctiveGraph')))
177 >>> g1.add((stmt1, RDF.predicate, RDFS.label))
178 >>> g1.add((stmt1, RDF.object, Literal('Conjunctive Graph')))
179 >>> g2.add((stmt2, RDF.type, RDF.Statement))
180 >>> g2.add((stmt2, RDF.subject,
181 ... URIRef('http://rdflib.net/store/ConjunctiveGraph')))
182 >>> g2.add((stmt2, RDF.predicate, RDF.type))
183 >>> g2.add((stmt2, RDF.object, RDFS.Class))
184 >>> g3.add((stmt3, RDF.type, RDF.Statement))
185 >>> g3.add((stmt3, RDF.subject,
186 ... URIRef('http://rdflib.net/store/ConjunctiveGraph')))
187 >>> g3.add((stmt3, RDF.predicate, RDFS.comment))
188 >>> g3.add((stmt3, RDF.object, Literal(
189 ... 'The top-level aggregate graph - The sum ' +
190 ... 'of all named graphs within a Store')))
191 >>> len(list(ConjunctiveGraph(store).subjects(RDF.type, RDF.Statement)))
192 3
193 >>> len(list(ReadOnlyGraphAggregate([g1,g2]).subjects(
194 ... RDF.type, RDF.Statement)))
195 2
196
197 ConjunctiveGraphs have a :meth:`~rdflib.graph.ConjunctiveGraph.quads` method
198 which returns quads instead of triples, where the fourth item is the Graph
199 (or subclass thereof) instance in which the triple was asserted:
200
201 >>> uniqueGraphNames = set(
202 ... [graph.identifier for s, p, o, graph in ConjunctiveGraph(store
203 ... ).quads((None, RDF.predicate, None))])
204 >>> len(uniqueGraphNames)
205 3
206 >>> unionGraph = ReadOnlyGraphAggregate([g1, g2])
207 >>> uniqueGraphNames = set(
208 ... [graph.identifier for s, p, o, graph in unionGraph.quads(
209 ... (None, RDF.predicate, None))])
210 >>> len(uniqueGraphNames)
211 2
212
213 Parsing N3 from a string
214
215 >>> g2 = Graph()
216 >>> src = '''
217 ... @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
218 ... @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
219 ... [ a rdf:Statement ;
220 ... rdf:subject <http://rdflib.net/store#ConjunctiveGraph>;
221 ... rdf:predicate rdfs:label;
222 ... rdf:object "Conjunctive Graph" ] .
223 ... '''
224 >>> g2 = g2.parse(data=src, format="n3")
225 >>> print(len(g2))
226 4
227
228 Using Namespace class:
229
230 >>> RDFLib = Namespace("http://rdflib.net/")
231 >>> RDFLib.ConjunctiveGraph
232 rdflib.term.URIRef('http://rdflib.net/ConjunctiveGraph')
233 >>> RDFLib["Graph"]
234 rdflib.term.URIRef('http://rdflib.net/Graph')
235
236 """
237
238 import logging
239
240 logger = logging.getLogger(__name__)
241
242 import random
243 from rdflib.namespace import RDF, RDFS, SKOS
244 from rdflib import plugin, exceptions, query
245 from rdflib.term import Node, URIRef, Genid
246 from rdflib.term import BNode
247 import rdflib.term
248 from rdflib.paths import Path
249 from rdflib.store import Store
250 from rdflib.serializer import Serializer
251 from rdflib.parser import Parser
252 from rdflib.parser import create_input_source
253 from rdflib.namespace import NamespaceManager
254 from rdflib.resource import Resource
255 from rdflib.collection import Collection
256
257 import os
258 import shutil
259 import tempfile
260
261 from six import BytesIO
262 from six import b
263 from six.moves.urllib.parse import urlparse
264
265 __all__ = [
266 "Graph",
267 "ConjunctiveGraph",
268 "QuotedGraph",
269 "Seq",
270 "ModificationException",
271 "Dataset",
272 "UnSupportedAggregateOperation",
273 "ReadOnlyGraphAggregate",
274 ]
275
276
277 class Graph(Node):
278 """An RDF Graph
279
280 The constructor accepts one argument, the "store"
281 that will be used to store the graph data (see the "store"
282 package for stores currently shipped with rdflib).
283
284 Stores can be context-aware or unaware. Unaware stores take up
285 (some) less space but cannot support features that require
286 context, such as true merging/demerging of sub-graphs and
287 provenance.
288
289 The Graph constructor can take an identifier which identifies the Graph
290 by name. If none is given, the graph is assigned a BNode for its
291 identifier.
292
293 For more on named graphs, see: http://www.w3.org/2004/03/trix/
294 """
295
def __init__(self, store="default", identifier=None, namespace_manager=None, base=None):
    """Set up the graph's store, identifier and namespace manager.

    :param store: a ``Store`` instance, or a plugin name that is looked
        up with ``plugin.get`` and instantiated with no arguments.
    :param identifier: name of the graph; a falsy value is replaced by a
        fresh ``BNode`` and a non-``Node`` value is wrapped in ``URIRef``.
    :param namespace_manager: optional namespace manager; when ``None``
        one is created lazily on first access.
    :param base: stored as ``self.base``.
    """
    super(Graph, self).__init__()
    self.base = base

    graph_id = identifier or BNode()
    if not isinstance(graph_id, Node):
        graph_id = URIRef(graph_id)
    self.__identifier = graph_id

    if isinstance(store, Store):
        backend = store
    else:
        # TODO: error handling
        backend = plugin.get(store, Store)()
    self.__store = backend

    self.__namespace_manager = namespace_manager
    self.context_aware = False
    self.formula_aware = False
    self.default_union = False
313
def __get_store(self):
    """Return the store object backing this graph."""
    backend = self.__store
    return backend


# No setter is supplied, so ``graph.store`` is a read-only attribute.
store = property(__get_store)
318
def __get_identifier(self):
    """Return the identifier this graph was created with."""
    graph_id = self.__identifier
    return graph_id


# No setter is supplied, so ``graph.identifier`` is a read-only attribute.
identifier = property(__get_identifier)
323
def _get_namespace_manager(self):
    """Return the namespace manager, creating one on first use."""
    if self.__namespace_manager is not None:
        return self.__namespace_manager
    self.__namespace_manager = NamespaceManager(self)
    return self.__namespace_manager


def _set_namespace_manager(self, nm):
    """Replace the namespace manager with ``nm``."""
    self.__namespace_manager = nm


namespace_manager = property(
    fget=_get_namespace_manager,
    fset=_set_namespace_manager,
    doc="this graph's namespace-manager",
)
337
def __repr__(self):
    """Return a debug string with the graph identifier and concrete type."""
    details = (self.identifier, type(self))
    return "<Graph identifier=%s (%s)>" % details
340
def __str__(self):
    """Return an N3-like one-line description of the graph and its store.

    Graphs identified by a ``URIRef`` are described by that URI; any
    other identifier produces the anonymous ``[...]`` form.
    """
    if not isinstance(self.identifier, URIRef):
        anonymous = "[a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label '%s']]."
        return anonymous % self.store.__class__.__name__

    named = "%s a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label '%s']."
    return named % (self.identifier.n3(), self.store.__class__.__name__)
350
def toPython(self):
    """Return the graph itself; a graph is its own Python representation."""
    python_value = self
    return python_value
353
def destroy(self, configuration):
    """Ask the underlying store to destroy itself for ``configuration``.

    The call is forwarded unchanged to the store's ``destroy`` method.
    """
    backend = self.__store
    backend.destroy(configuration)
357
358 # Transactional interfaces (optional)
def commit(self):
    """Forward a commit request to the underlying store."""
    backend = self.__store
    backend.commit()
362
def rollback(self):
    """Forward a rollback request to the underlying store."""
    backend = self.__store
    backend.rollback()
366
def open(self, configuration, create=False):
    """Open the underlying store and return whatever the store returns.

    Stores that need a database connection or another resource acquire
    it here. ``configuration`` and ``create`` are passed through
    positionally to the store's ``open`` method.
    """
    backend = self.__store
    outcome = backend.open(configuration, create)
    return outcome
374
def close(self, commit_pending_transaction=False):
    """Close the underlying store.

    Stores that hold a database connection or another resource release
    it here. ``commit_pending_transaction`` is forwarded to the store as
    a keyword argument.
    """
    backend = self.__store
    backend.close(commit_pending_transaction=commit_pending_transaction)
382
def add(self, triple):
    """Add ``triple`` to the store with this graph as its context.

    Each of the three terms must be a ``Node``; this is enforced with
    ``assert`` statements checked in subject, predicate, object order.
    The triple is stored with ``quoted=False``.
    """
    s, p, o = triple
    for role, term in (("Subject", s), ("Predicate", p), ("Object", o)):
        assert isinstance(term, Node), "%s %s must be an rdflib term" % (role, term)
    self.__store.add((s, p, o), self, quoted=False)
390
def addN(self, quads):
    """Add the quads whose context is this graph to the store.

    ``quads`` is an iterable of ``(s, p, o, c)``. A quad is passed on to
    the store only when ``c`` is a ``Graph`` whose identifier is the same
    object as this graph's identifier and ``_assertnode(s, p, o)`` is
    truthy. The quads are filtered lazily while the store consumes them.
    """

    def _own_quads():
        for s, p, o, c in quads:
            if not isinstance(c, Graph):
                continue
            # NOTE(review): this is an identity test, not equality, so a
            # context with an equal but distinct identifier object is
            # skipped; confirm that is intended.
            if c.identifier is not self.identifier:
                continue
            if _assertnode(s, p, o):
                yield (s, p, o, c)

    self.__store.addN(_own_quads())
401
def remove(self, triple):
    """Remove triples matching ``triple`` from this graph.

    The request is handed to the store's ``remove`` with this graph
    passed as the ``context`` argument.
    """
    backend = self.__store
    backend.remove(triple, context=self)
409
def triples(self, triple):
    """Generate the triples in this graph that match the pattern ``triple``.

    When the predicate is a ``Path`` it is evaluated against this graph
    and every ``(start, end)`` pair it produces is yielded as
    ``(start, path, end)``. Otherwise the pattern is passed to the store
    with this graph as context and the matching triples are yielded
    without their context information.
    """
    s, p, o = triple
    if not isinstance(p, Path):
        for (found_s, found_p, found_o), _contexts in self.__store.triples(
            (s, p, o), context=self
        ):
            yield found_s, found_p, found_o
        return

    for start, end in p.eval(self, s, o):
        yield start, p, end
423
def __getitem__(self, item):
    """
    Index or slice the graph as a shorthand for triple-pattern queries.

    A slice ``g[s:p:o]`` is read as a triple pattern in which omitted
    parts are wildcards. The result iterates over the parts that were
    not given; when all three are given the result is the boolean
    ``(s, p, o) in g``. Indexing with a single term or path is the same
    as ``g[term::]``.

    >>> import rdflib
    >>> g = rdflib.Graph()
    >>> g.add((rdflib.URIRef("urn:bob"), rdflib.RDFS.label, rdflib.Literal("Bob")))

    >>> list(g[rdflib.URIRef("urn:bob")]) # all triples about bob
    [(rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), rdflib.term.Literal('Bob'))]

    >>> list(g[:rdflib.RDFS.label]) # all label triples
    [(rdflib.term.URIRef('urn:bob'), rdflib.term.Literal('Bob'))]

    >>> list(g[::rdflib.Literal("Bob")]) # all triples with bob as object
    [(rdflib.term.URIRef('urn:bob'), rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#label'))]

    Combined with SPARQL paths, more complex queries can be
    written concisely:

    Name of all Bobs friends:

    g[bob : FOAF.knows/FOAF.name ]

    Some label for Bob:

    g[bob : DC.title|FOAF.name|RDFS.label]

    All friends and friends of friends of Bob

    g[bob : FOAF.knows * "+"]

    etc.

    .. versionadded:: 4.0

    """
    if isinstance(item, slice):
        s, p, o = item.start, item.stop, item.step
        # One flag per position: True where that part of the pattern is open.
        is_open = (s is None, p is None, o is None)

        if is_open == (True, True, True):
            return self.triples((s, p, o))
        if is_open == (True, True, False):
            return self.subject_predicates(o)
        if is_open == (True, False, True):
            return self.subject_objects(p)
        if is_open == (False, True, True):
            return self.predicate_objects(s)
        if is_open == (True, False, False):
            return self.subjects(p, o)
        if is_open == (False, True, False):
            return self.predicates(s, o)
        if is_open == (False, False, True):
            return self.objects(s, p)
        # all given
        return (s, p, o) in self

    if isinstance(item, (Path, Node)):
        return self.predicate_objects(item)

    raise TypeError(
        "You can only index a graph by a single rdflib term or path, or a slice of rdflib terms."
    )
494
def __len__(self):
    """Return the number of triples the store reports for this graph.

    The count is obtained from the store's ``__len__`` with this graph
    passed as the ``context`` argument.
    """
    backend = self.__store
    return backend.__len__(context=self)
502
def __iter__(self):
    """Iterate over every triple in the graph (the all-wildcard pattern)."""
    match_everything = (None, None, None)
    return self.triples(match_everything)
506
def __contains__(self, triple):
    """Support ``triple in graph``: True when the pattern has any match."""
    nothing = object()
    first_match = next(iter(self.triples(triple)), nothing)
    return first_match is not nothing
512
def __hash__(self):
    """Hash the graph by its identifier, matching ``__eq__``."""
    graph_id = self.identifier
    return hash(graph_id)
515
def __cmp__(self, other):
    """Three-way comparison used by Python 2.

    Returns -1 when ``other`` is ``None``, 1 when ``other`` is not a
    ``Graph``, and otherwise the sign of comparing the two identifiers.
    """
    if other is None:
        return -1
    if not isinstance(other, Graph):
        # Note if None is considered equivalent to owl:Nothing
        # Then perhaps a graph with length 0 should be considered
        # equivalent to None (if compared to it)?
        return 1
    mine, theirs = self.identifier, other.identifier
    return (mine > theirs) - (mine < theirs)
528
def __eq__(self, other):
    """Two graphs are equal when their identifiers are equal."""
    if not isinstance(other, Graph):
        return False
    return self.identifier == other.identifier
531
def __lt__(self, other):
    """Order graphs by identifier.

    A graph sorts before ``None``; compared with a non-Graph value the
    result is ``False``.
    """
    if other is None:
        return True
    if not isinstance(other, Graph):
        return False
    return self.identifier < other.identifier
536
def __le__(self, other):
    """Less-than-or-equal, built from ``<`` and ``==``."""
    strictly_less = self < other
    if strictly_less:
        return strictly_less
    return self == other
539
def __gt__(self, other):
    """Order graphs by identifier, consistently with ``__lt__`` and ``__cmp__``.

    Returns the identifier comparison when ``other`` is a ``Graph``,
    ``False`` when ``other`` is ``None`` (a graph sorts before ``None``)
    and ``True`` for any other value.
    """
    if isinstance(other, Graph):
        # The previous expression fell through to ``other is not None``
        # here, so every Graph operand compared as "greater" regardless
        # of the identifiers.
        return self.identifier > other.identifier
    return other is not None
544
def __ge__(self, other):
    """Greater-than-or-equal, built from ``>`` and ``==``."""
    strictly_greater = self > other
    if strictly_greater:
        return strictly_greater
    return self == other
547
def __iadd__(self, other):
    """In-place union: add every triple of ``other`` to this graph.

    BNode IDs are not changed. Returns ``self`` so ``g1 += g2`` rebinds
    ``g1`` to the same graph.
    """

    def _as_own_quads():
        # Tag each incoming triple with this graph as its context.
        for s, p, o in other:
            yield (s, p, o, self)

    self.addN(_as_own_quads())
    return self
553
def __isub__(self, other):
    """In-place difference: remove every triple of ``other`` from this graph.

    BNode IDs are not changed. Returns ``self`` so ``g1 -= g2`` rebinds
    ``g1`` to the same graph.
    """
    for unwanted in other:
        self.remove(unwanted)
    return self
560
def __add__(self, other):
    """Set-theoretic union, returned as a new ``Graph``.

    The result is bound to the combined namespace bindings of both
    operands and receives this graph's triples followed by those of
    ``other``. BNode IDs are not changed.
    """
    union = Graph()
    bindings = set(list(self.namespaces()) + list(other.namespaces()))
    for prefix, uri in bindings:
        union.bind(prefix, uri)
    for operand in (self, other):
        for triple in operand:
            union.add(triple)
    return union
572
def __mul__(self, other):
    """Set-theoretic intersection, returned as a new ``Graph``.

    Keeps the triples of ``other`` that are also in this graph.
    BNode IDs are not changed.
    """
    common = Graph()
    for candidate in other:
        if candidate not in self:
            continue
        common.add(candidate)
    return common
581
def __sub__(self, other):
    """Set-theoretic difference, returned as a new ``Graph``.

    Keeps the triples of this graph that are not in ``other``.
    BNode IDs are not changed.
    """
    remainder = Graph()
    for candidate in self:
        if candidate in other:
            continue
        remainder.add(candidate)
    return remainder
590
def __xor__(self, other):
    """Set-theoretic XOR: triples found in exactly one of the two graphs.

    Computed as ``(self - other) + (other - self)``.
    BNode IDs are not changed.
    """
    only_here = self - other
    only_there = other - self
    return only_here + only_there
595
596 __or__ = __add__
597 __and__ = __mul__
598
599 # Conv. methods
600
def set(self, triple):
    """Replace the value(s) of a subject/predicate pair.

    Every existing ``(subject, predicate, *)`` triple is removed and
    ``(subject, predicate, object)`` is then added. Subject and predicate
    must not be ``None`` (checked with ``assert``), because a ``None``
    would turn the removal into a wildcard.
    """
    subject, predicate, object_ = triple
    assert (
        subject is not None
    ), "s can't be None in .set([s,p,o]), as it would remove (*, p, *)"
    assert (
        predicate is not None
    ), "p can't be None in .set([s,p,o]), as it would remove (s, *, *)"

    stale_pattern = (subject, predicate, None)
    replacement = (subject, predicate, object_)
    self.remove(stale_pattern)
    self.add(replacement)
616
def subjects(self, predicate=None, object=None):
    """Generate the subject of every triple matching ``predicate`` and ``object``.

    A ``None`` argument acts as a wildcard.
    """
    pattern = (None, predicate, object)
    for subj, _pred, _obj in self.triples(pattern):
        yield subj
621
def predicates(self, subject=None, object=None):
    """Generate the predicate of every triple matching ``subject`` and ``object``.

    A ``None`` argument acts as a wildcard.
    """
    pattern = (subject, None, object)
    for _subj, pred, _obj in self.triples(pattern):
        yield pred
626
def objects(self, subject=None, predicate=None):
    """Generate the object of every triple matching ``subject`` and ``predicate``.

    A ``None`` argument acts as a wildcard.
    """
    pattern = (subject, predicate, None)
    for _subj, _pred, obj in self.triples(pattern):
        yield obj
631
def subject_predicates(self, object=None):
    """Generate ``(subject, predicate)`` pairs of triples with the given object.

    ``None`` matches any object.
    """
    pattern = (None, None, object)
    for subj, pred, _obj in self.triples(pattern):
        yield subj, pred
636
def subject_objects(self, predicate=None):
    """Generate ``(subject, object)`` pairs of triples with the given predicate.

    ``None`` matches any predicate.
    """
    pattern = (None, predicate, None)
    for subj, _pred, obj in self.triples(pattern):
        yield subj, obj
641
def predicate_objects(self, subject=None):
    """Generate ``(predicate, object)`` pairs of triples with the given subject.

    ``None`` matches any subject.
    """
    pattern = (subject, None, None)
    for _subj, pred, obj in self.triples(pattern):
        yield pred, obj
646
def triples_choices(self, triple, context=None):
    """Generate triples from the store's ``triples_choices`` for this graph.

    The pattern is passed to the store with this graph as the context;
    the ``context`` argument of this method is not used. Only the
    triples are yielded, without the store's context information.
    """
    subject, predicate, object_ = triple
    matches = self.store.triples_choices(
        (subject, predicate, object_), context=self
    )
    for (s, p, o), _contexts in matches:
        yield s, p, o
653
def value(
    self, subject=None, predicate=RDF.value, object=None, default=None, any=True
):
    """Get a value for a pair of two criteria

    Exactly one of subject, predicate, object must be None. Useful if one
    knows that there may only be one value.

    Parameters:
    subject, predicate, object -- exactly one must be None
    default -- value to be returned if no values found
    any -- if True, return any value in the case there is more than one,
    else, raise UniquenessError

    Returns None (not ``default``) when two or more of the criteria are
    None. Raises ValueError when none of them is None, since there is
    then nothing to look up; this case previously failed with an
    UnboundLocalError.
    """
    open_terms = [term for term in (subject, predicate, object) if term is None]
    if len(open_terms) >= 2:
        return None
    if not open_terms:
        raise ValueError(
            "exactly one of subject, predicate and object must be None"
        )

    if object is None:
        values = self.objects(subject, predicate)
    elif subject is None:
        values = self.subjects(predicate, object)
    else:
        values = self.predicates(subject, object)

    try:
        retval = next(values)
    except StopIteration:
        return default

    # Identity test on purpose: only the literal ``False`` asks for the
    # uniqueness check, exactly as before.
    if any is False:
        try:
            next(values)
        except StopIteration:
            # Only one value exists, so it is unique.
            return retval
        msg = (
            "While trying to find a value for (%s, %s, %s) the"
            " following multiple values where found:\n"
            % (subject, predicate, object)
        )
        triples = self.store.triples((subject, predicate, object), None)
        for (s, p, o), contexts in triples:
            msg += "(%s, %s, %s)\n (contexts: %s)\n" % (
                s,
                p,
                o,
                list(contexts),
            )
        raise exceptions.UniquenessError(msg)
    return retval
712
def label(self, subject, default=""):
    """Return an ``RDFS.label`` value of ``subject``.

    ``default`` is returned when ``subject`` is ``None`` or has no label;
    when several labels exist any one of them is returned.
    """
    if subject is not None:
        return self.value(subject, RDFS.label, default=default, any=True)
    return default
721
def preferredLabel(
    self,
    subject,
    lang=None,
    default=None,
    labelProperties=(SKOS.prefLabel, RDFS.label),
):
    """
    Return the preferred label(s) of ``subject``.

    The properties in ``labelProperties`` are tried in order (by default
    skos:prefLabel before rdfs:label); the labels of the first property
    that has any are returned. When ``lang`` is given only labels in that
    language are considered, and ``lang=""`` selects labels without a
    language tag.

    The result is a list of (labelProp, label) pairs, or ``default``
    (an empty list when ``default`` is None) if nothing matches.

    >>> from rdflib import ConjunctiveGraph, URIRef, RDFS, Literal
    >>> from rdflib.namespace import SKOS
    >>> from pprint import pprint
    >>> g = ConjunctiveGraph()
    >>> u = URIRef("http://example.com/foo")
    >>> g.add([u, RDFS.label, Literal("foo")])
    >>> g.add([u, RDFS.label, Literal("bar")])
    >>> pprint(sorted(g.preferredLabel(u)))
    [(rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#label'),
      rdflib.term.Literal('bar')),
     (rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#label'),
      rdflib.term.Literal('foo'))]
    >>> g.add([u, SKOS.prefLabel, Literal("bla")])
    >>> pprint(g.preferredLabel(u))
    [(rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#prefLabel'),
      rdflib.term.Literal('bla'))]
    >>> g.add([u, SKOS.prefLabel, Literal("blubb", lang="en")])
    >>> sorted(g.preferredLabel(u)) #doctest: +NORMALIZE_WHITESPACE
    [(rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#prefLabel'),
      rdflib.term.Literal('bla')),
      (rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#prefLabel'),
      rdflib.term.Literal('blubb', lang='en'))]
    >>> g.preferredLabel(u, lang="") #doctest: +NORMALIZE_WHITESPACE
    [(rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#prefLabel'),
      rdflib.term.Literal('bla'))]
    >>> pprint(g.preferredLabel(u, lang="en"))
    [(rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#prefLabel'),
      rdflib.term.Literal('blubb', lang='en'))]
    """
    if default is None:
        default = []

    # Choose the language predicate once, before scanning any labels.
    if lang is None:
        # we don't care about language tags
        def accepts(literal):
            return True

    elif lang == "":
        # we only want not language-tagged literals
        def accepts(literal):
            return literal.language is None

    else:

        def accepts(literal):
            return literal.language == lang

    for labelProp in labelProperties:
        matching = [
            candidate
            for candidate in self.objects(subject, labelProp)
            if accepts(candidate)
        ]
        if matching:
            return [(labelProp, found) for found in matching]
    return default
797
def comment(self, subject, default=""):
    """Return an ``RDFS.comment`` value of ``subject``.

    ``default`` is returned when ``subject`` is ``None`` or has no
    comment; when several comments exist any one of them is returned.
    """
    if subject is not None:
        return self.value(subject, RDFS.comment, default=default, any=True)
    return default
806
def items(self, list):
    """Generate the members of the RDF collection whose head is ``list``.

    The collection is walked along ``rdf:rest``, yielding each non-None
    ``rdf:first`` value. Raises ValueError if an ``rdf:rest`` link leads
    back to a node that was already visited.
    """
    visited = {list}
    node = list
    while node:
        member = self.value(node, RDF.first)
        if member is not None:
            yield member
        node = self.value(node, RDF.rest)
        if node in visited:
            raise ValueError("List contains a recursive rdf:rest reference")
        visited.add(node)
821
def transitiveClosure(self, func, arg, seen=None):
    """
    Generate the transitive closure of a user-defined function over the graph.

    ``func(node, graph)`` must return an iterable of nodes. Each node it
    returns is yielded and then expanded in turn, depth first. ``seen``
    records the nodes already expanded so that cycles terminate.

    >>> from rdflib.collection import Collection
    >>> g=Graph()
    >>> a=BNode("foo")
    >>> b=BNode("bar")
    >>> c=BNode("baz")
    >>> g.add((a,RDF.first,RDF.type))
    >>> g.add((a,RDF.rest,b))
    >>> g.add((b,RDF.first,RDFS.label))
    >>> g.add((b,RDF.rest,c))
    >>> g.add((c,RDF.first,RDFS.comment))
    >>> g.add((c,RDF.rest,RDF.nil))
    >>> def topList(node,g):
    ...    for s in g.subjects(RDF.rest, node):
    ...       yield s
    >>> def reverseList(node,g):
    ...    for f in g.objects(node, RDF.first):
    ...       print(f)
    ...    for s in g.subjects(RDF.rest, node):
    ...       yield s

    >>> [rt for rt in g.transitiveClosure(
    ...     topList,RDF.nil)] # doctest: +NORMALIZE_WHITESPACE
    [rdflib.term.BNode('baz'),
     rdflib.term.BNode('bar'),
     rdflib.term.BNode('foo')]

    >>> [rt for rt in g.transitiveClosure(
    ...     reverseList,RDF.nil)] # doctest: +NORMALIZE_WHITESPACE
    http://www.w3.org/2000/01/rdf-schema#comment
    http://www.w3.org/2000/01/rdf-schema#label
    http://www.w3.org/1999/02/22-rdf-syntax-ns#type
    [rdflib.term.BNode('baz'),
     rdflib.term.BNode('bar'),
     rdflib.term.BNode('foo')]

    """
    if seen is not None and arg in seen:
        return
    expanded = {} if seen is None else seen
    expanded[arg] = 1
    for reached in func(arg, self):
        yield reached
        for further in self.transitiveClosure(func, reached, expanded):
            yield further
872
def transitive_objects(self, subject, property, remember=None):
    """Transitively generate objects for the ``property`` relationship.

    Yields ``subject`` itself and then, depth first, everything reachable
    from it by following ``property`` from subject to object. ``remember``
    records visited nodes so that each node is yielded at most once.
    """
    visited = {} if remember is None else remember
    if subject in visited:
        return
    visited[subject] = 1
    yield subject
    for child in self.objects(subject, property):
        for reached in self.transitive_objects(child, property, visited):
            yield reached
888
def transitive_subjects(self, predicate, object, remember=None):
    """Transitively generate subjects for the ``predicate`` relationship.

    Yields ``object`` itself and then, depth first, everything that
    reaches it by following ``predicate`` from object back to subject.
    ``remember`` records visited nodes so that each node is yielded at
    most once.
    """
    visited = {} if remember is None else remember
    if object in visited:
        return
    visited[object] = 1
    yield object
    for parent in self.subjects(predicate, object):
        for reached in self.transitive_subjects(predicate, parent, visited):
            yield reached
904
def seq(self, subject):
    """Return a ``Seq`` for ``subject`` if it is typed as ``rdf:Seq``.

    Returns None when the graph does not contain
    ``(subject, rdf:type, rdf:Seq)``.
    """
    typed_as_seq = (subject, RDF.type, RDF.Seq) in self
    if not typed_as_seq:
        return None
    return Seq(self, subject)
914
def qname(self, uri):
    """Return the namespace manager's qname for ``uri``."""
    manager = self.namespace_manager
    return manager.qname(uri)
917
def compute_qname(self, uri, generate=True):
    """Return the namespace manager's ``compute_qname`` result for ``uri``.

    ``generate`` is passed through positionally to the namespace manager.
    """
    manager = self.namespace_manager
    return manager.compute_qname(uri, generate)
920
def bind(self, prefix, namespace, override=True, replace=False):
    """Bind ``prefix`` to ``namespace`` through the namespace manager.

    If override is True the namespace is bound to the given prefix even
    if it was already bound to a different prefix.

    If replace is True, any existing prefix is replaced with the new
    namespace.

    for example:  graph.bind("foaf", "http://xmlns.com/foaf/0.1/")

    Returns whatever the namespace manager's ``bind`` returns.
    """
    manager = self.namespace_manager
    return manager.bind(prefix, namespace, override=override, replace=replace)
935
def namespaces(self):
    """Generate every ``(prefix, namespace)`` pair known to the namespace manager."""
    manager = self.namespace_manager
    for binding in manager.namespaces():
        # Unpack and re-pack so the shape check on each entry is kept.
        prefix, namespace = binding
        yield prefix, namespace
940
def absolutize(self, uri, defrag=1):
    """Turn ``uri`` into an absolute URI if it is not one already.

    The work is delegated to the namespace manager; ``defrag`` is passed
    through positionally.
    """
    manager = self.namespace_manager
    return manager.absolutize(uri, defrag)
944
def serialize(
    self, destination=None, format="xml", base=None, encoding=None, **args
):
    """Serialize the Graph to destination

    If destination is None the serialization is returned as bytes. If it
    has a ``write`` attribute it is used as the output stream. Otherwise
    it is treated as a location: the output is written to a temporary
    file which is then moved to the location's path, and the temporary
    file is removed again if serializing or moving fails. Locations with
    a network part are not written; a warning is printed instead.

    Format defaults to xml (AKA rdf/xml). Format support can be extended
    with plugins, but "xml", "n3", "turtle", "nt", "pretty-xml", "trix",
    "trig" and "nquads" are built in.

    Returns the serialized bytes when destination is None, else None.
    """

    # if base is not given as attribute use the base set for the graph
    if base is None:
        base = self.base

    serializer = plugin.get(format, Serializer)(self)

    if destination is None:
        stream = BytesIO()
        serializer.serialize(stream, base=base, encoding=encoding, **args)
        return stream.getvalue()

    if hasattr(destination, "write"):
        serializer.serialize(destination, base=base, encoding=encoding, **args)
        return None

    location = destination
    scheme, netloc, path, params, _query, fragment = urlparse(location)
    if netloc != "":
        print("WARNING: not saving as location is not a local file reference")
        return None

    fd, name = tempfile.mkstemp()
    moved = False
    try:
        # The ``with`` block closes the stream even if the serializer raises.
        with os.fdopen(fd, "wb") as stream:
            serializer.serialize(stream, base=base, encoding=encoding, **args)
        shutil.move(name, path)
        moved = True
    finally:
        # Do not leave the temporary file behind when anything failed.
        if not moved and os.path.exists(name):
            os.remove(name)
    return None
986
def parse(
    self,
    source=None,
    publicID=None,
    format=None,
    location=None,
    file=None,
    data=None,
    **args
):
    """
    Parse a source and add the resulting triples to this Graph.

    Exactly which input is read is decided by ``create_input_source`` from
    the `source`, `location`, `file` and `data` arguments.

    :Parameters:

      - `source`: An InputSource, file-like object, or string. In the case
        of a string the string is the location of the source.
      - `location`: A string indicating the relative or absolute URL of the
        source. Graph's absolutize method is used if a relative location
        is specified.
      - `file`: A file-like object.
      - `data`: A string containing the data to be parsed.
      - `format`: Used if format can not be determined from source.
        Defaults to rdf/xml. Format support can be extended with plugins,
        but "xml", "n3", "nt" & "trix" are built in.
      - `publicID`: the logical URI to use as the document base. If None
        specified the document location is used (at least in the case where
        there is a document location).
      - `args`: extra keyword arguments handed to the parser plugin.

    :Returns:

      - self, the graph instance.

    Examples:

    >>> my_data = '''
    ... <rdf:RDF
    ...   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    ...   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
    ... >
    ...   <rdf:Description>
    ...     <rdfs:label>Example</rdfs:label>
    ...     <rdfs:comment>This is really just an example.</rdfs:comment>
    ...   </rdf:Description>
    ... </rdf:RDF>
    ... '''
    >>> import tempfile
    >>> fd, file_name = tempfile.mkstemp()
    >>> f = os.fdopen(fd, "w")
    >>> dummy = f.write(my_data)  # Returns num bytes written on py3
    >>> f.close()

    >>> g = Graph()
    >>> result = g.parse(data=my_data, format="application/rdf+xml")
    >>> len(g)
    2

    >>> g = Graph()
    >>> result = g.parse(location=file_name, format="application/rdf+xml")
    >>> len(g)
    2

    >>> g = Graph()
    >>> with open(file_name, "r") as f:
    ...     result = g.parse(f, format="application/rdf+xml")
    >>> len(g)
    2

    >>> os.remove(file_name)

    """

    input_options = dict(
        source=source,
        publicID=publicID,
        location=location,
        file=file,
        data=data,
        format=format,
    )
    source = create_input_source(**input_options)

    if format is None:
        # Prefer the content type reported by the input source and fall
        # back to RDF/XML when the source does not provide one either.
        format = source.content_type
        if format is None:
            format = "application/rdf+xml"

    parser = plugin.get(format, Parser)()
    try:
        parser.parse(source, self, **args)
    finally:
        # Only close inputs that the input source marked as auto-closing.
        if source.auto_close:
            source.close()
    return self
1083
def load(self, source, publicID=None, format="xml"):
    """Parse ``source`` into this graph by delegating to :meth:`parse`.

    The three arguments are handed positionally to ``self.parse``. Whatever
    ``parse`` returns is discarded, so this method returns ``None``.
    """
    parse_into_graph = self.parse
    parse_into_graph(source, publicID, format)
1086
def query(
    self,
    query_object,
    processor="sparql",
    result="sparql",
    initNs=None,
    initBindings=None,
    use_store_provided=True,
    **kwargs
):
    """
    Run a query against this graph.

    Supplying initial variable bindings through ``initBindings`` gives a
    form of 'prepared query'.

    ``initNs`` holds the namespaces used to resolve prefixes in the query;
    when it is empty or missing, the namespaces of this graph are used.

    If the store has a ``query`` attribute and ``use_store_provided`` is
    true, the store is asked first; a ``NotImplementedError`` from the
    store falls through to the processor/result plugins.

    :returntype: rdflib.query.QueryResult

    """

    if not initBindings:
        initBindings = {}
    if not initNs:
        initNs = dict(self.namespaces())

    if hasattr(self.store, "query") and use_store_provided:
        try:
            # The store is told either to query the union of all graphs
            # or this graph's own identifier.
            target = "__UNION__" if self.default_union else self.identifier
            return self.store.query(
                query_object, initNs, initBindings, target, **kwargs
            )
        except NotImplementedError:
            pass  # store has no own implementation

    # Names are resolved through the plugin registry; instances are used
    # as given.
    if not isinstance(result, query.Result):
        result = plugin.get(result, query.Result)
    if not isinstance(processor, query.Processor):
        processor = plugin.get(processor, query.Processor)(self)

    raw_result = processor.query(query_object, initBindings, initNs, **kwargs)
    return result(raw_result)
1132
def update(
    self,
    update_object,
    processor="sparql",
    initNs=None,
    initBindings=None,
    use_store_provided=True,
    **kwargs
):
    """Apply the given update query to this graph.

    Empty or missing ``initBindings`` become an empty dict and empty or
    missing ``initNs`` default to this graph's namespaces. If the store has
    an ``update`` attribute and ``use_store_provided`` is true the store is
    tried first; a ``NotImplementedError`` from it falls through to the
    update processor plugin.
    """
    if not initBindings:
        initBindings = {}
    if not initNs:
        initNs = dict(self.namespaces())

    if hasattr(self.store, "update") and use_store_provided:
        try:
            # Either the union of all graphs or this graph's identifier.
            target = "__UNION__" if self.default_union else self.identifier
            return self.store.update(
                update_object, initNs, initBindings, target, **kwargs
            )
        except NotImplementedError:
            pass  # store has no own implementation

    if not isinstance(processor, query.UpdateProcessor):
        processor = plugin.get(processor, query.UpdateProcessor)(self)

    return processor.update(update_object, initBindings, initNs, **kwargs)
1162
def n3(self):
    """Return the graph's n3 identifier.

    This is the n3 form of ``self.identifier`` wrapped in square brackets.
    """
    identifier_n3 = self.identifier.n3()
    return "[{0}]".format(identifier_n3)
1166
def __reduce__(self):
    """Pickle support: describe this object as ``Graph(store, identifier)``."""
    constructor_args = (self.store, self.identifier)
    return Graph, constructor_args
1169
def isomorphic(self, other):
    """
    Rough equality check between this graph and ``other``.

    The graphs must have the same length, and every triple whose subject
    and object are both not BNodes must occur in the other graph (checked
    in both directions). Triples involving BNodes are not compared, so a
    ``True`` result can be a false positive when BNodes are present.

    See rdflib.compare for a correct implementation of isomorphism checks
    """
    # TODO: this is only an approximation.
    if len(self) != len(other):
        return False

    def _ground_triples_contained(source, target):
        # Triples that touch a BNode in subject or object position are
        # skipped; all others must be present in ``target``.
        for s, p, o in source:
            if isinstance(s, BNode) or isinstance(o, BNode):
                continue
            if (s, p, o) not in target:
                return False
        return True

    # TODO: very well could be a false positive at this point yet.
    return _ground_triples_contained(self, other) and _ground_triples_contained(
        other, self
    )
1190
def connected(self):
    """Check if the Graph is connected

    The Graph is considered undirectional.

    Performs a search on the Graph, starting from a random node. Then
    iteratively goes depth-first through the triplets where the node is
    subject and object. Return True if all nodes have been visited and
    False if it cannot continue and there are still unvisited nodes left.

    An empty graph (no nodes at all) is reported as not connected.
    """
    all_nodes = list(self.all_nodes())
    if not all_nodes:
        return False

    # take a random one, could also always take the first one, doesn't
    # really matter.
    start = random.choice(all_nodes)

    # A set gives constant-time membership tests; nodes are hashable
    # because all_nodes() collects them in a set.
    discovered = set()
    visiting = [start]
    while visiting:
        x = visiting.pop()
        if x in discovered:
            # The same node may be pushed more than once; handle it once.
            continue
        discovered.add(x)
        for new_x in self.objects(subject=x):
            if new_x not in discovered:
                visiting.append(new_x)
        for new_x in self.subjects(object=x):
            if new_x not in discovered:
                visiting.append(new_x)

    # optimisation by only considering length, since no new objects can
    # be introduced anywhere.
    return len(all_nodes) == len(discovered)
1227
def all_nodes(self):
    """Return a set of every term yielded by ``objects()`` or ``subjects()``."""
    nodes = set(self.objects())
    for subject in self.subjects():
        nodes.add(subject)
    return nodes
1232
def collection(self, identifier):
    """Build a ``Collection`` bound to this graph.

    Parameters:

    - ``identifier``: a URIRef or BNode instance.

    Returns ``Collection(self, identifier)``.

    Example::

        >>> graph = Graph()
        >>> uri = URIRef("http://example.org/resource")
        >>> collection = graph.collection(uri)
        >>> assert isinstance(collection, Collection)
        >>> assert collection.uri is uri
        >>> assert collection.graph is graph
        >>> collection += [ Literal(1), Literal(2) ]
    """
    new_collection = Collection(self, identifier)
    return new_collection
1252
def resource(self, identifier):
    """Build a ``Resource`` bound to this graph.

    Parameters:

    - ``identifier``: a URIRef or BNode instance. Anything that is not
      a ``Node`` is converted with ``URIRef(identifier)`` first.

    Example::

        >>> graph = Graph()
        >>> uri = URIRef("http://example.org/resource")
        >>> resource = graph.resource(uri)
        >>> assert isinstance(resource, Resource)
        >>> assert resource.identifier is uri
        >>> assert resource.graph is graph

    """
    node = identifier if isinstance(identifier, Node) else URIRef(identifier)
    return Resource(self, node)
1273
1274 def _process_skolem_tuples(self, target, func):
1275 for t in self.triples((None, None, None)):
1276 target.add(func(t))
1277
def skolemize(self, new_graph=None, bnode=None, authority=None, basepath=None):
    """Copy this graph's triples into a graph with BNodes skolemized.

    - ``new_graph``: graph that receives the triples; a fresh ``Graph()``
      is created when it is ``None``.
    - ``bnode``: when ``None`` every BNode in subject or object position is
      skolemized; when it is a ``BNode`` only subjects/objects equal to it
      are. For any other value nothing is copied.
    - ``authority``, ``basepath``: forwarded to each term's ``skolemize``.

    Returns the receiving graph.
    """

    def _skolem(term):
        return term.skolemize(authority=authority, basepath=basepath)

    def _replace_given_bnode(t):
        # Only positions equal to the requested bnode are rewritten.
        s, p, o = t
        if s == bnode:
            s = _skolem(s)
        if o == bnode:
            o = _skolem(o)
        return s, p, o

    def _replace_every_bnode(t):
        s, p, o = t
        if isinstance(s, BNode):
            s = _skolem(s)
        if isinstance(o, BNode):
            o = _skolem(o)
        return s, p, o

    retval = Graph() if new_graph is None else new_graph

    if bnode is None:
        self._process_skolem_tuples(retval, _replace_every_bnode)
    elif isinstance(bnode, BNode):
        self._process_skolem_tuples(retval, _replace_given_bnode)

    return retval
1303
def de_skolemize(self, new_graph=None, uriref=None):
    """Copy this graph's triples into a graph with skolem IRIs reverted.

    - ``new_graph``: graph that receives the triples; a fresh ``Graph()``
      is created when it is ``None``.
    - ``uriref``: when ``None`` every ``Genid`` in subject or object
      position is de-skolemized; when it is a ``Genid`` only
      subjects/objects equal to it are. For any other value nothing is
      copied.

    Returns the receiving graph.
    """

    def _restore_given_uriref(t):
        # Only positions equal to the requested uriref are rewritten.
        s, p, o = t
        if s == uriref:
            s = s.de_skolemize()
        if o == uriref:
            o = o.de_skolemize()
        return s, p, o

    def _restore_every_genid(t):
        s, p, o = t
        if isinstance(s, Genid):
            s = s.de_skolemize()
        if isinstance(o, Genid):
            o = o.de_skolemize()
        return s, p, o

    retval = Graph() if new_graph is None else new_graph

    if uriref is None:
        self._process_skolem_tuples(retval, _restore_every_genid)
    elif isinstance(uriref, Genid):
        self._process_skolem_tuples(retval, _restore_given_uriref)

    return retval
1329
1330
class ConjunctiveGraph(Graph):
    """A ConjunctiveGraph is an (unnamed) aggregation of all the named
    graphs in a store.

    It has a ``default`` graph, whose name is associated with the
    graph throughout its life. :meth:`__init__` can take an identifier
    to use as the name of this default graph or it will assign a
    BNode.

    All methods that add triples work against this default graph.

    All queries are carried out against the union of all graphs.
    """

    def __init__(self, store="default", identifier=None, default_graph_base=None):
        super(ConjunctiveGraph, self).__init__(store, identifier=identifier)
        assert self.store.context_aware, (
            "ConjunctiveGraph must be backed by" " a context aware store."
        )
        self.context_aware = True
        self.default_union = True  # Conjunctive!
        # The default graph shares the store and is named by the given
        # identifier, or by a fresh BNode when none was given.
        self.default_context = Graph(
            store=self.store, identifier=identifier or BNode(), base=default_graph_base
        )

    def __str__(self):
        pattern = (
            "[a rdflib:ConjunctiveGraph;rdflib:storage "
            "[a rdflib:Store;rdfs:label '%s']]"
        )
        return pattern % self.store.__class__.__name__

    def _spoc(self, triple_or_quad, default=False):
        """
        helper method for having methods that support
        either triples or quads

        Returns ``(s, p, o, c)``. For ``None`` or a triple, ``c`` is the
        default context when ``default`` is true and ``None`` otherwise.
        For a quad, ``c`` is the fourth element passed through
        :meth:`_graph`.

        Raises ``ValueError`` when the argument has neither 3 nor 4 elements.
        """
        if triple_or_quad is None:
            return (None, None, None, self.default_context if default else None)
        size = len(triple_or_quad)
        if size == 3:
            c = self.default_context if default else None
            (s, p, o) = triple_or_quad
        elif size == 4:
            (s, p, o, c) = triple_or_quad
            c = self._graph(c)
        else:
            # Without this branch the return below fails with an
            # UnboundLocalError that hides the real problem.
            raise ValueError(
                "expected a triple or a quad, got %d elements" % size
            )
        return s, p, o, c

    def __contains__(self, triple_or_quad):
        """Support for 'triple/quad in graph' syntax"""
        s, p, o, c = self._spoc(triple_or_quad)
        for t in self.triples((s, p, o), context=c):
            return True
        return False

    def add(self, triple_or_quad):
        """
        Add a triple or quad to the store.

        if a triple is given it is added to the default context
        """

        s, p, o, c = self._spoc(triple_or_quad, default=True)

        _assertnode(s, p, o)

        self.store.add((s, p, o), context=c, quoted=False)

    def _graph(self, c):
        """Return ``c`` as a Graph: ``None`` and Graph instances pass
        through, anything else is looked up with :meth:`get_context`."""
        if c is None:
            return None
        if not isinstance(c, Graph):
            return self.get_context(c)
        return c

    def addN(self, quads):
        """Add a sequence of triples with context"""

        self.store.addN(
            (s, p, o, self._graph(c)) for s, p, o, c in quads if _assertnode(s, p, o)
        )

    def remove(self, triple_or_quad):
        """
        Removes a triple or quads

        if a triple is given it is removed from all contexts

        a quad is removed from the given context only

        """
        s, p, o, c = self._spoc(triple_or_quad)

        self.store.remove((s, p, o), context=c)

    def triples(self, triple_or_quad, context=None):
        """
        Iterate over all the triples in the entire conjunctive graph

        For legacy reasons, this can take the context to query either
        as a fourth element of the quad, or as the explicit context
        keyword parameter. The kw param takes precedence.
        """

        s, p, o, c = self._spoc(triple_or_quad)
        # Graphs support len(), so an explicitly passed context graph that
        # holds no triples can evaluate as false. ``context or c`` would
        # then silently drop it and widen the query; test for None instead.
        context = self._graph(c if context is None else context)

        if self.default_union:
            if context == self.default_context:
                context = None
        else:
            if context is None:
                context = self.default_context

        if isinstance(p, Path):
            if context is None:
                context = self

            for s, o in p.eval(context, s, o):
                yield s, p, o
        else:
            for (s, p, o), cg in self.store.triples((s, p, o), context=context):
                yield s, p, o

    def quads(self, triple_or_quad=None):
        """Iterate over all the quads in the entire conjunctive graph"""

        s, p, o, c = self._spoc(triple_or_quad)

        for (s, p, o), cg in self.store.triples((s, p, o), context=c):
            for ctx in cg:
                yield s, p, o, ctx

    def triples_choices(self, triple, context=None):
        """Iterate over all the triples in the entire conjunctive graph"""
        s, p, o = triple
        if context is None:
            if not self.default_union:
                context = self.default_context
        else:
            context = self._graph(context)

        for (s1, p1, o1), cg in self.store.triples_choices((s, p, o), context=context):
            yield s1, p1, o1

    def __len__(self):
        """Number of triples in the entire conjunctive graph"""
        return self.store.__len__()

    def contexts(self, triple=None):
        """Iterate over all contexts in the graph

        If triple is specified, iterate over all contexts the triple is in.
        """
        for context in self.store.contexts(triple):
            if isinstance(context, Graph):
                # TODO: One of these should never happen and probably
                # should raise an exception rather than smoothing over
                # the weirdness - see #225
                yield context
            else:
                yield self.get_context(context)

    def get_context(self, identifier, quoted=False, base=None):
        """Return a context graph for the given identifier

        identifier must be a URIRef or BNode.

        ``quoted`` is accepted but not used by this implementation.
        """
        return Graph(store=self.store, identifier=identifier, namespace_manager=self, base=base)

    def remove_context(self, context):
        """Removes the given context from the graph"""
        self.store.remove((None, None, None), context)

    def context_id(self, uri, context_id=None):
        """URI#context"""
        uri = uri.split("#", 1)[0]
        if context_id is None:
            context_id = "#context"
        return URIRef(context_id, base=uri)

    def parse(
        self,
        source=None,
        publicID=None,
        format="xml",
        location=None,
        file=None,
        data=None,
        **args
    ):
        """
        Parse source adding the resulting triples to its own context
        (sub graph of this graph).

        See :meth:`rdflib.graph.Graph.parse` for documentation on arguments.

        :Returns:

        The graph into which the source was parsed. In the case of n3
        it returns the root context.
        """

        source = create_input_source(
            source=source,
            publicID=publicID,
            location=location,
            file=file,
            data=data,
            format=format,
        )

        # The context is named by publicID when given, otherwise by the
        # public id reported by the input source.
        g_id = publicID or source.getPublicId()
        if not isinstance(g_id, Node):
            g_id = URIRef(g_id)

        context = Graph(store=self.store, identifier=g_id)
        context.remove((None, None, None))  # hmm ?
        context.parse(source, publicID=publicID, format=format, **args)
        return context

    def __reduce__(self):
        return ConjunctiveGraph, (self.store, self.identifier)
1554
1555
# Identifier given to the default graph of a Dataset (used by
# Dataset.__init__ and Dataset.contexts).
DATASET_DEFAULT_GRAPH_ID = URIRef("urn:x-rdflib:default")
1557
1558
class Dataset(ConjunctiveGraph):
    __doc__ = """
    RDF 1.1 Dataset. Small extension to the Conjunctive Graph:
    - the primary term is graphs in the datasets and not contexts with quads,
    so there is a separate method to set/retrieve a graph in a dataset and
    operate with graphs
    - graphs cannot be identified with blank nodes
    - added a method to directly add a single quad

    Examples of usage:

    >>> # Create a new Dataset
    >>> ds = Dataset()
    >>> # simple triples goes to default graph
    >>> ds.add((URIRef("http://example.org/a"),
    ...    URIRef("http://www.example.org/b"),
    ...    Literal("foo")))
    >>>
    >>> # Create a graph in the dataset, if the graph name has already been
    >>> # used, the corresponding graph will be returned
    >>> # (ie, the Dataset keeps track of the constituent graphs)
    >>> g = ds.graph(URIRef("http://www.example.com/gr"))
    >>>
    >>> # add triples to the new graph as usual
    >>> g.add(
    ...     (URIRef("http://example.org/x"),
    ...     URIRef("http://example.org/y"),
    ...     Literal("bar")) )
    >>> # alternatively: add a quad to the dataset -> goes to the graph
    >>> ds.add(
    ...     (URIRef("http://example.org/x"),
    ...     URIRef("http://example.org/z"),
    ...     Literal("foo-bar"),g) )
    >>>
    >>> # querying triples return them all regardless of the graph
    >>> for t in ds.triples((None,None,None)):  # doctest: +SKIP
    ...     print(t)  # doctest: +NORMALIZE_WHITESPACE
    (rdflib.term.URIRef("http://example.org/a"),
     rdflib.term.URIRef("http://www.example.org/b"),
     rdflib.term.Literal("foo"))
    (rdflib.term.URIRef("http://example.org/x"),
     rdflib.term.URIRef("http://example.org/z"),
     rdflib.term.Literal("foo-bar"))
    (rdflib.term.URIRef("http://example.org/x"),
     rdflib.term.URIRef("http://example.org/y"),
     rdflib.term.Literal("bar"))
    >>>
    >>> # querying quads return quads; the fourth argument can be unrestricted
    >>> # or restricted to a graph
    >>> for q in ds.quads((None, None, None, None)):  # doctest: +SKIP
    ...     print(q)  # doctest: +NORMALIZE_WHITESPACE
    (rdflib.term.URIRef("http://example.org/a"),
     rdflib.term.URIRef("http://www.example.org/b"),
     rdflib.term.Literal("foo"),
     None)
    (rdflib.term.URIRef("http://example.org/x"),
     rdflib.term.URIRef("http://example.org/y"),
     rdflib.term.Literal("bar"),
     rdflib.term.URIRef("http://www.example.com/gr"))
    (rdflib.term.URIRef("http://example.org/x"),
     rdflib.term.URIRef("http://example.org/z"),
     rdflib.term.Literal("foo-bar"),
     rdflib.term.URIRef("http://www.example.com/gr"))
    >>>
    >>> for q in ds.quads((None,None,None,g)):  # doctest: +SKIP
    ...     print(q)  # doctest: +NORMALIZE_WHITESPACE
    (rdflib.term.URIRef("http://example.org/x"),
     rdflib.term.URIRef("http://example.org/y"),
     rdflib.term.Literal("bar"),
     rdflib.term.URIRef("http://www.example.com/gr"))
    (rdflib.term.URIRef("http://example.org/x"),
     rdflib.term.URIRef("http://example.org/z"),
     rdflib.term.Literal("foo-bar"),
     rdflib.term.URIRef("http://www.example.com/gr"))
    >>> # Note that in the call above -
    >>> # ds.quads((None,None,None,"http://www.example.com/gr"))
    >>> # would have been accepted, too
    >>>
    >>> # graph names in the dataset can be queried:
    >>> for c in ds.graphs():  # doctest: +SKIP
    ...     print(c)  # doctest:
    DEFAULT
    http://www.example.com/gr
    >>> # A graph can be created without specifying a name; a skolemized genid
    >>> # is created on the fly
    >>> h = ds.graph()
    >>> for c in ds.graphs():  # doctest: +SKIP
    ...     print(c)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    DEFAULT
    http://rdlib.net/.well-known/genid/rdflib/N...
    http://www.example.com/gr
    >>> # Note that the Dataset.graphs() call returns names of empty graphs,
    >>> # too. This can be restricted:
    >>> for c in ds.graphs(empty=False):  # doctest: +SKIP
    ...     print(c)  # doctest: +NORMALIZE_WHITESPACE
    DEFAULT
    http://www.example.com/gr
    >>>
    >>> # a graph can also be removed from a dataset via ds.remove_graph(g)

    .. versionadded:: 4.0
    """

    def __init__(self, store="default", default_union=False, default_graph_base=None):
        super(Dataset, self).__init__(store=store, identifier=None)

        if not self.store.graph_aware:
            raise Exception("DataSet must be backed by a graph-aware store!")
        # Replace the default context set up by ConjunctiveGraph with one
        # named by the fixed dataset default-graph identifier.
        self.default_context = Graph(
            store=self.store, identifier=DATASET_DEFAULT_GRAPH_ID, base=default_graph_base
        )

        self.default_union = default_union

    def __str__(self):
        pattern = (
            "[a rdflib:Dataset;rdflib:storage " "[a rdflib:Store;rdfs:label '%s']]"
        )
        return pattern % self.store.__class__.__name__

    def graph(self, identifier=None, base=None):
        """Return the graph for ``identifier`` and register it with the store.

        When ``identifier`` is ``None`` a skolemized BNode is used as the
        name and the ``genid`` prefix is bound (without overriding an
        existing binding). The returned graph's ``base`` is set to ``base``.
        """
        if identifier is None:
            from rdflib.term import rdflib_skolem_genid

            self.bind(
                "genid", "http://rdflib.net" + rdflib_skolem_genid, override=False
            )
            identifier = BNode().skolemize()

        g = self._graph(identifier)
        g.base = base

        self.store.add_graph(g)
        return g

    def parse(
        self,
        source=None,
        publicID=None,
        format="xml",
        location=None,
        file=None,
        data=None,
        **args
    ):
        """Parse as :meth:`ConjunctiveGraph.parse` does, then register the
        resulting context with :meth:`graph` and return it."""
        c = ConjunctiveGraph.parse(
            self, source, publicID, format, location, file, data, **args
        )
        self.graph(c)
        return c

    def add_graph(self, g):
        """alias of graph for consistency"""
        return self.graph(g)

    def remove_graph(self, g):
        """Remove graph ``g`` (a Graph or an identifier) from the store.

        The default graph itself is re-added afterwards, so only its
        triples are deleted.
        """
        if not isinstance(g, Graph):
            g = self.get_context(g)

        self.store.remove_graph(g)
        if g is None or g == self.default_context:
            # default graph cannot be removed
            # only triples deleted, so add it back in
            self.store.add_graph(self.default_context)

    def contexts(self, triple=None):
        """Iterate over the contexts as :meth:`ConjunctiveGraph.contexts`
        does, additionally yielding the default graph when it was not among
        them."""
        default = False
        for c in super(Dataset, self).contexts(triple):
            default |= c.identifier == DATASET_DEFAULT_GRAPH_ID
            yield c
        if not default:
            yield self.graph(DATASET_DEFAULT_GRAPH_ID)

    graphs = contexts

    def quads(self, quad=None):
        """Iterate over quads, yielding graph identifiers as fourth element.

        ``quad`` defaults to ``None`` (no restriction), matching
        :meth:`ConjunctiveGraph.quads`.
        """
        for s, p, o, c in super(Dataset, self).quads(quad):
            # NOTE(review): this compares a graph *identifier* with the
            # default context *Graph*; confirm whether
            # ``self.default_context.identifier`` was intended. Left as is
            # so the values currently yielded do not change.
            if c.identifier == self.default_context:
                yield s, p, o, None
            else:
                yield s, p, o, c.identifier
1740
1741
class QuotedGraph(Graph):
    """
    Quoted Graphs are intended to implement Notation 3 formulae. They are
    associated with a required identifier that the N3 parser *must* provide
    in order to maintain consistent formulae identification for scenarios
    such as implication and other such processing.
    """

    def __init__(self, store, identifier):
        super(QuotedGraph, self).__init__(store, identifier)

    def add(self, triple):
        """Add a triple with self as context"""
        s, p, o = triple
        assert isinstance(s, Node), "Subject %s must be an rdflib term" % (s,)
        assert isinstance(p, Node), "Predicate %s must be an rdflib term" % (p,)
        assert isinstance(o, Node), "Object %s must be an rdflib term" % (o,)

        self.store.add((s, p, o), self, quoted=True)

    def addN(self, quads):
        """Add a sequence of triple with context

        Only quads whose context is a QuotedGraph with the same identifier
        as this graph are passed on to the store; all others are skipped.
        """

        # Identifiers are compared by value: two identifier objects that
        # are equal name the same formula, and an identity test would
        # silently drop such quads.
        self.store.addN(
            (s, p, o, c)
            for s, p, o, c in quads
            if isinstance(c, QuotedGraph)
            and c.identifier == self.identifier
            and _assertnode(s, p, o)
        )

    def n3(self):
        """Return an n3 identifier for the Graph"""
        return "{%s}" % self.identifier.n3()

    def __str__(self):
        identifier = self.identifier.n3()
        label = self.store.__class__.__name__
        pattern = (
            "{this rdflib.identifier %s;rdflib:storage "
            "[a rdflib:Store;rdfs:label '%s']}"
        )
        return pattern % (identifier, label)

    def __reduce__(self):
        return QuotedGraph, (self.store, self.identifier)
1788
1789
# Make sure QuotedGraph is ordered correctly with respect to other Terms.
# This must be done here, because importing QuotedGraph in term.py would
# create a circular import.
rdflib.term._ORDERING[QuotedGraph] = 11
1795
1796
class Seq(object):
    """Wrapper around an RDF Seq resource

    It implements a container type in Python with the order of the items
    returned corresponding to the Seq content. It is based on the natural
    ordering of the predicate names _1, _2, _3, etc, which is the
    'implementation' of a sequence in RDF terms.
    """

    def __init__(self, graph, subject):
        """Parameters:

        - graph:
            the graph containing the Seq

        - subject:
            the subject of a Seq. Note that the init does not
            check whether this is a Seq, this is done in whoever
            creates this instance!
        """

        _list = self._list = list()
        LI_INDEX = URIRef(str(RDF) + "_")
        for (p, o) in graph.predicate_objects(subject):
            if p.startswith(LI_INDEX):  # != RDF.Seq: #
                i = int(p.replace(LI_INDEX, ""))
                _list.append((i, o))

        # here is the trick: the predicates are _1, _2, _3, etc. Ie,
        # by sorting the keys (by integer) we have what we want!
        _list.sort()

    def toPython(self):
        """Return this Seq itself."""
        return self

    def __iter__(self):
        """Generator over the items in the Seq"""
        for _, item in self._list:
            yield item

    def __len__(self):
        """Length of the Seq"""
        return len(self._list)

    def __getitem__(self, index):
        """Item given by index from the Seq

        A slice returns the list of items in that slice.
        """
        if isinstance(index, slice):
            # Slicing the internal list gives (position, item) pairs;
            # unpacking that list as a single pair would be wrong.
            return [item for _, item in self._list[index]]
        _, item = self._list[index]
        return item
1845
1846
class ModificationException(Exception):
    """Error raised by ReadOnlyGraphAggregate for modifying or
    transactional operations."""

    def __init__(self):
        # Takes no arguments and stores nothing; the message is produced
        # by __str__.
        pass

    def __str__(self):
        parts = (
            "Modifications and transactional operations not allowed on",
            "ReadOnlyGraphAggregate instances",
        )
        return " ".join(parts)
1856
1857
class UnSupportedAggregateOperation(Exception):
    """Error raised by ReadOnlyGraphAggregate for operations it does not
    support."""

    def __init__(self):
        # Takes no arguments and stores nothing; the message is produced
        # by __str__.
        pass

    def __str__(self):
        parts = (
            "This operation is not supported by ReadOnlyGraphAggregate",
            "instances",
        )
        return " ".join(parts)
1864
1865
class ReadOnlyGraphAggregate(ConjunctiveGraph):
    """Utility class for treating a set of graphs as a single graph

    Only read operations are supported (hence the name). Essentially a
    ConjunctiveGraph over an explicit subset of the entire store.

    Modifying and transactional methods raise ``ModificationException``;
    other unsupported methods raise ``UnSupportedAggregateOperation``.
    """

    def __init__(self, graphs, store="default"):
        """Parameters:

        - graphs:
            a non-empty list of Graph instances to aggregate

        - store:
            store handed to the base-class initialisers; they are
            skipped when it is None
        """
        if store is not None:
            super(ReadOnlyGraphAggregate, self).__init__(store)
            Graph.__init__(self, store)
            self.__namespace_manager = None

        # Every element has to be a Graph, not merely some of them.
        assert (
            isinstance(graphs, list)
            and graphs
            and all(isinstance(g, Graph) for g in graphs)
        ), "graphs argument must be a list of Graphs!!"
        self.graphs = graphs

    def __repr__(self):
        return "<ReadOnlyGraphAggregate: %s graphs>" % len(self.graphs)

    def destroy(self, configuration):
        raise ModificationException()

    # Transactional interfaces (optional)
    def commit(self):
        raise ModificationException()

    def rollback(self):
        raise ModificationException()

    def open(self, configuration, create=False):
        """Open every member graph with the given arguments."""
        # TODO: is there a use case for this method?
        for graph in self.graphs:
            # ``graph.open`` is a bound method; passing ``self`` as well
            # would shift every argument by one position.
            graph.open(configuration, create)

    def close(self):
        """Close every member graph."""
        for graph in self.graphs:
            graph.close()

    def add(self, triple):
        raise ModificationException()

    def addN(self, quads):
        raise ModificationException()

    def remove(self, triple):
        raise ModificationException()

    def triples(self, triple):
        """Iterate over the triples matching ``triple`` in the member graphs.

        A ``Path`` predicate is evaluated once against the aggregate as a
        whole; any other pattern is matched against each member graph in
        turn.
        """
        s, p, o = triple
        if isinstance(p, Path):
            # The path is evaluated over ``self``, which already covers all
            # member graphs, so evaluating it once per member would only
            # repeat the results. Separate names keep the caller's pattern
            # intact.
            for s1, o1 in p.eval(self, s, o):
                yield s1, p, o1
            return
        for graph in self.graphs:
            for s1, p1, o1 in graph.triples((s, p, o)):
                yield s1, p1, o1

    def __contains__(self, triple_or_quad):
        """Support for 'triple/quad in aggregate' syntax.

        For a quad only member graphs whose identifier equals the
        identifier of the fourth element are searched.
        """
        context = None
        if len(triple_or_quad) == 4:
            context = triple_or_quad[3]
        for graph in self.graphs:
            if context is None or graph.identifier == context.identifier:
                if triple_or_quad[:3] in graph:
                    return True
        return False

    def quads(self, triple):
        """Iterate over all the quads in the entire aggregate graph"""
        s, p, o = triple
        for graph in self.graphs:
            for s1, p1, o1 in graph.triples((s, p, o)):
                yield s1, p1, o1, graph

    def __len__(self):
        return sum(len(g) for g in self.graphs)

    def __hash__(self):
        raise UnSupportedAggregateOperation()

    def __cmp__(self, other):
        if other is None:
            return -1
        # Test for aggregates before plain graphs: an aggregate is itself
        # a Graph, so the more general test would shadow this branch.
        elif isinstance(other, ReadOnlyGraphAggregate):
            return (self.graphs > other.graphs) - (self.graphs < other.graphs)
        elif isinstance(other, Graph):
            return -1
        else:
            return -1

    def __iadd__(self, other):
        raise ModificationException()

    def __isub__(self, other):
        raise ModificationException()

    # Conv. methods

    def triples_choices(self, triple, context=None):
        """Yield the ``triples_choices`` results of every member graph;
        ``context`` is not used."""
        subject, predicate, object_ = triple
        for graph in self.graphs:
            choices = graph.triples_choices((subject, predicate, object_))
            for (s, p, o) in choices:
                yield s, p, o

    def qname(self, uri):
        if hasattr(self, "namespace_manager") and self.namespace_manager:
            return self.namespace_manager.qname(uri)
        raise UnSupportedAggregateOperation()

    def compute_qname(self, uri, generate=True):
        if hasattr(self, "namespace_manager") and self.namespace_manager:
            return self.namespace_manager.compute_qname(uri, generate)
        raise UnSupportedAggregateOperation()

    def bind(self, prefix, namespace, override=True):
        raise UnSupportedAggregateOperation()

    def namespaces(self):
        """Yield ``(prefix, namespace)`` pairs from the namespace manager
        when this object has one, otherwise from every member graph."""
        if hasattr(self, "namespace_manager"):
            for prefix, namespace in self.namespace_manager.namespaces():
                yield prefix, namespace
        else:
            for graph in self.graphs:
                for prefix, namespace in graph.namespaces():
                    yield prefix, namespace

    def absolutize(self, uri, defrag=1):
        raise UnSupportedAggregateOperation()

    def parse(self, source, publicID=None, format="xml", **args):
        raise ModificationException()

    def n3(self):
        raise UnSupportedAggregateOperation()

    def __reduce__(self):
        raise UnSupportedAggregateOperation()
2008
2009
2010 def _assertnode(*terms):
2011 for t in terms:
2012 assert isinstance(t, Node), "Term %s must be an rdflib term" % (t,)
2013 return True
2014
2015
def test():
    """Run ``doctest.testmod()`` with its default arguments."""
    from doctest import testmod

    testmod()
2020
2021
# Run the doctests when this file is executed as a script.
if __name__ == "__main__":
    test()