comparison planemo/lib/python3.7/site-packages/rdflib/term.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 """
2 This module defines the different types of terms. Terms are the kinds of
3 objects that can appear in a quoted/asserted triple. This includes those
4 that are core to RDF:
5
6 * :class:`Blank Nodes <rdflib.term.BNode>`
7 * :class:`URI References <rdflib.term.URIRef>`
8 * :class:`Literals <rdflib.term.Literal>` (which consist of a literal value, datatype and language tag)
9
10 Those that extend the RDF model into N3:
11
12 * :class:`Formulae <rdflib.graph.QuotedGraph>`
13 * :class:`Universal Quantifications (Variables) <rdflib.term.Variable>`
14
15 And those that are primarily for matching against 'Nodes' in the
16 underlying Graph:
17
18 * REGEX Expressions
19 * Date Ranges
20 * Numerical Ranges
21
22 """
23
# Names exported by ``from rdflib.term import *``.
__all__ = [
    'bind',

    'Node',
    'Identifier',

    'URIRef',
    'BNode',
    'Literal',

    'Variable',
    'Statement',
]
37
38 import logging
39 logger = logging.getLogger(__name__)
40 import warnings
41 import math
42
43 import base64
44 import xml.dom.minidom
45
46 from urllib.parse import urlparse, urljoin, urldefrag
47 from datetime import date, time, datetime
48 from re import sub, compile
49 from collections import defaultdict
50
51 from isodate import parse_time, parse_date, parse_datetime
52
53 try:
54 from hashlib import md5
55 assert md5
56 except ImportError:
57 from md5 import md5
58
59
60 import rdflib
61 from . import py3compat
62 from rdflib.compat import numeric_greater
63
64
# Helper taken from py3compat; used below as b("U") / b("B") when updating
# an md5 digest.
b = py3compat.b

# Path prefix tested by Genid._is_external_skolem.
skolem_genid = "/.well-known/genid/"
# Path prefix produced by BNode.skolemize and tested by
# RDFLibGenid._is_rdflib_skolem.
rdflib_skolem_genid = "/.well-known/genid/rdflib/"
# Filled by URIRef.de_skolemize: skolem URI string -> BNode created for it.
skolems = {}
70
71
72 _invalid_uri_chars = '<>" {}|\\^`'
73
74 def _is_valid_uri(uri):
75 for c in _invalid_uri_chars:
76 if c in uri: return False
77 return True
78
79 _lang_tag_regex = compile('^[a-zA-Z]+(?:-[a-zA-Z0-9]+)*$')
80
81 def _is_valid_langtag(tag):
82 return bool(_lang_tag_regex.match(tag))
83
84 def _is_valid_unicode(value):
85 """
86 Verify that the provided value can be converted into a Python
87 unicode object.
88 """
89 if isinstance(value, bytes):
90 coding_func, param = getattr(value, 'decode'), 'utf-8'
91 elif py3compat.PY3:
92 coding_func, param = str, value
93 else:
94 coding_func, param = str, value
95
96 # try to convert value into unicode
97 try:
98 coding_func(param)
99 except UnicodeError:
100 return False
101 return True
102
class Node(object):
    """
    A Node in the Graph.
    """

    __slots__ = ()


class Identifier(Node, str):  # allow Identifiers to be Nodes in the Graph
    """
    See http://www.w3.org/2002/07/rdf-identifer-terminology/
    regarding choice of terminology.
    """

    __slots__ = ()

    def __new__(cls, value):
        return str.__new__(cls, value)

    def eq(self, other):
        """A "semantic"/interpreted equality function,
        by default, same as __eq__"""
        return self.__eq__(other)

    def neq(self, other):
        """A "semantic"/interpreted not equal function,
        by default, same as __ne__"""
        return self.__ne__(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __eq__(self, other):
        """
        Equality for Nodes.

        Two identifiers are equal only when they have exactly the same
        type and the same string value.

        >>> BNode("foo")==None
        False
        >>> BNode("foo")==URIRef("foo")
        False
        >>> URIRef("foo")==BNode("foo")
        False
        >>> BNode("foo")!=URIRef("foo")
        True
        >>> URIRef("foo")!=BNode("foo")
        True
        >>> Variable('a')!=URIRef('a')
        True
        >>> Variable('a')!=Variable('a')
        False
        """

        if type(self) == type(other):
            return str(self) == str(other)
        else:
            return False

    def __gt__(self, other):
        """
        This implements ordering for Nodes,

        This tries to implement this:
        http://www.w3.org/TR/sparql11-query/#modOrderBy

        Variables are not included in the SPARQL list, but
        they are greater than BNodes and smaller than everything else

        Returns NotImplemented when *other* is neither None nor a Node.
        """
        if other is None:
            return True  # everything bigger than None
        elif type(self) == type(other):
            return str(self) > str(other)
        elif isinstance(other, Node):
            return _ORDERING[type(self)] > _ORDERING[type(other)]

        return NotImplemented

    def __lt__(self, other):
        """Counterpart of __gt__; NotImplemented for non-Node operands."""
        if other is None:
            return False  # Nothing is less than None
        elif type(self) == type(other):
            return str(self) < str(other)
        elif isinstance(other, Node):
            return _ORDERING[type(self)] < _ORDERING[type(other)]

        return NotImplemented

    def __le__(self, other):
        """Less-than-or-equal, built on __lt__ and __eq__."""
        r = self.__lt__(other)
        # NotImplemented must be handed back to the interpreter; testing its
        # truth value would wrongly report "less than" for foreign operands.
        if r is NotImplemented:
            return NotImplemented
        return r or self == other

    def __ge__(self, other):
        """Greater-than-or-equal, built on __gt__ and __eq__."""
        r = self.__gt__(other)
        # See __le__: never treat NotImplemented as a truthy result.
        if r is NotImplemented:
            return NotImplemented
        return r or self == other

    def __hash__(self):
        # Mix the fully-qualified type name into the hash so that equal
        # strings of different node types hash differently.
        t = type(self)
        fqn = t.__module__ + '.' + t.__name__
        return hash(fqn) ^ hash(str(self))
206
207
class URIRef(Identifier):
    """
    RDF URI Reference: http://www.w3.org/TR/rdf-concepts/#section-Graph-URIref
    """

    __slots__ = ()

    def __new__(cls, value, base=None):
        """
        Create the reference, resolving *value* against *base* when given.

        A string that fails the limited ``_is_valid_uri`` check is still
        accepted; only a warning is logged.
        """
        if base is not None:
            had_trailing_hash = value.endswith("#")
            value = urljoin(base, value, allow_fragments=1)
            # re-append the "#" if joining removed it
            if had_trailing_hash and not value.endswith("#"):
                value += "#"

        if not _is_valid_uri(value):
            logger.warning('%s does not look like a valid URI, trying to serialize this will break.'%value)

        try:
            return str.__new__(cls, value)
        except UnicodeDecodeError:
            return str.__new__(cls, value, 'utf-8')

    def toPython(self):
        """Return the plain string value."""
        return str(self)

    def n3(self, namespace_manager = None):
        """
        This will do a limited check for valid URIs,
        essentially just making sure that the string includes no illegal
        characters (``<, >, ", {, }, |, \\, `, ^``)

        :param namespace_manager: if not None, will be used to make up
             a prefixed name
        """
        if not _is_valid_uri(self):
            raise Exception('"%s" does not look like a valid URI, I cannot serialize this as N3/Turtle. Perhaps you wanted to urlencode it?'%self)

        if namespace_manager:
            return namespace_manager.normalizeUri(self)
        return "<%s>" % self

    def defrag(self):
        """Return a URIRef without the fragment; self when there is no "#"."""
        if "#" not in self:
            return self
        return URIRef(urldefrag(self)[0])

    def __reduce__(self):
        return (URIRef, (str(self),))

    def __getnewargs__(self):
        return (str(self), )

    if not py3compat.PY3:
        def __str__(self):
            return self.encode()

    def __repr__(self):
        clsName = ("rdflib.term.URIRef" if self.__class__ is URIRef
                   else self.__class__.__name__)
        return """%s(%s)""" % (clsName, super(URIRef, self).__repr__())

    def __add__(self, other):
        return self.__class__(str(self) + other)

    def __radd__(self, other):
        return self.__class__(other + str(self))

    def __mod__(self, other):
        return self.__class__(str(self) % other)

    def md5_term_hash(self):
        """a string of hex that will be the same for two URIRefs that
        are the same. It is not a suitable unique id.

        Supported for backwards compatibility; new code should
        probably just use __hash__
        """
        warnings.warn(
            "method md5_term_hash is deprecated, and will be removed " +
            "in the future. If you use this please let rdflib-dev know!",
            category=DeprecationWarning, stacklevel=2)
        digest = md5(self.encode())
        digest.update(b("U"))
        return digest.hexdigest()

    def de_skolemize(self):
        """ Create a Blank Node from a skolem URI, in accordance
        with http://www.w3.org/TR/rdf11-concepts/#section-skolemization.
        This function accepts only rdflib type skolemization, to provide
        a round-tripping within the system.

        .. versionadded:: 4.0
        """
        if isinstance(self, RDFLibGenid):
            # the BNode id is whatever follows the rdflib genid prefix
            path = urlparse("%s" % self).path
            return BNode(value=path[len(rdflib_skolem_genid):])

        if isinstance(self, Genid):
            # other skolem URIs: hand out one fresh BNode per URI and reuse it
            key = "%s" % self
            if key not in skolems:
                skolems[key] = BNode()
            return skolems[key]

        raise Exception("<%s> is not a skolem URI" % self)
325
326
class Genid(URIRef):
    __slots__ = ()

    @staticmethod
    def _is_external_skolem(uri):
        """
        Return True when the last occurrence of ``skolem_genid`` in the
        path component of *uri* is at index 0.
        """
        uri_text = uri if isinstance(uri, str) else str(uri)
        return urlparse(uri_text).path.rfind(skolem_genid) == 0
339
340
class RDFLibGenid(Genid):
    __slots__ = ()

    @staticmethod
    def _is_rdflib_skolem(uri):
        """
        Return True when *uri* has empty params, query and fragment and the
        last occurrence of ``rdflib_skolem_genid`` in its path is at index 0.
        """
        uri_text = uri if isinstance(uri, str) else str(uri)
        parts = urlparse(uri_text)
        extras = (parts.params, parts.query, parts.fragment)
        if any(extra != "" for extra in extras):
            return False
        return parts.path.rfind(rdflib_skolem_genid) == 0
357
358
359 def _unique_id():
360 # Used to read: """Create a (hopefully) unique prefix"""
361 # now retained merely to leave interal API unchanged.
362 # From BNode.__new__() below ...
363 #
364 # acceptable bnode value range for RDF/XML needs to be
365 # something that can be serialzed as a nodeID for N3
366 #
367 # BNode identifiers must be valid NCNames" _:[A-Za-z][A-Za-z0-9]*
368 # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID
369 return "N" # ensure that id starts with a letter
370
371
372 def _serial_number_generator():
373 """
374 Generates UUID4-based but ncname-compliant identifiers.
375 """
376 from uuid import uuid4
377
378 def _generator():
379 return uuid4().hex
380
381 return _generator
382
383
class BNode(Identifier):
    """
    Blank Node: http://www.w3.org/TR/rdf-concepts/#section-blank-nodes

    """
    __slots__ = ()

    def __new__(cls, value=None,
                _sn_gen=_serial_number_generator(), _prefix=_unique_id()):
        """
        # only store implementations should pass in a value
        """
        if value is None:
            # so that BNode values do not collide with ones created with
            # a different instance of this module at some other time.
            value = "%s%s" % (_prefix, _sn_gen())
        # TODO: a caller-supplied value is not checked.  For RDF/XML it needs
        # to be something that can be serialized as a nodeID for N3, i.e. a
        # valid NCName (_:[A-Za-z][A-Za-z0-9]*), unless that constraint is
        # enforced elsewhere.
        # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID
        return Identifier.__new__(cls, value)

    def toPython(self):
        """Return the plain string value."""
        return str(self)

    def n3(self, namespace_manager=None):
        """Return the ``_:`` prefixed form; *namespace_manager* is unused."""
        return "_:%s" % self

    def __getnewargs__(self):
        return (str(self), )

    def __reduce__(self):
        return (BNode, (str(self),))

    if not py3compat.PY3:
        def __str__(self):
            return self.encode()

    def __repr__(self):
        clsName = ("rdflib.term.BNode" if self.__class__ is BNode
                   else self.__class__.__name__)
        return """%s('%s')""" % (clsName, str(self))

    def md5_term_hash(self):
        """a string of hex that will be the same for two BNodes that
        are the same. It is not a suitable unique id.

        Supported for backwards compatibility; new code should
        probably just use __hash__
        """
        warnings.warn(
            "method md5_term_hash is deprecated, and will be removed " +
            "in the future. If you use this please let rdflib-dev know!",
            category=DeprecationWarning, stacklevel=2)
        digest = md5(self.encode())
        digest.update(b("B"))
        return digest.hexdigest()

    def skolemize(self, authority="http://rdlib.net/"):
        """ Create a URIRef "skolem" representation of the BNode, in accordance
        with http://www.w3.org/TR/rdf11-concepts/#section-skolemization

        .. versionadded:: 4.0
        """
        skolem_path = "%s%s" % (rdflib_skolem_genid, str(self))
        return URIRef(urljoin(authority, skolem_path))
457
458
459 class Literal(Identifier):
460 __doc__ = py3compat.format_doctest_out("""
461 RDF Literal: http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal
462
463 The lexical value of the literal is the unicode object
464 The interpreted, datatyped value is available from .value
465
466 Language tags must be valid according to :rfc:5646
467
468 For valid XSD datatypes, the lexical form is optionally normalized
469 at construction time. Default behaviour is set by rdflib.NORMALIZE_LITERALS
470 and can be overridden by the normalize parameter to __new__
471
472 Equality and hashing of Literals are done based on the lexical form, i.e.:
473
474 >>> from rdflib.namespace import XSD
475
476 >>> Literal('01')!=Literal('1') # clear - strings differ
477 True
478
479 but with data-type they get normalized:
480
481 >>> Literal('01', datatype=XSD.integer)!=Literal('1', datatype=XSD.integer)
482 False
483
484 unless disabled:
485
486 >>> Literal('01', datatype=XSD.integer, normalize=False)!=Literal('1', datatype=XSD.integer)
487 True
488
489
490 Value based comparison is possible:
491
492 >>> Literal('01', datatype=XSD.integer).eq(Literal('1', datatype=XSD.float))
493 True
494
495 The eq method also provides limited support for basic python types:
496
497 >>> Literal(1).eq(1) # fine - int compatible with xsd:integer
498 True
499 >>> Literal('a').eq('b') # fine - str compatible with plain-lit
500 False
501 >>> Literal('a', datatype=XSD.string).eq('a') # fine - str compatible with xsd:string
502 True
503 >>> Literal('a').eq(1) # not fine, int incompatible with plain-lit
504 NotImplemented
505
506 Greater-than/less-than ordering comparisons are also done in value
507 space, when compatible datatypes are used. Incompatible datatypes
508 are ordered by DT, or by lang-tag. For other nodes the ordering
509 is None < BNode < URIRef < Literal
510
511 Any comparison with non-rdflib Node are "NotImplemented"
512 In PY2.X some stable order will be made up by python
513
514 In PY3 this is an error.
515
516 >>> from rdflib import Literal, XSD
517 >>> lit2006 = Literal('2006-01-01',datatype=XSD.date)
518 >>> lit2006.toPython()
519 datetime.date(2006, 1, 1)
520 >>> lit2006 < Literal('2007-01-01',datatype=XSD.date)
521 True
522 >>> Literal(datetime.utcnow()).datatype
523 rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#dateTime')
524 >>> Literal(1) > Literal(2) # by value
525 False
526 >>> Literal(1) > Literal(2.0) # by value
527 False
528 >>> Literal('1') > Literal(1) # by DT
529 True
530 >>> Literal('1') < Literal('1') # by lexical form
531 False
532 >>> Literal('a', lang='en') > Literal('a', lang='fr') # by lang-tag
533 False
534 >>> Literal(1) > URIRef('foo') # by node-type
535 True
536
537 The > < operators will eat this NotImplemented and either make up
538 an ordering (py2.x) or throw a TypeError (py3k):
539
540 >>> Literal(1).__gt__(2.0)
541 NotImplemented
542
543
544 """)
545
546
547 if not py3compat.PY3:
548 __slots__ = ("language", "datatype", "value", "_language",
549 "_datatype", "_value")
550 else:
551 __slots__ = ("_language", "_datatype", "_value")
552
def __new__(cls, lexical_or_value, lang=None, datatype=None, normalize=None):
    """
    Build a Literal from another Literal, a string (lexical form) or some
    other Python object.

    :param lexical_or_value: Literal, str, or Python object to convert
    :param lang: language tag; an empty string is treated as None
    :param datatype: datatype URI; wrapped in URIRef when truthy
    :param normalize: whether to normalise the lexical form of a string
        input; None means "use rdflib.NORMALIZE_LITERALS"
    :raises TypeError: when both lang and datatype are given
    :raises Exception: when lang is not a valid language tag
    """

    if lang == '':
        lang = None  # no empty lang-tags in RDF

    if normalize is None:
        normalize = rdflib.NORMALIZE_LITERALS

    if lang is not None and datatype is not None:
        raise TypeError(
            "A Literal can only have one of lang or datatype, "
            "per http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal")

    if lang and not _is_valid_langtag(lang):
        raise Exception("'%s' is not a valid language tag!"%lang)

    if datatype:
        datatype = URIRef(datatype)

    value = None
    if isinstance(lexical_or_value, Literal):
        # create from another Literal instance

        lang = lang or lexical_or_value.language
        if datatype:
            # override datatype
            value = _castLexicalToPython(lexical_or_value, datatype)
        else:
            datatype = lexical_or_value.datatype
            value = lexical_or_value.value

    elif isinstance(lexical_or_value, str):
        # passed a string
        # try parsing lexical form of datatyped literal
        value = _castLexicalToPython(lexical_or_value, datatype)

        if value is not None and normalize:
            # replace the lexical form by the one derived from the value
            _value, _datatype = _castPythonToLiteral(value)
            if _value is not None and _is_valid_unicode(_value):
                lexical_or_value = _value

    else:
        # passed some python object
        value = lexical_or_value
        _value, _datatype = _castPythonToLiteral(lexical_or_value)

        # an explicitly given datatype wins over the derived one
        datatype = datatype or _datatype
        if _value is not None:
            lexical_or_value = _value
        if datatype:
            lang = None

    if py3compat.PY3 and isinstance(lexical_or_value, bytes):
        lexical_or_value = lexical_or_value.decode('utf-8')

    try:
        inst = str.__new__(cls, lexical_or_value)
    except UnicodeDecodeError:
        inst = str.__new__(cls, lexical_or_value, 'utf-8')

    inst._language = lang
    inst._datatype = datatype
    inst._value = value
    return inst
616
@py3compat.format_doctest_out
def normalize(self):
    """
    Returns a new literal with a normalised lexical representation
    of this literal
    >>> from rdflib import XSD
    >>> Literal("01", datatype=XSD.integer, normalize=False).normalize()
    rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    Illegal lexical forms for the datatype given are simply passed on
    >>> Literal("a", datatype=XSD.integer, normalize=False)
    rdflib.term.Literal(%(u)s'a', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    When the literal has no Python value, the literal itself is returned.
    """

    # identity test: the stored value may be any Python object, so do not
    # rely on its own comparison operators to detect "no value"
    if self.value is not None:
        return Literal(self.value, datatype=self.datatype, lang=self.language)
    else:
        return self
636
@property
def value(self):
    """Read-only view of the ``_value`` slot (the Python value, possibly None)."""
    return self._value
640
@property
def language(self):
    """Read-only view of the ``_language`` slot (the language tag, or None)."""
    return self._language
644
@property
def datatype(self):
    """Read-only view of the ``_datatype`` slot (the datatype URI, or None)."""
    return self._datatype
648
def __reduce__(self):
    # Rebuild through the constructor from lexical form, language and datatype.
    return (Literal, (str(self), self.language, self.datatype),)
651
def __getstate__(self):
    # State is a 2-tuple: None, then a dict holding language and datatype.
    return (None, dict(language=self.language, datatype=self.datatype))
654
def __setstate__(self, arg):
    """
    Restore language and datatype from the ``(None, dict)`` state tuple
    and re-derive the Python value.
    """
    _, d = arg
    self._language = d["language"]
    self._datatype = d["datatype"]
    # The state carries no Python value.  Rebuild it from the lexical form
    # with the same helper __new__ uses; otherwise the _value slot stays
    # unset and reading .value raises AttributeError.
    self._value = _castLexicalToPython(self, self._datatype)
659
@py3compat.format_doctest_out
def __add__(self, val):
    """
    Add in value space when possible, otherwise concatenate lexical forms.

    >>> Literal(1) + 1
    rdflib.term.Literal(%(u)s'2', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
    >>> Literal("1") + "1"
    rdflib.term.Literal(%(u)s'11')
    """

    native = self.toPython()
    if not isinstance(native, Literal):
        try:
            return Literal(native + val)
        except TypeError:
            pass  # fall-through

    # string concatenation keeps this literal's language and datatype
    joined = str.__add__(self, val)
    return Literal(joined, self.language, self.datatype)
678
def __bool__(self):
    """
    Is the Literal "True"
    This is used for if statements, bool(literal), etc.

    Uses the truth of the Python value when there is one, otherwise
    whether the lexical form is non-empty.
    """
    # identity test: the stored value may be any Python object, so do not
    # rely on its own comparison operators to detect "no value"
    if self.value is not None:
        return bool(self.value)
    return len(self) != 0
687
@py3compat.format_doctest_out
def __neg__(self):
    """
    Negate a literal whose Python value is an int or float.

    >>> (- Literal(1))
    rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
    >>> (- Literal(10.5))
    rdflib.term.Literal(%(u)s'-10.5', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#double'))
    >>> from rdflib.namespace import XSD
    >>> (- Literal("1", datatype=XSD.integer))
    rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> (- Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(%(u)s'1')
    """

    if not isinstance(self.value, (int, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(self.value.__neg__())
710
@py3compat.format_doctest_out
def __pos__(self):
    """
    Unary plus for a literal whose Python value is an int or float.

    >>> (+ Literal(1))
    rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
    >>> (+ Literal(-1))
    rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
    >>> from rdflib.namespace import XSD
    >>> (+ Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> (+ Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(%(u)s'1')
    """
    if not isinstance(self.value, (int, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(self.value.__pos__())
731
@py3compat.format_doctest_out
def __abs__(self):
    """
    Absolute value of a literal whose Python value is an int or float.

    >>> abs(Literal(-1))
    rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> from rdflib.namespace import XSD
    >>> abs( Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> abs(Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(%(u)s'1')
    """
    if not isinstance(self.value, (int, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(self.value.__abs__())
751
@py3compat.format_doctest_out
def __invert__(self):
    """
    Bitwise inversion of a literal whose Python value is an integer.

    >>> ~(Literal(-1))
    rdflib.term.Literal(%(u)s'0', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> from rdflib.namespace import XSD
    >>> ~( Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(%(u)s'0', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    Not working:

    >>> ~(Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(%(u)s'1')

    :raises TypeError: when the value is not a number, or is a float
        (floats do not support bitwise inversion)
    """
    if isinstance(self.value, (int, float)):
        # Use the operator rather than calling __invert__ directly: float
        # has no such method, so the direct call raised AttributeError
        # where a TypeError is the appropriate error.
        return Literal(~self.value)
    else:
        raise TypeError("Not a number; %s" % repr(self))
773
def __gt__(self, other):
    """

    This implements ordering for Literals,
    the other comparison methods delegate here

    This tries to implement this:
    http://www.w3.org/TR/sparql11-query/#modOrderBy

    In short, Literals with compatible data-types are ordered in value space,
    i.e.
    >>> from rdflib import XSD

    >>> Literal(1)>Literal(2) # int/int
    False
    >>> Literal(2.0)>Literal(1) # double/int
    True
    >>> from decimal import Decimal
    >>> Literal(Decimal("3.3")) > Literal(2.0) # decimal/double
    True
    >>> Literal(Decimal("3.3")) < Literal(4.0) # decimal/double
    True
    >>> Literal('b')>Literal('a') # plain lit/plain lit
    True
    >>> Literal('b')>Literal('a', datatype=XSD.string) # plain lit/xsd:string
    True

    Incompatible datatype mismatches ordered by DT

    >>> Literal(1)>Literal("2") # int>string
    False

    Langtagged literals by lang tag
    >>> Literal("a", lang="en")>Literal("a", lang="fr")
    False

    Returns NotImplemented for operands that are not Nodes, and for
    literals of different datatypes when rdflib.DAWG_LITERAL_COLLATION
    is set.
    """
    if other is None:
        return True  # Everything is greater than None
    if isinstance(other, Literal):

        if self.datatype in _NUMERIC_LITERAL_TYPES and \
                other.datatype in _NUMERIC_LITERAL_TYPES:
            return numeric_greater(self.value, other.value)

        # plain-literals and xsd:string literals
        # are "the same"
        dtself = self.datatype or _XSD_STRING
        dtother = other.datatype or _XSD_STRING

        if dtself != dtother:
            if rdflib.DAWG_LITERAL_COLLATION:
                return NotImplemented
            else:
                return dtself > dtother

        if self.language != other.language:
            # a literal without language sorts before one with a language
            if not self.language:
                return False
            elif not other.language:
                return True
            else:
                return self.language > other.language

        # identity tests: the values may be arbitrary Python objects
        if self.value is not None and other.value is not None:
            return self.value > other.value

        if str(self) != str(other):
            return str(self) > str(other)

        # same language, same lexical form, check real dt
        # plain-literals come before xsd:string!
        if self.datatype != other.datatype:
            if not self.datatype:
                return False
            elif not other.datatype:
                return True
            else:
                return self.datatype > other.datatype

        return False  # they are the same

    elif isinstance(other, Node):
        return True  # Literal are the greatest!
    else:
        return NotImplemented  # we can only compare to nodes
859
def __lt__(self, other):
    """
    Less-than for Literals, derived from __gt__ and eq.

    Returns NotImplemented when *other* is not a Node, when __gt__ cannot
    order the two literals, or when eq raises TypeError.
    """
    if other is None:
        return False  # Nothing is less than None
    if isinstance(other, Literal):
        try:
            greater = self.__gt__(other)
            # Pass NotImplemented on untouched; negating it would silently
            # turn "cannot compare" into an ordering result.
            if greater is NotImplemented:
                return NotImplemented
            return not greater and not self.eq(other)
        except TypeError:
            return NotImplemented
    if isinstance(other, Node):
        return False  # all nodes are less-than Literals

    return NotImplemented
872
def __le__(self, other):
    """
    Less-than-or-equal, built on __lt__ and eq.

    >>> from rdflib.namespace import XSD
    >>> Literal('2007-01-01T10:00:00', datatype=XSD.dateTime
    ...     ) <= Literal('2007-01-01T10:00:00', datatype=XSD.dateTime)
    True

    Returns NotImplemented when __lt__ does, or when eq raises TypeError.
    """
    r = self.__lt__(other)
    # NotImplemented must be handed back to the interpreter; testing its
    # truth value would report "less than" for operands we cannot compare.
    if r is NotImplemented:
        return NotImplemented
    if r:
        return True
    try:
        return self.eq(other)
    except TypeError:
        return NotImplemented
887
def __ge__(self, other):
    """
    Greater-than-or-equal, built on __gt__ and eq.

    Returns NotImplemented when __gt__ does, or when eq raises TypeError.
    """
    r = self.__gt__(other)
    # NotImplemented must be handed back to the interpreter; testing its
    # truth value would report "greater than" for operands we cannot compare.
    if r is NotImplemented:
        return NotImplemented
    if r:
        return True
    try:
        return self.eq(other)
    except TypeError:
        return NotImplemented
896
def _comparable_to(self, other):
    """
    Helper method to decide which things are meaningful to
    rich-compare with this literal

    For another Literal the answer is False when
      * both have datatypes, at least one of them is not in XSDToPython,
        and the datatypes differ;
      * exactly one has a datatype and it is not xsd:string;
      * the language tags differ (compared case-insensitively).
    Otherwise, and for any non-Literal, the answer is True.
    """
    if isinstance(other, Literal):
        if (self.datatype and other.datatype):
            # two datatyped literals
            if self.datatype not in XSDToPython or other.datatype not in XSDToPython:
                # non XSD DTs must match
                if self.datatype != other.datatype:
                    return False

        else:
            # At most one side is datatyped here.  Two plain literals are
            # comparable, and a plain literal may be compared with
            # xsd:string, whichever side carries the datatype.  The earlier
            # "not (a and not b) or (c and not d)" test rejected both cases
            # because "not" bound to the first parenthesis only.
            only_datatype = self.datatype or other.datatype
            if only_datatype and only_datatype != _XSD_STRING:
                return False

        # if given lang-tag has to be case insensitive equal
        if (self.language or "").lower() != (other.language or "").lower():
            return False

    return True
921
def __hash__(self):
    """
    Hash of lexical form, lower-cased language tag and datatype.

    >>> from rdflib.namespace import XSD
    >>> a = {Literal('1', datatype=XSD.integer):'one'}
    >>> Literal('1', datatype=XSD.double) in a
    False


    "Called for the key object for dictionary operations,
    and by the built-in function hash(). Should return
    a 32-bit integer usable as a hash value for
    dictionary operations. The only required property
    is that objects which compare equal have the same
    hash value; it is advised to somehow mix together
    (e.g., using exclusive or) the hash values for the
    components of the object that also play a part in
    comparison of objects." -- 3.4.1 Basic customization (Python)

    "Two literals are equal if and only if all of the following hold:
    * The strings of the two lexical forms compare equal, character by
    character.
    * Either both or neither have language tags.
    * The language tags, if any, compare equal.
    * Either both or neither have datatype URIs.
    * The two datatype URIs, if any, compare equal, character by
    character."
    -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax)

    """
    digest = super(Literal, self).__hash__()
    lang_tag = self.language
    if lang_tag:
        # lower-cased so the hash agrees with the case-insensitive
        # language comparison done by __eq__
        digest ^= hash(lang_tag.lower())
    dt_uri = self.datatype
    if dt_uri:
        digest ^= hash(dt_uri)
    return digest
957
@py3compat.format_doctest_out
def __eq__(self, other):
    """
    Literals are only equal to other literals.

    "Two literals are equal if and only if all of the following hold:
    * The strings of the two lexical forms compare equal, character by character.
    * Either both or neither have language tags.
    * The language tags, if any, compare equal.
    * Either both or neither have datatype URIs.
    * The two datatype URIs, if any, compare equal, character by character."
    -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax)

    >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo"))
    True
    >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo2"))
    False

    >>> Literal("1", datatype=URIRef("foo")) == Literal("2", datatype=URIRef("foo"))
    False
    >>> Literal("1", datatype=URIRef("foo")) == "asdf"
    False
    >>> from rdflib import XSD
    >>> Literal('2007-01-01', datatype=XSD.date) == Literal('2007-01-01', datatype=XSD.date)
    True
    >>> Literal('2007-01-01', datatype=XSD.date) == date(2007, 1, 1)
    False
    >>> Literal("one", lang="en") == Literal("one", lang="en")
    True
    >>> Literal("hast", lang='en') == Literal("hast", lang='de')
    False
    >>> Literal("1", datatype=XSD.integer) == Literal(1)
    True
    >>> Literal("1", datatype=XSD.integer) == Literal("01", datatype=XSD.integer)
    True

    """
    if self is other:
        return True
    if other is None or not isinstance(other, Literal):
        return False

    if not (self.datatype == other.datatype):
        return False

    # language tags are compared case-insensitively; a missing tag is None
    own_lang = self.language.lower() if self.language else None
    their_lang = other.language.lower() if other.language else None
    if not (own_lang == their_lang):
        return False

    return str.__eq__(self, other)
1005
def eq(self, other):
    """
    Compare the value of this literal with something else

    Either, with the value of another literal
    comparisons are then done in literal "value space",
    and according to the rules of XSD subtype-substitution/type-promotion

    OR, with a python object:

    str objects can be compared with plain-literals,
    or those with datatype xsd:string

    bool objects with xsd:boolean

    an int or float with numeric xsd types

    isodate date,time,datetime objects with xsd:date,xsd:time or xsd:datetime

    Any other operations returns NotImplemented

    :raises TypeError: when two literals have matching datatypes but
        differing lexical forms and no Python values to compare, or have
        different datatypes while rdflib.DAWG_LITERAL_COLLATION is set
    """
    if isinstance(other, Literal):

        if self.datatype in _NUMERIC_LITERAL_TYPES \
                and other.datatype in _NUMERIC_LITERAL_TYPES:
            if self.value is not None and other.value is not None:
                return self.value == other.value
            else:
                if str.__eq__(self, other):
                    return True
                raise TypeError(
                    'I cannot know that these two lexical forms do not map to the same value: %s and %s' % (self, other))
        if (self.language or "").lower() != (other.language or "").lower():
            return False

        dtself = self.datatype or _XSD_STRING
        dtother = other.datatype or _XSD_STRING

        if (dtself == _XSD_STRING and dtother == _XSD_STRING):
            # string/plain literals, compare on lexical form
            return str.__eq__(self, other)

        if dtself != dtother:
            if rdflib.DAWG_LITERAL_COLLATION:
                raise TypeError("I don't know how to compare literals with datatypes %s and %s" % (
                    self.datatype, other.datatype))
            else:
                return False

        # matching non-string DTs now - do we compare values or
        # lexical form first? comparing two ints is far quicker -
        # maybe there are counter examples

        if self.value is not None and other.value is not None:

            if self.datatype in (_RDF_XMLLITERAL, _RDF_HTMLLITERAL):
                return _isEqualXMLNode(self.value, other.value)

            return self.value == other.value
        else:

            if str.__eq__(self, other):
                return True

            if self.datatype == _XSD_STRING:
                return False  # string value space=lexical space

            # matching DTs, but not matching, we cannot compare!
            raise TypeError(
                'I cannot know that these two lexical forms do not map to the same value: %s and %s' % (self, other))

    elif isinstance(other, Node):
        return False  # no non-Literal nodes are equal to a literal

    elif isinstance(other, str):
        # only plain-literals can be directly compared to strings

        # TODO: Is "blah"@en eq "blah" ?
        if self.language is not None:
            return False

        if (self.datatype == _XSD_STRING or self.datatype is None):
            return str(self) == other

    elif isinstance(other, bool) and self.datatype == _XSD_BOOLEAN:
        # bool is a subclass of int, so this test has to come before the
        # int/float branch or it can never be reached.  A bool compared
        # with a non-boolean literal still falls through to that branch.
        return self.value == other
    elif isinstance(other, (int, float)):
        if self.datatype in _NUMERIC_LITERAL_TYPES:
            return self.value == other
    elif isinstance(other, (date, datetime, time)):
        if self.datatype in (_XSD_DATETIME, _XSD_DATE, _XSD_TIME):
            return self.value == other

    return NotImplemented
1102
def neq(self, other):
    """
    Return the logical negation of :meth:`eq` for ``other``.

    Whatever ``self.eq(other)`` returns is negated with ``not``; any
    exception raised by ``eq`` propagates unchanged.
    """
    is_equal = self.eq(other)
    return not is_equal
1105
@py3compat.format_doctest_out
def n3(self, namespace_manager=None):
    r'''
    Returns a representation in the N3 format.

    Examples::

        >>> Literal("foo").n3()
        %(u)s'"foo"'

    Strings with newlines or triple-quotes::

        >>> Literal("foo\nbar").n3()
        %(u)s'"""foo\nbar"""'

        >>> Literal("''\'").n3()
        %(u)s'"\'\'\'"'

        >>> Literal('"""').n3()
        %(u)s'"\\"\\"\\""'

    Language::

        >>> Literal("hello", lang="en").n3()
        %(u)s'"hello"@en'

    Datatypes::

        >>> Literal(1).n3()
        %(u)s'"1"^^<http://www.w3.org/2001/XMLSchema#integer>'

        >>> Literal(1.0).n3()
        %(u)s'"1.0"^^<http://www.w3.org/2001/XMLSchema#double>'

        >>> Literal(True).n3()
        %(u)s'"true"^^<http://www.w3.org/2001/XMLSchema#boolean>'

    Datatype and language isn't allowed (datatype takes precedence)::

        >>> Literal(1, lang="en").n3()
        %(u)s'"1"^^<http://www.w3.org/2001/XMLSchema#integer>'

    Custom datatype::

        >>> footype = URIRef("http://example.org/ns#foo")
        >>> Literal("1", datatype=footype).n3()
        %(u)s'"1"^^<http://example.org/ns#foo>'

    Passing a namespace-manager will use it to abbreviate datatype URIs:

        >>> from rdflib import Graph
        >>> Literal(1).n3(Graph().namespace_manager)
        %(u)s'"1"^^xsd:integer'
    '''
    # With no (or a falsy) namespace manager there is nothing to abbreviate
    # with; None is also _literal_n3's own default for qname_callback.
    abbreviate = namespace_manager.normalizeUri if namespace_manager else None
    return self._literal_n3(qname_callback=abbreviate)
1165
@py3compat.format_doctest_out
def _literal_n3(self, use_plain=False, qname_callback=None):
    '''
    Using plain literal (shorthand) output::
        >>> from rdflib.namespace import XSD

        >>> Literal(1)._literal_n3(use_plain=True)
        %(u)s'1'

        >>> Literal(1.0)._literal_n3(use_plain=True)
        %(u)s'1e+00'

        >>> Literal(1.0, datatype=XSD.decimal)._literal_n3(use_plain=True)
        %(u)s'1.0'

        >>> Literal(1.0, datatype=XSD.float)._literal_n3(use_plain=True)
        %(u)s'"1.0"^^<http://www.w3.org/2001/XMLSchema#float>'

        >>> Literal("foo", datatype=XSD.string)._literal_n3(
        ...         use_plain=True)
        %(u)s'"foo"^^<http://www.w3.org/2001/XMLSchema#string>'

        >>> Literal(True)._literal_n3(use_plain=True)
        %(u)s'true'

        >>> Literal(False)._literal_n3(use_plain=True)
        %(u)s'false'

        >>> Literal(1.91)._literal_n3(use_plain=True)
        %(u)s'1.91e+00'

    Only limited precision available for floats:
        >>> Literal(0.123456789)._literal_n3(use_plain=True)
        %(u)s'1.234568e-01'

        >>> Literal('0.123456789',
        ...     datatype=XSD.decimal)._literal_n3(use_plain=True)
        %(u)s'0.123456789'

    Using callback for datatype QNames::

        >>> Literal(1)._literal_n3(
        ...     qname_callback=lambda uri: "xsd:integer")
        %(u)s'"1"^^xsd:integer'

    '''
    wants_shorthand = use_plain and self.datatype in _PLAIN_LITERAL_TYPES
    if wants_shorthand and self.value is not None:
        # inf and NaN have no shorthand form, and neither does a lexical
        # form that float() rejects: fall back to the quoted, typed output.
        if self.datatype in _NUMERIC_INF_NAN_LITERAL_TYPES:
            try:
                number = float(self)
                if math.isinf(number) or math.isnan(number):
                    return self._literal_n3(False, qname_callback)
            except ValueError:
                return self._literal_n3(False, qname_callback)

        # Produce "pretty" shorthand output per datatype.
        if self.datatype == _XSD_DOUBLE:
            # Strip the trailing zeros (and a bare '.') before the exponent.
            return sub("\\.?0*e", "e", '%e' % float(self))
        if self.datatype == _XSD_DECIMAL:
            text = '%s' % self
            # Ensure there is a decimal point in the shorthand form.
            return text if '.' in text else text + '.0'
        if self.datatype == _XSD_BOOLEAN:
            return ('%s' % self).lower()
        return '%s' % self

    encoded = self._quote_encode()

    datatype = self.datatype
    quoted_dt = None
    if datatype:
        # Prefer the callback's abbreviation; fall back to the full <URI>
        # when there is no callback or it returns something falsy.
        abbreviated = qname_callback(datatype) if qname_callback else None
        quoted_dt = abbreviated or "<%s>" % datatype
        if datatype in _NUMERIC_INF_NAN_LITERAL_TYPES:
            try:
                number = float(self)
                if math.isinf(number):
                    # Python spells these 'inf' (float) and 'Infinity'
                    # (Decimal); XSD wants "INF" for both.
                    encoded = encoded.replace('inf', 'INF')
                    encoded = encoded.replace('Infinity', 'INF')
                if math.isnan(number):
                    encoded = encoded.replace('nan', 'NaN')
            except ValueError:
                # Not castable to float: something is off, but the literal
                # can still be serialized, so only warn.
                warnings.warn("Serializing weird numerical %r" % self)

    language = self.language
    if language:
        return '%s@%s' % (encoded, language)
    if datatype:
        return '%s^^%s' % (encoded, quoted_dt)
    return '%s' % encoded
1271
1272 def _quote_encode(self):
1273 # This simpler encoding doesn't work; a newline gets encoded as "\\n",
1274 # which is ok in sourcecode, but we want "\n".
1275 # encoded = self.encode('unicode-escape').replace(
1276 # '\\', '\\\\').replace('"','\\"')
1277 # encoded = self.replace.replace('\\', '\\\\').replace('"','\\"')
1278
1279 # NOTE: Could in theory chose quotes based on quotes appearing in the
1280 # string, i.e. '"' and "'", but N3/turtle doesn't allow "'"(?).
1281
1282 if "\n" in self:
1283 # Triple quote this string.
1284 encoded = self.replace('\\', '\\\\')
1285 if '"""' in self:
1286 # is this ok?
1287 encoded = encoded.replace('"""', '\\"\\"\\"')
1288 if encoded[-1] == '"' and encoded[-2] != '\\':
1289 encoded = encoded[:-1] + '\\' + '"'
1290
1291 return '"""%s"""' % encoded.replace('\r', '\\r')
1292 else:
1293 return '"%s"' % self.replace(
1294 '\n', '\\n').replace(
1295 '\\', '\\\\').replace(
1296 '"', '\\"').replace(
1297 '\r', '\\r')
1298
if not py3compat.PY3:
    # Only defined when py3compat.PY3 is false: __str__ then returns the
    # result of self.encode() rather than the inherited implementation.
    def __str__(self):
        return self.encode()
1302
def __repr__(self):
    """
    Return a constructor-like representation of this literal.

    The inherited repr of the lexical form comes first, followed by
    ``lang=...`` and ``datatype=...`` for whichever of the two is set.
    Exact ``Literal`` instances are labelled ``rdflib.term.Literal``;
    subclasses are labelled with their own class name.
    """
    parts = [super(Literal, self).__repr__()]
    if self.language is not None:
        parts.append("lang=%s" % repr(self.language))
    if self.datatype is not None:
        parts.append("datatype=%s" % repr(self.datatype))

    klass = self.__class__
    label = "rdflib.term.Literal" if klass == Literal else klass.__name__
    return """%s(%s)""" % (label, ", ".join(parts))
1314
def toPython(self):
    """
    Return the Python value for this literal.

    This is ``self.value`` when it is not ``None``; otherwise the literal
    itself is returned.
    """
    return self if self.value is None else self.value
1323
def md5_term_hash(self):
    """a string of hex that will be the same for two Literals that
    are the same. It is not a suitable unique id.

    The digest is the MD5 of the encoded lexical form followed by the
    byte ``L``.  Calling this emits a ``DeprecationWarning``.

    Supported for backwards compatibility; new code should
    probably just use __hash__
    """
    warnings.warn(
        "method md5_term_hash is deprecated, and will be " +
        "removed in the future. If you use this please let rdflib-dev know!",
        category=DeprecationWarning, stacklevel=2)
    digest = md5(self.encode())
    digest.update(b"L")  # type marker appended after the lexical form
    return digest.hexdigest()
1338
1339
def _parseXML(xmlstring):
    """
    Parse an XML fragment into a normalized minidom document.

    The input is wrapped in a synthetic ``<rdflibtoplevelelement>`` root
    before being handed to ``xml.dom.minidom.parseString``.

    :returns: the parsed document, after ``normalize()`` has been called
    """
    if not py3compat.PY3:
        xmlstring = xmlstring.encode('utf-8')
    wrapped = "<rdflibtoplevelelement>%s</rdflibtoplevelelement>" % xmlstring
    document = xml.dom.minidom.parseString(wrapped)
    document.normalize()
    return document
1347
1348
1349 def _parseHTML(htmltext):
1350 try:
1351 import html5lib
1352 parser = html5lib.HTMLParser(
1353 tree=html5lib.treebuilders.getTreeBuilder("dom"))
1354 retval = parser.parseFragment(htmltext)
1355 retval.normalize()
1356 return retval
1357 except ImportError:
1358 raise ImportError(
1359 "HTML5 parser not available. Try installing" +
1360 " html5lib <http://code.google.com/p/html5lib>")
1361
1362
1363 def _writeXML(xmlnode):
1364 if isinstance(xmlnode, xml.dom.minidom.DocumentFragment):
1365 d = xml.dom.minidom.Document()
1366 d.childNodes += xmlnode.childNodes
1367 xmlnode = d
1368 s = xmlnode.toxml('utf-8')
1369 # for clean round-tripping, remove headers -- I have great and
1370 # specific worries that this will blow up later, but this margin
1371 # is too narrow to contain them
1372 if s.startswith(b('<?xml version="1.0" encoding="utf-8"?>')):
1373 s = s[38:]
1374 if s.startswith(b('<rdflibtoplevelelement>')):
1375 s = s[23:-24]
1376 if s == b('<rdflibtoplevelelement/>'):
1377 s = b('')
1378 return s
1379
# Namespace/XSD cannot be imported here because of circular dependencies,
# so the datatype URIs are spelled out from their prefixes.
_XSD_PFX = 'http://www.w3.org/2001/XMLSchema#'
_RDF_PFX = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

_RDF_XMLLITERAL = URIRef(_RDF_PFX + 'XMLLiteral')
_RDF_HTMLLITERAL = URIRef(_RDF_PFX + 'HTML')

_XSD_STRING = URIRef(_XSD_PFX + 'string')

_XSD_FLOAT = URIRef(_XSD_PFX + 'float')
_XSD_DOUBLE = URIRef(_XSD_PFX + 'double')
_XSD_DECIMAL = URIRef(_XSD_PFX + 'decimal')
_XSD_INTEGER = URIRef(_XSD_PFX + 'integer')
_XSD_BOOLEAN = URIRef(_XSD_PFX + 'boolean')

_XSD_DATETIME = URIRef(_XSD_PFX + 'dateTime')
_XSD_DATE = URIRef(_XSD_PFX + 'date')
_XSD_TIME = URIRef(_XSD_PFX + 'time')

# TODO: duration, gYearMonth, gYear, gMonthDay, gDay, gMonth

# Datatypes treated as numeric: the four base types followed by the
# remaining XSD integer types.
_NUMERIC_LITERAL_TYPES = (
    _XSD_INTEGER,
    _XSD_DECIMAL,
    _XSD_DOUBLE,
    _XSD_FLOAT,
) + tuple(
    URIRef(_XSD_PFX + local_name)
    for local_name in (
        'byte',
        'int',
        'long',
        'negativeInteger',
        'nonNegativeInteger',
        'nonPositiveInteger',
        'positiveInteger',
        'short',
        'unsignedByte',
        'unsignedInt',
        'unsignedLong',
        'unsignedShort',
    )
)

# these have "native" syntax in N3/SPARQL
_PLAIN_LITERAL_TYPES = (
    _XSD_INTEGER,
    _XSD_BOOLEAN,
    _XSD_DOUBLE,
    _XSD_DECIMAL,
)

# these have special INF and NaN XSD representations
_NUMERIC_INF_NAN_LITERAL_TYPES = (
    _XSD_FLOAT,
    _XSD_DOUBLE,
    _XSD_DECIMAL,
)
1436
1437
def _castPythonToLiteral(obj):
    """
    Cast a Python object to a ``(lexical value, datatype URI)`` tuple.

    The first entry of ``_PythonToXSD`` whose type matches ``obj`` decides
    the result: its cast function (if any) is applied to ``obj`` and its
    datatype is returned alongside.  The datatype is ``None`` when no entry
    matches or when the matching entry has neither a cast function nor a
    datatype.
    """
    for python_type, (to_lexical, datatype) in _PythonToXSD:
        if not isinstance(obj, python_type):
            continue
        if to_lexical:
            return to_lexical(obj), datatype
        return obj, (datatype if datatype else None)
    return obj, None  # TODO: is this right for the fall through case?
1452
from decimal import Decimal

# Mappings from Python types to XSD datatypes and back (borrowed from sparta).
# _castPythonToLiteral uses the first matching entry, so order matters:
# datetime instances are also instances of date, and bool is a subclass of
# int, so the more specific type has to come first.

# SPARQL/Turtle/N3 has shortcuts for integer, double, decimal
# python has only float - to be in tune with sparql/n3/turtle
# we default to XSD.double for float literals

# Python ints map to the abstract integer type rather than to some
# concrete bit-limited datatype.

_PythonToXSD = [
    (str, (None, None)),
    (float, (None, _XSD_DOUBLE)),
    (bool, (lambda i: str(i).lower(), _XSD_BOOLEAN)),
    (int, (None, _XSD_INTEGER)),
    (Decimal, (None, _XSD_DECIMAL)),
    (datetime, (lambda i: i.isoformat(), _XSD_DATETIME)),
    (date, (lambda i: i.isoformat(), _XSD_DATE)),
    (time, (lambda i: i.isoformat(), _XSD_TIME)),
    (xml.dom.minidom.Document, (_writeXML, _RDF_XMLLITERAL)),
    # this is a bit dirty - by accident the html5lib parser produces
    # DocumentFragments, and the xml parser Documents, letting this
    # decide what datatype to use makes roundtripping easier, but it is a
    # bit random
    (xml.dom.minidom.DocumentFragment, (_writeXML, _RDF_HTMLLITERAL)),
]

# Datatype URI -> function converting a lexical form to a Python value.
# A value of None means the lexical form is used as the value directly.
XSDToPython = {
    None: None,  # plain literals map directly to value space
    URIRef(_XSD_PFX + 'time'): parse_time,
    URIRef(_XSD_PFX + 'date'): parse_date,
    URIRef(_XSD_PFX + 'gYear'): parse_date,
    URIRef(_XSD_PFX + 'gYearMonth'): parse_date,
    URIRef(_XSD_PFX + 'dateTime'): parse_datetime,
    URIRef(_XSD_PFX + 'string'): None,
    URIRef(_XSD_PFX + 'normalizedString'): None,
    URIRef(_XSD_PFX + 'token'): None,
    URIRef(_XSD_PFX + 'language'): None,
    URIRef(_XSD_PFX + 'boolean'): lambda i: i.lower() in ['1', 'true'],
    URIRef(_XSD_PFX + 'decimal'): Decimal,
    URIRef(_XSD_PFX + 'integer'): int,
    URIRef(_XSD_PFX + 'nonPositiveInteger'): int,
    URIRef(_XSD_PFX + 'long'): int,
    URIRef(_XSD_PFX + 'nonNegativeInteger'): int,
    URIRef(_XSD_PFX + 'negativeInteger'): int,
    URIRef(_XSD_PFX + 'int'): int,
    URIRef(_XSD_PFX + 'unsignedLong'): int,
    URIRef(_XSD_PFX + 'positiveInteger'): int,
    URIRef(_XSD_PFX + 'short'): int,
    URIRef(_XSD_PFX + 'unsignedInt'): int,
    URIRef(_XSD_PFX + 'byte'): int,
    URIRef(_XSD_PFX + 'unsignedShort'): int,
    URIRef(_XSD_PFX + 'unsignedByte'): int,
    URIRef(_XSD_PFX + 'float'): float,
    URIRef(_XSD_PFX + 'double'): float,
    URIRef(_XSD_PFX + 'base64Binary'): base64.b64decode,
    URIRef(_XSD_PFX + 'anyURI'): None,
    _RDF_XMLLITERAL: _parseXML,
    _RDF_HTMLLITERAL: _parseHTML,
}

# Working copy consulted by _castLexicalToPython and extended by bind().
_toPythonMapping = {}

_toPythonMapping.update(XSDToPython)
1522
def _castLexicalToPython(lexical, datatype):
    """
    Map a lexical form to the value-space for the given datatype

    :param lexical: the lexical form to convert
    :param datatype: the datatype URI, looked up in ``_toPythonMapping``
    :returns: a python object for the value or ``None``; ``None`` is
              returned both for unknown datatypes and when the registered
              conversion function raises
    """
    convFunc = _toPythonMapping.get(datatype, False)
    if convFunc:
        try:
            return convFunc(lexical)
        except Exception:
            # not a valid lexical representation for this dt; interpreter
            # exits such as KeyboardInterrupt are deliberately not swallowed
            return None
    if convFunc is None:
        # no conv func means 1-1 lexical<->value-space mapping
        try:
            return str(lexical)
        except UnicodeDecodeError:
            return str(lexical, 'utf-8')
    # no entry for this datatype - unknown data-type
    return None
1544
def bind(datatype, pythontype, constructor=None, lexicalizer=None):
    """
    register a new datatype<->pythontype binding

    A warning is logged when ``datatype`` already has a binding; the
    existing lexical-to-Python mapping is then overwritten.

    :param datatype: the datatype URI to bind

    :param pythontype: the Python type associated with the datatype

    :param constructor: an optional function for converting lexical forms
                        into a Python instances, if not given the pythontype
                        is used directly

    :param lexicalizer: an optional function for converting python objects to
                        lexical form; it is stored as the cast function of
                        the new ``_PythonToXSD`` entry

    """
    if datatype in _toPythonMapping:
        logger.warning(
            "datatype '%s' was already bound. Rebinding.", datatype)

    if constructor is None:
        constructor = pythontype
    _toPythonMapping[datatype] = constructor
    _PythonToXSD.append((pythontype, (lexicalizer, datatype)))
1565
1566
class Variable(Identifier):
    """
    A Variable - this is used for querying, or in Formula aware
    graphs, where Variables can be stored in the graph
    """
    __slots__ = ()

    def __new__(cls, value):
        """
        Create a variable named ``value``; one leading ``?`` is dropped.

        :raises Exception: if the name is empty, including the case where
                           nothing remains after dropping the ``?``
        """
        if value[:1] == '?':
            value = value[1:]
        # Checked after stripping so that a bare '?' is rejected as well.
        if len(value) == 0:
            raise Exception(
                "Attempted to create variable with empty string as name!")
        return str.__new__(cls, value)

    def __repr__(self):
        if self.__class__ is Variable:
            clsName = "rdflib.term.Variable"
        else:
            clsName = self.__class__.__name__

        return """%s(%s)""" % (clsName, super(Variable, self).__repr__())

    def toPython(self):
        """Return the variable name prefixed with ``?``."""
        return "?%s" % self

    def n3(self, namespace_manager=None):
        """Return the variable name prefixed with ``?``.

        ``namespace_manager`` is accepted but not used.
        """
        return "?%s" % self

    def __reduce__(self):
        return (Variable, (str(self),))

    def md5_term_hash(self):
        """a string of hex that will be the same for two Variables that
        are the same. It is not a suitable unique id.

        The digest is the MD5 of the encoded name followed by the byte
        ``V``.  Calling this emits a ``DeprecationWarning``.

        Supported for backwards compatibility; new code should
        probably just use __hash__
        """
        warnings.warn(
            "method md5_term_hash is deprecated, and will be " +
            "removed in the future. If you use this please let rdflib-dev know!",
            category=DeprecationWarning, stacklevel=2)
        d = md5(self.encode())
        d.update(b"V")  # type marker appended after the name
        return d.hexdigest()
1613
1614
class Statement(Node, tuple):
    """
    Deprecated pairing of a ``(subject, predicate, object)`` triple with a
    context, stored as the two-element tuple ``(triple, context)``.
    """

    def __new__(cls, xxx_todo_changeme, context):
        # Unpack first so that a malformed triple fails before the warning.
        subject, predicate, obj = xxx_todo_changeme
        warnings.warn(
            "Class Statement is deprecated, and will be removed in " +
            "the future. If you use this please let rdflib-dev know!",
            category=DeprecationWarning, stacklevel=2)
        triple = (subject, predicate, obj)
        return tuple.__new__(cls, (triple, context))

    def __reduce__(self):
        triple, context = self[0], self[1]
        return (Statement, (triple, context))

    def toPython(self):
        """Return a plain ``(triple, context)`` tuple."""
        triple, context = self[0], self[1]
        return (triple, context)
1630
# Relative ordering of the node types,
# see http://www.w3.org/TR/sparql11-query/#modOrderBy
# The gaps leave "space" for more subclasses of Node defined elsewhere, and
# the defaultdict fails gracefully for new subclasses by yielding 0.
_ORDERING = defaultdict(int, {
    BNode: 10,
    Variable: 20,
    URIRef: 30,
    Literal: 40,
})
1642
1643
1644 def _isEqualXMLNode(node, other):
1645 from xml.dom.minidom import Node
1646
1647 def recurse():
1648 # Recursion through the children
1649 # In Python2, the semantics of 'map' is such that the check on
1650 # length would be unnecessary. In Python 3,
1651 # the semantics of map has changed (why, oh why???) and the check
1652 # for the length becomes necessary...
1653 if len(node.childNodes) != len(other.childNodes):
1654 return False
1655 for (nc, oc) in map(
1656 lambda x, y: (x, y), node.childNodes, other.childNodes):
1657 if not _isEqualXMLNode(nc, oc):
1658 return False
1659 # if we got here then everything is fine:
1660 return True
1661
1662 if node is None or other is None:
1663 return False
1664
1665 if node.nodeType != other.nodeType:
1666 return False
1667
1668 if node.nodeType in [Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE]:
1669 return recurse()
1670
1671 elif node.nodeType == Node.ELEMENT_NODE:
1672 # Get the basics right
1673 if not (node.tagName == other.tagName
1674 and node.namespaceURI == other.namespaceURI):
1675 return False
1676
1677 # Handle the (namespaced) attributes; the namespace setting key
1678 # should be ignored, though
1679 # Note that the minidom orders the keys already, so we do not have
1680 # to worry about that, which is a bonus...
1681 n_keys = [
1682 k for k in node.attributes.keysNS()
1683 if k[0] != 'http://www.w3.org/2000/xmlns/']
1684 o_keys = [
1685 k for k in other.attributes.keysNS()
1686 if k[0] != 'http://www.w3.org/2000/xmlns/']
1687 if len(n_keys) != len(o_keys):
1688 return False
1689 for k in n_keys:
1690 if not (k in o_keys
1691 and node.getAttributeNS(k[0], k[1]) ==
1692 other.getAttributeNS(k[0], k[1])):
1693 return False
1694
1695 # if we got here, the attributes are all right, we can go down
1696 # the tree recursively
1697 return recurse()
1698
1699 elif node.nodeType in [
1700 Node.TEXT_NODE, Node.COMMENT_NODE, Node.CDATA_SECTION_NODE,
1701 Node.NOTATION_NODE]:
1702 return node.data == other.data
1703
1704 elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
1705 return node.data == other.data and node.target == other.target
1706
1707 elif node.nodeType == Node.ENTITY_NODE:
1708 return node.nodeValue == other.nodeValue
1709
1710 elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1711 return node.publicId == other.publicId \
1712 and node.systemId == other.system.Id
1713
1714 else:
1715 # should not happen, in fact
1716 raise Exception(
1717 'I dont know how to compare XML Node type: %s' % node.nodeType)
1718
# When executed as a script, run the doctests embedded in this module.
if __name__ == '__main__':
    import doctest
    doctest.testmod()