Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/rdflib/term.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """ | |
2 This module defines the different types of terms. Terms are the kinds of | |
3 objects that can appear in a quoted/asserted triple. This includes those | |
4 that are core to RDF: | |
5 | |
6 * :class:`Blank Nodes <rdflib.term.BNode>` | |
7 * :class:`URI References <rdflib.term.URIRef>` | |
8 * :class:`Literals <rdflib.term.Literal>` (which consist of a literal value, datatype and language tag) | |
9 | |
10 Those that extend the RDF model into N3: | |
11 | |
12 * :class:`Formulae <rdflib.graph.QuotedGraph>` | |
13 * :class:`Universal Quantifications (Variables) <rdflib.term.Variable>` | |
14 | |
15 And those that are primarily for matching against 'Nodes' in the | |
16 underlying Graph: | |
17 | |
18 * REGEX Expressions | |
19 * Date Ranges | |
20 * Numerical Ranges | |
21 | |
22 """ | |
23 from __future__ import absolute_import | |
24 from __future__ import division | |
25 from __future__ import print_function | |
26 # from __future__ import unicode_literals | |
27 from fractions import Fraction | |
28 | |
29 __all__ = [ | |
30 'bind', | |
31 | |
32 'Node', | |
33 'Identifier', | |
34 | |
35 'URIRef', | |
36 'BNode', | |
37 'Literal', | |
38 | |
39 'Variable', | |
40 'Statement', | |
41 ] | |
42 | |
43 import logging | |
44 logger = logging.getLogger(__name__) | |
45 import warnings | |
46 import math | |
47 | |
48 import base64 | |
49 import xml.dom.minidom | |
50 | |
51 from datetime import date, time, datetime, timedelta | |
52 from re import sub, compile | |
53 from collections import defaultdict | |
54 from unicodedata import category | |
55 | |
56 from isodate import parse_time, parse_date, parse_datetime, Duration, parse_duration, duration_isoformat | |
57 from binascii import hexlify, unhexlify | |
58 | |
59 import rdflib | |
60 from six import PY2 | |
61 from six import PY3 | |
62 from six import b | |
63 from rdflib.compat import long_type | |
64 from six import string_types | |
65 from six import text_type | |
66 from six.moves.urllib.parse import urldefrag | |
67 from six.moves.urllib.parse import urljoin | |
68 from six.moves.urllib.parse import urlparse | |
69 | |
70 skolem_genid = "/.well-known/genid/" | |
71 rdflib_skolem_genid = "/.well-known/genid/rdflib/" | |
72 skolems = {} | |
73 | |
74 | |
_invalid_uri_chars = '<>" {}|\\^`'


def _is_valid_uri(uri):
    """
    Limited validity check for a URI string.

    Returns False as soon as a character listed in ``_invalid_uri_chars``
    is found, True otherwise (including for the empty string).
    """
    for ch in uri:
        # characters with a code point above 256 are never rejected
        if ord(ch) <= 256 and ch in _invalid_uri_chars:
            return False
    return True
80 | |
81 | |
# ``\Z`` (not ``$``) anchors at the very end of the string: ``$`` would also
# match just before a trailing newline and let a tag such as "en\n" through.
_lang_tag_regex = compile(r'^[a-zA-Z]+(?:-[a-zA-Z0-9]+)*\Z')


def _is_valid_langtag(tag):
    """
    Return True when ``tag`` has the shape of a language tag.

    The accepted shape is a run of ASCII letters followed by any number of
    ``-``-separated runs of ASCII letters or digits; the whole string must
    match.
    """
    return bool(_lang_tag_regex.match(tag))
87 | |
88 | |
def _is_valid_unicode(value):
    """
    Tell whether ``value`` can be turned into a Python unicode object.

    ``bytes`` are decoded as UTF-8; anything else is passed to the text
    constructor of the running Python version. A ``UnicodeError`` during
    the conversion yields False, every other outcome yields True.
    """
    try:
        if isinstance(value, bytes):
            value.decode('utf-8')
        elif PY3:
            str(value)
        else:
            unicode(value)
    except UnicodeError:
        return False
    return True
107 | |
108 | |
class Node(object):
    """
    A Node in the Graph; the common base of the term classes in this module.
    """

    # the class itself stores nothing per instance
    __slots__ = ()
115 | |
116 | |
class Identifier(Node, text_type):  # allow Identifiers to be Nodes in the Graph
    """
    A Node that is also a text string.

    See http://www.w3.org/2002/07/rdf-identifer-terminology/
    regarding choice of terminology.
    """

    __slots__ = ()

    def __new__(cls, value):
        return text_type.__new__(cls, value)

    def eq(self, other):
        """A "semantic"/interpreted equality function,
        by default, same as __eq__"""
        return self.__eq__(other)

    def neq(self, other):
        """A "semantic"/interpreted not equal function,
        by default, same as __ne__"""
        return self.__ne__(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __eq__(self, other):
        """
        Equality for Nodes: same exact type and same text.

        >>> BNode("foo")==None
        False
        >>> BNode("foo")==URIRef("foo")
        False
        >>> URIRef("foo")==BNode("foo")
        False
        >>> BNode("foo")!=URIRef("foo")
        True
        >>> URIRef("foo")!=BNode("foo")
        True
        >>> Variable('a')!=URIRef('a')
        True
        >>> Variable('a')!=Variable('a')
        False
        """

        if type(self) == type(other):
            return text_type(self) == text_type(other)
        else:
            return False

    def __gt__(self, other):
        """
        This implements ordering for Nodes,

        This tries to implement this:
        http://www.w3.org/TR/sparql11-query/#modOrderBy

        Variables are not included in the SPARQL list, but
        they are greater than BNodes and smaller than everything else

        Returns NotImplemented when ``other`` is neither None nor a Node.
        """
        if other is None:
            return True  # everything bigger than None
        elif type(self) == type(other):
            return text_type(self) > text_type(other)
        elif isinstance(other, Node):
            # different node types: fall back to the per-type ranking
            return _ORDERING[type(self)] > _ORDERING[type(other)]

        return NotImplemented

    def __lt__(self, other):
        """Mirror image of ``__gt__``; nothing is less than None."""
        if other is None:
            return False  # Nothing is less than None
        elif type(self) == type(other):
            return text_type(self) < text_type(other)
        elif isinstance(other, Node):
            return _ORDERING[type(self)] < _ORDERING[type(other)]

        return NotImplemented

    def __le__(self, other):
        """Less-than or equal; NotImplemented from ``__lt__`` is passed on."""
        r = self.__lt__(other)
        # NotImplemented must not be tested for truth: it used to count as
        # True here, and newer Pythons refuse to evaluate it as a boolean.
        if r is NotImplemented:
            return NotImplemented
        if r:
            return True
        return self == other

    def __ge__(self, other):
        """Greater-than or equal; NotImplemented from ``__gt__`` is passed on."""
        r = self.__gt__(other)
        if r is NotImplemented:
            return NotImplemented
        if r:
            return True
        return self == other

    # use parent's hash for efficiency reasons
    # clashes of 'foo', URIRef('foo') and Literal('foo') are typically so rare
    # that they don't justify additional overhead. Notice that even in case of
    # clash __eq__ is still the fallback and very quick in those cases.
    __hash__ = text_type.__hash__
213 | |
214 | |
class URIRef(Identifier):
    """
    RDF URI Reference: http://www.w3.org/TR/rdf-concepts/#section-Graph-URIref
    """

    __slots__ = ()

    def __new__(cls, value, base=None):
        """
        Create a URI reference from ``value``.

        :param value: the URI text
        :param base: if not None, ``value`` is joined onto it with ``urljoin``

        A value that fails ``_is_valid_uri`` is still accepted; only a
        warning is logged.
        """
        if base is not None:
            ends_in_hash = value.endswith("#")
            value = urljoin(base, value, allow_fragments=1)
            # put a trailing "#" back if joining removed it
            if ends_in_hash and not value.endswith("#"):
                value += "#"

        if not _is_valid_uri(value):
            # hand the value to the logger as an argument so the message is
            # only formatted when the warning is actually emitted
            logger.warning(
                '%s does not look like a valid URI, trying to serialize this will break.',
                value)

        try:
            rt = text_type.__new__(cls, value)
        except UnicodeDecodeError:
            rt = text_type.__new__(cls, value, 'utf-8')
        return rt

    def toPython(self):
        """Return the URI as a plain text string."""
        return text_type(self)

    def n3(self, namespace_manager=None):
        """
        This will do a limited check for valid URIs,
        essentially just making sure that the string includes no illegal
        characters (``<, >, ", {, }, |, \\, `, ^``)

        :param namespace_manager: if not None, will be used to make up
             a prefixed name
        :raises Exception: if the URI contains an illegal character
        """

        if not _is_valid_uri(self):
            raise Exception('"%s" does not look like a valid URI, I cannot serialize this as N3/Turtle. Perhaps you wanted to urlencode it?'%self)

        if namespace_manager:
            return namespace_manager.normalizeUri(self)
        else:
            return "<%s>" % self

    def defrag(self):
        """Return this URI without its fragment (``self`` if it has no "#")."""
        if "#" in self:
            # urldefrag yields (url, fragment); only the url part is needed
            return URIRef(urldefrag(self)[0])
        else:
            return self

    def __reduce__(self):
        return (URIRef, (text_type(self),))

    def __getnewargs__(self):
        return (text_type(self), )

    if PY2:
        def __str__(self):
            return self.encode()

    def __repr__(self):
        if self.__class__ is URIRef:
            clsName = "rdflib.term.URIRef"
        else:
            clsName = self.__class__.__name__

        return """%s(%s)""" % (clsName, super(URIRef, self).__repr__())

    def __add__(self, other):
        return self.__class__(text_type(self) + other)

    def __radd__(self, other):
        return self.__class__(other + text_type(self))

    def __mod__(self, other):
        return self.__class__(text_type(self) % other)

    def de_skolemize(self):
        """ Create a Blank Node from a skolem URI, in accordance
        with http://www.w3.org/TR/rdf11-concepts/#section-skolemization.
        This function accepts only rdflib type skolemization, to provide
        a round-tripping within the system.

        :raises Exception: if this URI is neither an RDFLibGenid nor a Genid

        .. versionadded:: 4.0
        """
        if isinstance(self, RDFLibGenid):
            parsed_uri = urlparse("%s" % self)
            # the BNode id is what follows the rdflib genid prefix in the path
            return BNode(
                value=parsed_uri.path[len(rdflib_skolem_genid):])
        elif isinstance(self, Genid):
            bnode_id = "%s" % self
            # foreign skolem URIs map to one fresh BNode each, remembered in
            # the module-level ``skolems`` dict
            if bnode_id in skolems:
                return skolems[bnode_id]
            else:
                retval = BNode()
                skolems[bnode_id] = retval
                return retval
        else:
            raise Exception("<%s> is not a skolem URI" % self)
317 | |
318 | |
class Genid(URIRef):
    """URIRef subclass used for skolem ("genid") URIs."""

    __slots__ = ()

    @staticmethod
    def _is_external_skolem(uri):
        """
        Return True when the last occurrence of ``skolem_genid`` in the
        path of ``uri`` is at the very start of that path.
        """
        text = uri if isinstance(uri, string_types) else str(uri)
        return urlparse(text).path.rfind(skolem_genid) == 0
331 | |
332 | |
class RDFLibGenid(Genid):
    """Genid subclass for skolem URIs under the rdflib genid path."""

    __slots__ = ()

    @staticmethod
    def _is_rdflib_skolem(uri):
        """
        Return True when ``uri`` has no params, query or fragment and the
        last occurrence of ``rdflib_skolem_genid`` in its path is at the
        very start of that path.
        """
        text = uri if isinstance(uri, string_types) else str(uri)
        parts = urlparse(text)
        extras = (parts.params, parts.query, parts.fragment)
        if any(extra != "" for extra in extras):
            return False
        return parts.path.rfind(rdflib_skolem_genid) == 0
349 | |
350 | |
def _unique_id():
    """
    Return the fixed prefix "N" used for generated BNode identifiers.

    This once created a (hopefully) unique prefix; it is retained only so
    the internal API stays unchanged. BNode identifiers must be valid
    NCNames (``_:[A-Za-z][A-Za-z0-9]*``) so that they can be serialized as a
    nodeID, hence the leading letter. See
    http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID
    """
    prefix = "N"  # ensure that id starts with a letter
    return prefix
362 | |
363 | |
def _serial_number_generator():
    """
    Build and return a zero-argument callable that yields the hex form of
    a fresh UUID4 on every call.
    """
    import uuid

    def _generator():
        fresh = uuid.uuid4()
        return fresh.hex

    return _generator
374 | |
375 | |
class BNode(Identifier):
    """
    Blank Node: http://www.w3.org/TR/rdf-concepts/#section-blank-nodes

    """
    __slots__ = ()

    def __new__(cls, value=None,
                _sn_gen=_serial_number_generator(), _prefix=_unique_id()):
        """
        Create a blank node.

        Only store implementations should pass in a ``value``; without one
        an identifier is made from ``_prefix`` followed by ``_sn_gen()``.
        """
        if value is None:
            # generated so that BNode values do not collide with ones created
            # with a different instance of this module at some other time
            value = "%s%s" % (_prefix, _sn_gen())
        # TODO: a supplied value is not checked; for RDF/XML and N3 it needs
        # to be usable as a nodeID, i.e. a valid NCName
        # _:[A-Za-z][A-Za-z0-9]*
        # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID
        return Identifier.__new__(cls, value)

    def toPython(self):
        """Return the node identifier as a plain text string."""
        return text_type(self)

    def n3(self, namespace_manager=None):
        """Return the ``_:id`` form; ``namespace_manager`` is not used."""
        return "_:%s" % self

    def __getnewargs__(self):
        return (text_type(self), )

    def __reduce__(self):
        return (BNode, (text_type(self),))

    if PY2:
        def __str__(self):
            return self.encode()

    def __repr__(self):
        clsName = self.__class__.__name__
        if self.__class__ is BNode:
            clsName = "rdflib.term.BNode"
        return """%s('%s')""" % (clsName, str(self))

    def skolemize(self, authority=None, basepath=None):
        """ Create a URIRef "skolem" representation of the BNode, in accordance
        with http://www.w3.org/TR/rdf11-concepts/#section-skolemization

        :param authority: base URI to join onto; a built-in default when None
        :param basepath: path prefix; ``rdflib_skolem_genid`` when None

        .. versionadded:: 4.0
        """
        authority = "http://rdlib.net/" if authority is None else authority
        basepath = rdflib_skolem_genid if basepath is None else basepath
        return URIRef(urljoin(authority, "%s%s" % (basepath, text_type(self))))
438 | |
439 | |
440 class Literal(Identifier): | |
441 __doc__ = """ | |
442 RDF Literal: http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal | |
443 | |
444 The lexical value of the literal is the unicode object | |
445 The interpreted, datatyped value is available from .value | |
446 | |
447 Language tags must be valid according to :rfc:5646 | |
448 | |
449 For valid XSD datatypes, the lexical form is optionally normalized | |
450 at construction time. Default behaviour is set by rdflib.NORMALIZE_LITERALS | |
451 and can be overridden by the normalize parameter to __new__ | |
452 | |
453 Equality and hashing of Literals are done based on the lexical form, i.e.: | |
454 | |
455 >>> from rdflib.namespace import XSD | |
456 | |
457 >>> Literal('01')!=Literal('1') # clear - strings differ | |
458 True | |
459 | |
460 but with data-type they get normalized: | |
461 | |
462 >>> Literal('01', datatype=XSD.integer)!=Literal('1', datatype=XSD.integer) | |
463 False | |
464 | |
465 unless disabled: | |
466 | |
467 >>> Literal('01', datatype=XSD.integer, normalize=False)!=Literal('1', datatype=XSD.integer) | |
468 True | |
469 | |
470 | |
471 Value based comparison is possible: | |
472 | |
473 >>> Literal('01', datatype=XSD.integer).eq(Literal('1', datatype=XSD.float)) | |
474 True | |
475 | |
476 The eq method also provides limited support for basic python types: | |
477 | |
478 >>> Literal(1).eq(1) # fine - int compatible with xsd:integer | |
479 True | |
480 >>> Literal('a').eq('b') # fine - str compatible with plain-lit | |
481 False | |
482 >>> Literal('a', datatype=XSD.string).eq('a') # fine - str compatible with xsd:string | |
483 True | |
484 >>> Literal('a').eq(1) # not fine, int incompatible with plain-lit | |
485 NotImplemented | |
486 | |
487 Greater-than/less-than ordering comparisons are also done in value | |
488 space, when compatible datatypes are used. Incompatible datatypes | |
489 are ordered by DT, or by lang-tag. For other nodes the ordering | |
490 is None < BNode < URIRef < Literal | |
491 | |
492 Any comparison with non-rdflib Node are "NotImplemented" | |
493 In PY2.X some stable order will be made up by python | |
494 | |
495 In PY3 this is an error. | |
496 | |
497 >>> from rdflib import Literal, XSD | |
498 >>> lit2006 = Literal('2006-01-01',datatype=XSD.date) | |
499 >>> lit2006.toPython() | |
500 datetime.date(2006, 1, 1) | |
501 >>> lit2006 < Literal('2007-01-01',datatype=XSD.date) | |
502 True | |
503 >>> Literal(datetime.utcnow()).datatype | |
504 rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#dateTime') | |
505 >>> Literal(1) > Literal(2) # by value | |
506 False | |
507 >>> Literal(1) > Literal(2.0) # by value | |
508 False | |
509 >>> Literal('1') > Literal(1) # by DT | |
510 True | |
511 >>> Literal('1') < Literal('1') # by lexical form | |
512 False | |
513 >>> Literal('a', lang='en') > Literal('a', lang='fr') # by lang-tag | |
514 False | |
515 >>> Literal(1) > URIRef('foo') # by node-type | |
516 True | |
517 | |
518 The > < operators will eat this NotImplemented and either make up | |
519 an ordering (py2.x) or throw a TypeError (py3k): | |
520 | |
521 >>> Literal(1).__gt__(2.0) | |
522 NotImplemented | |
523 | |
524 | |
525 """ | |
526 | |
527 if not PY3: | |
528 __slots__ = ("language", "datatype", "value", "_language", | |
529 "_datatype", "_value") | |
530 else: | |
531 __slots__ = ("_language", "_datatype", "_value") | |
532 | |
def __new__(cls, lexical_or_value, lang=None, datatype=None, normalize=None):
    """
    Build a Literal from another Literal, a lexical string/bytes, or an
    arbitrary Python object.

    :param lexical_or_value: the source of the literal
    :param lang: language tag; an empty string counts as no tag
    :param datatype: datatype URI, wrapped in a URIRef when given
    :param normalize: whether to normalise the lexical form of a parsed
        string; ``rdflib.NORMALIZE_LITERALS`` is used when None
    :raises TypeError: if both ``lang`` and ``datatype`` are given
    :raises Exception: if ``lang`` is not a valid language tag
    """
    # no empty lang-tags in RDF
    if lang == '':
        lang = None

    if normalize is None:
        normalize = rdflib.NORMALIZE_LITERALS

    if lang is not None and datatype is not None:
        raise TypeError(
            "A Literal can only have one of lang or datatype, "
            "per http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal")

    if lang and not _is_valid_langtag(lang):
        raise Exception("'%s' is not a valid language tag!" % lang)

    if datatype:
        datatype = URIRef(datatype)

    value = None
    if isinstance(lexical_or_value, Literal):
        # copy-construct from another Literal
        source = lexical_or_value
        lang = lang or source.language
        if datatype:
            # an explicit datatype overrides the source's: re-parse its text
            value = _castLexicalToPython(source, datatype)
        else:
            datatype = source.datatype
            value = source.value

    elif isinstance(lexical_or_value, string_types) or (
            PY3 and isinstance(lexical_or_value, bytes)):
        # a string: try parsing it as the lexical form of the datatype
        value = _castLexicalToPython(lexical_or_value, datatype)

        if value is not None and normalize:
            canonical, _ignored = _castPythonToLiteral(value, datatype)
            if canonical is not None and _is_valid_unicode(canonical):
                lexical_or_value = canonical

    else:
        # some other python object: it is the value, derive the lexical form
        value = lexical_or_value
        lexical, inferred = _castPythonToLiteral(lexical_or_value, datatype)

        datatype = datatype or inferred
        if lexical is not None:
            lexical_or_value = lexical
        if datatype:
            lang = None

    if PY3 and isinstance(lexical_or_value, bytes):
        lexical_or_value = lexical_or_value.decode('utf-8')

    try:
        literal = text_type.__new__(cls, lexical_or_value)
    except UnicodeDecodeError:
        literal = text_type.__new__(cls, lexical_or_value, 'utf-8')

    literal._language = lang
    literal._datatype = datatype
    literal._value = value
    return literal
596 | |
def normalize(self):
    """
    Returns a new literal with a normalised lexical representation
    of this literal
    >>> from rdflib import XSD
    >>> Literal("01", datatype=XSD.integer, normalize=False).normalize()
    rdflib.term.Literal(u'1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    Illegal lexical forms for the datatype given are simply passed on
    >>> Literal("a", datatype=XSD.integer, normalize=False)
    rdflib.term.Literal(u'a', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    A literal whose ``value`` is None is returned unchanged.
    """
    if self.value is None:
        return self
    return Literal(self.value, datatype=self.datatype, lang=self.language)
615 | |
@property
def value(self):
    """Read-only view of the ``_value`` slot."""
    stored = self._value
    return stored
619 | |
@property
def language(self):
    """Read-only view of the ``_language`` slot."""
    stored = self._language
    return stored
623 | |
@property
def datatype(self):
    """Read-only view of the ``_datatype`` slot."""
    stored = self._datatype
    return stored
627 | |
def __reduce__(self):
    """Pickle as a call to ``Literal(text, language, datatype)``."""
    ctor_args = (text_type(self), self.language, self.datatype)
    return (Literal, ctor_args)
630 | |
def __getstate__(self):
    """Return ``(None, state)`` where ``state`` holds language and datatype."""
    state = {"language": self.language, "datatype": self.datatype}
    return (None, state)
633 | |
def __setstate__(self, arg):
    """Restore language and datatype from a ``(ignored, state_dict)`` pair."""
    _ignored, state = arg
    self._language = state["language"]
    self._datatype = state["datatype"]
638 | |
def __add__(self, val):
    """
    Add ``val`` to this literal and return the result as a new Literal.

    >>> Literal(1) + 1
    rdflib.term.Literal(u'2', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))
    >>> Literal("1") + "1"
    rdflib.term.Literal(u'11')

    * ``None`` leaves the literal unchanged.
    * Equal datatypes: the Python values are added and keep the datatype.
    * Two different numeric datatypes: the result is an xsd:decimal,
      rounded to 15 decimal places with trailing zeros removed.
    * Anything else: string concatenation.
    """

    # if no val is supplied, return this Literal
    if val is None:
        return self

    # convert the val to a Literal, if it isn't already one
    if not isinstance(val, Literal):
        val = Literal(val)

    # if the datatypes are the same, just add the Python values and convert back
    if self.datatype == val.datatype:
        return Literal(self.toPython() + val.toPython(), self.language, datatype=self.datatype)

    # if the datatypes are not the same but are both numeric, add the Python
    # values, strip off decimal junk and return as a decimal
    if (
        self.datatype in _NUMERIC_LITERAL_TYPES
        and
        val.datatype in _NUMERIC_LITERAL_TYPES
    ):
        total = round(Decimal(self.toPython()) + Decimal(val.toPython()), 15)
        # Format the Decimal itself: '%f' would go through float and keep
        # only 6 decimal places, throwing away the precision kept above.
        digits = format(total, 'f')
        if '.' in digits:
            # only strip zeros that follow a decimal point
            digits = digits.rstrip('0').rstrip('.')
        return Literal(Decimal(digits), datatype=_XSD_DECIMAL)

    # in all other cases, perform string concatenation
    try:
        s = text_type.__add__(self, val)
    except TypeError:
        s = str(self.value) + str(val)

    # if the original datatype is string-like, use that; if not, use string
    if self.datatype in _STRING_LITERAL_TYPES:
        new_datatype = self.datatype
    else:
        new_datatype = _XSD_STRING

    return Literal(s, self.language, datatype=new_datatype)
686 | |
def __bool__(self):
    """
    Truth value of the Literal (used by ``if``, ``bool(literal)``, etc.).

    The interpreted value decides when there is one; otherwise the literal
    is true exactly when its lexical form is non-empty.
    """
    interpreted = self.value
    if interpreted is None:
        return len(self) != 0
    return bool(interpreted)
695 | |
696 if PY2: | |
697 __nonzero__ = __bool__ | |
698 | |
def __neg__(self):
    """
    Unary minus; only for literals whose value is an int, long or float.

    >>> (- Literal(1))
    rdflib.term.Literal(u'-1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))
    >>> (- Literal(10.5))
    rdflib.term.Literal(u'-10.5', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#double'))
    >>> from rdflib.namespace import XSD
    >>> (- Literal("1", datatype=XSD.integer))
    rdflib.term.Literal(u'-1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    >>> (- Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(u'1')
    >>>
    """
    number = self.value
    if not isinstance(number, (int, long_type, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(-number)
720 | |
def __pos__(self):
    """
    Unary plus; only for literals whose value is an int, long or float.

    >>> (+ Literal(1))
    rdflib.term.Literal(u'1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))
    >>> (+ Literal(-1))
    rdflib.term.Literal(u'-1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))
    >>> from rdflib.namespace import XSD
    >>> (+ Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(u'-1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    >>> (+ Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(u'1')
    """
    number = self.value
    if not isinstance(number, (int, long_type, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(+number)
740 | |
def __abs__(self):
    """
    Absolute value; only for literals whose value is an int, long or float.

    >>> abs(Literal(-1))
    rdflib.term.Literal(u'1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    >>> from rdflib.namespace import XSD
    >>> abs( Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(u'1', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    >>> abs(Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(u'1')
    """
    number = self.value
    if not isinstance(number, (int, long_type, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(abs(number))
759 | |
def __invert__(self):
    """
    Bitwise inversion; only for literals whose value is an integer.

    >>> ~(Literal(-1))
    rdflib.term.Literal(u'0', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    >>> from rdflib.namespace import XSD
    >>> ~( Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(u'0', datatype=rdflib.term.URIRef(u'http://www.w3.org/2001/XMLSchema#integer'))

    Not working:

    >>> ~(Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(u'1')

    :raises TypeError: if the value is not an integer (floats included)
    """
    # floats have no ``__invert__``: letting them past this guard would end
    # in an AttributeError instead of the TypeError promised above
    if isinstance(self.value, (int, long_type)):
        return Literal(self.value.__invert__())
    else:
        raise TypeError("Not a number; %s" % repr(self))
780 | |
def __gt__(self, other):
    """

    This implements ordering for Literals,
    the other comparison methods delegate here

    This tries to implement this:
    http://www.w3.org/TR/sparql11-query/#modOrderBy

    In short, Literals with compatible data-types are ordered in value
    space, i.e.
    >>> from rdflib import XSD

    >>> Literal(1) > Literal(2) # int/int
    False
    >>> Literal(2.0) > Literal(1) # double/int
    True
    >>> from decimal import Decimal
    >>> Literal(Decimal("3.3")) > Literal(2.0) # decimal/double
    True
    >>> Literal(Decimal("3.3")) < Literal(4.0) # decimal/double
    True
    >>> Literal('b') > Literal('a') # plain lit/plain lit
    True
    >>> Literal('b') > Literal('a', datatype=XSD.string) # plain lit/xsd:str
    True

    Incompatible datatype mismatches ordered by DT

    >>> Literal(1) > Literal("2") # int>string
    False

    Langtagged literals by lang tag
    >>> Literal("a", lang="en") > Literal("a", lang="fr")
    False
    """
    if other is None:
        return True  # Everything is greater than None

    if not isinstance(other, Literal):
        # Literals are the greatest of all nodes; anything that is not a
        # node cannot be compared at all
        return True if isinstance(other, Node) else NotImplemented

    if self.datatype in _NUMERIC_LITERAL_TYPES and \
            other.datatype in _NUMERIC_LITERAL_TYPES:
        return self.value > other.value

    # plain-literals and xsd:string literals are "the same" at this stage
    dtself = self.datatype or _XSD_STRING
    dtother = other.datatype or _XSD_STRING

    if dtself != dtother:
        if rdflib.DAWG_LITERAL_COLLATION:
            return NotImplemented
        return dtself > dtother

    if self.language != other.language:
        # a literal without a language tag sorts before a tagged one
        if not self.language:
            return False
        if not other.language:
            return True
        return self.language > other.language

    if self.value is not None and other.value is not None:
        if type(self.value) in _TOTAL_ORDER_CASTERS:
            caster = _TOTAL_ORDER_CASTERS[type(self.value)]
            return caster(self.value) > caster(other.value)

        try:
            return self.value > other.value
        except TypeError:
            # values cannot be ordered: fall through to the lexical form
            pass

    if text_type(self) != text_type(other):
        return text_type(self) > text_type(other)

    # same language, same lexical form, check real dt
    # plain-literals come before xsd:string!
    if self.datatype != other.datatype:
        if not self.datatype:
            return False
        if not other.datatype:
            return True
        return self.datatype > other.datatype

    return False  # they are the same
873 | |
def __lt__(self, other):
    """
    Less-than for Literals, derived from ``__gt__`` and ``eq``.

    Nothing is less than None and no Literal is less than another kind of
    Node. Returns NotImplemented for non-nodes, when ``__gt__`` or ``eq``
    return NotImplemented, or when either of them raises TypeError.
    """
    if other is None:
        return False  # Nothing is less than None
    if isinstance(other, Literal):
        try:
            greater = self.__gt__(other)
            # NotImplemented must be passed on, never tested for truth:
            # ``not NotImplemented`` used to turn it into a plain False
            if greater is NotImplemented:
                return NotImplemented
            if greater:
                return False
            same = self.eq(other)
            if same is NotImplemented:
                return NotImplemented
            return not same
        except TypeError:
            return NotImplemented
    if isinstance(other, Node):
        return False  # all nodes are less-than Literals

    return NotImplemented
886 | |
def __le__(self, other):
    """
    Less-than or equal: true if ``__lt__`` holds, else decided by ``eq``.

    >>> from rdflib.namespace import XSD
    >>> Literal('2007-01-01T10:00:00', datatype=XSD.dateTime
    ...     ) <= Literal('2007-01-01T10:00:00', datatype=XSD.dateTime)
    True

    Returns NotImplemented when ``__lt__`` does, or when ``eq`` raises
    TypeError.
    """
    r = self.__lt__(other)
    # NotImplemented is truthy (and newer Pythons refuse to evaluate it as a
    # boolean), so it has to be passed on before the truth test below
    if r is NotImplemented:
        return NotImplemented
    if r:
        return True
    try:
        return self.eq(other)
    except TypeError:
        return NotImplemented
901 | |
def __ge__(self, other):
    """
    Greater-than or equal: true if ``__gt__`` holds, else decided by ``eq``.

    Returns NotImplemented when ``__gt__`` does, or when ``eq`` raises
    TypeError.
    """
    r = self.__gt__(other)
    # NotImplemented is truthy (and newer Pythons refuse to evaluate it as a
    # boolean), so it has to be passed on before the truth test below
    if r is NotImplemented:
        return NotImplemented
    if r:
        return True
    try:
        return self.eq(other)
    except TypeError:
        return NotImplemented
910 | |
def _comparable_to(self, other):
    """
    Helper method to decide which things are meaningful to
    rich-compare with this literal

    Returns True for anything that is not a Literal. For a Literal it
    returns False when the datatypes or the language tags (compared
    case-insensitively) rule the comparison out.
    """
    if isinstance(other, Literal):
        if (self.datatype and other.datatype):
            # two datatyped literals
            if self.datatype not in XSDToPython or other.datatype not in XSDToPython:
                # non XSD DTs must match
                if self.datatype != other.datatype:
                    return False

        else:
            # xsd:string may be compared with plain literals, whichever
            # side the xsd:string is on. The two alternatives must be
            # negated together: without the outer parentheses ``not`` only
            # applied to the first one and the check was asymmetric.
            # NOTE(review): two plain literals still yield False here, as
            # before - confirm whether that is intended.
            if not ((self.datatype == _XSD_STRING and not other.datatype) or
                    (other.datatype == _XSD_STRING and not self.datatype)):
                return False

        # if given lang-tag has to be case insensitive equal
        if (self.language or "").lower() != (other.language or "").lower():
            return False

    return True
935 | |
def __hash__(self):
    """
    Hash of the lexical form, mixed (xor) with the hashes of the
    lower-cased language tag and of the datatype when they are set.

    >>> from rdflib.namespace import XSD
    >>> a = {Literal('1', datatype=XSD.integer):'one'}
    >>> Literal('1', datatype=XSD.double) in a
    False

    The only required property of a hash is that objects which compare
    equal have the same hash value; Python advises mixing together the
    hashes of the components that take part in comparison
    (3.4.1 Basic customization).

    "Two literals are equal if and only if all of the following hold:
    * The strings of the two lexical forms compare equal, character by
    character.
    * Either both or neither have language tags.
    * The language tags, if any, compare equal.
    * Either both or neither have datatype URIs.
    * The two datatype URIs, if any, compare equal, character by
    character."
    -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax)

    """
    # call the text type's hash directly rather than through super(),
    # for efficiency reasons
    digest = text_type.__hash__(self)
    tag = self.language
    if tag:
        digest ^= hash(tag.lower())
    dt = self.datatype
    if dt:
        digest ^= hash(dt)
    return digest
972 | |
def __eq__(self, other):
    """
    Literals are only equal to other literals.

    The comparison is term equality: same datatype, same language tag
    (ignoring case) and same lexical form.

    "Two literals are equal if and only if all of the following hold:
    * The strings of the two lexical forms compare equal, character by character.
    * Either both or neither have language tags.
    * The language tags, if any, compare equal.
    * Either both or neither have datatype URIs.
    * The two datatype URIs, if any, compare equal, character by character."
    -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax)

    >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo"))
    True
    >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo2"))
    False

    >>> Literal("1", datatype=URIRef("foo")) == Literal("2", datatype=URIRef("foo"))
    False
    >>> Literal("1", datatype=URIRef("foo")) == "asdf"
    False
    >>> from rdflib import XSD
    >>> Literal('2007-01-01', datatype=XSD.date) == Literal('2007-01-01', datatype=XSD.date)
    True
    >>> Literal('2007-01-01', datatype=XSD.date) == date(2007, 1, 1)
    False
    >>> Literal("one", lang="en") == Literal("one", lang="en")
    True
    >>> Literal("hast", lang='en') == Literal("hast", lang='de')
    False
    >>> Literal("1", datatype=XSD.integer) == Literal(1)
    True
    >>> Literal("1", datatype=XSD.integer) == Literal("01", datatype=XSD.integer)
    True

    """
    if self is other:
        return True
    if other is None or not isinstance(other, Literal):
        return False

    # language tags are compared case-insensitively; a missing tag is None
    own_lang = self.language.lower() if self.language else None
    their_lang = other.language.lower() if other.language else None

    return (self.datatype == other.datatype
            and own_lang == their_lang
            and text_type.__eq__(self, other))
1019 | |
def eq(self, other):
    """
    Compare the value of this literal with something else

    Either, with the value of another literal
    comparisons are then done in literal "value space",
    and according to the rules of XSD subtype-substitution/type-promotion

    OR, with a python object:

    basestring objects can be compared with plain-literals,
    or those with datatype xsd:string

    bool objects with xsd:boolean (and, because ``bool`` is a subclass
    of ``int``, also with numeric xsd types)

    an int, long or float with numeric xsd types

    isodate date,time,datetime objects with xsd:date,xsd:time or xsd:datetime

    Any other operations returns NotImplemented

    :raises TypeError: when two literals have matching datatypes but
        their values cannot be determined and their lexical forms
        differ, or (with ``rdflib.DAWG_LITERAL_COLLATION`` set) when
        their datatypes differ
    """
    if isinstance(other, Literal):

        if self.datatype in _NUMERIC_LITERAL_TYPES \
                and other.datatype in _NUMERIC_LITERAL_TYPES:
            if self.value is not None and other.value is not None:
                return self.value == other.value
            else:
                if text_type.__eq__(self, other):
                    return True
                raise TypeError(
                    'I cannot know that these two lexical forms do not map to the same value: %s and %s' % (self, other))
        if (self.language or "").lower() != (other.language or "").lower():
            return False

        # plain literals are treated as xsd:string here
        dtself = self.datatype or _XSD_STRING
        dtother = other.datatype or _XSD_STRING

        if (dtself == _XSD_STRING and dtother == _XSD_STRING):
            # string/plain literals, compare on lexical form
            return text_type.__eq__(self, other)

        if dtself != dtother:
            if rdflib.DAWG_LITERAL_COLLATION:
                raise TypeError("I don't know how to compare literals with datatypes %s and %s" % (
                    self.datatype, other.datatype))
            else:
                return False

        # matching non-string DTs now - do we compare values or
        # lexical form first? comparing two ints is far quicker -
        # maybe there are counter examples

        if self.value is not None and other.value is not None:

            if self.datatype in (_RDF_XMLLITERAL, _RDF_HTMLLITERAL):
                return _isEqualXMLNode(self.value, other.value)

            return self.value == other.value
        else:

            if text_type.__eq__(self, other):
                return True

            if self.datatype == _XSD_STRING:
                return False  # string value space=lexical space

            # matching DTs, but not matching, we cannot compare!
            raise TypeError(
                'I cannot know that these two lexical forms do not map to the same value: %s and %s' % (self, other))

    elif isinstance(other, Node):
        return False  # no non-Literal nodes are equal to a literal

    elif isinstance(other, string_types):
        # only plain-literals can be directly compared to strings

        # TODO: Is "blah"@en eq "blah" ?
        if self.language is not None:
            return False

        if (self.datatype == _XSD_STRING or self.datatype is None):
            return text_type(self) == other

    elif isinstance(other, bool):
        # bool is a subclass of int, so this test has to come before the
        # numeric one; placed after it the xsd:boolean comparison could
        # never be reached.  Numeric datatypes are still accepted so that
        # comparisons such as a numeric literal against True keep
        # behaving as they did before.
        if self.datatype == _XSD_BOOLEAN \
                or self.datatype in _NUMERIC_LITERAL_TYPES:
            return self.value == other
    elif isinstance(other, (int, long_type, float)):
        if self.datatype in _NUMERIC_LITERAL_TYPES:
            return self.value == other
    elif isinstance(other, (date, datetime, time)):
        if self.datatype in (_XSD_DATETIME, _XSD_DATE, _XSD_TIME):
            return self.value == other
    elif isinstance(other, (timedelta, Duration)):
        if self.datatype in (_XSD_DURATION, _XSD_DAYTIMEDURATION, _XSD_YEARMONTHDURATION):
            return self.value == other

    # no rule matched: the comparison is not supported
    return NotImplemented
1119 | |
def neq(self, other):
    """
    Return the negation of :meth:`eq` for ``other``.

    :returns: ``True``/``False``, or ``NotImplemented`` when :meth:`eq`
              cannot compare this literal with ``other``
    :raises TypeError: whenever :meth:`eq` raises it
    """
    outcome = self.eq(other)
    if outcome is NotImplemented:
        # negating NotImplemented would silently report "equal"
        # (and is an error on newer Pythons); pass it through instead
        return NotImplemented
    return not outcome
1122 | |
def n3(self, namespace_manager=None):
    r'''
    Returns a representation in the N3 format.

    Examples::

        >>> Literal("foo").n3()
        u'"foo"'

    Strings with newlines or triple-quotes::

        >>> Literal("foo\nbar").n3()
        u'"""foo\nbar"""'

        >>> Literal("''\'").n3()
        u'"\'\'\'"'

        >>> Literal('"""').n3()
        u'"\\"\\"\\""'

    Language::

        >>> Literal("hello", lang="en").n3()
        u'"hello"@en'

    Datatypes::

        >>> Literal(1).n3()
        u'"1"^^<http://www.w3.org/2001/XMLSchema#integer>'

        >>> Literal(1.0).n3()
        u'"1.0"^^<http://www.w3.org/2001/XMLSchema#double>'

        >>> Literal(True).n3()
        u'"true"^^<http://www.w3.org/2001/XMLSchema#boolean>'

    Datatype and language isn't allowed (datatype takes precedence)::

        >>> Literal(1, lang="en").n3()
        u'"1"^^<http://www.w3.org/2001/XMLSchema#integer>'

    Custom datatype::

        >>> footype = URIRef("http://example.org/ns#foo")
        >>> Literal("1", datatype=footype).n3()
        u'"1"^^<http://example.org/ns#foo>'

    Passing a namespace-manager will use it to abbreviate datatype URIs:

        >>> from rdflib import Graph
        >>> Literal(1).n3(Graph().namespace_manager)
        u'"1"^^xsd:integer'
    '''
    if not namespace_manager:
        # no manager: datatype URIs are written out in full
        return self._literal_n3()
    # let the manager abbreviate the datatype URI
    return self._literal_n3(qname_callback=namespace_manager.normalizeUri)
1180 | |
def _literal_n3(self, use_plain=False, qname_callback=None):
    '''
    Serialize this literal, optionally in its unquoted shorthand form
    and optionally abbreviating the datatype through ``qname_callback``.

    Using plain literal (shorthand) output::
        >>> from rdflib.namespace import XSD

        >>> Literal(1)._literal_n3(use_plain=True)
        u'1'

        >>> Literal(1.0)._literal_n3(use_plain=True)
        u'1e+00'

        >>> Literal(1.0, datatype=XSD.decimal)._literal_n3(use_plain=True)
        u'1.0'

        >>> Literal(1.0, datatype=XSD.float)._literal_n3(use_plain=True)
        u'"1.0"^^<http://www.w3.org/2001/XMLSchema#float>'

        >>> Literal("foo", datatype=XSD.string)._literal_n3(
        ...         use_plain=True)
        u'"foo"^^<http://www.w3.org/2001/XMLSchema#string>'

        >>> Literal(True)._literal_n3(use_plain=True)
        u'true'

        >>> Literal(False)._literal_n3(use_plain=True)
        u'false'

        >>> Literal(1.91)._literal_n3(use_plain=True)
        u'1.91e+00'

    Only limited precision available for floats:
        >>> Literal(0.123456789)._literal_n3(use_plain=True)
        u'1.234568e-01'

        >>> Literal('0.123456789',
        ...     datatype=XSD.decimal)._literal_n3(use_plain=True)
        u'0.123456789'

    Using callback for datatype QNames::

        >>> Literal(1)._literal_n3(
        ...         qname_callback=lambda uri: "xsd:integer")
        u'"1"^^xsd:integer'

    '''
    dtype = self.datatype

    shorthand_ok = use_plain and dtype in _PLAIN_LITERAL_TYPES
    if shorthand_ok and self.value is not None:
        # If self is inf or NaN, we need a datatype
        # (there is no plain representation)
        if dtype in _NUMERIC_INF_NAN_LITERAL_TYPES:
            try:
                as_float = float(self)
                if math.isinf(as_float) or math.isnan(as_float):
                    return self._literal_n3(False, qname_callback)
            except ValueError:
                return self._literal_n3(False, qname_callback)

        # we try to produce "pretty" output
        if dtype == _XSD_DOUBLE:
            # strip the trailing zeros of the mantissa
            return sub("\\.?0*e", "e", u'%e' % float(self))
        if dtype == _XSD_DECIMAL:
            text = '%s' % self
            # a decimal shorthand always carries a fractional part
            return text if '.' in text else text + '.0'
        if dtype == _XSD_BOOLEAN:
            return (u'%s' % self).lower()
        return u'%s' % self

    quoted = self._quote_encode()

    dt_token = None
    if dtype:
        abbreviated = qname_callback(dtype) if qname_callback else None
        # fall back to the full URI when there is no usable abbreviation
        dt_token = abbreviated or "<%s>" % dtype
        if dtype in _NUMERIC_INF_NAN_LITERAL_TYPES:
            try:
                as_float = float(self)
                if math.isinf(as_float):
                    # py string reps: float: 'inf', Decimal: 'Infinity'
                    # both need to become "INF" in xsd datatypes
                    quoted = quoted.replace('inf', 'INF').replace(
                        'Infinity', 'INF')
                if math.isnan(as_float):
                    quoted = quoted.replace('nan', 'NaN')
            except ValueError:
                # if we can't cast to float something is wrong, but we can
                # still serialize. Warn user about it
                warnings.warn("Serializing weird numerical %r" % self)

    lang = self.language
    if lang:
        return '%s@%s' % (quoted, lang)
    if dtype:
        return '%s^^%s' % (quoted, dt_token)
    return '%s' % quoted
1285 | |
1286 def _quote_encode(self): | |
1287 # This simpler encoding doesn't work; a newline gets encoded as "\\n", | |
1288 # which is ok in sourcecode, but we want "\n". | |
1289 # encoded = self.encode('unicode-escape').replace( | |
1290 # '\\', '\\\\').replace('"','\\"') | |
1291 # encoded = self.replace.replace('\\', '\\\\').replace('"','\\"') | |
1292 | |
1293 # NOTE: Could in theory chose quotes based on quotes appearing in the | |
1294 # string, i.e. '"' and "'", but N3/turtle doesn't allow "'"(?). | |
1295 | |
1296 if "\n" in self: | |
1297 # Triple quote this string. | |
1298 encoded = self.replace('\\', '\\\\') | |
1299 if '"""' in self: | |
1300 # is this ok? | |
1301 encoded = encoded.replace('"""', '\\"\\"\\"') | |
1302 if encoded[-1] == '"' and encoded[-2] != '\\': | |
1303 encoded = encoded[:-1] + '\\' + '"' | |
1304 | |
1305 return '"""%s"""' % encoded.replace('\r', '\\r') | |
1306 else: | |
1307 return '"%s"' % self.replace( | |
1308 '\n', '\\n').replace( | |
1309 '\\', '\\\\').replace( | |
1310 '"', '\\"').replace( | |
1311 '\r', '\\r') | |
1312 | |
if PY2:
    # Only defined when the PY2 flag is set
    def __str__(self):
        """Return the result of ``self.encode()``."""
        encoded = self.encode()
        return encoded
1316 | |
def __repr__(self):
    """
    Return a constructor-like representation: the repr of the lexical
    form followed by ``lang=`` and ``datatype=`` when they are set.
    """
    pieces = [super(Literal, self).__repr__()]
    optional = (("lang", self.language), ("datatype", self.datatype))
    pieces.extend(
        "%s=%s" % (keyword, repr(setting))
        for keyword, setting in optional
        if setting is not None)

    # Literal itself is shown fully qualified, subclasses by bare name
    if self.__class__ == Literal:
        clsName = "rdflib.term.Literal"
    else:
        clsName = self.__class__.__name__
    return """%s(%s)""" % (clsName, ", ".join(pieces))
1328 | |
def toPython(self):
    """
    Returns an appropriate python datatype derived from this RDF Literal

    That is ``self.value`` when it is not ``None``, otherwise the
    literal itself.
    """
    converted = self.value
    return self if converted is None else converted
1337 | |
1338 | |
def _parseXML(xmlstring):
    """
    Parse an XML fragment into a normalized minidom document.

    The fragment is wrapped in a synthetic ``rdflibtoplevelelement``
    element before it is handed to the parser.
    """
    payload = xmlstring.encode('utf-8') if PY2 else xmlstring
    wrapped = "<rdflibtoplevelelement>%s</rdflibtoplevelelement>" % payload
    document = xml.dom.minidom.parseString(wrapped)
    document.normalize()
    return document
1346 | |
1347 | |
1348 def _parseHTML(htmltext): | |
1349 try: | |
1350 import html5lib | |
1351 parser = html5lib.HTMLParser( | |
1352 tree=html5lib.treebuilders.getTreeBuilder("dom")) | |
1353 retval = parser.parseFragment(htmltext) | |
1354 retval.normalize() | |
1355 return retval | |
1356 except ImportError: | |
1357 raise ImportError( | |
1358 "HTML5 parser not available. Try installing" + | |
1359 " html5lib <http://code.google.com/p/html5lib>") | |
1360 | |
1361 | |
def _writeXML(xmlnode):
    """
    Serialize a minidom node to UTF-8, dropping the XML declaration and
    the synthetic ``rdflibtoplevelelement`` wrapper when present.
    """
    if isinstance(xmlnode, xml.dom.minidom.DocumentFragment):
        # move the fragment's children into a fresh document first
        holder = xml.dom.minidom.Document()
        holder.childNodes += xmlnode.childNodes
        xmlnode = holder
    serialized = xmlnode.toxml('utf-8')

    # for clean round-tripping, remove headers -- I have great and
    # specific worries that this will blow up later, but this margin
    # is too narrow to contain them
    declaration = b('<?xml version="1.0" encoding="utf-8"?>')
    opening = b('<rdflibtoplevelelement>')
    closing = b('</rdflibtoplevelelement>')
    if serialized.startswith(declaration):
        serialized = serialized[len(declaration):]
    if serialized.startswith(opening):
        serialized = serialized[len(opening):-len(closing)]
    if serialized == b('<rdflibtoplevelelement/>'):
        serialized = b('')
    return serialized
1378 | |
1379 | |
def _unhexlify(value):
    """
    Decode a hexadecimal lexical form, encoding ``str`` input to bytes
    first when the PY3 flag is set.
    """
    # In Python 3.2, unhexlify does not support str (only bytes)
    must_encode = PY3 and isinstance(value, str)
    return unhexlify(value.encode() if must_encode else value)
1385 | |
1386 def _parseBoolean(value): | |
1387 true_accepted_values = ['1', 'true'] | |
1388 false_accepted_values = ['0', 'false'] | |
1389 new_value = value.lower() | |
1390 if new_value in true_accepted_values: | |
1391 return True | |
1392 if new_value not in false_accepted_values: | |
1393 warnings.warn('Parsing weird boolean, % r does not map to True or False' % value, category = DeprecationWarning) | |
1394 return False | |
1395 | |
# Cannot import Namespace/XSD because of circular dependencies,
# so the namespace prefixes and datatype URIs are spelled out here.
_XSD_PFX = 'http://www.w3.org/2001/XMLSchema#'
_RDF_PFX = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

_RDF_XMLLITERAL = URIRef(_RDF_PFX + 'XMLLiteral')
_RDF_HTMLLITERAL = URIRef(_RDF_PFX + 'HTML')

_XSD_STRING = URIRef(_XSD_PFX + 'string')

_XSD_FLOAT = URIRef(_XSD_PFX + 'float')
_XSD_DOUBLE = URIRef(_XSD_PFX + 'double')
_XSD_DECIMAL = URIRef(_XSD_PFX + 'decimal')
_XSD_INTEGER = URIRef(_XSD_PFX + 'integer')
_XSD_BOOLEAN = URIRef(_XSD_PFX + 'boolean')

_XSD_DATETIME = URIRef(_XSD_PFX + 'dateTime')
_XSD_DATE = URIRef(_XSD_PFX + 'date')
_XSD_TIME = URIRef(_XSD_PFX + 'time')
_XSD_DURATION = URIRef(_XSD_PFX + 'duration')
_XSD_DAYTIMEDURATION = URIRef(_XSD_PFX + 'dayTimeDuration')
_XSD_YEARMONTHDURATION = URIRef(_XSD_PFX + 'yearMonthDuration')

_OWL_RATIONAL = URIRef('http://www.w3.org/2002/07/owl#rational')
_XSD_HEXBINARY = URIRef(_XSD_PFX + 'hexBinary')
# TODO: gYearMonth, gYear, gMonthDay, gDay, gMonth

# datatypes whose values are compared numerically
_NUMERIC_LITERAL_TYPES = (
    _XSD_INTEGER,
    _XSD_DECIMAL,
    _XSD_DOUBLE,
    _XSD_FLOAT,
) + tuple(
    URIRef(_XSD_PFX + local_name)
    for local_name in (
        'byte',
        'int',
        'long',
        'negativeInteger',
        'nonNegativeInteger',
        'nonPositiveInteger',
        'positiveInteger',
        'short',
        'unsignedByte',
        'unsignedInt',
        'unsignedLong',
        'unsignedShort',
    )
)

# these have "native" syntax in N3/SPARQL
_PLAIN_LITERAL_TYPES = (
    _XSD_INTEGER,
    _XSD_BOOLEAN,
    _XSD_DOUBLE,
    _XSD_DECIMAL,
    _OWL_RATIONAL,
)

# these have special INF and NaN XSD representations
_NUMERIC_INF_NAN_LITERAL_TYPES = (_XSD_FLOAT, _XSD_DOUBLE, _XSD_DECIMAL)

# the following types need special treatment for reasonable sorting because
# certain instances can't be compared to each other. We treat this by
# partitioning and then sorting within those partitions.
_TOTAL_ORDER_CASTERS = {
    # naive vs. aware: the leading flag separates the two groups
    datetime: lambda value: (
        value.tzinfo is not None
        and value.tzinfo.utcoffset(value) is not None,
        value,
    ),
    # naive vs. aware, as above
    time: lambda value: (
        value.tzinfo is not None
        and value.tzinfo.utcoffset(None) is not None,
        value,
    ),
    xml.dom.minidom.Document: lambda value: value.toxml(),
}


_STRING_LITERAL_TYPES = (
    _XSD_STRING,
    _RDF_XMLLITERAL,
    _RDF_HTMLLITERAL,
) + tuple(
    URIRef(_XSD_PFX + local_name)
    for local_name in ('normalizedString', 'token')
)
1484 | |
1485 | |
1486 def _py2literal(obj, pType, castFunc, dType): | |
1487 if castFunc: | |
1488 return castFunc(obj), dType | |
1489 elif dType: | |
1490 return obj, dType | |
1491 else: | |
1492 return obj, None | |
1493 | |
1494 | |
def _castPythonToLiteral(obj, datatype):
    """
    Casts a tuple of a python type and a special datatype URI to a tuple of the lexical value and a
    datatype URI (or None)

    Datatype-specific rules are consulted first, then the generic
    per-type rules; the first matching rule wins.
    """
    specific = next(
        (rule for rule in _SpecificPythonToXSDRules
         if isinstance(obj, rule[0][0]) and rule[0][1] == datatype),
        None)
    if specific is not None:
        (pType, dType), castFunc = specific
        return _py2literal(obj, pType, castFunc, dType)

    generic = next(
        (rule for rule in _GenericPythonToXSDRules
         if isinstance(obj, rule[0])),
        None)
    if generic is not None:
        pType, (castFunc, dType) = generic
        return _py2literal(obj, pType, castFunc, dType)

    return obj, None  # TODO: is this right for the fall through case?
1508 | |
1509 | |
1510 from decimal import Decimal | |
1511 | |
# Mappings from Python types to XSD datatypes and back (borrowed from sparta)
# datetime instances are also instances of date... so we need to order these.

# SPARQL/Turtle/N3 has shortcuts for integer, double, decimal
# python has only float - to be in tune with sparql/n3/turtle
# we default to XSD.double for float literals

# python ints are promoted to longs when overflowing
# python longs have no limit
# both map to the abstract integer type,
# rather than some concrete bit-limited datatype
_GenericPythonToXSDRules = [
    (string_types, (None, None)),
    (float, (None, _XSD_DOUBLE)),
    (bool, (lambda i: str(i).lower(), _XSD_BOOLEAN)),
    (int, (None, _XSD_INTEGER)),
    (long_type, (None, _XSD_INTEGER)),
    (Decimal, (None, _XSD_DECIMAL)),
    (datetime, (lambda i: i.isoformat(), _XSD_DATETIME)),
    (date, (lambda i: i.isoformat(), _XSD_DATE)),
    (time, (lambda i: i.isoformat(), _XSD_TIME)),
    (Duration, (duration_isoformat, _XSD_DURATION)),
    (timedelta, (duration_isoformat, _XSD_DAYTIMEDURATION)),
    (xml.dom.minidom.Document, (_writeXML, _RDF_XMLLITERAL)),
    # this is a bit dirty - by accident the html5lib parser produces
    # DocumentFragments, and the xml parser Documents; letting this
    # decide what datatype to use makes roundtripping easier, but it is
    # a bit random
    (xml.dom.minidom.DocumentFragment, (_writeXML, _RDF_HTMLLITERAL)),
    (Fraction, (None, _OWL_RATIONAL)),
]

# rules keyed on the (python type, datatype) pair
_SpecificPythonToXSDRules = [
    ((string_types, _XSD_HEXBINARY), hexlify),
]
if PY3:
    _SpecificPythonToXSDRules.append(((bytes, _XSD_HEXBINARY), hexlify))

# datatype -> function turning a lexical form into a python value
XSDToPython = {
    None: None,  # plain literals map directly to value space
    _XSD_TIME: parse_time,
    _XSD_DATE: parse_date,
    URIRef(_XSD_PFX + 'gYear'): parse_date,
    URIRef(_XSD_PFX + 'gYearMonth'): parse_date,
    _XSD_DATETIME: parse_datetime,
    _XSD_DURATION: parse_duration,
    _XSD_DAYTIMEDURATION: parse_duration,
    _XSD_YEARMONTHDURATION: parse_duration,
    _XSD_HEXBINARY: _unhexlify,
    _XSD_STRING: None,
    URIRef(_XSD_PFX + 'normalizedString'): None,
    URIRef(_XSD_PFX + 'token'): None,
    URIRef(_XSD_PFX + 'language'): None,
    _XSD_BOOLEAN: _parseBoolean,
    _XSD_DECIMAL: Decimal,
    _XSD_INTEGER: long_type,
    URIRef(_XSD_PFX + 'nonPositiveInteger'): int,
    URIRef(_XSD_PFX + 'long'): long_type,
    URIRef(_XSD_PFX + 'nonNegativeInteger'): int,
    URIRef(_XSD_PFX + 'negativeInteger'): int,
    URIRef(_XSD_PFX + 'int'): long_type,
    URIRef(_XSD_PFX + 'unsignedLong'): long_type,
    URIRef(_XSD_PFX + 'positiveInteger'): int,
    URIRef(_XSD_PFX + 'short'): int,
    URIRef(_XSD_PFX + 'unsignedInt'): long_type,
    URIRef(_XSD_PFX + 'byte'): int,
    URIRef(_XSD_PFX + 'unsignedShort'): int,
    URIRef(_XSD_PFX + 'unsignedByte'): int,
    _XSD_FLOAT: float,
    _XSD_DOUBLE: float,
    URIRef(_XSD_PFX + 'base64Binary'): base64.b64decode,
    URIRef(_XSD_PFX + 'anyURI'): None,
    _RDF_XMLLITERAL: _parseXML,
    _RDF_HTMLLITERAL: _parseHTML,
}

# working copy of the table that bind() extends at runtime
_toPythonMapping = dict(XSDToPython)
1591 | |
1592 | |
def _castLexicalToPython(lexical, datatype):
    """
    Map a lexical form to the value-space for the given datatype

    :param lexical: the lexical form to convert
    :param datatype: the datatype used to look up a conversion function
    :returns: a python object for the value or ``None`` (unknown
              datatype, or a lexical form the converter rejects)
    """
    # False marks "datatype not registered"; None is a registered
    # datatype whose values are their own lexical forms
    convFunc = _toPythonMapping.get(datatype, False)
    if convFunc:
        try:
            return convFunc(lexical)
        except Exception:
            # not a valid lexical representation for this dt.
            # Exception (rather than a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            return None
    elif convFunc is None:
        # no conv func means 1-1 lexical<->value-space mapping
        try:
            return text_type(lexical)
        except UnicodeDecodeError:
            return text_type(lexical, 'utf-8')
    else:
        # no convFunc - unknown data-type
        return None
1614 | |
1615 | |
def bind(datatype, pythontype, constructor=None, lexicalizer=None, datatype_specific=False):
    """
    register a new datatype<->pythontype binding

    :param datatype: the datatype to register

    :param pythontype: the python type bound to the datatype

    :param constructor: an optional function for converting lexical forms
                        into a Python instances, if not given the pythontype
                        is used directly

    :param lexicalizer: an optional function for converting python objects to
                        lexical form, if not given object.__str__ is used

    :param datatype_specific: makes the lexicalizer function be accessible
                              from the pair (pythontype, datatype) if set to True
                              or from the pythontype otherwise.  False by default

    :raises Exception: when ``datatype_specific`` is set but ``datatype``
                       is ``None``
    """
    if datatype_specific and datatype is None:
        raise Exception("No datatype given for a datatype-specific binding")

    if datatype in _toPythonMapping:
        logger.warning("datatype '%s' was already bound. Rebinding." %
                       datatype)

    # lexical -> python direction
    _toPythonMapping[datatype] = (
        pythontype if constructor is None else constructor)

    # python -> lexical direction
    if not datatype_specific:
        _GenericPythonToXSDRules.append((pythontype, (lexicalizer, datatype)))
        return
    _SpecificPythonToXSDRules.append(((pythontype, datatype), lexicalizer))
1645 | |
1646 | |
class Variable(Identifier):
    """
    A Variable - this is used for querying, or in Formula aware
    graphs, where Variables can be stored in the graph
    """
    __slots__ = ()

    def __new__(cls, value):
        """Create a variable, dropping one leading ``?`` from the name."""
        if not len(value):
            raise Exception(
                "Attempted to create variable with empty string as name!")
        name = value[1:] if value[0] == '?' else value
        return text_type.__new__(cls, name)

    def __repr__(self):
        """Return a constructor-like representation of the variable."""
        # Variable itself is shown fully qualified, subclasses by bare name
        clsName = (
            "rdflib.term.Variable" if self.__class__ is Variable
            else self.__class__.__name__)
        return """%s(%s)""" % (clsName, super(Variable, self).__repr__())

    def toPython(self):
        """Return the name prefixed with ``?``."""
        return "?%s" % self

    def n3(self, namespace_manager=None):
        """Return the name prefixed with ``?``; the manager is unused."""
        return "?%s" % self

    def __reduce__(self):
        """Pickle support: rebuild from the plain text of the name."""
        return (Variable, (text_type(self),))
1678 | |
1679 | |
class Statement(Node, tuple):
    """
    Deprecated pairing of a triple with its context, stored as the
    tuple ``((subject, predicate, object), context)``.
    """

    def __new__(cls, triple, context):
        # unpacking also checks that exactly three terms were given
        subj, pred, obj = triple
        warnings.warn(
            "Class Statement is deprecated, and will be removed in "
            "the future. If you use this please let rdflib-dev know!",
            category=DeprecationWarning, stacklevel=2)
        payload = ((subj, pred, obj), context)
        return tuple.__new__(cls, payload)

    def __reduce__(self):
        """Pickle support: rebuild from the stored triple and context."""
        triple, context = self[0], self[1]
        return (Statement, (triple, context))

    def toPython(self):
        """Return the plain ``(triple, context)`` tuple."""
        triple, context = self[0], self[1]
        return (triple, context)
1695 | |
1696 | |
# Nodes are ordered like this
# See http://www.w3.org/TR/sparql11-query/#modOrderBy
# we leave "space" for more subclasses of Node elsewhere
# default-dict to gracefully fail for new subclasses (they rank as 0)
_ORDERING = defaultdict(int, {
    BNode: 10,
    Variable: 20,
    URIRef: 30,
    Literal: 40,
})
1708 | |
1709 | |
1710 def _isEqualXMLNode(node, other): | |
1711 from xml.dom.minidom import Node | |
1712 | |
1713 def recurse(): | |
1714 # Recursion through the children | |
1715 # In Python2, the semantics of 'map' is such that the check on | |
1716 # length would be unnecessary. In Python 3, | |
1717 # the semantics of map has changed (why, oh why???) and the check | |
1718 # for the length becomes necessary... | |
1719 if len(node.childNodes) != len(other.childNodes): | |
1720 return False | |
1721 for (nc, oc) in map( | |
1722 lambda x, y: (x, y), node.childNodes, other.childNodes): | |
1723 if not _isEqualXMLNode(nc, oc): | |
1724 return False | |
1725 # if we got here then everything is fine: | |
1726 return True | |
1727 | |
1728 if node is None or other is None: | |
1729 return False | |
1730 | |
1731 if node.nodeType != other.nodeType: | |
1732 return False | |
1733 | |
1734 if node.nodeType in [Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE]: | |
1735 return recurse() | |
1736 | |
1737 elif node.nodeType == Node.ELEMENT_NODE: | |
1738 # Get the basics right | |
1739 if not (node.tagName == other.tagName and | |
1740 node.namespaceURI == other.namespaceURI): | |
1741 return False | |
1742 | |
1743 # Handle the (namespaced) attributes; the namespace setting key | |
1744 # should be ignored, though | |
1745 # Note that the minidom orders the keys already, so we do not have | |
1746 # to worry about that, which is a bonus... | |
1747 n_keys = [ | |
1748 k for k in node.attributes.keysNS() | |
1749 if k[0] != 'http://www.w3.org/2000/xmlns/'] | |
1750 o_keys = [ | |
1751 k for k in other.attributes.keysNS() | |
1752 if k[0] != 'http://www.w3.org/2000/xmlns/'] | |
1753 if len(n_keys) != len(o_keys): | |
1754 return False | |
1755 for k in n_keys: | |
1756 if not (k in o_keys | |
1757 and node.getAttributeNS(k[0], k[1]) == | |
1758 other.getAttributeNS(k[0], k[1])): | |
1759 return False | |
1760 | |
1761 # if we got here, the attributes are all right, we can go down | |
1762 # the tree recursively | |
1763 return recurse() | |
1764 | |
1765 elif node.nodeType in [ | |
1766 Node.TEXT_NODE, Node.COMMENT_NODE, Node.CDATA_SECTION_NODE, | |
1767 Node.NOTATION_NODE]: | |
1768 return node.data == other.data | |
1769 | |
1770 elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: | |
1771 return node.data == other.data and node.target == other.target | |
1772 | |
1773 elif node.nodeType == Node.ENTITY_NODE: | |
1774 return node.nodeValue == other.nodeValue | |
1775 | |
1776 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: | |
1777 return node.publicId == other.publicId \ | |
1778 and node.systemId == other.system.Id | |
1779 | |
1780 else: | |
1781 # should not happen, in fact | |
1782 raise Exception( | |
1783 'I dont know how to compare XML Node type: %s' % node.nodeType) | |
1784 | |
1785 | |
if __name__ == '__main__':
    # Run the doctests embedded in this module's docstrings
    from doctest import testmod
    testmod()