Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/term.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 """ | |
2 This module defines the different types of terms. Terms are the kinds of | |
3 objects that can appear in a quoted/asserted triple. This includes those | |
4 that are core to RDF: | |
5 | |
6 * :class:`Blank Nodes <rdflib.term.BNode>` | |
7 * :class:`URI References <rdflib.term.URIRef>` | |
8 * :class:`Literals <rdflib.term.Literal>` (which consist of a literal value, datatype and language tag) | |
9 | |
10 Those that extend the RDF model into N3: | |
11 | |
12 * :class:`Formulae <rdflib.graph.QuotedGraph>` | |
13 * :class:`Universal Quantifications (Variables) <rdflib.term.Variable>` | |
14 | |
15 And those that are primarily for matching against 'Nodes' in the | |
16 underlying Graph: | |
17 | |
18 * REGEX Expressions | |
19 * Date Ranges | |
20 * Numerical Ranges | |
21 | |
22 """ | |
23 | |
# Names exported by ``from rdflib.term import *``.
__all__ = [
    'bind',

    # base types defined in this module
    'Node',
    'Identifier',

    # the core RDF term types named in the module docstring
    'URIRef',
    'BNode',
    'Literal',

    'Variable',
    'Statement',
]
37 | |
38 import logging | |
39 logger = logging.getLogger(__name__) | |
40 import warnings | |
41 import math | |
42 | |
43 import base64 | |
44 import xml.dom.minidom | |
45 | |
46 from urllib.parse import urlparse, urljoin, urldefrag | |
47 from datetime import date, time, datetime | |
48 from re import sub, compile | |
49 from collections import defaultdict | |
50 | |
51 from isodate import parse_time, parse_date, parse_datetime | |
52 | |
53 try: | |
54 from hashlib import md5 | |
55 assert md5 | |
56 except ImportError: | |
57 from md5 import md5 | |
58 | |
59 | |
60 import rdflib | |
61 from . import py3compat | |
62 from rdflib.compat import numeric_greater | |
63 | |
64 | |
# Helper taken from py3compat; the md5_term_hash methods below use it to wrap
# the one-letter tag ("U" / "B") that is fed into the md5 digest.
b = py3compat.b

# Path prefix tested by Genid._is_external_skolem.
skolem_genid = "/.well-known/genid/"
# Path prefix used by BNode.skolemize when minting a skolem URI, tested by
# RDFLibGenid._is_rdflib_skolem and stripped again in URIRef.de_skolemize.
rdflib_skolem_genid = "/.well-known/genid/rdflib/"
# Filled by URIRef.de_skolemize: skolem URI string -> BNode, so that the same
# (non-rdflib) Genid always maps back to the same blank node.
skolems = {}
70 | |
71 | |
72 _invalid_uri_chars = '<>" {}|\\^`' | |
73 | |
74 def _is_valid_uri(uri): | |
75 for c in _invalid_uri_chars: | |
76 if c in uri: return False | |
77 return True | |
78 | |
79 _lang_tag_regex = compile('^[a-zA-Z]+(?:-[a-zA-Z0-9]+)*$') | |
80 | |
81 def _is_valid_langtag(tag): | |
82 return bool(_lang_tag_regex.match(tag)) | |
83 | |
84 def _is_valid_unicode(value): | |
85 """ | |
86 Verify that the provided value can be converted into a Python | |
87 unicode object. | |
88 """ | |
89 if isinstance(value, bytes): | |
90 coding_func, param = getattr(value, 'decode'), 'utf-8' | |
91 elif py3compat.PY3: | |
92 coding_func, param = str, value | |
93 else: | |
94 coding_func, param = str, value | |
95 | |
96 # try to convert value into unicode | |
97 try: | |
98 coding_func(param) | |
99 except UnicodeError: | |
100 return False | |
101 return True | |
102 | |
class Node:
    """
    A Node in the Graph.

    Base type of the term classes in this module; it defines no behaviour
    of its own.
    """

    # empty slots: the base type adds no per-instance attributes
    __slots__ = ()
109 | |
110 | |
class Identifier(Node, str):  # allow Identifiers to be Nodes in the Graph
    """
    See http://www.w3.org/2002/07/rdf-identifer-terminology/
    regarding choice of terminology.

    An Identifier is a ``str`` that is also a Node. Equality and hashing
    take the concrete type into account, so two identifiers with the same
    text but different types are different.
    """

    __slots__ = ()

    def __new__(cls, value):
        return str.__new__(cls, value)

    def eq(self, other):
        """A "semantic"/interpreted equality function,
        by default, same as __eq__"""
        return self.__eq__(other)

    def neq(self, other):
        """A "semantic"/interpreted not equal function,
        by default, same as __ne__"""
        return self.__ne__(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __eq__(self, other):
        """
        Equality for Nodes.

        >>> BNode("foo")==None
        False
        >>> BNode("foo")==URIRef("foo")
        False
        >>> URIRef("foo")==BNode("foo")
        False
        >>> BNode("foo")!=URIRef("foo")
        True
        >>> URIRef("foo")!=BNode("foo")
        True
        >>> Variable('a')!=URIRef('a')
        True
        >>> Variable('a')!=Variable('a')
        False
        """
        # only identifiers of exactly the same type can be equal
        if type(self) is type(other):
            return str(self) == str(other)
        return False

    def __gt__(self, other):
        """
        This implements ordering for Nodes,

        This tries to implement this:
        http://www.w3.org/TR/sparql11-query/#modOrderBy

        Variables are not included in the SPARQL list, but
        they are greater than BNodes and smaller than everything else

        Returns NotImplemented when *other* is neither None nor a Node.
        """
        if other is None:
            return True  # everything bigger than None
        if type(self) is type(other):
            return str(self) > str(other)
        if isinstance(other, Node):
            # different node types: fall back to the per-type rank table
            return _ORDERING[type(self)] > _ORDERING[type(other)]

        return NotImplemented

    def __lt__(self, other):
        """
        Counterpart of ``__gt__``; returns NotImplemented when *other* is
        neither None nor a Node.
        """
        if other is None:
            return False  # Nothing is less than None
        if type(self) is type(other):
            return str(self) < str(other)
        if isinstance(other, Node):
            return _ORDERING[type(self)] < _ORDERING[type(other)]

        return NotImplemented

    def __le__(self, other):
        """Less-than-or-equal, built from ``__lt__`` and ``==``."""
        r = self.__lt__(other)
        # NotImplemented must be handed back to the interpreter, not
        # truth-tested: it is truthy, so ``if r`` would wrongly answer True
        # for operands that ``<`` itself refuses to compare.
        if r is NotImplemented:
            return NotImplemented
        if r:
            return True
        return self == other

    def __ge__(self, other):
        """Greater-than-or-equal, built from ``__gt__`` and ``==``."""
        r = self.__gt__(other)
        # see __le__: never truth-test NotImplemented
        if r is NotImplemented:
            return NotImplemented
        if r:
            return True
        return self == other

    def __hash__(self):
        # mix the fully qualified type name into the hash so that equal text
        # in different identifier types does not systematically collide
        t = type(self)
        fqn = t.__module__ + '.' + t.__name__
        return hash(fqn) ^ hash(str(self))
206 | |
207 | |
class URIRef(Identifier):
    """
    RDF URI Reference: http://www.w3.org/TR/rdf-concepts/#section-Graph-URIref
    """

    __slots__ = ()

    def __new__(cls, value, base=None):
        """
        Create a URIRef from *value*, resolved against *base* when given.

        A string that fails the ``_is_valid_uri`` check is still accepted;
        only a warning is logged.
        """
        if base is not None:
            # urljoin may drop an empty trailing fragment, so remember it
            # and put the "#" back afterwards
            ends_in_hash = value.endswith("#")
            value = urljoin(base, value, allow_fragments=1)
            if ends_in_hash:
                if not value.endswith("#"):
                    value += "#"

        if not _is_valid_uri(value):
            # pass the value as a logging argument so the message is only
            # formatted when the record is actually emitted
            logger.warning(
                '%s does not look like a valid URI, trying to serialize this will break.',
                value)

        try:
            rt = str.__new__(cls, value)
        except UnicodeDecodeError:
            rt = str.__new__(cls, value, 'utf-8')
        return rt

    def toPython(self):
        return str(self)

    def n3(self, namespace_manager = None):
        """
        This will do a limited check for valid URIs,
        essentially just making sure that the string includes no illegal
        characters (``<, >, ", {, }, |, \\, `, ^``)

        :param namespace_manager: if not None, will be used to make up
             a prefixed name
        """

        if not _is_valid_uri(self):
            raise Exception('"%s" does not look like a valid URI, I cannot serialize this as N3/Turtle. Perhaps you wanted to urlencode it?'%self)

        if namespace_manager:
            return namespace_manager.normalizeUri(self)
        else:
            return "<%s>" % self

    def defrag(self):
        """Return this URI without its fragment; self when there is no "#"."""
        if "#" in self:
            # only the defragmented URL is needed, the fragment is discarded
            url = urldefrag(self)[0]
            return URIRef(url)
        else:
            return self

    def __reduce__(self):
        return (URIRef, (str(self),))

    def __getnewargs__(self):
        return (str(self), )

    if not py3compat.PY3:
        def __str__(self):
            return self.encode()

    def __repr__(self):
        if self.__class__ is URIRef:
            clsName = "rdflib.term.URIRef"
        else:
            clsName = self.__class__.__name__

        return """%s(%s)""" % (clsName, super(URIRef, self).__repr__())

    def __add__(self, other):
        return self.__class__(str(self) + other)

    def __radd__(self, other):
        return self.__class__(other + str(self))

    def __mod__(self, other):
        return self.__class__(str(self) % other)

    def md5_term_hash(self):
        """a string of hex that will be the same for two URIRefs that
        are the same. It is not a suitable unique id.

        Supported for backwards compatibility; new code should
        probably just use __hash__
        """
        warnings.warn(
            "method md5_term_hash is deprecated, and will be removed " +
            "in the future. If you use this please let rdflib-dev know!",
            category=DeprecationWarning, stacklevel=2)
        d = md5(self.encode())
        d.update(b("U"))
        return d.hexdigest()

    def de_skolemize(self):
        """ Create a Blank Node from a skolem URI, in accordance
        with http://www.w3.org/TR/rdf11-concepts/#section-skolemization.
        This function accepts only rdflib type skolemization, to provide
        a round-tripping within the system.

        .. versionadded:: 4.0
        """
        if isinstance(self, RDFLibGenid):
            # rdflib-minted skolem: the bnode id is whatever follows the
            # rdflib genid prefix in the path
            parsed_uri = urlparse("%s" % self)
            return BNode(
                value=parsed_uri.path[len(rdflib_skolem_genid):])
        elif isinstance(self, Genid):
            # other skolem: hand out one fresh BNode per URI and remember it
            bnode_id = "%s" % self
            if bnode_id in skolems:
                return skolems[bnode_id]
            else:
                retval = BNode()
                skolems[bnode_id] = retval
                return retval
        else:
            raise Exception("<%s> is not a skolem URI" % self)
325 | |
326 | |
class Genid(URIRef):
    """URIRef subtype that URIRef.de_skolemize treats as a skolem URI."""

    __slots__ = ()

    @staticmethod
    def _is_external_skolem(uri):
        """
        Return True when the last occurrence of ``skolem_genid`` in the
        path component of *uri* is at position 0.
        """
        text = uri if isinstance(uri, str) else str(uri)
        return urlparse(text).path.rfind(skolem_genid) == 0
339 | |
340 | |
class RDFLibGenid(Genid):
    """Genid subtype whose bnode id URIRef.de_skolemize reads back from the path."""

    __slots__ = ()

    @staticmethod
    def _is_rdflib_skolem(uri):
        """
        Return True when *uri* has no params, query or fragment and the last
        occurrence of ``rdflib_skolem_genid`` in its path is at position 0.
        """
        text = uri if isinstance(uri, str) else str(uri)
        parts = urlparse(text)
        # any extra URL component disqualifies the URI
        if parts.params or parts.query or parts.fragment:
            return False
        return parts.path.rfind(rdflib_skolem_genid) == 0
357 | |
358 | |
359 def _unique_id(): | |
360 # Used to read: """Create a (hopefully) unique prefix""" | |
361 # now retained merely to leave interal API unchanged. | |
362 # From BNode.__new__() below ... | |
363 # | |
364 # acceptable bnode value range for RDF/XML needs to be | |
365 # something that can be serialzed as a nodeID for N3 | |
366 # | |
367 # BNode identifiers must be valid NCNames" _:[A-Za-z][A-Za-z0-9]* | |
368 # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID | |
369 return "N" # ensure that id starts with a letter | |
370 | |
371 | |
372 def _serial_number_generator(): | |
373 """ | |
374 Generates UUID4-based but ncname-compliant identifiers. | |
375 """ | |
376 from uuid import uuid4 | |
377 | |
378 def _generator(): | |
379 return uuid4().hex | |
380 | |
381 return _generator | |
382 | |
383 | |
class BNode(Identifier):
    """
    Blank Node: http://www.w3.org/TR/rdf-concepts/#section-blank-nodes

    """
    __slots__ = ()

    def __new__(cls, value=None,
                _sn_gen=_serial_number_generator(), _prefix=_unique_id()):
        """
        Create a blank node. Only store implementations should pass in a
        value; without one a fresh identifier is built from ``_prefix``
        followed by the next id from ``_sn_gen``.
        """
        if value is None:
            # generated ids must not collide with ones created by a
            # different instance of this module at some other time
            value = "%s%s" % (_prefix, _sn_gen())
        # TODO: a caller-supplied value is not checked. For RDF/XML and N3 it
        # needs to be serializable as a nodeID, i.e. a valid NCName
        # (_:[A-Za-z][A-Za-z0-9]*), unless that is enforced elsewhere:
        # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID
        return Identifier.__new__(cls, value)

    def toPython(self):
        """Return the identifier as a plain string."""
        return str(self)

    def n3(self, namespace_manager=None):
        """Return the ``_:``-prefixed form; *namespace_manager* is unused."""
        return "_:%s" % self

    def __getnewargs__(self):
        return (str(self), )

    def __reduce__(self):
        return (BNode, (str(self),))

    if not py3compat.PY3:
        def __str__(self):
            return self.encode()

    def __repr__(self):
        # the fully qualified name is only used for BNode itself
        name = ("rdflib.term.BNode" if self.__class__ is BNode
                else self.__class__.__name__)
        return """%s('%s')""" % (name, str(self))

    def md5_term_hash(self):
        """a string of hex that will be the same for two BNodes that
        are the same. It is not a suitable unique id.

        Supported for backwards compatibility; new code should
        probably just use __hash__
        """
        warnings.warn(
            "method md5_term_hash is deprecated, and will be removed "
            "in the future. If you use this please let rdflib-dev know!",
            category=DeprecationWarning, stacklevel=2)
        digest = md5(self.encode())
        digest.update(b("B"))  # one-letter tag for blank nodes
        return digest.hexdigest()

    def skolemize(self, authority="http://rdlib.net/"):
        """ Create a URIRef "skolem" representation of the BNode, in accordance
        with http://www.w3.org/TR/rdf11-concepts/#section-skolemization

        .. versionadded:: 4.0
        """
        path = "%s%s" % (rdflib_skolem_genid, str(self))
        return URIRef(urljoin(authority, path))
457 | |
458 | |
459 class Literal(Identifier): | |
460 __doc__ = py3compat.format_doctest_out(""" | |
461 RDF Literal: http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal | |
462 | |
463 The lexical value of the literal is the unicode object | |
464 The interpreted, datatyped value is available from .value | |
465 | |
466 Language tags must be valid according to :rfc:5646 | |
467 | |
468 For valid XSD datatypes, the lexical form is optionally normalized | |
469 at construction time. Default behaviour is set by rdflib.NORMALIZE_LITERALS | |
470 and can be overridden by the normalize parameter to __new__ | |
471 | |
472 Equality and hashing of Literals are done based on the lexical form, i.e.: | |
473 | |
474 >>> from rdflib.namespace import XSD | |
475 | |
476 >>> Literal('01')!=Literal('1') # clear - strings differ | |
477 True | |
478 | |
479 but with data-type they get normalized: | |
480 | |
481 >>> Literal('01', datatype=XSD.integer)!=Literal('1', datatype=XSD.integer) | |
482 False | |
483 | |
484 unless disabled: | |
485 | |
486 >>> Literal('01', datatype=XSD.integer, normalize=False)!=Literal('1', datatype=XSD.integer) | |
487 True | |
488 | |
489 | |
490 Value based comparison is possible: | |
491 | |
492 >>> Literal('01', datatype=XSD.integer).eq(Literal('1', datatype=XSD.float)) | |
493 True | |
494 | |
495 The eq method also provides limited support for basic python types: | |
496 | |
497 >>> Literal(1).eq(1) # fine - int compatible with xsd:integer | |
498 True | |
499 >>> Literal('a').eq('b') # fine - str compatible with plain-lit | |
500 False | |
501 >>> Literal('a', datatype=XSD.string).eq('a') # fine - str compatible with xsd:string | |
502 True | |
503 >>> Literal('a').eq(1) # not fine, int incompatible with plain-lit | |
504 NotImplemented | |
505 | |
506 Greater-than/less-than ordering comparisons are also done in value | |
507 space, when compatible datatypes are used. Incompatible datatypes | |
508 are ordered by DT, or by lang-tag. For other nodes the ordering | |
509 is None < BNode < URIRef < Literal | |
510 | |
511 Any comparison with non-rdflib Node are "NotImplemented" | |
512 In PY2.X some stable order will be made up by python | |
513 | |
514 In PY3 this is an error. | |
515 | |
516 >>> from rdflib import Literal, XSD | |
517 >>> lit2006 = Literal('2006-01-01',datatype=XSD.date) | |
518 >>> lit2006.toPython() | |
519 datetime.date(2006, 1, 1) | |
520 >>> lit2006 < Literal('2007-01-01',datatype=XSD.date) | |
521 True | |
522 >>> Literal(datetime.utcnow()).datatype | |
523 rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#dateTime') | |
524 >>> Literal(1) > Literal(2) # by value | |
525 False | |
526 >>> Literal(1) > Literal(2.0) # by value | |
527 False | |
528 >>> Literal('1') > Literal(1) # by DT | |
529 True | |
530 >>> Literal('1') < Literal('1') # by lexical form | |
531 False | |
532 >>> Literal('a', lang='en') > Literal('a', lang='fr') # by lang-tag | |
533 False | |
534 >>> Literal(1) > URIRef('foo') # by node-type | |
535 True | |
536 | |
537 The > < operators will eat this NotImplemented and either make up | |
538 an ordering (py2.x) or throw a TypeError (py3k): | |
539 | |
540 >>> Literal(1).__gt__(2.0) | |
541 NotImplemented | |
542 | |
543 | |
544 """) | |
545 | |
546 | |
547 if not py3compat.PY3: | |
548 __slots__ = ("language", "datatype", "value", "_language", | |
549 "_datatype", "_value") | |
550 else: | |
551 __slots__ = ("_language", "_datatype", "_value") | |
552 | |
def __new__(cls, lexical_or_value, lang=None, datatype=None, normalize=None):
    """
    Build a Literal from another Literal, a lexical string, or some other
    Python object.

    :param lexical_or_value: a Literal to copy from, a string holding the
        lexical form, or any other Python object to be converted
    :param lang: language tag; the empty string is treated as no tag
    :param datatype: datatype URI; wrapped in URIRef when truthy
    :param normalize: for string input, whether to replace the lexical
        form by the one derived from the parsed value; None defers to
        ``rdflib.NORMALIZE_LITERALS``
    :raises TypeError: when both *lang* and *datatype* are given
    :raises Exception: when *lang* is not a valid language tag
    """

    if lang == '':
        lang = None  # no empty lang-tags in RDF

    # identity test: only a missing argument falls back to the global default
    if normalize is None:
        normalize = rdflib.NORMALIZE_LITERALS

    if lang is not None and datatype is not None:
        raise TypeError(
            "A Literal can only have one of lang or datatype, "
            "per http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal")

    if lang and not _is_valid_langtag(lang):
        raise Exception("'%s' is not a valid language tag!"%lang)

    if datatype:
        datatype = URIRef(datatype)

    value = None
    if isinstance(lexical_or_value, Literal):
        # create from another Literal instance
        # NOTE(review): a language inherited here together with an explicit
        # datatype (or an inherited datatype with an explicit lang) is not
        # re-checked against the one-of rule above -- confirm intended.

        lang = lang or lexical_or_value.language
        if datatype:
            # override datatype
            value = _castLexicalToPython(lexical_or_value, datatype)
        else:
            datatype = lexical_or_value.datatype
            value = lexical_or_value.value

    elif isinstance(lexical_or_value, str):
        # passed a string
        # try parsing lexical form of datatyped literal
        value = _castLexicalToPython(lexical_or_value, datatype)

        if value is not None and normalize:
            _value, _datatype = _castPythonToLiteral(value)
            if _value is not None and _is_valid_unicode(_value):
                lexical_or_value = _value

    else:
        # passed some python object
        value = lexical_or_value
        _value, _datatype = _castPythonToLiteral(lexical_or_value)

        # an explicit datatype wins over the one derived from the object
        datatype = datatype or _datatype
        if _value is not None:
            lexical_or_value = _value
        if datatype:
            lang = None

    if py3compat.PY3 and isinstance(lexical_or_value, bytes):
        lexical_or_value = lexical_or_value.decode('utf-8')

    try:
        inst = str.__new__(cls, lexical_or_value)
    except UnicodeDecodeError:
        inst = str.__new__(cls, lexical_or_value, 'utf-8')

    inst._language = lang
    inst._datatype = datatype
    inst._value = value
    return inst
616 | |
@py3compat.format_doctest_out
def normalize(self):
    """
    Returns a new literal with a normalised lexical representation
    of this literal
    >>> from rdflib import XSD
    >>> Literal("01", datatype=XSD.integer, normalize=False).normalize()
    rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    Illegal lexical forms for the datatype given are simply passed on
    >>> Literal("a", datatype=XSD.integer, normalize=False)
    rdflib.term.Literal(%(u)s'a', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    When no value is stored the literal itself is returned.
    """

    # identity test: never dispatch to the stored value's own comparison
    if self.value is not None:
        return Literal(self.value, datatype=self.datatype, lang=self.language)
    else:
        return self
636 | |
@property
def value(self):
    """Read-only view of ``_value``, the Python value stored by ``__new__`` (may be None)."""
    return self._value
640 | |
@property
def language(self):
    """Read-only view of ``_language``, the language tag stored by ``__new__`` (may be None)."""
    return self._language
644 | |
@property
def datatype(self):
    """Read-only view of ``_datatype``, the datatype stored by ``__new__`` (may be None)."""
    return self._datatype
648 | |
def __reduce__(self):
    """Pickle support: rebuild via Literal(lexical form, language, datatype)."""
    ctor_args = (str(self), self.language, self.datatype)
    return (Literal, ctor_args)
651 | |
def __getstate__(self):
    """Return ``(None, state)`` where state holds the language and datatype."""
    state = dict(language=self.language, datatype=self.datatype)
    return (None, state)
654 | |
def __setstate__(self, arg):
    """
    Restore language and datatype from a ``(ignored, state_dict)`` pair
    as produced by ``__getstate__``. ``_value`` is not touched here.
    """
    _unused, state = arg
    self._language = state["language"]
    self._datatype = state["datatype"]
659 | |
@py3compat.format_doctest_out
def __add__(self, val):
    """
    Add *val* in value space when possible, otherwise concatenate the
    lexical forms and keep this literal's language and datatype.

    >>> Literal(1) + 1
    rdflib.term.Literal(%(u)s'2', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
    >>> Literal("1") + "1"
    rdflib.term.Literal(%(u)s'11')
    """

    native = self.toPython()
    if not isinstance(native, Literal):
        try:
            return Literal(native + val)
        except TypeError:
            pass  # fall-through

    joined = str.__add__(self, val)
    return Literal(joined, self.language, self.datatype)
678 | |
def __bool__(self):
    """
    Is the Literal "True"
    This is used for if statements, bool(literal), etc.

    With a stored value the answer is the truth of that value; otherwise
    it is whether the lexical form is non-empty.
    """
    # identity test: never dispatch to the stored value's own comparison
    if self.value is not None:
        return bool(self.value)
    return len(self) != 0
687 | |
@py3compat.format_doctest_out
def __neg__(self):
    """
    Unary minus in value space; only int and float values are accepted.

    >>> (- Literal(1))
    rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
    >>> (- Literal(10.5))
    rdflib.term.Literal(%(u)s'-10.5', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#double'))
    >>> from rdflib.namespace import XSD
    >>> (- Literal("1", datatype=XSD.integer))
    rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> (- Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(%(u)s'1')
    >>>
    """

    number = self.value
    if not isinstance(number, (int, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(-number)
710 | |
@py3compat.format_doctest_out
def __pos__(self):
    """
    Unary plus in value space; only int and float values are accepted.

    >>> (+ Literal(1))
    rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
    >>> (+ Literal(-1))
    rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
    >>> from rdflib.namespace import XSD
    >>> (+ Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> (+ Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(%(u)s'1')
    """
    number = self.value
    if not isinstance(number, (int, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(+number)
731 | |
@py3compat.format_doctest_out
def __abs__(self):
    """
    Absolute value in value space; only int and float values are accepted.

    >>> abs(Literal(-1))
    rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> from rdflib.namespace import XSD
    >>> abs( Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> abs(Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(%(u)s'1')
    """
    number = self.value
    if not isinstance(number, (int, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(abs(number))
751 | |
@py3compat.format_doctest_out
def __invert__(self):
    """
    Bitwise inversion in value space.

    >>> ~(Literal(-1))
    rdflib.term.Literal(%(u)s'0', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> from rdflib.namespace import XSD
    >>> ~( Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(%(u)s'0', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    Not working:

    >>> ~(Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(%(u)s'1')

    A float value also ends in a TypeError, raised by the ~ operator.
    """
    if isinstance(self.value, (int, float)):
        # use the operator instead of calling value.__invert__(): float has
        # no such method, so the direct call raised AttributeError instead
        # of the TypeError that Python raises for ~ on a float
        return Literal(~self.value)
    else:
        raise TypeError("Not a number; %s" % repr(self))
773 | |
def __gt__(self, other):
    """

    This implements ordering for Literals,
    the other comparison methods delegate here

    This tries to implement this:
    http://www.w3.org/TR/sparql11-query/#modOrderBy

    In short, Literals with compatible data-types are ordered in value space,
    i.e.
    >>> from rdflib import XSD

    >>> Literal(1)>Literal(2) # int/int
    False
    >>> Literal(2.0)>Literal(1) # double/int
    True
    >>> from decimal import Decimal
    >>> Literal(Decimal("3.3")) > Literal(2.0) # decimal/double
    True
    >>> Literal(Decimal("3.3")) < Literal(4.0) # decimal/double
    True
    >>> Literal('b')>Literal('a') # plain lit/plain lit
    True
    >>> Literal('b')>Literal('a', datatype=XSD.string) # plain lit/xsd:string
    True

    Incompatible datatype mismatches ordered by DT

    >>> Literal(1)>Literal("2") # int>string
    False

    Langtagged literals by lang tag
    >>> Literal("a", lang="en")>Literal("a", lang="fr")
    False

    Returns NotImplemented for anything that is neither None nor a Node,
    and for differing datatypes when rdflib.DAWG_LITERAL_COLLATION is set.
    """
    if other is None:
        return True  # Everything is greater than None
    if isinstance(other, Literal):

        if self.datatype in _NUMERIC_LITERAL_TYPES and \
                other.datatype in _NUMERIC_LITERAL_TYPES:
            return numeric_greater(self.value, other.value)

        # plain-literals and xsd:string literals
        # are "the same"
        dtself = self.datatype or _XSD_STRING
        dtother = other.datatype or _XSD_STRING

        if dtself != dtother:
            if rdflib.DAWG_LITERAL_COLLATION:
                return NotImplemented
            else:
                return dtself > dtother

        if self.language != other.language:
            # a literal without a language tag sorts before a tagged one
            if not self.language:
                return False
            elif not other.language:
                return True
            else:
                return self.language > other.language

        # identity tests: never dispatch to the stored values' own comparison
        if self.value is not None and other.value is not None:
            return self.value > other.value

        if str(self) != str(other):
            return str(self) > str(other)

        # same language, same lexical form, check real dt
        # plain-literals come before xsd:string!
        if self.datatype != other.datatype:
            if not self.datatype:
                return False
            elif not other.datatype:
                return True
            else:
                return self.datatype > other.datatype

        return False  # they are the same

    elif isinstance(other, Node):
        return True  # Literal are the greatest!
    else:
        return NotImplemented  # we can only compare to nodes
859 | |
def __lt__(self, other):
    """
    Less-than for Literals, derived from ``__gt__`` and ``eq``.

    Returns NotImplemented when *other* is neither None nor a Node, when
    ``__gt__`` or ``eq`` cannot decide, or when either raises TypeError.
    """
    if other is None:
        return False  # Nothing is less than None
    if isinstance(other, Literal):
        try:
            greater = self.__gt__(other)
            # NotImplemented is truthy, so it must be checked by identity
            # before any boolean logic; otherwise "cannot compare" silently
            # turns into False.
            if greater is NotImplemented:
                return NotImplemented
            if greater:
                return False
            same = self.eq(other)
            if same is NotImplemented:
                return NotImplemented
            return not same
        except TypeError:
            return NotImplemented
    if isinstance(other, Node):
        return False  # all nodes are less-than Literals

    return NotImplemented
872 | |
def __le__(self, other):
    """
    >>> from rdflib.namespace import XSD
    >>> Literal('2007-01-01T10:00:00', datatype=XSD.dateTime
    ...     ) <= Literal('2007-01-01T10:00:00', datatype=XSD.dateTime)
    True

    Returns NotImplemented when ``__lt__`` does, or when ``eq`` raises
    TypeError.
    """
    r = self.__lt__(other)
    # NotImplemented is truthy: hand it back instead of reporting True for
    # operands that ``<`` itself refuses to compare
    if r is NotImplemented:
        return NotImplemented
    if r:
        return True
    try:
        return self.eq(other)
    except TypeError:
        return NotImplemented
887 | |
def __ge__(self, other):
    """
    Greater-than-or-equal, built from ``__gt__`` and ``eq``.

    Returns NotImplemented when ``__gt__`` does, or when ``eq`` raises
    TypeError.
    """
    r = self.__gt__(other)
    # NotImplemented is truthy: hand it back instead of reporting True for
    # operands that ``>`` itself refuses to compare
    if r is NotImplemented:
        return NotImplemented
    if r:
        return True
    try:
        return self.eq(other)
    except TypeError:
        return NotImplemented
896 | |
def _comparable_to(self, other):
    """
    Helper method to decide which things are meaningful to
    rich-compare with this literal

    Returns False for a Literal *other* that fails the datatype or
    language checks below; returns True in every other case, including
    when *other* is not a Literal at all.
    """
    if isinstance(other, Literal):
        if (self.datatype and other.datatype):
            # two datatyped literals
            if not self.datatype in XSDToPython or not other.datatype in XSDToPython:
                # non XSD DTs must match
                if self.datatype != other.datatype:
                    return False

        else:
            # xsd:string may be compared with plain literals
            # NOTE(review): ``not`` binds only to the first parenthesised
            # term, so this reads ``(not A) or B``. As written, plain vs
            # plain and plain-self vs xsd:string-other both return False,
            # while xsd:string-self vs plain-other passes. That asymmetry
            # looks unintended -- confirm the intended rule before changing.
            if not (self.datatype == _XSD_STRING and not other.datatype) or \
                    (other.datatype == _XSD_STRING and not self.datatype):
                return False

        # if given lang-tag has to be case insensitive equal
        if (self.language or "").lower() != (other.language or "").lower():
            return False

    return True
921 | |
def __hash__(self):
    """
    >>> from rdflib.namespace import XSD
    >>> a = {Literal('1', datatype=XSD.integer):'one'}
    >>> Literal('1', datatype=XSD.double) in a
    False


    "Called for the key object for dictionary operations,
    and by the built-in function hash(). Should return
    a 32-bit integer usable as a hash value for
    dictionary operations. The only required property
    is that objects which compare equal have the same
    hash value; it is advised to somehow mix together
    (e.g., using exclusive or) the hash values for the
    components of the object that also play a part in
    comparison of objects." -- 3.4.1 Basic customization (Python)

    "Two literals are equal if and only if all of the following hold:
    * The strings of the two lexical forms compare equal, character by
    character.
    * Either both or neither have language tags.
    * The language tags, if any, compare equal.
    * Either both or neither have datatype URIs.
    * The two datatype URIs, if any, compare equal, character by
    character."
    -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax)

    """
    # start from the base-class hash of the lexical form, then fold in
    # the two other components that take part in __eq__
    digest = super(Literal, self).__hash__()
    tag = self.language
    if tag:
        digest ^= hash(tag.lower())  # lower-cased, as in __eq__
    dtype = self.datatype
    if dtype:
        digest ^= hash(dtype)
    return digest
957 | |
@py3compat.format_doctest_out
def __eq__(self, other):
    """
    Literals are only equal to other literals.

    "Two literals are equal if and only if all of the following hold:
    * The strings of the two lexical forms compare equal, character by character.
    * Either both or neither have language tags.
    * The language tags, if any, compare equal.
    * Either both or neither have datatype URIs.
    * The two datatype URIs, if any, compare equal, character by character."
    -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax)

    >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo"))
    True
    >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo2"))
    False

    >>> Literal("1", datatype=URIRef("foo")) == Literal("2", datatype=URIRef("foo"))
    False
    >>> Literal("1", datatype=URIRef("foo")) == "asdf"
    False
    >>> from rdflib import XSD
    >>> Literal('2007-01-01', datatype=XSD.date) == Literal('2007-01-01', datatype=XSD.date)
    True
    >>> Literal('2007-01-01', datatype=XSD.date) == date(2007, 1, 1)
    False
    >>> Literal("one", lang="en") == Literal("one", lang="en")
    True
    >>> Literal("hast", lang='en') == Literal("hast", lang='de')
    False
    >>> Literal("1", datatype=XSD.integer) == Literal(1)
    True
    >>> Literal("1", datatype=XSD.integer) == Literal("01", datatype=XSD.integer)
    True

    """
    if self is other:
        return True
    if other is None or not isinstance(other, Literal):
        return False

    # datatype first, then language (case-insensitively), then lexical form
    if not (self.datatype == other.datatype):
        return False
    own_tag = self.language.lower() if self.language else None
    other_tag = other.language.lower() if other.language else None
    if not (own_tag == other_tag):
        return False
    return str.__eq__(self, other)
1005 | |
def eq(self, other):
    """
    Compare the value of this literal with something else

    Either, with the value of another literal
    comparisons are then done in literal "value space",
    and according to the rules of XSD subtype-substitution/type-promotion

    OR, with a python object:

    str objects can be compared with plain-literals,
    or those with datatype xsd:string

    bool objects with xsd:boolean

    an int or float with numeric xsd types

    isodate date,time,datetime objects with xsd:date,xsd:time or xsd:datetime

    Any other operations returns NotImplemented

    :param other: a Literal, another Node, or a plain Python object
    :returns: True/False when the comparison is defined, otherwise
        ``NotImplemented``
    :raises TypeError: when two literals share a datatype (or are both
        numeric) but at least one has no parsed value and their lexical
        forms differ, or when the datatypes differ and
        ``rdflib.DAWG_LITERAL_COLLATION`` is set
    """
    if isinstance(other, Literal):

        if self.datatype in _NUMERIC_LITERAL_TYPES \
                and other.datatype in _NUMERIC_LITERAL_TYPES:
            # both numeric: compare in value space when both values parsed
            if self.value is not None and other.value is not None:
                return self.value == other.value
            else:
                if str.__eq__(self, other):
                    return True
                raise TypeError(
                    'I cannot know that these two lexical forms do not map to the same value: %s and %s' % (self, other))
        # language tags have to be case-insensitively equal
        if (self.language or "").lower() != (other.language or "").lower():
            return False

        # a missing datatype is treated like xsd:string
        dtself = self.datatype or _XSD_STRING
        dtother = other.datatype or _XSD_STRING

        if (dtself == _XSD_STRING and dtother == _XSD_STRING):
            # string/plain literals, compare on lexical form
            return str.__eq__(self, other)

        if dtself != dtother:
            if rdflib.DAWG_LITERAL_COLLATION:
                raise TypeError("I don't know how to compare literals with datatypes %s and %s" % (
                    self.datatype, other.datatype))
            else:
                return False

        # matching non-string DTs now - do we compare values or
        # lexical form first? comparing two ints is far quicker -
        # maybe there are counter examples

        if self.value is not None and other.value is not None:

            if self.datatype in (_RDF_XMLLITERAL, _RDF_HTMLLITERAL):
                return _isEqualXMLNode(self.value, other.value)

            return self.value == other.value
        else:

            if str.__eq__(self, other):
                return True

            if self.datatype == _XSD_STRING:
                return False  # string value space=lexical space

            # matching DTs, but not matching, we cannot compare!
            raise TypeError(
                'I cannot know that these two lexical forms do not map to the same value: %s and %s' % (self, other))

    elif isinstance(other, Node):
        return False  # no non-Literal nodes are equal to a literal

    elif isinstance(other, str):
        # only plain-literals can be directly compared to strings

        # TODO: Is "blah"@en eq "blah" ?
        if self.language is not None:
            return False

        if (self.datatype == _XSD_STRING or self.datatype is None):
            return str(self) == other

    elif isinstance(other, bool):
        # bool is a subclass of int, so this test has to come before the
        # numeric one; otherwise the branch can never be reached and
        # xsd:boolean literals are never comparable with Python bools
        if self.datatype == _XSD_BOOLEAN:
            return self.value == other

    elif isinstance(other, (int, float)):
        if self.datatype in _NUMERIC_LITERAL_TYPES:
            return self.value == other

    elif isinstance(other, (date, datetime, time)):
        if self.datatype in (_XSD_DATETIME, _XSD_DATE, _XSD_TIME):
            return self.value == other

    return NotImplemented
1102 | |
def neq(self, other):
    """
    Value-space inequality: the negation of :meth:`eq`.

    :param other: anything accepted by :meth:`eq`
    :returns: ``not self.eq(other)`` when the comparison is defined;
        ``NotImplemented`` when :meth:`eq` returns ``NotImplemented``.
        Negating ``NotImplemented`` directly would silently report the
        operands as "not unequal", and taking its truth value is
        deprecated since Python 3.9.
    :raises TypeError: propagated unchanged from :meth:`eq`
    """
    result = self.eq(other)
    if result is NotImplemented:
        return NotImplemented
    return not result
1105 | |
@py3compat.format_doctest_out
def n3(self, namespace_manager = None):
    r'''
    Serialise this literal in N3 syntax.

    Examples::

        >>> Literal("foo").n3()
        %(u)s'"foo"'

    Strings with newlines or triple-quotes::

        >>> Literal("foo\nbar").n3()
        %(u)s'"""foo\nbar"""'

        >>> Literal("''\'").n3()
        %(u)s'"\'\'\'"'

        >>> Literal('"""').n3()
        %(u)s'"\\"\\"\\""'

    Language::

        >>> Literal("hello", lang="en").n3()
        %(u)s'"hello"@en'

    Datatypes::

        >>> Literal(1).n3()
        %(u)s'"1"^^<http://www.w3.org/2001/XMLSchema#integer>'

        >>> Literal(1.0).n3()
        %(u)s'"1.0"^^<http://www.w3.org/2001/XMLSchema#double>'

        >>> Literal(True).n3()
        %(u)s'"true"^^<http://www.w3.org/2001/XMLSchema#boolean>'

    Datatype and language isn't allowed (datatype takes precedence)::

        >>> Literal(1, lang="en").n3()
        %(u)s'"1"^^<http://www.w3.org/2001/XMLSchema#integer>'

    Custom datatype::

        >>> footype = URIRef("http://example.org/ns#foo")
        >>> Literal("1", datatype=footype).n3()
        %(u)s'"1"^^<http://example.org/ns#foo>'

    Passing a namespace-manager will use it to abbreviate datatype URIs:

        >>> from rdflib import Graph
        >>> Literal(1).n3(Graph().namespace_manager)
        %(u)s'"1"^^xsd:integer'
    '''
    # without a (truthy) namespace manager the datatype URI is written in full
    if not namespace_manager:
        return self._literal_n3()
    # otherwise let the manager's normalizeUri abbreviate the datatype
    return self._literal_n3(qname_callback=namespace_manager.normalizeUri)
1165 | |
@py3compat.format_doctest_out
def _literal_n3(self, use_plain=False, qname_callback=None):
    '''
    Build the N3 text for this literal.

    :param use_plain: when true, literals whose datatype is listed in
        ``_PLAIN_LITERAL_TYPES`` and which have a parsed value are
        written in their unquoted shorthand form
    :param qname_callback: optional callable that is handed the datatype
        and returns the text to put after ``^^``; when it is missing or
        returns something falsy the datatype is written between angle
        brackets
    :returns: the serialised literal

    Using plain literal (shorthand) output::
        >>> from rdflib.namespace import XSD

        >>> Literal(1)._literal_n3(use_plain=True)
        %(u)s'1'

        >>> Literal(1.0)._literal_n3(use_plain=True)
        %(u)s'1e+00'

        >>> Literal(1.0, datatype=XSD.decimal)._literal_n3(use_plain=True)
        %(u)s'1.0'

        >>> Literal(1.0, datatype=XSD.float)._literal_n3(use_plain=True)
        %(u)s'"1.0"^^<http://www.w3.org/2001/XMLSchema#float>'

        >>> Literal("foo", datatype=XSD.string)._literal_n3(
        ...         use_plain=True)
        %(u)s'"foo"^^<http://www.w3.org/2001/XMLSchema#string>'

        >>> Literal(True)._literal_n3(use_plain=True)
        %(u)s'true'

        >>> Literal(False)._literal_n3(use_plain=True)
        %(u)s'false'

        >>> Literal(1.91)._literal_n3(use_plain=True)
        %(u)s'1.91e+00'

    Only limited precision available for floats:
        >>> Literal(0.123456789)._literal_n3(use_plain=True)
        %(u)s'1.234568e-01'

        >>> Literal('0.123456789',
        ...     datatype=XSD.decimal)._literal_n3(use_plain=True)
        %(u)s'0.123456789'

    Using callback for datatype QNames::

        >>> Literal(1)._literal_n3(
        ...         qname_callback=lambda uri: "xsd:integer")
        %(u)s'"1"^^xsd:integer'

    '''
    if use_plain and self.datatype in _PLAIN_LITERAL_TYPES:
        # a literal without a parsed value falls through to the quoted form
        if self.value is not None:
            # If self is inf or NaN, we need a datatype
            # (there is no plain representation)
            if self.datatype in _NUMERIC_INF_NAN_LITERAL_TYPES:
                try:
                    v = float(self)
                    if math.isinf(v) or math.isnan(v):
                        # redo the serialisation with use_plain switched off
                        return self._literal_n3(False, qname_callback)
                except ValueError:
                    # lexical form is not a float: also use the quoted form
                    return self._literal_n3(False, qname_callback)

            # this is a bit of a mess -
            # in py >=2.6 the string.format function makes this easier
            # we try to produce "pretty" output
            if self.datatype == _XSD_DOUBLE:
                # exponent notation with the zero padding of the mantissa
                # removed, e.g. 1.000000e+00 becomes 1e+00
                return sub("\\.?0*e", "e", '%e' % float(self))
            elif self.datatype == _XSD_DECIMAL:
                s = '%s' % self
                # make sure the shorthand always contains a decimal point
                if '.' not in s:
                    s += '.0'
                return s

            elif self.datatype == _XSD_BOOLEAN:
                return ('%s' % self).lower()
            else:
                return '%s' % self

    # quoted form: escaped lexical form plus language tag or datatype
    encoded = self._quote_encode()

    datatype = self.datatype
    quoted_dt = None
    if datatype:
        if qname_callback:
            quoted_dt = qname_callback(datatype)
        # no callback, or the callback gave nothing usable: full URI
        if not quoted_dt:
            quoted_dt = "<%s>" % datatype
        if datatype in _NUMERIC_INF_NAN_LITERAL_TYPES:
            try:
                v = float(self)
                if math.isinf(v):
                    # py string reps: float: 'inf', Decimal: 'Infinity"
                    # both need to become "INF" in xsd datatypes
                    encoded = encoded.replace('inf', 'INF').replace(
                        'Infinity', 'INF')
                if math.isnan(v):
                    encoded = encoded.replace('nan', 'NaN')
            except ValueError:
                # if we can't cast to float something is wrong, but we can
                # still serialize. Warn user about it
                warnings.warn("Serializing weird numerical %r" % self)

    language = self.language
    # a language tag, when present, is emitted instead of the datatype
    if language:
        return '%s@%s' % (encoded, language)
    elif datatype:
        return '%s^^%s' % (encoded, quoted_dt)
    else:
        return '%s' % encoded
1271 | |
def _quote_encode(self):
    """
    Return the lexical form wrapped in N3/Turtle string quotes.

    Strings containing a newline are written as a long (triple-quoted)
    string with the newline kept literally; all other strings are written
    as a short (single-quoted) string. Backslashes and carriage returns
    are escaped in both forms.

    ``encode('unicode-escape')`` is deliberately not used: it would turn a
    newline into a backslash-n sequence, while the long form wants the
    real newline character.

    NOTE: Could in theory choose quotes based on quotes appearing in the
    string, i.e. '"' and "'", but N3/turtle doesn't allow "'"(?).

    :returns: the quoted and escaped string
    """
    if "\n" in self:
        # Triple quote this string.
        encoded = self.replace('\\', '\\\\')
        if '"""' in self:
            # is this ok?
            encoded = encoded.replace('"""', '\\"\\"\\"')
        if encoded.endswith('"'):
            # A final quote would merge with the closing delimiter, so it
            # must be escaped. It already is escaped only when preceded by
            # an odd number of backslashes; looking at a single preceding
            # character would mistake an escaped backslash for an escape
            # of the quote.
            head = encoded[:-1]
            trailing_backslashes = len(head) - len(head.rstrip('\\'))
            if trailing_backslashes % 2 == 0:
                encoded = head + '\\' + '"'

        return '"""%s"""' % encoded.replace('\r', '\\r')
    else:
        # no newline can occur in this branch, so only backslashes,
        # double quotes and carriage returns need escaping
        return '"%s"' % self.replace(
            '\\', '\\\\').replace(
            '"', '\\"').replace(
            '\r', '\\r')
1298 | |
# Only defined when py3compat.PY3 is false: __str__ then returns the
# result of self.encode() instead of the inherited implementation.
if not py3compat.PY3:
    def __str__(self):
        return self.encode()
1302 | |
def __repr__(self):
    """
    Return a constructor-like representation of the literal.

    The string repr of the lexical form comes first, followed by
    ``lang=...`` and ``datatype=...`` for whichever of the two is not
    None. Plain ``Literal`` instances are labelled with the fully
    qualified ``rdflib.term.Literal``; subclasses use their own class name.
    """
    lang = self.language
    dtype = self.datatype

    parts = [super(Literal, self).__repr__()]
    if lang is not None:
        parts.append("lang=" + repr(lang))
    if dtype is not None:
        parts.append("datatype=" + repr(dtype))

    label = ("rdflib.term.Literal" if self.__class__ == Literal
             else self.__class__.__name__)
    return """%s(%s)""" % (label, ", ".join(parts))
1314 | |
def toPython(self):
    """
    Returns an appropriate python datatype derived from this RDF Literal

    That is ``self.value`` when it is not None; otherwise the literal
    itself is returned unchanged.
    """
    converted = self.value
    return self if converted is None else converted
1323 | |
def md5_term_hash(self):
    """a string of hex that will be the same for two Literals that
    are the same. It is not a suitable unique id.

    Supported for backwards compatibility; new code should
    probably just use __hash__

    The digest is the MD5 of ``self.encode()`` followed by the marker
    byte ``L``. Calling this emits a ``DeprecationWarning``.
    """
    # the original message repeated the word "removed"
    warnings.warn(
        "method md5_term_hash is deprecated, and will be " +
        "removed in the future. If you use this please let rdflib-dev know!",
        category=DeprecationWarning, stacklevel=2)
    d = md5(self.encode())
    d.update(b("L"))
    return d.hexdigest()
1338 | |
1339 | |
def _parseXML(xmlstring):
    """
    Parse an XML fragment into a normalised minidom document.

    The fragment is wrapped in a synthetic ``<rdflibtoplevelelement>``
    element before being handed to ``xml.dom.minidom.parseString``. When
    ``py3compat.PY3`` is false the text is encoded as UTF-8 first.
    """
    payload = xmlstring if py3compat.PY3 else xmlstring.encode('utf-8')
    wrapped = "<rdflibtoplevelelement>%s</rdflibtoplevelelement>" % payload
    document = xml.dom.minidom.parseString(wrapped)
    document.normalize()
    return document
1347 | |
1348 | |
def _parseHTML(htmltext):
    """
    Parse an HTML fragment with html5lib's "dom" tree builder and return
    the normalised result of ``parseFragment``.

    :raises ImportError: with an installation hint when an ImportError
        occurs while importing or using html5lib
    """
    try:
        import html5lib
        parser_cls = html5lib.HTMLParser
        dom_builder = html5lib.treebuilders.getTreeBuilder("dom")
        fragment = parser_cls(tree=dom_builder).parseFragment(htmltext)
        fragment.normalize()
        return fragment
    except ImportError:
        raise ImportError(
            "HTML5 parser not available. Try installing" +
            " html5lib <http://code.google.com/p/html5lib>")
1361 | |
1362 | |
def _writeXML(xmlnode):
    """
    Serialise a minidom node to UTF-8 bytes for use as a lexical form.

    A ``DocumentFragment`` is first re-homed into a fresh ``Document`` so
    it can be serialised with ``toxml``. The XML declaration and the
    synthetic ``<rdflibtoplevelelement>`` wrapper are removed from the
    output when they are present.
    """
    if isinstance(xmlnode, xml.dom.minidom.DocumentFragment):
        holder = xml.dom.minidom.Document()
        holder.childNodes += xmlnode.childNodes
        xmlnode = holder

    xml_decl = b('<?xml version="1.0" encoding="utf-8"?>')
    open_tag = b('<rdflibtoplevelelement>')

    serialized = xmlnode.toxml('utf-8')
    # for clean round-tripping, remove headers -- I have great and
    # specific worries that this will blow up later, but this margin
    # is too narrow to contain them
    if serialized.startswith(xml_decl):
        serialized = serialized[len(xml_decl):]
    if serialized.startswith(open_tag):
        # the closing tag is one character ("/") longer than the opening one
        serialized = serialized[len(open_tag):-(len(open_tag) + 1)]
    if serialized == b('<rdflibtoplevelelement/>'):
        serialized = b('')
    return serialized
1379 | |
# Cannot import Namespace/XSD because of circular dependencies, so the
# namespace prefixes and the datatype URIs used in this module are
# spelled out by hand here.
_XSD_PFX = 'http://www.w3.org/2001/XMLSchema#'
_RDF_PFX = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

# datatypes whose values are parsed into DOM trees
_RDF_XMLLITERAL = URIRef(_RDF_PFX + 'XMLLiteral')
_RDF_HTMLLITERAL = URIRef(_RDF_PFX + 'HTML')

_XSD_STRING = URIRef(_XSD_PFX + 'string')

_XSD_FLOAT = URIRef(_XSD_PFX + 'float')
_XSD_DOUBLE = URIRef(_XSD_PFX + 'double')
_XSD_DECIMAL = URIRef(_XSD_PFX + 'decimal')
_XSD_INTEGER = URIRef(_XSD_PFX + 'integer')
_XSD_BOOLEAN = URIRef(_XSD_PFX + 'boolean')

_XSD_DATETIME = URIRef(_XSD_PFX + 'dateTime')
_XSD_DATE = URIRef(_XSD_PFX + 'date')
_XSD_TIME = URIRef(_XSD_PFX + 'time')

# TODO: duration, gYearMonth, gYear, gMonthDay, gDay, gMonth

# datatypes that Literal.eq treats as numeric: two literals whose
# datatypes are both listed here are compared by value
_NUMERIC_LITERAL_TYPES = (
    _XSD_INTEGER,
    _XSD_DECIMAL,
    _XSD_DOUBLE,
    URIRef(_XSD_PFX + 'float'),

    URIRef(_XSD_PFX + 'byte'),
    URIRef(_XSD_PFX + 'int'),
    URIRef(_XSD_PFX + 'long'),
    URIRef(_XSD_PFX + 'negativeInteger'),
    URIRef(_XSD_PFX + 'nonNegativeInteger'),
    URIRef(_XSD_PFX + 'nonPositiveInteger'),
    URIRef(_XSD_PFX + 'positiveInteger'),
    URIRef(_XSD_PFX + 'short'),
    URIRef(_XSD_PFX + 'unsignedByte'),
    URIRef(_XSD_PFX + 'unsignedInt'),
    URIRef(_XSD_PFX + 'unsignedLong'),
    URIRef(_XSD_PFX + 'unsignedShort'),

)

# these have "native" syntax in N3/SPARQL
_PLAIN_LITERAL_TYPES = (
    _XSD_INTEGER,
    _XSD_BOOLEAN,
    _XSD_DOUBLE,
    _XSD_DECIMAL,
)

# these have special INF and NaN XSD representations
_NUMERIC_INF_NAN_LITERAL_TYPES = (
    URIRef(_XSD_PFX + 'float'),
    _XSD_DOUBLE,
    _XSD_DECIMAL,
)
1436 | |
1437 | |
def _castPythonToLiteral(obj):
    """
    Casts a python datatype to a tuple of the lexical value and a
    datatype URI (or None)

    ``_PythonToXSD`` is scanned in order and the first entry whose type
    matches ``obj`` decides the result; its cast function, when there is
    one, produces the lexical value. Objects matching no entry are
    returned unchanged with no datatype.
    """
    for py_type, (lexicalize, dtype) in _PythonToXSD:
        if not isinstance(obj, py_type):
            continue
        if lexicalize:
            return lexicalize(obj), dtype
        # no cast function: keep the object, and report a falsy datatype as None
        return obj, (dtype if dtype else None)
    return obj, None  # TODO: is this right for the fall through case?
1452 | |
1453 from decimal import Decimal | |
1454 | |
# Mappings from Python types to XSD datatypes and back (borrowed from sparta)
# Each entry is (python type, (cast function or None, datatype or None)).
# _castPythonToLiteral scans the list in order and stops at the first
# isinstance() match, so a subclass must be listed before its base class:
# bool before int, and datetime before date (datetime instances are also
# instances of date).

# SPARQL/Turtle/N3 has shortcuts for integer, double, decimal
# python has only float - to be in tune with sparql/n3/turtle
# we default to XSD.double for float literals

# python ints have no size limit, so they map to the abstract integer
# type rather than some concrete bit-limited datatype
# (the list used to carry a second, identical int entry left over from the
# separate Python 2 ``long`` type; it could never be reached)

_PythonToXSD = [
    (str, (None, None)),
    (float, (None, _XSD_DOUBLE)),
    (bool, (lambda i:str(i).lower(), _XSD_BOOLEAN)),
    (int, (None, _XSD_INTEGER)),
    (Decimal, (None, _XSD_DECIMAL)),
    (datetime, (lambda i:i.isoformat(), _XSD_DATETIME)),
    (date, (lambda i:i.isoformat(), _XSD_DATE)),
    (time, (lambda i:i.isoformat(), _XSD_TIME)),
    (xml.dom.minidom.Document, (_writeXML, _RDF_XMLLITERAL)),
    # this is a bit dirty - by accident the html5lib parser produces
    # DocumentFragments, and the xml parser Documents, letting this
    # decide what datatype to use makes roundtripping easier, but it is a
    # bit random
    (xml.dom.minidom.DocumentFragment, (_writeXML, _RDF_HTMLLITERAL))
]
1484 | |
# Maps a datatype URI to the callable that turns a lexical form into a
# Python value. A value of None means there is no conversion function:
# _castLexicalToPython then returns the lexical form as a str.
XSDToPython = {
    None : None,  # plain literals map directly to value space
    URIRef(_XSD_PFX + 'time'): parse_time,
    URIRef(_XSD_PFX + 'date'): parse_date,
    URIRef(_XSD_PFX + 'gYear'): parse_date,
    URIRef(_XSD_PFX + 'gYearMonth'): parse_date,
    URIRef(_XSD_PFX + 'dateTime'): parse_datetime,
    URIRef(_XSD_PFX + 'string'): None,
    URIRef(_XSD_PFX + 'normalizedString'): None,
    URIRef(_XSD_PFX + 'token'): None,
    URIRef(_XSD_PFX + 'language'): None,
    # only '1' and 'true' (in any letter case) map to True
    URIRef(_XSD_PFX + 'boolean'): lambda i: i.lower() in ['1', 'true'],
    URIRef(_XSD_PFX + 'decimal'): Decimal,
    URIRef(_XSD_PFX + 'integer'): int,
    URIRef(_XSD_PFX + 'nonPositiveInteger'): int,
    URIRef(_XSD_PFX + 'long'): int,
    URIRef(_XSD_PFX + 'nonNegativeInteger'): int,
    URIRef(_XSD_PFX + 'negativeInteger'): int,
    URIRef(_XSD_PFX + 'int'): int,
    URIRef(_XSD_PFX + 'unsignedLong'): int,
    URIRef(_XSD_PFX + 'positiveInteger'): int,
    URIRef(_XSD_PFX + 'short'): int,
    URIRef(_XSD_PFX + 'unsignedInt'): int,
    URIRef(_XSD_PFX + 'byte'): int,
    URIRef(_XSD_PFX + 'unsignedShort'): int,
    URIRef(_XSD_PFX + 'unsignedByte'): int,
    URIRef(_XSD_PFX + 'float'): float,
    URIRef(_XSD_PFX + 'double'): float,
    URIRef(_XSD_PFX + 'base64Binary'): lambda s: base64.b64decode(s),
    URIRef(_XSD_PFX + 'anyURI'): None,
    _RDF_XMLLITERAL: _parseXML,
    _RDF_HTMLLITERAL: _parseHTML
}

# The mutable lookup table actually consulted at run time; it starts as a
# copy of XSDToPython and is extended by bind().
_toPythonMapping = {}

_toPythonMapping.update(XSDToPython)
1522 | |
def _castLexicalToPython(lexical, datatype):
    """
    Map a lexical form to the value-space for the given datatype

    :param lexical: the lexical form to convert
    :param datatype: the datatype used to look up the conversion function
        in ``_toPythonMapping``
    :returns: a python object for the value or ``None``; ``None`` is
        returned both for unknown datatypes and for lexical forms the
        conversion function rejects
    """
    # False (not None) is the "unknown datatype" marker, because None is a
    # legitimate mapping value meaning "no conversion needed"
    convFunc = _toPythonMapping.get(datatype, False)
    if convFunc:
        try:
            return convFunc(lexical)
        except Exception:
            # not a valid lexical representation for this dt
            # (KeyboardInterrupt/SystemExit are deliberately not swallowed)
            return None
    elif convFunc is None:
        # no conv func means 1-1 lexical<->value-space mapping
        try:
            return str(lexical)
        except UnicodeDecodeError:
            return str(lexical, 'utf-8')
    else:
        # no convFunc - unknown data-type
        return None
1544 | |
def bind(datatype, pythontype, constructor=None, lexicalizer=None):
    """
    register a new datatype<->pythontype binding

    :param datatype: the datatype to register; a warning is logged when
                     it is already bound and the binding is replaced

    :param pythontype: the Python type that corresponds to the datatype

    :param constructor: an optional function for converting lexical forms
                        into a Python instances, if not given the pythontype
                        is used directly

    :param lexicalizer: an optional function for converting python objects to
                        lexical form, if not given object.__str__ is used

    """
    if datatype in _toPythonMapping:
        # arguments are passed separately so the message is only formatted
        # when the record is actually emitted
        logger.warning("datatype '%s' was already bound. Rebinding.",
                       datatype)

    if constructor is None:
        constructor = pythontype
    _toPythonMapping[datatype] = constructor
    _PythonToXSD.append((pythontype, (lexicalizer, datatype)))
1565 | |
1566 | |
class Variable(Identifier):
    """
    A Variable - this is used for querying, or in Formula aware
    graphs, where Variables can be stored in the graph
    """
    __slots__ = ()

    def __new__(cls, value):
        """
        Create a variable named ``value``; one leading ``?`` is dropped.

        :raises Exception: when the name is empty, either as given or
            after the leading ``?`` has been removed
        """
        # strip the optional marker first, so that a bare "?" is rejected
        # just like the empty string instead of giving a nameless variable
        if len(value) > 0 and value[0] == '?':
            value = value[1:]
        if len(value) == 0:
            raise Exception(
                "Attempted to create variable with empty string as name!")
        return str.__new__(cls, value)

    def __repr__(self):
        # plain Variables get the fully qualified name, subclasses their own
        if self.__class__ is Variable:
            clsName = "rdflib.term.Variable"
        else:
            clsName = self.__class__.__name__

        return """%s(%s)""" % (clsName, super(Variable, self).__repr__())

    def toPython(self):
        """Return the variable name prefixed with ``?``."""
        return "?%s" % self

    def n3(self, namespace_manager = None):
        """Return the N3 form, ``?name``; ``namespace_manager`` is unused."""
        return "?%s" % self

    def __reduce__(self):
        # pickle support: rebuild from the bare name
        return (Variable, (str(self),))

    def md5_term_hash(self):
        """a string of hex that will be the same for two Variables that
        are the same. It is not a suitable unique id.

        Supported for backwards compatibility; new code should
        probably just use __hash__
        """
        # the original message repeated the word "removed"
        warnings.warn(
            "method md5_term_hash is deprecated, and will be " +
            "removed in the future. If you use this please let rdflib-dev know!",
            category=DeprecationWarning, stacklevel=2)
        d = md5(self.encode())
        d.update(b("V"))
        return d.hexdigest()
1613 | |
1614 | |
class Statement(Node, tuple):
    """
    Deprecated pairing of a (subject, predicate, object) triple with a
    context, stored as the 2-tuple ``((s, p, o), context)``.
    """

    def __new__(cls, xxx_todo_changeme, context):
        # unpacking checks that exactly three terms were supplied
        s, p, o = xxx_todo_changeme
        warnings.warn(
            "Class Statement is deprecated, and will be removed in " +
            "the future. If you use this please let rdflib-dev know!",
            category=DeprecationWarning, stacklevel=2)
        triple = (s, p, o)
        return tuple.__new__(cls, (triple, context))

    def __reduce__(self):
        # pickle support: rebuild from the stored triple and context
        triple, context = self[0], self[1]
        return (Statement, (triple, context))

    def toPython(self):
        """Return the plain ``(triple, context)`` tuple."""
        triple, context = self[0], self[1]
        return (triple, context)
1630 | |
# Nodes are ordered like this
# See http://www.w3.org/TR/sparql11-query/#modOrderBy
# we leave "space" for more subclasses of Node elsewhere
# default-dict to gracefully fail for new subclasses
# (an unlisted class looks up as int(), i.e. 0)
_ORDERING = defaultdict(int)
_ORDERING.update({
    BNode: 10,
    Variable: 20,
    URIRef: 30,
    Literal: 40
})
1642 | |
1643 | |
def _isEqualXMLNode(node, other):
    """
    Structurally compare two minidom nodes.

    Nodes are equal when they have the same node type and, depending on
    that type, the same tag name and namespace, the same attributes
    (namespace declarations are ignored), the same data, and pairwise
    equal children in the same order.

    :param node: a DOM node, or None
    :param other: a DOM node, or None
    :returns: True if the two trees are equal, False otherwise (also
        False when either argument is None)
    :raises Exception: for a node type this function does not handle
    """
    from xml.dom.minidom import Node

    def recurse():
        # Recursion through the children. zip() stops at the shorter
        # sequence, so the lengths have to be compared explicitly first.
        if len(node.childNodes) != len(other.childNodes):
            return False
        return all(
            _isEqualXMLNode(nc, oc)
            for nc, oc in zip(node.childNodes, other.childNodes))

    if node is None or other is None:
        return False

    if node.nodeType != other.nodeType:
        return False

    if node.nodeType in [Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE]:
        return recurse()

    elif node.nodeType == Node.ELEMENT_NODE:
        # Get the basics right
        if not (node.tagName == other.tagName
                and node.namespaceURI == other.namespaceURI):
            return False

        # Handle the (namespaced) attributes; the namespace setting key
        # should be ignored, though
        # Note that the minidom orders the keys already, so we do not have
        # to worry about that, which is a bonus...
        n_keys = [
            k for k in node.attributes.keysNS()
            if k[0] != 'http://www.w3.org/2000/xmlns/']
        o_keys = [
            k for k in other.attributes.keysNS()
            if k[0] != 'http://www.w3.org/2000/xmlns/']
        if len(n_keys) != len(o_keys):
            return False
        for k in n_keys:
            if not (k in o_keys
                    and node.getAttributeNS(k[0], k[1]) ==
                    other.getAttributeNS(k[0], k[1])):
                return False

        # if we got here, the attributes are all right, we can go down
        # the tree recursively
        return recurse()

    elif node.nodeType in [
            Node.TEXT_NODE, Node.COMMENT_NODE, Node.CDATA_SECTION_NODE,
            Node.NOTATION_NODE]:
        return node.data == other.data

    elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
        return node.data == other.data and node.target == other.target

    elif node.nodeType == Node.ENTITY_NODE:
        return node.nodeValue == other.nodeValue

    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
        # the attribute is ``systemId``; the former ``other.system.Id``
        # raised AttributeError for every doctype comparison
        return node.publicId == other.publicId \
            and node.systemId == other.systemId

    else:
        # should not happen, in fact
        raise Exception(
            'I dont know how to compare XML Node type: %s' % node.nodeType)
1718 | |
# When the module is executed as a script, run the doctests embedded in
# its docstrings.
if __name__ == '__main__':
    import doctest
    doctest.testmod()