Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/term.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d30785e31577 | 1:56ad4e20f292 |
|---|---|
| 1 """ | |
| 2 This module defines the different types of terms. Terms are the kinds of | |
| 3 objects that can appear in a quoted/asserted triple. This includes those | |
| 4 that are core to RDF: | |
| 5 | |
| 6 * :class:`Blank Nodes <rdflib.term.BNode>` | |
| 7 * :class:`URI References <rdflib.term.URIRef>` | |
| 8 * :class:`Literals <rdflib.term.Literal>` (which consist of a literal value, datatype and language tag) | |
| 9 | |
| 10 Those that extend the RDF model into N3: | |
| 11 | |
| 12 * :class:`Formulae <rdflib.graph.QuotedGraph>` | |
| 13 * :class:`Universal Quantifications (Variables) <rdflib.term.Variable>` | |
| 14 | |
| 15 And those that are primarily for matching against 'Nodes' in the | |
| 16 underlying Graph: | |
| 17 | |
| 18 * REGEX Expressions | |
| 19 * Date Ranges | |
| 20 * Numerical Ranges | |
| 21 | |
| 22 """ | |
| 23 | |
# Names exported by a star-import of this module.
__all__ = [
    'bind',

    'Node',
    'Identifier',

    'URIRef',
    'BNode',
    'Literal',

    'Variable',
    'Statement',
]
| 37 | |
| 38 import logging | |
| 39 logger = logging.getLogger(__name__) | |
| 40 import warnings | |
| 41 import math | |
| 42 | |
| 43 import base64 | |
| 44 import xml.dom.minidom | |
| 45 | |
| 46 from urllib.parse import urlparse, urljoin, urldefrag | |
| 47 from datetime import date, time, datetime | |
| 48 from re import sub, compile | |
| 49 from collections import defaultdict | |
| 50 | |
| 51 from isodate import parse_time, parse_date, parse_datetime | |
| 52 | |
| 53 try: | |
| 54 from hashlib import md5 | |
| 55 assert md5 | |
| 56 except ImportError: | |
| 57 from md5 import md5 | |
| 58 | |
| 59 | |
| 60 import rdflib | |
| 61 from . import py3compat | |
| 62 from rdflib.compat import numeric_greater | |
| 63 | |
| 64 | |
| 65 b = py3compat.b | |
| 66 | |
# Path prefix tested by Genid._is_external_skolem to recognise a skolem URI.
skolem_genid = "/.well-known/genid/"
# Path prefix of the skolem URIs built by BNode.skolemize and stripped
# again by URIRef.de_skolemize.
rdflib_skolem_genid = "/.well-known/genid/rdflib/"
# Cache used by URIRef.de_skolemize: string form of a Genid URI -> the BNode
# created for it, so the same URI yields the same BNode on later calls.
skolems = {}
| 70 | |
| 71 | |
# Characters whose presence disqualifies a string as a URI reference.
_invalid_uri_chars = '<>" {}|\\^`'


def _is_valid_uri(uri):
    """
    Return True when *uri* contains none of the characters listed in
    ``_invalid_uri_chars``.

    This is only a character screen, not a full URI syntax check.
    """
    return not any(bad in uri for bad in _invalid_uri_chars)
| 78 | |
# A run of ASCII letters, optionally followed by "-"-separated
# alphanumeric subtags.
_lang_tag_regex = compile('^[a-zA-Z]+(?:-[a-zA-Z0-9]+)*$')


def _is_valid_langtag(tag):
    """Report whether *tag* matches ``_lang_tag_regex``."""
    return _lang_tag_regex.match(tag) is not None
| 83 | |
def _is_valid_unicode(value):
    """
    Verify that the provided value can be converted into a Python
    unicode (``str``) object.

    ``bytes`` input is trial-decoded as UTF-8; any other object is passed
    to ``str()``.  The two former non-bytes branches were identical, so no
    interpreter-version check is needed.

    :param value: the object to probe
    :returns: False if the conversion raises ``UnicodeError``, else True
    """
    try:
        if isinstance(value, bytes):
            value.decode('utf-8')
        else:
            str(value)
    except UnicodeError:
        return False
    return True
| 102 | |
class Node:
    """
    A Node in the Graph.

    The class defines no attributes or methods of its own; the empty
    ``__slots__`` keeps instances free of a per-instance ``__dict__``.
    """

    __slots__ = ()
| 109 | |
| 110 | |
class Identifier(Node, str):  # allow Identifiers to be Nodes in the Graph
    """
    See http://www.w3.org/2002/07/rdf-identifer-terminology/
    regarding choice of terminology.

    An Identifier is a ``str`` that is also a graph ``Node``.  Two
    identifiers are equal only when they have exactly the same class and
    the same string value; ``__hash__`` mixes in the class name to match.
    """

    __slots__ = ()

    def __new__(cls, value):
        return str.__new__(cls, value)

    def eq(self, other):
        """A "semantic"/interpreted equality function,
        by default, same as __eq__"""
        return self.__eq__(other)

    def neq(self, other):
        """A "semantic"/interpreted not equal function,
        by default, same as __ne__"""
        return self.__ne__(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __eq__(self, other):
        """
        Equality for Nodes.

        >>> BNode("foo")==None
        False
        >>> BNode("foo")==URIRef("foo")
        False
        >>> URIRef("foo")==BNode("foo")
        False
        >>> BNode("foo")!=URIRef("foo")
        True
        >>> URIRef("foo")!=BNode("foo")
        True
        >>> Variable('a')!=URIRef('a')
        True
        >>> Variable('a')!=Variable('a')
        False
        """

        # Exact class match on purpose: a subclass instance is never equal
        # to an instance of its parent class.
        if type(self) == type(other):
            return str(self) == str(other)
        else:
            return False

    def __gt__(self, other):
        """
        This implements ordering for Nodes,

        This tries to implement this:
        http://www.w3.org/TR/sparql11-query/#modOrderBy

        Variables are not included in the SPARQL list, but
        they are greater than BNodes and smaller than everything else

        Returns ``NotImplemented`` when *other* is neither None nor a Node.
        """
        if other is None:
            return True  # everything bigger than None
        elif type(self) == type(other):
            return str(self) > str(other)
        elif isinstance(other, Node):
            # different node kinds: order by the per-class rank table
            return _ORDERING[type(self)] > _ORDERING[type(other)]

        return NotImplemented

    def __lt__(self, other):
        """Mirror image of ``__gt__``; nothing is less than None."""
        if other is None:
            return False  # Nothing is less than None
        elif type(self) == type(other):
            return str(self) < str(other)
        elif isinstance(other, Node):
            return _ORDERING[type(self)] < _ORDERING[type(other)]

        return NotImplemented

    def __le__(self, other):
        """Less-than-or-equal, built from ``__lt__`` and ``==``."""
        r = self.__lt__(other)
        if r is NotImplemented:
            # NotImplemented is truthy; pass it on instead of reporting
            # "less than" for operands that cannot be ordered.
            return NotImplemented
        if r:
            return True
        return self == other

    def __ge__(self, other):
        """Greater-than-or-equal, built from ``__gt__`` and ``==``."""
        r = self.__gt__(other)
        if r is NotImplemented:
            # see __le__: never truth-test NotImplemented
            return NotImplemented
        if r:
            return True
        return self == other

    def __hash__(self):
        # Combine the fully qualified class name with the string value so
        # that equal strings of different node classes hash differently.
        t = type(self)
        fqn = t.__module__ + '.' + t.__name__
        return hash(fqn) ^ hash(str(self))
| 206 | |
| 207 | |
class URIRef(Identifier):
    """
    RDF URI Reference: http://www.w3.org/TR/rdf-concepts/#section-Graph-URIref
    """

    __slots__ = ()

    def __new__(cls, value, base=None):
        """
        Create a URI reference from *value*.

        :param value: the URI text
        :param base: optional base URI; when given, *value* is resolved
            against it with ``urljoin``

        A value that fails ``_is_valid_uri`` is still accepted, but a
        warning is logged.
        """
        if base is not None:
            ends_in_hash = value.endswith("#")
            value = urljoin(base, value, allow_fragments=1)
            # restore a trailing "#" if the join did not keep it
            if ends_in_hash and not value.endswith("#"):
                value += "#"

        if not _is_valid_uri(value):
            # pass the value as a logging argument so formatting is lazy
            logger.warning(
                '%s does not look like a valid URI, trying to serialize this will break.',
                value)

        try:
            rt = str.__new__(cls, value)
        except UnicodeDecodeError:
            rt = str.__new__(cls, value, 'utf-8')
        return rt

    def toPython(self):
        """Return the URI as a plain ``str``."""
        return str(self)

    def n3(self, namespace_manager = None):
        """
        This will do a limited check for valid URIs,
        essentially just making sure that the string includes no illegal
        characters (``<, >, ", {, }, |, \\, `, ^``)

        :param namespace_manager: if not None, will be used to make up
             a prefixed name
        """

        if not _is_valid_uri(self):
            raise Exception('"%s" does not look like a valid URI, I cannot serialize this as N3/Turtle. Perhaps you wanted to urlencode it?'%self)

        if namespace_manager:
            return namespace_manager.normalizeUri(self)
        else:
            return "<%s>" % self

    def defrag(self):
        """Return a URIRef without the fragment part; return ``self``
        unchanged when there is no "#" in it."""
        if "#" in self:
            # urldefrag returns (url, fragment); only the url is needed
            return URIRef(urldefrag(self)[0])
        return self

    def __reduce__(self):
        return (URIRef, (str(self),))

    def __getnewargs__(self):
        return (str(self), )

    if not py3compat.PY3:
        def __str__(self):
            return self.encode()

    def __repr__(self):
        if self.__class__ is URIRef:
            clsName = "rdflib.term.URIRef"
        else:
            clsName = self.__class__.__name__

        return """%s(%s)""" % (clsName, super(URIRef, self).__repr__())

    def __add__(self, other):
        """Concatenate and return the result as the same URIRef class."""
        return self.__class__(str(self) + other)

    def __radd__(self, other):
        return self.__class__(other + str(self))

    def __mod__(self, other):
        """Apply ``%``-formatting to the URI text; result keeps the class."""
        return self.__class__(str(self) % other)

    def md5_term_hash(self):
        """a string of hex that will be the same for two URIRefs that
        are the same. It is not a suitable unique id.

        Supported for backwards compatibility; new code should
        probably just use __hash__
        """
        warnings.warn(
            "method md5_term_hash is deprecated, and will be removed " +
            "in the future. If you use this please let rdflib-dev know!",
            category=DeprecationWarning, stacklevel=2)
        d = md5(self.encode())
        d.update(b("U"))
        return d.hexdigest()

    def de_skolemize(self):
        """ Create a Blank Node from a skolem URI, in accordance
        with http://www.w3.org/TR/rdf11-concepts/#section-skolemization.
        This function accepts only rdflib type skolemization, to provide
        a round-tripping within the system.

        Raises ``Exception`` when this URI is neither an ``RDFLibGenid``
        nor a ``Genid``.

        .. versionadded:: 4.0
        """
        if isinstance(self, RDFLibGenid):
            # the BNode id is whatever follows the rdflib genid prefix
            parsed_uri = urlparse("%s" % self)
            return BNode(
                value=parsed_uri.path[len(rdflib_skolem_genid):])
        elif isinstance(self, Genid):
            # other skolem URIs get one fresh BNode each, remembered in
            # the module-level ``skolems`` cache
            bnode_id = "%s" % self
            if bnode_id in skolems:
                return skolems[bnode_id]
            else:
                retval = BNode()
                skolems[bnode_id] = retval
                return retval
        else:
            raise Exception("<%s> is not a skolem URI" % self)
| 325 | |
| 326 | |
class Genid(URIRef):
    """URIRef subclass for skolem URIs (see ``skolem_genid``)."""

    __slots__ = ()

    @staticmethod
    def _is_external_skolem(uri):
        """
        Return True when the last occurrence of ``skolem_genid`` in the
        path of *uri* is at offset 0, i.e. the path starts with that
        prefix and does not contain it again later.
        """
        text = uri if isinstance(uri, str) else str(uri)
        return urlparse(text).path.rfind(skolem_genid) == 0
| 339 | |
| 340 | |
class RDFLibGenid(Genid):
    """Genid subclass for skolem URIs carrying the rdflib genid prefix."""

    __slots__ = ()

    @staticmethod
    def _is_rdflib_skolem(uri):
        """
        Return True when *uri* has no params, query or fragment and the
        last occurrence of ``rdflib_skolem_genid`` in its path is at
        offset 0.
        """
        text = uri if isinstance(uri, str) else str(uri)
        parts = urlparse(text)
        extras = (parts.params, parts.query, parts.fragment)
        if any(extra != "" for extra in extras):
            return False
        return parts.path.rfind(rdflib_skolem_genid) == 0
| 357 | |
| 358 | |
def _unique_id():
    # Historically documented as: """Create a (hopefully) unique prefix""".
    # Kept only so that the internal API stays unchanged; BNode.__new__()
    # below uses the result as the prefix of generated ids.
    #
    # The acceptable bnode value range for RDF/XML needs to be
    # something that can be serialized as a nodeID for N3.
    #
    # BNode identifiers must be valid NCNames" _:[A-Za-z][A-Za-z0-9]*
    # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID
    prefix = "N"  # ensure that id starts with a letter
    return prefix
| 370 | |
| 371 | |
def _serial_number_generator():
    """
    Build the callable that supplies BNode serial numbers.

    Every call of the returned function yields the hexadecimal form of a
    freshly generated UUID4.
    """
    import uuid

    def _next_serial():
        return uuid.uuid4().hex

    return _next_serial
| 382 | |
| 383 | |
class BNode(Identifier):
    """
    Blank Node: http://www.w3.org/TR/rdf-concepts/#section-blank-nodes

    """
    __slots__ = ()

    def __new__(cls, value=None,
                _sn_gen=_serial_number_generator(), _prefix=_unique_id()):
        """
        # only store implementations should pass in a value
        """
        if value is None:
            # Generated ids: fixed prefix + fresh serial, so that BNode
            # values do not collide with ones created with a different
            # instance of this module at some other time.
            value = "%s%s" % (_prefix, _sn_gen())
        # TODO: an explicitly supplied value is not validated here.  For
        # RDF/XML it needs to be something that can be serialized as a
        # nodeID for N3 (a valid NCName, _:[A-Za-z][A-Za-z0-9]*), see
        # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID
        # -- unless these constraints are enforced elsewhere.
        return Identifier.__new__(cls, value)

    def toPython(self):
        """Return the blank node id as a plain ``str``."""
        return str(self)

    def n3(self, namespace_manager=None):
        """Return the N3 form ``_:<id>``; *namespace_manager* is unused."""
        return "_:%s" % self

    def __getnewargs__(self):
        return (str(self), )

    def __reduce__(self):
        return (BNode, (str(self),))

    if not py3compat.PY3:
        def __str__(self):
            return self.encode()

    def __repr__(self):
        clsName = ("rdflib.term.BNode" if self.__class__ is BNode
                   else self.__class__.__name__)
        return """%s('%s')""" % (clsName, str(self))

    def md5_term_hash(self):
        """a string of hex that will be the same for two BNodes that
        are the same. It is not a suitable unique id.

        Supported for backwards compatibility; new code should
        probably just use __hash__
        """
        warnings.warn(
            "method md5_term_hash is deprecated, and will be removed " +
            "in the future. If you use this please let rdflib-dev know!",
            category=DeprecationWarning, stacklevel=2)
        digest = md5(self.encode())
        digest.update(b("B"))
        return digest.hexdigest()

    def skolemize(self, authority="http://rdlib.net/"):
        """ Create a URIRef "skolem" representation of the BNode, in accordance
        with http://www.w3.org/TR/rdf11-concepts/#section-skolemization

        The URI is *authority* joined with the rdflib genid prefix and
        this node's id.

        .. versionadded:: 4.0
        """
        return URIRef(
            urljoin(authority, "%s%s" % (rdflib_skolem_genid, str(self))))
| 457 | |
| 458 | |
| 459 class Literal(Identifier): | |
| 460 __doc__ = py3compat.format_doctest_out(""" | |
| 461 RDF Literal: http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal | |
| 462 | |
| 463 The lexical value of the literal is the unicode object | |
| 464 The interpreted, datatyped value is available from .value | |
| 465 | |
| 466 Language tags must be valid according to :rfc:5646 | |
| 467 | |
| 468 For valid XSD datatypes, the lexical form is optionally normalized | |
| 469 at construction time. Default behaviour is set by rdflib.NORMALIZE_LITERALS | |
| 470 and can be overridden by the normalize parameter to __new__ | |
| 471 | |
| 472 Equality and hashing of Literals are done based on the lexical form, i.e.: | |
| 473 | |
| 474 >>> from rdflib.namespace import XSD | |
| 475 | |
| 476 >>> Literal('01')!=Literal('1') # clear - strings differ | |
| 477 True | |
| 478 | |
| 479 but with data-type they get normalized: | |
| 480 | |
| 481 >>> Literal('01', datatype=XSD.integer)!=Literal('1', datatype=XSD.integer) | |
| 482 False | |
| 483 | |
| 484 unless disabled: | |
| 485 | |
| 486 >>> Literal('01', datatype=XSD.integer, normalize=False)!=Literal('1', datatype=XSD.integer) | |
| 487 True | |
| 488 | |
| 489 | |
| 490 Value based comparison is possible: | |
| 491 | |
| 492 >>> Literal('01', datatype=XSD.integer).eq(Literal('1', datatype=XSD.float)) | |
| 493 True | |
| 494 | |
| 495 The eq method also provides limited support for basic python types: | |
| 496 | |
| 497 >>> Literal(1).eq(1) # fine - int compatible with xsd:integer | |
| 498 True | |
| 499 >>> Literal('a').eq('b') # fine - str compatible with plain-lit | |
| 500 False | |
| 501 >>> Literal('a', datatype=XSD.string).eq('a') # fine - str compatible with xsd:string | |
| 502 True | |
| 503 >>> Literal('a').eq(1) # not fine, int incompatible with plain-lit | |
| 504 NotImplemented | |
| 505 | |
| 506 Greater-than/less-than ordering comparisons are also done in value | |
| 507 space, when compatible datatypes are used. Incompatible datatypes | |
| 508 are ordered by DT, or by lang-tag. For other nodes the ordering | |
| 509 is None < BNode < URIRef < Literal | |
| 510 | |
| 511 Any comparison with non-rdflib Node are "NotImplemented" | |
| 512 In PY2.X some stable order will be made up by python | |
| 513 | |
| 514 In PY3 this is an error. | |
| 515 | |
| 516 >>> from rdflib import Literal, XSD | |
| 517 >>> lit2006 = Literal('2006-01-01',datatype=XSD.date) | |
| 518 >>> lit2006.toPython() | |
| 519 datetime.date(2006, 1, 1) | |
| 520 >>> lit2006 < Literal('2007-01-01',datatype=XSD.date) | |
| 521 True | |
| 522 >>> Literal(datetime.utcnow()).datatype | |
| 523 rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#dateTime') | |
| 524 >>> Literal(1) > Literal(2) # by value | |
| 525 False | |
| 526 >>> Literal(1) > Literal(2.0) # by value | |
| 527 False | |
| 528 >>> Literal('1') > Literal(1) # by DT | |
| 529 True | |
| 530 >>> Literal('1') < Literal('1') # by lexical form | |
| 531 False | |
| 532 >>> Literal('a', lang='en') > Literal('a', lang='fr') # by lang-tag | |
| 533 False | |
| 534 >>> Literal(1) > URIRef('foo') # by node-type | |
| 535 True | |
| 536 | |
| 537 The > < operators will eat this NotImplemented and either make up | |
| 538 an ordering (py2.x) or throw a TypeError (py3k): | |
| 539 | |
| 540 >>> Literal(1).__gt__(2.0) | |
| 541 NotImplemented | |
| 542 | |
| 543 | |
| 544 """) | |
| 545 | |
| 546 | |
| 547 if not py3compat.PY3: | |
| 548 __slots__ = ("language", "datatype", "value", "_language", | |
| 549 "_datatype", "_value") | |
| 550 else: | |
| 551 __slots__ = ("_language", "_datatype", "_value") | |
| 552 | |
def __new__(cls, lexical_or_value, lang=None, datatype=None, normalize=None):
    """
    Build a Literal from another Literal, a lexical string, or a Python
    object.

    :param lexical_or_value: a Literal to copy, a lexical form (``str``),
        or any other Python object to convert
    :param lang: optional language tag; the empty string means "none"
    :param datatype: optional datatype URI; mutually exclusive with *lang*
    :param normalize: whether to rewrite a string's lexical form from its
        parsed value; ``None`` selects ``rdflib.NORMALIZE_LITERALS``
    :raises TypeError: if both *lang* and *datatype* are given
    :raises Exception: if *lang* is not a valid language tag
    """

    if lang == '':
        lang = None  # no empty lang-tags in RDF

    # identity test: only an omitted value falls back to the default
    normalize = normalize if normalize is not None else rdflib.NORMALIZE_LITERALS

    if lang is not None and datatype is not None:
        raise TypeError(
            "A Literal can only have one of lang or datatype, "
            "per http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal")

    if lang and not _is_valid_langtag(lang):
        raise Exception("'%s' is not a valid language tag!"%lang)

    if datatype:
        datatype = URIRef(datatype)

    value = None
    if isinstance(lexical_or_value, Literal):
        # create from another Literal instance

        lang = lang or lexical_or_value.language
        if datatype:
            # override datatype
            value = _castLexicalToPython(lexical_or_value, datatype)
        else:
            datatype = lexical_or_value.datatype
            value = lexical_or_value.value

    elif isinstance(lexical_or_value, str):
        # passed a string
        # try parsing lexical form of datatyped literal
        value = _castLexicalToPython(lexical_or_value, datatype)

        if value is not None and normalize:
            # replace the given lexical form by the one derived from
            # the parsed value, when that one is usable
            _value, _datatype = _castPythonToLiteral(value)
            if _value is not None and _is_valid_unicode(_value):
                lexical_or_value = _value

    else:
        # passed some python object
        value = lexical_or_value
        _value, _datatype = _castPythonToLiteral(lexical_or_value)

        datatype = datatype or _datatype
        if _value is not None:
            lexical_or_value = _value
        if datatype:
            lang = None

    if py3compat.PY3 and isinstance(lexical_or_value, bytes):
        lexical_or_value = lexical_or_value.decode('utf-8')

    try:
        inst = str.__new__(cls, lexical_or_value)
    except UnicodeDecodeError:
        inst = str.__new__(cls, lexical_or_value, 'utf-8')

    inst._language = lang
    inst._datatype = datatype
    inst._value = value
    return inst
| 616 | |
@py3compat.format_doctest_out
def normalize(self):
    """
    Returns a new literal with a normalised lexical representation
    of this literal
    >>> from rdflib import XSD
    >>> Literal("01", datatype=XSD.integer, normalize=False).normalize()
    rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    Illegal lexical forms for the datatype given are simply passed on
    >>> Literal("a", datatype=XSD.integer, normalize=False)
    rdflib.term.Literal(%(u)s'a', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    When no Python value is attached, the literal itself is returned.
    """

    # identity test, so the value's own comparison operators are not run
    if self.value is not None:
        return Literal(self.value, datatype=self.datatype, lang=self.language)
    else:
        return self
| 636 | |
@property
def value(self):
    """The Python value stored in ``_value``."""
    return self._value


@property
def language(self):
    """The language tag stored in ``_language``."""
    return self._language


@property
def datatype(self):
    """The datatype stored in ``_datatype``."""
    return self._datatype
| 648 | |
def __reduce__(self):
    """Pickle as a call to ``Literal(lexical form, language, datatype)``."""
    ctor_args = (str(self), self.language, self.datatype)
    return (Literal, ctor_args)
| 651 | |
def __getstate__(self):
    """Return ``(None, state)`` where *state* holds the language and
    datatype under the keys ``language`` and ``datatype``."""
    state = {"language": self.language, "datatype": self.datatype}
    return (None, state)
| 654 | |
def __setstate__(self, arg):
    """Restore ``_language`` and ``_datatype`` from the two-item state
    produced by ``__getstate__``; the first item is ignored."""
    _ignored, attrs = arg
    self._language = attrs["language"]
    self._datatype = attrs["datatype"]
| 659 | |
@py3compat.format_doctest_out
def __add__(self, val):
    """
    Add in value space when the Python values allow it, otherwise
    concatenate the lexical forms and keep language and datatype.

    >>> Literal(1) + 1
    rdflib.term.Literal(%(u)s'2', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
    >>> Literal("1") + "1"
    rdflib.term.Literal(%(u)s'11')
    """

    native = self.toPython()
    if not isinstance(native, Literal):
        try:
            return Literal(native + val)
        except TypeError:
            pass  # fall-through

    joined = str.__add__(self, val)
    return Literal(joined, self.language, self.datatype)
| 678 | |
def __bool__(self):
    """
    Is the Literal "True"
    This is used for if statements, bool(literal), etc.

    With a Python value attached, the truth of that value decides;
    otherwise the literal is true when its lexical form is non-empty.
    """
    # identity test, so the value's own comparison operators are not run
    if self.value is not None:
        return bool(self.value)
    return len(self) != 0
| 687 | |
@py3compat.format_doctest_out
def __neg__(self):
    """
    Unary minus on an int or float valued literal.

    >>> (- Literal(1))
    rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
    >>> (- Literal(10.5))
    rdflib.term.Literal(%(u)s'-10.5', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#double'))
    >>> from rdflib.namespace import XSD
    >>> (- Literal("1", datatype=XSD.integer))
    rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> (- Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(%(u)s'1')
    >>>
    """

    number = self.value
    if not isinstance(number, (int, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(-number)
| 710 | |
@py3compat.format_doctest_out
def __pos__(self):
    """
    Unary plus on an int or float valued literal.

    >>> (+ Literal(1))
    rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
    >>> (+ Literal(-1))
    rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
    >>> from rdflib.namespace import XSD
    >>> (+ Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> (+ Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(%(u)s'1')
    """
    number = self.value
    if not isinstance(number, (int, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(+number)
| 731 | |
@py3compat.format_doctest_out
def __abs__(self):
    """
    Absolute value of an int or float valued literal.

    >>> abs(Literal(-1))
    rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> from rdflib.namespace import XSD
    >>> abs( Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> abs(Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(%(u)s'1')
    """
    number = self.value
    if not isinstance(number, (int, float)):
        raise TypeError("Not a number; %s" % repr(self))
    return Literal(abs(number))
| 751 | |
@py3compat.format_doctest_out
def __invert__(self):
    """
    Bitwise inversion of a numeric literal.

    >>> ~(Literal(-1))
    rdflib.term.Literal(%(u)s'0', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    >>> from rdflib.namespace import XSD
    >>> ~( Literal("-1", datatype=XSD.integer))
    rdflib.term.Literal(%(u)s'0', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))

    Not working:

    >>> ~(Literal("1"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: Not a number; rdflib.term.Literal(%(u)s'1')

    A float value also raises TypeError, because Python defines no
    bitwise inversion for floats.
    """
    if isinstance(self.value, (int, float)):
        # Use the operator, not value.__invert__(): float has no such
        # method, so the method call raised AttributeError instead of
        # the TypeError that callers of the unary operators expect.
        return Literal(~self.value)
    else:
        raise TypeError("Not a number; %s" % repr(self))
| 773 | |
def __gt__(self, other):
    """

    This implements ordering for Literals,
    the other comparison methods delegate here

    This tries to implement this:
    http://www.w3.org/TR/sparql11-query/#modOrderBy

    In short, Literals with compatible data-types are ordered in value space,
    i.e.
    >>> from rdflib import XSD

    >>> Literal(1)>Literal(2) # int/int
    False
    >>> Literal(2.0)>Literal(1) # double/int
    True
    >>> from decimal import Decimal
    >>> Literal(Decimal("3.3")) > Literal(2.0) # decimal/double
    True
    >>> Literal(Decimal("3.3")) < Literal(4.0) # decimal/double
    True
    >>> Literal('b')>Literal('a') # plain lit/plain lit
    True
    >>> Literal('b')>Literal('a', datatype=XSD.string) # plain lit/xsd:string
    True

    Incompatible datatype mismatches ordered by DT

    >>> Literal(1)>Literal("2") # int>string
    False

    Langtagged literals by lang tag
    >>> Literal("a", lang="en")>Literal("a", lang="fr")
    False

    Returns ``NotImplemented`` for operands that are not Nodes, and for
    mismatched datatypes when ``rdflib.DAWG_LITERAL_COLLATION`` is set.
    """
    if other is None:
        return True  # Everything is greater than None
    if isinstance(other, Literal):

        # 1. two numeric literals compare by numeric value
        if self.datatype in _NUMERIC_LITERAL_TYPES and \
                other.datatype in _NUMERIC_LITERAL_TYPES:
            return numeric_greater(self.value, other.value)

        # plain-literals and xsd:string literals
        # are "the same"
        dtself = self.datatype or _XSD_STRING
        dtother = other.datatype or _XSD_STRING

        # 2. different datatypes: order by datatype URI (or refuse)
        if dtself != dtother:
            if rdflib.DAWG_LITERAL_COLLATION:
                return NotImplemented
            else:
                return dtself > dtother

        # 3. different language tags: untagged sorts first
        if self.language != other.language:
            if not self.language:
                return False
            elif not other.language:
                return True
            else:
                return self.language > other.language

        # 4. both have a Python value: compare in value space
        #    (identity tests, so the values' own != is not invoked)
        if self.value is not None and other.value is not None:
            return self.value > other.value

        # 5. otherwise compare lexical forms
        if str(self) != str(other):
            return str(self) > str(other)

        # same language, same lexical form, check real dt
        # plain-literals come before xsd:string!
        if self.datatype != other.datatype:
            if not self.datatype:
                return False
            elif not other.datatype:
                return True
            else:
                return self.datatype > other.datatype

        return False  # they are the same

    elif isinstance(other, Node):
        return True  # Literal are the greatest!
    else:
        return NotImplemented  # we can only compare to nodes
| 859 | |
def __lt__(self, other):
    """
    Less-than for Literals, derived from ``__gt__`` and ``eq``.

    Returns False for None and for non-Literal Nodes, and
    ``NotImplemented`` when the operands cannot be ordered.
    """
    if other is None:
        return False  # Nothing is less than None
    if isinstance(other, Literal):
        try:
            greater = self.__gt__(other)
            if greater is NotImplemented:
                # pass the refusal on; negating NotImplemented would
                # silently turn it into False
                return NotImplemented
            return not greater and not self.eq(other)
        except TypeError:
            return NotImplemented
    if isinstance(other, Node):
        return False  # all nodes are less-than Literals

    return NotImplemented
| 872 | |
def __le__(self, other):
    """
    Less-than-or-equal, built from ``__lt__`` and ``eq``.

    >>> from rdflib.namespace import XSD
    >>> Literal('2007-01-01T10:00:00', datatype=XSD.dateTime
    ...     ) <= Literal('2007-01-01T10:00:00', datatype=XSD.dateTime)
    True

    Returns ``NotImplemented`` when ``__lt__`` does, or when ``eq``
    raises TypeError.
    """
    r = self.__lt__(other)
    if r is NotImplemented:
        # NotImplemented is truthy; testing it would report True for
        # operands that cannot be ordered at all
        return NotImplemented
    if r:
        return True
    try:
        return self.eq(other)
    except TypeError:
        return NotImplemented
| 887 | |
def __ge__(self, other):
    """
    Greater-than-or-equal, built from ``__gt__`` and ``eq``.

    Returns ``NotImplemented`` when ``__gt__`` does, or when ``eq``
    raises TypeError.
    """
    r = self.__gt__(other)
    if r is NotImplemented:
        # NotImplemented is truthy; testing it would report True for
        # operands that cannot be ordered at all
        return NotImplemented
    if r:
        return True
    try:
        return self.eq(other)
    except TypeError:
        return NotImplemented
| 896 | |
def _comparable_to(self, other):
    """
    Helper method to decide which things are meaningful to
    rich-compare with this literal

    For another Literal the answer is False when
    * both are datatyped, at least one datatype is not in ``XSDToPython``
      and the datatypes differ, or
    * at most one is datatyped and the pair is not "xsd:string versus
      plain literal" (in either order), or
    * the language tags differ, compared case-insensitively.
    Anything else, including every non-Literal, yields True.
    """
    if isinstance(other, Literal):
        if (self.datatype and other.datatype):
            # two datatyped literals
            if self.datatype not in XSDToPython or other.datatype not in XSDToPython:
                # non XSD DTs must match
                if self.datatype != other.datatype:
                    return False

        else:
            # xsd:string may be compared with plain literals.
            # Both orientations are grouped before negating; the former
            # "not (A) or (B)" parsed as "(not A) or B" and so rejected
            # a plain literal compared with an xsd:string one.
            string_vs_plain = (
                (self.datatype == _XSD_STRING and not other.datatype) or
                (other.datatype == _XSD_STRING and not self.datatype))
            # NOTE(review): two literals that both lack a datatype are
            # still rejected here, as before -- confirm that is intended.
            if not string_vs_plain:
                return False

            # if given lang-tag has to be case insensitive equal
            if (self.language or "").lower() != (other.language or "").lower():
                return False

    return True
| 921 | |
def __hash__(self):
    """
    Hash the lexical form together with the language tag and datatype.

    >>> from rdflib.namespace import XSD
    >>> a = {Literal('1', datatype=XSD.integer):'one'}
    >>> Literal('1', datatype=XSD.double) in a
    False

    From the Python reference (3.4.1 Basic customization):

    "Called for the key object for dictionary operations,
    and by the built-in function hash(). Should return
    a 32-bit integer usable as a hash value for
    dictionary operations. The only required property
    is that objects which compare equal have the same
    hash value; it is advised to somehow mix together
    (e.g., using exclusive or) the hash values for the
    components of the object that also play a part in
    comparison of objects."

    From RDF: Concepts and Abstract Syntax (6.5.1 Literal Equality):

    "Two literals are equal if and only if all of the following hold:
    * The strings of the two lexical forms compare equal, character by
    character.
    * Either both or neither have language tags.
    * The language tags, if any, compare equal.
    * Either both or neither have datatype URIs.
    * The two datatype URIs, if any, compare equal, character by
    character."
    """
    # Start from the hash of the underlying string ...
    digest = super(Literal, self).__hash__()
    # ... and fold in the (case-normalised) language tag and the datatype.
    language = self.language
    if language:
        digest ^= hash(language.lower())
    datatype = self.datatype
    if datatype:
        digest ^= hash(datatype)
    return digest
| 957 | |
@py3compat.format_doctest_out
def __eq__(self, other):
    """
    Term equality: a literal can only be equal to another literal.

    From RDF: Concepts and Abstract Syntax (6.5.1 Literal Equality):

    "Two literals are equal if and only if all of the following hold:
    * The strings of the two lexical forms compare equal, character by character.
    * Either both or neither have language tags.
    * The language tags, if any, compare equal.
    * Either both or neither have datatype URIs.
    * The two datatype URIs, if any, compare equal, character by character."

    >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo"))
    True
    >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo2"))
    False

    >>> Literal("1", datatype=URIRef("foo")) == Literal("2", datatype=URIRef("foo"))
    False
    >>> Literal("1", datatype=URIRef("foo")) == "asdf"
    False
    >>> from rdflib import XSD
    >>> Literal('2007-01-01', datatype=XSD.date) == Literal('2007-01-01', datatype=XSD.date)
    True
    >>> Literal('2007-01-01', datatype=XSD.date) == date(2007, 1, 1)
    False
    >>> Literal("one", lang="en") == Literal("one", lang="en")
    True
    >>> Literal("hast", lang='en') == Literal("hast", lang='de')
    False
    >>> Literal("1", datatype=XSD.integer) == Literal(1)
    True
    >>> Literal("1", datatype=XSD.integer) == Literal("01", datatype=XSD.integer)
    True

    """
    def folded_language(term):
        # Language tags are compared case-insensitively; absent tags map to None.
        return term.language.lower() if term.language else None

    if self is other:
        return True
    if other is None or not isinstance(other, Literal):
        return False

    # Datatype, language tag and lexical form must all agree.
    return (self.datatype == other.datatype
            and folded_language(self) == folded_language(other)
            and str.__eq__(self, other))
| 1005 | |
def eq(self, other):
    """
    Compare the value of this literal with something else.

    Either with the value of another literal; comparisons are then done
    in literal "value space", and according to the rules of XSD
    subtype-substitution/type-promotion,

    OR with a python object:

    * ``str`` objects can be compared with plain literals, or those
      with datatype xsd:string
    * ``bool`` objects with xsd:boolean
    * ``int`` or ``float`` objects with numeric xsd types
    * ``date``, ``time`` and ``datetime`` objects with xsd:date,
      xsd:time or xsd:dateTime

    Any other operation returns ``NotImplemented``.

    :raises TypeError: when both literals have compatible datatypes but
        at least one value could not be parsed and the lexical forms
        differ, or (with ``rdflib.DAWG_LITERAL_COLLATION`` set) when the
        datatypes differ.
    """
    if isinstance(other, Literal):

        if self.datatype in _NUMERIC_LITERAL_TYPES \
                and other.datatype in _NUMERIC_LITERAL_TYPES:
            if self.value is not None and other.value is not None:
                return self.value == other.value
            else:
                if str.__eq__(self, other):
                    return True
                raise TypeError(
                    'I cannot know that these two lexical forms do not map to the same value: %s and %s' % (self, other))
        if (self.language or "").lower() != (other.language or "").lower():
            return False

        # plain literals are treated as xsd:string for the datatype check
        dtself = self.datatype or _XSD_STRING
        dtother = other.datatype or _XSD_STRING

        if (dtself == _XSD_STRING and dtother == _XSD_STRING):
            # string/plain literals, compare on lexical form
            return str.__eq__(self, other)

        if dtself != dtother:
            if rdflib.DAWG_LITERAL_COLLATION:
                raise TypeError("I don't know how to compare literals with datatypes %s and %s" % (
                    self.datatype, other.datatype))
            else:
                return False

        # matching non-string DTs now - do we compare values or
        # lexical form first? comparing two ints is far quicker -
        # maybe there are counter examples

        if self.value is not None and other.value is not None:

            if self.datatype in (_RDF_XMLLITERAL, _RDF_HTMLLITERAL):
                return _isEqualXMLNode(self.value, other.value)

            return self.value == other.value
        else:

            if str.__eq__(self, other):
                return True

            if self.datatype == _XSD_STRING:
                return False  # string value space=lexical space

            # matching DTs, but not matching, we cannot compare!
            raise TypeError(
                'I cannot know that these two lexical forms do not map to the same value: %s and %s' % (self, other))

    elif isinstance(other, Node):
        return False  # no non-Literal nodes are equal to a literal

    elif isinstance(other, str):
        # only plain-literals can be directly compared to strings

        # TODO: Is "blah"@en eq "blah" ?
        if self.language is not None:
            return False

        if (self.datatype == _XSD_STRING or self.datatype is None):
            return str(self) == other

    elif isinstance(other, bool):
        # bool is a subclass of int, so this test has to come before the
        # numeric one or it can never be reached.
        if self.datatype == _XSD_BOOLEAN:
            return self.value == other
    elif isinstance(other, (int, float)):
        if self.datatype in _NUMERIC_LITERAL_TYPES:
            return self.value == other
    elif isinstance(other, (date, datetime, time)):
        if self.datatype in (_XSD_DATETIME, _XSD_DATE, _XSD_TIME):
            return self.value == other

    return NotImplemented
| 1102 | |
def neq(self, other):
    """
    Return the negation of :meth:`eq` for ``other``.

    ``NotImplemented`` from ``eq`` is passed through unchanged instead of
    being coerced to a boolean, so "cannot compare" is never reported as
    "equal".  A ``TypeError`` raised by ``eq`` propagates to the caller.
    """
    equal = self.eq(other)
    if equal is NotImplemented:
        return NotImplemented
    return not equal
| 1105 | |
@py3compat.format_doctest_out
def n3(self, namespace_manager = None):
    r'''
    Serialize this literal in N3 syntax.

    Examples::

        >>> Literal("foo").n3()
        %(u)s'"foo"'

    Strings with newlines or triple-quotes::

        >>> Literal("foo\nbar").n3()
        %(u)s'"""foo\nbar"""'

        >>> Literal("''\'").n3()
        %(u)s'"\'\'\'"'

        >>> Literal('"""').n3()
        %(u)s'"\\"\\"\\""'

    Language::

        >>> Literal("hello", lang="en").n3()
        %(u)s'"hello"@en'

    Datatypes::

        >>> Literal(1).n3()
        %(u)s'"1"^^<http://www.w3.org/2001/XMLSchema#integer>'

        >>> Literal(1.0).n3()
        %(u)s'"1.0"^^<http://www.w3.org/2001/XMLSchema#double>'

        >>> Literal(True).n3()
        %(u)s'"true"^^<http://www.w3.org/2001/XMLSchema#boolean>'

    A datatype and a language together are not allowed (the datatype
    takes precedence)::

        >>> Literal(1, lang="en").n3()
        %(u)s'"1"^^<http://www.w3.org/2001/XMLSchema#integer>'

    Custom datatype::

        >>> footype = URIRef("http://example.org/ns#foo")
        >>> Literal("1", datatype=footype).n3()
        %(u)s'"1"^^<http://example.org/ns#foo>'

    Passing a namespace-manager will use it to abbreviate datatype URIs:

        >>> from rdflib import Graph
        >>> Literal(1).n3(Graph().namespace_manager)
        %(u)s'"1"^^xsd:integer'
    '''
    # Without a namespace manager the datatype is written as a full URI.
    if not namespace_manager:
        return self._literal_n3()
    return self._literal_n3(qname_callback=namespace_manager.normalizeUri)
| 1165 | |
@py3compat.format_doctest_out
def _literal_n3(self, use_plain=False, qname_callback=None):
    '''
    Build the N3 text for this literal.

    :param use_plain: when true, literals whose datatype is listed in
        ``_PLAIN_LITERAL_TYPES`` and whose value could be parsed are
        written in shorthand form (no quotes, no datatype suffix).
    :param qname_callback: optional callable that receives the datatype
        and returns an abbreviated name; a falsy result falls back to
        the ``<uri>`` form.

    Using plain literal (shorthand) output::
        >>> from rdflib.namespace import XSD

        >>> Literal(1)._literal_n3(use_plain=True)
        %(u)s'1'

        >>> Literal(1.0)._literal_n3(use_plain=True)
        %(u)s'1e+00'

        >>> Literal(1.0, datatype=XSD.decimal)._literal_n3(use_plain=True)
        %(u)s'1.0'

        >>> Literal(1.0, datatype=XSD.float)._literal_n3(use_plain=True)
        %(u)s'"1.0"^^<http://www.w3.org/2001/XMLSchema#float>'

        >>> Literal("foo", datatype=XSD.string)._literal_n3(
        ...         use_plain=True)
        %(u)s'"foo"^^<http://www.w3.org/2001/XMLSchema#string>'

        >>> Literal(True)._literal_n3(use_plain=True)
        %(u)s'true'

        >>> Literal(False)._literal_n3(use_plain=True)
        %(u)s'false'

        >>> Literal(1.91)._literal_n3(use_plain=True)
        %(u)s'1.91e+00'

    Only limited precision available for floats:
        >>> Literal(0.123456789)._literal_n3(use_plain=True)
        %(u)s'1.234568e-01'

        >>> Literal('0.123456789',
        ...     datatype=XSD.decimal)._literal_n3(use_plain=True)
        %(u)s'0.123456789'

    Using callback for datatype QNames::

        >>> Literal(1)._literal_n3(
        ...         qname_callback=lambda uri: "xsd:integer")
        %(u)s'"1"^^xsd:integer'

    '''
    if use_plain and self.datatype in _PLAIN_LITERAL_TYPES:
        if self.value is not None:
            # If self is inf or NaN, we need a datatype
            # (there is no plain representation)
            if self.datatype in _NUMERIC_INF_NAN_LITERAL_TYPES:
                try:
                    v = float(self)
                    if math.isinf(v) or math.isnan(v):
                        # retry in quoted form with an explicit datatype
                        return self._literal_n3(False, qname_callback)
                except ValueError:
                    # lexical form is not a float: use the quoted form
                    return self._literal_n3(False, qname_callback)

            # this is a bit of a mess -
            # in py >=2.6 the string.format function makes this easier
            # we try to produce "pretty" output
            if self.datatype == _XSD_DOUBLE:
                # exponent notation with the trailing zeros (and a bare
                # dot) of the mantissa removed
                return sub("\\.?0*e", "e", '%e' % float(self))
            elif self.datatype == _XSD_DECIMAL:
                s = '%s' % self
                # make sure the shorthand contains a decimal point
                if '.' not in s:
                    s += '.0'
                return s

            elif self.datatype == _XSD_BOOLEAN:
                return ('%s' % self).lower()
            else:
                return '%s' % self

    # Quoted form: escaped lexical form plus a language or datatype suffix.
    encoded = self._quote_encode()

    datatype = self.datatype
    quoted_dt = None
    if datatype:
        if qname_callback:
            quoted_dt = qname_callback(datatype)
        if not quoted_dt:
            # no callback, or the callback gave nothing usable
            quoted_dt = "<%s>" % datatype
        if datatype in _NUMERIC_INF_NAN_LITERAL_TYPES:
            try:
                v = float(self)
                if math.isinf(v):
                    # py string reps: float: 'inf', Decimal: 'Infinity"
                    # both need to become "INF" in xsd datatypes
                    encoded = encoded.replace('inf', 'INF').replace(
                        'Infinity', 'INF')
                if math.isnan(v):
                    encoded = encoded.replace('nan', 'NaN')
            except ValueError:
                # if we can't cast to float something is wrong, but we can
                # still serialize. Warn user about it
                warnings.warn("Serializing weird numerical %r" % self)

    # A language tag is checked before the datatype.
    language = self.language
    if language:
        return '%s@%s' % (encoded, language)
    elif datatype:
        return '%s^^%s' % (encoded, quoted_dt)
    else:
        return '%s' % encoded
| 1271 | |
def _quote_encode(self):
    """
    Return the lexical form wrapped in N3 string quotes.

    Text containing a newline is triple-quoted and keeps its newlines;
    everything else is wrapped in single double-quotes.  Backslashes,
    carriage returns and (where needed) double quotes are escaped.
    """
    # NOTE: quotes could in theory be chosen based on which quote
    # characters appear in the string, but N3/turtle doesn't allow "'"(?).
    # A plain unicode-escape encoding is not used because it would turn a
    # newline into "\\n" where a literal "\n" is wanted.

    if "\n" not in self:
        # Single-line form; there is no newline left to escape here.
        escaped = self.replace('\\', '\\\\')
        escaped = escaped.replace('"', '\\"')
        escaped = escaped.replace('\r', '\\r')
        return '"%s"' % escaped

    # Multi-line form: triple quote this string.
    body = self.replace('\\', '\\\\')
    if '"""' in self:
        # is this ok?
        body = body.replace('"""', '\\"\\"\\"')
    if body.endswith('"') and body[-2] != '\\':
        # a trailing bare quote would merge with the closing delimiter
        body = body[:-1] + '\\"'
    body = body.replace('\r', '\\r')
    return '"""%s"""' % body
| 1298 | |
# Only defined when py3compat.PY3 is false: __str__ then returns the
# result of encode() rather than the text itself.
if not py3compat.PY3:
    def __str__(self):
        return self.encode()
| 1302 | |
def __repr__(self):
    """
    Return a constructor-like representation of this literal.

    The string repr comes first, followed by ``lang=...`` and
    ``datatype=...`` when those are set.  Exact ``Literal`` instances
    are labelled with the fully qualified class name, subclasses with
    their bare class name.
    """
    pieces = [super(Literal, self).__repr__()]
    for label, attribute in (("lang=%s", self.language),
                             ("datatype=%s", self.datatype)):
        if attribute is not None:
            pieces.append(label % repr(attribute))
    clsName = ("rdflib.term.Literal" if self.__class__ == Literal
               else self.__class__.__name__)
    return """%s(%s)""" % (clsName, ", ".join(pieces))
| 1314 | |
def toPython(self):
    """
    Return the Python value of this literal, or the literal itself when
    no value is available (``self.value`` is ``None``).
    """
    converted = self.value
    return self if converted is None else converted
| 1323 | |
def md5_term_hash(self):
    """Return a hex digest that is identical for identical Literals.

    The digest is not a suitable unique id.  Deprecated: kept for
    backwards compatibility only; new code should probably just use
    __hash__.
    """
    message = (
        "method md5_term_hash is deprecated, and will be removed "
        "removed in the future. If you use this please let rdflib-dev know!"
    )
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    # md5 over the encoded text, followed by the marker "L".
    digest = md5(self.encode())
    digest.update(b("L"))
    return digest.hexdigest()
| 1338 | |
| 1339 | |
def _parseXML(xmlstring):
    """
    Parse an XML fragment into a normalized minidom ``Document``.

    The text is wrapped in a synthetic ``rdflibtoplevelelement`` root
    element before parsing.
    """
    if not py3compat.PY3:
        xmlstring = xmlstring.encode('utf-8')
    wrapped = "<rdflibtoplevelelement>%s</rdflibtoplevelelement>" % xmlstring
    document = xml.dom.minidom.parseString(wrapped)
    document.normalize()
    return document
| 1347 | |
| 1348 | |
def _parseHTML(htmltext):
    """
    Parse an HTML fragment with html5lib into a normalized DOM fragment.

    :param htmltext: the HTML text to parse
    :returns: the result of ``HTMLParser.parseFragment`` using the "dom"
        tree builder, after ``normalize()``
    :raises ImportError: when html5lib cannot be imported; the original
        import failure is chained as the cause
    """
    # Only the import is guarded, so an ImportError raised while parsing
    # is not mislabelled as "html5lib is missing".
    try:
        import html5lib
    except ImportError as err:
        raise ImportError(
            "HTML5 parser not available. Try installing" +
            " html5lib <http://code.google.com/p/html5lib>") from err

    parser = html5lib.HTMLParser(
        tree=html5lib.treebuilders.getTreeBuilder("dom"))
    retval = parser.parseFragment(htmltext)
    retval.normalize()
    return retval
| 1361 | |
| 1362 | |
def _writeXML(xmlnode):
    """
    Serialize a minidom node to UTF-8 encoded XML.

    A ``DocumentFragment`` is first moved into a fresh ``Document``.  The
    XML declaration and the synthetic ``rdflibtoplevelelement`` wrapper
    are stripped from the output so that parsing and writing round-trip
    cleanly.
    """
    if isinstance(xmlnode, xml.dom.minidom.DocumentFragment):
        d = xml.dom.minidom.Document()
        d.childNodes += xmlnode.childNodes
        xmlnode = d
    s = xmlnode.toxml('utf-8')

    # for clean round-tripping, remove headers -- I have great and
    # specific worries that this will blow up later, but this margin
    # is too narrow to contain them
    declaration = b('<?xml version="1.0" encoding="utf-8"?>')
    open_tag = b('<rdflibtoplevelelement>')
    if s.startswith(declaration):
        s = s[len(declaration):]
    if s.startswith(open_tag):
        # the closing tag is one character (the slash) longer
        s = s[len(open_tag):-(len(open_tag) + 1)]
    if s == b('<rdflibtoplevelelement/>'):
        s = b('')
    return s
| 1379 | |
# Namespace/XSD cannot be imported here (circular dependency), so the
# datatype URIs are spelled out from their prefixes.
_XSD_PFX = 'http://www.w3.org/2001/XMLSchema#'
_RDF_PFX = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

_RDF_XMLLITERAL = URIRef(_RDF_PFX + 'XMLLiteral')
_RDF_HTMLLITERAL = URIRef(_RDF_PFX + 'HTML')

_XSD_STRING = URIRef(_XSD_PFX + 'string')

_XSD_FLOAT = URIRef(_XSD_PFX + 'float')
_XSD_DOUBLE = URIRef(_XSD_PFX + 'double')
_XSD_DECIMAL = URIRef(_XSD_PFX + 'decimal')
_XSD_INTEGER = URIRef(_XSD_PFX + 'integer')
_XSD_BOOLEAN = URIRef(_XSD_PFX + 'boolean')

_XSD_DATETIME = URIRef(_XSD_PFX + 'dateTime')
_XSD_DATE = URIRef(_XSD_PFX + 'date')
_XSD_TIME = URIRef(_XSD_PFX + 'time')

# TODO: duration, gYearMonth, gYear, gMonthDay, gDay, gMonth

# datatypes whose literals are compared by numeric value
_NUMERIC_LITERAL_TYPES = (
    _XSD_INTEGER,
    _XSD_DECIMAL,
    _XSD_DOUBLE,
    _XSD_FLOAT,
) + tuple(
    URIRef(_XSD_PFX + local_name)
    for local_name in (
        'byte',
        'int',
        'long',
        'negativeInteger',
        'nonNegativeInteger',
        'nonPositiveInteger',
        'positiveInteger',
        'short',
        'unsignedByte',
        'unsignedInt',
        'unsignedLong',
        'unsignedShort',
    )
)

# these have "native" syntax in N3/SPARQL
_PLAIN_LITERAL_TYPES = (
    _XSD_INTEGER,
    _XSD_BOOLEAN,
    _XSD_DOUBLE,
    _XSD_DECIMAL,
)

# these have special INF and NaN XSD representations
_NUMERIC_INF_NAN_LITERAL_TYPES = (
    _XSD_FLOAT,
    _XSD_DOUBLE,
    _XSD_DECIMAL,
)
| 1436 | |
| 1437 | |
def _castPythonToLiteral(obj):
    """
    Map a Python object to a ``(lexical value, datatype URI or None)`` pair.

    The first entry of ``_PythonToXSD`` whose type matches ``obj`` wins.
    Its cast function, when there is one, produces the lexical value;
    otherwise ``obj`` is passed through unchanged.
    """
    for python_type, (to_lexical, xsd_type) in _PythonToXSD:
        if not isinstance(obj, python_type):
            continue
        if to_lexical:
            return to_lexical(obj), xsd_type
        # no cast function: keep the object, and report a falsy datatype as None
        return obj, (xsd_type if xsd_type else None)
    return obj, None  # TODO: is this right for the fall through case?
| 1452 | |
| 1453 from decimal import Decimal | |
| 1454 | |
# Mappings from Python types to XSD datatypes and back (borrowed from sparta).
# _castPythonToLiteral uses the first entry whose type matches, so the
# order matters:
#   * bool is a subclass of int and has to be listed before it;
#   * datetime instances are also instances of date, so datetime has to
#     be listed before date.

# SPARQL/Turtle/N3 has shortcuts for integer, double, decimal
# python has only float - to be in tune with sparql/n3/turtle
# we default to XSD.double for float literals

# python ints have no size limit, so they map to the abstract integer
# type rather than some concrete bit-limited datatype.  (A second,
# identical int row could never be reached and has been dropped.)

_PythonToXSD = [
    (str, (None, None)),
    (float, (None, _XSD_DOUBLE)),
    (bool, (lambda i:str(i).lower(), _XSD_BOOLEAN)),
    (int, (None, _XSD_INTEGER)),
    (Decimal, (None, _XSD_DECIMAL)),
    (datetime, (lambda i:i.isoformat(), _XSD_DATETIME)),
    (date, (lambda i:i.isoformat(), _XSD_DATE)),
    (time, (lambda i:i.isoformat(), _XSD_TIME)),
    (xml.dom.minidom.Document, (_writeXML, _RDF_XMLLITERAL)),
    # this is a bit dirty - by accident the html5lib parser produces
    # DocumentFragments, and the xml parser Documents, letting this
    # decide what datatype to use makes roundtripping easier, but it is a
    # bit random
    (xml.dom.minidom.DocumentFragment, (_writeXML, _RDF_HTMLLITERAL))
]
| 1484 | |
# Datatype URI -> function turning a lexical form into a Python value.
# A None function means the lexical form is used as the value directly.
XSDToPython = {
    None: None,  # plain literals map directly to value space
    _XSD_TIME: parse_time,
    _XSD_DATE: parse_date,
    URIRef(_XSD_PFX + 'gYear'): parse_date,
    URIRef(_XSD_PFX + 'gYearMonth'): parse_date,
    _XSD_DATETIME: parse_datetime,
    _XSD_STRING: None,
    URIRef(_XSD_PFX + 'normalizedString'): None,
    URIRef(_XSD_PFX + 'token'): None,
    URIRef(_XSD_PFX + 'language'): None,
    _XSD_BOOLEAN: lambda i: i.lower() in ['1', 'true'],
    _XSD_DECIMAL: Decimal,
    _XSD_INTEGER: int,
    URIRef(_XSD_PFX + 'nonPositiveInteger'): int,
    URIRef(_XSD_PFX + 'long'): int,
    URIRef(_XSD_PFX + 'nonNegativeInteger'): int,
    URIRef(_XSD_PFX + 'negativeInteger'): int,
    URIRef(_XSD_PFX + 'int'): int,
    URIRef(_XSD_PFX + 'unsignedLong'): int,
    URIRef(_XSD_PFX + 'positiveInteger'): int,
    URIRef(_XSD_PFX + 'short'): int,
    URIRef(_XSD_PFX + 'unsignedInt'): int,
    URIRef(_XSD_PFX + 'byte'): int,
    URIRef(_XSD_PFX + 'unsignedShort'): int,
    URIRef(_XSD_PFX + 'unsignedByte'): int,
    _XSD_FLOAT: float,
    _XSD_DOUBLE: float,
    URIRef(_XSD_PFX + 'base64Binary'): lambda s: base64.b64decode(s),
    URIRef(_XSD_PFX + 'anyURI'): None,
    _RDF_XMLLITERAL: _parseXML,
    _RDF_HTMLLITERAL: _parseHTML,
}

# Working copy of the table; bind() adds to this one, not to XSDToPython.
_toPythonMapping = dict(XSDToPython)
| 1522 | |
def _castLexicalToPython(lexical, datatype):
    """
    Map a lexical form to the value-space for the given datatype

    :param lexical: the lexical form to convert
    :param datatype: the datatype used to look up a conversion function
        in ``_toPythonMapping``
    :returns: a python object for the value or ``None`` (unknown
        datatype, or the lexical form is not valid for the datatype)
    """
    convFunc = _toPythonMapping.get(datatype, False)
    if convFunc:
        try:
            return convFunc(lexical)
        except Exception:
            # not a valid lexical representation for this dt; unlike a
            # bare "except:" this lets KeyboardInterrupt and SystemExit
            # propagate
            return None
    elif convFunc is None:
        # no conv func means 1-1 lexical<->value-space mapping
        try:
            return str(lexical)
        except UnicodeDecodeError:
            return str(lexical, 'utf-8')
    else:
        # no convFunc - unknown data-type
        return None
| 1544 | |
def bind(datatype, pythontype, constructor=None, lexicalizer=None):
    """
    register a new datatype<->pythontype binding

    :param datatype: the datatype to register

    :param pythontype: the Python type that corresponds to ``datatype``

    :param constructor: an optional function for converting lexical forms
                        into a Python instances, if not given the pythontype
                        is used directly

    :param lexicalizer: an optional function for converting python objects to
                        lexical form, if not given object.__str__ is used

    A warning is logged when ``datatype`` already has a binding; the new
    binding then replaces it in the lexical-to-Python direction.
    """
    if datatype in _toPythonMapping:
        logger.warning("datatype '%s' was already bound. Rebinding.",
                       datatype)

    if constructor is None:
        constructor = pythontype
    _toPythonMapping[datatype] = constructor
    _PythonToXSD.append((pythontype, (lexicalizer, datatype)))
| 1565 | |
| 1566 | |
class Variable(Identifier):
    """
    A Variable - this is used for querying, or in Formula aware
    graphs, where Variables can be stored in the graph
    """
    __slots__ = ()

    def __new__(cls, value):
        """Create a variable from ``value``, dropping one leading '?'."""
        if not len(value):
            raise Exception(
                "Attempted to create variable with empty string as name!")
        name = value[1:] if value[0] == '?' else value
        return str.__new__(cls, name)

    def __repr__(self):
        """Return ``ClassName(<string repr>)``."""
        # Exact Variable instances get the fully qualified name.
        clsName = ("rdflib.term.Variable" if self.__class__ is Variable
                   else self.__class__.__name__)
        return """%s(%s)""" % (clsName, super(Variable, self).__repr__())

    def toPython(self):
        """Return the variable name prefixed with '?'."""
        return "?%s" % self

    def n3(self, namespace_manager = None):
        """Return the N3 form: the name prefixed with '?'."""
        return "?%s" % self

    def __reduce__(self):
        """Pickle support: rebuild from the plain string name."""
        return (Variable, (str(self),))

    def md5_term_hash(self):
        """Return a hex digest that is identical for identical Variables.

        The digest is not a suitable unique id.  Deprecated: kept for
        backwards compatibility only; new code should probably just use
        __hash__.
        """
        message = (
            "method md5_term_hash is deprecated, and will be removed "
            "removed in the future. If you use this please let rdflib-dev know!"
        )
        warnings.warn(message, category=DeprecationWarning, stacklevel=2)
        # md5 over the encoded name, followed by the marker "V".
        digest = md5(self.encode())
        digest.update(b("V"))
        return digest.hexdigest()
| 1613 | |
| 1614 | |
class Statement(Node, tuple):
    """
    Deprecated pairing of a ``(subject, predicate, object)`` triple with
    a context, stored as the two-element tuple ``(triple, context)``.
    """

    def __new__(cls, xxx_todo_changeme, context):
        """Build the statement; emits a ``DeprecationWarning``."""
        # Unpack first so that a malformed triple fails before the warning.
        s, p, o = xxx_todo_changeme
        message = (
            "Class Statement is deprecated, and will be removed in "
            "the future. If you use this please let rdflib-dev know!"
        )
        warnings.warn(message, category=DeprecationWarning, stacklevel=2)
        return tuple.__new__(cls, ((s, p, o), context))

    def __reduce__(self):
        """Pickle support: rebuild from the triple and the context."""
        triple, context = self[0], self[1]
        return (Statement, (triple, context))

    def toPython(self):
        """Return the plain ``(triple, context)`` tuple."""
        triple, context = self[0], self[1]
        return (triple, context)
| 1630 | |
# Sort rank of each node class.
# See http://www.w3.org/TR/sparql11-query/#modOrderBy
# The ranks are spaced out to leave room for further Node subclasses
# defined elsewhere; the defaultdict gives unknown subclasses rank 0 so
# that they fail gracefully.
_ORDERING = defaultdict(int, {
    BNode: 10,
    Variable: 20,
    URIRef: 30,
    Literal: 40,
})
| 1642 | |
| 1643 | |
def _isEqualXMLNode(node, other):
    """
    Recursively compare two DOM nodes for structural equality.

    :param node: a DOM node, or ``None``
    :param other: the DOM node to compare with, or ``None``
    :returns: ``True`` when both nodes have the same type and equal
        content (tag name, namespace, non-xmlns attributes and children
        for elements; data, target or identifiers for the other node
        types); ``False`` otherwise, including when either is ``None``
    :raises Exception: for a node type that is not handled here
    """
    from xml.dom.minidom import Node

    def recurse():
        # Children must match pairwise; zip() stops at the shorter
        # sequence, so the lengths are compared first.
        if len(node.childNodes) != len(other.childNodes):
            return False
        return all(
            _isEqualXMLNode(nc, oc)
            for nc, oc in zip(node.childNodes, other.childNodes))

    if node is None or other is None:
        return False

    if node.nodeType != other.nodeType:
        return False

    if node.nodeType in [Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE]:
        return recurse()

    elif node.nodeType == Node.ELEMENT_NODE:
        # Get the basics right
        if not (node.tagName == other.tagName
                and node.namespaceURI == other.namespaceURI):
            return False

        # Handle the (namespaced) attributes; the namespace setting key
        # should be ignored, though
        n_keys = [
            k for k in node.attributes.keysNS()
            if k[0] != 'http://www.w3.org/2000/xmlns/']
        o_keys = [
            k for k in other.attributes.keysNS()
            if k[0] != 'http://www.w3.org/2000/xmlns/']
        if len(n_keys) != len(o_keys):
            return False
        for k in n_keys:
            if not (k in o_keys
                    and node.getAttributeNS(k[0], k[1]) ==
                    other.getAttributeNS(k[0], k[1])):
                return False

        # if we got here, the attributes are all right, we can go down
        # the tree recursively
        return recurse()

    elif node.nodeType in [
            Node.TEXT_NODE, Node.COMMENT_NODE, Node.CDATA_SECTION_NODE,
            Node.NOTATION_NODE]:
        return node.data == other.data

    elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
        return node.data == other.data and node.target == other.target

    elif node.nodeType == Node.ENTITY_NODE:
        return node.nodeValue == other.nodeValue

    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
        # was "other.system.Id", which raised AttributeError
        return node.publicId == other.publicId \
            and node.systemId == other.systemId

    else:
        # should not happen, in fact
        raise Exception(
            'I dont know how to compare XML Node type: %s' % node.nodeType)
| 1718 | |
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this
    # module's docstrings.
    import doctest
    doctest.testmod()
