Mercurial > repos > guerler > springsuite
diff planemo/lib/python3.7/site-packages/rdflib/term.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/planemo/lib/python3.7/site-packages/rdflib/term.py Fri Jul 31 00:32:28 2020 -0400 @@ -0,0 +1,1721 @@ +""" +This module defines the different types of terms. Terms are the kinds of +objects that can appear in a quoted/asserted triple. This includes those +that are core to RDF: + +* :class:`Blank Nodes <rdflib.term.BNode>` +* :class:`URI References <rdflib.term.URIRef>` +* :class:`Literals <rdflib.term.Literal>` (which consist of a literal value,datatype and language tag) + +Those that extend the RDF model into N3: + +* :class:`Formulae <rdflib.graph.QuotedGraph>` +* :class:`Universal Quantifications (Variables) <rdflib.term.Variable>` + +And those that are primarily for matching against 'Nodes' in the +underlying Graph: + +* REGEX Expressions +* Date Ranges +* Numerical Ranges + +""" + +__all__ = [ + 'bind', + + 'Node', + 'Identifier', + + 'URIRef', + 'BNode', + 'Literal', + + 'Variable', + 'Statement', +] + +import logging +logger = logging.getLogger(__name__) +import warnings +import math + +import base64 +import xml.dom.minidom + +from urllib.parse import urlparse, urljoin, urldefrag +from datetime import date, time, datetime +from re import sub, compile +from collections import defaultdict + +from isodate import parse_time, parse_date, parse_datetime + +try: + from hashlib import md5 + assert md5 +except ImportError: + from md5 import md5 + + +import rdflib +from . import py3compat +from rdflib.compat import numeric_greater + + +b = py3compat.b + +skolem_genid = "/.well-known/genid/" +rdflib_skolem_genid = "/.well-known/genid/rdflib/" +skolems = {} + + +_invalid_uri_chars = '<>" {}|\\^`' + +def _is_valid_uri(uri): + for c in _invalid_uri_chars: + if c in uri: return False + return True + +_lang_tag_regex = compile('^[a-zA-Z]+(?:-[a-zA-Z0-9]+)*$') + +def _is_valid_langtag(tag): + return bool(_lang_tag_regex.match(tag)) + +def _is_valid_unicode(value): + """ + Verify that the provided value can be converted into a Python + unicode object. + """ + if isinstance(value, bytes): + coding_func, param = getattr(value, 'decode'), 'utf-8' + elif py3compat.PY3: + coding_func, param = str, value + else: + coding_func, param = str, value + + # try to convert value into unicode + try: + coding_func(param) + except UnicodeError: + return False + return True + +class Node(object): + """ + A Node in the Graph. + """ + + __slots__ = () + + +class Identifier(Node, str): # allow Identifiers to be Nodes in the Graph + """ + See http://www.w3.org/2002/07/rdf-identifer-terminology/ + regarding choice of terminology. + """ + + __slots__ = () + + def __new__(cls, value): + return str.__new__(cls, value) + + def eq(self, other): + """A "semantic"/interpreted equality function, + by default, same as __eq__""" + return self.__eq__(other) + + def neq(self, other): + """A "semantic"/interpreted not equal function, + by default, same as __ne__""" + return self.__ne__(other) + + def __ne__(self, other): + return not self.__eq__(other) + + def __eq__(self, other): + """ + Equality for Nodes. + + >>> BNode("foo")==None + False + >>> BNode("foo")==URIRef("foo") + False + >>> URIRef("foo")==BNode("foo") + False + >>> BNode("foo")!=URIRef("foo") + True + >>> URIRef("foo")!=BNode("foo") + True + >>> Variable('a')!=URIRef('a') + True + >>> Variable('a')!=Variable('a') + False + """ + + if type(self) == type(other): + return str(self) == str(other) + else: + return False + + def __gt__(self, other): + """ + This implements ordering for Nodes, + + This tries to implement this: + http://www.w3.org/TR/sparql11-query/#modOrderBy + + Variables are not included in the SPARQL list, but + they are greater than BNodes and smaller than everything else + + """ + if other is None: + return True # everything bigger than None + elif type(self) == type(other): + return str(self) > str(other) + elif isinstance(other, Node): + return _ORDERING[type(self)] > _ORDERING[type(other)] + + return NotImplemented + + def __lt__(self, other): + if other is None: + return False # Nothing is less than None + elif type(self) == type(other): + return str(self) < str(other) + elif isinstance(other, Node): + return _ORDERING[type(self)] < _ORDERING[type(other)] + + return NotImplemented + + def __le__(self, other): + r = self.__lt__(other) + if r: + return True + return self == other + + def __ge__(self, other): + r = self.__gt__(other) + if r: + return True + return self == other + + def __hash__(self): + t = type(self) + fqn = t.__module__ + '.' + t.__name__ + return hash(fqn) ^ hash(str(self)) + + +class URIRef(Identifier): + """ + RDF URI Reference: http://www.w3.org/TR/rdf-concepts/#section-Graph-URIref + """ + + __slots__ = () + + def __new__(cls, value, base=None): + if base is not None: + ends_in_hash = value.endswith("#") + value = urljoin(base, value, allow_fragments=1) + if ends_in_hash: + if not value.endswith("#"): + value += "#" + + if not _is_valid_uri(value): + logger.warning('%s does not look like a valid URI, trying to serialize this will break.'%value) + + + try: + rt = str.__new__(cls, value) + except UnicodeDecodeError: + rt = str.__new__(cls, value, 'utf-8') + return rt + + def toPython(self): + return str(self) + + def n3(self, namespace_manager = None): + """ + This will do a limited check for valid URIs, + essentially just making sure that the string includes no illegal + characters (``<, >, ", {, }, |, \\, `, ^``) + + :param namespace_manager: if not None, will be used to make up + a prefixed name + """ + + if not _is_valid_uri(self): + raise Exception('"%s" does not look like a valid URI, I cannot serialize this as N3/Turtle. Perhaps you wanted to urlencode it?'%self) + + if namespace_manager: + return namespace_manager.normalizeUri(self) + else: + return "<%s>" % self + + def defrag(self): + if "#" in self: + url, frag = urldefrag(self) + return URIRef(url) + else: + return self + + def __reduce__(self): + return (URIRef, (str(self),)) + + def __getnewargs__(self): + return (str(self), ) + + if not py3compat.PY3: + def __str__(self): + return self.encode() + + def __repr__(self): + if self.__class__ is URIRef: + clsName = "rdflib.term.URIRef" + else: + clsName = self.__class__.__name__ + + return """%s(%s)""" % (clsName, super(URIRef, self).__repr__()) + + def __add__(self, other): + return self.__class__(str(self) + other) + + def __radd__(self, other): + return self.__class__(other + str(self)) + + def __mod__(self, other): + return self.__class__(str(self) % other) + + def md5_term_hash(self): + """a string of hex that will be the same for two URIRefs that + are the same. It is not a suitable unique id. + + Supported for backwards compatibility; new code should + probably just use __hash__ + """ + warnings.warn( + "method md5_term_hash is deprecated, and will be removed " + + "in the future. If you use this please let rdflib-dev know!", + category=DeprecationWarning, stacklevel=2) + d = md5(self.encode()) + d.update(b("U")) + return d.hexdigest() + + def de_skolemize(self): + """ Create a Blank Node from a skolem URI, in accordance + with http://www.w3.org/TR/rdf11-concepts/#section-skolemization. + This function accepts only rdflib type skolemization, to provide + a round-tripping within the system. + + .. versionadded:: 4.0 + """ + if isinstance(self, RDFLibGenid): + parsed_uri = urlparse("%s" % self) + return BNode( + value=parsed_uri.path[len(rdflib_skolem_genid):]) + elif isinstance(self, Genid): + bnode_id = "%s" % self + if bnode_id in skolems: + return skolems[bnode_id] + else: + retval = BNode() + skolems[bnode_id] = retval + return retval + else: + raise Exception("<%s> is not a skolem URI" % self) + + +class Genid(URIRef): + __slots__ = () + + @staticmethod + def _is_external_skolem(uri): + if not isinstance(uri, str): + uri = str(uri) + parsed_uri = urlparse(uri) + gen_id = parsed_uri.path.rfind(skolem_genid) + if gen_id != 0: + return False + return True + + +class RDFLibGenid(Genid): + __slots__ = () + + @staticmethod + def _is_rdflib_skolem(uri): + if not isinstance(uri, str): + uri = str(uri) + parsed_uri = urlparse(uri) + if parsed_uri.params != "" \ + or parsed_uri.query != "" \ + or parsed_uri.fragment != "": + return False + gen_id = parsed_uri.path.rfind(rdflib_skolem_genid) + if gen_id != 0: + return False + return True + + +def _unique_id(): + # Used to read: """Create a (hopefully) unique prefix""" + # now retained merely to leave interal API unchanged. + # From BNode.__new__() below ... + # + # acceptable bnode value range for RDF/XML needs to be + # something that can be serialzed as a nodeID for N3 + # + # BNode identifiers must be valid NCNames" _:[A-Za-z][A-Za-z0-9]* + # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID + return "N" # ensure that id starts with a letter + + +def _serial_number_generator(): + """ + Generates UUID4-based but ncname-compliant identifiers. + """ + from uuid import uuid4 + + def _generator(): + return uuid4().hex + + return _generator + + +class BNode(Identifier): + """ + Blank Node: http://www.w3.org/TR/rdf-concepts/#section-blank-nodes + + """ + __slots__ = () + + def __new__(cls, value=None, + _sn_gen=_serial_number_generator(), _prefix=_unique_id()): + """ + # only store implementations should pass in a value + """ + if value is None: + # so that BNode values do not collide with ones created with + # a different instance of this module at some other time. + node_id = _sn_gen() + value = "%s%s" % (_prefix, node_id) + else: + # TODO: check that value falls within acceptable bnode value range + # for RDF/XML needs to be something that can be serialzed + # as a nodeID for N3 ?? Unless we require these + # constraints be enforced elsewhere? + pass # assert is_ncname(unicode(value)), "BNode identifiers + # must be valid NCNames" _:[A-Za-z][A-Za-z0-9]* + # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID + return Identifier.__new__(cls, value) + + def toPython(self): + return str(self) + + def n3(self, namespace_manager=None): + return "_:%s" % self + + def __getnewargs__(self): + return (str(self), ) + + def __reduce__(self): + return (BNode, (str(self),)) + + if not py3compat.PY3: + def __str__(self): + return self.encode() + + def __repr__(self): + if self.__class__ is BNode: + clsName = "rdflib.term.BNode" + else: + clsName = self.__class__.__name__ + return """%s('%s')""" % (clsName, str(self)) + + def md5_term_hash(self): + """a string of hex that will be the same for two BNodes that + are the same. It is not a suitable unique id. + + Supported for backwards compatibility; new code should + probably just use __hash__ + """ + warnings.warn( + "method md5_term_hash is deprecated, and will be removed " + + "in the future. If you use this please let rdflib-dev know!", + category=DeprecationWarning, stacklevel=2) + d = md5(self.encode()) + d.update(b("B")) + return d.hexdigest() + + def skolemize(self, authority="http://rdlib.net/"): + """ Create a URIRef "skolem" representation of the BNode, in accordance + with http://www.w3.org/TR/rdf11-concepts/#section-skolemization + + .. versionadded:: 4.0 + """ + skolem = "%s%s" % (rdflib_skolem_genid, str(self)) + return URIRef(urljoin(authority, skolem)) + + +class Literal(Identifier): + __doc__ = py3compat.format_doctest_out(""" + RDF Literal: http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal + + The lexical value of the literal is the unicode object + The interpreted, datatyped value is available from .value + + Language tags must be valid according to :rfc:5646 + + For valid XSD datatypes, the lexical form is optionally normalized + at construction time. Default behaviour is set by rdflib.NORMALIZE_LITERALS + and can be overridden by the normalize parameter to __new__ + + Equality and hashing of Literals are done based on the lexical form, i.e.: + + >>> from rdflib.namespace import XSD + + >>> Literal('01')!=Literal('1') # clear - strings differ + True + + but with data-type they get normalized: + + >>> Literal('01', datatype=XSD.integer)!=Literal('1', datatype=XSD.integer) + False + + unless disabled: + + >>> Literal('01', datatype=XSD.integer, normalize=False)!=Literal('1', datatype=XSD.integer) + True + + + Value based comparison is possible: + + >>> Literal('01', datatype=XSD.integer).eq(Literal('1', datatype=XSD.float)) + True + + The eq method also provides limited support for basic python types: + + >>> Literal(1).eq(1) # fine - int compatible with xsd:integer + True + >>> Literal('a').eq('b') # fine - str compatible with plain-lit + False + >>> Literal('a', datatype=XSD.string).eq('a') # fine - str compatible with xsd:string + True + >>> Literal('a').eq(1) # not fine, int incompatible with plain-lit + NotImplemented + + Greater-than/less-than ordering comparisons are also done in value + space, when compatible datatypes are used. Incompatible datatypes + are ordered by DT, or by lang-tag. For other nodes the ordering + is None < BNode < URIRef < Literal + + Any comparison with non-rdflib Node are "NotImplemented" + In PY2.X some stable order will be made up by python + + In PY3 this is an error. + + >>> from rdflib import Literal, XSD + >>> lit2006 = Literal('2006-01-01',datatype=XSD.date) + >>> lit2006.toPython() + datetime.date(2006, 1, 1) + >>> lit2006 < Literal('2007-01-01',datatype=XSD.date) + True + >>> Literal(datetime.utcnow()).datatype + rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#dateTime') + >>> Literal(1) > Literal(2) # by value + False + >>> Literal(1) > Literal(2.0) # by value + False + >>> Literal('1') > Literal(1) # by DT + True + >>> Literal('1') < Literal('1') # by lexical form + False + >>> Literal('a', lang='en') > Literal('a', lang='fr') # by lang-tag + False + >>> Literal(1) > URIRef('foo') # by node-type + True + + The > < operators will eat this NotImplemented and either make up + an ordering (py2.x) or throw a TypeError (py3k): + + >>> Literal(1).__gt__(2.0) + NotImplemented + + + """) + + + if not py3compat.PY3: + __slots__ = ("language", "datatype", "value", "_language", + "_datatype", "_value") + else: + __slots__ = ("_language", "_datatype", "_value") + + def __new__(cls, lexical_or_value, lang=None, datatype=None, normalize=None): + + if lang == '': + lang = None # no empty lang-tags in RDF + + normalize = normalize if normalize != None else rdflib.NORMALIZE_LITERALS + + if lang is not None and datatype is not None: + raise TypeError( + "A Literal can only have one of lang or datatype, " + "per http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal") + + if lang and not _is_valid_langtag(lang): + raise Exception("'%s' is not a valid language tag!"%lang) + + if datatype: + datatype = URIRef(datatype) + + value = None + if isinstance(lexical_or_value, Literal): + # create from another Literal instance + + lang = lang or lexical_or_value.language + if datatype: + # override datatype + value = _castLexicalToPython(lexical_or_value, datatype) + else: + datatype = lexical_or_value.datatype + value = lexical_or_value.value + + elif isinstance(lexical_or_value, str): + # passed a string + # try parsing lexical form of datatyped literal + value = _castLexicalToPython(lexical_or_value, datatype) + + if value is not None and normalize: + _value, _datatype = _castPythonToLiteral(value) + if _value is not None and _is_valid_unicode(_value): + lexical_or_value = _value + + else: + # passed some python object + value = lexical_or_value + _value, _datatype = _castPythonToLiteral(lexical_or_value) + + datatype = datatype or _datatype + if _value is not None: + lexical_or_value = _value + if datatype: + lang = None + + if py3compat.PY3 and isinstance(lexical_or_value, bytes): + lexical_or_value = lexical_or_value.decode('utf-8') + + try: + inst = str.__new__(cls, lexical_or_value) + except UnicodeDecodeError: + inst = str.__new__(cls, lexical_or_value, 'utf-8') + + inst._language = lang + inst._datatype = datatype + inst._value = value + return inst + + @py3compat.format_doctest_out + def normalize(self): + """ + Returns a new literal with a normalised lexical representation + of this literal + >>> from rdflib import XSD + >>> Literal("01", datatype=XSD.integer, normalize=False).normalize() + rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')) + + Illegal lexical forms for the datatype given are simply passed on + >>> Literal("a", datatype=XSD.integer, normalize=False) + rdflib.term.Literal(%(u)s'a', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')) + + """ + + if self.value != None: + return Literal(self.value, datatype=self.datatype, lang=self.language) + else: + return self + + @property + def value(self): + return self._value + + @property + def language(self): + return self._language + + @property + def datatype(self): + return self._datatype + + def __reduce__(self): + return (Literal, (str(self), self.language, self.datatype),) + + def __getstate__(self): + return (None, dict(language=self.language, datatype=self.datatype)) + + def __setstate__(self, arg): + _, d = arg + self._language = d["language"] + self._datatype = d["datatype"] + + @py3compat.format_doctest_out + def __add__(self, val): + """ + >>> Literal(1) + 1 + rdflib.term.Literal(%(u)s'2', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')) + >>> Literal("1") + "1" + rdflib.term.Literal(%(u)s'11') + """ + + py = self.toPython() + if not isinstance(py, Literal): + try: + return Literal(py + val) + except TypeError: + pass # fall-through + + s = str.__add__(self, val) + return Literal(s, self.language, self.datatype) + + def __bool__(self): + """ + Is the Literal "True" + This is used for if statements, bool(literal), etc. + """ + if self.value != None: + return bool(self.value) + return len(self) != 0 + + @py3compat.format_doctest_out + def __neg__(self): + """ + >>> (- Literal(1)) + rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')) + >>> (- Literal(10.5)) + rdflib.term.Literal(%(u)s'-10.5', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#double')) + >>> from rdflib.namespace import XSD + >>> (- Literal("1", datatype=XSD.integer)) + rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')) + + >>> (- Literal("1")) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + TypeError: Not a number; rdflib.term.Literal(%(u)s'1') + >>> + """ + + if isinstance(self.value, (int, float)): + return Literal(self.value.__neg__()) + else: + raise TypeError("Not a number; %s" % repr(self)) + + @py3compat.format_doctest_out + def __pos__(self): + """ + >>> (+ Literal(1)) + rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')) + >>> (+ Literal(-1)) + rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')) + >>> from rdflib.namespace import XSD + >>> (+ Literal("-1", datatype=XSD.integer)) + rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')) + + >>> (+ Literal("1")) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + TypeError: Not a number; rdflib.term.Literal(%(u)s'1') + """ + if isinstance(self.value, (int, float)): + return Literal(self.value.__pos__()) + else: + raise TypeError("Not a number; %s" % repr(self)) + + @py3compat.format_doctest_out + def __abs__(self): + """ + >>> abs(Literal(-1)) + rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')) + + >>> from rdflib.namespace import XSD + >>> abs( Literal("-1", datatype=XSD.integer)) + rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')) + + >>> abs(Literal("1")) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + TypeError: Not a number; rdflib.term.Literal(%(u)s'1') + """ + if isinstance(self.value, (int, float)): + return Literal(self.value.__abs__()) + else: + raise TypeError("Not a number; %s" % repr(self)) + + @py3compat.format_doctest_out + def __invert__(self): + """ + >>> ~(Literal(-1)) + rdflib.term.Literal(%(u)s'0', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')) + + >>> from rdflib.namespace import XSD + >>> ~( Literal("-1", datatype=XSD.integer)) + rdflib.term.Literal(%(u)s'0', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')) + + Not working: + + >>> ~(Literal("1")) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + TypeError: Not a number; rdflib.term.Literal(%(u)s'1') + """ + if isinstance(self.value, (int, float)): + return Literal(self.value.__invert__()) + else: + raise TypeError("Not a number; %s" % repr(self)) + + def __gt__(self, other): + """ + + This implements ordering for Literals, + the other comparison methods delegate here + + This tries to implement this: + http://www.w3.org/TR/sparql11-query/#modOrderBy + + In short, Literals with compatible data-types are orderd in value space, + i.e. + >>> from rdflib import XSD + + >>> Literal(1)>Literal(2) # int/int + False + >>> Literal(2.0)>Literal(1) # double/int + True + >>> from decimal import Decimal + >>> Literal(Decimal("3.3")) > Literal(2.0) # decimal/double + True + >>> Literal(Decimal("3.3")) < Literal(4.0) # decimal/double + True + >>> Literal('b')>Literal('a') # plain lit/plain lit + True + >>> Literal('b')>Literal('a', datatype=XSD.string) # plain lit/xsd:string + True + + Incompatible datatype mismatches ordered by DT + + >>> Literal(1)>Literal("2") # int>string + False + + Langtagged literals by lang tag + >>> Literal("a", lang="en")>Literal("a", lang="fr") + False + """ + if other is None: + return True # Everything is greater than None + if isinstance(other, Literal): + + if self.datatype in _NUMERIC_LITERAL_TYPES and \ + other.datatype in _NUMERIC_LITERAL_TYPES: + return numeric_greater(self.value, other.value) + + # plain-literals and xsd:string literals + # are "the same" + dtself = self.datatype or _XSD_STRING + dtother = other.datatype or _XSD_STRING + + if dtself != dtother: + if rdflib.DAWG_LITERAL_COLLATION: + return NotImplemented + else: + return dtself > dtother + + if self.language != other.language: + if not self.language: + return False + elif not other.language: + return True + else: + return self.language > other.language + + if self.value != None and other.value != None: + return self.value > other.value + + if str(self) != str(other): + return str(self) > str(other) + + # same language, same lexical form, check real dt + # plain-literals come before xsd:string! + if self.datatype != other.datatype: + if not self.datatype: + return False + elif not other.datatype: + return True + else: + return self.datatype > other.datatype + + return False # they are the same + + elif isinstance(other, Node): + return True # Literal are the greatest! + else: + return NotImplemented # we can only compare to nodes + + def __lt__(self, other): + if other is None: + return False # Nothing is less than None + if isinstance(other, Literal): + try: + return not self.__gt__(other) and not self.eq(other) + except TypeError: + return NotImplemented + if isinstance(other, Node): + return False # all nodes are less-than Literals + + return NotImplemented + + def __le__(self, other): + """ + >>> from rdflib.namespace import XSD + >>> Literal('2007-01-01T10:00:00', datatype=XSD.dateTime + ... ) <= Literal('2007-01-01T10:00:00', datatype=XSD.dateTime) + True + """ + r = self.__lt__(other) + if r: + return True + try: + return self.eq(other) + except TypeError: + return NotImplemented + + def __ge__(self, other): + r = self.__gt__(other) + if r: + return True + try: + return self.eq(other) + except TypeError: + return NotImplemented + + def _comparable_to(self, other): + """ + Helper method to decide which things are meaningful to + rich-compare with this literal + """ + if isinstance(other, Literal): + if (self.datatype and other.datatype): + # two datatyped literals + if not self.datatype in XSDToPython or not other.datatype in XSDToPython: + # non XSD DTs must match + if self.datatype != other.datatype: + return False + + else: + # xsd:string may be compared with plain literals + if not (self.datatype == _XSD_STRING and not other.datatype) or \ + (other.datatype == _XSD_STRING and not self.datatype): + return False + + # if given lang-tag has to be case insensitive equal + if (self.language or "").lower() != (other.language or "").lower(): + return False + + return True + + def __hash__(self): + """ + >>> from rdflib.namespace import XSD + >>> a = {Literal('1', datatype=XSD.integer):'one'} + >>> Literal('1', datatype=XSD.double) in a + False + + + "Called for the key object for dictionary operations, + and by the built-in function hash(). Should return + a 32-bit integer usable as a hash value for + dictionary operations. The only required property + is that objects which compare equal have the same + hash value; it is advised to somehow mix together + (e.g., using exclusive or) the hash values for the + components of the object that also play a part in + comparison of objects." -- 3.4.1 Basic customization (Python) + + "Two literals are equal if and only if all of the following hold: + * The strings of the two lexical forms compare equal, character by + character. + * Either both or neither have language tags. + * The language tags, if any, compare equal. + * Either both or neither have datatype URIs. + * The two datatype URIs, if any, compare equal, character by + character." + -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax) + + """ + res = super(Literal, self).__hash__() + if self.language: + res ^= hash(self.language.lower()) + if self.datatype: + res ^= hash(self.datatype) + return res + + @py3compat.format_doctest_out + def __eq__(self, other): + """ + Literals are only equal to other literals. + + "Two literals are equal if and only if all of the following hold: + * The strings of the two lexical forms compare equal, character by character. + * Either both or neither have language tags. + * The language tags, if any, compare equal. + * Either both or neither have datatype URIs. + * The two datatype URIs, if any, compare equal, character by character." + -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax) + + >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo")) + True + >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo2")) + False + + >>> Literal("1", datatype=URIRef("foo")) == Literal("2", datatype=URIRef("foo")) + False + >>> Literal("1", datatype=URIRef("foo")) == "asdf" + False + >>> from rdflib import XSD + >>> Literal('2007-01-01', datatype=XSD.date) == Literal('2007-01-01', datatype=XSD.date) + True + >>> Literal('2007-01-01', datatype=XSD.date) == date(2007, 1, 1) + False + >>> Literal("one", lang="en") == Literal("one", lang="en") + True + >>> Literal("hast", lang='en') == Literal("hast", lang='de') + False + >>> Literal("1", datatype=XSD.integer) == Literal(1) + True + >>> Literal("1", datatype=XSD.integer) == Literal("01", datatype=XSD.integer) + True + + """ + if self is other: + return True + if other is None: + return False + if isinstance(other, Literal): + return self.datatype == other.datatype \ + and (self.language.lower() if self.language else None) == (other.language.lower() if other.language else None) \ + and str.__eq__(self, other) + + return False + + def eq(self, other): + """ + Compare the value of this literal with something else + + Either, with the value of another literal + comparisons are then done in literal "value space", + and according to the rules of XSD subtype-substitution/type-promotion + + OR, with a python object: + + basestring objects can be compared with plain-literals, + or those with datatype xsd:string + + bool objects with xsd:boolean + + a int, long or float with numeric xsd types + + isodate date,time,datetime objects with xsd:date,xsd:time or xsd:datetime + + Any other operations returns NotImplemented + + """ + if isinstance(other, Literal): + + if self.datatype in _NUMERIC_LITERAL_TYPES \ + and other.datatype in _NUMERIC_LITERAL_TYPES: + if self.value != None and other.value != None: + return self.value == other.value + else: + if str.__eq__(self, other): + return True + raise TypeError( + 'I cannot know that these two lexical forms do not map to the same value: %s and %s' % (self, other)) + if (self.language or "").lower() != (other.language or "").lower(): + return False + + dtself = self.datatype or _XSD_STRING + dtother = other.datatype or _XSD_STRING + + if (dtself == _XSD_STRING and dtother == _XSD_STRING): + # string/plain literals, compare on lexical form + return str.__eq__(self, other) + + if dtself != dtother: + if rdflib.DAWG_LITERAL_COLLATION: + raise TypeError("I don't know how to compare literals with datatypes %s and %s" % ( + self.datatype, other.datatype)) + else: + return False + + # matching non-string DTs now - do we compare values or + # lexical form first? comparing two ints is far quicker - + # maybe there are counter examples + + if self.value != None and other.value != None: + + if self.datatype in (_RDF_XMLLITERAL, _RDF_HTMLLITERAL): + return _isEqualXMLNode(self.value, other.value) + + return self.value == other.value + else: + + if str.__eq__(self, other): + return True + + if self.datatype == _XSD_STRING: + return False # string value space=lexical space + + # matching DTs, but not matching, we cannot compare! + raise TypeError( + 'I cannot know that these two lexical forms do not map to the same value: %s and %s' % (self, other)) + + elif isinstance(other, Node): + return False # no non-Literal nodes are equal to a literal + + elif isinstance(other, str): + # only plain-literals can be directly compared to strings + + # TODO: Is "blah"@en eq "blah" ? + if self.language is not None: + return False + + if (self.datatype == _XSD_STRING or self.datatype is None): + return str(self) == other + + elif isinstance(other, (int, float)): + if self.datatype in _NUMERIC_LITERAL_TYPES: + return self.value == other + elif isinstance(other, (date, datetime, time)): + if self.datatype in (_XSD_DATETIME, _XSD_DATE, _XSD_TIME): + return self.value == other + elif isinstance(other, bool): + if self.datatype == _XSD_BOOLEAN: + return self.value == other + + return NotImplemented + + def neq(self, other): + return not self.eq(other) + + @py3compat.format_doctest_out + def n3(self, namespace_manager = None): + r''' + Returns a representation in the N3 format. + + Examples:: + + >>> Literal("foo").n3() + %(u)s'"foo"' + + Strings with newlines or triple-quotes:: + + >>> Literal("foo\nbar").n3() + %(u)s'"""foo\nbar"""' + + >>> Literal("''\'").n3() + %(u)s'"\'\'\'"' + + >>> Literal('"""').n3() + %(u)s'"\\"\\"\\""' + + Language:: + + >>> Literal("hello", lang="en").n3() + %(u)s'"hello"@en' + + Datatypes:: + + >>> Literal(1).n3() + %(u)s'"1"^^<http://www.w3.org/2001/XMLSchema#integer>' + + >>> Literal(1.0).n3() + %(u)s'"1.0"^^<http://www.w3.org/2001/XMLSchema#double>' + + >>> Literal(True).n3() + %(u)s'"true"^^<http://www.w3.org/2001/XMLSchema#boolean>' + + Datatype and language isn't allowed (datatype takes precedence):: + + >>> Literal(1, lang="en").n3() + %(u)s'"1"^^<http://www.w3.org/2001/XMLSchema#integer>' + + Custom datatype:: + + >>> footype = URIRef("http://example.org/ns#foo") + >>> Literal("1", datatype=footype).n3() + %(u)s'"1"^^<http://example.org/ns#foo>' + + Passing a namespace-manager will use it to abbreviate datatype URIs: + + >>> from rdflib import Graph + >>> Literal(1).n3(Graph().namespace_manager) + %(u)s'"1"^^xsd:integer' + ''' + if namespace_manager: + return self._literal_n3(qname_callback = + namespace_manager.normalizeUri) + else: + return self._literal_n3() + + @py3compat.format_doctest_out + def _literal_n3(self, use_plain=False, qname_callback=None): + ''' + Using plain literal (shorthand) output:: + >>> from rdflib.namespace import XSD + + >>> Literal(1)._literal_n3(use_plain=True) + %(u)s'1' + + >>> Literal(1.0)._literal_n3(use_plain=True) + %(u)s'1e+00' + + >>> Literal(1.0, datatype=XSD.decimal)._literal_n3(use_plain=True) + %(u)s'1.0' + + >>> Literal(1.0, datatype=XSD.float)._literal_n3(use_plain=True) + %(u)s'"1.0"^^<http://www.w3.org/2001/XMLSchema#float>' + + >>> Literal("foo", datatype=XSD.string)._literal_n3( + ... use_plain=True) + %(u)s'"foo"^^<http://www.w3.org/2001/XMLSchema#string>' + + >>> Literal(True)._literal_n3(use_plain=True) + %(u)s'true' + + >>> Literal(False)._literal_n3(use_plain=True) + %(u)s'false' + + >>> Literal(1.91)._literal_n3(use_plain=True) + %(u)s'1.91e+00' + + Only limited precision available for floats: + >>> Literal(0.123456789)._literal_n3(use_plain=True) + %(u)s'1.234568e-01' + + >>> Literal('0.123456789', + ... datatype=XSD.decimal)._literal_n3(use_plain=True) + %(u)s'0.123456789' + + Using callback for datatype QNames:: + + >>> Literal(1)._literal_n3( + ... qname_callback=lambda uri: "xsd:integer") + %(u)s'"1"^^xsd:integer' + + ''' + if use_plain and self.datatype in _PLAIN_LITERAL_TYPES: + if self.value is not None: + # If self is inf or NaN, we need a datatype + # (there is no plain representation) + if self.datatype in _NUMERIC_INF_NAN_LITERAL_TYPES: + try: + v = float(self) + if math.isinf(v) or math.isnan(v): + return self._literal_n3(False, qname_callback) + except ValueError: + return self._literal_n3(False, qname_callback) + + # this is a bit of a mess - + # in py >=2.6 the string.format function makes this easier + # we try to produce "pretty" output + if self.datatype == _XSD_DOUBLE: + return sub("\\.?0*e", "e", '%e' % float(self)) + elif self.datatype == _XSD_DECIMAL: + s = '%s' % self + if '.' not in s: + s += '.0' + return s + + elif self.datatype == _XSD_BOOLEAN: + return ('%s' % self).lower() + else: + return '%s' % self + + encoded = self._quote_encode() + + datatype = self.datatype + quoted_dt = None + if datatype: + if qname_callback: + quoted_dt = qname_callback(datatype) + if not quoted_dt: + quoted_dt = "<%s>" % datatype + if datatype in _NUMERIC_INF_NAN_LITERAL_TYPES: + try: + v = float(self) + if math.isinf(v): + # py string reps: float: 'inf', Decimal: 'Infinity" + # both need to become "INF" in xsd datatypes + encoded = encoded.replace('inf', 'INF').replace( + 'Infinity', 'INF') + if math.isnan(v): + encoded = encoded.replace('nan', 'NaN') + except ValueError: + # if we can't cast to float something is wrong, but we can + # still serialize. Warn user about it + warnings.warn("Serializing weird numerical %r" % self) + + language = self.language + if language: + return '%s@%s' % (encoded, language) + elif datatype: + return '%s^^%s' % (encoded, quoted_dt) + else: + return '%s' % encoded + + def _quote_encode(self): + # This simpler encoding doesn't work; a newline gets encoded as "\\n", + # which is ok in sourcecode, but we want "\n". + # encoded = self.encode('unicode-escape').replace( + # '\\', '\\\\').replace('"','\\"') + # encoded = self.replace.replace('\\', '\\\\').replace('"','\\"') + + # NOTE: Could in theory chose quotes based on quotes appearing in the + # string, i.e. '"' and "'", but N3/turtle doesn't allow "'"(?). + + if "\n" in self: + # Triple quote this string. + encoded = self.replace('\\', '\\\\') + if '"""' in self: + # is this ok? + encoded = encoded.replace('"""', '\\"\\"\\"') + if encoded[-1] == '"' and encoded[-2] != '\\': + encoded = encoded[:-1] + '\\' + '"' + + return '"""%s"""' % encoded.replace('\r', '\\r') + else: + return '"%s"' % self.replace( + '\n', '\\n').replace( + '\\', '\\\\').replace( + '"', '\\"').replace( + '\r', '\\r') + + if not py3compat.PY3: + def __str__(self): + return self.encode() + + def __repr__(self): + args = [super(Literal, self).__repr__()] + if self.language is not None: + args.append("lang=%s" % repr(self.language)) + if self.datatype is not None: + args.append("datatype=%s" % repr(self.datatype)) + if self.__class__ == Literal: + clsName = "rdflib.term.Literal" + else: + clsName = self.__class__.__name__ + return """%s(%s)""" % (clsName, ", ".join(args)) + + def toPython(self): + """ + Returns an appropriate python datatype derived from this RDF Literal + """ + + if self.value is not None: + return self.value + return self + + def md5_term_hash(self): + """a string of hex that will be the same for two Literals that + are the same. It is not a suitable unique id. + + Supported for backwards compatibility; new code should + probably just use __hash__ + """ + warnings.warn( + "method md5_term_hash is deprecated, and will be removed " + + "removed in the future. If you use this please let rdflib-dev know!", + category=DeprecationWarning, stacklevel=2) + d = md5(self.encode()) + d.update(b("L")) + return d.hexdigest() + + +def _parseXML(xmlstring): + if not py3compat.PY3: + xmlstring = xmlstring.encode('utf-8') + retval = xml.dom.minidom.parseString( + "<rdflibtoplevelelement>%s</rdflibtoplevelelement>" % xmlstring) + retval.normalize() + return retval + + +def _parseHTML(htmltext): + try: + import html5lib + parser = html5lib.HTMLParser( + tree=html5lib.treebuilders.getTreeBuilder("dom")) + retval = parser.parseFragment(htmltext) + retval.normalize() + return retval + except ImportError: + raise ImportError( + "HTML5 parser not available. Try installing" + + " html5lib <http://code.google.com/p/html5lib>") + + +def _writeXML(xmlnode): + if isinstance(xmlnode, xml.dom.minidom.DocumentFragment): + d = xml.dom.minidom.Document() + d.childNodes += xmlnode.childNodes + xmlnode = d + s = xmlnode.toxml('utf-8') + # for clean round-tripping, remove headers -- I have great and + # specific worries that this will blow up later, but this margin + # is too narrow to contain them + if s.startswith(b('<?xml version="1.0" encoding="utf-8"?>')): + s = s[38:] + if s.startswith(b('<rdflibtoplevelelement>')): + s = s[23:-24] + if s == b('<rdflibtoplevelelement/>'): + s = b('') + return s + +# Cannot import Namespace/XSD because of circular dependencies +_XSD_PFX = 'http://www.w3.org/2001/XMLSchema#' +_RDF_PFX = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' + +_RDF_XMLLITERAL = URIRef(_RDF_PFX + 'XMLLiteral') +_RDF_HTMLLITERAL = URIRef(_RDF_PFX + 'HTML') + +_XSD_STRING = URIRef(_XSD_PFX + 'string') + +_XSD_FLOAT = URIRef(_XSD_PFX + 'float') +_XSD_DOUBLE = URIRef(_XSD_PFX + 'double') +_XSD_DECIMAL = URIRef(_XSD_PFX + 'decimal') +_XSD_INTEGER = URIRef(_XSD_PFX + 'integer') +_XSD_BOOLEAN = URIRef(_XSD_PFX + 'boolean') + +_XSD_DATETIME = URIRef(_XSD_PFX + 'dateTime') +_XSD_DATE = URIRef(_XSD_PFX + 'date') +_XSD_TIME = URIRef(_XSD_PFX + 'time') + +# TODO: duration, gYearMonth, gYear, gMonthDay, gDay, gMonth + +_NUMERIC_LITERAL_TYPES = ( + _XSD_INTEGER, + _XSD_DECIMAL, + _XSD_DOUBLE, + URIRef(_XSD_PFX + 'float'), + + URIRef(_XSD_PFX + 'byte'), + URIRef(_XSD_PFX + 'int'), + URIRef(_XSD_PFX + 'long'), + URIRef(_XSD_PFX + 'negativeInteger'), + URIRef(_XSD_PFX + 'nonNegativeInteger'), + URIRef(_XSD_PFX + 'nonPositiveInteger'), + URIRef(_XSD_PFX + 'positiveInteger'), + URIRef(_XSD_PFX + 'short'), + URIRef(_XSD_PFX + 'unsignedByte'), + URIRef(_XSD_PFX + 'unsignedInt'), + URIRef(_XSD_PFX + 'unsignedLong'), + URIRef(_XSD_PFX + 'unsignedShort'), + +) + +# these have "native" syntax in N3/SPARQL +_PLAIN_LITERAL_TYPES = ( + _XSD_INTEGER, + _XSD_BOOLEAN, + _XSD_DOUBLE, + _XSD_DECIMAL, +) + +# these have special INF and NaN XSD representations +_NUMERIC_INF_NAN_LITERAL_TYPES = ( + URIRef(_XSD_PFX + 'float'), + _XSD_DOUBLE, + _XSD_DECIMAL, +) + + +def _castPythonToLiteral(obj): + """ + Casts a python datatype to a tuple of the lexical value and a + datatype URI (or None) + """ + for pType, (castFunc, dType) in _PythonToXSD: + if isinstance(obj, pType): + if castFunc: + return castFunc(obj), dType + elif dType: + return obj, dType + else: + return obj, None + return obj, None # TODO: is this right for the fall through case? + +from decimal import Decimal + +# Mappings from Python types to XSD datatypes and back (borrowed from sparta) +# datetime instances are also instances of date... so we need to order these. + +# SPARQL/Turtle/N3 has shortcuts for integer, double, decimal +# python has only float - to be in tune with sparql/n3/turtle +# we default to XSD.double for float literals + +# python ints are promoted to longs when overflowing +# python longs have no limit +# both map to the abstract integer type, +# rather than some concrete bit-limited datatype + +_PythonToXSD = [ + (str, (None, None)), + (float, (None, _XSD_DOUBLE)), + (bool, (lambda i:str(i).lower(), _XSD_BOOLEAN)), + (int, (None, _XSD_INTEGER)), + (int, (None, _XSD_INTEGER)), + (Decimal, (None, _XSD_DECIMAL)), + (datetime, (lambda i:i.isoformat(), _XSD_DATETIME)), + (date, (lambda i:i.isoformat(), _XSD_DATE)), + (time, (lambda i:i.isoformat(), _XSD_TIME)), + (xml.dom.minidom.Document, (_writeXML, _RDF_XMLLITERAL)), + # this is a bit dirty - by accident the html5lib parser produces + # DocumentFragments, and the xml parser Documents, letting this + # decide what datatype to use makes roundtripping easier, but it a + # bit random + (xml.dom.minidom.DocumentFragment, (_writeXML, _RDF_HTMLLITERAL)) +] + +XSDToPython = { + None : None, # plain literals map directly to value space + URIRef(_XSD_PFX + 'time'): parse_time, + URIRef(_XSD_PFX + 'date'): parse_date, + URIRef(_XSD_PFX + 'gYear'): parse_date, + URIRef(_XSD_PFX + 'gYearMonth'): parse_date, + URIRef(_XSD_PFX + 'dateTime'): parse_datetime, + URIRef(_XSD_PFX + 'string'): None, + URIRef(_XSD_PFX + 'normalizedString'): None, + URIRef(_XSD_PFX + 'token'): None, + URIRef(_XSD_PFX + 'language'): None, + URIRef(_XSD_PFX + 'boolean'): lambda i: i.lower() in ['1', 'true'], + URIRef(_XSD_PFX + 'decimal'): Decimal, + URIRef(_XSD_PFX + 'integer'): int, + URIRef(_XSD_PFX + 'nonPositiveInteger'): int, + URIRef(_XSD_PFX + 'long'): int, + URIRef(_XSD_PFX + 'nonNegativeInteger'): int, + URIRef(_XSD_PFX + 'negativeInteger'): int, + URIRef(_XSD_PFX + 'int'): int, + URIRef(_XSD_PFX + 'unsignedLong'): int, + URIRef(_XSD_PFX + 'positiveInteger'): int, + URIRef(_XSD_PFX + 'short'): int, + URIRef(_XSD_PFX + 'unsignedInt'): int, + URIRef(_XSD_PFX + 'byte'): int, + URIRef(_XSD_PFX + 'unsignedShort'): int, + URIRef(_XSD_PFX + 'unsignedByte'): int, + URIRef(_XSD_PFX + 'float'): float, + URIRef(_XSD_PFX + 'double'): float, + URIRef(_XSD_PFX + 'base64Binary'): lambda s: base64.b64decode(s), + URIRef(_XSD_PFX + 'anyURI'): None, + _RDF_XMLLITERAL: _parseXML, + _RDF_HTMLLITERAL: _parseHTML +} + +_toPythonMapping = {} + +_toPythonMapping.update(XSDToPython) + +def _castLexicalToPython(lexical, datatype): + """ + Map a lexical form to the value-space for the given datatype + :returns: a python object for the value or ``None`` + """ + convFunc = _toPythonMapping.get(datatype, False) + if convFunc: + try: + return convFunc(lexical) + except: + # not a valid lexical representation for this dt + return None + elif convFunc is None: + # no conv func means 1-1 lexical<->value-space mapping + try: + return str(lexical) + except UnicodeDecodeError: + return str(lexical, 'utf-8') + else: + # no convFunc - unknown data-type + return None + +def bind(datatype, pythontype, constructor=None, lexicalizer=None): + """ + register a new datatype<->pythontype binding + + :param constructor: an optional function for converting lexical forms + into a Python instances, if not given the pythontype + is used directly + + :param lexicalizer: an optinoal function for converting python objects to + lexical form, if not given object.__str__ is used + + """ + if datatype in _toPythonMapping: + logger.warning("datatype '%s' was already bound. Rebinding." % + datatype) + + if constructor == None: + constructor = pythontype + _toPythonMapping[datatype] = constructor + _PythonToXSD.append((pythontype, (lexicalizer, datatype))) + + +class Variable(Identifier): + """ + A Variable - this is used for querying, or in Formula aware + graphs, where Variables can stored in the graph + """ + __slots__ = () + + def __new__(cls, value): + if len(value) == 0: + raise Exception( + "Attempted to create variable with empty string as name!") + if value[0] == '?': + value = value[1:] + return str.__new__(cls, value) + + def __repr__(self): + if self.__class__ is Variable: + clsName = "rdflib.term.Variable" + else: + clsName = self.__class__.__name__ + + return """%s(%s)""" % (clsName, super(Variable, self).__repr__()) + + def toPython(self): + return "?%s" % self + + def n3(self, namespace_manager = None): + return "?%s" % self + + def __reduce__(self): + return (Variable, (str(self),)) + + def md5_term_hash(self): + """a string of hex that will be the same for two Variables that + are the same. It is not a suitable unique id. + + Supported for backwards compatibility; new code should + probably just use __hash__ + """ + warnings.warn( + "method md5_term_hash is deprecated, and will be removed " + + "removed in the future. If you use this please let rdflib-dev know!", + category=DeprecationWarning, stacklevel=2) + d = md5(self.encode()) + d.update(b("V")) + return d.hexdigest() + + +class Statement(Node, tuple): + + def __new__(cls, xxx_todo_changeme, context): + (subject, predicate, object) = xxx_todo_changeme + warnings.warn( + "Class Statement is deprecated, and will be removed in " + + "the future. If you use this please let rdflib-dev know!", + category=DeprecationWarning, stacklevel=2) + return tuple.__new__(cls, ((subject, predicate, object), context)) + + def __reduce__(self): + return (Statement, (self[0], self[1])) + + def toPython(self): + return (self[0], self[1]) + +# Nodes are ordered like this +# See http://www.w3.org/TR/sparql11-query/#modOrderBy +# we leave "space" for more subclasses of Node elsewhere +# default-dict to grazefully fail for new subclasses +_ORDERING = defaultdict(int) +_ORDERING.update({ + BNode: 10, + Variable: 20, + URIRef: 30, + Literal: 40 + }) + + +def _isEqualXMLNode(node, other): + from xml.dom.minidom import Node + + def recurse(): + # Recursion through the children + # In Python2, the semantics of 'map' is such that the check on + # length would be unnecessary. In Python 3, + # the semantics of map has changed (why, oh why???) and the check + # for the length becomes necessary... + if len(node.childNodes) != len(other.childNodes): + return False + for (nc, oc) in map( + lambda x, y: (x, y), node.childNodes, other.childNodes): + if not _isEqualXMLNode(nc, oc): + return False + # if we got here then everything is fine: + return True + + if node is None or other is None: + return False + + if node.nodeType != other.nodeType: + return False + + if node.nodeType in [Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE]: + return recurse() + + elif node.nodeType == Node.ELEMENT_NODE: + # Get the basics right + if not (node.tagName == other.tagName + and node.namespaceURI == other.namespaceURI): + return False + + # Handle the (namespaced) attributes; the namespace setting key + # should be ignored, though + # Note that the minidom orders the keys already, so we do not have + # to worry about that, which is a bonus... + n_keys = [ + k for k in node.attributes.keysNS() + if k[0] != 'http://www.w3.org/2000/xmlns/'] + o_keys = [ + k for k in other.attributes.keysNS() + if k[0] != 'http://www.w3.org/2000/xmlns/'] + if len(n_keys) != len(o_keys): + return False + for k in n_keys: + if not (k in o_keys + and node.getAttributeNS(k[0], k[1]) == + other.getAttributeNS(k[0], k[1])): + return False + + # if we got here, the attributes are all right, we can go down + # the tree recursively + return recurse() + + elif node.nodeType in [ + Node.TEXT_NODE, Node.COMMENT_NODE, Node.CDATA_SECTION_NODE, + Node.NOTATION_NODE]: + return node.data == other.data + + elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: + return node.data == other.data and node.target == other.target + + elif node.nodeType == Node.ENTITY_NODE: + return node.nodeValue == other.nodeValue + + elif node.nodeType == Node.DOCUMENT_TYPE_NODE: + return node.publicId == other.publicId \ + and node.systemId == other.system.Id + + else: + # should not happen, in fact + raise Exception( + 'I dont know how to compare XML Node type: %s' % node.nodeType) + +if __name__ == '__main__': + import doctest + doctest.testmod()