Mercurial > repos > guerler > springsuite

diff planemo/lib/python3.7/site-packages/rdflib/term.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author: guerler
date: Fri, 31 Jul 2020 00:32:28 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/planemo/lib/python3.7/site-packages/rdflib/term.py	Fri Jul 31 00:32:28 2020 -0400
@@ -0,0 +1,1721 @@
+"""
+This module defines the different types of terms. Terms are the kinds of
+objects that can appear in a quoted/asserted triple. This includes those
+that are core to RDF:
+
+* :class:`Blank Nodes <rdflib.term.BNode>`
+* :class:`URI References <rdflib.term.URIRef>`
+* :class:`Literals <rdflib.term.Literal>` (which consist of a literal value, datatype and language tag)
+
+Those that extend the RDF model into N3:
+
+* :class:`Formulae <rdflib.graph.QuotedGraph>`
+* :class:`Universal Quantifications (Variables) <rdflib.term.Variable>`
+
+And those that are primarily for matching against 'Nodes' in the
+underlying Graph:
+
+* REGEX Expressions
+* Date Ranges
+* Numerical Ranges
+
+"""
+
+__all__ = [
+    'bind',
+
+    'Node',
+    'Identifier',
+
+    'URIRef',
+    'BNode',
+    'Literal',
+
+    'Variable',
+    'Statement',
+]
+
+import logging
+logger = logging.getLogger(__name__)
+import warnings
+import math
+
+import base64
+import xml.dom.minidom
+
+from urllib.parse import urlparse, urljoin, urldefrag
+from datetime import date, time, datetime
+from re import sub, compile
+from collections import defaultdict
+
+from isodate import parse_time, parse_date, parse_datetime
+
+try:
+    from hashlib import md5
+    assert md5
+except ImportError:
+    from md5 import md5
+
+
+import rdflib
+from . import py3compat
+from rdflib.compat import numeric_greater
+
+
+# Helper taken from py3compat; used below as b("U") / b("B") when
+# updating the md5 digests in the md5_term_hash methods.
+b = py3compat.b
+
+# Path prefix looked for by Genid._is_external_skolem.
+skolem_genid = "/.well-known/genid/"
+# Path prefix produced by BNode.skolemize and looked for by
+# RDFLibGenid._is_rdflib_skolem / URIRef.de_skolemize.
+rdflib_skolem_genid = "/.well-known/genid/rdflib/"
+# Module-level cache used by URIRef.de_skolemize: skolem URI string -> BNode.
+skolems = {}
+
+
+_invalid_uri_chars = '<>" {}|\\^`'
+
+def _is_valid_uri(uri):
+    """
+    Return True when ``uri`` contains none of the characters listed in
+    ``_invalid_uri_chars``, and False as soon as one of them is found.
+    """
+    return not any(bad in uri for bad in _invalid_uri_chars)
+
+_lang_tag_regex = compile('^[a-zA-Z]+(?:-[a-zA-Z0-9]+)*$')
+
+def _is_valid_langtag(tag):
+    """Return True when ``tag`` matches ``_lang_tag_regex``, else False."""
+    return _lang_tag_regex.match(tag) is not None
+
+def _is_valid_unicode(value):
+    """
+    Report whether ``value`` can be turned into a Python text string.
+
+    ``bytes`` input is decoded as UTF-8; anything else is passed to
+    ``str()``.  A ``UnicodeError`` raised by that conversion yields
+    False, otherwise the result is True.
+    """
+    try:
+        if isinstance(value, bytes):
+            value.decode('utf-8')
+        else:
+            str(value)
+    except UnicodeError:
+        return False
+    return True
+
+class Node:
+    """
+    Common base type for everything that can appear as a node in the
+    Graph.  It carries no state of its own (empty ``__slots__``).
+    """
+
+    __slots__ = ()
+
+
+class Identifier(Node, str):  # allow Identifiers to be Nodes in the Graph
+    """
+    See http://www.w3.org/2002/07/rdf-identifer-terminology/
+    regarding choice of terminology.
+    """
+
+    __slots__ = ()
+
+    def __new__(cls, value):
+        return str.__new__(cls, value)
+
+    def eq(self, other):
+        """A "semantic"/interpreted equality function,
+        by default, same as __eq__"""
+        return self.__eq__(other)
+
+    def neq(self, other):
+        """A "semantic"/interpreted not equal function,
+        by default, same as __ne__"""
+        return self.__ne__(other)
+
+    def __ne__(self, other):
+        """Negation of :meth:`__eq__`."""
+        return not self.__eq__(other)
+
+    def __eq__(self, other):
+        """
+        Equality for Nodes.
+
+        Two identifiers are equal only when they are of exactly the same
+        type and their string forms are equal.
+
+        >>> BNode("foo")==None
+        False
+        >>> BNode("foo")==URIRef("foo")
+        False
+        >>> URIRef("foo")==BNode("foo")
+        False
+        >>> BNode("foo")!=URIRef("foo")
+        True
+        >>> URIRef("foo")!=BNode("foo")
+        True
+        >>> Variable('a')!=URIRef('a')
+        True
+        >>> Variable('a')!=Variable('a')
+        False
+        """
+
+        if type(self) == type(other):
+            return str(self) == str(other)
+        else:
+            return False
+
+    def __gt__(self, other):
+        """
+        This implements ordering for Nodes,
+
+        This tries to implement this:
+        http://www.w3.org/TR/sparql11-query/#modOrderBy
+
+        Variables are not included in the SPARQL list, but
+        they are greater than BNodes and smaller than everything else
+
+        Returns NotImplemented when ``other`` is neither None nor a Node.
+        """
+        if other is None:
+            return True  # everything bigger than None
+        elif type(self) == type(other):
+            return str(self) > str(other)
+        elif isinstance(other, Node):
+            # different node types: rank by the module's _ORDERING table
+            return _ORDERING[type(self)] > _ORDERING[type(other)]
+
+        return NotImplemented
+
+    def __lt__(self, other):
+        """
+        Mirror image of :meth:`__gt__`; returns NotImplemented when
+        ``other`` is neither None nor a Node.
+        """
+        if other is None:
+            return False  # Nothing is less than None
+        elif type(self) == type(other):
+            return str(self) < str(other)
+        elif isinstance(other, Node):
+            return _ORDERING[type(self)] < _ORDERING[type(other)]
+
+        return NotImplemented
+
+    def __le__(self, other):
+        """
+        Less-than-or-equal, built from :meth:`__lt__` and ``==``.
+
+        NotImplemented coming back from ``__lt__`` is passed on unchanged.
+        It is truthy, so testing it with a plain ``if`` would wrongly
+        report True for operands that cannot be compared.
+        """
+        r = self.__lt__(other)
+        if r is NotImplemented:
+            return NotImplemented
+        if r:
+            return True
+        return self == other
+
+    def __ge__(self, other):
+        """
+        Greater-than-or-equal, built from :meth:`__gt__` and ``==``.
+
+        NotImplemented coming back from ``__gt__`` is passed on unchanged
+        rather than being treated as a true result.
+        """
+        r = self.__gt__(other)
+        if r is NotImplemented:
+            return NotImplemented
+        if r:
+            return True
+        return self == other
+
+    def __hash__(self):
+        """
+        Hash of the string value xor-ed with the hash of the fully
+        qualified class name, so the term type contributes to the hash.
+        """
+        t = type(self)
+        fqn = t.__module__ + '.' + t.__name__
+        return hash(fqn) ^ hash(str(self))
+
+
+class URIRef(Identifier):
+    """
+    RDF URI Reference: http://www.w3.org/TR/rdf-concepts/#section-Graph-URIref
+    """
+
+    __slots__ = ()
+
+    def __new__(cls, value, base=None):
+        """
+        Create the reference, first resolving ``value`` against ``base``
+        when a base is given.  A value that fails ``_is_valid_uri`` is
+        still accepted; only a warning is logged.
+        """
+        if base is not None:
+            had_trailing_hash = value.endswith("#")
+            value = urljoin(base, value, allow_fragments=1)
+            # put the trailing '#' back if it did not survive the join
+            if had_trailing_hash and not value.endswith("#"):
+                value += "#"
+
+        if not _is_valid_uri(value):
+            logger.warning('%s does not look like a valid URI, trying to serialize this will break.'%value)
+
+        try:
+            return str.__new__(cls, value)
+        except UnicodeDecodeError:
+            return str.__new__(cls, value, 'utf-8')
+
+    def toPython(self):
+        """Return the plain ``str`` form of this reference."""
+        return str(self)
+
+    def n3(self, namespace_manager = None):
+        """
+        Serialize as N3: ``<uri>``, or whatever
+        ``namespace_manager.normalizeUri`` returns when a manager is given.
+
+        This will do a limited check for valid URIs,
+        essentially just making sure that the string includes no illegal
+        characters (``<, >, ", {, }, |, \\, `, ^``)
+
+        :param namespace_manager: if not None, will be used to make up
+             a prefixed name
+        :raises Exception: if the URI contains one of those characters
+        """
+
+        if not _is_valid_uri(self):
+            raise Exception('"%s" does not look like a valid URI, I cannot serialize this as N3/Turtle. Perhaps you wanted to urlencode it?'%self)
+
+        if not namespace_manager:
+            return "<%s>" % self
+        return namespace_manager.normalizeUri(self)
+
+    def defrag(self):
+        """
+        Return a URIRef without the fragment part; ``self`` is returned
+        unchanged when it contains no '#'.
+        """
+        if "#" not in self:
+            return self
+        without_fragment = urldefrag(self)[0]
+        return URIRef(without_fragment)
+
+    def __reduce__(self):
+        text = str(self)
+        return (URIRef, (text,))
+
+    def __getnewargs__(self):
+        text = str(self)
+        return (text, )
+
+    if not py3compat.PY3:
+        def __str__(self):
+            return self.encode()
+
+    def __repr__(self):
+        # the exact base class is shown fully qualified, subclasses by bare name
+        clsName = ("rdflib.term.URIRef" if self.__class__ is URIRef
+                   else self.__class__.__name__)
+        inner = super(URIRef, self).__repr__()
+        return """%s(%s)""" % (clsName, inner)
+
+    def __add__(self, other):
+        joined = str(self) + other
+        return self.__class__(joined)
+
+    def __radd__(self, other):
+        joined = other + str(self)
+        return self.__class__(joined)
+
+    def __mod__(self, other):
+        formatted = str(self) % other
+        return self.__class__(formatted)
+
+    def md5_term_hash(self):
+        """a string of hex that will be the same for two URIRefs that
+        are the same. It is not a suitable unique id.
+
+        Supported for backwards compatibility; new code should
+        probably just use __hash__
+        """
+        warnings.warn(
+            "method md5_term_hash is deprecated, and will be removed "
+            "in the future. If you use this please let rdflib-dev know!",
+            category=DeprecationWarning, stacklevel=2)
+        digest = md5(self.encode())
+        digest.update(b("U"))
+        return digest.hexdigest()
+
+    def de_skolemize(self):
+        """ Create a Blank Node from a skolem URI, in accordance
+        with http://www.w3.org/TR/rdf11-concepts/#section-skolemization.
+        This function accepts only rdflib type skolemization, to provide
+        a round-tripping within the system.
+
+        An RDFLibGenid yields a BNode named after the part of the path
+        that follows ``rdflib_skolem_genid``; any other Genid yields a
+        BNode remembered in the module-level ``skolems`` cache.
+
+        .. versionadded:: 4.0
+        """
+        if isinstance(self, RDFLibGenid):
+            path = urlparse("%s" % self).path
+            return BNode(value=path[len(rdflib_skolem_genid):])
+
+        if isinstance(self, Genid):
+            key = "%s" % self
+            try:
+                return skolems[key]
+            except KeyError:
+                fresh = BNode()
+                skolems[key] = fresh
+                return fresh
+
+        raise Exception("<%s> is not a skolem URI" % self)
+
+
+class Genid(URIRef):
+    """URIRef subclass used for skolem ("genid") URIs."""
+    __slots__ = ()
+
+    @staticmethod
+    def _is_external_skolem(uri):
+        """
+        Return True when the last occurrence of ``skolem_genid`` in the
+        path of ``uri`` is at position 0, i.e. ``rfind`` returns 0.
+        """
+        text = uri if isinstance(uri, str) else str(uri)
+        return urlparse(text).path.rfind(skolem_genid) == 0
+
+
+class RDFLibGenid(Genid):
+    """Genid subclass for the skolem URIs generated by rdflib itself."""
+    __slots__ = ()
+
+    @staticmethod
+    def _is_rdflib_skolem(uri):
+        """
+        Return True when ``uri`` has empty params, query and fragment and
+        the last occurrence of ``rdflib_skolem_genid`` in its path is at
+        position 0.
+        """
+        text = uri if isinstance(uri, str) else str(uri)
+        parts = urlparse(text)
+        extras = (parts.params, parts.query, parts.fragment)
+        if any(extra != "" for extra in extras):
+            return False
+        return parts.path.rfind(rdflib_skolem_genid) == 0
+
+
+def _unique_id():
+    """
+    Return the constant prefix "N" used for generated BNode ids.
+
+    Kept only so the internal API stays unchanged; it used to create a
+    (hopefully) unique prefix.  BNode.__new__() relies on the prefix so
+    that a generated id starts with a letter: BNode identifiers must be
+    valid NCNames, _:[A-Za-z][A-Za-z0-9]*, to be serializable as a
+    nodeID, see
+    http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID
+    """
+    prefix = "N"
+    return prefix
+
+
+def _serial_number_generator():
+    """
+    Return a zero-argument callable; each call to it yields the hex
+    string of a freshly generated UUID4.
+    """
+    import uuid
+
+    def _generator():
+        fresh = uuid.uuid4()
+        return fresh.hex
+
+    return _generator
+
+
+class BNode(Identifier):
+    """
+    Blank Node: http://www.w3.org/TR/rdf-concepts/#section-blank-nodes
+
+    """
+    __slots__ = ()
+
+    # _sn_gen and _prefix are evaluated once, when the function is defined.
+    def __new__(cls, value=None,
+                _sn_gen=_serial_number_generator(), _prefix=_unique_id()):
+        """
+        Create a blank node.  Without ``value`` an id is generated as
+        ``_prefix`` followed by ``_sn_gen()``.
+
+        Only store implementations should pass in a value; the value is
+        not checked here (TODO: should it be required to be a valid
+        NCName, _:[A-Za-z][A-Za-z0-9]*, see
+        http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID ?).
+        """
+        if value is None:
+            # so that BNode values do not collide with ones created with
+            # a different instance of this module at some other time.
+            value = "%s%s" % (_prefix, _sn_gen())
+        return Identifier.__new__(cls, value)
+
+    def toPython(self):
+        """Return the plain ``str`` form of this node id."""
+        return str(self)
+
+    def n3(self, namespace_manager=None):
+        """Return the node as ``_:<id>``; ``namespace_manager`` is unused."""
+        return "_:%s" % self
+
+    def __getnewargs__(self):
+        text = str(self)
+        return (text, )
+
+    def __reduce__(self):
+        text = str(self)
+        return (BNode, (text,))
+
+    if not py3compat.PY3:
+        def __str__(self):
+            return self.encode()
+
+    def __repr__(self):
+        # the exact base class is shown fully qualified, subclasses by bare name
+        clsName = ("rdflib.term.BNode" if self.__class__ is BNode
+                   else self.__class__.__name__)
+        return """%s('%s')""" % (clsName, str(self))
+
+    def md5_term_hash(self):
+        """a string of hex that will be the same for two BNodes that
+        are the same. It is not a suitable unique id.
+
+        Supported for backwards compatibility; new code should
+        probably just use __hash__
+        """
+        warnings.warn(
+            "method md5_term_hash is deprecated, and will be removed "
+            "in the future. If you use this please let rdflib-dev know!",
+            category=DeprecationWarning, stacklevel=2)
+        digest = md5(self.encode())
+        digest.update(b("B"))
+        return digest.hexdigest()
+
+    def skolemize(self, authority="http://rdlib.net/"):
+        """ Create a URIRef "skolem" representation of the BNode, in accordance
+        with http://www.w3.org/TR/rdf11-concepts/#section-skolemization
+
+        The URI is ``rdflib_skolem_genid`` plus this node's id, joined
+        onto ``authority`` with ``urljoin``.
+
+        .. versionadded:: 4.0
+        """
+        path = "%s%s" % (rdflib_skolem_genid, str(self))
+        absolute = urljoin(authority, path)
+        return URIRef(absolute)
+
+
+class Literal(Identifier):
+    __doc__ = py3compat.format_doctest_out("""
+    RDF Literal: http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal
+
+    The lexical value of the literal is the unicode object
+    The interpreted, datatyped value is available from .value
+
+    Language tags must be valid according to :rfc:5646
+
+    For valid XSD datatypes, the lexical form is optionally normalized
+    at construction time. Default behaviour is set by rdflib.NORMALIZE_LITERALS
+    and can be overridden by the normalize parameter to __new__
+
+    Equality and hashing of Literals are done based on the lexical form, i.e.:
+
+    >>> from rdflib.namespace import XSD
+
+    >>> Literal('01')!=Literal('1') # clear - strings differ
+    True
+
+    but with data-type they get normalized:
+
+    >>> Literal('01', datatype=XSD.integer)!=Literal('1', datatype=XSD.integer)
+    False
+
+    unless disabled:
+
+    >>> Literal('01', datatype=XSD.integer, normalize=False)!=Literal('1', datatype=XSD.integer)
+    True
+
+
+    Value based comparison is possible:
+
+    >>> Literal('01', datatype=XSD.integer).eq(Literal('1', datatype=XSD.float))
+    True
+
+    The eq method also provides limited support for basic python types:
+
+    >>> Literal(1).eq(1) # fine - int compatible with xsd:integer
+    True
+    >>> Literal('a').eq('b') # fine - str compatible with plain-lit
+    False
+    >>> Literal('a', datatype=XSD.string).eq('a') # fine - str compatible with xsd:string
+    True
+    >>> Literal('a').eq(1) # not fine, int incompatible with plain-lit
+    NotImplemented
+
+    Greater-than/less-than ordering comparisons are also done in value
+    space, when compatible datatypes are used.  Incompatible datatypes
+    are ordered by DT, or by lang-tag.  For other nodes the ordering
+    is None < BNode < URIRef < Literal
+
+    Any comparison with non-rdflib Node are "NotImplemented"
+    In PY2.X some stable order will be made up by python
+
+    In PY3 this is an error.
+
+    >>> from rdflib import Literal, XSD
+    >>> lit2006 = Literal('2006-01-01',datatype=XSD.date)
+    >>> lit2006.toPython()
+    datetime.date(2006, 1, 1)
+    >>> lit2006 < Literal('2007-01-01',datatype=XSD.date)
+    True
+    >>> Literal(datetime.utcnow()).datatype
+    rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#dateTime')
+    >>> Literal(1) > Literal(2) # by value
+    False
+    >>> Literal(1) > Literal(2.0) # by value
+    False
+    >>> Literal('1') > Literal(1) # by DT
+    True
+    >>> Literal('1') < Literal('1') # by lexical form
+    False
+    >>> Literal('a', lang='en') > Literal('a', lang='fr') # by lang-tag
+    False
+    >>> Literal(1) > URIRef('foo') # by node-type
+    True
+
+    The > < operators will eat this NotImplemented and either make up
+    an ordering (py2.x) or throw a TypeError (py3k):
+
+    >>> Literal(1).__gt__(2.0)
+    NotImplemented
+
+
+    """)
+
+
+    if not py3compat.PY3:
+        __slots__ = ("language", "datatype", "value", "_language",
+                     "_datatype", "_value")
+    else:
+        __slots__ = ("_language", "_datatype", "_value")
+
+    def __new__(cls, lexical_or_value, lang=None, datatype=None, normalize=None):
+        """
+        Build a Literal from a lexical string, another Literal, or some
+        other Python object.
+
+        :param lexical_or_value: a ``str`` lexical form, an existing
+            Literal, or any other Python object
+        :param lang: language tag; an empty string is treated as None
+        :param datatype: datatype URI; wrapped in a URIRef when truthy
+        :param normalize: for ``str`` input, whether to replace the lexical
+            form by the one derived from the parsed value; when None the
+            setting ``rdflib.NORMALIZE_LITERALS`` is used
+        :raises TypeError: if both ``lang`` and ``datatype`` are given
+        :raises Exception: if ``lang`` is not a valid language tag
+        """
+
+        if lang == '':
+            lang = None  # no empty lang-tags in RDF
+
+        # an explicit argument wins over the package-level default
+        normalize = normalize if normalize != None else rdflib.NORMALIZE_LITERALS
+
+        if lang is not None and datatype is not None:
+            raise TypeError(
+                "A Literal can only have one of lang or datatype, "
+                "per http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal")
+
+        if lang and not _is_valid_langtag(lang):
+            raise Exception("'%s' is not a valid language tag!"%lang)
+
+        if datatype:
+            datatype = URIRef(datatype)
+
+        # Python-side value; stays None unless one of the branches sets it
+        value = None
+        if isinstance(lexical_or_value, Literal):
+            # create from another Literal instance
+
+            # an explicit lang wins, otherwise inherit the source's language
+            lang = lang or lexical_or_value.language
+            if datatype:
+                # override datatype
+                value = _castLexicalToPython(lexical_or_value, datatype)
+            else:
+                datatype = lexical_or_value.datatype
+                value = lexical_or_value.value
+
+        elif isinstance(lexical_or_value, str):
+                # passed a string
+                # try parsing lexical form of datatyped literal
+                value = _castLexicalToPython(lexical_or_value, datatype)
+
+                if value is not None and normalize:
+                    # only the lexical form is taken from the round trip;
+                    # the returned datatype (_datatype) is not used here
+                    _value, _datatype = _castPythonToLiteral(value)
+                    if _value is not None and _is_valid_unicode(_value):
+                        lexical_or_value = _value
+
+        else:
+            # passed some python object
+            value = lexical_or_value
+            _value, _datatype = _castPythonToLiteral(lexical_or_value)
+
+            # an explicitly given datatype wins over the derived one
+            datatype = datatype or _datatype
+            if _value is not None:
+                lexical_or_value = _value
+            if datatype:
+                lang = None
+
+        if py3compat.PY3 and isinstance(lexical_or_value, bytes):
+            lexical_or_value = lexical_or_value.decode('utf-8')
+
+        try:
+            inst = str.__new__(cls, lexical_or_value)
+        except UnicodeDecodeError:
+            inst = str.__new__(cls, lexical_or_value, 'utf-8')
+
+        # these slots back the read-only language/datatype/value properties
+        inst._language = lang
+        inst._datatype = datatype
+        inst._value = value
+        return inst
+
+    @py3compat.format_doctest_out
+    def normalize(self):
+        """
+        Returns a new literal with a normalised lexical representation
+        of this literal
+        >>> from rdflib import XSD
+        >>> Literal("01", datatype=XSD.integer, normalize=False).normalize()
+        rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
+
+        Illegal lexical forms for the datatype given are simply passed on
+        >>> Literal("a", datatype=XSD.integer, normalize=False)
+        rdflib.term.Literal(%(u)s'a', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
+
+        When this literal has no Python value, ``self`` is returned as is.
+        """
+
+        # identity test: "!= None" would call the value's own __ne__
+        if self.value is not None:
+            return Literal(self.value, datatype=self.datatype, lang=self.language)
+        else:
+            return self
+
+    @property
+    def value(self):
+        """Read-only access to ``_value``, the Python value set in ``__new__`` (may be None)."""
+        return self._value
+
+    @property
+    def language(self):
+        """Read-only access to ``_language``, the language tag set in ``__new__`` (may be None)."""
+        return self._language
+
+    @property
+    def datatype(self):
+        """Read-only access to ``_datatype``, the datatype set in ``__new__`` (may be None)."""
+        return self._datatype
+
+    def __reduce__(self):
+        """Pickle support: rebuild via ``Literal(lexical, language, datatype)``."""
+        ctor_args = (str(self), self.language, self.datatype)
+        return (Literal, ctor_args)
+
+    def __getstate__(self):
+        """Return ``(None, state)`` where state holds language and datatype."""
+        state = {"language": self.language, "datatype": self.datatype}
+        return (None, state)
+
+    def __setstate__(self, arg):
+        """Restore language and datatype from a ``(ignored, state)`` pair."""
+        _unused, state = arg
+        self._language = state["language"]
+        self._datatype = state["datatype"]
+
+    @py3compat.format_doctest_out
+    def __add__(self, val):
+        """
+        Add ``val`` to the Python value of this literal when that works,
+        otherwise concatenate it to the lexical form, keeping language
+        and datatype.
+
+        >>> Literal(1) + 1
+        rdflib.term.Literal(%(u)s'2', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
+        >>> Literal("1") + "1"
+        rdflib.term.Literal(%(u)s'11')
+        """
+
+        python_value = self.toPython()
+        if not isinstance(python_value, Literal):
+            try:
+                return Literal(python_value + val)
+            except TypeError:
+                # not addable in value space: fall back to the string form
+                pass
+
+        concatenated = str.__add__(self, val)
+        return Literal(concatenated, self.language, self.datatype)
+
+    def __bool__(self):
+        """
+        Is the Literal "True"
+        This is used for if statements, bool(literal), etc.
+
+        A literal with a Python value takes the truth of that value;
+        otherwise it is true when its lexical form is non-empty.
+        """
+        # identity test: "!= None" would call the value's own __ne__
+        if self.value is not None:
+            return bool(self.value)
+        return len(self) != 0
+
+    @py3compat.format_doctest_out
+    def __neg__(self):
+        """
+        Negate an int- or float-valued literal; anything else is a TypeError.
+
+        >>> (- Literal(1))
+        rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
+        >>> (- Literal(10.5))
+        rdflib.term.Literal(%(u)s'-10.5', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#double'))
+        >>> from rdflib.namespace import XSD
+        >>> (- Literal("1", datatype=XSD.integer))
+        rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
+
+        >>> (- Literal("1"))
+        Traceback (most recent call last):
+          File "<stdin>", line 1, in <module>
+        TypeError: Not a number; rdflib.term.Literal(%(u)s'1')
+        >>>
+        """
+
+        number = self.value
+        if not isinstance(number, (int, float)):
+            raise TypeError("Not a number; %s" % repr(self))
+        return Literal(number.__neg__())
+
+    @py3compat.format_doctest_out
+    def __pos__(self):
+        """
+        Unary plus of an int- or float-valued literal; anything else is a
+        TypeError.
+
+        >>> (+ Literal(1))
+        rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
+        >>> (+ Literal(-1))
+        rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
+        >>> from rdflib.namespace import XSD
+        >>> (+ Literal("-1", datatype=XSD.integer))
+        rdflib.term.Literal(%(u)s'-1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
+
+        >>> (+ Literal("1"))
+        Traceback (most recent call last):
+          File "<stdin>", line 1, in <module>
+        TypeError: Not a number; rdflib.term.Literal(%(u)s'1')
+        """
+        number = self.value
+        if not isinstance(number, (int, float)):
+            raise TypeError("Not a number; %s" % repr(self))
+        return Literal(number.__pos__())
+
+    @py3compat.format_doctest_out
+    def __abs__(self):
+        """
+        Absolute value of an int- or float-valued literal; anything else
+        is a TypeError.
+
+        >>> abs(Literal(-1))
+        rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
+
+        >>> from rdflib.namespace import XSD
+        >>> abs( Literal("-1", datatype=XSD.integer))
+        rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
+
+        >>> abs(Literal("1"))
+        Traceback (most recent call last):
+          File "<stdin>", line 1, in <module>
+        TypeError: Not a number; rdflib.term.Literal(%(u)s'1')
+        """
+        number = self.value
+        if not isinstance(number, (int, float)):
+            raise TypeError("Not a number; %s" % repr(self))
+        return Literal(number.__abs__())
+
+    @py3compat.format_doctest_out
+    def __invert__(self):
+        """
+        Bitwise inversion of the Python value of this literal.
+
+        >>> ~(Literal(-1))
+        rdflib.term.Literal(%(u)s'0', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
+
+        >>> from rdflib.namespace import XSD
+        >>> ~( Literal("-1", datatype=XSD.integer))
+        rdflib.term.Literal(%(u)s'0', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))
+
+        Not working:
+
+        >>> ~(Literal("1"))
+        Traceback (most recent call last):
+          File "<stdin>", line 1, in <module>
+        TypeError: Not a number; rdflib.term.Literal(%(u)s'1')
+
+        A float value passes the number check but cannot be inverted;
+        Python then raises its own TypeError for the ``~`` operator.
+        """
+        if isinstance(self.value, (int, float)):
+            # Use the operator rather than calling .__invert__() directly:
+            # float has no such method, so the direct call would leak an
+            # AttributeError instead of a TypeError.
+            return Literal(~self.value)
+        else:
+            raise TypeError("Not a number; %s" % repr(self))
+
+    def __gt__(self, other):
+        """
+
+        This implements ordering for Literals,
+        the other comparison methods delegate here
+
+        This tries to implement this:
+        http://www.w3.org/TR/sparql11-query/#modOrderBy
+
+        In short, Literals with compatible data-types are ordered in value space,
+        i.e.
+        >>> from rdflib import XSD
+
+        >>> Literal(1)>Literal(2) # int/int
+        False
+        >>> Literal(2.0)>Literal(1) # double/int
+        True
+        >>> from decimal import Decimal
+        >>> Literal(Decimal("3.3")) > Literal(2.0) # decimal/double
+        True
+        >>> Literal(Decimal("3.3")) < Literal(4.0) # decimal/double
+        True
+        >>> Literal('b')>Literal('a') # plain lit/plain lit
+        True
+        >>> Literal('b')>Literal('a', datatype=XSD.string) # plain lit/xsd:string
+        True
+
+        Incompatible datatype mismatches ordered by DT
+
+        >>> Literal(1)>Literal("2") # int>string
+        False
+
+        Langtagged literals by lang tag
+        >>> Literal("a", lang="en")>Literal("a", lang="fr")
+        False
+        """
+        if other is None:
+            return True  # Everything is greater than None
+        if isinstance(other, Literal):
+
+            # both sides numeric: compare the Python values
+            if self.datatype in _NUMERIC_LITERAL_TYPES and \
+                    other.datatype in _NUMERIC_LITERAL_TYPES:
+                return numeric_greater(self.value, other.value)
+
+            # plain-literals and xsd:string literals
+            # are "the same"
+            dtself = self.datatype or _XSD_STRING
+            dtother = other.datatype or _XSD_STRING
+
+            if dtself != dtother:
+                # different datatypes: either refuse to order them or
+                # fall back to ordering the datatype URIs themselves
+                if rdflib.DAWG_LITERAL_COLLATION:
+                    return NotImplemented
+                else:
+                    return dtself > dtother
+
+            if self.language != other.language:
+                # a literal without language sorts before one with a language
+                if not self.language:
+                    return False
+                elif not other.language:
+                    return True
+                else:
+                    return self.language > other.language
+
+            # same datatype and language: prefer the Python values ...
+            if self.value != None and other.value != None:
+                return self.value > other.value
+
+            # ... and otherwise the lexical forms
+            if str(self) != str(other):
+                return str(self) > str(other)
+
+            # same language, same lexical form, check real dt
+            # plain-literals come before xsd:string!
+            if self.datatype != other.datatype:
+                if not self.datatype:
+                    return False
+                elif not other.datatype:
+                    return True
+                else:
+                    return self.datatype > other.datatype
+
+            return False  # they are the same
+
+        elif isinstance(other, Node):
+            return True  # Literal are the greatest!
+        else:
+            return NotImplemented  # we can only compare to nodes
+
+    def __lt__(self, other):
+        """
+        Less-than, derived from ``__gt__`` and ``eq`` for Literals.
+
+        Returns False for None and for any other Node, and NotImplemented
+        for everything that is not a Node.  NotImplemented produced by
+        ``__gt__`` or ``eq`` is passed on unchanged instead of being
+        negated as if it were a boolean.
+        """
+        if other is None:
+            return False  # Nothing is less than None
+        if isinstance(other, Literal):
+            try:
+                greater = self.__gt__(other)
+                if greater is NotImplemented:
+                    return NotImplemented
+                if greater:
+                    return False
+                same = self.eq(other)
+                if same is NotImplemented:
+                    return NotImplemented
+                return not same
+            except TypeError:
+                return NotImplemented
+        if isinstance(other, Node):
+            return False  # all nodes are less-than Literals
+
+        return NotImplemented
+
+    def __le__(self, other):
+        """
+        Less-than-or-equal, built from ``__lt__`` and ``eq``.
+
+        NotImplemented coming back from ``__lt__`` is passed on unchanged.
+        It is truthy, so testing it with a plain ``if`` would wrongly
+        report True for operands that cannot be compared.
+
+        >>> from rdflib.namespace import XSD
+        >>> Literal('2007-01-01T10:00:00', datatype=XSD.dateTime
+        ...     ) <= Literal('2007-01-01T10:00:00', datatype=XSD.dateTime)
+        True
+        """
+        r = self.__lt__(other)
+        if r is NotImplemented:
+            return NotImplemented
+        if r:
+            return True
+        try:
+            return self.eq(other)
+        except TypeError:
+            return NotImplemented
+
+    def __ge__(self, other):
+        """
+        Greater-than-or-equal, built from ``__gt__`` and ``eq``.
+
+        NotImplemented coming back from ``__gt__`` is passed on unchanged
+        rather than being treated as a true result.
+        """
+        r = self.__gt__(other)
+        if r is NotImplemented:
+            return NotImplemented
+        if r:
+            return True
+        try:
+            return self.eq(other)
+        except TypeError:
+            return NotImplemented
+
+    def _comparable_to(self, other):
+        """
+        Helper method to decide which things are meaningful to
+        rich-compare with this literal
+
+        Returns False when ``other`` is a Literal that fails one of the
+        checks below, and True otherwise (including for any non-Literal).
+        """
+        if isinstance(other, Literal):
+            if (self.datatype and other.datatype):
+                # two datatyped literals
+                if not self.datatype in XSDToPython or not other.datatype in XSDToPython:
+                    # non XSD DTs must match
+                    if self.datatype != other.datatype:
+                        return False
+
+            else:
+                # xsd:string may be compared with plain literals
+                # NOTE(review): "not" binds tighter than "or", so the test
+                # below parses as
+                #   (not (self is xsd:string and other is plain))
+                #       or (other is xsd:string and self is plain)
+                # which returns False whenever self is not xsd:string, e.g.
+                # for two plain literals.  Possibly the whole "A or B" was
+                # meant to be negated -- confirm the intent before changing.
+                if not (self.datatype == _XSD_STRING and not other.datatype) or \
+                        (other.datatype == _XSD_STRING and not self.datatype):
+                    return False
+
+                # if given lang-tag has to be case insensitive equal
+                if (self.language or "").lower() != (other.language or "").lower():
+                    return False
+
+        return True
+
+    def __hash__(self):
+        """
+        Hash consistent with ``__eq__``: the inherited hash of the lexical
+        form, xor-ed with the hash of the lower-cased language tag and of
+        the datatype when those are set.
+
+        >>> from rdflib.namespace import XSD
+        >>> a = {Literal('1', datatype=XSD.integer):'one'}
+        >>> Literal('1', datatype=XSD.double) in a
+        False
+
+        The only required property of a hash is that objects which compare
+        equal have the same hash value (3.4.1 Basic customization, Python).
+
+        "Two literals are equal if and only if all of the following hold:
+        * The strings of the two lexical forms compare equal, character by
+        character.
+        * Either both or neither have language tags.
+        * The language tags, if any, compare equal.
+        * Either both or neither have datatype URIs.
+        * The two datatype URIs, if any, compare equal, character by
+        character."
+        -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax)
+
+        """
+        result = super(Literal, self).__hash__()
+        lang_tag = self.language
+        if lang_tag:
+            result ^= hash(lang_tag.lower())
+        dtype = self.datatype
+        if dtype:
+            result ^= hash(dtype)
+        return result
+
+    @py3compat.format_doctest_out
+    def __eq__(self, other):
+        """
+        Literals are only equal to other literals.
+
+        Datatypes must be equal, language tags must be equal ignoring
+        case, and the lexical forms must be equal as strings.
+
+        "Two literals are equal if and only if all of the following hold:
+        * The strings of the two lexical forms compare equal, character by character.
+        * Either both or neither have language tags.
+        * The language tags, if any, compare equal.
+        * Either both or neither have datatype URIs.
+        * The two datatype URIs, if any, compare equal, character by character."
+        -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax)
+
+        >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo"))
+        True
+        >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo2"))
+        False
+
+        >>> Literal("1", datatype=URIRef("foo")) == Literal("2", datatype=URIRef("foo"))
+        False
+        >>> Literal("1", datatype=URIRef("foo")) == "asdf"
+        False
+        >>> from rdflib import XSD
+        >>> Literal('2007-01-01', datatype=XSD.date) == Literal('2007-01-01', datatype=XSD.date)
+        True
+        >>> Literal('2007-01-01', datatype=XSD.date) == date(2007, 1, 1)
+        False
+        >>> Literal("one", lang="en") == Literal("one", lang="en")
+        True
+        >>> Literal("hast", lang='en') == Literal("hast", lang='de')
+        False
+        >>> Literal("1", datatype=XSD.integer) == Literal(1)
+        True
+        >>> Literal("1", datatype=XSD.integer) == Literal("01", datatype=XSD.integer)
+        True
+
+        """
+        if self is other:
+            return True
+        # None and every non-Literal object are never equal to a Literal
+        if other is None or not isinstance(other, Literal):
+            return False
+
+        if not (self.datatype == other.datatype):
+            return False
+
+        own_lang = self.language.lower() if self.language else None
+        other_lang = other.language.lower() if other.language else None
+        if not (own_lang == other_lang):
+            return False
+
+        return str.__eq__(self, other)
+
+    def eq(self, other):
+        """
+        Compare the value of this literal with something else
+
+        Either, with the value of another literal
+        comparisons are then done in literal "value space",
+        and according to the rules of XSD subtype-substitution/type-promotion
+
+        OR, with a python object:
+
+        basestring objects can be compared with plain-literals,
+        or those with datatype xsd:string
+
+        bool objects with xsd:boolean
+
+        a int, long or float with numeric xsd types
+
+        isodate date,time,datetime objects with xsd:date,xsd:time or xsd:datetime
+
+        Any other operations returns NotImplemented
+
+        """
+        if isinstance(other, Literal):
+
+            if self.datatype in _NUMERIC_LITERAL_TYPES  \
+                    and other.datatype in _NUMERIC_LITERAL_TYPES:
+                if self.value != None and other.value != None:
+                    return self.value == other.value
+                else:
+                    if str.__eq__(self, other):
+                        return True
+                    raise TypeError(
+                        'I cannot know that these two lexical forms do not map to the same value: %s and %s' % (self, other))
+            if (self.language or "").lower() != (other.language or "").lower():
+                return False
+
+            dtself = self.datatype or _XSD_STRING
+            dtother = other.datatype or _XSD_STRING
+
+            if (dtself == _XSD_STRING and dtother == _XSD_STRING):
+                # string/plain literals, compare on lexical form
+                return str.__eq__(self, other)
+
+            if dtself != dtother:
+                if rdflib.DAWG_LITERAL_COLLATION:
+                    raise TypeError("I don't know how to compare literals with datatypes %s and %s" % (
+                        self.datatype, other.datatype))
+                else:
+                    return False
+
+            # matching non-string DTs now - do we compare values or
+            # lexical form first?  comparing two ints is far quicker -
+            # maybe there are counter examples
+
+            if self.value != None and other.value != None:
+
+                if self.datatype in (_RDF_XMLLITERAL, _RDF_HTMLLITERAL):
+                    return _isEqualXMLNode(self.value, other.value)
+
+                return self.value == other.value
+            else:
+
+                if str.__eq__(self, other):
+                    return True
+
+                if self.datatype == _XSD_STRING:
+                    return False  # string value space=lexical space
+
+                # matching DTs, but not matching, we cannot compare!
+                raise TypeError(
+                    'I cannot know that these two lexical forms do not map to the same value: %s and %s' % (self, other))
+
+        elif isinstance(other, Node):
+            return False  # no non-Literal nodes are equal to a literal
+
+        elif isinstance(other, str):
+            # only plain-literals can be directly compared to strings
+
+            # TODO: Is "blah"@en eq "blah" ?
+            if self.language is not None:
+                return False
+
+            if (self.datatype == _XSD_STRING or self.datatype is None):
+                return str(self) == other
+
+        elif isinstance(other, (int, float)):
+            if self.datatype in _NUMERIC_LITERAL_TYPES:
+                return self.value == other
+        elif isinstance(other, (date, datetime, time)):
+            if self.datatype in (_XSD_DATETIME, _XSD_DATE, _XSD_TIME):
+                return self.value == other
+        elif isinstance(other, bool):
+            if self.datatype == _XSD_BOOLEAN:
+                return self.value == other
+
+        return NotImplemented
+
+    def neq(self, other):
+        return not self.eq(other)
+
+    @py3compat.format_doctest_out
+    def n3(self, namespace_manager = None):
+        r'''
+        Returns a representation in the N3 format.
+
+        Examples::
+
+            >>> Literal("foo").n3()
+            %(u)s'"foo"'
+
+        Strings with newlines or triple-quotes::
+
+            >>> Literal("foo\nbar").n3()
+            %(u)s'"""foo\nbar"""'
+
+            >>> Literal("''\'").n3()
+            %(u)s'"\'\'\'"'
+
+            >>> Literal('"""').n3()
+            %(u)s'"\\"\\"\\""'
+
+        Language::
+
+            >>> Literal("hello", lang="en").n3()
+            %(u)s'"hello"@en'
+
+        Datatypes::
+
+            >>> Literal(1).n3()
+            %(u)s'"1"^^<http://www.w3.org/2001/XMLSchema#integer>'
+
+            >>> Literal(1.0).n3()
+            %(u)s'"1.0"^^<http://www.w3.org/2001/XMLSchema#double>'
+
+            >>> Literal(True).n3()
+            %(u)s'"true"^^<http://www.w3.org/2001/XMLSchema#boolean>'
+
+        Datatype and language isn't allowed (datatype takes precedence)::
+
+            >>> Literal(1, lang="en").n3()
+            %(u)s'"1"^^<http://www.w3.org/2001/XMLSchema#integer>'
+
+        Custom datatype::
+
+            >>> footype = URIRef("http://example.org/ns#foo")
+            >>> Literal("1", datatype=footype).n3()
+            %(u)s'"1"^^<http://example.org/ns#foo>'
+
+        Passing a namespace-manager will use it to abbreviate datatype URIs:
+
+            >>> from rdflib import Graph
+            >>> Literal(1).n3(Graph().namespace_manager)
+            %(u)s'"1"^^xsd:integer'
+        '''
+        if namespace_manager:
+            return self._literal_n3(qname_callback =
+                                    namespace_manager.normalizeUri)
+        else:
+            return self._literal_n3()
+
+    @py3compat.format_doctest_out
+    def _literal_n3(self, use_plain=False, qname_callback=None):
+        '''
+        Build the N3 text for this literal.
+
+        :param use_plain: when true, a literal whose datatype is one of
+            _PLAIN_LITERAL_TYPES and whose value is not None is written
+            in the bare shorthand form instead of the quoted form
+        :param qname_callback: optional callable that receives the
+            datatype; a truthy result is used as the datatype part,
+            otherwise the datatype is written between angle brackets
+
+        Using plain literal (shorthand) output::
+            >>> from rdflib.namespace import XSD
+
+            >>> Literal(1)._literal_n3(use_plain=True)
+            %(u)s'1'
+
+            >>> Literal(1.0)._literal_n3(use_plain=True)
+            %(u)s'1e+00'
+
+            >>> Literal(1.0, datatype=XSD.decimal)._literal_n3(use_plain=True)
+            %(u)s'1.0'
+
+            >>> Literal(1.0, datatype=XSD.float)._literal_n3(use_plain=True)
+            %(u)s'"1.0"^^<http://www.w3.org/2001/XMLSchema#float>'
+
+            >>> Literal("foo", datatype=XSD.string)._literal_n3(
+            ...         use_plain=True)
+            %(u)s'"foo"^^<http://www.w3.org/2001/XMLSchema#string>'
+
+            >>> Literal(True)._literal_n3(use_plain=True)
+            %(u)s'true'
+
+            >>> Literal(False)._literal_n3(use_plain=True)
+            %(u)s'false'
+
+            >>> Literal(1.91)._literal_n3(use_plain=True)
+            %(u)s'1.91e+00'
+
+            Only limited precision available for floats:
+            >>> Literal(0.123456789)._literal_n3(use_plain=True)
+            %(u)s'1.234568e-01'
+
+            >>> Literal('0.123456789',
+            ...     datatype=XSD.decimal)._literal_n3(use_plain=True)
+            %(u)s'0.123456789'
+
+        Using callback for datatype QNames::
+
+            >>> Literal(1)._literal_n3(
+            ...         qname_callback=lambda uri: "xsd:integer")
+            %(u)s'"1"^^xsd:integer'
+
+        '''
+        # shorthand branch; when it does not return, control falls through
+        # to the quoted form below
+        if use_plain and self.datatype in _PLAIN_LITERAL_TYPES:
+            if self.value is not None:
+                # If self is inf or NaN, we need a datatype
+                # (there is no plain representation)
+                if self.datatype in _NUMERIC_INF_NAN_LITERAL_TYPES:
+                    try:
+                        v = float(self)
+                        if math.isinf(v) or math.isnan(v):
+                            # redo the serialization in quoted form
+                            return self._literal_n3(False, qname_callback)
+                    except ValueError:
+                        # lexical form is not a float at all: quoted form too
+                        return self._literal_n3(False, qname_callback)
+
+                # this is a bit of a mess -
+                # in py >=2.6 the string.format function makes this easier
+                # we try to produce "pretty" output
+                if self.datatype == _XSD_DOUBLE:
+                    # the regex drops the zeros (and a then-dangling dot)
+                    # that '%e' leaves in front of the exponent
+                    return sub("\\.?0*e", "e", '%e' % float(self))
+                elif self.datatype == _XSD_DECIMAL:
+                    s = '%s' % self
+                    # the decimal shorthand is always written with a '.'
+                    if '.' not in s:
+                        s += '.0'
+                    return s
+
+                elif self.datatype == _XSD_BOOLEAN:
+                    return ('%s' % self).lower()
+                else:
+                    return '%s' % self
+
+        # quoted form: escaped lexical form plus language tag or datatype
+        encoded = self._quote_encode()
+
+        datatype = self.datatype
+        quoted_dt = None
+        if datatype:
+            if qname_callback:
+                quoted_dt = qname_callback(datatype)
+            if not quoted_dt:
+                # no callback, or it returned nothing usable
+                quoted_dt = "<%s>" % datatype
+            if datatype in _NUMERIC_INF_NAN_LITERAL_TYPES:
+                try:
+                    v = float(self)
+                    if math.isinf(v):
+                        # py string reps: float: 'inf', Decimal: 'Infinity"
+                        # both need to become "INF" in xsd datatypes
+                        encoded = encoded.replace('inf', 'INF').replace(
+                            'Infinity', 'INF')
+                    if math.isnan(v):
+                        encoded = encoded.replace('nan', 'NaN')
+                except ValueError:
+                    # if we can't cast to float something is wrong, but we can
+                    # still serialize. Warn user about it
+                    warnings.warn("Serializing weird numerical %r" % self)
+
+        # a language tag is checked before the datatype
+        language = self.language
+        if language:
+            return '%s@%s' % (encoded, language)
+        elif datatype:
+            return '%s^^%s' % (encoded, quoted_dt)
+        else:
+            return '%s' % encoded
+
+    def _quote_encode(self):
+        # This simpler encoding doesn't work; a newline gets encoded as "\\n",
+        # which is ok in sourcecode, but we want "\n".
+        # encoded = self.encode('unicode-escape').replace(
+        #        '\\', '\\\\').replace('"','\\"')
+        # encoded = self.replace.replace('\\', '\\\\').replace('"','\\"')
+
+        # NOTE: Could in theory chose quotes based on quotes appearing in the
+        # string, i.e. '"' and "'", but N3/turtle doesn't allow "'"(?).
+
+        if "\n" in self:
+            # Triple quote this string.
+            encoded = self.replace('\\', '\\\\')
+            if '"""' in self:
+                # is this ok?
+                encoded = encoded.replace('"""', '\\"\\"\\"')
+            if encoded[-1] == '"' and encoded[-2] != '\\':
+                encoded = encoded[:-1] + '\\' + '"'
+
+            return '"""%s"""' % encoded.replace('\r', '\\r')
+        else:
+            return '"%s"' % self.replace(
+                '\n', '\\n').replace(
+                    '\\', '\\\\').replace(
+                        '"', '\\"').replace(
+                            '\r', '\\r')
+
+    # Only defined when py3compat.PY3 is false: __str__ then returns the
+    # result of self.encode() instead of the inherited implementation.
+    if not py3compat.PY3:
+        def __str__(self):
+            return self.encode()
+
+    def __repr__(self):
+        args = [super(Literal, self).__repr__()]
+        if self.language is not None:
+            args.append("lang=%s" % repr(self.language))
+        if self.datatype is not None:
+            args.append("datatype=%s" % repr(self.datatype))
+        if self.__class__ == Literal:
+            clsName = "rdflib.term.Literal"
+        else:
+            clsName = self.__class__.__name__
+        return """%s(%s)""" % (clsName, ", ".join(args))
+
+    def toPython(self):
+        """
+        Returns an appropriate python datatype derived from this RDF Literal
+        """
+
+        if self.value is not None:
+            return self.value
+        return self
+
+    def md5_term_hash(self):
+        """a string of hex that will be the same for two Literals that
+        are the same. It is not a suitable unique id.
+
+        Supported for backwards compatibility; new code should
+        probably just use __hash__
+        """
+        warnings.warn(
+            "method md5_term_hash is deprecated, and will be removed " +
+            "removed in the future. If you use this please let rdflib-dev know!",
+            category=DeprecationWarning, stacklevel=2)
+        d = md5(self.encode())
+        d.update(b("L"))
+        return d.hexdigest()
+
+
+def _parseXML(xmlstring):
+    if not py3compat.PY3:
+        xmlstring = xmlstring.encode('utf-8')
+    retval = xml.dom.minidom.parseString(
+        "<rdflibtoplevelelement>%s</rdflibtoplevelelement>" % xmlstring)
+    retval.normalize()
+    return retval
+
+
+def _parseHTML(htmltext):
+    try:
+        import html5lib
+        parser = html5lib.HTMLParser(
+            tree=html5lib.treebuilders.getTreeBuilder("dom"))
+        retval = parser.parseFragment(htmltext)
+        retval.normalize()
+        return retval
+    except ImportError:
+        raise ImportError(
+            "HTML5 parser not available. Try installing" +
+            " html5lib <http://code.google.com/p/html5lib>")
+
+
+def _writeXML(xmlnode):
+    if isinstance(xmlnode, xml.dom.minidom.DocumentFragment):
+        d = xml.dom.minidom.Document()
+        d.childNodes += xmlnode.childNodes
+        xmlnode = d
+    s = xmlnode.toxml('utf-8')
+    # for clean round-tripping, remove headers -- I have great and
+    # specific worries that this will blow up later, but this margin
+    # is too narrow to contain them
+    if s.startswith(b('<?xml version="1.0" encoding="utf-8"?>')):
+        s = s[38:]
+    if s.startswith(b('<rdflibtoplevelelement>')):
+        s = s[23:-24]
+    if s == b('<rdflibtoplevelelement/>'):
+        s = b('')
+    return s
+
+# Cannot import Namespace/XSD because of circular dependencies
+_XSD_PFX = 'http://www.w3.org/2001/XMLSchema#'
+_RDF_PFX = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
+
+_RDF_XMLLITERAL = URIRef(_RDF_PFX + 'XMLLiteral')
+_RDF_HTMLLITERAL = URIRef(_RDF_PFX + 'HTML')
+
+_XSD_STRING = URIRef(_XSD_PFX + 'string')
+
+_XSD_FLOAT = URIRef(_XSD_PFX + 'float')
+_XSD_DOUBLE = URIRef(_XSD_PFX + 'double')
+_XSD_DECIMAL = URIRef(_XSD_PFX + 'decimal')
+_XSD_INTEGER = URIRef(_XSD_PFX + 'integer')
+_XSD_BOOLEAN = URIRef(_XSD_PFX + 'boolean')
+
+_XSD_DATETIME = URIRef(_XSD_PFX + 'dateTime')
+_XSD_DATE = URIRef(_XSD_PFX + 'date')
+_XSD_TIME = URIRef(_XSD_PFX + 'time')
+
+# TODO: duration, gYearMonth, gYear, gMonthDay, gDay, gMonth
+
+_NUMERIC_LITERAL_TYPES = (
+    _XSD_INTEGER,
+    _XSD_DECIMAL,
+    _XSD_DOUBLE,
+    URIRef(_XSD_PFX + 'float'),
+
+    URIRef(_XSD_PFX + 'byte'),
+    URIRef(_XSD_PFX + 'int'),
+    URIRef(_XSD_PFX + 'long'),
+    URIRef(_XSD_PFX + 'negativeInteger'),
+    URIRef(_XSD_PFX + 'nonNegativeInteger'),
+    URIRef(_XSD_PFX + 'nonPositiveInteger'),
+    URIRef(_XSD_PFX + 'positiveInteger'),
+    URIRef(_XSD_PFX + 'short'),
+    URIRef(_XSD_PFX + 'unsignedByte'),
+    URIRef(_XSD_PFX + 'unsignedInt'),
+    URIRef(_XSD_PFX + 'unsignedLong'),
+    URIRef(_XSD_PFX + 'unsignedShort'),
+
+)
+
+# these have "native" syntax in N3/SPARQL
+_PLAIN_LITERAL_TYPES = (
+    _XSD_INTEGER,
+    _XSD_BOOLEAN,
+    _XSD_DOUBLE,
+    _XSD_DECIMAL,
+)
+
+# these have special INF and NaN XSD representations
+_NUMERIC_INF_NAN_LITERAL_TYPES = (
+    URIRef(_XSD_PFX + 'float'),
+    _XSD_DOUBLE,
+    _XSD_DECIMAL,
+)
+
+
+def _castPythonToLiteral(obj):
+    """
+    Casts a python datatype to a tuple of the lexical value and a
+    datatype URI (or None)
+    """
+    for pType, (castFunc, dType) in _PythonToXSD:
+        if isinstance(obj, pType):
+            if castFunc:
+                return castFunc(obj), dType
+            elif dType:
+                return obj, dType
+            else:
+                return obj, None
+    return obj, None  # TODO: is this right for the fall through case?
+
+from decimal import Decimal
+
+# Mappings from Python types to XSD datatypes and back (borrowed from sparta)
+# datetime instances are also instances of date... so we need to order these.
+
+# SPARQL/Turtle/N3 has shortcuts for integer, double, decimal
+# python has only float - to be in tune with sparql/n3/turtle
+# we default to XSD.double for float literals
+
+# python ints are promoted to longs when overflowing
+# python longs have no limit
+# both map to the abstract integer type,
+# rather than some concrete bit-limited datatype
+
+_PythonToXSD = [
+    (str, (None, None)),
+    (float, (None, _XSD_DOUBLE)),
+    (bool, (lambda i:str(i).lower(), _XSD_BOOLEAN)),
+    (int, (None, _XSD_INTEGER)),
+    (int, (None, _XSD_INTEGER)),
+    (Decimal, (None, _XSD_DECIMAL)),
+    (datetime, (lambda i:i.isoformat(), _XSD_DATETIME)),
+    (date, (lambda i:i.isoformat(), _XSD_DATE)),
+    (time, (lambda i:i.isoformat(), _XSD_TIME)),
+    (xml.dom.minidom.Document, (_writeXML, _RDF_XMLLITERAL)),
+    # this is a bit dirty - by accident the html5lib parser produces
+    # DocumentFragments, and the xml parser Documents, letting this
+    # decide what datatype to use makes roundtripping easier, but it a
+    # bit random
+    (xml.dom.minidom.DocumentFragment, (_writeXML, _RDF_HTMLLITERAL))
+]
+
+XSDToPython = {
+    None : None, # plain literals map directly to value space
+    URIRef(_XSD_PFX + 'time'): parse_time,
+    URIRef(_XSD_PFX + 'date'): parse_date,
+    URIRef(_XSD_PFX + 'gYear'): parse_date,
+    URIRef(_XSD_PFX + 'gYearMonth'): parse_date,
+    URIRef(_XSD_PFX + 'dateTime'): parse_datetime,
+    URIRef(_XSD_PFX + 'string'): None,
+    URIRef(_XSD_PFX + 'normalizedString'): None,
+    URIRef(_XSD_PFX + 'token'): None,
+    URIRef(_XSD_PFX + 'language'): None,
+    URIRef(_XSD_PFX + 'boolean'): lambda i: i.lower() in ['1', 'true'],
+    URIRef(_XSD_PFX + 'decimal'): Decimal,
+    URIRef(_XSD_PFX + 'integer'): int,
+    URIRef(_XSD_PFX + 'nonPositiveInteger'): int,
+    URIRef(_XSD_PFX + 'long'): int,
+    URIRef(_XSD_PFX + 'nonNegativeInteger'): int,
+    URIRef(_XSD_PFX + 'negativeInteger'): int,
+    URIRef(_XSD_PFX + 'int'): int,
+    URIRef(_XSD_PFX + 'unsignedLong'): int,
+    URIRef(_XSD_PFX + 'positiveInteger'): int,
+    URIRef(_XSD_PFX + 'short'): int,
+    URIRef(_XSD_PFX + 'unsignedInt'): int,
+    URIRef(_XSD_PFX + 'byte'): int,
+    URIRef(_XSD_PFX + 'unsignedShort'): int,
+    URIRef(_XSD_PFX + 'unsignedByte'): int,
+    URIRef(_XSD_PFX + 'float'): float,
+    URIRef(_XSD_PFX + 'double'): float,
+    URIRef(_XSD_PFX + 'base64Binary'): lambda s: base64.b64decode(s),
+    URIRef(_XSD_PFX + 'anyURI'): None,
+    _RDF_XMLLITERAL: _parseXML,
+    _RDF_HTMLLITERAL: _parseHTML
+}
+
+_toPythonMapping = {}
+
+_toPythonMapping.update(XSDToPython)
+
+def _castLexicalToPython(lexical, datatype):
+    """
+    Map a lexical form to the value-space for the given datatype
+    :returns: a python object for the value or ``None``
+    """
+    convFunc = _toPythonMapping.get(datatype, False)
+    if convFunc:
+        try:
+            return convFunc(lexical)
+        except:
+            # not a valid lexical representation for this dt
+            return None
+    elif convFunc is None:
+        # no conv func means 1-1 lexical<->value-space mapping
+        try:
+            return str(lexical)
+        except UnicodeDecodeError:
+            return str(lexical, 'utf-8')
+    else:
+        # no convFunc - unknown data-type
+        return None
+
+def bind(datatype, pythontype, constructor=None, lexicalizer=None):
+    """
+    register a new datatype<->pythontype binding
+
+    :param constructor: an optional function for converting lexical forms
+                        into a Python instances, if not given the pythontype
+                        is used directly
+
+    :param lexicalizer: an optinoal function for converting python objects to
+                        lexical form, if not given object.__str__ is used
+
+    """
+    if datatype in _toPythonMapping:
+        logger.warning("datatype '%s' was already bound. Rebinding." %
+                        datatype)
+
+    if constructor == None:
+        constructor = pythontype
+    _toPythonMapping[datatype] = constructor
+    _PythonToXSD.append((pythontype, (lexicalizer, datatype)))
+
+
+class Variable(Identifier):
+    """
+    A Variable - this is used for querying, or in Formula aware
+    graphs, where Variables can stored in the graph
+    """
+    __slots__ = ()
+
+    def __new__(cls, value):
+        if len(value) == 0:
+            raise Exception(
+                "Attempted to create variable with empty string as name!")
+        if value[0] == '?':
+            value = value[1:]
+        return str.__new__(cls, value)
+
+    def __repr__(self):
+        if self.__class__ is Variable:
+            clsName = "rdflib.term.Variable"
+        else:
+            clsName = self.__class__.__name__
+
+        return """%s(%s)""" % (clsName, super(Variable, self).__repr__())
+
+    def toPython(self):
+        return "?%s" % self
+
+    def n3(self, namespace_manager = None):
+        return "?%s" % self
+
+    def __reduce__(self):
+        return (Variable, (str(self),))
+
+    def md5_term_hash(self):
+        """a string of hex that will be the same for two Variables that
+        are the same. It is not a suitable unique id.
+
+        Supported for backwards compatibility; new code should
+        probably just use __hash__
+        """
+        warnings.warn(
+            "method md5_term_hash is deprecated, and will be removed " +
+            "removed in the future. If you use this please let rdflib-dev know!",
+            category=DeprecationWarning, stacklevel=2)
+        d = md5(self.encode())
+        d.update(b("V"))
+        return d.hexdigest()
+
+
+class Statement(Node, tuple):
+
+    def __new__(cls, xxx_todo_changeme, context):
+        (subject, predicate, object) = xxx_todo_changeme
+        warnings.warn(
+            "Class Statement is deprecated, and will be removed in " +
+            "the future. If you use this please let rdflib-dev know!",
+            category=DeprecationWarning, stacklevel=2)
+        return tuple.__new__(cls, ((subject, predicate, object), context))
+
+    def __reduce__(self):
+        return (Statement, (self[0], self[1]))
+
+    def toPython(self):
+        return (self[0], self[1])
+
+# Nodes are ordered like this
+# See http://www.w3.org/TR/sparql11-query/#modOrderBy
+# we leave "space" for more subclasses of Node elsewhere
+# default-dict to grazefully fail for new subclasses
+_ORDERING = defaultdict(int)
+_ORDERING.update({
+    BNode: 10,
+    Variable: 20,
+    URIRef: 30,
+    Literal: 40
+    })
+
+
+def _isEqualXMLNode(node, other):
+    from xml.dom.minidom import Node
+
+    def recurse():
+        # Recursion through the children
+        # In Python2, the semantics of 'map' is such that the check on
+        # length would be unnecessary. In Python 3,
+        # the semantics of map has changed (why, oh why???) and the check
+        # for the length becomes necessary...
+        if len(node.childNodes) != len(other.childNodes):
+            return False
+        for (nc, oc) in map(
+                lambda x, y: (x, y), node.childNodes, other.childNodes):
+            if not _isEqualXMLNode(nc, oc):
+                return False
+        # if we got here then everything is fine:
+        return True
+
+    if node is None or other is None:
+        return False
+
+    if node.nodeType != other.nodeType:
+        return False
+
+    if node.nodeType in [Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE]:
+        return recurse()
+
+    elif node.nodeType == Node.ELEMENT_NODE:
+        # Get the basics right
+        if not (node.tagName == other.tagName
+                and node.namespaceURI == other.namespaceURI):
+            return False
+
+        # Handle the (namespaced) attributes; the namespace setting key
+        # should be ignored, though
+        # Note that the minidom orders the keys already, so we do not have
+        # to worry about that, which is a bonus...
+        n_keys = [
+            k for k in node.attributes.keysNS()
+            if k[0] != 'http://www.w3.org/2000/xmlns/']
+        o_keys = [
+            k for k in other.attributes.keysNS()
+            if k[0] != 'http://www.w3.org/2000/xmlns/']
+        if len(n_keys) != len(o_keys):
+            return False
+        for k in n_keys:
+            if not (k in o_keys
+                    and node.getAttributeNS(k[0], k[1]) ==
+                    other.getAttributeNS(k[0], k[1])):
+                return False
+
+        # if we got here, the attributes are all right, we can go down
+        # the tree recursively
+        return recurse()
+
+    elif node.nodeType in [
+            Node.TEXT_NODE, Node.COMMENT_NODE, Node.CDATA_SECTION_NODE,
+            Node.NOTATION_NODE]:
+        return node.data == other.data
+
+    elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
+        return node.data == other.data and node.target == other.target
+
+    elif node.nodeType == Node.ENTITY_NODE:
+        return node.nodeValue == other.nodeValue
+
+    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
+        return node.publicId == other.publicId \
+            and node.systemId == other.system.Id
+
+    else:
+        # should not happen, in fact
+        raise Exception(
+            'I dont know how to compare XML Node type: %s' % node.nodeType)
+
+# When this module is executed as a script, run the doctests embedded in
+# its docstrings.
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()
author	guerler
date	Fri, 31 Jul 2020 00:32:28 -0400
parents
children