diff planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/notation3.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author:   guerler
date:     Fri, 31 Jul 2020 00:32:28 -0400
parents:  (none)
children: (none)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/notation3.py Fri Jul 31 00:32:28 2020 -0400 @@ -0,0 +1,1933 @@ +#!/usr/bin/env python +""" +notation3.py - Standalone Notation3 Parser +Derived from CWM, the Closed World Machine + +Authors of the original suite: + +* Dan Connolly <@@> +* Tim Berners-Lee <@@> +* Yosi Scharf <@@> +* Joseph M. Reagle Jr. <reagle@w3.org> +* Rich Salz <rsalz@zolera.com> + +http://www.w3.org/2000/10/swap/notation3.py + +Copyright 2000-2007, World Wide Web Consortium. +Copyright 2001, MIT. +Copyright 2001, Zolera Systems Inc. + +License: W3C Software License +http://www.w3.org/Consortium/Legal/copyright-software + +Modified by Sean B. Palmer +Copyright 2007, Sean B. Palmer. + +Modified to work with rdflib by Gunnar Aastrand Grimnes +Copyright 2010, Gunnar A. Grimnes + +""" + +# Python standard libraries +import types +import sys +import os +import re +import codecs +import warnings + +from decimal import Decimal + +from uuid import uuid4 + +from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id +from rdflib.graph import QuotedGraph, ConjunctiveGraph, Graph +from rdflib import py3compat +b = py3compat.b + +__all__ = ['BadSyntax', 'N3Parser', 'TurtleParser', + "splitFragP", "join", "base", + "runNamespace", "uniqueURI", "hexify"] + +from rdflib.parser import Parser + + +def splitFragP(uriref, punct=0): + """split a URI reference before the fragment + + Punctuation is kept. + + e.g. + + >>> splitFragP("abc#def") + ('abc', '#def') + + >>> splitFragP("abcdef") + ('abcdef', '') + + """ + + i = uriref.rfind("#") + if i >= 0: + return uriref[:i], uriref[i:] + else: + return uriref, '' + + +@py3compat.format_doctest_out +def join(here, there): + """join an absolute URI and URI reference + (non-ascii characters are supported/doctested; + haven't checked the details of the IRI spec though) + + ``here`` is assumed to be absolute. + ``there`` is URI reference. + + >>> join('http://example/x/y/z', '../abc') + 'http://example/x/abc' + + Raise ValueError if there uses relative path + syntax but here has no hierarchical path. + + >>> join('mid:foo@example', '../foo') # doctest: +NORMALIZE_WHITESPACE + Traceback (most recent call last): + raise ValueError(here) + ValueError: Base <mid:foo@example> has no slash + after colon - with relative '../foo'. + + >>> join('http://example/x/y/z', '') + 'http://example/x/y/z' + + >>> join('mid:foo@example', '#foo') + 'mid:foo@example#foo' + + We grok IRIs + + >>> len(%(u)s'Andr\\xe9') + 5 + + >>> join('http://example.org/', %(u)s'#Andr\\xe9') + %(u)s'http://example.org/#Andr\\xe9' + """ + +# assert(here.find("#") < 0), \ +# "Base may not contain hash: '%s'" % here # why must caller splitFrag? 
+ + slashl = there.find('/') + colonl = there.find(':') + + # join(base, 'foo:/') -- absolute + if colonl >= 0 and (slashl < 0 or colonl < slashl): + return there + + bcolonl = here.find(':') + assert(bcolonl >= 0), \ + "Base uri '%s' is not absolute" % here # else it's not absolute + + path, frag = splitFragP(there) + if not path: + return here + frag + + # join('mid:foo@example', '../foo') bzzt + if here[bcolonl + 1:bcolonl + 2] != '/': + raise ValueError( + ("Base <%s> has no slash after " + "colon - with relative '%s'.") % (here, there)) + + if here[bcolonl + 1:bcolonl + 3] == '//': + bpath = here.find('/', bcolonl + 3) + else: + bpath = bcolonl + 1 + + # join('http://xyz', 'foo') + if bpath < 0: + bpath = len(here) + here = here + '/' + + # join('http://xyz/', '//abc') => 'http://abc' + if there[:2] == '//': + return here[:bcolonl + 1] + there + + # join('http://xyz/', '/abc') => 'http://xyz/abc' + if there[:1] == '/': + return here[:bpath] + there + + slashr = here.rfind('/') + + while 1: + if path[:2] == './': + path = path[2:] + if path == '.': + path = '' + elif path[:3] == '../' or path == '..': + path = path[3:] + i = here.rfind('/', bpath, slashr) + if i >= 0: + here = here[:i + 1] + slashr = i + else: + break + + return here[:slashr + 1] + path + frag + + +def base(): + """The base URI for this process - the Web equiv of cwd + + Relative or abolute unix-standard filenames parsed relative to + this yeild the URI of the file. + If we had a reliable way of getting a computer name, + we should put it in the hostname just to prevent ambiguity + + """ + # return "file://" + hostname + os.getcwd() + "/" + return "file://" + _fixslash(os.getcwd()) + "/" + + +def _fixslash(s): + """ Fix windowslike filename to unixlike - (#ifdef WINDOWS)""" + s = s.replace("\\", "/") + if s[0] != "/" and s[1] == ":": + s = s[2:] # @@@ Hack when drive letter present + return s + + +CONTEXT = 0 +PRED = 1 +SUBJ = 2 +OBJ = 3 + +PARTS = PRED, SUBJ, OBJ +ALL4 = CONTEXT, PRED, SUBJ, OBJ + +SYMBOL = 0 +FORMULA = 1 +LITERAL = 2 +LITERAL_DT = 21 +LITERAL_LANG = 22 +ANONYMOUS = 3 +XMLLITERAL = 25 + +Logic_NS = "http://www.w3.org/2000/10/swap/log#" +NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging +forSomeSym = Logic_NS + "forSome" +forAllSym = Logic_NS + "forAll" + +RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" +RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +OWL_NS = "http://www.w3.org/2002/07/owl#" +DAML_sameAs_URI = OWL_NS + "sameAs" +parsesTo_URI = Logic_NS + "parsesTo" +RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/" + +List_NS = RDF_NS_URI # From 20030808 +_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#" + +N3_first = (SYMBOL, List_NS + "first") +N3_rest = (SYMBOL, List_NS + "rest") +N3_li = (SYMBOL, List_NS + "li") +N3_nil = (SYMBOL, List_NS + "nil") +N3_List = (SYMBOL, List_NS + "List") +N3_Empty = (SYMBOL, List_NS + "Empty") + + +runNamespaceValue = None + + +def runNamespace(): + "Return a URI suitable as a namespace for run-local objects" + # @@@ include hostname (privacy?) (hash it?) 
+ global runNamespaceValue + if runNamespaceValue is None: + runNamespaceValue = join(base(), _unique_id()) + '#' + return runNamespaceValue + +nextu = 0 + + +def uniqueURI(): + "A unique URI" + global nextu + nextu += 1 + # return runNamespace() + "u_" + `nextu` + return runNamespace() + "u_" + str(nextu) + + +tracking = False +chatty_flag = 50 + +# from why import BecauseOfData, becauseSubexpression + + +def BecauseOfData(*args, **kargs): + # print args, kargs + pass + + +def becauseSubexpression(*args, **kargs): + # print args, kargs + pass + +N3_forSome_URI = forSomeSym +N3_forAll_URI = forAllSym + +# Magic resources we know about + +ADDED_HASH = "#" # Stop where we use this in case we want to remove it! +# This is the hash on namespace URIs + +RDF_type = (SYMBOL, RDF_type_URI) +DAML_sameAs = (SYMBOL, DAML_sameAs_URI) + +LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies" + +BOOLEAN_DATATYPE = _XSD_PFX + "boolean" +DECIMAL_DATATYPE = _XSD_PFX + "decimal" +DOUBLE_DATATYPE = _XSD_PFX + "double" +FLOAT_DATATYPE = _XSD_PFX + "float" +INTEGER_DATATYPE = _XSD_PFX + "integer" + +option_noregen = 0 # If set, do not regenerate genids on output + +# @@ I18n - the notname chars need extending for well known unicode non-text +# characters. The XML spec switched to assuming unknown things were name +# characaters. +# _namechars = string.lowercase + string.uppercase + string.digits + '_-' +_notQNameChars = \ + "\t\r\n !\"#$&'()*,+/;<=>?@[\\]^`{|}~" # else valid qname :-/ +_notKeywordsChars = _notQNameChars + "." +_notNameChars = _notQNameChars + ":" # Assume anything else valid name :-/ +_rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' + +hexChars = 'ABCDEFabcdef0123456789' +escapeChars = "(_~.-!$&'()*+,;=/?#@%)" # valid for \ escapes in localnames + +def unicodeExpand(m): + try: + return chr(int(m.group(1), 16)) + except: + raise Exception("Invalid unicode code point: " + m.group(1)) + +if py3compat.narrow_build: + def unicodeExpand(m): + try: + return chr(int(m.group(1), 16)) + except ValueError: + warnings.warn( + 'Encountered a unicode char > 0xFFFF in a narrow python build. ' + 'Trying to degrade gracefully, but this can cause problems ' + 'later when working with the string:\n%s' % m.group(0)) + return codecs.decode(m.group(0), 'unicode_escape') + +unicodeEscape4 = re.compile( + r'\\u([0-9a-fA-F]{4})') +unicodeEscape8 = re.compile( + r'\\U([0-9a-fA-F]{8})') + + + +N3CommentCharacter = "#" # For unix script # ! compatabilty + +########################################## Parse string to sink +# +# Regular expressions: +eol = re.compile( + r'[ \t]*(#[^\n]*)?\r?\n') # end of line, poss. w/comment +eof = re.compile( + r'[ \t]*(#[^\n]*)?$') # end of file, poss. 
w/comment +ws = re.compile(r'[ \t]*') # Whitespace not including NL +signed_integer = re.compile(r'[-+]?[0-9]+') # integer +integer_syntax = re.compile(r'[-+]?[0-9]+') +decimal_syntax = re.compile(r'[-+]?[0-9]*\.[0-9]+') +exponent_syntax = re.compile(r'[-+]?(?:[0-9]+\.[0-9]*(?:e|E)[-+]?[0-9]+|'+ + r'\.[0-9](?:e|E)[-+]?[0-9]+|'+ + r'[0-9]+(?:e|E)[-+]?[0-9]+)') +digitstring = re.compile(r'[0-9]+') # Unsigned integer +interesting = re.compile(r"""[\\\r\n\"\']""") +langcode = re.compile(r'[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*') + + +class SinkParser: + def __init__(self, store, openFormula=None, thisDoc="", baseURI=None, + genPrefix="", why=None, turtle=False): + """ note: namespace names should *not* end in # ; + the # will get added during qname processing """ + + self._bindings = {} + if thisDoc != "": + assert ':' in thisDoc, "Document URI not absolute: <%s>" % thisDoc + self._bindings[""] = thisDoc + "#" # default + + self._store = store + if genPrefix: + store.setGenPrefix(genPrefix) # pass it on + + self._thisDoc = thisDoc + self.lines = 0 # for error handling + self.startOfLine = 0 # For calculating character number + self._genPrefix = genPrefix + self.keywords = ['a', 'this', 'bind', 'has', 'is', 'of', + 'true', 'false'] + self.keywordsSet = 0 # Then only can others be considerd qnames + self._anonymousNodes = {} + # Dict of anon nodes already declared ln: Term + self._variables = {} + self._parentVariables = {} + self._reason = why # Why the parser was asked to parse this + + self.turtle = turtle # raise exception when encountering N3 extensions + # Turtle allows single or double quotes around strings, whereas N3 + # only allows double quotes. + self.string_delimiters = ('"', "'") if turtle else ('"',) + + self._reason2 = None # Why these triples + # was: diag.tracking + if tracking: + self._reason2 = BecauseOfData( + store.newSymbol(thisDoc), because=self._reason) + + if baseURI: + self._baseURI = baseURI + else: + if thisDoc: + self._baseURI = thisDoc + else: + self._baseURI = None + + assert not self._baseURI or ':' in self._baseURI + + if not self._genPrefix: + if self._thisDoc: + self._genPrefix = self._thisDoc + "#_g" + else: + self._genPrefix = uniqueURI() + + if openFormula is None: + if self._thisDoc: + self._formula = store.newFormula(thisDoc + "#_formula") + else: + self._formula = store.newFormula() + else: + self._formula = openFormula + + self._context = self._formula + self._parentContext = None + + def here(self, i): + """String generated from position in file + + This is for repeatability when refering people to bnodes in a document. + This has diagnostic uses less formally, as it should point one to which + bnode the arbitrary identifier actually is. It gives the + line and character number of the '[' charcacter or path character + which introduced the blank node. The first blank node is boringly + _L1C1. 
It used to be used only for tracking, but for tests in general + it makes the canonical ordering of bnodes repeatable.""" + + return "%s_L%iC%i" % (self._genPrefix, self.lines, + i - self.startOfLine + 1) + + def formula(self): + return self._formula + + def loadStream(self, stream): + return self.loadBuf(stream.read()) # Not ideal + + def loadBuf(self, buf): + """Parses a buffer and returns its top level formula""" + self.startDoc() + + self.feed(buf) + return self.endDoc() # self._formula + + def feed(self, octets): + """Feed an octet stream tothe parser + + if BadSyntax is raised, the string + passed in the exception object is the + remainder after any statements have been parsed. + So if there is more data to feed to the + parser, it should be straightforward to recover.""" + + if not isinstance(octets, str): + s = octets.decode('utf-8') + # NB already decoded, so \ufeff + if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode('utf-8'): + s = s[1:] + else: + s = octets + + i = 0 + while i >= 0: + j = self.skipSpace(s, i) + if j < 0: + return + + i = self.directiveOrStatement(s, j) + if i < 0: + #print("# next char: %s" % s[j]) + self.BadSyntax(s, j, + "expected directive or statement") + + def directiveOrStatement(self, argstr, h): + + i = self.skipSpace(argstr, h) + if i < 0: + return i # EOF + + if self.turtle: + j = self.sparqlDirective(argstr, i) + if j >= 0: + return j + + j = self.directive(argstr, i) + if j >= 0: + return self.checkDot(argstr, j) + + j = self.statement(argstr, i) + if j >= 0: + return self.checkDot(argstr, j) + + return j + + # @@I18N + # _namechars = string.lowercase + string.uppercase + string.digits + '_-' + + def tok(self, tok, argstr, i, colon=False): + """Check for keyword. Space must have been stripped on entry and + we must not be at end of file. + + if colon, then keyword followed by colon is ok + (@prefix:<blah> is ok, rdf:type shortcut a must be followed by ws) + """ + + assert tok[0] not in _notNameChars # not for punctuation + if argstr[i:i + 1] == "@": + i = i + 1 + else: + if tok not in self.keywords: + return -1 # No, this has neither keywords declaration nor "@" + + if (argstr[i:i + len(tok)] == tok + and ( argstr[i + len(tok)] in _notKeywordsChars) + or (colon and argstr[i+len(tok)] == ':')): + i = i + len(tok) + return i + else: + return -1 + + def sparqlTok(self, tok, argstr, i): + """Check for SPARQL keyword. Space must have been stripped on entry + and we must not be at end of file. + Case insensitive and not preceeded by @ + """ + + assert tok[0] not in _notNameChars # not for punctuation + + if (argstr[i:i + len(tok)].lower() == tok.lower() + and (argstr[i + len(tok)] in _notQNameChars)): + i = i + len(tok) + return i + else: + return -1 + + + def directive(self, argstr, i): + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + res = [] + + j = self.tok('bind', argstr, i) # implied "#". Obsolete. 
+ if j > 0: + self.BadSyntax(argstr, i, + "keyword bind is obsolete: use @prefix") + + j = self.tok('keywords', argstr, i) + if j > 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'keywords' when in Turtle mode.") + + i = self.commaSeparatedList(argstr, j, res, self.bareWord) + if i < 0: + self.BadSyntax(argstr, i, + "'@keywords' needs comma separated list of words") + self.setKeywords(res[:]) + return i + + j = self.tok('forAll', argstr, i) + if j > 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'forAll' when in Turtle mode.") + + i = self.commaSeparatedList(argstr, j, res, self.uri_ref2) + if i < 0: + self.BadSyntax(argstr, i, + "Bad variable list after @forAll") + for x in res: + # self._context.declareUniversal(x) + if x not in self._variables or x in self._parentVariables: + self._variables[x] = self._context.newUniversal(x) + return i + + j = self.tok('forSome', argstr, i) + if j > 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'forSome' when in Turtle mode.") + + i = self. commaSeparatedList(argstr, j, res, self.uri_ref2) + if i < 0: + self.BadSyntax(argstr, i, + "Bad variable list after @forSome") + for x in res: + self._context.declareExistential(x) + return i + + j = self.tok('prefix', argstr, i, colon=True) # no implied "#" + if j >= 0: + t = [] + i = self.qname(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, + "expected qname after @prefix") + j = self.uri_ref2(argstr, i, t) + if j < 0: + self.BadSyntax(argstr, i, + "expected <uriref> after @prefix _qname_") + ns = self.uriOf(t[1]) + + if self._baseURI: + ns = join(self._baseURI, ns) + elif ":" not in ns: + self.BadSyntax(argstr, j, + "With no base URI, cannot use " + + "relative URI in @prefix <" + ns + ">") + assert ':' in ns # must be absolute + self._bindings[t[0][0]] = ns + self.bind(t[0][0], hexify(ns)) + return j + + j = self.tok('base', argstr, i) # Added 2007/7/7 + if j >= 0: + t = [] + i = self.uri_ref2(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, + "expected <uri> after @base ") + ns = self.uriOf(t[0]) + + if self._baseURI: + ns = join(self._baseURI, ns) + else: + self.BadSyntax(argstr, j, + "With no previous base URI, cannot use " + + "relative URI in @base <" + ns + ">") + assert ':' in ns # must be absolute + self._baseURI = ns + return i + + return -1 # Not a directive, could be something else. + + def sparqlDirective(self, argstr, i): + + """ + turtle and trig support BASE/PREFIX without @ and without + terminating . 
+ """ + + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + + j = self.sparqlTok('PREFIX', argstr, i) + if j >= 0: + t = [] + i = self.qname(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, + "expected qname after @prefix") + j = self.uri_ref2(argstr, i, t) + if j < 0: + self.BadSyntax(argstr, i, + "expected <uriref> after @prefix _qname_") + ns = self.uriOf(t[1]) + + if self._baseURI: + ns = join(self._baseURI, ns) + elif ":" not in ns: + self.BadSyntax(argstr, j, + "With no base URI, cannot use " + + "relative URI in @prefix <" + ns + ">") + assert ':' in ns # must be absolute + self._bindings[t[0][0]] = ns + self.bind(t[0][0], hexify(ns)) + return j + + j = self.sparqlTok('BASE', argstr, i) + if j >= 0: + t = [] + i = self.uri_ref2(argstr, j, t) + if i < 0: + self.BadSyntax(argstr, j, + "expected <uri> after @base ") + ns = self.uriOf(t[0]) + + if self._baseURI: + ns = join(self._baseURI, ns) + else: + self.BadSyntax(argstr, j, + "With no previous base URI, cannot use " + + "relative URI in @base <" + ns + ">") + assert ':' in ns # must be absolute + self._baseURI = ns + return i + + return -1 # Not a directive, could be something else. + + + def bind(self, qn, uri): + assert isinstance( + uri, bytes), "Any unicode must be %x-encoded already" + if qn == "": + self._store.setDefaultNamespace(uri) + else: + self._store.bind(qn, uri) + + def setKeywords(self, k): + "Takes a list of strings" + if k is None: + self.keywordsSet = 0 + else: + self.keywords = k + self.keywordsSet = 1 + + def startDoc(self): + # was: self._store.startDoc() + self._store.startDoc(self._formula) + + def endDoc(self): + """Signal end of document and stop parsing. returns formula""" + self._store.endDoc(self._formula) # don't canonicalize yet + return self._formula + + def makeStatement(self, quadruple): + # $$$$$$$$$$$$$$$$$$$$$ + # print "# Parser output: ", `quadruple` + self._store.makeStatement(quadruple, why=self._reason2) + + def statement(self, argstr, i): + r = [] + i = self.object( + argstr, i, r) # Allow literal for subject - extends RDF + if i < 0: + return i + + j = self.property_list(argstr, i, r[0]) + + if j < 0: + self.BadSyntax( + argstr, i, "expected propertylist") + return j + + def subject(self, argstr, i, res): + return self.item(argstr, i, res) + + def verb(self, argstr, i, res): + """ has _prop_ + is _prop_ of + a + = + _prop_ + >- prop -> + <- prop -< + _operator_""" + + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + + r = [] + + j = self.tok('has', argstr, i) + if j >= 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'has' keyword in Turtle mode") + + i = self.prop(argstr, j, r) + if i < 0: + self.BadSyntax(argstr, j, + "expected property after 'has'") + res.append(('->', r[0])) + return i + + j = self.tok('is', argstr, i) + if j >= 0: + if self.turtle: + self.BadSyntax(argstr, i, "Found 'is' keyword in Turtle mode") + + i = self.prop(argstr, j, r) + if i < 0: + self.BadSyntax(argstr, j, + "expected <property> after 'is'") + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax(argstr, i, + "End of file found, expected property after 'is'") + i = j + j = self.tok('of', argstr, i) + if j < 0: + self.BadSyntax(argstr, i, + "expected 'of' after 'is' <prop>") + res.append(('<-', r[0])) + return j + + j = self.tok('a', argstr, i) + if j >= 0: + res.append(('->', RDF_type)) + return j + + if argstr[i:i + 2] == "<=": + if self.turtle: + self.BadSyntax(argstr, i, + "Found '<=' in Turtle mode. 
") + + res.append(('<-', self._store.newSymbol(Logic_NS + "implies"))) + return i + 2 + + if argstr[i:i + 1] == "=": + if self.turtle: + self.BadSyntax(argstr, i, "Found '=' in Turtle mode") + if argstr[i + 1:i + 2] == ">": + res.append(('->', self._store.newSymbol(Logic_NS + "implies"))) + return i + 2 + res.append(('->', DAML_sameAs)) + return i + 1 + + if argstr[i:i + 2] == ":=": + if self.turtle: + self.BadSyntax(argstr, i, "Found ':=' in Turtle mode") + + # patch file relates two formulae, uses this @@ really? + res.append(('->', Logic_NS + "becomes")) + return i + 2 + + j = self.prop(argstr, i, r) + if j >= 0: + res.append(('->', r[0])) + return j + + if argstr[i:i + 2] == ">-" or argstr[i:i + 2] == "<-": + self.BadSyntax(argstr, j, + ">- ... -> syntax is obsolete.") + + return -1 + + def prop(self, argstr, i, res): + return self.item(argstr, i, res) + + def item(self, argstr, i, res): + return self.path(argstr, i, res) + + def blankNode(self, uri=None): + return self._store.newBlankNode(self._context, uri, why=self._reason2) + + def path(self, argstr, i, res): + """Parse the path production. + """ + j = self.nodeOrLiteral(argstr, i, res) + if j < 0: + return j # nope + + while argstr[j:j + 1] in "!^": # no spaces, must follow exactly (?) + ch = argstr[j:j + 1] + subj = res.pop() + obj = self.blankNode(uri=self.here(j)) + j = self.node(argstr, j + 1, res) + if j < 0: + self.BadSyntax(argstr, j, + "EOF found in middle of path syntax") + pred = res.pop() + if ch == "^": # Reverse traverse + self.makeStatement((self._context, pred, obj, subj)) + else: + self.makeStatement((self._context, pred, subj, obj)) + res.append(obj) + return j + + def anonymousNode(self, ln): + """Remember or generate a term for one of these _: anonymous nodes""" + term = self._anonymousNodes.get(ln, None) + if term is not None: + return term + term = self._store.newBlankNode(self._context, why=self._reason2) + self._anonymousNodes[ln] = term + return term + + def node(self, argstr, i, res, subjectAlready=None): + """Parse the <node> production. + Space is now skipped once at the beginning + instead of in multipe calls to self.skipSpace(). 
+ """ + subj = subjectAlready + + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + i = j + ch = argstr[i:i + 1] # Quick 1-character checks first: + + if ch == "[": + bnodeID = self.here(i) + j = self.skipSpace(argstr, i + 1) + if j < 0: + self.BadSyntax(argstr, i, + "EOF after '['") + # Hack for "is" binding name to anon node + if argstr[j:j + 1] == "=": + if self.turtle: + self.BadSyntax(argstr, j, "Found '[=' or '[ =' when in turtle mode.") + i = j + 1 + objs = [] + j = self.objectList(argstr, i, objs) + if j >= 0: + subj = objs[0] + if len(objs) > 1: + for obj in objs: + self.makeStatement((self._context, + DAML_sameAs, subj, obj)) + j = self.skipSpace(argstr, j) + if j < 0: + self.BadSyntax(argstr, i, + "EOF when objectList expected after [ = ") + if argstr[j:j + 1] == ";": + j = j + 1 + else: + self.BadSyntax(argstr, i, + "objectList expected after [= ") + + if subj is None: + subj = self.blankNode(uri=bnodeID) + + i = self.property_list(argstr, j, subj) + if i < 0: + self.BadSyntax(argstr, j, + "property_list expected") + + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax(argstr, i, + "EOF when ']' expected after [ <propertyList>") + if argstr[j:j + 1] != "]": + self.BadSyntax(argstr, j, + "']' expected") + res.append(subj) + return j + 1 + + if not self.turtle and ch == "{": + # if self.turtle: + # self.BadSyntax(argstr, i, + # "found '{' while in Turtle mode, Formulas not supported!") + ch2 = argstr[i + 1:i + 2] + if ch2 == '$': + # a set + i += 1 + j = i + 1 + List = [] + first_run = True + while 1: + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, + "needed '$}', found end.") + if argstr[i:i + 2] == '$}': + j = i + 2 + break + + if not first_run: + if argstr[i:i + 1] == ',': + i += 1 + else: + self.BadSyntax( + argstr, i, "expected: ','") + else: + first_run = False + + item = [] + j = self.item( + argstr, i, item) # @@@@@ should be path, was object + if j < 0: + self.BadSyntax(argstr, i, + "expected item in set or '$}'") + List.append(self._store.intern(item[0])) + res.append(self._store.newSet(List, self._context)) + return j + else: + # parse a formula + j = i + 1 + oldParentContext = self._parentContext + self._parentContext = self._context + parentAnonymousNodes = self._anonymousNodes + grandParentVariables = self._parentVariables + self._parentVariables = self._variables + self._anonymousNodes = {} + self._variables = self._variables.copy() + reason2 = self._reason2 + self._reason2 = becauseSubexpression + if subj is None: + subj = self._store.newFormula() + self._context = subj + + while 1: + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax( + argstr, i, "needed '}', found end.") + + if argstr[i:i + 1] == "}": + j = i + 1 + break + + j = self.directiveOrStatement(argstr, i) + if j < 0: + self.BadSyntax( + argstr, i, "expected statement or '}'") + + self._anonymousNodes = parentAnonymousNodes + self._variables = self._parentVariables + self._parentVariables = grandParentVariables + self._context = self._parentContext + self._reason2 = reason2 + self._parentContext = oldParentContext + res.append(subj.close()) # No use until closed + return j + + if ch == "(": + thing_type = self._store.newList + ch2 = argstr[i + 1:i + 2] + if ch2 == '$': + thing_type = self._store.newSet + i += 1 + j = i + 1 + + List = [] + while 1: + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax( + argstr, i, "needed ')', found end.") + if argstr[i:i + 1] == ')': + j = i + 1 + break + + item = [] + j = self.item( + argstr, i, item) # @@@@@ 
should be path, was object + if j < 0: + self.BadSyntax(argstr, i, + "expected item in list or ')'") + List.append(self._store.intern(item[0])) + res.append(thing_type(List, self._context)) + return j + + j = self.tok('this', argstr, i) # This context + if j >= 0: + self.BadSyntax(argstr, i, + "Keyword 'this' was ancient N3. Now use " + + "@forSome and @forAll keywords.") + + # booleans + j = self.tok('true', argstr, i) + if j >= 0: + res.append(True) + return j + j = self.tok('false', argstr, i) + if j >= 0: + res.append(False) + return j + + if subj is None: # If this can be a named node, then check for a name. + j = self.uri_ref2(argstr, i, res) + if j >= 0: + return j + + return -1 + + def property_list(self, argstr, i, subj): + """Parse property list + Leaves the terminating punctuation in the buffer + """ + while 1: + while 1: # skip repeat ; + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax(argstr, i, + "EOF found when expected verb in property list") + if argstr[j]!=';': break + i = j+1 + + if argstr[j:j + 2] == ":-": + if self.turtle: + self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode") + i = j + 2 + res = [] + j = self.node(argstr, i, res, subj) + if j < 0: + self.BadSyntax(argstr, i, + "bad {} or () or [] node after :- ") + i = j + continue + i = j + v = [] + j = self.verb(argstr, i, v) + if j <= 0: + return i # void but valid + + objs = [] + i = self.objectList(argstr, j, objs) + if i < 0: + self.BadSyntax(argstr, j, + "objectList expected") + for obj in objs: + dira, sym = v[0] + if dira == '->': + self.makeStatement((self._context, sym, subj, obj)) + else: + self.makeStatement((self._context, sym, obj, subj)) + + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax(argstr, j, + "EOF found in list of objects") + if argstr[i:i + 1] != ";": + return i + i = i + 1 # skip semicolon and continue + + def commaSeparatedList(self, argstr, j, res, what): + """return value: -1 bad syntax; >1 new position in argstr + res has things found appended + """ + i = self.skipSpace(argstr, j) + if i < 0: + self.BadSyntax(argstr, i, + "EOF found expecting comma sep list") + if argstr[i] == ".": + return j # empty list is OK + i = what(argstr, i, res) + if i < 0: + return -1 + + while 1: + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + ch = argstr[j:j + 1] + if ch != ",": + if ch != ".": + return -1 + return j # Found but not swallowed "." + i = what(argstr, j + 1, res) + if i < 0: + self.BadSyntax(argstr, i, + "bad list content") + + def objectList(self, argstr, i, res): + i = self.object(argstr, i, res) + if i < 0: + return -1 + while 1: + j = self.skipSpace(argstr, i) + if j < 0: + self.BadSyntax(argstr, j, + "EOF found after object") + if argstr[j:j + 1] != ",": + return j # Found something else! + i = self.object(argstr, j + 1, res) + if i < 0: + return i + + def checkDot(self, argstr, i): + j = self.skipSpace(argstr, i) + if j < 0: + return j # eof + if argstr[j:j + 1] == ".": + return j + 1 # skip + if argstr[j:j + 1] == "}": + return j # don't skip it + if argstr[j:j + 1] == "]": + return j + self.BadSyntax(argstr, j, + "expected '.' or '}' or ']' at end of statement") + + def uri_ref2(self, argstr, i, res): + """Generate uri from n3 representation. + + Note that the RDF convention of directly concatenating + NS and local name is now used though I prefer inserting a '#' + to make the namesapces look more like what XML folks expect. + """ + qn = [] + j = self.qname(argstr, i, qn) + if j >= 0: + pfx, ln = qn[0] + if pfx is None: + assert 0, "not used?" 
+ ns = self._baseURI + ADDED_HASH + else: + try: + ns = self._bindings[pfx] + except KeyError: + if pfx == "_": # Magic prefix 2001/05/30, can be changed + res.append(self.anonymousNode(ln)) + return j + if not self.turtle and pfx == "": + ns = join(self._baseURI or "", "#") + else: + self.BadSyntax(argstr, i, + "Prefix \"%s:\" not bound" % (pfx)) + symb = self._store.newSymbol(ns + ln) + if symb in self._variables: + res.append(self._variables[symb]) + else: + res.append(symb) # @@@ "#" CONVENTION + return j + + i = self.skipSpace(argstr, i) + if i < 0: + return -1 + + if argstr[i] == "?": + v = [] + j = self.variable(argstr, i, v) + if j > 0: # Forget varibles as a class, only in context. + res.append(v[0]) + return j + return -1 + + elif argstr[i] == "<": + i = i + 1 + st = i + while i < len(argstr): + if argstr[i] == ">": + uref = argstr[st:i] # the join should dealt with "": + + # expand unicode escapes + uref = unicodeEscape8.sub(unicodeExpand, uref) + uref = unicodeEscape4.sub(unicodeExpand, uref) + + if self._baseURI: + uref = join(self._baseURI, uref) # was: uripath.join + else: + assert ":" in uref, \ + "With no base URI, cannot deal with relative URIs" + if argstr[i - 1:i] == "#" and not uref[-1:] == "#": + uref = uref + \ + "#" # She meant it! Weirdness in urlparse? + symb = self._store.newSymbol(uref) + if symb in self._variables: + res.append(self._variables[symb]) + else: + res.append(symb) + return i + 1 + i = i + 1 + self.BadSyntax(argstr, j, + "unterminated URI reference") + + elif self.keywordsSet: + v = [] + j = self.bareWord(argstr, i, v) + if j < 0: + return -1 # Forget varibles as a class, only in context. + if v[0] in self.keywords: + self.BadSyntax(argstr, i, + 'Keyword "%s" not allowed here.' % v[0]) + res.append(self._store.newSymbol(self._bindings[""] + v[0])) + return j + else: + return -1 + + def skipSpace(self, argstr, i): + """Skip white space, newlines and comments. 
+ return -1 if EOF, else position of first non-ws character""" + while 1: + m = eol.match(argstr, i) + if m is None: + break + self.lines = self.lines + 1 + i = m.end() # Point to first character unmatched + self.startOfLine = i + m = ws.match(argstr, i) + if m is not None: + i = m.end() + m = eof.match(argstr, i) + if m is not None: + return -1 + return i + + def variable(self, argstr, i, res): + """ ?abc -> variable(:abc) + """ + + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + + if argstr[j:j + 1] != "?": + return -1 + j = j + 1 + i = j + if argstr[j] in "0123456789-": + self.BadSyntax(argstr, j, + "Varible name can't start with '%s'" % argstr[j]) + while i < len(argstr) and argstr[i] not in _notKeywordsChars: + i = i + 1 + if self._parentContext is None: + varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) + if varURI not in self._variables: + self._variables[varURI] = self._context.newUniversal( + varURI, why=self._reason2) + res.append(self._variables[varURI]) + return i + # @@ was: + # self.BadSyntax(argstr, j, + # "Can't use ?xxx syntax for variable in outermost level: %s" + # % argstr[j-1:i]) + varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) + if varURI not in self._parentVariables: + self._parentVariables[varURI] = self._parentContext.newUniversal( + varURI, why=self._reason2) + res.append(self._parentVariables[varURI]) + return i + + def bareWord(self, argstr, i, res): + """ abc -> :abc + """ + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + + if argstr[j] in "0123456789-" or argstr[j] in _notKeywordsChars: + return -1 + i = j + while i < len(argstr) and argstr[i] not in _notKeywordsChars: + i = i + 1 + res.append(argstr[j:i]) + return i + + def qname(self, argstr, i, res): + """ + xyz:def -> ('xyz', 'def') + If not in keywords and keywordsSet: def -> ('', 'def') + :def -> ('', 'def') + """ + + i = self.skipSpace(argstr, i) + if i < 0: + return -1 + + c = argstr[i] + if c in "0123456789-+.": + return -1 + if c not in _notNameChars: + ln = c + i = i + 1 + while i < len(argstr): + c = argstr[i] + if c not in _notNameChars: + ln = ln + c + i = i + 1 + else: + break + + if argstr[i - 1] == ".": # qname cannot end with "." + ln = ln[:-1] + if not ln: return -1 + i -= 1 + + else: # First character is non-alpha + ln = '' # Was: None - TBL (why? useful?) + + if i < len(argstr) and argstr[i] == ':': + pfx = ln + # bnodes names have different rules + if pfx == '_': + allowedChars = _notNameChars + else: + allowedChars = _notQNameChars + + i = i + 1 + lastslash = False + # start = i # TODO first char . + ln = '' + while i < len(argstr): + c = argstr[i] + if not lastslash and c == '\\': + lastslash = True + i += 1 + + elif lastslash or c not in allowedChars: + + if lastslash: + if c not in escapeChars: + raise BadSyntax(self._thisDoc, self.line, argstr, i, + "illegal escape "+c) + elif c=='%': + if argstr[i+1] not in hexChars or argstr[i+2] not in hexChars: + raise BadSyntax(self._thisDoc, self.line, argstr, i, + "illegal hex escape "+c) + + ln = ln + c + i = i + 1 + lastslash = False + else: + break + + if lastslash: + raise BadSyntax( + self._thisDoc, self.line, argstr, i, + "qname cannot end with \\") + + + if argstr[i-1]=='.': + # localname cannot end in . 
+ ln = ln[:-1] + if not ln: return -1 + i -= 1 + + res.append((pfx, ln)) + return i + + else: # delimiter was not ":" + if ln and self.keywordsSet and ln not in self.keywords: + res.append(('', ln)) + return i + return -1 + + def object(self, argstr, i, res): + j = self.subject(argstr, i, res) + if j >= 0: + return j + else: + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + else: + i = j + + if argstr[i] in self.string_delimiters: + if argstr[i:i + 3] == argstr[i] * 3: + delim = argstr[i] * 3 + else: + delim = argstr[i] + i = i + len(delim) + + j, s = self.strconst(argstr, i, delim) + + res.append(self._store.newLiteral(s)) + return j + else: + return -1 + + def nodeOrLiteral(self, argstr, i, res): + j = self.node(argstr, i, res) + startline = self.lines # Remember where for error messages + if j >= 0: + return j + else: + j = self.skipSpace(argstr, i) + if j < 0: + return -1 + else: + i = j + + ch = argstr[i] + if ch in "-+0987654321.": + m = exponent_syntax.match(argstr, i) + if m: + j = m.end() + res.append(float(argstr[i:j])) + return j + + m = decimal_syntax.match(argstr, i) + if m: + j = m.end() + res.append(Decimal(argstr[i:j])) + return j + + m = integer_syntax.match(argstr, i) + if m: + j = m.end() + res.append(int(argstr[i:j])) + return j + + # return -1 ## or fall through? + + if argstr[i] in self.string_delimiters: + if argstr[i:i + 3] == argstr[i] * 3: + delim = argstr[i] * 3 + else: + delim = argstr[i] + i = i + len(delim) + + dt = None + j, s = self.strconst(argstr, i, delim) + lang = None + if argstr[j:j + 1] == "@": # Language? + m = langcode.match(argstr, j + 1) + if m is None: + raise BadSyntax( + self._thisDoc, startline, argstr, i, + "Bad language code syntax on string " + + "literal, after @") + i = m.end() + lang = argstr[j + 1:i] + j = i + if argstr[j:j + 2] == "^^": + res2 = [] + j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI + dt = res2[0] + res.append(self._store.newLiteral(s, dt, lang)) + return j + else: + return -1 + + def uriOf(self, sym): + if isinstance(sym, tuple): + return sym[1] # old system for --pipe + # return sym.uriref() # cwm api + return sym + + def strconst(self, argstr, i, delim): + """parse an N3 string constant delimited by delim. + return index, val + """ + delim1 = delim[0] + delim2, delim3, delim4, delim5 = delim1 * 2, delim1 * 3, delim1 * 4, delim1 * 5 + + j = i + ustr = "" # Empty unicode string + startline = self.lines # Remember where for error messages + while j < len(argstr): + if argstr[j] == delim1: + if delim == delim1: # done when delim is " or ' + i = j + 1 + return i, ustr + if delim == delim3: # done when delim is """ or ''' and, respectively ... + if argstr[j:j + 5] == delim5: # ... we have "" or '' before + i = j + 5 + ustr = ustr + delim2 + return i, ustr + if argstr[j:j + 4] == delim4: # ... we have " or ' before + i = j + 4 + ustr = ustr + delim1 + return i, ustr + if argstr[j:j + 3] == delim3: # current " or ' is part of delim + i = j + 3 + return i, ustr + + # we are inside of the string and current char is " or ' + j = j + 1 + ustr = ustr + delim1 + continue + + m = interesting.search(argstr, j) # was argstr[j:]. + # Note for pos param to work, MUST be compiled ... re bug? 
+ assert m, "Quote expected in string at ^ in %s^%s" % ( + argstr[j - 20:j], argstr[j:j + 20]) # at least need a quote + + i = m.start() + try: + ustr = ustr + argstr[j:i] + except UnicodeError: + err = "" + for c in argstr[j:i]: + err = err + (" %02x" % ord(c)) + streason = sys.exc_info()[1].__str__() + raise BadSyntax( + self._thisDoc, startline, argstr, j, + "Unicode error appending characters" + + " %s to string, because\n\t%s" + % (err, streason)) + + # print "@@@ i = ",i, " j=",j, "m.end=", m.end() + + ch = argstr[i] + if ch == delim1: + j = i + continue + elif ch in ('"', "'") and ch != delim1: + ustr = ustr + ch + j = i + 1 + continue + elif ch in "\r\n": + if delim == delim1: + raise BadSyntax( + self._thisDoc, startline, argstr, i, + "newline found in string literal") + self.lines = self.lines + 1 + ustr = ustr + ch + j = i + 1 + self.startOfLine = j + + elif ch == "\\": + j = i + 1 + ch = argstr[j:j + 1] # Will be empty if string ends + if not ch: + raise BadSyntax( + self._thisDoc, startline, argstr, i, + "unterminated string literal (2)") + k = 'abfrtvn\\"'.find(ch) + if k >= 0: + uch = '\a\b\f\r\t\v\n\\"'[k] + ustr = ustr + uch + j = j + 1 + elif ch == "u": + j, ch = self.uEscape(argstr, j + 1, startline) + ustr = ustr + ch + elif ch == "U": + j, ch = self.UEscape(argstr, j + 1, startline) + ustr = ustr + ch + else: + self.BadSyntax(argstr, i, + "bad escape") + + self.BadSyntax(argstr, i, + "unterminated string literal") + + def _unicodeEscape(self, argstr, i, startline, reg, n, prefix): + if len(argstr)<i+n: + raise BadSyntax( + self._thisDoc, startline, argstr, i, + "unterminated string literal(3)") + try: + return i+n, reg.sub(unicodeExpand, '\\'+prefix+argstr[i:i+n]) + except: + raise BadSyntax( + self._thisDoc, startline, argstr, i, + "bad string literal hex escape: "+argstr[i:i+n]) + + def uEscape(self, argstr, i, startline): + return self._unicodeEscape(argstr, i, startline, unicodeEscape4, 4, 'u') + + def UEscape(self, argstr, i, startline): + return self._unicodeEscape(argstr, i, startline, unicodeEscape8, 8, 'U') + + def BadSyntax(self, argstr, i, msg): + raise BadSyntax(self._thisDoc, self.lines, argstr, i, msg) + +# If we are going to do operators then they should generate +# [ is operator:plus of ( \1 \2 ) ] + + +class BadSyntax(SyntaxError): + def __init__(self, uri, lines, argstr, i, why): + self._str = argstr.encode( + 'utf-8') # Better go back to strings for errors + self._i = i + self._why = why + self.lines = lines + self._uri = uri + + def __str__(self): + argstr = self._str + i = self._i + st = 0 + if i > 60: + pre = "..." + st = i - 60 + else: + pre = "" + if len(argstr) - i > 60: + post = "..." 
+ else: + post = "" + + return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' \ + % (self.lines + 1, self._uri, self._why, pre, + argstr[st:i], argstr[i:i + 60], post) + + @property + def message(self): + return str(self) + + + +############################################################################### +class Formula(object): + number = 0 + + def __init__(self, parent): + self.uuid = uuid4().hex + self.counter = 0 + Formula.number += 1 + self.number = Formula.number + self.existentials = {} + self.universals = {} + + self.quotedgraph = QuotedGraph( + store=parent.store, identifier=self.id()) + + def __str__(self): + return '_:Formula%s' % self.number + + def id(self): + return BNode('_:Formula%s' % self.number) + + def newBlankNode(self, uri=None, why=None): + if uri is None: + self.counter += 1 + bn = BNode('f%sb%s' % (self.uuid, self.counter)) + else: + bn = BNode(uri.split('#').pop().replace('_', 'b')) + return bn + + def newUniversal(self, uri, why=None): + return Variable(uri.split('#').pop()) + + def declareExistential(self, x): + self.existentials[x] = self.newBlankNode() + + def close(self): + + return self.quotedgraph + + +r_hibyte = re.compile(r'([\x80-\xff])') + + +class RDFSink(object): + def __init__(self, graph): + self.rootFormula = None + self.counter = 0 + self.graph = graph + + def newFormula(self): + assert self.graph.store.formula_aware + f = Formula(self.graph) + return f + + def newGraph(self, identifier): + return Graph(self.graph.store, identifier) + + def newSymbol(self, *args): + return URIRef(args[0]) + + def newBlankNode(self, arg=None, uri=None, why=None): + if isinstance(arg, Formula): + return arg.newBlankNode(uri) + elif isinstance(arg, Graph) or arg is None: + self.counter += 1 + bn = BNode('n' + str(self.counter)) + else: + bn = BNode(str(arg[0]).split('#').pop().replace('_', 'b')) + return bn + + def newLiteral(self, s, dt, lang): + if dt: + return Literal(s, datatype=dt) + else: + return Literal(s, lang=lang) + + def newList(self, n, f): + if not n: + return self.newSymbol( + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil' + ) + + a = self.newBlankNode(f) + first = self.newSymbol( + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#first' + ) + rest = self.newSymbol( + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#rest') + self.makeStatement((f, first, a, n[0])) + self.makeStatement((f, rest, a, self.newList(n[1:], f))) + return a + + def newSet(self, *args): + return set(args) + + def setDefaultNamespace(self, *args): + return ':'.join(repr(n) for n in args) + + def makeStatement(self, quadruple, why=None): + f, p, s, o = quadruple + + if hasattr(p, 'formula'): + raise Exception("Formula used as predicate") + + s = self.normalise(f, s) + p = self.normalise(f, p) + o = self.normalise(f, o) + + if f == self.rootFormula: + # print s, p, o, '.' 
+ self.graph.add((s, p, o)) + elif isinstance(f, Formula): + f.quotedgraph.add((s, p, o)) + else: + f.add((s,p,o)) + + # return str(quadruple) + + def normalise(self, f, n): + if isinstance(n, tuple): + return URIRef(str(n[1])) + + if isinstance(n, bool): + s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE) + return s + + if isinstance(n, int) or isinstance(n, int): + s = Literal(str(n), datatype=INTEGER_DATATYPE) + return s + + if isinstance(n, Decimal): + value = str(n) + if value == '-0': + value = '0' + s = Literal(value, datatype=DECIMAL_DATATYPE) + return s + + if isinstance(n, float): + s = Literal(str(n), datatype=DOUBLE_DATATYPE) + return s + + if isinstance(f, Formula): + if n in f.existentials: + return f.existentials[n] + + # if isinstance(n, Var): + # if f.universals.has_key(n): + # return f.universals[n] + # f.universals[n] = f.newBlankNode() + # return f.universals[n] + + return n + + def intern(self, something): + return something + + def bind(self, pfx, uri): + pass # print pfx, ':', uri + + def startDoc(self, formula): + self.rootFormula = formula + + def endDoc(self, formula): + pass + + +################################################### +# +# Utilities +# + + +@py3compat.format_doctest_out +def hexify(ustr): + """Use URL encoding to return an ASCII string + corresponding to the given UTF8 string + + >>> hexify("http://example/a b") + %(b)s'http://example/a%%20b' + + """ + # s1=ustr.encode('utf-8') + s = "" + for ch in ustr: # .encode('utf-8'): + if ord(ch) > 126 or ord(ch) < 33: + ch = "%%%02X" % ord(ch) + else: + ch = "%c" % ord(ch) + s = s + ch + return b(s) + + +class TurtleParser(Parser): + + """ + An RDFLib parser for Turtle + + See http://www.w3.org/TR/turtle/ + """ + + def __init__(self): + pass + + def parse(self, source, graph, encoding="utf-8", turtle=True): + + if encoding not in [None, "utf-8"]: + raise Exception( + ("N3/Turtle files are always utf-8 encoded, ", + "I was passed: %s") % encoding) + + sink = RDFSink(graph) + + baseURI = graph.absolutize( + source.getPublicId() or source.getSystemId() or "") + p = SinkParser(sink, baseURI=baseURI, turtle=turtle) + + p.loadStream(source.getByteStream()) + + for prefix, namespace in list(p._bindings.items()): + graph.bind(prefix, namespace) + + +class N3Parser(TurtleParser): + + """ + An RDFLib parser for Notation3 + + See http://www.w3.org/DesignIssues/Notation3.html + + """ + + def __init__(self): + pass + + def parse(self, source, graph, encoding="utf-8"): + # we're currently being handed a Graph, not a ConjunctiveGraph + assert graph.store.context_aware # is this implied by formula_aware + assert graph.store.formula_aware + + conj_graph = ConjunctiveGraph(store=graph.store) + conj_graph.default_context = graph # TODO: CG __init__ should have a + # default_context arg + # TODO: update N3Processor so that it can use conj_graph as the sink + conj_graph.namespace_manager = graph.namespace_manager + + TurtleParser.parse(self, source, conj_graph, encoding, turtle=False) + + +def _test(): # pragma: no cover + import doctest + doctest.testmod() + + +# if __name__ == '__main__': +# _test() + +def main(): # pragma: no cover + g = ConjunctiveGraph() + + sink = RDFSink(g) + base_uri = 'file://' + os.path.join(os.getcwd(), sys.argv[1]) + + p = SinkParser(sink, baseURI=base_uri) + p._bindings[''] = p._baseURI + '#' + p.startDoc() + + f = open(sys.argv[1], 'rb') + rdbytes = f.read() + f.close() + + p.feed(rdbytes) + p.endDoc() + for t in g.quads((None, None, None)): + + print(t) + +if __name__ == '__main__': + main() 
+ +# ends
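For reference, a minimal usage sketch (not part of the committed file above): in rdflib, the TurtleParser and N3Parser classes defined in this module are normally reached through the parser plugin names, so format="turtle" dispatches to TurtleParser and format="n3" to N3Parser. The example namespace and triple below are illustrative only.

from rdflib import Graph

# Illustrative Turtle snippet; ex: is a made-up example namespace.
turtle_data = """
@prefix ex: <http://example.org/> .
ex:alice ex:knows ex:bob .
"""

g = Graph()
# format="turtle" selects the TurtleParser registered by this module;
# format="n3" would select N3Parser and additionally allow N3 extensions.
g.parse(data=turtle_data, format="turtle")

for s, p, o in g:
    print(s, p, o)  # prints the single parsed triple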