comparison planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/notation3.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 #!/usr/bin/env python
2 """
3 notation3.py - Standalone Notation3 Parser
4 Derived from CWM, the Closed World Machine
5
6 Authors of the original suite:
7
8 * Dan Connolly <@@>
9 * Tim Berners-Lee <@@>
10 * Yosi Scharf <@@>
11 * Joseph M. Reagle Jr. <reagle@w3.org>
12 * Rich Salz <rsalz@zolera.com>
13
14 http://www.w3.org/2000/10/swap/notation3.py
15
16 Copyright 2000-2007, World Wide Web Consortium.
17 Copyright 2001, MIT.
18 Copyright 2001, Zolera Systems Inc.
19
20 License: W3C Software License
21 http://www.w3.org/Consortium/Legal/copyright-software
22
23 Modified by Sean B. Palmer
24 Copyright 2007, Sean B. Palmer.
25
26 Modified to work with rdflib by Gunnar Aastrand Grimnes
27 Copyright 2010, Gunnar A. Grimnes
28
29 """
30
31 # Python standard libraries
32 import types
33 import sys
34 import os
35 import re
36 import codecs
37 import warnings
38
39 from decimal import Decimal
40
41 from uuid import uuid4
42
43 from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id
44 from rdflib.graph import QuotedGraph, ConjunctiveGraph, Graph
45 from rdflib import py3compat
46 b = py3compat.b
47
48 __all__ = ['BadSyntax', 'N3Parser', 'TurtleParser',
49 "splitFragP", "join", "base",
50 "runNamespace", "uniqueURI", "hexify"]
51
52 from rdflib.parser import Parser
53
54
def splitFragP(uriref, punct=0):
    """Split a URI reference just before its fragment identifier.

    The "#" stays attached to the fragment part.  ``punct`` is accepted
    but not used by this implementation.

    e.g.

    >>> splitFragP("abc#def")
    ('abc', '#def')

    >>> splitFragP("abcdef")
    ('abcdef', '')

    """
    head, hash_mark, fragment = uriref.rpartition("#")
    if not hash_mark:
        # No "#" anywhere: the whole reference is the non-fragment part.
        return uriref, ''
    return head, hash_mark + fragment
75
76
@py3compat.format_doctest_out
def join(here, there):
    """join an absolute URI and URI reference
    (non-ascii characters are supported/doctested;
    haven't checked the details of the IRI spec though)

    ``here`` is assumed to be absolute.
    ``there`` is URI reference.

    >>> join('http://example/x/y/z', '../abc')
    'http://example/x/abc'

    Raise ValueError if there uses relative path
    syntax but here has no hierarchical path.

    >>> join('mid:foo@example', '../foo') # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
        raise ValueError(here)
    ValueError: Base <mid:foo@example> has no slash
    after colon - with relative '../foo'.

    >>> join('http://example/x/y/z', '')
    'http://example/x/y/z'

    >>> join('mid:foo@example', '#foo')
    'mid:foo@example#foo'

    We grok IRIs

    >>> len(%(u)s'Andr\\xe9')
    5

    >>> join('http://example.org/', %(u)s'#Andr\\xe9')
    %(u)s'http://example.org/#Andr\\xe9'
    """

    first_slash = there.find('/')
    first_colon = there.find(':')

    # A colon ahead of any slash means ``there`` has its own scheme,
    # e.g. join(base, 'foo:/') -- already absolute.
    if first_colon >= 0 and (first_slash < 0 or first_colon < first_slash):
        return there

    base_colon = here.find(':')
    assert(base_colon >= 0), \
        "Base uri '%s' is not absolute" % here  # else it's not absolute

    path, frag = splitFragP(there)
    if not path:
        # Fragment-only (or empty) reference: stays on the base document.
        return here + frag

    after_colon = base_colon + 1

    # join('mid:foo@example', '../foo') bzzt
    if here[after_colon:after_colon + 1] != '/':
        raise ValueError(
            ("Base <%s> has no slash after "
             "colon - with relative '%s'.") % (here, there))

    # Locate where the path component of the base starts.
    if here[after_colon:after_colon + 2] == '//':
        path_start = here.find('/', base_colon + 3)
    else:
        path_start = after_colon

    # join('http://xyz', 'foo')
    if path_start < 0:
        path_start = len(here)
        here = here + '/'

    # join('http://xyz/', '//abc') => 'http://abc'
    if there[:2] == '//':
        return here[:after_colon] + there

    # join('http://xyz/', '/abc') => 'http://xyz/abc'
    if there[:1] == '/':
        return here[:path_start] + there

    last_slash = here.rfind('/')

    # Consume leading "./" and "../" segments of the relative path,
    # shortening the base by one segment for each "..".
    while True:
        if path[:2] == './':
            path = path[2:]
        if path == '.':
            path = ''
        elif path[:3] == '../' or path == '..':
            path = path[3:]
            cut = here.rfind('/', path_start, last_slash)
            if cut >= 0:
                here = here[:cut + 1]
                last_slash = cut
        else:
            break

    return here[:last_slash + 1] + path + frag
172
173
def base():
    """Return the base URI for this process - the Web equivalent of the cwd.

    Relative or absolute unix-standard filenames parsed relative to
    this yield the URI of the file.
    If we had a reliable way of getting a computer name,
    we should put it in the hostname just to prevent ambiguity.

    """
    # return "file://" + hostname + os.getcwd() + "/"
    cwd_path = _fixslash(os.getcwd())
    return "file://" + cwd_path + "/"
185
186
187 def _fixslash(s):
188 """ Fix windowslike filename to unixlike - (#ifdef WINDOWS)"""
189 s = s.replace("\\", "/")
190 if s[0] != "/" and s[1] == ":":
191 s = s[2:] # @@@ Hack when drive letter present
192 return s
193
194
# Index of each component in a statement tuple; statements are built
# elsewhere in this file as (context, predicate, subject, object).
CONTEXT = 0
PRED = 1
SUBJ = 2
OBJ = 3

# Convenience groupings of the indices above.
PARTS = PRED, SUBJ, OBJ
ALL4 = CONTEXT, PRED, SUBJ, OBJ

# Tags for the kind of a term; used below as the first element of
# (kind, value) pairs such as N3_first.
SYMBOL = 0
FORMULA = 1
LITERAL = 2
LITERAL_DT = 21
LITERAL_LANG = 22
ANONYMOUS = 3
XMLLITERAL = 25

# URIs in the SWAP "log" namespace.
Logic_NS = "http://www.w3.org/2000/10/swap/log#"
NODE_MERGE_URI = Logic_NS + "is"  # Pseudo-property indicating node merging
forSomeSym = Logic_NS + "forSome"
forAllSym = Logic_NS + "forAll"

# Well-known RDF / OWL URIs.
RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
OWL_NS = "http://www.w3.org/2002/07/owl#"
DAML_sameAs_URI = OWL_NS + "sameAs"  # name says DAML, value is owl:sameAs
parsesTo_URI = Logic_NS + "parsesTo"
RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/"

List_NS = RDF_NS_URI  # From 20030808
_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#"

# List vocabulary as (SYMBOL, uri) pairs in the list namespace.
N3_first = (SYMBOL, List_NS + "first")
N3_rest = (SYMBOL, List_NS + "rest")
N3_li = (SYMBOL, List_NS + "li")
N3_nil = (SYMBOL, List_NS + "nil")
N3_List = (SYMBOL, List_NS + "List")
N3_Empty = (SYMBOL, List_NS + "Empty")
232
233
# Cached result of runNamespace(); None until the first call.
runNamespaceValue = None


def runNamespace():
    "Return a URI suitable as a namespace for run-local objects"
    # @@@ include hostname (privacy?) (hash it?)
    global runNamespaceValue
    if runNamespaceValue is not None:
        return runNamespaceValue
    # First call: build the namespace once and remember it.
    runNamespaceValue = join(base(), _unique_id()) + '#'
    return runNamespaceValue
244
# Counter behind uniqueURI(); incremented on every call.
nextu = 0


def uniqueURI():
    "A unique URI"
    global nextu
    nextu = nextu + 1
    # return runNamespace() + "u_" + `nextu`
    namespace = runNamespace()
    return namespace + "u_" + str(nextu)
254
255
# When true, SinkParser.__init__ records a BecauseOfData reason for the
# document being parsed.
tracking = False
# NOTE(review): not referenced in the visible part of this file;
# presumably a verbosity level inherited from CWM - confirm before relying
# on it.
chatty_flag = 50
258
259 # from why import BecauseOfData, becauseSubexpression
260
261
def BecauseOfData(*args, **kargs):
    """Placeholder for the ``why`` module's BecauseOfData; does nothing.

    Accepts any arguments and returns None.
    """
    # print args, kargs
    return None
265
266
def becauseSubexpression(*args, **kargs):
    """Placeholder for the ``why`` module's becauseSubexpression; does nothing.

    Accepts any arguments and returns None.
    """
    # print args, kargs
    return None
270
# Aliases of the quantifier URIs defined earlier in this file.
N3_forSome_URI = forSomeSym
N3_forAll_URI = forAllSym

# Magic resources we know about

ADDED_HASH = "#"  # Stop where we use this in case we want to remove it!
# This is the hash on namespace URIs

# (kind, uri) pairs for the two properties the parser emits for the
# 'a' and '=' shorthands.
RDF_type = (SYMBOL, RDF_type_URI)
DAML_sameAs = (SYMBOL, DAML_sameAs_URI)

LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies"

# XSD datatype URIs built from the prefix exported by rdflib.term.
BOOLEAN_DATATYPE = _XSD_PFX + "boolean"
DECIMAL_DATATYPE = _XSD_PFX + "decimal"
DOUBLE_DATATYPE = _XSD_PFX + "double"
FLOAT_DATATYPE = _XSD_PFX + "float"
INTEGER_DATATYPE = _XSD_PFX + "integer"

option_noregen = 0  # If set, do not regenerate genids on output

# @@ I18n - the notname chars need extending for well known unicode non-text
# characters. The XML spec switched to assuming unknown things were name
# characters.
# _namechars = string.lowercase + string.uppercase + string.digits + '_-'
# Characters that terminate a qname; the keyword and name variants below
# extend this set with "." and ":" respectively.
_notQNameChars = \
    "\t\r\n !\"#$&'()*,+/;<=>?@[\\]^`{|}~"  # else valid qname :-/
_notKeywordsChars = _notQNameChars + "."
_notNameChars = _notQNameChars + ":"  # Assume anything else valid name :-/
_rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

hexChars = 'ABCDEFabcdef0123456789'
escapeChars = "(_~.-!$&'()*+,;=/?#@%)"  # valid for \ escapes in localnames
304
def unicodeExpand(m):
    """Return the character for a matched ``\\uXXXX`` / ``\\UXXXXXXXX`` escape.

    ``m`` is a regex match whose group 1 holds the hexadecimal digits.

    Raises ``Exception`` (with the underlying error chained as the cause)
    when the digits do not name a valid code point.
    """
    hex_digits = m.group(1)
    try:
        return chr(int(hex_digits, 16))
    except (ValueError, OverflowError) as err:
        # chr() rejects values beyond the Unicode range; only that failure
        # is translated, so unrelated errors (e.g. KeyboardInterrupt) are
        # no longer swallowed by a bare except.
        raise Exception("Invalid unicode code point: " + hex_digits) from err
310
# On builds flagged as "narrow" by py3compat, replace unicodeExpand with a
# variant that warns and falls back to the unicode_escape codec instead of
# failing when chr() raises ValueError.
if py3compat.narrow_build:
    def unicodeExpand(m):
        """Expand a matched unicode escape, degrading gracefully on failure.

        ``m`` is a regex match whose group 1 holds the hex digits and whose
        group 0 is the complete escape sequence.
        """
        try:
            return chr(int(m.group(1), 16))
        except ValueError:
            warnings.warn(
                'Encountered a unicode char > 0xFFFF in a narrow python build. '
                'Trying to degrade gracefully, but this can cause problems '
                'later when working with the string:\n%s' % m.group(0))
            # Let the codec decode the whole escape sequence instead.
            return codecs.decode(m.group(0), 'unicode_escape')
321
# Unicode escapes: group 1 captures the hex digits.
unicodeEscape4 = re.compile(
    r'\\u([0-9a-fA-F]{4})')
unicodeEscape8 = re.compile(
    r'\\U([0-9a-fA-F]{8})')


N3CommentCharacter = "#"  # For unix script #! compatibility

########################################## Parse string to sink
#
# Regular expressions:
eol = re.compile(
    r'[ \t]*(#[^\n]*)?\r?\n')  # end of line, poss. w/comment
eof = re.compile(
    r'[ \t]*(#[^\n]*)?$')  # end of file, poss. w/comment
ws = re.compile(r'[ \t]*')  # Whitespace not including NL
signed_integer = re.compile(r'[-+]?[0-9]+')  # integer
integer_syntax = re.compile(r'[-+]?[0-9]+')
decimal_syntax = re.compile(r'[-+]?[0-9]*\.[0-9]+')
# Three mantissa shapes, each followed by a mandatory exponent:
#   digits "." [digits]   |   "." digits   |   digits
# The middle alternative takes one or more fraction digits so that
# values such as ".25e3" are recognised (it used to accept exactly one).
exponent_syntax = re.compile(r'[-+]?(?:[0-9]+\.[0-9]*(?:e|E)[-+]?[0-9]+|' +
                             r'\.[0-9]+(?:e|E)[-+]?[0-9]+|' +
                             r'[0-9]+(?:e|E)[-+]?[0-9]+)')
digitstring = re.compile(r'[0-9]+')  # Unsigned integer
interesting = re.compile(r"""[\\\r\n\"\']""")
langcode = re.compile(r'[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*')
348
349
350 class SinkParser:
def __init__(self, store, openFormula=None, thisDoc="", baseURI=None,
             genPrefix="", why=None, turtle=False):
    """Initialise parser state for feeding statements into ``store``.

    note: namespace names should *not* end in # ;
    the # will get added during qname processing

    ``thisDoc`` (when given) must contain ':'; it supplies the default
    prefix binding, the fallback base URI, the generated-id prefix and
    the name of the top-level formula.  ``openFormula`` is used as the
    top-level formula instead of creating one.  ``turtle`` switches on
    Turtle mode.
    """
    self._bindings = {}
    if thisDoc != "":
        assert ':' in thisDoc, "Document URI not absolute: <%s>" % thisDoc
        self._bindings[""] = thisDoc + "#"  # default

    self._store = store
    if genPrefix:
        store.setGenPrefix(genPrefix)  # pass it on

    self._thisDoc = thisDoc
    self._genPrefix = genPrefix
    self._reason = why  # Why the parser was asked to parse this

    # Position bookkeeping, used for error handling
    self.lines = 0
    self.startOfLine = 0  # For calculating character number

    self.keywords = ['a', 'this', 'bind', 'has', 'is', 'of',
                     'true', 'false']
    self.keywordsSet = 0  # Then only can others be considered qnames

    # Dict of anon nodes already declared ln: Term
    self._anonymousNodes = {}
    self._variables = {}
    self._parentVariables = {}

    self.turtle = turtle  # raise exception when encountering N3 extensions
    # Turtle allows single or double quotes around strings, whereas N3
    # only allows double quotes.
    if turtle:
        self.string_delimiters = ('"', "'")
    else:
        self.string_delimiters = ('"',)

    # Why these triples (was: diag.tracking)
    self._reason2 = None
    if tracking:
        self._reason2 = BecauseOfData(
            store.newSymbol(thisDoc), because=self._reason)

    # An explicit base wins; otherwise fall back to the document URI.
    self._baseURI = baseURI or thisDoc or None

    assert not self._baseURI or ':' in self._baseURI

    if not self._genPrefix:
        if self._thisDoc:
            self._genPrefix = self._thisDoc + "#_g"
        else:
            self._genPrefix = uniqueURI()

    if openFormula is not None:
        self._formula = openFormula
    elif self._thisDoc:
        self._formula = store.newFormula(thisDoc + "#_formula")
    else:
        self._formula = store.newFormula()

    self._context = self._formula
    self._parentContext = None
415
def here(self, i):
    """Build an identifier string from a position in the file.

    This is for repeatability when referring people to bnodes in a
    document.  It gives the line and character number of the '['
    character or path character which introduced the blank node; the
    first blank node is boringly _L1C1.  It used to be used only for
    tracking, but for tests in general it makes the canonical ordering
    of bnodes repeatable.
    """
    column = i - self.startOfLine + 1
    return "%s_L%iC%i" % (self._genPrefix, self.lines, column)
429
def formula(self):
    """Return the top-level formula this parser writes into."""
    top_formula = self._formula
    return top_formula
432
def loadStream(self, stream):
    """Read all of ``stream`` and parse it with loadBuf()."""
    contents = stream.read()  # Not ideal
    return self.loadBuf(contents)
435
def loadBuf(self, buf):
    """Parses a buffer and returns its top level formula"""
    self.startDoc()
    self.feed(buf)
    top_formula = self.endDoc()  # self._formula
    return top_formula
442
def feed(self, octets):
    """Feed an octet stream to the parser

    if BadSyntax is raised, the string
    passed in the exception object is the
    remainder after any statements have been parsed.
    So if there is more data to feed to the
    parser, it should be straightforward to recover."""

    if isinstance(octets, str):
        text = octets
    else:
        text = octets.decode('utf-8')
        # NB already decoded, so \ufeff
        bom = codecs.BOM_UTF8.decode('utf-8')
        if text.startswith(bom):
            text = text[1:]

    pos = 0
    while pos >= 0:
        start = self.skipSpace(text, pos)
        if start < 0:
            return

        pos = self.directiveOrStatement(text, start)
        if pos < 0:
            #print("# next char: %s" % text[start])
            self.BadSyntax(text, start,
                           "expected directive or statement")
461 while i >= 0:
462 j = self.skipSpace(s, i)
463 if j < 0:
464 return
465
466 i = self.directiveOrStatement(s, j)
467 if i < 0:
468 #print("# next char: %s" % s[j])
469 self.BadSyntax(s, j,
470 "expected directive or statement")
471
472 def directiveOrStatement(self, argstr, h):
473
474 i = self.skipSpace(argstr, h)
475 if i < 0:
476 return i # EOF
477
478 if self.turtle:
479 j = self.sparqlDirective(argstr, i)
480 if j >= 0:
481 return j
482
483 j = self.directive(argstr, i)
484 if j >= 0:
485 return self.checkDot(argstr, j)
486
487 j = self.statement(argstr, i)
488 if j >= 0:
489 return self.checkDot(argstr, j)
490
491 return j
492
493 # @@I18N
494 # _namechars = string.lowercase + string.uppercase + string.digits + '_-'
495
def tok(self, tok, argstr, i, colon=False):
    """Check for keyword.  Space must have been stripped on entry.

    The keyword matches when it is either introduced by "@" or listed in
    ``self.keywords``, is spelled exactly at the current position, and is
    followed by a character that cannot continue a keyword.

    if colon, then keyword followed by colon is ok
    (@prefix:<blah> is ok, rdf:type shortcut a must be followed by ws)

    Returns the position just after the keyword, or -1 when there is no
    match.  A keyword that runs into the end of the input is reported as
    no match instead of raising IndexError.
    """

    assert tok[0] not in _notNameChars  # not for punctuation
    if argstr[i:i + 1] == "@":
        i = i + 1
    elif tok not in self.keywords:
        return -1  # No, this has neither keywords declaration nor "@"

    end = i + len(tok)
    if argstr[i:end] != tok:
        return -1

    following = argstr[end:end + 1]
    if not following:
        return -1  # nothing after the keyword: end of input

    # The terminator test applies only once the keyword text has matched.
    # Previously "A and B or C" let ``colon=True`` accept any text that
    # happened to have ':' at the right offset.
    if following in _notKeywordsChars or (colon and following == ':'):
        return end
    return -1
518
def sparqlTok(self, tok, argstr, i):
    """Check for SPARQL keyword.  Space must have been stripped on entry.

    Case insensitive and not preceded by @.

    Returns the position just after the keyword, or -1 when there is no
    match.  A keyword that runs into the end of the input is reported as
    no match instead of raising IndexError.
    """

    assert tok[0] not in _notNameChars  # not for punctuation

    end = i + len(tok)
    if argstr[i:end].lower() != tok.lower():
        return -1

    # Slice instead of index so a keyword at the very end of the input
    # yields '' rather than raising.
    following = argstr[end:end + 1]
    if following and following in _notQNameChars:
        return end
    return -1
533
534
def directive(self, argstr, i):
    """Parse an "@" directive (or bare declared keyword) at position ``i``.

    Handles @keywords, @forAll, @forSome, @prefix and @base; the obsolete
    'bind' keyword, and the N3-only directives in Turtle mode, are
    reported through ``self.BadSyntax``.

    Returns the position after the directive (before its terminating
    '.'), or a negative number at end of input or when the text is not a
    directive.
    """
    j = self.skipSpace(argstr, i)
    if j < 0:
        return j  # eof
    res = []

    j = self.tok('bind', argstr, i)  # implied "#". Obsolete.
    if j > 0:
        self.BadSyntax(argstr, i,
                       "keyword bind is obsolete: use @prefix")

    j = self.tok('keywords', argstr, i)
    if j > 0:
        if self.turtle:
            self.BadSyntax(argstr, i, "Found 'keywords' when in Turtle mode.")

        i = self.commaSeparatedList(argstr, j, res, self.bareWord)
        if i < 0:
            # Report at the start of the list; ``i`` is negative here.
            self.BadSyntax(argstr, j,
                           "'@keywords' needs comma separated list of words")
        self.setKeywords(res[:])
        return i

    j = self.tok('forAll', argstr, i)
    if j > 0:
        if self.turtle:
            self.BadSyntax(argstr, i, "Found 'forAll' when in Turtle mode.")

        i = self.commaSeparatedList(argstr, j, res, self.uri_ref2)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "Bad variable list after @forAll")
        for x in res:
            # self._context.declareUniversal(x)
            if x not in self._variables or x in self._parentVariables:
                self._variables[x] = self._context.newUniversal(x)
        return i

    j = self.tok('forSome', argstr, i)
    if j > 0:
        if self.turtle:
            self.BadSyntax(argstr, i, "Found 'forSome' when in Turtle mode.")

        i = self.commaSeparatedList(argstr, j, res, self.uri_ref2)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "Bad variable list after @forSome")
        for x in res:
            self._context.declareExistential(x)
        return i

    j = self.tok('prefix', argstr, i, colon=True)  # no implied "#"
    if j >= 0:
        t = []
        i = self.qname(argstr, j, t)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "expected qname after @prefix")
        j = self.uri_ref2(argstr, i, t)
        if j < 0:
            self.BadSyntax(argstr, i,
                           "expected <uriref> after @prefix _qname_")
        ns = self.uriOf(t[1])

        if self._baseURI:
            ns = join(self._baseURI, ns)
        elif ":" not in ns:
            self.BadSyntax(argstr, j,
                           "With no base URI, cannot use " +
                           "relative URI in @prefix <" + ns + ">")
        assert ':' in ns  # must be absolute
        self._bindings[t[0][0]] = ns
        self.bind(t[0][0], hexify(ns))
        return j

    j = self.tok('base', argstr, i)  # Added 2007/7/7
    if j >= 0:
        t = []
        i = self.uri_ref2(argstr, j, t)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "expected <uri> after @base ")
        ns = self.uriOf(t[0])

        if self._baseURI:
            ns = join(self._baseURI, ns)
        elif ":" not in ns:
            # Only a *relative* @base needs an earlier base to resolve
            # against; an absolute one is accepted as is (same rule as
            # @prefix above).
            self.BadSyntax(argstr, j,
                           "With no previous base URI, cannot use " +
                           "relative URI in @base <" + ns + ">")
        assert ':' in ns  # must be absolute
        self._baseURI = ns
        return i

    return -1  # Not a directive, could be something else.
620 ns = join(self._baseURI, ns)
621 else:
622 self.BadSyntax(argstr, j,
623 "With no previous base URI, cannot use " +
624 "relative URI in @base <" + ns + ">")
625 assert ':' in ns # must be absolute
626 self._baseURI = ns
627 return i
628
629 return -1 # Not a directive, could be something else.
630
631 def sparqlDirective(self, argstr, i):
632
633 """
634 turtle and trig support BASE/PREFIX without @ and without
635 terminating .
636 """
637
638 j = self.skipSpace(argstr, i)
639 if j < 0:
640 return j # eof
641
642 j = self.sparqlTok('PREFIX', argstr, i)
643 if j >= 0:
644 t = []
645 i = self.qname(argstr, j, t)
646 if i < 0:
647 self.BadSyntax(argstr, j,
648 "expected qname after @prefix")
649 j = self.uri_ref2(argstr, i, t)
650 if j < 0:
651 self.BadSyntax(argstr, i,
652 "expected <uriref> after @prefix _qname_")
653 ns = self.uriOf(t[1])
654
655 if self._baseURI:
656 ns = join(self._baseURI, ns)
657 elif ":" not in ns:
658 self.BadSyntax(argstr, j,
659 "With no base URI, cannot use " +
660 "relative URI in @prefix <" + ns + ">")
661 assert ':' in ns # must be absolute
662 self._bindings[t[0][0]] = ns
663 self.bind(t[0][0], hexify(ns))
664 return j
665
666 j = self.sparqlTok('BASE', argstr, i)
667 if j >= 0:
668 t = []
669 i = self.uri_ref2(argstr, j, t)
670 if i < 0:
671 self.BadSyntax(argstr, j,
672 "expected <uri> after @base ")
673 ns = self.uriOf(t[0])
674
675 if self._baseURI:
676 ns = join(self._baseURI, ns)
677 else:
678 self.BadSyntax(argstr, j,
679 "With no previous base URI, cannot use " +
680 "relative URI in @base <" + ns + ">")
681 assert ':' in ns # must be absolute
682 self._baseURI = ns
683 return i
684
685 return -1 # Not a directive, could be something else.
686
687
688 def bind(self, qn, uri):
689 assert isinstance(
690 uri, bytes), "Any unicode must be %x-encoded already"
691 if qn == "":
692 self._store.setDefaultNamespace(uri)
693 else:
694 self._store.bind(qn, uri)
695
696 def setKeywords(self, k):
697 "Takes a list of strings"
698 if k is None:
699 self.keywordsSet = 0
700 else:
701 self.keywords = k
702 self.keywordsSet = 1
703
704 def startDoc(self):
705 # was: self._store.startDoc()
706 self._store.startDoc(self._formula)
707
708 def endDoc(self):
709 """Signal end of document and stop parsing. returns formula"""
710 self._store.endDoc(self._formula) # don't canonicalize yet
711 return self._formula
712
713 def makeStatement(self, quadruple):
714 # $$$$$$$$$$$$$$$$$$$$$
715 # print "# Parser output: ", `quadruple`
716 self._store.makeStatement(quadruple, why=self._reason2)
717
718 def statement(self, argstr, i):
719 r = []
720 i = self.object(
721 argstr, i, r) # Allow literal for subject - extends RDF
722 if i < 0:
723 return i
724
725 j = self.property_list(argstr, i, r[0])
726
727 if j < 0:
728 self.BadSyntax(
729 argstr, i, "expected propertylist")
730 return j
731
732 def subject(self, argstr, i, res):
733 return self.item(argstr, i, res)
734
735 def verb(self, argstr, i, res):
736 """ has _prop_
737 is _prop_ of
738 a
739 =
740 _prop_
741 >- prop ->
742 <- prop -<
743 _operator_"""
744
745 j = self.skipSpace(argstr, i)
746 if j < 0:
747 return j # eof
748
749 r = []
750
751 j = self.tok('has', argstr, i)
752 if j >= 0:
753 if self.turtle:
754 self.BadSyntax(argstr, i, "Found 'has' keyword in Turtle mode")
755
756 i = self.prop(argstr, j, r)
757 if i < 0:
758 self.BadSyntax(argstr, j,
759 "expected property after 'has'")
760 res.append(('->', r[0]))
761 return i
762
763 j = self.tok('is', argstr, i)
764 if j >= 0:
765 if self.turtle:
766 self.BadSyntax(argstr, i, "Found 'is' keyword in Turtle mode")
767
768 i = self.prop(argstr, j, r)
769 if i < 0:
770 self.BadSyntax(argstr, j,
771 "expected <property> after 'is'")
772 j = self.skipSpace(argstr, i)
773 if j < 0:
774 self.BadSyntax(argstr, i,
775 "End of file found, expected property after 'is'")
776 i = j
777 j = self.tok('of', argstr, i)
778 if j < 0:
779 self.BadSyntax(argstr, i,
780 "expected 'of' after 'is' <prop>")
781 res.append(('<-', r[0]))
782 return j
783
784 j = self.tok('a', argstr, i)
785 if j >= 0:
786 res.append(('->', RDF_type))
787 return j
788
789 if argstr[i:i + 2] == "<=":
790 if self.turtle:
791 self.BadSyntax(argstr, i,
792 "Found '<=' in Turtle mode. ")
793
794 res.append(('<-', self._store.newSymbol(Logic_NS + "implies")))
795 return i + 2
796
797 if argstr[i:i + 1] == "=":
798 if self.turtle:
799 self.BadSyntax(argstr, i, "Found '=' in Turtle mode")
800 if argstr[i + 1:i + 2] == ">":
801 res.append(('->', self._store.newSymbol(Logic_NS + "implies")))
802 return i + 2
803 res.append(('->', DAML_sameAs))
804 return i + 1
805
806 if argstr[i:i + 2] == ":=":
807 if self.turtle:
808 self.BadSyntax(argstr, i, "Found ':=' in Turtle mode")
809
810 # patch file relates two formulae, uses this @@ really?
811 res.append(('->', Logic_NS + "becomes"))
812 return i + 2
813
814 j = self.prop(argstr, i, r)
815 if j >= 0:
816 res.append(('->', r[0]))
817 return j
818
819 if argstr[i:i + 2] == ">-" or argstr[i:i + 2] == "<-":
820 self.BadSyntax(argstr, j,
821 ">- ... -> syntax is obsolete.")
822
823 return -1
824
825 def prop(self, argstr, i, res):
826 return self.item(argstr, i, res)
827
828 def item(self, argstr, i, res):
829 return self.path(argstr, i, res)
830
831 def blankNode(self, uri=None):
832 return self._store.newBlankNode(self._context, uri, why=self._reason2)
833
834 def path(self, argstr, i, res):
835 """Parse the path production.
836 """
837 j = self.nodeOrLiteral(argstr, i, res)
838 if j < 0:
839 return j # nope
840
841 while argstr[j:j + 1] in "!^": # no spaces, must follow exactly (?)
842 ch = argstr[j:j + 1]
843 subj = res.pop()
844 obj = self.blankNode(uri=self.here(j))
845 j = self.node(argstr, j + 1, res)
846 if j < 0:
847 self.BadSyntax(argstr, j,
848 "EOF found in middle of path syntax")
849 pred = res.pop()
850 if ch == "^": # Reverse traverse
851 self.makeStatement((self._context, pred, obj, subj))
852 else:
853 self.makeStatement((self._context, pred, subj, obj))
854 res.append(obj)
855 return j
856
857 def anonymousNode(self, ln):
858 """Remember or generate a term for one of these _: anonymous nodes"""
859 term = self._anonymousNodes.get(ln, None)
860 if term is not None:
861 return term
862 term = self._store.newBlankNode(self._context, why=self._reason2)
863 self._anonymousNodes[ln] = term
864 return term
865
866 def node(self, argstr, i, res, subjectAlready=None):
867 """Parse the <node> production.
868 Space is now skipped once at the beginning
869 instead of in multipe calls to self.skipSpace().
870 """
871 subj = subjectAlready
872
873 j = self.skipSpace(argstr, i)
874 if j < 0:
875 return j # eof
876 i = j
877 ch = argstr[i:i + 1] # Quick 1-character checks first:
878
879 if ch == "[":
880 bnodeID = self.here(i)
881 j = self.skipSpace(argstr, i + 1)
882 if j < 0:
883 self.BadSyntax(argstr, i,
884 "EOF after '['")
885 # Hack for "is" binding name to anon node
886 if argstr[j:j + 1] == "=":
887 if self.turtle:
888 self.BadSyntax(argstr, j, "Found '[=' or '[ =' when in turtle mode.")
889 i = j + 1
890 objs = []
891 j = self.objectList(argstr, i, objs)
892 if j >= 0:
893 subj = objs[0]
894 if len(objs) > 1:
895 for obj in objs:
896 self.makeStatement((self._context,
897 DAML_sameAs, subj, obj))
898 j = self.skipSpace(argstr, j)
899 if j < 0:
900 self.BadSyntax(argstr, i,
901 "EOF when objectList expected after [ = ")
902 if argstr[j:j + 1] == ";":
903 j = j + 1
904 else:
905 self.BadSyntax(argstr, i,
906 "objectList expected after [= ")
907
908 if subj is None:
909 subj = self.blankNode(uri=bnodeID)
910
911 i = self.property_list(argstr, j, subj)
912 if i < 0:
913 self.BadSyntax(argstr, j,
914 "property_list expected")
915
916 j = self.skipSpace(argstr, i)
917 if j < 0:
918 self.BadSyntax(argstr, i,
919 "EOF when ']' expected after [ <propertyList>")
920 if argstr[j:j + 1] != "]":
921 self.BadSyntax(argstr, j,
922 "']' expected")
923 res.append(subj)
924 return j + 1
925
926 if not self.turtle and ch == "{":
927 # if self.turtle:
928 # self.BadSyntax(argstr, i,
929 # "found '{' while in Turtle mode, Formulas not supported!")
930 ch2 = argstr[i + 1:i + 2]
931 if ch2 == '$':
932 # a set
933 i += 1
934 j = i + 1
935 List = []
936 first_run = True
937 while 1:
938 i = self.skipSpace(argstr, j)
939 if i < 0:
940 self.BadSyntax(argstr, i,
941 "needed '$}', found end.")
942 if argstr[i:i + 2] == '$}':
943 j = i + 2
944 break
945
946 if not first_run:
947 if argstr[i:i + 1] == ',':
948 i += 1
949 else:
950 self.BadSyntax(
951 argstr, i, "expected: ','")
952 else:
953 first_run = False
954
955 item = []
956 j = self.item(
957 argstr, i, item) # @@@@@ should be path, was object
958 if j < 0:
959 self.BadSyntax(argstr, i,
960 "expected item in set or '$}'")
961 List.append(self._store.intern(item[0]))
962 res.append(self._store.newSet(List, self._context))
963 return j
964 else:
965 # parse a formula
966 j = i + 1
967 oldParentContext = self._parentContext
968 self._parentContext = self._context
969 parentAnonymousNodes = self._anonymousNodes
970 grandParentVariables = self._parentVariables
971 self._parentVariables = self._variables
972 self._anonymousNodes = {}
973 self._variables = self._variables.copy()
974 reason2 = self._reason2
975 self._reason2 = becauseSubexpression
976 if subj is None:
977 subj = self._store.newFormula()
978 self._context = subj
979
980 while 1:
981 i = self.skipSpace(argstr, j)
982 if i < 0:
983 self.BadSyntax(
984 argstr, i, "needed '}', found end.")
985
986 if argstr[i:i + 1] == "}":
987 j = i + 1
988 break
989
990 j = self.directiveOrStatement(argstr, i)
991 if j < 0:
992 self.BadSyntax(
993 argstr, i, "expected statement or '}'")
994
995 self._anonymousNodes = parentAnonymousNodes
996 self._variables = self._parentVariables
997 self._parentVariables = grandParentVariables
998 self._context = self._parentContext
999 self._reason2 = reason2
1000 self._parentContext = oldParentContext
1001 res.append(subj.close()) # No use until closed
1002 return j
1003
1004 if ch == "(":
1005 thing_type = self._store.newList
1006 ch2 = argstr[i + 1:i + 2]
1007 if ch2 == '$':
1008 thing_type = self._store.newSet
1009 i += 1
1010 j = i + 1
1011
1012 List = []
1013 while 1:
1014 i = self.skipSpace(argstr, j)
1015 if i < 0:
1016 self.BadSyntax(
1017 argstr, i, "needed ')', found end.")
1018 if argstr[i:i + 1] == ')':
1019 j = i + 1
1020 break
1021
1022 item = []
1023 j = self.item(
1024 argstr, i, item) # @@@@@ should be path, was object
1025 if j < 0:
1026 self.BadSyntax(argstr, i,
1027 "expected item in list or ')'")
1028 List.append(self._store.intern(item[0]))
1029 res.append(thing_type(List, self._context))
1030 return j
1031
1032 j = self.tok('this', argstr, i) # This context
1033 if j >= 0:
1034 self.BadSyntax(argstr, i,
1035 "Keyword 'this' was ancient N3. Now use " +
1036 "@forSome and @forAll keywords.")
1037
1038 # booleans
1039 j = self.tok('true', argstr, i)
1040 if j >= 0:
1041 res.append(True)
1042 return j
1043 j = self.tok('false', argstr, i)
1044 if j >= 0:
1045 res.append(False)
1046 return j
1047
1048 if subj is None: # If this can be a named node, then check for a name.
1049 j = self.uri_ref2(argstr, i, res)
1050 if j >= 0:
1051 return j
1052
1053 return -1
1054
1055 def property_list(self, argstr, i, subj):
1056 """Parse property list
1057 Leaves the terminating punctuation in the buffer
1058 """
1059 while 1:
1060 while 1: # skip repeat ;
1061 j = self.skipSpace(argstr, i)
1062 if j < 0:
1063 self.BadSyntax(argstr, i,
1064 "EOF found when expected verb in property list")
1065 if argstr[j]!=';': break
1066 i = j+1
1067
1068 if argstr[j:j + 2] == ":-":
1069 if self.turtle:
1070 self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode")
1071 i = j + 2
1072 res = []
1073 j = self.node(argstr, i, res, subj)
1074 if j < 0:
1075 self.BadSyntax(argstr, i,
1076 "bad {} or () or [] node after :- ")
1077 i = j
1078 continue
1079 i = j
1080 v = []
1081 j = self.verb(argstr, i, v)
1082 if j <= 0:
1083 return i # void but valid
1084
1085 objs = []
1086 i = self.objectList(argstr, j, objs)
1087 if i < 0:
1088 self.BadSyntax(argstr, j,
1089 "objectList expected")
1090 for obj in objs:
1091 dira, sym = v[0]
1092 if dira == '->':
1093 self.makeStatement((self._context, sym, subj, obj))
1094 else:
1095 self.makeStatement((self._context, sym, obj, subj))
1096
1097 j = self.skipSpace(argstr, i)
1098 if j < 0:
1099 self.BadSyntax(argstr, j,
1100 "EOF found in list of objects")
1101 if argstr[i:i + 1] != ";":
1102 return i
1103 i = i + 1 # skip semicolon and continue
1104
1105 def commaSeparatedList(self, argstr, j, res, what):
1106 """return value: -1 bad syntax; >1 new position in argstr
1107 res has things found appended
1108 """
1109 i = self.skipSpace(argstr, j)
1110 if i < 0:
1111 self.BadSyntax(argstr, i,
1112 "EOF found expecting comma sep list")
1113 if argstr[i] == ".":
1114 return j # empty list is OK
1115 i = what(argstr, i, res)
1116 if i < 0:
1117 return -1
1118
1119 while 1:
1120 j = self.skipSpace(argstr, i)
1121 if j < 0:
1122 return j # eof
1123 ch = argstr[j:j + 1]
1124 if ch != ",":
1125 if ch != ".":
1126 return -1
1127 return j # Found but not swallowed "."
1128 i = what(argstr, j + 1, res)
1129 if i < 0:
1130 self.BadSyntax(argstr, i,
1131 "bad list content")
1132
1133 def objectList(self, argstr, i, res):
1134 i = self.object(argstr, i, res)
1135 if i < 0:
1136 return -1
1137 while 1:
1138 j = self.skipSpace(argstr, i)
1139 if j < 0:
1140 self.BadSyntax(argstr, j,
1141 "EOF found after object")
1142 if argstr[j:j + 1] != ",":
1143 return j # Found something else!
1144 i = self.object(argstr, j + 1, res)
1145 if i < 0:
1146 return i
1147
1148 def checkDot(self, argstr, i):
1149 j = self.skipSpace(argstr, i)
1150 if j < 0:
1151 return j # eof
1152 if argstr[j:j + 1] == ".":
1153 return j + 1 # skip
1154 if argstr[j:j + 1] == "}":
1155 return j # don't skip it
1156 if argstr[j:j + 1] == "]":
1157 return j
1158 self.BadSyntax(argstr, j,
1159 "expected '.' or '}' or ']' at end of statement")
1160
def uri_ref2(self, argstr, i, res):
    """Generate uri from n3 representation.

    The spellings are tried in this order: a qname (``pfx:local``), a
    ``?variable``, a ``<uri reference>`` and, when ``self.keywordsSet``
    is true, a bare word that is appended to the "" prefix binding.
    The resulting term is appended to ``res``.

    Note that the RDF convention of directly concatenating
    NS and local name is now used though I prefer inserting a '#'
    to make the namespaces look more like what XML folks expect.

    Returns the position just past the consumed text, or -1 when nothing
    matched. self.BadSyntax is invoked for an unbound prefix, for a
    "<" without a closing ">", and for a keyword used as a bare word.
    """
    qn = []
    j = self.qname(argstr, i, qn)
    if j >= 0:
        pfx, ln = qn[0]
        if pfx is None:
            assert 0, "not used?"
            ns = self._baseURI + ADDED_HASH
        else:
            try:
                ns = self._bindings[pfx]
            except KeyError:
                if pfx == "_":  # Magic prefix 2001/05/30, can be changed
                    res.append(self.anonymousNode(ln))
                    return j
                if not self.turtle and pfx == "":
                    ns = join(self._baseURI or "", "#")
                else:
                    self.BadSyntax(argstr, i,
                                   "Prefix \"%s:\" not bound" % (pfx))
        symb = self._store.newSymbol(ns + ln)
        if symb in self._variables:
            res.append(self._variables[symb])
        else:
            res.append(symb)  # @@@ "#" CONVENTION
        return j

    i = self.skipSpace(argstr, i)
    if i < 0:
        return -1

    if argstr[i] == "?":
        v = []
        j = self.variable(argstr, i, v)
        if j > 0:  # Forget variables as a class, only in context.
            res.append(v[0])
            return j
        return -1

    elif argstr[i] == "<":
        opening = i  # remembered so an unterminated URI is reported here
        st = i + 1
        i = argstr.find(">", st)
        if i >= 0:
            uref = argstr[st:i]  # the join should deal with "":

            # expand unicode escapes
            uref = unicodeEscape8.sub(unicodeExpand, uref)
            uref = unicodeEscape4.sub(unicodeExpand, uref)

            if self._baseURI:
                uref = join(self._baseURI, uref)  # was: uripath.join
            else:
                assert ":" in uref, \
                    "With no base URI, cannot deal with relative URIs"
            if argstr[i - 1:i] == "#" and not uref[-1:] == "#":
                uref = uref + \
                    "#"  # She meant it! Weirdness in urlparse?
            symb = self._store.newSymbol(uref)
            if symb in self._variables:
                res.append(self._variables[symb])
            else:
                res.append(symb)
            return i + 1
        # The original passed ``j`` here, which is always negative on this
        # path (it is the failed qname result), so the error pointed at
        # the wrong place in the input.
        self.BadSyntax(argstr, opening,
                       "unterminated URI reference")

    elif self.keywordsSet:
        v = []
        j = self.bareWord(argstr, i, v)
        if j < 0:
            return -1  # Forget variables as a class, only in context.
        if v[0] in self.keywords:
            self.BadSyntax(argstr, i,
                           'Keyword "%s" not allowed here.' % v[0])
        res.append(self._store.newSymbol(self._bindings[""] + v[0]))
        return j
    else:
        return -1
1247
def skipSpace(self, argstr, i):
    """Skip white space, newlines and comments.

    Every match of the module-level ``eol`` pattern bumps ``self.lines``
    and moves ``self.startOfLine`` to the position after the match; one
    trailing match of ``ws`` is then skipped.

    Returns -1 if the ``eof`` pattern matches at the resulting position,
    else the position of the first non-ws character.
    """
    while True:
        line_break = eol.match(argstr, i)
        if line_break is None:
            break
        self.lines += 1
        i = line_break.end()  # Point to first character unmatched
        self.startOfLine = i
    blanks = ws.match(argstr, i)
    if blanks is not None:
        i = blanks.end()
    if eof.match(argstr, i) is not None:
        return -1
    return i
1265
def variable(self, argstr, i, res):
    """ ?abc -> variable(:abc)

    Reads a "?"-prefixed name and appends the matching universal to
    ``res``. The universal is looked up by the symbol built from
    ``self._baseURI + "#" + name``; it is created on first use in
    ``self._variables`` (via ``self._context``) when there is no parent
    context, otherwise in ``self._parentVariables`` (via
    ``self._parentContext``).

    Returns the position after the name, or -1 when input ends or the
    next non-space character is not "?".
    """
    j = self.skipSpace(argstr, i)
    if j < 0 or argstr[j:j + 1] != "?":
        return -1

    j += 1
    if argstr[j] in "0123456789-":
        self.BadSyntax(argstr, j,
                       "Varible name can't start with '%s'" % argstr[j])
    end = j
    while end < len(argstr) and argstr[end] not in _notKeywordsChars:
        end += 1

    outermost = self._parentContext is None
    # @@ the outermost case used to be rejected with:
    # "Can't use ?xxx syntax for variable in outermost level"
    varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:end])
    known = self._variables if outermost else self._parentVariables
    if varURI not in known:
        owner = self._context if outermost else self._parentContext
        known[varURI] = owner.newUniversal(varURI, why=self._reason2)
    res.append(known[varURI])
    return end
1300
def bareWord(self, argstr, i, res):
    """ abc -> :abc

    Appends the run of characters that are not in ``_notKeywordsChars``
    to ``res`` and returns the position after it. Returns -1 at end of
    input or when the word would start with a digit, "-" or one of the
    ``_notKeywordsChars``.
    """
    start = self.skipSpace(argstr, i)
    if start < 0:
        return -1

    lead = argstr[start]
    if lead in "0123456789-" or lead in _notKeywordsChars:
        return -1
    end = start
    size = len(argstr)
    while end < size and argstr[end] not in _notKeywordsChars:
        end += 1
    res.append(argstr[start:end])
    return end
1315
def qname(self, argstr, i, res):
    """Parse a (possibly prefixed) name and append ``(prefix, local)``.

    xyz:def -> ('xyz', 'def')
    If not in keywords and keywordsSet: def -> ('', 'def')
    :def -> ('', 'def')

    In the local part a backslash escapes the next character (which must
    be in ``escapeChars``) and "%" must be followed by two characters
    from ``hexChars``. A trailing "." is never part of the name.

    Returns the position after the name, or -1 when no qname starts at
    ``i``. Malformed escapes are reported through self.BadSyntax.
    """

    i = self.skipSpace(argstr, i)
    if i < 0:
        return -1

    c = argstr[i]
    if c in "0123456789-+.":
        return -1
    if c not in _notNameChars:
        ln = c
        i = i + 1
        while i < len(argstr):
            c = argstr[i]
            if c not in _notNameChars:
                ln = ln + c
                i = i + 1
            else:
                break

        if argstr[i - 1] == ".":  # qname cannot end with "."
            ln = ln[:-1]
            if not ln:
                return -1
            i -= 1

    else:  # First character is non-alpha
        ln = ''  # Was: None - TBL (why? useful?)

    if i < len(argstr) and argstr[i] == ':':
        pfx = ln
        # bnodes names have different rules
        if pfx == '_':
            allowedChars = _notNameChars
        else:
            allowedChars = _notQNameChars

        i = i + 1
        lastslash = False
        # start = i # TODO first char .
        ln = ''
        while i < len(argstr):
            c = argstr[i]
            if not lastslash and c == '\\':
                lastslash = True
                i += 1

            elif lastslash or c not in allowedChars:

                # Errors go through self.BadSyntax so they carry
                # self.lines like every other error in this parser; the
                # previous code read a ``self.line`` attribute that no
                # other method here uses.
                if lastslash:
                    if c not in escapeChars:
                        self.BadSyntax(argstr, i,
                                       "illegal escape " + c)
                elif c == '%':
                    # A "%" too close to the end of input is malformed
                    # too; check the length before indexing.
                    if (len(argstr) < i + 3
                            or argstr[i + 1] not in hexChars
                            or argstr[i + 2] not in hexChars):
                        self.BadSyntax(argstr, i,
                                       "illegal hex escape " + c)

                ln = ln + c
                i = i + 1
                lastslash = False
            else:
                break

        if lastslash:
            self.BadSyntax(argstr, i,
                           "qname cannot end with \\")

        if argstr[i - 1] == '.':
            # localname cannot end in .
            ln = ln[:-1]
            if not ln:
                return -1
            i -= 1

        res.append((pfx, ln))
        return i

    else:  # delimiter was not ":"
        if ln and self.keywordsSet and ln not in self.keywords:
            res.append(('', ln))
            return i
        return -1
1404
def object(self, argstr, i, res):
    """Parse an object: anything self.subject accepts, or a plain string.

    When self.subject does not match and the next non-space character is
    one of ``self.string_delimiters``, the string constant is read with
    self.strconst and appended to ``res`` as a literal with no datatype
    and no language.

    Returns the position after the object, or -1 when nothing matched.
    """
    j = self.subject(argstr, i, res)
    if j >= 0:
        return j

    j = self.skipSpace(argstr, i)
    if j < 0:
        return -1
    i = j

    if argstr[i] not in self.string_delimiters:
        return -1

    # A tripled delimiter opens a long string.
    if argstr[i:i + 3] == argstr[i] * 3:
        delim = argstr[i] * 3
    else:
        delim = argstr[i]
    i = i + len(delim)

    j, s = self.strconst(argstr, i, delim)

    # newLiteral takes (s, dt, lang); the one-argument call used before
    # raised TypeError against RDFSink.newLiteral.
    res.append(self._store.newLiteral(s, None, None))
    return j
1429
def nodeOrLiteral(self, argstr, i, res):
    """Parse a node, a numeric literal or a string literal into ``res``.

    self.node is tried first. Otherwise, text starting with a sign,
    digit or "." is matched against ``exponent_syntax`` (appended as
    float), ``decimal_syntax`` (Decimal) and ``integer_syntax`` (int),
    in that order. A string may be followed by ``@lang`` and/or
    ``^^datatype`` and is appended via ``self._store.newLiteral``.

    Returns the position after the term, or -1 when nothing matched.
    Raises BadSyntax for a bad language tag or, via self.BadSyntax, when
    no datatype URI follows "^^".
    """
    j = self.node(argstr, i, res)
    startline = self.lines  # Remember where for error messages
    if j >= 0:
        return j

    j = self.skipSpace(argstr, i)
    if j < 0:
        return -1
    i = j

    ch = argstr[i]
    if ch in "-+0987654321.":
        m = exponent_syntax.match(argstr, i)
        if m:
            j = m.end()
            res.append(float(argstr[i:j]))
            return j

        m = decimal_syntax.match(argstr, i)
        if m:
            j = m.end()
            res.append(Decimal(argstr[i:j]))
            return j

        m = integer_syntax.match(argstr, i)
        if m:
            j = m.end()
            res.append(int(argstr[i:j]))
            return j

        # No numeric pattern matched: fall through to the string check.

    if argstr[i] not in self.string_delimiters:
        return -1

    # A tripled delimiter opens a long string.
    if argstr[i:i + 3] == argstr[i] * 3:
        delim = argstr[i] * 3
    else:
        delim = argstr[i]
    i = i + len(delim)

    dt = None
    j, s = self.strconst(argstr, i, delim)
    lang = None
    if argstr[j:j + 1] == "@":  # Language?
        m = langcode.match(argstr, j + 1)
        if m is None:
            raise BadSyntax(
                self._thisDoc, startline, argstr, i,
                "Bad language code syntax on string " +
                "literal, after @")
        i = m.end()
        lang = argstr[j + 1:i]
        j = i
    if argstr[j:j + 2] == "^^":
        res2 = []
        dt_start = j + 2
        j = self.uri_ref2(argstr, dt_start, res2)  # Read datatype URI
        if j < 0 or not res2:
            # Previously this fell through to res2[0] and raised
            # IndexError instead of a syntax error.
            self.BadSyntax(argstr, dt_start,
                           "expected datatype URI after ^^")
        dt = res2[0]
    res.append(self._store.newLiteral(s, dt, lang))
    return j
1492
def uriOf(self, sym):
    """Return the URI for ``sym``.

    A tuple (old system for --pipe) yields its second element; anything
    else is returned unchanged.
    """
    # return sym.uriref() # cwm api
    return sym[1] if isinstance(sym, tuple) else sym
1498
def strconst(self, argstr, i, delim):
    """parse an N3 string constant delimited by delim.

    ``i`` is the position just after the opening delimiter and ``delim``
    is that delimiter (one quote character, or the same character three
    times for a long string).

    return index, val -- the position after the closing delimiter and
    the decoded string.

    Raises BadSyntax for a newline inside a single-delimiter string, an
    unknown backslash escape, or a string that is never closed.
    ``self.lines`` and ``self.startOfLine`` are updated for every
    newline consumed inside a long string.
    """
    delim1 = delim[0]
    delim2, delim3, delim4, delim5 = delim1 * 2, delim1 * 3, delim1 * 4, delim1 * 5

    j = i
    ustr = ""  # Empty unicode string
    startline = self.lines  # Remember where for error messages
    while j < len(argstr):
        if argstr[j] == delim1:
            if delim == delim1:  # done when delim is " or '
                i = j + 1
                return i, ustr
            if delim == delim3:  # done when delim is """ or ''' and, respectively ...
                if argstr[j:j + 5] == delim5:  # ... we have "" or '' before
                    i = j + 5
                    ustr = ustr + delim2
                    return i, ustr
                if argstr[j:j + 4] == delim4:  # ... we have " or ' before
                    i = j + 4
                    ustr = ustr + delim1
                    return i, ustr
                if argstr[j:j + 3] == delim3:  # current " or ' is part of delim
                    i = j + 3
                    return i, ustr

                # we are inside of the string and current char is " or '
                j = j + 1
                ustr = ustr + delim1
                continue

        m = interesting.search(argstr, j)  # was argstr[j:].
        # Note for pos param to work, MUST be compiled ... re bug?
        if m is None:
            # Nothing "interesting" is left before the end of input, so
            # the closing quote is missing. This used to be an assert,
            # which is skipped under -O and is not a syntax error.
            raise BadSyntax(
                self._thisDoc, startline, argstr, j,
                "unterminated string literal")

        i = m.start()
        try:
            ustr = ustr + argstr[j:i]
        except UnicodeError:
            err = ""
            for c in argstr[j:i]:
                err = err + (" %02x" % ord(c))
            streason = sys.exc_info()[1].__str__()
            raise BadSyntax(
                self._thisDoc, startline, argstr, j,
                "Unicode error appending characters" +
                " %s to string, because\n\t%s"
                % (err, streason))

        ch = argstr[i]
        if ch == delim1:
            j = i
            continue
        elif ch in ('"', "'") and ch != delim1:
            # The other kind of quote is ordinary content.
            ustr = ustr + ch
            j = i + 1
            continue
        elif ch in "\r\n":
            if delim == delim1:
                raise BadSyntax(
                    self._thisDoc, startline, argstr, i,
                    "newline found in string literal")
            self.lines = self.lines + 1
            ustr = ustr + ch
            j = i + 1
            self.startOfLine = j

        elif ch == "\\":
            j = i + 1
            ch = argstr[j:j + 1]  # Will be empty if string ends
            if not ch:
                raise BadSyntax(
                    self._thisDoc, startline, argstr, i,
                    "unterminated string literal (2)")
            # The two tables are parallel: escape letter -> character.
            # \' is included because single-quoted strings are accepted.
            k = 'abfrtvn\\"\''.find(ch)
            if k >= 0:
                uch = '\a\b\f\r\t\v\n\\"\''[k]
                ustr = ustr + uch
                j = j + 1
            elif ch == "u":
                j, ch = self.uEscape(argstr, j + 1, startline)
                ustr = ustr + ch
            elif ch == "U":
                j, ch = self.UEscape(argstr, j + 1, startline)
                ustr = ustr + ch
            else:
                self.BadSyntax(argstr, i,
                               "bad escape")

    self.BadSyntax(argstr, i,
                   "unterminated string literal")
1595
def _unicodeEscape(self, argstr, i, startline, reg, n, prefix):
    """Decode the ``n`` characters of a unicode escape starting at ``i``.

    The backslash and ``prefix`` letter are put back in front of the
    ``n`` characters and the result is passed through
    ``reg.sub(unicodeExpand, ...)``.

    Returns ``(i + n, decoded_text)``. Raises BadSyntax (reported
    against ``startline``) when fewer than ``n`` characters remain or
    when the substitution fails.
    """
    if len(argstr) < i + n:
        raise BadSyntax(
            self._thisDoc, startline, argstr, i,
            "unterminated string literal(3)")
    try:
        return i + n, reg.sub(unicodeExpand, '\\' + prefix + argstr[i:i + n])
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt and SystemExit must
        # not be turned into syntax errors.
        raise BadSyntax(
            self._thisDoc, startline, argstr, i,
            "bad string literal hex escape: " + argstr[i:i + n])
1607
def uEscape(self, argstr, i, startline):
    """Decode a lower-case "u" escape: four characters starting at ``i``.

    Returns whatever self._unicodeEscape returns for the
    ``unicodeEscape4`` pattern.
    """
    decoded = self._unicodeEscape(
        argstr, i, startline, unicodeEscape4, 4, 'u')
    return decoded
1610
def UEscape(self, argstr, i, startline):
    """Decode an upper-case "U" escape: eight characters starting at ``i``.

    Returns whatever self._unicodeEscape returns for the
    ``unicodeEscape8`` pattern.
    """
    decoded = self._unicodeEscape(
        argstr, i, startline, unicodeEscape8, 8, 'U')
    return decoded
1613
def BadSyntax(self, argstr, i, msg):
    """Raise a BadSyntax error for position ``i`` of ``argstr``.

    The error carries ``self._thisDoc`` and the current ``self.lines``
    count. This method never returns.
    """
    error = BadSyntax(self._thisDoc, self.lines, argstr, i, msg)
    raise error
1616
1617 # If we are going to do operators then they should generate
1618 # [ is operator:plus of ( \1 \2 ) ]
1619
1620
class BadSyntax(SyntaxError):
    """Syntax error raised by the parser, with a view of the bad input.

    ``uri`` names the document, ``lines`` is the zero-based line count
    at the point of failure, ``argstr`` is the text being parsed, ``i``
    the character position of the error in it and ``why`` a short
    explanation.
    """

    def __init__(self, uri, lines, argstr, i, why):
        # Keep the input as text. It used to be stored as UTF-8 bytes,
        # but ``i`` is a character index: slicing bytes with it is
        # misaligned for non-ASCII input and, on Python 3, the message
        # showed b'...' reprs.
        if isinstance(argstr, bytes):
            argstr = argstr.decode('utf-8', 'replace')
        self._str = argstr
        self._i = i
        self._why = why
        self.lines = lines
        self._uri = uri

    def __str__(self):
        """Return the message with up to 60 characters on each side of ^."""
        argstr = self._str
        i = self._i
        st = 0
        if i > 60:
            pre = "..."
            st = i - 60
        else:
            pre = ""
        if len(argstr) - i > 60:
            post = "..."
        else:
            post = ""

        text = 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' \
            % (self.lines + 1, self._uri, self._why, pre,
               argstr[st:i], argstr[i:i + 60], post)
        if str is bytes and not isinstance(text, str):
            # Python 2 only: __str__ has to return a byte string.
            text = text.encode('utf-8')
        return text

    @property
    def message(self):
        """The same text as ``str(self)``."""
        return str(self)
1651
1652
1653
1654 ###############################################################################
class Formula(object):
    """A formula backed by a QuotedGraph in the parent graph's store.

    ``number`` on the class counts the formulas created so far; each
    instance keeps its own value in ``self.number``.
    """
    number = 0

    def __init__(self, parent):
        self.uuid = uuid4().hex
        self.counter = 0
        Formula.number += 1
        self.number = Formula.number
        self.existentials = {}
        self.universals = {}

        identifier = self.id()
        self.quotedgraph = QuotedGraph(
            store=parent.store, identifier=identifier)

    def __str__(self):
        return '_:Formula%s' % self.number

    def id(self):
        """Return a BNode labelled with this formula's number."""
        label = '_:Formula%s' % self.number
        return BNode(label)

    def newBlankNode(self, uri=None, why=None):
        """Return a blank node for this formula.

        Without ``uri`` the label is built from this formula's uuid and
        an incrementing counter. With ``uri`` the label is the text
        after the last "#", with every "_" replaced by "b".
        """
        if uri is not None:
            fragment = uri.split('#')[-1]
            return BNode(fragment.replace('_', 'b'))
        self.counter += 1
        return BNode('f%sb%s' % (self.uuid, self.counter))

    def newUniversal(self, uri, why=None):
        """Return a Variable named by the text after the last "#"."""
        fragment = uri.split('#')[-1]
        return Variable(fragment)

    def declareExistential(self, x):
        """Map ``x`` to a fresh blank node in ``self.existentials``."""
        self.existentials[x] = self.newBlankNode()

    def close(self):
        """Return the quoted graph behind this formula."""
        return self.quotedgraph
1692
1693
# Matches (and captures) a single character in the range \x80-\xff.
# NOTE(review): not referenced by any of the code visible here -- confirm it
# is still needed before relying on it.
r_hibyte = re.compile(r'([\x80-\xff])')
1695
1696
class RDFSink(object):
    """Receives the parser's output and stores it in an rdflib graph.

    Statements for the root formula go to ``self.graph``; statements for
    a Formula go to that formula's quoted graph; anything else is
    treated as a graph-like object with an ``add`` method.
    """

    def __init__(self, graph):
        self.rootFormula = None
        self.counter = 0  # used to label blank nodes made by this sink
        self.graph = graph

    def newFormula(self):
        """Return a new Formula; the store must be formula aware."""
        assert self.graph.store.formula_aware
        f = Formula(self.graph)
        return f

    def newGraph(self, identifier):
        """Return a Graph over the same store with the given identifier."""
        return Graph(self.graph.store, identifier)

    def newSymbol(self, *args):
        """Return a URIRef for the first argument; the rest is ignored."""
        return URIRef(args[0])

    def newBlankNode(self, arg=None, uri=None, why=None):
        """Return a blank node.

        A Formula is asked for one of its own. For a Graph or None the
        label is "n" plus this sink's counter. Otherwise the label is
        the text after the last "#" in ``str(arg[0])`` with every "_"
        replaced by "b".
        """
        if isinstance(arg, Formula):
            return arg.newBlankNode(uri)
        elif isinstance(arg, Graph) or arg is None:
            self.counter += 1
            bn = BNode('n' + str(self.counter))
        else:
            bn = BNode(str(arg[0]).split('#').pop().replace('_', 'b'))
        return bn

    def newLiteral(self, s, dt=None, lang=None):
        """Return a Literal for ``s``.

        When ``dt`` is set the literal is typed and ``lang`` is ignored;
        otherwise it carries ``lang``. Both default to None so that a
        plain string can be created with ``newLiteral(s)``.
        """
        if dt:
            return Literal(s, datatype=dt)
        else:
            return Literal(s, lang=lang)

    def newList(self, n, f):
        """Emit the rdf:first/rdf:rest chain for ``n`` in formula ``f``.

        Returns rdf:nil for an empty list, otherwise the blank node at
        the head of the chain. The chain is built iteratively so that
        long lists neither hit the recursion limit nor copy the list for
        every element.
        """
        nil = self.newSymbol(
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'
        )
        if not n:
            return nil

        first = self.newSymbol(
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#first'
        )
        rest = self.newSymbol(
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#rest')

        head = cell = self.newBlankNode(f)
        for item in n[:-1]:
            self.makeStatement((f, first, cell, item))
            following = self.newBlankNode(f)
            self.makeStatement((f, rest, cell, following))
            cell = following
        self.makeStatement((f, first, cell, n[-1]))
        self.makeStatement((f, rest, cell, nil))
        return head

    def newSet(self, *args):
        """Return the arguments as a set."""
        return set(args)

    def setDefaultNamespace(self, *args):
        """Return the reprs of the arguments joined by ":"."""
        return ':'.join(repr(n) for n in args)

    def makeStatement(self, quadruple, why=None):
        """Add the statement ``(formula, predicate, subject, object)``.

        Note the order of the tuple: the predicate comes before the
        subject. Terms are passed through normalise first.
        """
        f, p, s, o = quadruple

        if hasattr(p, 'formula'):
            raise Exception("Formula used as predicate")

        s = self.normalise(f, s)
        p = self.normalise(f, p)
        o = self.normalise(f, o)

        if f == self.rootFormula:
            # print s, p, o, '.'
            self.graph.add((s, p, o))
        elif isinstance(f, Formula):
            f.quotedgraph.add((s, p, o))
        else:
            f.add((s, p, o))

        # return str(quadruple)

    def normalise(self, f, n):
        """Convert a parser value into an rdflib term.

        Tuples become a URIRef of their second element; bool, int,
        Decimal and float become typed Literals; a value registered as
        an existential of Formula ``f`` is replaced by its blank node.
        Everything else is returned unchanged.
        """
        if isinstance(n, tuple):
            return URIRef(str(n[1]))

        # bool must be tested before int: bool is a subclass of int.
        if isinstance(n, bool):
            s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE)
            return s

        if isinstance(n, int):
            s = Literal(str(n), datatype=INTEGER_DATATYPE)
            return s

        if isinstance(n, Decimal):
            value = str(n)
            if value == '-0':
                value = '0'
            s = Literal(value, datatype=DECIMAL_DATATYPE)
            return s

        if isinstance(n, float):
            s = Literal(str(n), datatype=DOUBLE_DATATYPE)
            return s

        if isinstance(f, Formula):
            if n in f.existentials:
                return f.existentials[n]

        # if isinstance(n, Var):
        #     if f.universals.has_key(n):
        #         return f.universals[n]
        #     f.universals[n] = f.newBlankNode()
        #     return f.universals[n]

        return n

    def intern(self, something):
        """Return the argument unchanged."""
        return something

    def bind(self, pfx, uri):
        """Do nothing; prefix bindings are not recorded by the sink."""
        pass  # print pfx, ':', uri

    def startDoc(self, formula):
        """Remember ``formula`` as the root formula of the document."""
        self.rootFormula = formula

    def endDoc(self, formula):
        """Do nothing."""
        pass
1818
1819
1820 ###################################################
1821 #
1822 # Utilities
1823 #
1824
1825
@py3compat.format_doctest_out
def hexify(ustr):
    """Use URL encoding to return an ASCII string
    corresponding to the given UTF8 string

    Characters whose code is below 33 or above 126 are replaced by a
    percent sign followed by the code in upper-case hex (at least two
    digits); all other characters are copied as they are. The result is
    passed through ``b``.

    >>> hexify("http://example/a b")
    %(b)s'http://example/a%%20b'

    """
    # s1=ustr.encode('utf-8')
    pieces = []
    for ch in ustr:  # .encode('utf-8'):
        code = ord(ch)
        if 33 <= code <= 126:
            pieces.append("%c" % code)
        else:
            pieces.append("%%%02X" % code)
    return b("".join(pieces))
1844
1845
class TurtleParser(Parser):

    """
    An RDFLib parser for Turtle

    See http://www.w3.org/TR/turtle/
    """

    def __init__(self):
        pass

    def parse(self, source, graph, encoding="utf-8", turtle=True):
        """Parse ``source`` into ``graph``.

        ``encoding`` must be None or "utf-8"; anything else raises an
        Exception. The base URI is the source's public id, else its
        system id, made absolute by the graph. After parsing, every
        prefix binding collected by the parser is bound on ``graph``.
        ``turtle`` is passed on to SinkParser.
        """
        if encoding not in [None, "utf-8"]:
            # The two message parts are joined by implicit concatenation.
            # They used to be separated by a comma, which built a tuple
            # and made the "%" below raise TypeError instead.
            raise Exception(
                "N3/Turtle files are always utf-8 encoded, "
                "I was passed: %s" % encoding)

        sink = RDFSink(graph)

        baseURI = graph.absolutize(
            source.getPublicId() or source.getSystemId() or "")
        p = SinkParser(sink, baseURI=baseURI, turtle=turtle)

        p.loadStream(source.getByteStream())

        for prefix, namespace in p._bindings.items():
            graph.bind(prefix, namespace)
1874
1875
class N3Parser(TurtleParser):

    """
    An RDFLib parser for Notation3

    See http://www.w3.org/DesignIssues/Notation3.html

    """

    def __init__(self):
        pass

    def parse(self, source, graph, encoding="utf-8"):
        """Parse N3 from ``source`` into ``graph``.

        The graph's store must be both context aware and formula aware.
        Parsing is delegated to TurtleParser.parse with ``turtle=False``
        and a ConjunctiveGraph over the same store as the target.
        """
        # we're currently being handed a Graph, not a ConjunctiveGraph
        store = graph.store
        assert store.context_aware  # is this implied by formula_aware
        assert store.formula_aware

        wrapper = ConjunctiveGraph(store=store)
        # TODO: CG __init__ should have a default_context arg
        wrapper.default_context = graph
        # TODO: update N3Processor so that it can use conj_graph as the sink
        wrapper.namespace_manager = graph.namespace_manager

        TurtleParser.parse(self, source, wrapper, encoding, turtle=False)
1900
1901
def _test():  # pragma: no cover
    """Run doctest.testmod() with its default arguments."""
    from doctest import testmod
    testmod()
1905
1906
1907 # if __name__ == '__main__':
1908 # _test()
1909
def main():  # pragma: no cover
    """Parse the file named by the first command-line argument.

    The base URI is "file://" plus the path joined to the current
    directory, and the "" prefix is bound to that base plus "#". Every
    quad of the resulting graph is printed.
    """
    g = ConjunctiveGraph()

    sink = RDFSink(g)
    base_uri = 'file://' + os.path.join(os.getcwd(), sys.argv[1])

    p = SinkParser(sink, baseURI=base_uri)
    p._bindings[''] = p._baseURI + '#'
    p.startDoc()

    # ``with`` closes the file even when read() raises.
    with open(sys.argv[1], 'rb') as f:
        rdbytes = f.read()

    p.feed(rdbytes)
    p.endDoc()
    for t in g.quads((None, None, None)):

        print(t)
1929
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
1932
1933 # ends