Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/notation3.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 notation3.py - Standalone Notation3 Parser | |
4 Derived from CWM, the Closed World Machine | |
5 | |
6 Authors of the original suite: | |
7 | |
8 * Dan Connolly <@@> | |
9 * Tim Berners-Lee <@@> | |
10 * Yosi Scharf <@@> | |
11 * Joseph M. Reagle Jr. <reagle@w3.org> | |
12 * Rich Salz <rsalz@zolera.com> | |
13 | |
14 http://www.w3.org/2000/10/swap/notation3.py | |
15 | |
16 Copyright 2000-2007, World Wide Web Consortium. | |
17 Copyright 2001, MIT. | |
18 Copyright 2001, Zolera Systems Inc. | |
19 | |
20 License: W3C Software License | |
21 http://www.w3.org/Consortium/Legal/copyright-software | |
22 | |
23 Modified by Sean B. Palmer | |
24 Copyright 2007, Sean B. Palmer. | |
25 | |
26 Modified to work with rdflib by Gunnar Aastrand Grimnes | |
27 Copyright 2010, Gunnar A. Grimnes | |
28 | |
29 """ | |
30 | |
31 # Python standard libraries | |
32 import types | |
33 import sys | |
34 import os | |
35 import re | |
36 import codecs | |
37 import warnings | |
38 | |
39 from decimal import Decimal | |
40 | |
41 from uuid import uuid4 | |
42 | |
43 from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id | |
44 from rdflib.graph import QuotedGraph, ConjunctiveGraph, Graph | |
45 from rdflib import py3compat | |
46 b = py3compat.b | |
47 | |
48 __all__ = ['BadSyntax', 'N3Parser', 'TurtleParser', | |
49 "splitFragP", "join", "base", | |
50 "runNamespace", "uniqueURI", "hexify"] | |
51 | |
52 from rdflib.parser import Parser | |
53 | |
54 | |
def splitFragP(uriref, punct=0):
    """Split a URI reference just before its fragment identifier.

    The ``#`` stays attached to the fragment part.  ``punct`` is accepted
    but not used by this function.

    e.g.

    >>> splitFragP("abc#def")
    ('abc', '#def')

    >>> splitFragP("abcdef")
    ('abcdef', '')

    """
    head, hash_mark, fragment = uriref.rpartition("#")
    if not hash_mark:
        # No '#' present: rpartition leaves the whole input in the last slot.
        return uriref, ''
    return head, hash_mark + fragment
75 | |
76 | |
# NOTE: the decorator is applied to the docstring below; the docstring carries
# ``%(u)s`` placeholders, so it must not gain any other ``%`` characters.
@py3compat.format_doctest_out
def join(here, there):
    """join an absolute URI and URI reference
    (non-ascii characters are supported/doctested;
    haven't checked the details of the IRI spec though)

    ``here`` is assumed to be absolute.
    ``there`` is URI reference.

    >>> join('http://example/x/y/z', '../abc')
    'http://example/x/abc'

    Raise ValueError if there uses relative path
    syntax but here has no hierarchical path.

    >>> join('mid:foo@example', '../foo') # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
        raise ValueError(here)
    ValueError: Base <mid:foo@example> has no slash
    after colon - with relative '../foo'.

    >>> join('http://example/x/y/z', '')
    'http://example/x/y/z'

    >>> join('mid:foo@example', '#foo')
    'mid:foo@example#foo'

    We grok IRIs

    >>> len(%(u)s'Andr\\xe9')
    5

    >>> join('http://example.org/', %(u)s'#Andr\\xe9')
    %(u)s'http://example.org/#Andr\\xe9'
    """

    # assert(here.find("#") < 0), \
    #     "Base may not contain hash: '%s'" % here  # why must caller splitFrag?

    # Positions of the first '/' and ':' in the reference (-1 when absent).
    slashl = there.find('/')
    colonl = there.find(':')

    # join(base, 'foo:/') -- absolute
    # A colon before any slash means ``there`` carries its own scheme.
    if colonl >= 0 and (slashl < 0 or colonl < slashl):
        return there

    bcolonl = here.find(':')
    assert(bcolonl >= 0), \
        "Base uri '%s' is not absolute" % here  # else it's not absolute

    # An empty path means ``there`` is empty or fragment-only: keep the base.
    path, frag = splitFragP(there)
    if not path:
        return here + frag

    # join('mid:foo@example', '../foo') bzzt
    if here[bcolonl + 1:bcolonl + 2] != '/':
        raise ValueError(
            ("Base <%s> has no slash after "
             "colon - with relative '%s'.") % (here, there))

    # bpath: index in ``here`` where the path part starts (after the
    # authority when the base has the form scheme://authority/...).
    if here[bcolonl + 1:bcolonl + 3] == '//':
        bpath = here.find('/', bcolonl + 3)
    else:
        bpath = bcolonl + 1

    # join('http://xyz', 'foo')
    # No slash after the authority: treat the base as having an empty root path.
    if bpath < 0:
        bpath = len(here)
        here = here + '/'

    # join('http://xyz/', '//abc') => 'http://abc'
    if there[:2] == '//':
        return here[:bcolonl + 1] + there

    # join('http://xyz/', '/abc') => 'http://xyz/abc'
    if there[:1] == '/':
        return here[:bpath] + there

    # slashr: index of the last '/' of the base that is still kept.
    slashr = here.rfind('/')

    # Consume leading './' and '../' segments of the relative path; each '../'
    # drops one trailing directory from the base, never going above bpath.
    while 1:
        if path[:2] == './':
            path = path[2:]
        if path == '.':
            path = ''
        elif path[:3] == '../' or path == '..':
            path = path[3:]
            i = here.rfind('/', bpath, slashr)
            if i >= 0:
                here = here[:i + 1]
                slashr = i
        else:
            break

    return here[:slashr + 1] + path + frag
172 | |
173 | |
def base():
    """The base URI for this process - the Web equivalent of cwd.

    Relative or absolute unix-standard filenames parsed relative to
    this yield the URI of the file.
    If we had a reliable way of getting a computer name,
    we should put it in the hostname just to prevent ambiguity.

    """
    # return "file://" + hostname + os.getcwd() + "/"
    working_dir = _fixslash(os.getcwd())
    return "".join(("file://", working_dir, "/"))
185 | |
186 | |
187 def _fixslash(s): | |
188 """ Fix windowslike filename to unixlike - (#ifdef WINDOWS)""" | |
189 s = s.replace("\\", "/") | |
190 if s[0] != "/" and s[1] == ":": | |
191 s = s[2:] # @@@ Hack when drive letter present | |
192 return s | |
193 | |
194 | |
# Positions of the four parts inside a statement quadruple.
CONTEXT, PRED, SUBJ, OBJ = range(4)

PARTS = (PRED, SUBJ, OBJ)
ALL4 = (CONTEXT, PRED, SUBJ, OBJ)

# Tags identifying the kind of a term.
SYMBOL = 0
FORMULA = 1
LITERAL = 2
ANONYMOUS = 3
LITERAL_DT = 21
LITERAL_LANG = 22
XMLLITERAL = 25

Logic_NS = "http://www.w3.org/2000/10/swap/log#"
NODE_MERGE_URI = Logic_NS + "is"  # Pseudo-property indicating node merging
forSomeSym = Logic_NS + "forSome"
forAllSym = Logic_NS + "forAll"

RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
OWL_NS = "http://www.w3.org/2002/07/owl#"
DAML_sameAs_URI = OWL_NS + "sameAs"
parsesTo_URI = Logic_NS + "parsesTo"
RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/"

List_NS = RDF_NS_URI  # From 20030808
_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#"

# (kind, URI) pairs for the list vocabulary, all in List_NS.
N3_first, N3_rest, N3_li, N3_nil, N3_List, N3_Empty = (
    (SYMBOL, List_NS + local_name)
    for local_name in ("first", "rest", "li", "nil", "List", "Empty"))
232 | |
233 | |
# Cache for runNamespace(); filled in on first use.
runNamespaceValue = None


def runNamespace():
    "Return a URI suitable as a namespace for run-local objects"
    # @@@ include hostname (privacy?) (hash it?)
    global runNamespaceValue
    if runNamespaceValue is not None:
        return runNamespaceValue
    # First call: derive the namespace from the process base URI and a
    # generated id, then remember it for later calls.
    runNamespaceValue = join(base(), _unique_id()) + '#'
    return runNamespaceValue
244 | |
# Counter behind uniqueURI(); incremented on every call.
nextu = 0


def uniqueURI():
    "A unique URI"
    global nextu
    nextu = nextu + 1
    # return runNamespace() + "u_" + `nextu`
    return "".join((runNamespace(), "u_", str(nextu)))
254 | |
255 | |
tracking = False
chatty_flag = 50

# from why import BecauseOfData, becauseSubexpression


def BecauseOfData(*args, **kargs):
    """Placeholder for the ``why`` module's BecauseOfData: does nothing."""
    # print args, kargs
    return None


def becauseSubexpression(*args, **kargs):
    """Placeholder for the ``why`` module's becauseSubexpression: does nothing."""
    # print args, kargs
    return None
270 | |
N3_forSome_URI, N3_forAll_URI = forSomeSym, forAllSym

# Magic resources we know about

# This is the hash on namespace URIs.
ADDED_HASH = "#"  # Stop where we use this in case we want to remove it!

RDF_type = (SYMBOL, RDF_type_URI)
DAML_sameAs = (SYMBOL, DAML_sameAs_URI)

LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies"

(BOOLEAN_DATATYPE,
 DECIMAL_DATATYPE,
 DOUBLE_DATATYPE,
 FLOAT_DATATYPE,
 INTEGER_DATATYPE) = (
    _XSD_PFX + type_name
    for type_name in ("boolean", "decimal", "double", "float", "integer"))

option_noregen = 0  # If set, do not regenerate genids on output

# @@ I18n - the notname chars need extending for well known unicode non-text
# characters. The XML spec switched to assuming unknown things were name
# characters.
# _namechars = string.lowercase + string.uppercase + string.digits + '_-'
_notQNameChars = "\t\r\n !\"#$&'()*,+/;<=>?@[\\]^`{|}~"  # else valid qname :-/
_notKeywordsChars = _notQNameChars + "."
_notNameChars = _notQNameChars + ":"  # Assume anything else valid name :-/
_rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

hexChars = 'ABCDEFabcdef0123456789'
escapeChars = "(_~.-!$&'()*+,;=/?#@%)"  # valid for \ escapes in localnames
304 | |
def unicodeExpand(m):
    """Return the character for the hex code point captured in ``m.group(1)``.

    ``m`` is a regex match object whose first group holds hex digits.

    Raises ``Exception`` (as before) when the digits do not denote a valid
    code point.  Only the conversion failures raised by ``int``/``chr`` are
    translated; unrelated errors such as ``KeyboardInterrupt`` are no longer
    swallowed by a bare ``except``.
    """
    try:
        return chr(int(m.group(1), 16))
    except (ValueError, OverflowError) as err:
        # chr() raises ValueError above 0x10FFFF and OverflowError for
        # values too large for a C int.
        raise Exception(
            "Invalid unicode code point: " + m.group(1)) from err
310 | |
# On narrow Python builds, replace unicodeExpand with a variant that warns
# and falls back to the 'unicode_escape' codec instead of raising when
# chr() rejects the code point.
if py3compat.narrow_build:
    def unicodeExpand(m):
        """Expand the escape matched by ``m``, degrading gracefully.

        ``m.group(1)`` holds the hex digits and ``m.group(0)`` the whole
        escape sequence.
        """
        try:
            return chr(int(m.group(1), 16))
        except ValueError:
            warnings.warn(
                'Encountered a unicode char > 0xFFFF in a narrow python build. '
                'Trying to degrade gracefully, but this can cause problems '
                'later when working with the string:\n%s' % m.group(0))
            # Decode the complete escape text rather than the single code point.
            return codecs.decode(m.group(0), 'unicode_escape')
321 | |
# \uXXXX and \UXXXXXXXX escapes; group 1 captures the hex digits.
unicodeEscape4 = re.compile(
    r'\\u([0-9a-fA-F]{4})')
unicodeEscape8 = re.compile(
    r'\\U([0-9a-fA-F]{8})')


N3CommentCharacter = "#"  # For unix script "#!" compatibility

########################################## Parse string to sink
#
# Regular expressions:
eol = re.compile(
    r'[ \t]*(#[^\n]*)?\r?\n')  # end of line, poss. w/comment
eof = re.compile(
    r'[ \t]*(#[^\n]*)?$')  # end of file, poss. w/comment
ws = re.compile(r'[ \t]*')  # Whitespace not including NL
signed_integer = re.compile(r'[-+]?[0-9]+')  # integer
integer_syntax = re.compile(r'[-+]?[0-9]+')
decimal_syntax = re.compile(r'[-+]?[0-9]*\.[0-9]+')
# Three shapes of a double: "1.5e3" / "1.e3", ".55e3" and "1e3".
# The middle alternative takes one or more fraction digits; it previously
# accepted exactly one, so ".55e3" failed to match.
exponent_syntax = re.compile(r'[-+]?(?:[0-9]+\.[0-9]*(?:e|E)[-+]?[0-9]+|' +
                             r'\.[0-9]+(?:e|E)[-+]?[0-9]+|' +
                             r'[0-9]+(?:e|E)[-+]?[0-9]+)')
digitstring = re.compile(r'[0-9]+')  # Unsigned integer
interesting = re.compile(r"""[\\\r\n\"\']""")
langcode = re.compile(r'[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*')
348 | |
349 | |
350 class SinkParser: | |
def __init__(self, store, openFormula=None, thisDoc="", baseURI=None,
             genPrefix="", why=None, turtle=False):
    """Prepare a parser that emits statements into ``store``.

    note: namespace names should *not* end in # ;
    the # will get added during qname processing

    ``thisDoc`` is the document URI (must contain ':' when given),
    ``baseURI`` overrides it as the base for relative references,
    ``genPrefix`` is passed on to the store when non-empty, ``openFormula``
    is used as the top-level formula instead of creating a new one, and
    ``turtle`` switches on Turtle-mode restrictions.
    """
    bindings = {}
    if thisDoc != "":
        assert ':' in thisDoc, "Document URI not absolute: <%s>" % thisDoc
        bindings[""] = thisDoc + "#"  # default
    self._bindings = bindings

    self._store = store
    if genPrefix:
        store.setGenPrefix(genPrefix)  # pass it on

    self._thisDoc = thisDoc
    self._genPrefix = genPrefix
    self._reason = why  # Why the parser was asked to parse this

    # Position bookkeeping used for error messages and bnode ids.
    self.lines = 0
    self.startOfLine = 0

    self.keywords = ['a', 'this', 'bind', 'has', 'is', 'of',
                     'true', 'false']
    self.keywordsSet = 0  # Then only can others be considered qnames

    self._anonymousNodes = {}  # anon nodes already declared, ln: Term
    self._variables = {}
    self._parentVariables = {}

    self.turtle = turtle  # raise exception when encountering N3 extensions
    # Turtle allows single or double quotes around strings, whereas N3
    # only allows double quotes.
    if turtle:
        self.string_delimiters = ('"', "'")
    else:
        self.string_delimiters = ('"',)

    self._reason2 = None  # Why these triples
    # was: diag.tracking
    if tracking:
        self._reason2 = BecauseOfData(
            store.newSymbol(thisDoc), because=self._reason)

    # An explicit base wins, then the document URI, otherwise no base.
    self._baseURI = baseURI or thisDoc or None
    assert not self._baseURI or ':' in self._baseURI

    if not self._genPrefix:
        if self._thisDoc:
            self._genPrefix = self._thisDoc + "#_g"
        else:
            self._genPrefix = uniqueURI()

    if openFormula is not None:
        self._formula = openFormula
    elif self._thisDoc:
        self._formula = store.newFormula(thisDoc + "#_formula")
    else:
        self._formula = store.newFormula()

    self._context = self._formula
    self._parentContext = None
415 | |
def here(self, i):
    """Build an identifier string from a position in the file.

    The result is the generation prefix followed by the current line
    counter and the 1-based character offset of ``i`` within that line.
    It identifies the '[' or path character that introduced a blank node,
    which keeps bnode naming repeatable between runs and helps point
    people at the bnode in the document.
    """
    column = i - self.startOfLine + 1
    return "%s_L%iC%i" % (self._genPrefix, self.lines, column)
429 | |
def formula(self):
    """Return the top-level formula this parser writes into."""
    top_level = self._formula
    return top_level
432 | |
def loadStream(self, stream):
    """Read ``stream`` completely and parse it via ``loadBuf``."""
    contents = stream.read()  # Not ideal
    return self.loadBuf(contents)
435 | |
def loadBuf(self, buf):
    """Parses a buffer and returns its top level formula"""
    self.startDoc()
    self.feed(buf)
    result = self.endDoc()  # self._formula
    return result
442 | |
def feed(self, octets):
    """Feed an octet stream to the parser

    if BadSyntax is raised, the string
    passed in the exception object is the
    remainder after any statements have been parsed.
    So if there is more data to feed to the
    parser, it should be straightforward to recover."""

    if isinstance(octets, str):
        text = octets
    else:
        text = octets.decode('utf-8')
        # NB already decoded, so \ufeff
        if text[:1] == codecs.BOM_UTF8.decode('utf-8'):
            text = text[1:]

    pos = 0
    while pos >= 0:
        start = self.skipSpace(text, pos)
        if start < 0:
            return

        pos = self.directiveOrStatement(text, start)
        if pos < 0:
            #print("# next char: %s" % s[j])
            self.BadSyntax(text, start,
                           "expected directive or statement")
471 | |
def directiveOrStatement(self, argstr, h):
    """Parse one directive or statement starting at ``h``.

    Returns the position after the construct, or a negative number at end
    of input or when nothing matched.  In Turtle mode a SPARQL-style
    directive is tried first and needs no terminating dot; ``@``-directives
    and statements have their result passed through ``checkDot``.
    """
    start = self.skipSpace(argstr, h)
    if start < 0:
        return start  # EOF

    if self.turtle:
        end = self.sparqlDirective(argstr, start)
        if end >= 0:
            return end

    end = self.directive(argstr, start)
    if end < 0:
        end = self.statement(argstr, start)
        if end < 0:
            return end
    return self.checkDot(argstr, end)
492 | |
493 # @@I18N | |
494 # _namechars = string.lowercase + string.uppercase + string.digits + '_-' | |
495 | |
def tok(self, tok, argstr, i, colon=False):
    """Check for keyword.  Space must have been stripped on entry.

    A keyword is recognised when it is written with a leading ``@`` or
    when it is listed in ``self.keywords``.  It must be followed by a
    character that cannot continue a keyword, or by the end of the input.

    if colon, then keyword followed by colon is ok
    (@prefix:<blah> is ok, rdf:type shortcut a must be followed by ws)

    Returns the position just after the keyword, or -1 when there is no
    match.
    """

    assert tok[0] not in _notNameChars  # not for punctuation
    if argstr[i:i + 1] == "@":
        i = i + 1
    elif tok not in self.keywords:
        return -1  # No, this has neither keywords declaration nor "@"

    end = i + len(tok)
    # The keyword text itself must match in every case.  The old expression
    # "match and delimiter or colon-case" let the colon case succeed without
    # a match because ``and`` binds tighter than ``or``.
    if argstr[i:end] != tok:
        return -1

    # Slicing yields "" at end of input instead of raising IndexError.
    following = argstr[end:end + 1]
    if (following == ""
            or following in _notKeywordsChars
            or (colon and following == ':')):
        return end
    return -1
518 | |
def sparqlTok(self, tok, argstr, i):
    """Check for SPARQL keyword.  Space must have been stripped on entry.

    Case insensitive and not preceded by @.  The keyword must be followed
    by a character that cannot continue a qname, or by the end of the
    input.

    Returns the position just after the keyword, or -1 when there is no
    match.
    """

    assert tok[0] not in _notNameChars  # not for punctuation

    end = i + len(tok)
    if argstr[i:end].lower() != tok.lower():
        return -1

    # Slicing yields "" at end of input instead of raising IndexError.
    following = argstr[end:end + 1]
    if following == "" or following in _notQNameChars:
        return end
    return -1
533 | |
534 | |
def directive(self, argstr, i):
    """Parse an ``@``-style directive starting at ``i``.

    Handles ``bind`` (rejected as obsolete), ``keywords``, ``forAll``,
    ``forSome``, ``prefix`` and ``base``.  The N3-only directives are
    rejected in Turtle mode.

    Returns the position after the directive (the terminating dot is left
    for the caller), a negative number at end of input, or -1 when the text
    at ``i`` is not a directive.  Syntax errors are reported through
    ``self.BadSyntax``.
    """
    # The skipped position is only used for EOF detection; the keyword
    # checks below look at ``i`` itself, as before.
    j = self.skipSpace(argstr, i)
    if j < 0:
        return j  # eof
    res = []

    j = self.tok('bind', argstr, i)  # implied "#". Obsolete.
    if j > 0:
        self.BadSyntax(argstr, i,
                       "keyword bind is obsolete: use @prefix")

    j = self.tok('keywords', argstr, i)
    if j > 0:
        if self.turtle:
            self.BadSyntax(argstr, i, "Found 'keywords' when in Turtle mode.")

        end = self.commaSeparatedList(argstr, j, res, self.bareWord)
        if end < 0:
            # Report at the start of the list, not at the negative result.
            self.BadSyntax(argstr, j,
                           "'@keywords' needs comma separated list of words")
        self.setKeywords(res[:])
        return end

    j = self.tok('forAll', argstr, i)
    if j > 0:
        if self.turtle:
            self.BadSyntax(argstr, i, "Found 'forAll' when in Turtle mode.")

        end = self.commaSeparatedList(argstr, j, res, self.uri_ref2)
        if end < 0:
            self.BadSyntax(argstr, j,
                           "Bad variable list after @forAll")
        for x in res:
            # self._context.declareUniversal(x)
            if x not in self._variables or x in self._parentVariables:
                self._variables[x] = self._context.newUniversal(x)
        return end

    j = self.tok('forSome', argstr, i)
    if j > 0:
        if self.turtle:
            self.BadSyntax(argstr, i, "Found 'forSome' when in Turtle mode.")

        end = self.commaSeparatedList(argstr, j, res, self.uri_ref2)
        if end < 0:
            self.BadSyntax(argstr, j,
                           "Bad variable list after @forSome")
        for x in res:
            self._context.declareExistential(x)
        return end

    j = self.tok('prefix', argstr, i, colon=True)  # no implied "#"
    if j >= 0:
        t = []
        i = self.qname(argstr, j, t)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "expected qname after @prefix")
        j = self.uri_ref2(argstr, i, t)
        if j < 0:
            self.BadSyntax(argstr, i,
                           "expected <uriref> after @prefix _qname_")
        ns = self.uriOf(t[1])

        if self._baseURI:
            ns = join(self._baseURI, ns)
        elif ":" not in ns:
            self.BadSyntax(argstr, j,
                           "With no base URI, cannot use " +
                           "relative URI in @prefix <" + ns + ">")
        assert ':' in ns  # must be absolute
        self._bindings[t[0][0]] = ns
        self.bind(t[0][0], hexify(ns))
        return j

    j = self.tok('base', argstr, i)  # Added 2007/7/7
    if j >= 0:
        t = []
        i = self.uri_ref2(argstr, j, t)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "expected <uri> after @base ")
        ns = self.uriOf(t[0])

        if self._baseURI:
            ns = join(self._baseURI, ns)
        elif ":" not in ns:
            # Only a relative URI needs a previous base; an absolute one
            # is accepted as the first base (same rule as for @prefix).
            self.BadSyntax(argstr, j,
                           "With no previous base URI, cannot use " +
                           "relative URI in @base <" + ns + ">")
        assert ':' in ns  # must be absolute
        self._baseURI = ns
        return i

    return -1  # Not a directive, could be something else.
630 | |
def sparqlDirective(self, argstr, i):
    """Parse a SPARQL-style ``PREFIX`` or ``BASE`` directive at ``i``.

    turtle and trig support BASE/PREFIX without @ and without
    terminating .

    Returns the position after the directive, a negative number at end of
    input, or -1 when the text at ``i`` is not such a directive.  Syntax
    errors are reported through ``self.BadSyntax``.
    """

    # The skipped position is only used for EOF detection; the keyword
    # checks below look at ``i`` itself, as before.
    j = self.skipSpace(argstr, i)
    if j < 0:
        return j  # eof

    j = self.sparqlTok('PREFIX', argstr, i)
    if j >= 0:
        t = []
        i = self.qname(argstr, j, t)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "expected qname after @prefix")
        j = self.uri_ref2(argstr, i, t)
        if j < 0:
            self.BadSyntax(argstr, i,
                           "expected <uriref> after @prefix _qname_")
        ns = self.uriOf(t[1])

        if self._baseURI:
            ns = join(self._baseURI, ns)
        elif ":" not in ns:
            self.BadSyntax(argstr, j,
                           "With no base URI, cannot use " +
                           "relative URI in @prefix <" + ns + ">")
        assert ':' in ns  # must be absolute
        self._bindings[t[0][0]] = ns
        self.bind(t[0][0], hexify(ns))
        return j

    j = self.sparqlTok('BASE', argstr, i)
    if j >= 0:
        t = []
        i = self.uri_ref2(argstr, j, t)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "expected <uri> after @base ")
        ns = self.uriOf(t[0])

        if self._baseURI:
            ns = join(self._baseURI, ns)
        elif ":" not in ns:
            # Only a relative URI needs a previous base; an absolute one
            # is accepted as the first base (same rule as for PREFIX).
            self.BadSyntax(argstr, j,
                           "With no previous base URI, cannot use " +
                           "relative URI in @base <" + ns + ">")
        assert ':' in ns  # must be absolute
        self._baseURI = ns
        return i

    return -1  # Not a directive, could be something else.
686 | |
687 | |
def bind(self, qn, uri):
    """Register ``uri`` for prefix ``qn`` with the store.

    An empty prefix sets the store's default namespace instead.  ``uri``
    must already be a ``bytes`` value.
    """
    assert isinstance(uri, bytes), "Any unicode must be %x-encoded already"
    store = self._store
    if qn != "":
        store.bind(qn, uri)
        return
    store.setDefaultNamespace(uri)
695 | |
def setKeywords(self, k):
    """Takes a list of strings.

    ``None`` only clears the ``keywordsSet`` flag and leaves the current
    keyword list untouched; any other value replaces the list and sets
    the flag.
    """
    if k is None:
        self.keywordsSet = 0
        return
    self.keywords = k
    self.keywordsSet = 1
703 | |
def startDoc(self):
    """Tell the store that parsing of the top-level formula begins."""
    # was: self._store.startDoc()
    top_level = self._formula
    self._store.startDoc(top_level)
707 | |
def endDoc(self):
    """Signal end of document and stop parsing. returns formula"""
    top_level = self._formula
    self._store.endDoc(top_level)  # don't canonicalize yet
    return self._formula
712 | |
def makeStatement(self, quadruple):
    """Hand one parsed statement (a quadruple) to the store."""
    # $$$$$$$$$$$$$$$$$$$$$
    # print "# Parser output: ", `quadruple`
    reason = self._reason2
    self._store.makeStatement(quadruple, why=reason)
717 | |
def statement(self, argstr, i):
    """Parse a subject followed by its property list.

    Returns the position after the property list, or the negative result
    of the subject parse when no subject is found.  A missing property
    list is reported through ``self.BadSyntax``.
    """
    subjects = []
    # Allow literal for subject - extends RDF
    after_subject = self.object(argstr, i, subjects)
    if after_subject < 0:
        return after_subject

    end = self.property_list(argstr, after_subject, subjects[0])
    if end < 0:
        self.BadSyntax(
            argstr, after_subject, "expected propertylist")
    return end
731 | |
def subject(self, argstr, i, res):
    """Parse a subject; delegates to ``item``."""
    end = self.item(argstr, i, res)
    return end
734 | |
def verb(self, argstr, i, res):
    """Parse a verb and append ``(direction, predicate)`` to ``res``.

    Recognised forms:

        has _prop_
        is _prop_ of
        a
        =
        _prop_
        >- prop ->
        <- prop -<
        _operator_

    ``direction`` is ``'->'`` for a forward predicate and ``'<-'`` for a
    reversed one.  Returns the position after the verb, a negative number
    at end of input, or -1 when no verb is found.  N3-only forms are
    rejected in Turtle mode and the ``>-``/``<-`` arrow syntax is rejected
    as obsolete, both through ``self.BadSyntax``.
    """

    # The skipped position is only used for EOF detection; the checks below
    # look at ``i`` itself, as before.
    j = self.skipSpace(argstr, i)
    if j < 0:
        return j  # eof

    r = []

    j = self.tok('has', argstr, i)
    if j >= 0:
        if self.turtle:
            self.BadSyntax(argstr, i, "Found 'has' keyword in Turtle mode")

        i = self.prop(argstr, j, r)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "expected property after 'has'")
        res.append(('->', r[0]))
        return i

    j = self.tok('is', argstr, i)
    if j >= 0:
        if self.turtle:
            self.BadSyntax(argstr, i, "Found 'is' keyword in Turtle mode")

        i = self.prop(argstr, j, r)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "expected <property> after 'is'")
        j = self.skipSpace(argstr, i)
        if j < 0:
            self.BadSyntax(argstr, i,
                           "End of file found, expected property after 'is'")
        i = j
        j = self.tok('of', argstr, i)
        if j < 0:
            self.BadSyntax(argstr, i,
                           "expected 'of' after 'is' <prop>")
        res.append(('<-', r[0]))
        return j

    j = self.tok('a', argstr, i)
    if j >= 0:
        res.append(('->', RDF_type))
        return j

    if argstr[i:i + 2] == "<=":
        if self.turtle:
            self.BadSyntax(argstr, i,
                           "Found '<=' in Turtle mode. ")

        res.append(('<-', self._store.newSymbol(Logic_NS + "implies")))
        return i + 2

    if argstr[i:i + 1] == "=":
        if self.turtle:
            self.BadSyntax(argstr, i, "Found '=' in Turtle mode")
        if argstr[i + 1:i + 2] == ">":
            res.append(('->', self._store.newSymbol(Logic_NS + "implies")))
            return i + 2
        res.append(('->', DAML_sameAs))
        return i + 1

    if argstr[i:i + 2] == ":=":
        if self.turtle:
            self.BadSyntax(argstr, i, "Found ':=' in Turtle mode")

        # patch file relates two formulae, uses this @@ really?
        # NOTE(review): unlike "implies" above this appends the bare URI
        # string rather than a store symbol - confirm that is intended.
        res.append(('->', Logic_NS + "becomes"))
        return i + 2

    j = self.prop(argstr, i, r)
    if j >= 0:
        res.append(('->', r[0]))
        return j

    if argstr[i:i + 2] == ">-" or argstr[i:i + 2] == "<-":
        # ``j`` is negative here, so report the error at ``i`` where the
        # obsolete arrow actually starts.
        self.BadSyntax(argstr, i,
                       ">- ... -> syntax is obsolete.")

    return -1
824 | |
def prop(self, argstr, i, res):
    """Parse a property; delegates to ``item``."""
    end = self.item(argstr, i, res)
    return end
827 | |
def item(self, argstr, i, res):
    """Parse an item; delegates to ``path``."""
    end = self.path(argstr, i, res)
    return end
830 | |
def blankNode(self, uri=None):
    """Ask the store for a new blank node in the current context."""
    store = self._store
    return store.newBlankNode(self._context, uri, why=self._reason2)
833 | |
def path(self, argstr, i, res):
    """Parse the path production.

    A path is a node or literal followed by any number of ``!node``
    (forward) or ``^node`` (reverse) steps written without spaces.  Each
    step creates a blank node, emits one statement linking it to the
    previous term, and leaves the blank node as the result.

    The final term is appended to ``res``.  Returns the position after the
    path, or the negative result of the first node parse.
    """
    j = self.nodeOrLiteral(argstr, i, res)
    if j < 0:
        return j  # nope

    # Compare against the two operators explicitly: at end of input the
    # slice is "" and ``"" in "!^"`` would be true, starting a bogus step.
    while argstr[j:j + 1] in ("!", "^"):  # no spaces, must follow exactly (?)
        ch = argstr[j:j + 1]
        op_pos = j
        subj = res.pop()
        obj = self.blankNode(uri=self.here(j))
        j = self.node(argstr, j + 1, res)
        if j < 0:
            # Report at the operator; ``j`` itself is negative here.
            self.BadSyntax(argstr, op_pos,
                           "EOF found in middle of path syntax")
        pred = res.pop()
        if ch == "^":  # Reverse traverse
            self.makeStatement((self._context, pred, obj, subj))
        else:
            self.makeStatement((self._context, pred, subj, obj))
        res.append(obj)
    return j
856 | |
def anonymousNode(self, ln):
    """Remember or generate a term for one of these _: anonymous nodes"""
    known = self._anonymousNodes
    term = known.get(ln)
    if term is None:
        # First sighting of this label: create the node and cache it.
        term = self._store.newBlankNode(self._context, why=self._reason2)
        known[ln] = term
    return term
865 | |
def node(self, argstr, i, res, subjectAlready=None):
    """Parse the <node> production.

    Space is now skipped once at the beginning
    instead of in multiple calls to self.skipSpace().

    Handles, in this order: ``[ ... ]`` blank nodes with a property list,
    ``{ ... }`` formulae and ``{$ ... $}`` sets (not in Turtle mode),
    ``( ... )`` lists and ``($ ... )`` sets, the obsolete ``this`` keyword,
    the booleans ``true``/``false``, and finally - only when no subject was
    supplied - a URI reference.

    The parsed term is appended to ``res``.  ``subjectAlready``, when
    given, is used as the subject of a ``[ ... ]`` or ``{ ... }`` construct
    instead of a freshly created one.  Returns the position after the
    node, a negative number at end of input, or -1 when nothing matched.
    Syntax errors are reported through ``self.BadSyntax``; "found end"
    errors are reported at the last valid position rather than at the
    negative value returned by ``skipSpace``.
    """
    subj = subjectAlready

    j = self.skipSpace(argstr, i)
    if j < 0:
        return j  # eof
    i = j
    ch = argstr[i:i + 1]  # Quick 1-character checks first:

    if ch == "[":
        bnodeID = self.here(i)
        j = self.skipSpace(argstr, i + 1)
        if j < 0:
            self.BadSyntax(argstr, i,
                           "EOF after '['")
        # Hack for "is" binding name to anon node
        if argstr[j:j + 1] == "=":
            if self.turtle:
                self.BadSyntax(argstr, j, "Found '[=' or '[ =' when in turtle mode.")
            i = j + 1
            objs = []
            j = self.objectList(argstr, i, objs)
            if j >= 0:
                subj = objs[0]
                if len(objs) > 1:
                    for obj in objs:
                        self.makeStatement((self._context,
                                            DAML_sameAs, subj, obj))
                j = self.skipSpace(argstr, j)
                if j < 0:
                    self.BadSyntax(argstr, i,
                                   "EOF when objectList expected after [ = ")
                if argstr[j:j + 1] == ";":
                    j = j + 1
            else:
                self.BadSyntax(argstr, i,
                               "objectList expected after [= ")

        if subj is None:
            subj = self.blankNode(uri=bnodeID)

        i = self.property_list(argstr, j, subj)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "property_list expected")

        j = self.skipSpace(argstr, i)
        if j < 0:
            self.BadSyntax(argstr, i,
                           "EOF when ']' expected after [ <propertyList>")
        if argstr[j:j + 1] != "]":
            self.BadSyntax(argstr, j,
                           "']' expected")
        res.append(subj)
        return j + 1

    # In Turtle mode '{' is not handled here and falls through to the
    # checks below.
    if not self.turtle and ch == "{":
        ch2 = argstr[i + 1:i + 2]
        if ch2 == '$':
            # a set
            i += 1
            j = i + 1
            List = []
            first_run = True
            while 1:
                i = self.skipSpace(argstr, j)
                if i < 0:
                    self.BadSyntax(argstr, j,
                                   "needed '$}', found end.")
                if argstr[i:i + 2] == '$}':
                    j = i + 2
                    break

                # Every item after the first must be preceded by a comma.
                if not first_run:
                    if argstr[i:i + 1] == ',':
                        i += 1
                    else:
                        self.BadSyntax(
                            argstr, i, "expected: ','")
                else:
                    first_run = False

                item = []
                j = self.item(
                    argstr, i, item)  # @@@@@ should be path, was object
                if j < 0:
                    self.BadSyntax(argstr, i,
                                   "expected item in set or '$}'")
                List.append(self._store.intern(item[0]))
            res.append(self._store.newSet(List, self._context))
            return j
        else:
            # parse a formula
            j = i + 1
            # Save the enclosing scope; it is restored after the closing '}'.
            oldParentContext = self._parentContext
            self._parentContext = self._context
            parentAnonymousNodes = self._anonymousNodes
            grandParentVariables = self._parentVariables
            self._parentVariables = self._variables
            self._anonymousNodes = {}
            self._variables = self._variables.copy()
            reason2 = self._reason2
            self._reason2 = becauseSubexpression
            if subj is None:
                subj = self._store.newFormula()
            self._context = subj

            while 1:
                i = self.skipSpace(argstr, j)
                if i < 0:
                    self.BadSyntax(
                        argstr, j, "needed '}', found end.")

                if argstr[i:i + 1] == "}":
                    j = i + 1
                    break

                j = self.directiveOrStatement(argstr, i)
                if j < 0:
                    self.BadSyntax(
                        argstr, i, "expected statement or '}'")

            self._anonymousNodes = parentAnonymousNodes
            self._variables = self._parentVariables
            self._parentVariables = grandParentVariables
            self._context = self._parentContext
            self._reason2 = reason2
            self._parentContext = oldParentContext
            res.append(subj.close())  # No use until closed
            return j

    if ch == "(":
        thing_type = self._store.newList
        ch2 = argstr[i + 1:i + 2]
        if ch2 == '$':
            thing_type = self._store.newSet
            i += 1
        j = i + 1

        List = []
        while 1:
            i = self.skipSpace(argstr, j)
            if i < 0:
                self.BadSyntax(
                    argstr, j, "needed ')', found end.")
            if argstr[i:i + 1] == ')':
                j = i + 1
                break

            item = []
            j = self.item(
                argstr, i, item)  # @@@@@ should be path, was object
            if j < 0:
                self.BadSyntax(argstr, i,
                               "expected item in list or ')'")
            List.append(self._store.intern(item[0]))
        res.append(thing_type(List, self._context))
        return j

    j = self.tok('this', argstr, i)  # This context
    if j >= 0:
        self.BadSyntax(argstr, i,
                       "Keyword 'this' was ancient N3. Now use " +
                       "@forSome and @forAll keywords.")

    # booleans
    j = self.tok('true', argstr, i)
    if j >= 0:
        res.append(True)
        return j
    j = self.tok('false', argstr, i)
    if j >= 0:
        res.append(False)
        return j

    if subj is None:  # If this can be a named node, then check for a name.
        j = self.uri_ref2(argstr, i, res)
        if j >= 0:
            return j

    return -1
1054 | |
def property_list(self, argstr, i, subj):
    """Parse a property list for the subject ``subj``.

    A property list is a sequence of ``verb objectList`` groups separated
    by ';' (repeated ';' are skipped).  A ``:-`` group hands ``subj`` to
    ``self.node`` instead (rejected when ``self.turtle`` is set).

    For every parsed object a statement is emitted through
    ``self.makeStatement``; the direction returned by ``self.verb``
    ('->' or anything else) decides which of ``subj`` / the object goes
    in the third slot of the quadruple.

    Leaves the terminating punctuation in the buffer.

    Returns the index at which parsing stopped.  Syntax problems are
    reported through ``self.BadSyntax``.
    """
    while 1:
        # Skip any run of repeated ';' separators.
        while 1:
            j = self.skipSpace(argstr, i)
            if j < 0:
                self.BadSyntax(argstr, i,
                               "EOF found when expected verb in property list")
            if argstr[j] != ';':
                break
            i = j + 1

        if argstr[j:j + 2] == ":-":
            if self.turtle:
                self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode")
            i = j + 2
            res = []
            j = self.node(argstr, i, res, subj)
            if j < 0:
                self.BadSyntax(argstr, i,
                               "bad {} or () or [] node after :- ")
            i = j
            continue

        i = j
        v = []
        j = self.verb(argstr, i, v)
        if j <= 0:
            return i  # void but valid

        objs = []
        i = self.objectList(argstr, j, objs)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "objectList expected")
        for obj in objs:
            dira, sym = v[0]
            if dira == '->':
                self.makeStatement((self._context, sym, subj, obj))
            else:
                self.makeStatement((self._context, sym, obj, subj))

        j = self.skipSpace(argstr, i)
        if j < 0:
            # Report the error at the last valid position; ``j`` is -1 here
            # and would make the error context point at the wrong place.
            self.BadSyntax(argstr, i,
                           "EOF found in list of objects")
        if argstr[i:i + 1] != ";":
            return i
        i = i + 1  # skip semicolon and continue
1104 | |
def commaSeparatedList(self, argstr, j, res, what):
    """Parse a comma separated list of items terminated by '.'.

    ``what`` is a callable ``what(argstr, i, res)`` that parses one item,
    appends it to ``res`` and returns the new position (or a negative
    number on failure).

    return value: -1 bad syntax (or EOF after an item); otherwise the
    position of the terminating '.', which is found but not swallowed.
    An empty list (a '.' straight away) returns the original ``j``.
    res has things found appended
    """
    i = self.skipSpace(argstr, j)
    if i < 0:
        # ``i`` is -1 here, so report the error at the start position.
        self.BadSyntax(argstr, j,
                       "EOF found expecting comma sep list")
    if argstr[i] == ".":
        return j  # empty list is OK
    i = what(argstr, i, res)
    if i < 0:
        return -1

    while 1:
        j = self.skipSpace(argstr, i)
        if j < 0:
            return j  # eof
        ch = argstr[j:j + 1]
        if ch != ",":
            if ch != ".":
                return -1
            return j  # Found but not swallowed "."
        item_start = j + 1
        i = what(argstr, item_start, res)
        if i < 0:
            # Point at the item that failed to parse rather than at the
            # negative return value of ``what``.
            self.BadSyntax(argstr, item_start,
                           "bad list content")
1132 | |
def objectList(self, argstr, i, res):
    """Parse one or more objects separated by ','.

    Each object is parsed with ``self.object`` which appends to ``res``.

    Returns the position of the first non-space character following the
    last object (not consumed), or a negative number when an object
    could not be parsed.  Reaching end of input after an object is
    reported through ``self.BadSyntax``.
    """
    i = self.object(argstr, i, res)
    if i < 0:
        return -1
    while 1:
        j = self.skipSpace(argstr, i)
        if j < 0:
            # ``j`` is -1 here; report at the end of the last object.
            self.BadSyntax(argstr, i,
                           "EOF found after object")
        if argstr[j:j + 1] != ",":
            return j  # Found something else!
        i = self.object(argstr, j + 1, res)
        if i < 0:
            return i
1147 | |
def checkDot(self, argstr, i):
    """Check for the end of a statement at or after position ``i``.

    Leading space is skipped with ``self.skipSpace``.  A '.' is consumed
    (the position after it is returned); a '}' or ']' is left in place
    (its own position is returned).  A negative value from ``skipSpace``
    is passed straight back.  Anything else is reported through
    ``self.BadSyntax``.
    """
    pos = self.skipSpace(argstr, i)
    if pos < 0:
        return pos  # eof

    terminator = argstr[pos:pos + 1]
    if terminator == ".":
        return pos + 1  # skip
    if terminator in ("}", "]"):
        return pos  # don't skip it
    self.BadSyntax(argstr, pos,
                   "expected '.' or '}' or ']' at end of statement")
1160 | |
def uri_ref2(self, argstr, i, res):
    """Generate uri from n3 representation.

    Tries, in order: a qname (``pfx:local``), a ``?variable``, a
    ``<uri reference>`` and - when ``self.keywordsSet`` is true - a bare
    word resolved against the default (``""``) prefix binding.  The
    resulting term is appended to ``res``.

    Returns the position after the parsed term, or -1 when nothing
    matched.  Unbound prefixes, unterminated ``<...>`` references and
    keywords used as bare words are reported through ``self.BadSyntax``.

    Note that the RDF convention of directly concatenating
    NS and local name is now used though I prefer inserting a '#'
    to make the namespaces look more like what XML folks expect.
    """
    qn = []
    j = self.qname(argstr, i, qn)
    if j >= 0:
        pfx, ln = qn[0]
        if pfx is None:
            assert 0, "not used?"
            ns = self._baseURI + ADDED_HASH
        else:
            try:
                ns = self._bindings[pfx]
            except KeyError:
                if pfx == "_":  # Magic prefix 2001/05/30, can be changed
                    res.append(self.anonymousNode(ln))
                    return j
                if not self.turtle and pfx == "":
                    ns = join(self._baseURI or "", "#")
                else:
                    self.BadSyntax(argstr, i,
                                   "Prefix \"%s:\" not bound" % (pfx))
        symb = self._store.newSymbol(ns + ln)
        if symb in self._variables:
            res.append(self._variables[symb])
        else:
            res.append(symb)  # @@@ "#" CONVENTION
        return j

    i = self.skipSpace(argstr, i)
    if i < 0:
        return -1

    if argstr[i] == "?":
        v = []
        j = self.variable(argstr, i, v)
        if j > 0:  # Forget variables as a class, only in context.
            res.append(v[0])
            return j
        return -1

    elif argstr[i] == "<":
        st = i + 1
        end = argstr.find(">", st)
        if end < 0:
            # Point at the opening '<'.  (The stale ``j`` from the failed
            # qname attempt above is negative and must not be used here.)
            self.BadSyntax(argstr, i,
                           "unterminated URI reference")

        uref = argstr[st:end]  # the join should dealt with "":

        # expand unicode escapes
        uref = unicodeEscape8.sub(unicodeExpand, uref)
        uref = unicodeEscape4.sub(unicodeExpand, uref)

        if self._baseURI:
            uref = join(self._baseURI, uref)  # was: uripath.join
        else:
            assert ":" in uref, \
                "With no base URI, cannot deal with relative URIs"
        if argstr[end - 1:end] == "#" and not uref[-1:] == "#":
            uref = uref + \
                "#"  # She meant it! Weirdness in urlparse?
        symb = self._store.newSymbol(uref)
        if symb in self._variables:
            res.append(self._variables[symb])
        else:
            res.append(symb)
        return end + 1

    elif self.keywordsSet:
        v = []
        j = self.bareWord(argstr, i, v)
        if j < 0:
            return -1  # Forget variables as a class, only in context.
        if v[0] in self.keywords:
            self.BadSyntax(argstr, i,
                           'Keyword "%s" not allowed here.' % v[0])
        res.append(self._store.newSymbol(self._bindings[""] + v[0]))
        return j
    else:
        return -1
1247 | |
def skipSpace(self, argstr, i):
    """Skip white space, newlines and comments.

    Every match of the module-level ``eol`` pattern bumps ``self.lines``
    and records the new ``self.startOfLine``; a following match of ``ws``
    is skipped as well.

    return -1 if EOF (``eof`` matches), else position of first non-ws
    character
    """
    line_break = eol.match(argstr, i)
    while line_break is not None:
        self.lines = self.lines + 1
        i = line_break.end()  # Point to first character unmatched
        self.startOfLine = i
        line_break = eol.match(argstr, i)

    blanks = ws.match(argstr, i)
    if blanks is not None:
        i = blanks.end()

    if eof.match(argstr, i) is not None:
        return -1
    return i
1265 | |
def variable(self, argstr, i, res):
    """ ?abc -> variable(:abc)

    Parses a ``?name`` variable starting at (or after white space
    following) position ``i`` and appends the corresponding universal to
    ``res``.  The universal is created on first use with
    ``newUniversal`` and cached: in ``self._variables`` /
    ``self._context`` when there is no parent context, otherwise in
    ``self._parentVariables`` / ``self._parentContext``.

    Returns the position after the variable name, or -1 when there is
    no '?' at the current position.  A name that is missing or starts
    with a digit or '-' is reported through ``self.BadSyntax``.
    """

    j = self.skipSpace(argstr, i)
    if j < 0:
        return -1

    if argstr[j:j + 1] != "?":
        return -1
    j = j + 1
    i = j
    if j >= len(argstr):
        # A lone '?' at the very end of the input: without this guard the
        # index below raises IndexError instead of a syntax error.
        self.BadSyntax(argstr, j,
                       "EOF found when expected variable name")
    if argstr[j] in "0123456789-":
        self.BadSyntax(argstr, j,
                       "Varible name can't start with '%s'" % argstr[j])
    while i < len(argstr) and argstr[i] not in _notKeywordsChars:
        i = i + 1

    varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i])
    if self._parentContext is None:
        # Outermost level: the variable lives in the current context.
        # @@ was:
        # self.BadSyntax(argstr, j,
        #     "Can't use ?xxx syntax for variable in outermost level: %s"
        #     % argstr[j-1:i])
        known, owner = self._variables, self._context
    else:
        known, owner = self._parentVariables, self._parentContext
    if varURI not in known:
        known[varURI] = owner.newUniversal(varURI, why=self._reason2)
    res.append(known[varURI])
    return i
1300 | |
def bareWord(self, argstr, i, res):
    """ abc -> :abc

    Reads a bare word (after skipping space) and appends its text to
    ``res``.  The word runs up to the first character found in the
    module-level ``_notKeywordsChars``.

    Returns the position after the word, or -1 at end of input or when
    the first character is a digit, '-' or in ``_notKeywordsChars``.
    """
    start = self.skipSpace(argstr, i)
    if start < 0:
        return -1

    first = argstr[start]
    if first in "0123456789-" or first in _notKeywordsChars:
        return -1

    end = start
    limit = len(argstr)
    while end < limit and argstr[end] not in _notKeywordsChars:
        end += 1
    res.append(argstr[start:end])
    return end
1315 | |
def qname(self, argstr, i, res):
    """
    xyz:def -> ('xyz', 'def')
    If not in keywords and keywordsSet: def -> ('', 'def')
    :def -> ('', 'def')

    On success a ``(prefix, localname)`` tuple is appended to ``res`` and
    the position after the name is returned; -1 means no qname here.
    Illegal escapes in the local name are reported through
    ``self.BadSyntax``.
    """

    i = self.skipSpace(argstr, i)
    if i < 0:
        return -1

    c = argstr[i]
    if c in "0123456789-+.":
        return -1
    if c not in _notNameChars:
        ln = c
        i = i + 1
        while i < len(argstr):
            c = argstr[i]
            if c not in _notNameChars:
                ln = ln + c
                i = i + 1
            else:
                break

        if argstr[i - 1] == ".":  # qname cannot end with "."
            ln = ln[:-1]
            if not ln:
                return -1
            i -= 1

    else:  # First character is non-alpha
        ln = ''  # Was: None - TBL (why? useful?)

    if i < len(argstr) and argstr[i] == ':':
        pfx = ln
        # bnodes names have different rules
        if pfx == '_':
            allowedChars = _notNameChars
        else:
            allowedChars = _notQNameChars

        i = i + 1
        lastslash = False
        # start = i # TODO first char .
        ln = ''
        while i < len(argstr):
            c = argstr[i]
            if not lastslash and c == '\\':
                lastslash = True
                i += 1

            elif lastslash or c not in allowedChars:

                if lastslash:
                    if c not in escapeChars:
                        # Errors go through self.BadSyntax, like the rest
                        # of the parser, so the line comes from
                        # ``self.lines`` (there is no ``self.line``).
                        self.BadSyntax(argstr, i,
                                       "illegal escape " + c)
                elif c == '%':
                    # Slice instead of index so that a '%' close to the
                    # end of the input is a syntax error, not IndexError.
                    digits = argstr[i + 1:i + 3]
                    if (len(digits) < 2 or digits[0] not in hexChars
                            or digits[1] not in hexChars):
                        self.BadSyntax(argstr, i,
                                       "illegal hex escape " + c)

                ln = ln + c
                i = i + 1
                lastslash = False
            else:
                break

        if lastslash:
            self.BadSyntax(argstr, i,
                           "qname cannot end with \\")

        if argstr[i - 1] == '.':
            # localname cannot end in .
            ln = ln[:-1]
            if not ln:
                return -1
            i -= 1

        res.append((pfx, ln))
        return i

    else:  # delimiter was not ":"
        if ln and self.keywordsSet and ln not in self.keywords:
            res.append(('', ln))
            return i
        return -1
1404 | |
def object(self, argstr, i, res):
    """Parse an object: anything ``self.subject`` accepts, or a string.

    When ``self.subject`` does not match, a quoted string (single or
    tripled delimiter from ``self.string_delimiters``) is read with
    ``self.strconst`` and appended to ``res`` as a plain literal.

    Returns the position after the object, or -1 when nothing matched.
    """
    j = self.subject(argstr, i, res)
    if j >= 0:
        return j

    j = self.skipSpace(argstr, i)
    if j < 0:
        return -1
    i = j

    if argstr[i] not in self.string_delimiters:
        return -1

    if argstr[i:i + 3] == argstr[i] * 3:
        delim = argstr[i] * 3
    else:
        delim = argstr[i]
    i = i + len(delim)

    j, s = self.strconst(argstr, i, delim)

    # newLiteral takes (string, datatype, language); a plain string has
    # neither, so pass None for both instead of omitting them.
    res.append(self._store.newLiteral(s, None, None))
    return j
1429 | |
def nodeOrLiteral(self, argstr, i, res):
    """Parse a node, a number or a string literal.

    Tried in order:

    * whatever ``self.node`` accepts;
    * a number, appended to ``res`` as ``float`` (exponent syntax),
      ``Decimal`` (decimal syntax) or ``int`` (integer syntax);
    * a quoted string with optional ``@lang`` tag and/or ``^^datatype``,
      appended as ``self._store.newLiteral(s, dt, lang)``.

    Returns the position after the parsed term, or -1 when nothing
    matched.  Raises ``BadSyntax`` for a malformed language tag or a
    missing datatype after ``^^``.
    """
    j = self.node(argstr, i, res)
    startline = self.lines  # Remember where for error messages
    if j >= 0:
        return j

    j = self.skipSpace(argstr, i)
    if j < 0:
        return -1
    i = j

    ch = argstr[i]
    if ch in "-+0987654321.":
        m = exponent_syntax.match(argstr, i)
        if m:
            j = m.end()
            res.append(float(argstr[i:j]))
            return j

        m = decimal_syntax.match(argstr, i)
        if m:
            j = m.end()
            res.append(Decimal(argstr[i:j]))
            return j

        m = integer_syntax.match(argstr, i)
        if m:
            j = m.end()
            res.append(int(argstr[i:j]))
            return j

        # return -1 ## or fall through?

    if argstr[i] not in self.string_delimiters:
        return -1

    if argstr[i:i + 3] == argstr[i] * 3:
        delim = argstr[i] * 3
    else:
        delim = argstr[i]
    i = i + len(delim)

    dt = None
    j, s = self.strconst(argstr, i, delim)
    lang = None
    if argstr[j:j + 1] == "@":  # Language?
        m = langcode.match(argstr, j + 1)
        if m is None:
            raise BadSyntax(
                self._thisDoc, startline, argstr, i,
                "Bad language code syntax on string " +
                "literal, after @")
        i = m.end()
        lang = argstr[j + 1:i]
        j = i
    if argstr[j:j + 2] == "^^":
        res2 = []
        dt_start = j + 2
        j = self.uri_ref2(argstr, dt_start, res2)  # Read datatype URI
        if j < 0 or not res2:
            # Without this check ``res2[0]`` raises IndexError and the
            # caller never sees a proper syntax error.
            self.BadSyntax(argstr, dt_start,
                           "expected datatype URI after ^^")
        dt = res2[0]
    res.append(self._store.newLiteral(s, dt, lang))
    return j
1492 | |
def uriOf(self, sym):
    """Return the URI of ``sym``.

    A tuple (old system for --pipe) yields its second element; anything
    else is returned unchanged.
    """
    # return sym.uriref() # cwm api
    return sym[1] if isinstance(sym, tuple) else sym
1498 | |
def strconst(self, argstr, i, delim):
    """parse an N3 string constant delimited by delim.

    ``i`` is the position just after the opening delimiter; ``delim`` is
    either a single quote character or that character tripled.

    return index, val - the position after the closing delimiter and the
    decoded string.  Raises ``BadSyntax`` for a newline inside a
    single-delimiter string, a bad escape or an unterminated string.
    Newlines inside a tripled-delimiter string bump ``self.lines`` and
    update ``self.startOfLine``.
    """
    quote = delim[0]
    triple = quote * 3
    # Letter following a backslash -> character it stands for.
    simple_escapes = dict(zip('abfrtvn\\"', '\a\b\f\r\t\v\n\\"'))

    pos = i
    text = ""  # Empty unicode string
    startline = self.lines  # Remember where for error messages
    while pos < len(argstr):
        if argstr[pos] == quote:
            if delim == quote:  # done when delim is " or '
                return pos + 1, text
            if delim == triple:
                # The closing """ (or ''') may be directly preceded by up
                # to two quote characters that still belong to the string,
                # so try runs of five, four and three quotes in that order.
                for spare in (2, 1, 0):
                    run = quote * (3 + spare)
                    if argstr[pos:pos + len(run)] == run:
                        return pos + len(run), text + quote * spare

                # we are inside of the string and current char is " or '
                pos = pos + 1
                text = text + quote
                continue

        m = interesting.search(argstr, pos)  # was argstr[j:].
        # Note for pos param to work, MUST be compiled ... re bug?
        assert m, "Quote expected in string at ^ in %s^%s" % (
            argstr[pos - 20:pos], argstr[pos:pos + 20])  # at least need a quote

        i = m.start()
        try:
            text = text + argstr[pos:i]
        except UnicodeError:
            err = "".join(" %02x" % ord(c) for c in argstr[pos:i])
            streason = sys.exc_info()[1].__str__()
            raise BadSyntax(
                self._thisDoc, startline, argstr, pos,
                "Unicode error appending characters" +
                " %s to string, because\n\t%s"
                % (err, streason))

        ch = argstr[i]
        if ch == quote:
            # Let the delimiter handling at the top of the loop decide.
            pos = i
        elif ch in ('"', "'"):
            # The other kind of quote is ordinary string content.
            text = text + ch
            pos = i + 1
        elif ch in "\r\n":
            if delim == quote:
                raise BadSyntax(
                    self._thisDoc, startline, argstr, i,
                    "newline found in string literal")
            self.lines = self.lines + 1
            text = text + ch
            pos = i + 1
            self.startOfLine = pos
        elif ch == "\\":
            pos = i + 1
            ch = argstr[pos:pos + 1]  # Will be empty if string ends
            if not ch:
                raise BadSyntax(
                    self._thisDoc, startline, argstr, i,
                    "unterminated string literal (2)")
            if ch in simple_escapes:
                text = text + simple_escapes[ch]
                pos = pos + 1
            elif ch == "u":
                pos, ch = self.uEscape(argstr, pos + 1, startline)
                text = text + ch
            elif ch == "U":
                pos, ch = self.UEscape(argstr, pos + 1, startline)
                text = text + ch
            else:
                self.BadSyntax(argstr, i,
                               "bad escape")

    self.BadSyntax(argstr, i,
                   "unterminated string literal")
1595 | |
def _unicodeEscape(self, argstr, i, startline, reg, n, prefix):
    """Decode the ``n`` hex digits of a unicode escape starting at ``i``.

    ``prefix`` ('u' or 'U' in the callers below) and the digits are
    re-assembled into a backslash escape and expanded with
    ``reg.sub(unicodeExpand, ...)``.

    Returns ``(i + n, expanded_text)``.  Raises ``BadSyntax`` when fewer
    than ``n`` characters remain or when the expansion fails.
    """
    if len(argstr) < i + n:
        raise BadSyntax(
            self._thisDoc, startline, argstr, i,
            "unterminated string literal(3)")
    try:
        return i + n, reg.sub(unicodeExpand, '\\' + prefix + argstr[i:i + n])
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt
        # and SystemExit and turned them into syntax errors.
        raise BadSyntax(
            self._thisDoc, startline, argstr, i,
            "bad string literal hex escape: " + argstr[i:i + n])
1607 | |
def uEscape(self, argstr, i, startline):
    """Decode a four-digit ``u`` escape; see ``_unicodeEscape``."""
    return self._unicodeEscape(
        argstr, i, startline, reg=unicodeEscape4, n=4, prefix='u')
1610 | |
def UEscape(self, argstr, i, startline):
    """Decode an eight-digit ``U`` escape; see ``_unicodeEscape``."""
    return self._unicodeEscape(
        argstr, i, startline, reg=unicodeEscape8, n=8, prefix='U')
1613 | |
def BadSyntax(self, argstr, i, msg):
    """Raise the module-level ``BadSyntax`` error for position ``i``.

    The document (``self._thisDoc``) and current line count
    (``self.lines``) are filled in from the parser state.  This method
    never returns.
    """
    error = BadSyntax(self._thisDoc, self.lines, argstr, i, msg)
    raise error
1616 | |
1617 # If we are going to do operators then they should generate | |
1618 # [ is operator:plus of ( \1 \2 ) ] | |
1619 | |
1620 | |
class BadSyntax(SyntaxError):
    """Syntax error raised by the parser.

    Keeps the document URI, the (zero-based) line count, the text being
    parsed, the position of the problem in that text and the reason, and
    renders them with up to 60 characters of context on either side.
    """

    def __init__(self, uri, lines, argstr, i, why):
        # Keep the text as text: positions are character offsets, so
        # slicing an encoded byte string would misplace the marker (and
        # render as b'...' on Python 3).
        if isinstance(argstr, bytes):
            argstr = argstr.decode('utf-8', 'replace')
        self._str = argstr
        self._i = i
        self._why = why
        self.lines = lines
        self._uri = uri

    def __str__(self):
        argstr = self._str
        # Clamp the position so that a negative or oversized index (e.g.
        # an EOF marker of -1) cannot produce a misleading context.
        i = min(max(self._i, 0), len(argstr))
        st = 0
        if i > 60:
            pre = "..."
            st = i - 60
        else:
            pre = ""
        if len(argstr) - i > 60:
            post = "..."
        else:
            post = ""

        return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' \
            % (self.lines + 1, self._uri, self._why, pre,
               argstr[st:i], argstr[i:i + 60], post)

    @property
    def message(self):
        """The formatted error text (same as ``str(self)``)."""
        return str(self)
1651 | |
1652 | |
1653 | |
1654 ############################################################################### | |
class Formula(object):
    """A formula being built by the parser, backed by a ``QuotedGraph``.

    Every instance gets a sequential ``number`` (from the class-level
    counter), a random ``uuid`` used in generated blank node names, and
    a quoted graph in the parent's store identified by ``self.id()``.
    """

    number = 0  # class-wide count of formulae created so far

    def __init__(self, parent):
        Formula.number += 1
        self.number = Formula.number
        self.uuid = uuid4().hex
        self.counter = 0  # blank nodes generated by this formula
        self.existentials = {}
        self.universals = {}

        self.quotedgraph = QuotedGraph(
            store=parent.store, identifier=self.id())

    def __str__(self):
        return '_:Formula%s' % self.number

    def id(self):
        """Return the blank node that identifies this formula."""
        return BNode('_:Formula%s' % self.number)

    def newBlankNode(self, uri=None, why=None):
        """Return a blank node.

        With a ``uri`` the node id is the part after the last '#', with
        every '_' replaced by 'b'; otherwise a fresh id is built from
        this formula's uuid and counter.  ``why`` is unused.
        """
        if uri is not None:
            return BNode(uri.split('#')[-1].replace('_', 'b'))
        self.counter += 1
        return BNode('f%sb%s' % (self.uuid, self.counter))

    def newUniversal(self, uri, why=None):
        """Return a ``Variable`` named after the fragment of ``uri``."""
        return Variable(uri.split('#')[-1])

    def declareExistential(self, x):
        """Map ``x`` to a freshly generated blank node."""
        self.existentials[x] = self.newBlankNode()

    def close(self):
        """Return the quoted graph holding this formula's statements."""
        return self.quotedgraph
1692 | |
1693 | |
# Matches (and captures) a single character in the range 0x80-0xFF.
r_hibyte = re.compile(r'([\x80-\xff])')
1695 | |
1696 | |
class RDFSink(object):
    """Sink that turns parser events into terms and triples.

    Statements for the root formula go into ``self.graph``; statements
    for a ``Formula`` go into its quoted graph; anything else is treated
    as a graph-like object with an ``add`` method.
    """

    def __init__(self, graph):
        self.rootFormula = None
        self.counter = 0  # blank nodes generated by this sink
        self.graph = graph

    def newFormula(self):
        """Create a ``Formula`` in the graph's (formula aware) store."""
        assert self.graph.store.formula_aware
        f = Formula(self.graph)
        return f

    def newGraph(self, identifier):
        """Create a ``Graph`` with ``identifier`` in the same store."""
        return Graph(self.graph.store, identifier)

    def newSymbol(self, *args):
        """Return a ``URIRef`` for the first argument."""
        return URIRef(args[0])

    def newBlankNode(self, arg=None, uri=None, why=None):
        """Return a blank node.

        A ``Formula`` argument delegates to the formula; a ``Graph`` or
        ``None`` produces a sequential ``n<counter>`` node; any other
        argument is indexed and the fragment of its first element is
        used as the node id (with '_' replaced by 'b').
        """
        if isinstance(arg, Formula):
            return arg.newBlankNode(uri)
        elif isinstance(arg, Graph) or arg is None:
            self.counter += 1
            bn = BNode('n' + str(self.counter))
        else:
            bn = BNode(str(arg[0]).split('#').pop().replace('_', 'b'))
        return bn

    def newLiteral(self, s, dt=None, lang=None):
        """Return a ``Literal``; a datatype takes precedence over ``lang``.

        ``dt`` and ``lang`` default to ``None`` so that plain strings can
        be created with a single argument.
        """
        if dt:
            return Literal(s, datatype=dt)
        else:
            return Literal(s, lang=lang)

    def newList(self, n, f):
        """Emit an RDF collection for the items ``n`` in context ``f``.

        Returns ``rdf:nil`` for an empty list, otherwise the blank node
        heading the list.  Statements are emitted in the order the
        recursive formulation produced them (all ``rdf:first`` front to
        back, then all ``rdf:rest`` back to front), but iteratively, so
        long lists neither hit the recursion limit nor get copied once
        per element.
        """
        nil = self.newSymbol(
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'
        )
        if not n:
            return nil

        first = self.newSymbol(
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#first'
        )
        rest = self.newSymbol(
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#rest')

        cells = []
        for member in n:
            cell = self.newBlankNode(f)
            self.makeStatement((f, first, cell, member))
            cells.append(cell)

        tail = nil
        for cell in reversed(cells):
            self.makeStatement((f, rest, cell, tail))
            tail = cell
        return tail

    def newSet(self, *args):
        # NOTE(review): ``set(args)`` needs every argument to be hashable;
        # passing a list as one of them raises TypeError - confirm what
        # the intended result is before relying on this.
        return set(args)

    def setDefaultNamespace(self, *args):
        """Return the ``repr`` of the arguments joined with ':'."""
        return ':'.join(repr(n) for n in args)

    def makeStatement(self, quadruple, why=None):
        """Add one statement given as ``(context, predicate, subject, object)``.

        The terms are normalised first; ``why`` is unused.
        """
        f, p, s, o = quadruple

        if hasattr(p, 'formula'):
            raise Exception("Formula used as predicate")

        s = self.normalise(f, s)
        p = self.normalise(f, p)
        o = self.normalise(f, o)

        if f == self.rootFormula:
            # print s, p, o, '.'
            self.graph.add((s, p, o))
        elif isinstance(f, Formula):
            f.quotedgraph.add((s, p, o))
        else:
            f.add((s, p, o))

        # return str(quadruple)

    def normalise(self, f, n):
        """Convert a parser value into an RDF term.

        Tuples become a ``URIRef`` of their second element; Python
        booleans, integers, ``Decimal`` and floats become typed literals;
        inside a ``Formula`` declared existentials are replaced by their
        blank node.  Everything else is returned unchanged.
        """
        if isinstance(n, tuple):
            return URIRef(str(n[1]))

        # bool must be tested before int: bool is a subclass of int.
        if isinstance(n, bool):
            s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE)
            return s

        if isinstance(n, int):
            s = Literal(str(n), datatype=INTEGER_DATATYPE)
            return s

        if isinstance(n, Decimal):
            value = str(n)
            if value == '-0':
                value = '0'
            s = Literal(value, datatype=DECIMAL_DATATYPE)
            return s

        if isinstance(n, float):
            s = Literal(str(n), datatype=DOUBLE_DATATYPE)
            return s

        if isinstance(f, Formula):
            if n in f.existentials:
                return f.existentials[n]

        # if isinstance(n, Var):
        #     if f.universals.has_key(n):
        #         return f.universals[n]
        #     f.universals[n] = f.newBlankNode()
        #     return f.universals[n]

        return n

    def intern(self, something):
        """Return ``something`` unchanged."""
        return something

    def bind(self, pfx, uri):
        pass  # print pfx, ':', uri

    def startDoc(self, formula):
        """Remember ``formula`` as the root formula of the document."""
        self.rootFormula = formula

    def endDoc(self, formula):
        pass
1818 | |
1819 | |
1820 ################################################### | |
1821 # | |
1822 # Utilities | |
1823 # | |
1824 | |
1825 | |
@py3compat.format_doctest_out
def hexify(ustr):
    """Use URL encoding to return an ASCII string
    corresponding to the given UTF8 string

    Text is encoded as UTF-8 first and every byte outside the printable
    ASCII range 33..126 is written as an escape of two upper-case hex
    digits, so characters beyond U+00FF are encoded byte by byte rather
    than as one over-long escape.

    >>> hexify("http://example/a b")
    %(b)s'http://example/a%%20b'

    """
    data = ustr if isinstance(ustr, bytes) else ustr.encode('utf-8')
    s = "".join(
        "%%%02X" % byte if byte > 126 or byte < 33 else chr(byte)
        for byte in bytearray(data))
    return b(s)
1844 | |
1845 | |
class TurtleParser(Parser):

    """
    An RDFLib parser for Turtle

    See http://www.w3.org/TR/turtle/
    """

    def __init__(self):
        pass

    def parse(self, source, graph, encoding="utf-8", turtle=True):
        """Parse ``source`` into ``graph``.

        The base URI is the absolutized public id (or system id) of the
        source.  After parsing, every prefix binding seen by the parser
        is bound on ``graph``.

        Raises ``Exception`` for any encoding other than ``None`` or
        "utf-8".  ``turtle`` is passed on to ``SinkParser``.
        """

        if encoding not in [None, "utf-8"]:
            # The two halves used to be a tuple, so the '%' formatting
            # itself failed with TypeError instead of producing this text.
            raise Exception(
                "N3/Turtle files are always utf-8 encoded, "
                "I was passed: %s" % (encoding,))

        sink = RDFSink(graph)

        baseURI = graph.absolutize(
            source.getPublicId() or source.getSystemId() or "")
        p = SinkParser(sink, baseURI=baseURI, turtle=turtle)

        p.loadStream(source.getByteStream())

        for prefix, namespace in list(p._bindings.items()):
            graph.bind(prefix, namespace)
1874 | |
1875 | |
class N3Parser(TurtleParser):

    """
    An RDFLib parser for Notation3

    See http://www.w3.org/DesignIssues/Notation3.html

    """

    def __init__(self):
        pass

    def parse(self, source, graph, encoding="utf-8"):
        """Parse Notation3 from ``source`` into ``graph``.

        The graph's store must be context aware and formula aware.  The
        graph is wrapped in a ``ConjunctiveGraph`` on the same store
        (with ``graph`` as its default context and sharing its namespace
        manager) and handed to ``TurtleParser.parse`` with
        ``turtle=False``.
        """
        # we're currently being handed a Graph, not a ConjunctiveGraph
        store = graph.store
        assert store.context_aware  # is this implied by formula_aware
        assert store.formula_aware

        wrapper = ConjunctiveGraph(store=graph.store)
        # TODO: CG __init__ should have a default_context arg
        wrapper.default_context = graph
        # TODO: update N3Processor so that it can use conj_graph as the sink
        wrapper.namespace_manager = graph.namespace_manager

        TurtleParser.parse(self, source, wrapper, encoding, turtle=False)
1900 | |
1901 | |
def _test():  # pragma: no cover
    """Run the doctests through ``doctest.testmod``."""
    from doctest import testmod
    testmod()
1905 | |
1906 | |
1907 # if __name__ == '__main__': | |
1908 # _test() | |
1909 | |
def main():  # pragma: no cover
    """Parse the file named on the command line and print its quads.

    The file is read as bytes and fed to a ``SinkParser`` whose base URI
    is ``file://`` plus the path joined onto the current directory; the
    empty prefix is bound to that base URI plus '#'.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: %s FILE" % sys.argv[0])
    path = sys.argv[1]

    g = ConjunctiveGraph()

    sink = RDFSink(g)
    base_uri = 'file://' + os.path.join(os.getcwd(), path)

    p = SinkParser(sink, baseURI=base_uri)
    p._bindings[''] = p._baseURI + '#'
    p.startDoc()

    # ``with`` closes the file even when reading fails.
    with open(path, 'rb') as f:
        rdbytes = f.read()

    p.feed(rdbytes)
    p.endDoc()
    for t in g.quads((None, None, None)):

        print(t)


if __name__ == '__main__':
    main()
1932 | |
1933 # ends |