Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/trix.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 """ | |
2 A TriX parser for RDFLib | |
3 """ | |
4 from rdflib.namespace import Namespace | |
5 from rdflib.term import URIRef | |
6 from rdflib.term import BNode | |
7 from rdflib.term import Literal | |
8 from rdflib.graph import Graph, ConjunctiveGraph | |
9 from rdflib.exceptions import ParserError | |
10 from rdflib.parser import Parser | |
11 | |
12 from xml.sax.saxutils import handler | |
13 from xml.sax import make_parser | |
14 from xml.sax.handler import ErrorHandler | |
15 | |
# Public names exported by this module.
__all__ = [
    "create_parser",
    "TriXHandler",
    "TriXParser",
]


# Namespace that every element of a TriX document is checked against.
TRIXNS = Namespace("http://www.w3.org/2004/03/trix/trix-1/")
# The reserved XML namespace; used to look up the ``lang`` attribute.
XMLNS = Namespace("http://www.w3.org/XML/1998/namespace")
21 | |
22 | |
class TriXHandler(handler.ContentHandler):
    """A SAX content handler that loads a TriX document into a store.

    See http://sw.nokia.com/trix/

    The handler is a small state machine driven by ``self.state``:

    * 0 -- outside the ``TriX`` root element
    * 1 -- inside ``TriX``, between graphs
    * 2 -- inside a ``graph``
    * 3 -- reading the name of the graph (a ``uri`` or ``id`` child)
    * 4 -- inside a ``triple``

    Any element that is not legal in the current state is reported
    through :meth:`error`, which raises ``ParserError``.
    """

    def __init__(self, store):
        """Create a handler that adds the parsed triples to *store*."""
        handler.ContentHandler.__init__(self)
        self.store = store
        self.preserve_bnode_ids = False
        # Replaced by the parser through setDocumentLocator(); kept as
        # None until then so that error() can still report a problem.
        self.locator = None
        self.reset()

    def reset(self):
        """Return the handler to its initial, pre-document state."""
        self.bnode = {}
        self.graph = None
        self.triple = None
        self.state = 0
        self.lang = None
        self.datatype = None
        # Text collected for the element currently being read.
        self.chars = ""

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        """Remember *locator* so errors can report a document position."""
        self.locator = locator

    def startDocument(self):
        pass

    def startPrefixMapping(self, prefix, namespace):
        pass

    def endPrefixMapping(self, prefix):
        pass

    def startElementNS(self, name, qname, attrs):
        """Advance the state machine for an opening tag.

        *name* is the ``(namespace, localname)`` pair of the element.
        Raises ``ParserError`` (via :meth:`error`) for elements outside
        the TriX namespace, unknown elements, and elements that are not
        allowed in the current state.
        """
        self._require_trix_namespace(name)
        element = name[1]

        if element == "TriX":
            if self.state == 0:
                self.state = 1
            else:
                self.error("Unexpected TriX element")

        elif element == "graph":
            if self.state == 1:
                self.state = 2
            else:
                self.error("Unexpected graph element")

        elif element == "uri":
            if self.state == 2:
                # the context uri
                self.state = 3
            elif self.state == 4:
                # part of a triple
                pass
            else:
                self.error("Unexpected uri element")

        elif element == "triple":
            if self.state == 2:
                if self.graph is None:
                    # anonymous graph, create one with random bnode id
                    self.graph = Graph(store=self.store)
                # start of a triple
                self.triple = []
                self.state = 4
            else:
                self.error("Unexpected triple element")

        elif element == "typedLiteral":
            if self.state == 4:
                # part of triple
                self.lang = self._language_of(attrs)
                self.datatype = None
                try:
                    self.datatype = attrs.getValueByQName("datatype")
                except KeyError:
                    self.error("No required attribute 'datatype'")
            else:
                self.error("Unexpected typedLiteral element")

        elif element == "plainLiteral":
            if self.state == 4:
                # part of triple
                self.lang = self._language_of(attrs)
                self.datatype = None
            else:
                self.error("Unexpected plainLiteral element")

        elif element == "id":
            if self.state == 2:
                # the context uri
                self.state = 3
            elif self.state == 4:
                # part of triple
                pass
            else:
                self.error("Unexpected id element")

        else:
            self.error("Unknown element %s in TriX namespace" % element)

        # Every accepted element starts with an empty text buffer.
        self.chars = ""

    def endElementNS(self, name, qname):
        """Consume the text collected for a closing tag.

        Graph names create the current graph, terms are appended to the
        triple being built, and a closing ``triple`` adds the triple to
        the current graph.  Raises ``ParserError`` (via :meth:`error`)
        on malformed triples or unexpected elements.
        """
        self._require_trix_namespace(name)
        element = name[1]

        if element == "uri":
            if self.state == 3:
                self.graph = Graph(store=self.store,
                                   identifier=URIRef(self.chars.strip()))
                self.state = 2
            elif self.state == 4:
                self.triple.append(URIRef(self.chars.strip()))
            else:
                self.error(
                    "Illegal internal self.state - This should never "
                    "happen if the SAX parser ensures XML syntax correctness")

        elif element == "id":
            if self.state == 3:
                self.graph = Graph(
                    store=self.store,
                    identifier=self.get_bnode(self.chars.strip()))
                self.state = 2
            elif self.state == 4:
                self.triple.append(self.get_bnode(self.chars.strip()))
            else:
                self.error(
                    "Illegal internal self.state - This should never "
                    "happen if the SAX parser ensures XML syntax correctness")

        elif element == "plainLiteral" or element == "typedLiteral":
            if self.state == 4:
                # Literal text is kept verbatim (not stripped).
                self.triple.append(Literal(
                    self.chars, lang=self.lang, datatype=self.datatype))
            else:
                self.error(
                    "This should never happen if the SAX parser "
                    "ensures XML syntax correctness")

        elif element == "triple":
            if self.state == 4:
                if len(self.triple) != 3:
                    self.error("Triple has wrong length, got %d elements: %s" %
                               (len(self.triple), self.triple))

                self.graph.add(self.triple)
                self.state = 2
            else:
                self.error(
                    "This should never happen if the SAX parser "
                    "ensures XML syntax correctness")

        elif element == "graph":
            self.graph = None
            self.state = 1

        elif element == "TriX":
            self.state = 0

        else:
            self.error("Unexpected close element")

    def get_bnode(self, label):
        """Return the blank node for *label*.

        Unless ``preserve_bnode_ids`` is set, nodes are cached per label
        so that a repeated label yields the same node object.
        """
        # NOTE(review): both branches build the node from the label, so
        # the cache only affects object reuse, not the node id.
        if self.preserve_bnode_ids:
            return BNode(label)
        try:
            return self.bnode[label]
        except KeyError:
            node = self.bnode[label] = BNode(label)
            return node

    def characters(self, content):
        """Append *content* to the text of the current element."""
        self.chars += content

    def ignorableWhitespace(self, content):
        pass

    def processingInstruction(self, target, data):
        pass

    def error(self, message):
        """Raise ``ParserError`` for *message*.

        The message is prefixed with ``system-id:line:column: `` when a
        document locator has been set.
        """
        locator = self.locator
        if locator is None:
            # No position available; report the bare message rather
            # than failing with an unrelated AttributeError.
            raise ParserError(message)
        info = "%s:%s:%s: " % (
            locator.getSystemId(),
            locator.getLineNumber(),
            locator.getColumnNumber())
        raise ParserError(info + message)

    # Private helpers

    def _require_trix_namespace(self, name):
        """Report an error unless *name* is in the TriX namespace."""
        if name[0] != str(TRIXNS):
            self.error(
                "Only elements in the TriX namespace are allowed. %s!=%s"
                % (name[0], TRIXNS))

    def _language_of(self, attrs):
        """Return the ``xml:lang`` value in *attrs*, or None if absent."""
        try:
            return attrs.getValue((str(XMLNS), "lang"))
        except KeyError:
            # language not required - ignore
            return None
232 | |
233 | |
def create_parser(store):
    """Build a namespace-aware SAX parser wired to a new TriXHandler.

    *store* is handed to the TriXHandler constructor.  The returned
    parser has the handler installed as its content handler and a
    default ``ErrorHandler`` as its error handler.
    """
    sax_parser = make_parser()
    try:
        # expatreader.py has a bug that shows up when it tries to guess
        # a prefix; declaring the "xml" prefix up front works around it.
        sax_parser.start_namespace_decl(
            "xml", "http://www.w3.org/XML/1998/namespace")
    except AttributeError:
        # The method is not present in Jython (at least).
        pass
    sax_parser.setFeature(handler.feature_namespaces, 1)
    sax_parser.setContentHandler(TriXHandler(store))
    sax_parser.setErrorHandler(ErrorHandler())
    return sax_parser
248 | |
249 | |
class TriXParser(Parser):
    """Parser plugin that reads TriX. See http://sw.nokia.com/trix/"""

    def __init__(self):
        pass

    def parse(self, source, sink, **args):
        """Parse *source* into the store behind *sink*.

        The store of *sink* must be context aware.  The optional keyword
        argument ``preserve_bnode_ids`` is copied onto the content
        handler when it is given and not None.
        """
        assert sink.store.context_aware, (
            "TriXParser must be given a context aware store.")

        # A new parser is built for every call and used exactly once.
        sax_parser = create_parser(sink.store)
        self._parser = sax_parser
        trix_handler = sax_parser.getContentHandler()
        keep_ids = args.get("preserve_bnode_ids")
        if keep_ids is not None:
            trix_handler.preserve_bnode_ids = keep_ids
        sax_parser.parse(source)