comparison planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/trix.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 """
2 A TriX parser for RDFLib
3 """
4 from rdflib.namespace import Namespace
5 from rdflib.term import URIRef
6 from rdflib.term import BNode
7 from rdflib.term import Literal
8 from rdflib.graph import Graph, ConjunctiveGraph
9 from rdflib.exceptions import ParserError
10 from rdflib.parser import Parser
11
12 from xml.sax.saxutils import handler
13 from xml.sax import make_parser
14 from xml.sax.handler import ErrorHandler
15
# Public names exported by this module.
__all__ = ["create_parser", "TriXHandler", "TriXParser"]


# Namespace every TriX element must belong to (checked by TriXHandler).
TRIXNS = Namespace("http://www.w3.org/2004/03/trix/trix-1/")
# The reserved XML namespace; used to look up the ``xml:lang`` attribute.
XMLNS = Namespace("http://www.w3.org/XML/1998/namespace")
21
22
class TriXHandler(handler.ContentHandler):
    """A SAX content handler that loads a TriX document into a store.

    See http://sw.nokia.com/trix/

    The handler is a small state machine driven by namespace-aware SAX
    events.  ``self.state`` takes the following values:

    * ``0`` - outside of the ``TriX`` root element
    * ``1`` - inside ``TriX``, between graphs
    * ``2`` - inside a ``graph`` element, between triples
    * ``3`` - reading the graph name (a ``uri`` or ``id`` child of ``graph``)
    * ``4`` - inside a ``triple`` element, collecting its terms

    Every problem with the document structure is reported through
    :meth:`error`, which raises ``ParserError``.
    """

    def __init__(self, store):
        """Create a handler that adds the parsed triples to *store*."""
        handler.ContentHandler.__init__(self)
        self.store = store
        self.preserve_bnode_ids = False
        # Replaced by the SAX parser through setDocumentLocator(); kept as
        # None until then so error() can still raise a ParserError.
        self.locator = None
        self.reset()

    def reset(self):
        """Return the handler to its initial, pre-document state."""
        self.bnode = {}
        self.graph = None
        self.triple = None
        self.state = 0
        self.lang = None
        self.datatype = None
        # Character data of the element currently being read.  Initialised
        # here so characters() never hits a missing attribute.
        self.chars = ""

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        """Remember *locator* so errors can report a document position."""
        self.locator = locator

    def startDocument(self):
        pass

    def startPrefixMapping(self, prefix, namespace):
        pass

    def endPrefixMapping(self, prefix):
        pass

    def _xml_lang(self, attrs):
        """Return the ``xml:lang`` attribute of *attrs*, or None if absent."""
        try:
            return attrs.getValue((str(XMLNS), "lang"))
        except KeyError:
            # language not required - ignore
            return None

    def startElementNS(self, name, qname, attrs):
        """Handle an opening tag and advance the state machine.

        *name* is the ``(namespace, localname)`` pair supplied by the SAX
        parser.  Elements outside the TriX namespace, unknown elements and
        elements appearing in the wrong state raise ``ParserError``.
        """
        if name[0] != str(TRIXNS):
            self.error(
                "Only elements in the TriX namespace are allowed. %s!=%s"
                % (name[0], TRIXNS))

        if name[1] == "TriX":
            if self.state == 0:
                self.state = 1
            else:
                self.error("Unexpected TriX element")

        elif name[1] == "graph":
            if self.state == 1:
                self.state = 2
            else:
                self.error("Unexpected graph element")

        elif name[1] == "uri":
            if self.state == 2:
                # the context uri
                self.state = 3
            elif self.state == 4:
                # part of a triple
                pass
            else:
                self.error("Unexpected uri element")

        elif name[1] == "triple":
            if self.state == 2:
                if self.graph is None:
                    # anonymous graph, create one with random bnode id
                    self.graph = Graph(store=self.store)
                # start of a triple
                self.triple = []
                self.state = 4
            else:
                self.error("Unexpected triple element")

        elif name[1] == "typedLiteral":
            if self.state == 4:
                # part of triple
                self.lang = self._xml_lang(attrs)
                self.datatype = None
                try:
                    self.datatype = attrs.getValueByQName("datatype")
                except KeyError:
                    self.error("No required attribute 'datatype'")
            else:
                self.error("Unexpected typedLiteral element")

        elif name[1] == "plainLiteral":
            if self.state == 4:
                # part of triple
                self.lang = self._xml_lang(attrs)
                self.datatype = None
            else:
                self.error("Unexpected plainLiteral element")

        elif name[1] == "id":
            if self.state == 2:
                # the context uri
                self.state = 3
            elif self.state == 4:
                # part of triple
                pass
            else:
                self.error("Unexpected id element")

        else:
            self.error("Unknown element %s in TriX namespace" % name[1])

        # Start collecting the text content of the element just opened.
        self.chars = ""

    def endElementNS(self, name, qname):
        """Handle a closing tag: build terms, graphs and triples.

        The text gathered by :meth:`characters` since the matching start
        tag is turned into a graph identifier or a triple term, depending
        on the current state.  Closing ``triple`` adds the collected terms
        to the current graph.
        """
        if name[0] != str(TRIXNS):
            self.error(
                "Only elements in the TriX namespace are allowed. %s!=%s"
                % (name[0], TRIXNS))

        if name[1] == "uri":
            if self.state == 3:
                # The uri names the enclosing graph.
                self.graph = Graph(store=self.store,
                                   identifier=URIRef(self.chars.strip()))
                self.state = 2
            elif self.state == 4:
                self.triple += [URIRef(self.chars.strip())]
            else:
                self.error(
                    "Illegal internal self.state - This should never " +
                    "happen if the SAX parser ensures XML syntax correctness")

        elif name[1] == "id":
            if self.state == 3:
                # The blank node id names the enclosing graph.
                self.graph = Graph(self.store, identifier=self.get_bnode(
                    self.chars.strip()))
                self.state = 2
            elif self.state == 4:
                self.triple += [self.get_bnode(self.chars.strip())]
            else:
                self.error(
                    "Illegal internal self.state - This should never " +
                    "happen if the SAX parser ensures XML syntax correctness")

        elif name[1] == "plainLiteral" or name[1] == "typedLiteral":
            if self.state == 4:
                # Literal text is used verbatim (not stripped).
                self.triple += [Literal(
                    self.chars, lang=self.lang, datatype=self.datatype)]
            else:
                self.error(
                    "This should never happen if the SAX parser " +
                    "ensures XML syntax correctness")

        elif name[1] == "triple":
            if self.state == 4:
                if len(self.triple) != 3:
                    self.error("Triple has wrong length, got %d elements: %s" %
                               (len(self.triple), self.triple))

                self.graph.add(self.triple)
                self.state = 2
            else:
                self.error(
                    "This should never happen if the SAX parser " +
                    "ensures XML syntax correctness")

        elif name[1] == "graph":
            self.graph = None
            self.state = 1

        elif name[1] == "TriX":
            self.state = 0

        else:
            self.error("Unexpected close element")

    def get_bnode(self, label):
        """Return the blank node for *label*.

        Unless ``preserve_bnode_ids`` is set, nodes are cached per label in
        ``self.bnode`` so the same object is returned for a repeated label.
        """
        if self.preserve_bnode_ids:
            return BNode(label)
        # NOTE(review): both branches construct BNode(label); the cache only
        # avoids building a second object for a label already seen.
        if label not in self.bnode:
            self.bnode[label] = BNode(label)
        return self.bnode[label]

    def characters(self, content):
        """Append *content* to the text of the element being read."""
        self.chars += content

    def ignorableWhitespace(self, content):
        pass

    def processingInstruction(self, target, data):
        pass

    def error(self, message):
        """Raise ``ParserError`` for *message*.

        The message is prefixed with ``systemId:line:column: `` when a
        document locator is available.
        """
        locator = self.locator
        if locator is None:
            # No locator was supplied, so there is no position to report.
            raise ParserError(message)
        info = "%s:%s:%s: " % (
            locator.getSystemId(),
            locator.getLineNumber(),
            locator.getColumnNumber())
        raise ParserError(info + message)
232
233
def create_parser(store):
    """Build a namespace-aware SAX parser wired to a new TriXHandler.

    The returned parser has a ``TriXHandler`` bound to *store* as its
    content handler and a default ``ErrorHandler`` as its error handler.
    """
    sax_parser = make_parser()
    try:
        # Workaround for bug in expatreader.py. Needed when
        # expatreader is trying to guess a prefix.
        sax_parser.start_namespace_decl(
            "xml", "http://www.w3.org/XML/1998/namespace")
    except AttributeError:
        # Not present in Jython (at least)
        pass
    sax_parser.setFeature(handler.feature_namespaces, 1)
    trix_handler = TriXHandler(store)
    sax_parser.setContentHandler(trix_handler)
    sax_parser.setErrorHandler(ErrorHandler())
    return sax_parser
248
249
class TriXParser(Parser):
    """rdflib parser plugin for the TriX format.

    See http://sw.nokia.com/trix/
    """

    def __init__(self):
        pass

    def parse(self, source, sink, **args):
        """Parse the TriX document *source* into the store of *sink*.

        ``sink.store`` must be context aware.  The optional keyword
        argument ``preserve_bnode_ids`` is copied onto the content handler
        when it is given and not None.
        """
        assert sink.store.context_aware, (
            "TriXParser must be given a context aware store.")

        # A fresh SAX parser (and handler) is built for every call.
        sax_parser = create_parser(sink.store)
        self._parser = sax_parser
        trix_handler = sax_parser.getContentHandler()

        keep_ids = args.get("preserve_bnode_ids", None)
        if keep_ids is not None:
            trix_handler.preserve_bnode_ids = keep_ids

        sax_parser.parse(source)