comparison env/lib/python3.7/site-packages/prov/tests/test_xml.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 from __future__ import (absolute_import, division, print_function,
2 unicode_literals)
3
4 import difflib
5 import glob
6 import inspect
7 import io
8 from lxml import etree
9 import os
10 import unittest
11 import warnings
12
13 from prov.identifier import Namespace, QualifiedName
14 from prov.constants import PROV
15 import prov.model as prov
16 from prov.tests.test_model import AllTestsBase
17 from prov.tests.utility import RoundTripTestCase
18
19
# (prefix, URI) pairs for the two example namespaces used by the tests below.
EX_NS = ("ex", "http://example.com/ns/ex#")
EX_TR = ("tr", "http://example.com/ns/tr#")

# Directory holding the XML fixtures: the "xml" folder located next to this
# module. The module file is resolved through the current frame, which is
# the most general way to get the path.
_THIS_FILE = os.path.abspath(inspect.getfile(inspect.currentframe()))
DATA_PATH = os.path.join(os.path.dirname(_THIS_FILE), "xml")
26
27
def remove_empty_tags(tree):
    """
    Clear whitespace-only text on ``tree`` and on every node below it.

    Despite its name, this function does not remove any element: it only
    replaces a ``text`` value that consists solely of whitespace with
    ``None``. Text with other content and all ``tail`` values are left
    untouched. The tree is modified in place.

    :param tree: The element whose subtree is cleaned.
    """
    # ``iter()`` yields the element itself followed by all of its
    # descendants, so no recursion is needed and deeply nested documents
    # cannot exhaust the interpreter's recursion limit.
    for node in tree.iter():
        text = node.text
        if text is not None and text.strip() == "":
            node.text = None
34
35
def _drop_comments(root):
    """Detach every comment found in the document of ``root`` in place."""
    for comment in root.xpath("//comment()"):
        parent = comment.getparent()
        if parent is None:
            # A comment next to the root element has no parent element to
            # remove it from. It is left in place and omitted later when
            # the document is serialised without comments.
            continue

        # Removing a node in lxml also discards its tail. Hand real text
        # over to the preceding node (or the parent) so that it still
        # takes part in the comparison; whitespace-only tails are dropped.
        tail = comment.tail
        if tail is not None and tail.strip():
            previous = comment.getprevious()
            if previous is not None:
                previous.tail = (previous.tail or "") + tail
            else:
                parent.text = (parent.text or "") + tail
        parent.remove(comment)


def _canonical_lines(doc):
    """
    Parse ``doc`` and return its canonical form as a list of stripped,
    non-empty lines.
    """
    # File-like objects may have been written to just before; rewind them.
    # File names have no ``seek`` and are passed on unchanged.
    try:
        doc.seek(0, 0)
    except AttributeError:
        pass

    tree = etree.parse(doc)
    root = tree.getroot()
    _drop_comments(root)
    remove_empty_tags(root)

    buf = io.BytesIO()
    tree.write_c14n(buf, with_comments=False)
    # Canonical XML is always written as UTF-8.
    text = buf.getvalue().decode("utf-8")
    return [line.strip() for line in text.splitlines() if line.strip()]


def compare_xml(doc1, doc2):
    """
    Helper function to compare two XML files. It will parse both once again
    and write them in a canonical fashion.

    Comments and whitespace-only text are ignored, and each line of the
    canonical output is stripped before the comparison.

    :param doc1: File name or file-like object of the first document.
    :param doc2: File name or file-like object of the second document.
    :raises AssertionError: If the canonical forms differ. The message
        contains a unified diff of the two documents.
    """
    lines1 = _canonical_lines(doc1)
    lines2 = _canonical_lines(doc2)

    # The compared lines carry no trailing newline, so the diff control
    # lines must not carry one either.
    err_msg = "\n".join(difflib.unified_diff(lines1, lines2, lineterm=""))
    if err_msg:
        raise AssertionError("Strings are not equal.\n" + err_msg)
82
83
class ProvXMLTestCase(unittest.TestCase):
    """
    Hand-written tests for the PROV-XML serializer and deserializer.

    The serialization tests compare against reference files in DATA_PATH;
    the deserialization tests compare against documents built in code.
    """

    def _assert_serializes_to(self, document, reference_name):
        """
        Serialize ``document`` to XML and compare it with the reference file
        ``reference_name`` located in DATA_PATH.
        """
        with io.BytesIO() as actual:
            document.serialize(format='xml', destination=actual)
            # The comparison has to happen while the buffer is still open.
            compare_xml(os.path.join(DATA_PATH, reference_name), actual)

    @staticmethod
    def _deserialize_string(xml_string):
        """
        Deserialize the PROV-XML text ``xml_string`` and return the document.
        """
        with io.StringIO(xml_string) as xml:
            return prov.ProvDocument.deserialize(source=xml, format="xml")

    def test_serialization_example_6(self):
        """
        Test the serialization of example 6 which is a simple entity
        description.
        """
        document = prov.ProvDocument()
        ex_ns = document.add_namespace(*EX_NS)
        document.add_namespace(*EX_TR)

        document.entity("tr:WD-prov-dm-20111215", (
            (prov.PROV_TYPE, ex_ns["Document"]),
            ("ex:version", "2")
        ))

        self._assert_serializes_to(document, "example_06.xml")

    def test_serialization_example_7(self):
        """
        Test the serialization of example 7 which is a basic activity.
        """
        document = prov.ProvDocument()
        document.add_namespace(*EX_NS)

        document.activity(
            "ex:a1",
            "2011-11-16T16:05:00",
            "2011-11-16T16:06:00", [
                (prov.PROV_TYPE, prov.Literal("ex:edit", prov.XSD_QNAME)),
                ("ex:host", "server.example.org")])

        self._assert_serializes_to(document, "example_07.xml")

    def test_serialization_example_8(self):
        """
        Test the serialization of example 8 which deals with generation.
        """
        document = prov.ProvDocument()
        document.add_namespace(*EX_NS)

        e1 = document.entity("ex:e1")
        a1 = document.activity("ex:a1")

        document.wasGeneratedBy(entity=e1, activity=a1,
                                time="2001-10-26T21:32:52",
                                other_attributes={"ex:port": "p1"})

        e2 = document.entity("ex:e2")

        document.wasGeneratedBy(entity=e2, activity=a1,
                                time="2001-10-26T10:00:00",
                                other_attributes={"ex:port": "p2"})

        self._assert_serializes_to(document, "example_08.xml")

    def test_deserialization_example_6(self):
        """
        Test the deserialization of example 6 which is a simple entity
        description.
        """
        actual_doc = prov.ProvDocument.deserialize(
            source=os.path.join(DATA_PATH, "example_06.xml"),
            format="xml")

        expected_document = prov.ProvDocument()
        ex_ns = expected_document.add_namespace(*EX_NS)
        expected_document.add_namespace(*EX_TR)

        expected_document.entity("tr:WD-prov-dm-20111215", (
            (prov.PROV_TYPE, ex_ns["Document"]),
            ("ex:version", "2")
        ))

        self.assertEqual(actual_doc, expected_document)

    def test_deserialization_example_7(self):
        """
        Test the deserialization of example 7 which is a simple activity
        description.
        """
        actual_doc = prov.ProvDocument.deserialize(
            source=os.path.join(DATA_PATH, "example_07.xml"),
            format="xml")

        expected_document = prov.ProvDocument()
        ex_ns = Namespace(*EX_NS)
        expected_document.add_namespace(ex_ns)

        expected_document.activity(
            "ex:a1",
            "2011-11-16T16:05:00",
            "2011-11-16T16:06:00", [
                (prov.PROV_TYPE, QualifiedName(ex_ns, "edit")),
                ("ex:host", "server.example.org")])

        self.assertEqual(actual_doc, expected_document)

    def test_deserialization_example_04_and_05(self):
        """
        Example 4 and 5 have a different type specification. They use an
        xsi:type as an attribute on an entity. This can be read but if
        written again it will become an XML child element. This is
        semantically identical but cannot be tested with a round trip.
        """
        # Example 4.
        xml_string = """
        <prov:document
            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xmlns:xsd="http://www.w3.org/2001/XMLSchema"
            xmlns:prov="http://www.w3.org/ns/prov#"
            xmlns:ex="http://example.com/ns/ex#"
            xmlns:tr="http://example.com/ns/tr#">

          <prov:entity prov:id="tr:WD-prov-dm-20111215" xsi:type="prov:Plan">
            <prov:type xsi:type="xsd:QName">ex:Workflow</prov:type>
          </prov:entity>

        </prov:document>
        """
        actual_document = self._deserialize_string(xml_string)

        expected_document = prov.ProvDocument()
        ex_ns = Namespace(*EX_NS)
        expected_document.add_namespace(ex_ns)
        expected_document.add_namespace(*EX_TR)

        # The xsi:type attribute is mapped to a proper PROV attribute.
        expected_document.entity("tr:WD-prov-dm-20111215", (
            (prov.PROV_TYPE, QualifiedName(ex_ns, "Workflow")),
            (prov.PROV_TYPE, PROV["Plan"])))

        self.assertEqual(actual_document, expected_document, "example_04")

        # Example 5.
        xml_string = """
        <prov:document
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xsd="http://www.w3.org/2001/XMLSchema"
          xmlns:prov="http://www.w3.org/ns/prov#"
          xmlns:ex="http://example.com/ns/ex#"
          xmlns:tr="http://example.com/ns/tr#">

        <prov:entity prov:id="tr:WD-prov-dm-20111215" xsi:type="prov:Plan">
          <prov:type xsi:type="xsd:QName">ex:Workflow</prov:type>
          <prov:type xsi:type="xsd:QName">prov:Plan</prov:type> <!-- inferred -->
          <prov:type xsi:type="xsd:QName">prov:Entity</prov:type> <!-- inferred -->
        </prov:entity>

        </prov:document>
        """
        actual_document = self._deserialize_string(xml_string)

        # Build the namespace again so that this half of the test does not
        # depend on a variable left over from the example 4 half.
        expected_document = prov.ProvDocument()
        ex_ns = Namespace(*EX_NS)
        expected_document.add_namespace(ex_ns)
        expected_document.add_namespace(*EX_TR)

        # The xsi:type attribute is mapped to a proper PROV attribute.
        expected_document.entity("tr:WD-prov-dm-20111215", (
            (prov.PROV_TYPE, QualifiedName(ex_ns, "Workflow")),
            (prov.PROV_TYPE, PROV["Entity"]),
            (prov.PROV_TYPE, PROV["Plan"])
        ))

        self.assertEqual(actual_document, expected_document, "example_05")

    def test_other_elements(self):
        """
        PROV XML uses the <prov:other> element to enable the storage of non
        PROV information in a PROV XML document. It will be ignored by this
        library and a warning will be raised informing the user.
        """
        # This is example 42 from the PROV XML documentation.
        xml_string = """
        <prov:document
            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xmlns:xsd="http://www.w3.org/2001/XMLSchema"
            xmlns:prov="http://www.w3.org/ns/prov#"
            xmlns:ex="http://example.com/ns/ex#">

          <!-- prov statements go here -->

          <prov:other>
            <ex:foo>
              <ex:content>bar</ex:content>
            </ex:foo>
          </prov:other>

          <!-- more prov statements can go here -->

        </prov:document>
        """
        with warnings.catch_warnings(record=True) as w:
            # Record every warning, including ones already shown elsewhere.
            warnings.simplefilter("always")
            doc = self._deserialize_string(xml_string)

        self.assertEqual(len(w), 1)
        self.assertIn(
            "Document contains non-PROV information in <prov:other>. It will "
            "be ignored in this package.", str(w[0].message))

        # This document contains nothing else.
        self.assertEqual(len(doc._records), 0)

    def test_nested_default_namespace(self):
        """
        Tests that a default namespace that is defined in a lower level tag is
        written to a bundle.
        """
        filename = os.path.join(DATA_PATH, "nested_default_namespace.xml")
        doc = prov.ProvDocument.deserialize(source=filename, format="xml")

        ns = Namespace("", "http://example.org/0/")

        self.assertEqual(len(doc._records), 1)
        self.assertEqual(doc.get_default_namespace(), ns)
        self.assertEqual(doc._records[0].identifier.namespace, ns)
        self.assertEqual(doc._records[0].identifier.localpart, "e001")

    def test_redefining_namespaces(self):
        """
        Test the behaviour when namespaces are redefined at the element level.
        """
        filename = os.path.join(DATA_PATH,
                                "namespace_redefined_but_does_not_change.xml")
        doc = prov.ProvDocument.deserialize(source=filename, format="xml")
        # This has one record part of the original namespace.
        self.assertEqual(len(doc._records), 1)
        ns = Namespace(*EX_NS)
        self.assertEqual(doc._records[0].attributes[0][1].namespace, ns)

        # This also has one record but now in a different namespace.
        filename = os.path.join(DATA_PATH, "namespace_redefined.xml")
        doc = prov.ProvDocument.deserialize(source=filename, format="xml")
        new_ns = doc._records[0].attributes[0][1].namespace
        self.assertNotEqual(new_ns, ns)
        self.assertEqual(new_ns.uri, "http://example.com/ns/new_ex#")
339
class ProvXMLRoundTripFromFileTestCase(unittest.TestCase):
    """Container for round-trip checks driven by XML files on disk."""

    def _perform_round_trip(self, filename, force_types=False):
        """
        Read ``filename`` as PROV-XML, write it out again and compare the
        result with the original file.

        :param filename: Path of the XML file to round trip.
        :param force_types: Passed on unchanged to ``serialize``.
        """
        parsed = prov.ProvDocument.deserialize(source=filename, format="xml")

        with io.BytesIO() as rewritten:
            parsed.serialize(
                format='xml',
                destination=rewritten,
                force_types=force_types,
            )
            # Compare while the in-memory buffer is still open.
            compare_xml(filename, rewritten)
349
350
# Build one round-trip test function per XML file found in DATA_PATH. This is
# a lazy form of metaprogramming; parametrized tests are justified here as the
# generated function names make it clear what is going on.
for filename in glob.iglob(
        os.path.join(DATA_PATH, "*" + os.path.extsep + "xml")):
    name = os.path.splitext(os.path.basename(filename))[0]
    test_name = "test_roundtrip_from_xml_" + name

    # These cannot be round tripped as the namespaces in the PROV data model
    # are always defined per bundle and not per element.
    if name in ("nested_default_namespace",
                "nested_changing_default_namespace",
                "namespace_redefined_but_does_not_change",
                "namespace_redefined"):
        continue

    # A factory function is needed so that every generated test captures its
    # own file name: Python creates closures on function calls.
    def get_fct(f):
        # Some test files have a lot of type declarations...
        force_types = name in ["pc1"]

        def fct(self):
            self._perform_round_trip(f, force_types=force_types)

        return fct

    fct = get_fct(filename)
    fct.__name__ = str(test_name)

    # Disabled round-trip XML comparisons since deserializing then serializing
    # PROV-XML does not maintain XML equivalence. (For example, prov:entity
    # elements with type prov:Plan become prov:plan elements)
    # TODO: Revisit these tests

    # setattr(ProvXMLRoundTripFromFileTestCase, test_name, fct)
388
389
# Combines the two imported base classes RoundTripTestCase and AllTestsBase
# and sets FORMAT to 'xml'.
# NOTE(review): FORMAT presumably selects the serialization format used by
# the inherited round-trip tests -- confirm in prov.tests.utility.
class RoundTripXMLTests(RoundTripTestCase, AllTestsBase):
    FORMAT = 'xml'
392
393
# Run the tests of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()