Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/prov/tests/test_xml.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author | shellac |
---|---|
date | Sat, 02 May 2020 07:14:21 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:26e78fe6e8c4 |
---|---|
1 from __future__ import (absolute_import, division, print_function, | |
2 unicode_literals) | |
3 | |
4 import difflib | |
5 import glob | |
6 import inspect | |
7 import io | |
8 from lxml import etree | |
9 import os | |
10 import unittest | |
11 import warnings | |
12 | |
13 from prov.identifier import Namespace, QualifiedName | |
14 from prov.constants import PROV | |
15 import prov.model as prov | |
16 from prov.tests.test_model import AllTestsBase | |
17 from prov.tests.utility import RoundTripTestCase | |
18 | |
19 | |
# (prefix, URI) pairs of the namespaces used by the example documents.
EX_NS = ("ex", "http://example.com/ns/ex#")
EX_TR = ("tr", "http://example.com/ns/tr#")

# Directory named "xml" next to the source file of the currently executing
# frame, i.e. next to this module. Going through ``inspect`` is the most
# general way to get the path.
DATA_PATH = os.path.join(
    os.path.dirname(
        os.path.abspath(inspect.getfile(inspect.currentframe()))),
    "xml")
26 | |
27 | |
def remove_empty_tags(tree):
    """
    Recursively reset whitespace-only element text to ``None``.

    Despite its name this does not remove any element: for ``tree`` and
    every element below it, a ``text`` that consists only of whitespace is
    replaced by ``None``. Tail text is not touched.
    """
    text = tree.text
    if text is not None and not text.strip():
        tree.text = None

    for child in tree:
        if not etree.iselement(child):
            continue
        remove_empty_tags(child)
34 | |
35 | |
def _canonical_lines(doc):
    """
    Parse ``doc`` and return its canonical XML as a list of lines.

    :param doc: A filename or a file-like object containing XML.
    :return: The stripped, non-empty lines of the C14N serialization of the
        document, with comments and whitespace-only text removed.
    """
    # File-like objects may just have been written to, so rewind them.
    # Filenames have no ``seek`` and are handed to the parser as they are.
    try:
        doc.seek(0, 0)
    except AttributeError:
        pass

    tree = etree.parse(doc)
    root = tree.getroot()

    # Remove comments.
    for comment in root.xpath("//comment()"):
        parent = comment.getparent()
        # A comment before or after the root element has no parent to remove
        # it from; those are left out by ``with_comments=False`` below.
        if parent is not None:
            parent.remove(comment)

    remove_empty_tags(root)

    buf = io.BytesIO()
    tree.write_c14n(buf, with_comments=False)
    # Canonical XML is UTF-8 encoded; be explicit so that Python 2 does not
    # fall back to ASCII.
    text = buf.getvalue().decode("utf-8")
    return [line.strip() for line in text.splitlines() if line.strip()]


def compare_xml(doc1, doc2):
    """
    Helper function to compare two XML files. It will parse both once again
    and write them in a canonical fashion.

    :param doc1: Filename or file-like object of the first document.
    :param doc2: Filename or file-like object of the second document.
    :raises AssertionError: If the canonical forms differ. The message
        contains a unified diff of the two documents.
    """
    lines1 = _canonical_lines(doc1)
    lines2 = _canonical_lines(doc2)

    err_msg = "\n".join(difflib.unified_diff(lines1, lines2))
    if err_msg:
        raise AssertionError("Strings are not equal.\n" + err_msg)
82 | |
83 | |
class ProvXMLTestCase(unittest.TestCase):
    """
    Tests for the PROV-XML serialization and deserialization, based on the
    example files in ``DATA_PATH`` and on XML documents defined inline.
    """

    def _assert_serializes_to(self, document, example_file):
        """
        Serialize ``document`` as XML and compare the result with the file
        named ``example_file`` in ``DATA_PATH``.
        """
        with io.BytesIO() as actual:
            document.serialize(format='xml', destination=actual)
            compare_xml(os.path.join(DATA_PATH, example_file), actual)

    @staticmethod
    def _deserialize_string(xml_string):
        """
        Deserialize the PROV-XML document given as the text ``xml_string``.
        """
        with io.StringIO(xml_string) as xml:
            return prov.ProvDocument.deserialize(source=xml, format="xml")

    def test_serialization_example_6(self):
        """
        Test the serialization of example 6 which is a simple entity
        description.
        """
        document = prov.ProvDocument()
        ex_ns = document.add_namespace(*EX_NS)
        document.add_namespace(*EX_TR)

        document.entity("tr:WD-prov-dm-20111215", (
            (prov.PROV_TYPE, ex_ns["Document"]),
            ("ex:version", "2")
        ))

        self._assert_serializes_to(document, "example_06.xml")

    def test_serialization_example_7(self):
        """
        Test the serialization of example 7 which is a basic activity.
        """
        document = prov.ProvDocument()
        document.add_namespace(*EX_NS)

        document.activity(
            "ex:a1",
            "2011-11-16T16:05:00",
            "2011-11-16T16:06:00", [
                (prov.PROV_TYPE, prov.Literal("ex:edit", prov.XSD_QNAME)),
                ("ex:host", "server.example.org")])

        self._assert_serializes_to(document, "example_07.xml")

    def test_serialization_example_8(self):
        """
        Test the serialization of example 8 which deals with generation.
        """
        document = prov.ProvDocument()
        document.add_namespace(*EX_NS)

        e1 = document.entity("ex:e1")
        a1 = document.activity("ex:a1")

        document.wasGeneratedBy(entity=e1, activity=a1,
                                time="2001-10-26T21:32:52",
                                other_attributes={"ex:port": "p1"})

        e2 = document.entity("ex:e2")

        document.wasGeneratedBy(entity=e2, activity=a1,
                                time="2001-10-26T10:00:00",
                                other_attributes={"ex:port": "p2"})

        self._assert_serializes_to(document, "example_08.xml")

    def test_deserialization_example_6(self):
        """
        Test the deserialization of example 6 which is a simple entity
        description.
        """
        actual_doc = prov.ProvDocument.deserialize(
            source=os.path.join(DATA_PATH, "example_06.xml"),
            format="xml")

        expected_document = prov.ProvDocument()
        ex_ns = expected_document.add_namespace(*EX_NS)
        expected_document.add_namespace(*EX_TR)

        expected_document.entity("tr:WD-prov-dm-20111215", (
            (prov.PROV_TYPE, ex_ns["Document"]),
            ("ex:version", "2")
        ))

        self.assertEqual(actual_doc, expected_document)

    def test_deserialization_example_7(self):
        """
        Test the deserialization of example 7 which is a simple activity
        description.
        """
        actual_doc = prov.ProvDocument.deserialize(
            source=os.path.join(DATA_PATH, "example_07.xml"),
            format="xml")

        expected_document = prov.ProvDocument()
        ex_ns = Namespace(*EX_NS)
        expected_document.add_namespace(ex_ns)

        expected_document.activity(
            "ex:a1",
            "2011-11-16T16:05:00",
            "2011-11-16T16:06:00", [
                (prov.PROV_TYPE, QualifiedName(ex_ns, "edit")),
                ("ex:host", "server.example.org")])

        self.assertEqual(actual_doc, expected_document)

    def test_deserialization_example_04_and_05(self):
        """
        Example 4 and 5 have a different type specification. They use an
        xsi:type as an attribute on an entity. This can be read but if
        written again it will become an XML child element. This is
        semantically identical but cannot be tested with a round trip.
        """
        # Example 4.
        xml_string = """
        <prov:document
            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xmlns:xsd="http://www.w3.org/2001/XMLSchema"
            xmlns:prov="http://www.w3.org/ns/prov#"
            xmlns:ex="http://example.com/ns/ex#"
            xmlns:tr="http://example.com/ns/tr#">

          <prov:entity prov:id="tr:WD-prov-dm-20111215" xsi:type="prov:Plan">
            <prov:type xsi:type="xsd:QName">ex:Workflow</prov:type>
          </prov:entity>

        </prov:document>
        """
        actual_document = self._deserialize_string(xml_string)

        expected_document = prov.ProvDocument()
        ex_ns = Namespace(*EX_NS)
        expected_document.add_namespace(ex_ns)
        expected_document.add_namespace(*EX_TR)

        # The xsi:type attribute is mapped to a proper PROV attribute.
        expected_document.entity("tr:WD-prov-dm-20111215", (
            (prov.PROV_TYPE, QualifiedName(ex_ns, "Workflow")),
            (prov.PROV_TYPE, PROV["Plan"])))

        self.assertEqual(actual_document, expected_document, "example_04")

        # Example 5.
        xml_string = """
        <prov:document
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xsd="http://www.w3.org/2001/XMLSchema"
          xmlns:prov="http://www.w3.org/ns/prov#"
          xmlns:ex="http://example.com/ns/ex#"
          xmlns:tr="http://example.com/ns/tr#">

        <prov:entity prov:id="tr:WD-prov-dm-20111215" xsi:type="prov:Plan">
          <prov:type xsi:type="xsd:QName">ex:Workflow</prov:type>
          <prov:type xsi:type="xsd:QName">prov:Plan</prov:type> <!-- inferred -->
          <prov:type xsi:type="xsd:QName">prov:Entity</prov:type> <!-- inferred -->
        </prov:entity>

        </prov:document>
        """
        actual_document = self._deserialize_string(xml_string)

        expected_document = prov.ProvDocument()
        # Use the namespace registered on this very document rather than the
        # object left over from the example 4 section above.
        ex_ns = expected_document.add_namespace(*EX_NS)
        expected_document.add_namespace(*EX_TR)

        # The xsi:type attribute is mapped to a proper PROV attribute.
        expected_document.entity("tr:WD-prov-dm-20111215", (
            (prov.PROV_TYPE, QualifiedName(ex_ns, "Workflow")),
            (prov.PROV_TYPE, PROV["Entity"]),
            (prov.PROV_TYPE, PROV["Plan"])
        ))

        self.assertEqual(actual_document, expected_document, "example_05")

    def test_other_elements(self):
        """
        PROV XML uses the <prov:other> element to enable the storage of non
        PROV information in a PROV XML document. It will be ignored by this
        library and a warning will be raised informing the user.
        """
        # This is example 42 from the PROV XML documentation.
        xml_string = """
        <prov:document
            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xmlns:xsd="http://www.w3.org/2001/XMLSchema"
            xmlns:prov="http://www.w3.org/ns/prov#"
            xmlns:ex="http://example.com/ns/ex#">

          <!-- prov statements go here -->

          <prov:other>
            <ex:foo>
              <ex:content>bar</ex:content>
            </ex:foo>
          </prov:other>

          <!-- more prov statements can go here -->

        </prov:document>
        """
        with warnings.catch_warnings(record=True) as w:
            # Make sure the warning is recorded even if it was already
            # triggered elsewhere.
            warnings.simplefilter("always")
            doc = self._deserialize_string(xml_string)

        self.assertEqual(len(w), 1)
        self.assertIn(
            "Document contains non-PROV information in <prov:other>. It will "
            "be ignored in this package.", str(w[0].message))

        # This document contains nothing else.
        self.assertEqual(len(doc._records), 0)

    def test_nested_default_namespace(self):
        """
        Tests that a default namespace that is defined in a lower level tag is
        written to a bundle.
        """
        filename = os.path.join(DATA_PATH, "nested_default_namespace.xml")
        doc = prov.ProvDocument.deserialize(source=filename, format="xml")

        ns = Namespace("", "http://example.org/0/")

        self.assertEqual(len(doc._records), 1)
        self.assertEqual(doc.get_default_namespace(), ns)
        self.assertEqual(doc._records[0].identifier.namespace, ns)
        self.assertEqual(doc._records[0].identifier.localpart, "e001")

    def test_redefining_namespaces(self):
        """
        Test the behaviour when namespaces are redefined at the element level.
        """
        filename = os.path.join(DATA_PATH,
                                "namespace_redefined_but_does_not_change.xml")
        doc = prov.ProvDocument.deserialize(source=filename, format="xml")
        # This has one record part of the original namespace.
        self.assertEqual(len(doc._records), 1)
        ns = Namespace("ex", "http://example.com/ns/ex#")
        self.assertEqual(doc._records[0].attributes[0][1].namespace, ns)

        # This also has one record but now in a different namespace.
        filename = os.path.join(DATA_PATH, "namespace_redefined.xml")
        doc = prov.ProvDocument.deserialize(source=filename, format="xml")
        new_ns = doc._records[0].attributes[0][1].namespace
        self.assertNotEqual(new_ns, ns)
        self.assertEqual(new_ns.uri, "http://example.com/ns/new_ex#")
338 | |
339 | |
class ProvXMLRoundTripFromFileTestCase(unittest.TestCase):
    """
    Holder for the round-trip tests that are generated per XML file further
    down in this module.
    """

    def _perform_round_trip(self, filename, force_types=False):
        """
        Read ``filename`` as PROV-XML, write it out again and compare the
        result with the original file.

        ``force_types`` is passed on unchanged to ``serialize()``.
        """
        parsed = prov.ProvDocument.deserialize(source=filename, format="xml")

        with io.BytesIO() as buffer:
            parsed.serialize(
                format='xml', destination=buffer, force_types=force_types)
            compare_xml(filename, buffer)
349 | |
350 | |
# Python creates closures on function calls...
def get_fct(f):
    """
    Return a test method performing the round trip for the XML file ``f``.

    The path is bound through this call so each generated method keeps its
    own file. Whether types are forced is derived from ``f`` itself, not
    from a variable of the surrounding loop.
    """
    # Some test files have a lot of type declarations...
    force_types = os.path.splitext(os.path.basename(f))[0] in ("pc1",)

    def fct(self):
        self._perform_round_trip(f, force_types=force_types)
    return fct


# Add one test for each found file. Lazy way to do metaprogramming...
# I think parametrized tests are justified in this case as the test
# function names make it clear what is going on.
for filename in glob.iglob(os.path.join(
        DATA_PATH, "*" + os.path.extsep + "xml")):
    name = os.path.splitext(os.path.basename(filename))[0]
    test_name = "test_roundtrip_from_xml_%s" % name

    # Cannot round trip this one as the namespace in the PROV data model are
    # always defined per bundle and not per element.
    if name in ("nested_default_namespace",
                "nested_changing_default_namespace",
                "namespace_redefined_but_does_not_change",
                "namespace_redefined"):
        continue

    fct = get_fct(filename)
    fct.__name__ = str(test_name)

    # Disabled round-trip XML comparisons since deserializing then serializing
    # PROV-XML does not maintain XML equivalence. (For example, prov:entity
    # elements with type prov:Plan become prov:plan elements)
    # TODO: Revisit these tests

    # setattr(ProvXMLRoundTripFromFileTestCase, test_name, fct)
388 | |
389 | |
class RoundTripXMLTests(RoundTripTestCase, AllTestsBase):
    """
    Combination of ``RoundTripTestCase`` and ``AllTestsBase`` with the
    serialization format set to XML.
    """

    FORMAT = "xml"
392 | |
393 | |
# Run all tests of this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()