comparison env/lib/python3.7/site-packages/lxml/ElementInclude.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 #
2 # ElementTree
3 # $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $
4 #
5 # limited xinclude support for element trees
6 #
7 # history:
8 # 2003-08-15 fl created
9 # 2003-11-14 fl fixed default loader
10 #
11 # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
12 #
13 # fredrik@pythonware.com
14 # http://www.pythonware.com
15 #
16 # --------------------------------------------------------------------
17 # The ElementTree toolkit is
18 #
19 # Copyright (c) 1999-2004 by Fredrik Lundh
20 #
21 # By obtaining, using, and/or copying this software and/or its
22 # associated documentation, you agree that you have read, understood,
23 # and will comply with the following terms and conditions:
24 #
25 # Permission to use, copy, modify, and distribute this software and
26 # its associated documentation for any purpose and without fee is
27 # hereby granted, provided that the above copyright notice appears in
28 # all copies, and that both that copyright notice and this permission
29 # notice appear in supporting documentation, and that the name of
30 # Secret Labs AB or the author not be used in advertising or publicity
31 # pertaining to distribution of the software without specific, written
32 # prior permission.
33 #
34 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
35 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
36 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
37 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
38 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
39 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
40 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
41 # OF THIS SOFTWARE.
42 # --------------------------------------------------------------------
43
44 """
45 Limited XInclude support for the ElementTree package.
46
47 While lxml.etree has full support for XInclude (see
48 `etree.ElementTree.xinclude()`), this module provides a simpler, pure
49 Python, ElementTree compatible implementation that supports a simple
50 form of custom URL resolvers.
51 """
52
53 from lxml import etree
54 try:
55 from urlparse import urljoin
56 from urllib2 import urlopen
57 except ImportError:
58 # Python 3
59 from urllib.parse import urljoin
60 from urllib.request import urlopen
61
# Namespace prefix (in "{uri}" tag notation) shared by all XInclude tags.
XINCLUDE = "{http://www.w3.org/2001/XInclude}"

# Fully qualified tag names, plus a wildcard matching any XInclude element.
XINCLUDE_INCLUDE = "%sinclude" % XINCLUDE
XINCLUDE_FALLBACK = "%sfallback" % XINCLUDE
XINCLUDE_ITER_TAG = "%s*" % XINCLUDE

# For security reasons, the inclusion depth is limited to this value by
# default.  Treat it as read-only.
DEFAULT_MAX_INCLUSION_DEPTH = 6
70
71
72 ##
73 # Fatal include error.
74
class FatalIncludeError(etree.LxmlSyntaxError):
    """Raised when an XInclude directive cannot be processed."""
77
78
class LimitedRecursiveIncludeError(FatalIncludeError):
    """Raised when the maximum XInclude nesting depth has been reached."""
81
82
83 ##
84 # ET compatible default loader.
85 # This loader reads an included resource from disk.
86 #
87 # @param href Resource reference.
88 # @param parse Parse mode. Either "xml" or "text".
89 # @param encoding Optional text encoding.
90 # @return The expanded resource. If the parse mode is "xml", this
91 # is an ElementTree instance. If the parse mode is "text", this
92 # is a Unicode string. If the loader fails, it can return None
93 # or raise an IOError exception.
94 # @throws IOError If the loader fails to load the resource.
95
def default_loader(href, parse, encoding=None):
    """ElementTree-compatible loader that reads an included resource from disk.

    :param href: Path of the resource to open.
    :param parse: Parse mode.  "xml" parses the file and returns its root
        element; any other value reads the file as text.
    :param encoding: Text encoding used in text mode.  Falls back to
        'utf-8' when empty or None.
    :return: The root element (xml mode) or the decoded string (text mode).
    :raises IOError: If the file cannot be opened or read.
    """
    # The context manager guarantees the handle is released even when
    # parsing or reading raises.
    with open(href, 'rb') as stream:
        if parse == "xml":
            return etree.parse(stream).getroot()
        data = stream.read()
    return data.decode(encoding or 'utf-8')
107
108
109 ##
110 # Default loader used by lxml.etree - handles custom resolvers properly
111 #
112
113 def _lxml_default_loader(href, parse, encoding=None, parser=None):
114 if parse == "xml":
115 data = etree.parse(href, parser).getroot()
116 else:
117 if "://" in href:
118 f = urlopen(href)
119 else:
120 f = open(href, 'rb')
121 data = f.read()
122 f.close()
123 if not encoding:
124 encoding = 'utf-8'
125 data = data.decode(encoding)
126 return data
127
128
129 ##
130 # Wrapper for ET compatibility - drops the parser
131
132 def _wrap_et_loader(loader):
133 def load(href, parse, encoding=None, parser=None):
134 return loader(href, parse, encoding)
135 return load
136
137
138 ##
139 # Expand XInclude directives.
140 #
141 # @param elem Root element.
142 # @param loader Optional resource loader. If omitted, it defaults
143 # to {@link default_loader}. If given, it should be a callable
144 # that implements the same interface as <b>default_loader</b>.
145 # @param base_url The base URL of the original file, to resolve
146 # relative include file references.
147 # @param max_depth The maximum number of recursive inclusions.
148 # Limited to reduce the risk of malicious content explosion.
149 # Pass None to disable the limitation.
150 # @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
151 # @throws FatalIncludeError If the function fails to include a given
152 # resource, or if the tree contains malformed XInclude elements.
153 # @throws IOError If the function fails to load a given resource.
154 # @returns the node or its replacement if it was an XInclude node
155
def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    """Expand XInclude directives below ``elem``.

    :param elem: Root element, or a tree object exposing ``getroot()``.
    :param loader: Optional resource loader taking
        ``(href, parse, encoding)``.  When omitted, the module's built-in
        default loader is used.
    :param base_url: Base URL used to resolve relative include references.
        When None, it is taken from the document info of the tree that
        ``elem`` belongs to, if available.
    :param max_depth: Maximum number of recursive inclusions, or None to
        disable the limit.
    :return: The processed node, or its replacement if the node itself was
        an XInclude element.  The replacement of a root ``xi:include`` is
        not attached to any tree, so the return value is the only way to
        obtain it.
    :raises ValueError: If ``max_depth`` is negative.
    :raises LimitedRecursiveIncludeError: If ``max_depth`` was exceeded.
    :raises FatalIncludeError: If a resource cannot be included or the
        tree contains malformed XInclude elements.
    """
    if max_depth is None:
        # The depth check in _include() compares against 0 and counts
        # downwards, so a negative start value never triggers it.
        max_depth = -1
    elif max_depth < 0:
        raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)

    if base_url is None:
        if hasattr(elem, 'getroot'):
            tree = elem
            elem = elem.getroot()
        else:
            tree = elem.getroottree()
        if hasattr(tree, 'docinfo'):
            base_url = tree.docinfo.URL
    elif hasattr(elem, 'getroot'):
        elem = elem.getroot()
    return _include(elem, loader, base_url, max_depth)
174
175
def _include(elem, loader=None, base_url=None,
             max_depth=DEFAULT_MAX_INCLUSION_DEPTH, _parent_hrefs=None):
    """Recursively expand the XInclude directives found under ``elem``.

    :param elem: Element whose subtree (including itself) is processed.
    :param loader: Optional ElementTree-style loader taking
        ``(href, parse, encoding)``.  When None, the lxml default loader
        is used.
    :param base_url: URL against which each ``href`` is resolved.
    :param max_depth: Remaining nesting budget.  An xml include raises
        once this reaches 0; a negative value never triggers the limit.
    :param _parent_hrefs: Set of hrefs currently being included further up
        the recursion, used to detect include cycles.
    :return: ``elem``, or the loaded replacement (an element for
        parse="xml", a string for parse="text") when ``elem`` itself was a
        parentless ``xi:include``.
    :raises LimitedRecursiveIncludeError: If the depth budget is used up.
    :raises FatalIncludeError: On include cycles, unloadable resources,
        unknown parse types, misplaced ``xi:fallback`` elements or unknown
        elements in the XInclude namespace.
    """
    if loader is not None:
        load_include = _wrap_et_loader(loader)
    else:
        load_include = _lxml_default_loader

    if _parent_hrefs is None:
        _parent_hrefs = set()

    parser = elem.getroottree().parser

    # Materialise the matches up front: the loop below replaces and removes
    # elements, which must not disturb the iteration.
    include_elements = list(
        elem.iter(XINCLUDE_ITER_TAG))

    for e in include_elements:
        if e.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = urljoin(base_url, e.get("href"))
            parse = e.get("parse", "xml")
            parent = e.getparent()
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError(
                        "recursive include of %r detected" % href
                        )
                if max_depth == 0:
                    raise LimitedRecursiveIncludeError(
                        "maximum xinclude depth reached when including file %s" % href)
                node = load_include(href, parse, parser=parser)
                if node is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                # Expand nested includes first, remembering this href so
                # that a cycle back to it is detected.
                node = _include(node, loader, href, max_depth - 1, {href} | _parent_hrefs)
                if e.tail:
                    node.tail = (node.tail or "") + e.tail
                if parent is None:
                    return node  # replaced the root node!
                parent.replace(e, node)
            elif parse == "text":
                text = load_include(href, parse, encoding=e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                # Check for the root case first: a root element can still
                # have a preceding sibling (top-level comment or PI), and
                # there is no parent to remove it from.
                if parent is None:
                    return text  # replaced the root node!
                # lxml's remove() detaches an element together with its
                # tail text, so the tail has to be carried over explicitly
                # regardless of where the included text ends up.
                text += e.tail or ""
                predecessor = e.getprevious()
                if predecessor is not None:
                    predecessor.tail = (predecessor.tail or "") + text
                else:
                    parent.text = (parent.text or "") + text
                parent.remove(e)
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                )
        elif e.tag == XINCLUDE_FALLBACK:
            parent = e.getparent()
            if parent is not None and parent.tag != XINCLUDE_INCLUDE:
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % e.tag
                    )
        else:
            raise FatalIncludeError(
                "Invalid element found in XInclude namespace (%r)" % e.tag
                )
    return elem