Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/pyRdfa/utils.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 Various utilities for pyRdfa. | |
4 | |
5 Most of the utilities are straightforward. | |
6 | |
7 @organization: U{World Wide Web Consortium<http://www.w3.org>} | |
8 @author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} | |
9 @license: This software is available for use under the | |
10 U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} | |
11 | |
12 | |
13 """ | |
14 | |
15 """ | |
16 $Id: utils.py,v 1.9 2012/11/16 17:51:53 ivan Exp $ | |
17 $Date: 2012/11/16 17:51:53 $ | |
18 """ | |
19 import os, os.path, sys, imp, datetime | |
20 | |
21 # Python 3 vs. 2 switch | |
22 if sys.version_info[0] >= 3 : | |
23 from urllib.request import Request, urlopen | |
24 from urllib.parse import urljoin, quote | |
25 from http.server import BaseHTTPRequestHandler | |
26 from urllib.error import HTTPError as urllib_HTTPError | |
27 else : | |
28 from urllib.request import Request, urlopen | |
29 from urllib.error import HTTPError as urllib_HTTPError | |
30 from urllib.parse import urljoin | |
31 from urllib.parse import quote | |
32 from http.server import BaseHTTPRequestHandler | |
33 | |
34 from .extras.httpheader import content_type, parse_http_datetime | |
35 | |
import rdflib

# Pick the import location of the RDF namespace based on the rdflib major version.
# The major version is compared numerically: a plain string comparison such as
# "10.0.0" >= "3.0.0" is evaluated character by character and yields False.
try:
    _rdflib_major = int(rdflib.__version__.split(".")[0])
except ValueError:
    # Unparseable version string: assume a modern (3.x or later) layout
    _rdflib_major = 3

if _rdflib_major >= 3:
    from rdflib import RDF as ns_rdf
else:
    from rdflib.RDF import RDFNS as ns_rdf
41 | |
42 from .host import HostLanguage, preferred_suffixes | |
43 | |
44 ######################################################################################################### | |
45 # Handling URIs | |
class URIOpener:
    """A wrapper around C{urlopen} to open a resource. Beyond accessing the data itself, the class
    sets a number of instance variables that might be relevant for processing.
    The class also adds an accept header to the outgoing request, namely
    text/html and application/xhtml+xml (unless set explicitly by the caller).

    If the content type is set by the server, the relevant HTTP response field is used. Otherwise,
    common suffixes are used (see L{host.preferred_suffixes}) to set the content type (this is really of importance
    for C{file:///} URI-s). If none of these works, the content type is empty.

    Interpretation of the content type for the return is done by Deron Meranda's U{httpheader module<http://deron.meranda.us/>}.

    @ivar data: the real data, ie, a file-like object
    @ivar headers: the return headers as sent back by the server
    @ivar content_type: the content type of the resource or the empty string, if the content type cannot be determined
    @ivar charset: the C{charset} parameter of the Content-Type header, None if there is no such parameter or no such header
    @ivar location: the real location of the data (ie, after possible redirection and content negotiation)
    @ivar last_modified_date: sets the last modified date if set in the header, None otherwise
    @ivar expiration_date: sets the expiration date if set in the header, I{current UTC plus one day} otherwise (this is used for caching purposes, hence this artificial setting)
    """
    CONTENT_LOCATION = 'Content-Location'
    CONTENT_TYPE = 'Content-Type'
    LAST_MODIFIED = 'Last-Modified'
    EXPIRES = 'Expires'

    def __init__(self, name, additional_headers=None):
        """
        @param name: URL to be opened
        @keyword additional_headers: additional HTTP request headers to be added to the call
          (a dictionary of header name/value pairs; None or omitted means no extra header)
        @raise HTTPError: (the pyRdfa one) if the underlying library reports an HTTP error
        @raise RDFaError: if anything else goes wrong while opening or inspecting the resource
        """
        # A None default avoids sharing one mutable dictionary among all calls
        if additional_headers is None:
            additional_headers = {}

        try:
            # Note the removal of the fragment ID. This is necessary, per the HTTP spec
            req = Request(url=name.split('#')[0])

            for key in additional_headers:
                req.add_header(key, additional_headers[key])
            if 'Accept' not in additional_headers:
                req.add_header('Accept', 'text/html, application/xhtml+xml')

            self.data = urlopen(req)
            self.headers = self.data.info()

            if URIOpener.CONTENT_TYPE in self.headers:
                # The call below will remove the possible media type parameters, like charset settings
                ct = content_type(self.headers[URIOpener.CONTENT_TYPE])
                self.content_type = ct.media_type
                self.charset = ct.parmdict['charset'] if 'charset' in ct.parmdict else None
            else:
                # check if the suffix can be used for the content type; this may be important
                # for file:// type URI or if the server is not properly set up to return the right
                # mime type
                self.charset = None
                self.content_type = ""
                for suffix in preferred_suffixes:
                    if name.endswith(suffix):
                        self.content_type = preferred_suffixes[suffix]
                        break

            if URIOpener.CONTENT_LOCATION in self.headers:
                self.location = urljoin(self.data.geturl(), self.headers[URIOpener.CONTENT_LOCATION])
            else:
                self.location = name

            # Artificial default, used for caching: one day from now
            self.expiration_date = datetime.datetime.utcnow() + datetime.timedelta(days=1)
            if URIOpener.EXPIRES in self.headers:
                try:
                    # Thanks to Deron Meranda for the HTTP date conversion method...
                    self.expiration_date = parse_http_datetime(self.headers[URIOpener.EXPIRES])
                except Exception:
                    # The Expires date format was wrong, sorry, forget it...
                    # (deliberately best effort: the default set above stays in place)
                    pass

            self.last_modified_date = None
            if URIOpener.LAST_MODIFIED in self.headers:
                try:
                    # Thanks to Deron Meranda for the HTTP date conversion method...
                    self.last_modified_date = parse_http_datetime(self.headers[URIOpener.LAST_MODIFIED])
                except Exception:
                    # The last modified date format was wrong, sorry, forget it...
                    # (deliberately best effort: the value stays None)
                    pass

        except urllib_HTTPError as e:
            from . import HTTPError
            responses = BaseHTTPRequestHandler.responses
            if e.code in responses:
                # Second item of the entry is the longer, explanatory message
                reason = responses[e.code][1]
            else:
                # Non-standard status code: fall back on the library's own message
                # instead of failing with a KeyError that would hide the HTTP error
                reason = '%s' % e
            raise HTTPError('%s' % reason, e.code)
        except Exception as e:
            from . import RDFaError
            raise RDFaError('%s' % e)
138 | |
139 ######################################################################################################### | |
140 | |
141 # 'safe' characters for the URI quoting, ie, characters that can safely stay as they are. Other | |
142 # special characters are converted to their %.. equivalents for namespace prefixes | |
# Raw string: the backslash is a literal member of the 'safe' set. Written as '\?' in a
# plain string it is an invalid escape sequence, which recent Python versions warn about.
_unquotedChars = r':/\?=#~'
# Whitespace characters whose presence inside a URI triggers a warning in quote_URI
_warnChars = [' ', '\n', '\r', '\t']
145 | |
def quote_URI(uri, options=None):
    """
    'quote' a URI, ie, exchange special characters for their '%..' equivalents. Some of the characters
    may stay as they are (listed in L{_unquotedChars}). If one of the characters listed in L{_warnChars}
    is also in the uri (after stripping leading and trailing white space), an extra warning is also generated.
    At most one warning is added per call, and only if C{options} is provided.

    @param uri: URI
    @param options: used to store the warning, if any; may be None
    @type options: L{Options<pyRdfa.Options>}
    @return: the stripped and quoted URI
    """
    from . import err_unusual_char_in_URI
    suri = uri.strip()
    if options is not None and any(c in suri for c in _warnChars):
        options.add_warning(err_unusual_char_in_URI % suri)
    return quote(suri, _unquotedChars)
163 | |
164 ######################################################################################################### | |
165 | |
def create_file_name(uri):
    """
    Derive a file name from an (absolute) URI; used, eg, to name the file of a cached vocabulary.

    The URI is stripped and quoted first (keeping the characters of L{_unquotedChars} as they are);
    then each of the characters space, C{% - + / ? : = #} is turned into an underscore.

    @param uri: (absolute) URI
    @return: the generated file name
    """
    quoted = quote(uri.strip(), _unquotedChars)
    # Replace the potentially dangerous characters in one pass
    to_underscore = str.maketrans(dict.fromkeys(' %-+/?:=#', '_'))
    return quoted.translate(to_underscore)
174 | |
175 ######################################################################################################### | |
def has_one_of_attributes(node, *args):
    """
    Check whether one of the listed attributes is present on a (DOM) node.

    The attribute names may be given either as separate positional arguments, or as one
    tuple or list passed as the first positional argument.

    @param node: DOM element node
    @param args: possible attribute names
    @return: True if the node has at least one of the attributes, False otherwise
      (also when no attribute name is given at all)
    @rtype: Boolean
    """
    if not args:
        # Nothing to look for; return a genuine Boolean, as documented
        return False

    rargs = args[0] if isinstance(args[0], (tuple, list)) else args

    # any() stops at the first attribute that is present
    return any(node.hasAttribute(attr) for attr in rargs)
192 | |
193 ######################################################################################################### | |
def traverse_tree(node, func):
    """Walk the element tree rooted at C{node}, calling C{func} on each element, parents before children.

    @param node: DOM element node
    @param func: function to be called on the node. Input parameter is a DOM Element Node. If the
      function returns a true value, the children of that node are not visited.
    """
    descend = not func(node)
    if descend:
        for child in node.childNodes:
            # Only element children are of interest (no text, comment, etc, nodes)
            if child.nodeType != node.ELEMENT_NODE:
                continue
            traverse_tree(child, func)
205 | |
206 ######################################################################################################### | |
def return_XML(state, inode, base=True, xmlns=True):
    """
    Get (recursively) the XML Literal content of a DOM Element Node. (Most of the processing is done
    via a C{node.toxml} call of the xml minidom implementation.)

    The work is done on a deep copy of the node; the original node is not modified.

    @param inode: DOM Node
    @param state: L{pyRdfa.state.ExecutionContext}
    @param base: whether the base element should be added to the output
    @type base: Boolean
    @param xmlns: whether the namespace declarations should be repeated in the generated node
    @type xmlns: Boolean
    @return: string
    """
    node = inode.cloneNode(True)
    # Decorate the element with namespaces, lang values and, optionally, base
    if base:
        node.setAttribute("xml:base", state.base)
    if xmlns:
        for prefix in state.term_or_curie.xmlns:
            attr_name = "xmlns:%s" % prefix
            # Declarations already present on the node take precedence
            if not node.hasAttribute(attr_name):
                node.setAttribute(attr_name, "%s" % state.term_or_curie.xmlns[prefix])
    # Set the default namespace, if not done (and is available)
    if not node.getAttribute("xmlns") and state.defaultNS is not None:
        node.setAttribute("xmlns", state.defaultNS)
    # Get the lang, if necessary
    if state.lang:
        # The (X)HTML host languages use @lang, everything else uses @xml:lang
        html_hosts = [HostLanguage.xhtml, HostLanguage.xhtml5, HostLanguage.html5]
        lang_attr = "lang" if state.options.host_language in html_hosts else "xml:lang"
        if not node.getAttribute(lang_attr):
            node.setAttribute(lang_attr, state.lang)

    # On Python 3 (the only version the imports of this module support) toxml() without
    # an encoding argument already returns a text string
    return node.toxml()
245 | |
246 ######################################################################################################### | |
247 | |
def dump(node):
    """
    Debugging aid: print the XML serialization of the tree starting at C{node} on the standard
    output, without any extra indentation or line breaks between the elements.

    @param node: DOM node
    """
    serialized = node.toprettyxml(indent="", newl="")
    print(serialized)
255 | |
256 | |
257 |