Mercurial > repos > guerler > springsuite
diff planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/pyRdfa/utils.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 (2020-07-31) |
parents | |
children |
line wrap: on
line diff
# -*- coding: utf-8 -*-
"""
Various utilities for pyRdfa.

Most of the utilities are straightforward.

@organization: U{World Wide Web Consortium<http://www.w3.org>}
@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
@license: This software is available for use under the
U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}


"""

"""
$Id: utils.py,v 1.9 2012/11/16 17:51:53 ivan Exp $
$Date: 2012/11/16 17:51:53 $
"""
import datetime
import os
import os.path
import sys

try:
    # Not used in this module; the name is kept only so that existing
    # ``from ...utils import imp`` style accesses keep working.
    import imp
except ImportError:
    # The ``imp`` module was removed in Python 3.12.
    imp = None

from http.server import BaseHTTPRequestHandler
from urllib.error import HTTPError as urllib_HTTPError
from urllib.parse import quote, urljoin
from urllib.request import Request, urlopen

from .extras.httpheader import content_type, parse_http_datetime

import rdflib

# Compare the major version numerically: a plain string comparison against
# "3.0.0" misorders two-digit major versions such as "10.0.0".
try:
    _rdflib_major = int(rdflib.__version__.split(".", 1)[0])
except ValueError:
    # Unparseable version string: assume a modern (>= 3) rdflib layout.
    _rdflib_major = 3

if _rdflib_major >= 3:
    from rdflib import RDF as ns_rdf
else:
    from rdflib.RDF import RDFNS as ns_rdf

from .host import HostLanguage, preferred_suffixes


#########################################################################################################
# Handling URIs
class URIOpener:
    """A wrapper around C{urllib.request.urlopen} to open a resource. Beyond accessing the data itself,
    the class sets a number of instance variables that might be relevant for processing.
    The class also adds an accept header to the outgoing request, namely
    text/html and application/xhtml+xml (unless set explicitly by the caller).

    If the content type is set by the server, the relevant HTTP response field is used. Otherwise,
    common suffixes are used (see L{host.preferred_suffixes}) to set the content type (this is really of importance
    for C{file:///} URI-s). If none of these works, the content type is empty.

    Interpretation of the content type for the return is done by Deron Meranda's U{httpheader module<http://deron.meranda.us/>}.

    @ivar data: the real data, ie, a file-like object
    @ivar headers: the return headers as sent back by the server
    @ivar content_type: the content type of the resource or the empty string, if the content type cannot be determined
    @ivar charset: the C{charset} parameter of the Content-Type header, None if there is none
    @ivar location: the real location of the data (ie, after possible redirection and content negotiation)
    @ivar last_modified_date: sets the last modified date if set in the header, None otherwise
    @ivar expiration_date: sets the expiration date if set in the header, I{current UTC plus one day} otherwise (this is used for caching purposes, hence this artificial setting)
    """
    CONTENT_LOCATION = 'Content-Location'
    CONTENT_TYPE = 'Content-Type'
    LAST_MODIFIED = 'Last-Modified'
    EXPIRES = 'Expires'

    def __init__(self, name, additional_headers=None):
        """
        @param name: URL to be opened
        @keyword additional_headers: additional HTTP request headers to be added to the call
        (a dictionary; None, the default, means no extra headers)
        @raise HTTPError: (the pyRdfa one) if the server answers with an HTTP error status
        @raise RDFaError: for any other problem while opening the resource or processing its headers
        """
        if additional_headers is None:
            additional_headers = {}

        try:
            # Note the removal of the fragment ID. This is necessary, per the HTTP spec
            req = Request(url=name.split('#')[0])

            for key in additional_headers:
                req.add_header(key, additional_headers[key])
            if 'Accept' not in additional_headers:
                req.add_header('Accept', 'text/html, application/xhtml+xml')

            self.data = urlopen(req)
            self.headers = self.data.info()

            if URIOpener.CONTENT_TYPE in self.headers:
                # The call below will remove the possible media type parameters, like charset settings
                ct = content_type(self.headers[URIOpener.CONTENT_TYPE])
                self.content_type = ct.media_type
                self.charset = ct.parmdict['charset'] if 'charset' in ct.parmdict else None
            else:
                # check if the suffix can be used for the content type; this may be important
                # for file:// type URI or if the server is not properly set up to return the right
                # mime type
                self.charset = None
                self.content_type = ""
                for suffix in preferred_suffixes:
                    if name.endswith(suffix):
                        self.content_type = preferred_suffixes[suffix]
                        break

            if URIOpener.CONTENT_LOCATION in self.headers:
                self.location = urljoin(self.data.geturl(), self.headers[URIOpener.CONTENT_LOCATION])
            else:
                self.location = name

            # Thanks to Deron Meranda for the HTTP date conversion method...
            # A missing or malformed header silently falls back to the default.
            self.expiration_date = self._header_date(
                self.headers,
                URIOpener.EXPIRES,
                datetime.datetime.utcnow() + datetime.timedelta(days=1),
            )
            self.last_modified_date = self._header_date(
                self.headers, URIOpener.LAST_MODIFIED, None
            )

        except urllib_HTTPError as e:
            # Imported here (not at module level) as in the original code; the
            # package's __init__ defines the exception classes.
            from . import HTTPError
            # responses maps a status code to (short message, long message);
            # non-standard codes are not in the table, so fall back to the
            # text of the original error instead of failing with a KeyError.
            msg = BaseHTTPRequestHandler.responses.get(e.code)
            text = msg[1] if msg else str(e)
            raise HTTPError('%s' % text, e.code) from e
        except Exception as e:
            from . import RDFaError
            raise RDFaError('%s' % e) from e

    @staticmethod
    def _header_date(headers, field, default):
        """Return the HTTP date stored in header C{field}; C{default} if the header is absent or malformed."""
        if field not in headers:
            return default
        try:
            return parse_http_datetime(headers[field])
        except Exception:
            # The date format was wrong, sorry, forget it...
            return default


#########################################################################################################

# 'safe' characters for the URI quoting, ie, characters that can safely stay as they are. Other
# special characters are converted to their %.. equivalents for namespace prefixes
# (The backslash is part of the set; it is written as an explicit escape.)
_unquotedChars = ':/\\?=#~'
_warnChars = [' ', '\n', '\r', '\t']

# Characters replaced by '_' when a URI is turned into a file name
_fileNameTranslation = str.maketrans(dict.fromkeys(' %-+/?:=#', '_'))


def quote_URI(uri, options=None):
    """
    'quote' a URI, ie, exchange special characters for their '%..' equivalents. Some of the characters
    may stay as they are (listed in L{_unquotedChars}). If one of the characters listed in L{_warnChars}
    is also in the uri, an extra warning is also generated (provided C{options} is set).
    @param uri: URI
    @param options: used to register the warning; may be None, in which case no warning is issued
    @type options: L{Options<pyRdfa.Options>}
    @return: the quoted URI (leading and trailing white space is removed first)
    """
    from . import err_unusual_char_in_URI
    suri = uri.strip()
    # At most one warning per URI, whatever the number of unusual characters
    if options is not None and any(c in suri for c in _warnChars):
        options.add_warning(err_unusual_char_in_URI % suri)
    return quote(suri, _unquotedChars)


#########################################################################################################

def create_file_name(uri):
    """
    Create a suitable file name from an (absolute) URI. Used, eg, for the generation of a file name for a cached vocabulary file.

    @param uri: URI
    @return: the quoted URI with the characters C{' %-+/?:=#'} all replaced by C{'_'}
    """
    final_uri = quote(uri.strip(), _unquotedChars)
    # Remove some potentially dangerous characters
    return final_uri.translate(_fileNameTranslation)


#########################################################################################################
def has_one_of_attributes(node, *args):
    """
    Check whether one of the listed attributes is present on a (DOM) node.
    @param node: DOM element node
    @param args: possible attribute names; either separate arguments or one single tuple or list of names
    @return: True or False (False if no attribute name is given)
    @rtype: Boolean
    """
    if not args:
        return False
    # Accept both has_one_of_attributes(node, "a", "b") and has_one_of_attributes(node, ["a", "b"])
    rargs = args[0] if isinstance(args[0], (tuple, list)) else args
    return any(node.hasAttribute(attr) for attr in rargs)


#########################################################################################################
def traverse_tree(node, func):
    """Traverse the whole element tree, and perform the function C{func} on all the elements.
    @param node: DOM element node
    @param func: function to be called on the node. Input parameter is a DOM Element Node. If the function
    returns a true value, the recursion does not descend into the children of that node (the traversal of
    the siblings of the node goes on).
    """
    if func(node):
        return

    for n in node.childNodes:
        if n.nodeType == node.ELEMENT_NODE:
            traverse_tree(n, func)


#########################################################################################################
def return_XML(state, inode, base=True, xmlns=True):
    """
    Get (recursively) the XML Literal content of a DOM Element Node. (Most of the processing is done
    via a C{node.toxml} call of the xml minidom implementation.) The work is done on a deep copy of
    the node, ie, the original node is left untouched.

    @param inode: DOM Node
    @param state: L{pyRdfa.state.ExecutionContext}
    @param base: whether the base element should be added to the output
    @type base: Boolean
    @param xmlns: whether the namespace declarations should be repeated in the generated node
    @type xmlns: Boolean
    @return: string
    """
    node = inode.cloneNode(True)
    # Decorate the element with namespaces.lang values and, optionally, base
    if base:
        node.setAttribute("xml:base", state.base)
    if xmlns:
        for prefix in state.term_or_curie.xmlns:
            if not node.hasAttribute("xmlns:%s" % prefix):
                node.setAttribute("xmlns:%s" % prefix, "%s" % state.term_or_curie.xmlns[prefix])
        # Set the default namespace, if not done (and is available)
        if not node.getAttribute("xmlns") and state.defaultNS is not None:
            node.setAttribute("xmlns", state.defaultNS)
    # Get the lang, if necessary
    if state.lang:
        if state.options.host_language in [HostLanguage.xhtml, HostLanguage.xhtml5, HostLanguage.html5]:
            if not node.getAttribute("lang"):
                node.setAttribute("lang", state.lang)
        else:
            if not node.getAttribute("xml:lang"):
                node.setAttribute("xml:lang", state.lang)

    # This module only runs on Python 3 (see the urllib imports), where
    # toxml() without an encoding already returns a str.
    return node.toxml()


#########################################################################################################

def dump(node):
    """
    This is just for debug purposes: it prints the essential content of the node in the tree starting at node.

    @param node: DOM node
    """
    print(node.toprettyxml(indent="", newl=""))