Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/rdflib/parser.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4f3585e2f14b |
|---|---|
| 1 """ | |
| 2 Parser plugin interface. | |
| 3 | |
| 4 This module defines the parser plugin interface and contains other | |
| 5 related parser support code. | |
| 6 | |
| 7 The module is mainly useful for those wanting to write a parser that | |
| 8 can plugin to rdflib. If you are wanting to invoke a parser you likely | |
| 9 want to do so through the Graph class parse method. | |
| 10 | |
| 11 """ | |
| 12 from __future__ import absolute_import | |
| 13 from __future__ import division | |
| 14 from __future__ import print_function | |
| 15 | |
| 16 import os | |
| 17 import sys | |
| 18 | |
| 19 from six import BytesIO | |
| 20 from six import string_types | |
| 21 from six import text_type | |
| 22 | |
| 23 from six.moves.urllib.request import pathname2url | |
| 24 from six.moves.urllib.request import Request | |
| 25 from six.moves.urllib.request import url2pathname | |
| 26 from six.moves.urllib.parse import urljoin | |
| 27 from six.moves.urllib.request import urlopen | |
| 28 | |
| 29 from xml.sax import xmlreader | |
| 30 | |
| 31 from rdflib import __version__ | |
| 32 from rdflib.term import URIRef | |
| 33 from rdflib.namespace import Namespace | |
| 34 | |
# Public names of this module.
__all__ = [
    'Parser',
    'InputSource',
    'StringInputSource',
    'URLInputSource',
    'FileInputSource',
]
| 38 | |
| 39 | |
class Parser(object):
    """Base class of the parser plugin interface.

    Both methods are placeholders that do nothing in this class.
    """

    def __init__(self):
        pass

    def parse(self, source, sink):
        """Parse ``source`` into ``sink``.

        This base implementation does nothing and returns ``None``.
        """
        pass
| 47 | |
| 48 | |
class InputSource(xmlreader.InputSource, object):
    """SAX ``InputSource`` carrying a content type and an auto-close flag.

    Attributes are initialised to ``content_type = None`` and
    ``auto_close = False``.
    """

    def __init__(self, system_id=None):
        xmlreader.InputSource.__init__(self, system_id=system_id)
        self.content_type = None
        self.auto_close = False  # see Graph.parse(), true if opened by us

    def close(self):
        """Close the character stream and the byte stream, if attached.

        A stream is skipped when it is ``None`` or has no ``close``
        attribute.
        """
        # Both kinds of stream can be attached to a SAX InputSource, so
        # closing only the byte stream would leave a character stream open.
        for stream in (self.getCharacterStream(), self.getByteStream()):
            if stream is not None and hasattr(stream, 'close'):
                stream.close()
| 63 | |
| 64 | |
class StringInputSource(InputSource):
    """Input source that reads from an in-memory value.

    ``value`` may be bytes or text; text is encoded as UTF-8 before it is
    wrapped in a byte stream.
    """

    def __init__(self, value, system_id=None):
        super(StringInputSource, self).__init__(system_id)
        if isinstance(value, text_type):
            # BytesIO only accepts bytes; encode text the same way
            # create_input_source() does for its ``data`` argument.
            value = value.encode('utf-8')
        self.setByteStream(BytesIO(value))
        # TODO:
        #   encoding = value.encoding
        #   self.setEncoding(encoding)
| 77 | |
| 78 | |
# Default HTTP request headers; the User-agent embeds the rdflib version.
headers = {
    'User-agent': 'rdflib-%s (http://rdflib.net/; eikeon@eikeon.com)' % (
        __version__,),
}
| 83 | |
| 84 | |
class URLInputSource(InputSource):
    """Input source whose byte stream is the response to a URL request.

    The request is issued in the constructor, with an ``Accept`` header
    chosen from ``format``.
    """

    # (format name, Accept header value) pairs, compared in this order.
    _ACCEPT_BY_FORMAT = (
        ('application/rdf+xml', 'application/rdf+xml, */*;q=0.1'),
        ('n3', 'text/n3, */*;q=0.1'),
        ('turtle', 'text/turtle,application/x-turtle, */*;q=0.1'),
        ('nt', 'text/plain, */*;q=0.1'),
        ('json-ld',
         'application/ld+json, application/json;q=0.9, */*;q=0.1'),
    )
    # Accept header used when ``format`` matches none of the names above.
    _ACCEPT_FALLBACK = (
        'application/rdf+xml,text/rdf+n3;q=0.9,'
        'application/xhtml+xml;q=0.5, */*;q=0.1')

    def __init__(self, system_id=None, format=None):
        super(URLInputSource, self).__init__(system_id)
        self.url = system_id

        # Work on a copy so the module-level defaults are not modified.
        request_headers = dict(headers)
        for known_format, accept in self._ACCEPT_BY_FORMAT:
            if format == known_format:
                request_headers['Accept'] = accept
                break
        else:
            request_headers['Accept'] = self._ACCEPT_FALLBACK

        response = urlopen(Request(system_id, None, request_headers))
        # Fix for issue 130 https://github.com/RDFLib/rdflib/issues/130
        self.url = response.geturl()  # in case redirections took place
        self.setPublicId(self.url)
        content_type = response.info().get('content-type')
        if content_type is not None:
            # Keep only the media type, dropping any ";"-separated parameters.
            content_type = content_type.split(";", 1)[0]
        self.content_type = content_type
        self.setByteStream(response)
        # TODO: self.setEncoding(encoding)
        self.response_info = response.info()  # a mimetools.Message instance

    def __repr__(self):
        return self.url
| 126 | |
| 127 | |
class FileInputSource(InputSource):
    """Input source wrapping an already-open file object.

    The system id is built from ``file.name``, so the file object must
    have a ``name`` attribute.
    """

    def __init__(self, file):
        cwd_uri = urljoin("file:", pathname2url(os.getcwd()))
        file_uri = urljoin("file:", pathname2url(file.name))
        super(FileInputSource, self).__init__(URIRef(file_uri, base=cwd_uri))
        self.file = file
        self.setByteStream(file)
        # TODO: self.setEncoding(encoding)

    def __repr__(self):
        return repr(self.file)
| 140 | |
| 141 | |
def create_input_source(source=None, publicID=None,
                        location=None, file=None, data=None, format=None):
    """
    Return an appropriate InputSource instance for the given
    parameters.

    Exactly one of ``source``, ``location``, ``file`` and ``data`` must be
    given:

    - ``source``: an ``InputSource`` (returned as is), a string (handled
      like ``location``) or an object with a ``read`` attribute.
    - ``location``: a local path or URL; ``file:///`` locations are opened
      from disk, anything else goes through ``URLInputSource``.
    - ``file``: an open file object, wrapped in ``FileInputSource``.
    - ``data``: bytes or text; text is encoded as UTF-8.

    ``publicID``, when given, is set as the public id of the result;
    otherwise, if the result has no public id yet, the absolute location
    (or ``""``) is used. ``format`` is only passed to ``URLInputSource``.

    Raises ``ValueError`` unless exactly one input is given, and
    ``Exception`` for an unsupported ``source`` type or when no input
    source could be created.
    """

    # test that exactly one of source, location, file, and data is not None.
    given = (source, location, file, data)
    if sum(arg is not None for arg in given) != 1:
        raise ValueError(
            'exactly one of source, location, file or data must be given'
        )

    input_source = None

    if source is not None:
        if isinstance(source, InputSource):
            input_source = source
        elif isinstance(source, string_types):
            location = source
        elif hasattr(source, "read") and not isinstance(source, Namespace):
            input_source = InputSource()
            input_source.setByteStream(source)
            if source is sys.stdin:
                input_source.setSystemId("file:///dev/stdin")
            elif hasattr(source, "name"):
                input_source.setSystemId(source.name)
        else:
            raise Exception("Unexpected type '%s' for source '%s'" %
                            (type(source), source))

    absolute_location = None  # Further to fix for issue 130

    auto_close = False  # make sure we close all file handles we open
    if location is not None:
        # Fix for Windows problem https://github.com/RDFLib/rdflib/issues/145
        if os.path.exists(location):
            location = pathname2url(location)
        base = urljoin("file:", "%s/" % pathname2url(os.getcwd()))
        absolute_location = URIRef(location, base=base).defrag()
        if absolute_location.startswith("file:///"):
            # Rewrite only the leading prefix; an unbounded replace() would
            # also alter any later "file:///" occurring inside the path.
            filename = url2pathname(
                absolute_location.replace("file:///", "/", 1))
            file = open(filename, "rb")
        else:
            input_source = URLInputSource(absolute_location, format)
        auto_close = True
        # publicID = publicID or absolute_location  # Further to fix
        # for issue 130

    if file is not None:
        try:
            input_source = FileInputSource(file)
        except Exception:
            # auto_close is only true here when the file was opened above
            # from ``location``; do not leak that handle on failure.
            if auto_close:
                file.close()
            raise

    if data is not None:
        if isinstance(data, text_type):
            data = data.encode('utf-8')
        input_source = StringInputSource(data)
        auto_close = True

    if input_source is None:
        raise Exception("could not create InputSource")

    input_source.auto_close |= auto_close
    if publicID is not None:  # Further to fix for issue 130
        input_source.setPublicId(publicID)
    # Further to fix for issue 130
    elif input_source.getPublicId() is None:
        input_source.setPublicId(absolute_location or "")
    return input_source
