Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/rdflib/parser.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """ | |
2 Parser plugin interface. | |
3 | |
4 This module defines the parser plugin interface and contains other | |
5 related parser support code. | |
6 | |
7 This module is mainly useful for those who want to write a parser that | |
8 can plug in to rdflib. To invoke a parser, you most likely | |
9 want to go through the parse method of the Graph class. | |
10 | |
11 """ | |
12 from __future__ import absolute_import | |
13 from __future__ import division | |
14 from __future__ import print_function | |
15 | |
16 import os | |
17 import sys | |
18 | |
19 from six import BytesIO | |
20 from six import string_types | |
21 from six import text_type | |
22 | |
23 from six.moves.urllib.request import pathname2url | |
24 from six.moves.urllib.request import Request | |
25 from six.moves.urllib.request import url2pathname | |
26 from six.moves.urllib.parse import urljoin | |
27 from six.moves.urllib.request import urlopen | |
28 | |
29 from xml.sax import xmlreader | |
30 | |
31 from rdflib import __version__ | |
32 from rdflib.term import URIRef | |
33 from rdflib.namespace import Namespace | |
34 | |
35 __all__ = [ | |
36 'Parser', 'InputSource', 'StringInputSource', | |
37 'URLInputSource', 'FileInputSource'] | |
38 | |
39 | |
class Parser(object):
    """Base class of the parser plugin interface.

    Both methods are deliberate no-ops here; a concrete parser plugin
    is expected to override :meth:`parse`.
    """

    def __init__(self):
        """Create a parser; the base class keeps no state."""

    def parse(self, source, sink):
        """Parse ``source`` into ``sink``.

        The base implementation does nothing and returns ``None``.
        """
47 | |
48 | |
class InputSource(xmlreader.InputSource, object):
    """SAX ``InputSource`` extended with a content type and a close hook.

    Attributes set by the constructor:

    * ``content_type`` -- starts as ``None``.
    * ``auto_close`` -- starts as ``False``; see Graph.parse(), true if
      the underlying stream was opened by us.
    """

    def __init__(self, system_id=None):
        """Initialise the SAX base class with ``system_id``."""
        xmlreader.InputSource.__init__(self, system_id=system_id)
        self.auto_close = False  # see Graph.parse(), true if opened by us
        self.content_type = None

    def close(self):
        """Close the byte stream if one is set and it can be closed."""
        stream = self.getByteStream()
        if not stream or not hasattr(stream, 'close'):
            # Nothing attached, or the object offers no close() method.
            return
        stream.close()
63 | |
64 | |
class StringInputSource(InputSource):
    """Input source backed by an in-memory string.

    :param value: the document to expose as a byte stream. Bytes are
        used as-is; text is encoded as UTF-8 first, the same conversion
        ``create_input_source`` applies to its ``data`` argument.
    :param system_id: optional system identifier passed on to
        :class:`InputSource`.
    """

    def __init__(self, value, system_id=None):
        super(StringInputSource, self).__init__(system_id)
        if isinstance(value, text_type):
            # BytesIO only accepts bytes-like input; without this a text
            # value raises TypeError when the class is used directly.
            value = value.encode('utf-8')
        stream = BytesIO(value)
        self.setByteStream(stream)
        # TODO:
        #   encoding = value.encoding
        #   self.setEncoding(encoding)
77 | |
78 | |
# Default HTTP request headers; URLInputSource copies this mapping and
# adds an ``Accept`` entry per request, so it is never mutated in place.
headers = {
    "User-agent": "rdflib-%s (http://rdflib.net/; eikeon@eikeon.com)" % __version__,
}
83 | |
84 | |
class URLInputSource(InputSource):
    """Input source that fetches its content from a URL.

    :param system_id: the URL to request.
    :param format: optional format name used only to choose the
        ``Accept`` header sent with the request.

    After construction ``url`` holds the final URL (after any
    redirects), ``content_type`` holds the response media type without
    parameters (or ``None``), and ``response_info`` holds the object
    returned by the response's ``info()``.
    """

    def __init__(self, system_id=None, format=None):
        super(URLInputSource, self).__init__(system_id)
        self.url = system_id

        # copy headers to change
        request_headers = dict(headers)

        # Ordered (format, Accept value) pairs; compared with ``==`` in
        # sequence, first match wins.
        accept_by_format = (
            ('application/rdf+xml', 'application/rdf+xml, */*;q=0.1'),
            ('n3', 'text/n3, */*;q=0.1'),
            ('turtle', 'text/turtle,application/x-turtle, */*;q=0.1'),
            ('nt', 'text/plain, */*;q=0.1'),
            ('json-ld',
             'application/ld+json, application/json;q=0.9, */*;q=0.1'),
        )
        for known_format, accept in accept_by_format:
            if format == known_format:
                break
        else:
            # Unknown or missing format: fall back to a generic preference.
            accept = ('application/rdf+xml,text/rdf+n3;q=0.9,'
                      'application/xhtml+xml;q=0.5, */*;q=0.1')
        request_headers['Accept'] = accept

        response = urlopen(Request(system_id, None, request_headers))
        # Fix for issue 130 https://github.com/RDFLib/rdflib/issues/130
        self.url = response.geturl()  # in case redirections took place
        self.setPublicId(self.url)

        content_type = response.info().get('content-type')
        if content_type is not None:
            # Keep only the media type, dropping e.g. "; charset=...".
            content_type = content_type.split(";", 1)[0]
        self.content_type = content_type

        self.setByteStream(response)
        # TODO: self.setEncoding(encoding)
        self.response_info = response.info()

    def __repr__(self):
        return self.url
126 | |
127 | |
class FileInputSource(InputSource):
    """Input source wrapping an already opened file object.

    :param file: a file object; its ``name`` attribute is used to build
        the system identifier, resolved against the current working
        directory.
    """

    def __init__(self, file):
        cwd_uri = urljoin("file:", pathname2url(os.getcwd()))
        file_uri = urljoin("file:", pathname2url(file.name))
        super(FileInputSource, self).__init__(URIRef(file_uri, base=cwd_uri))
        self.file = file
        self.setByteStream(file)
        # TODO: self.setEncoding(encoding)

    def __repr__(self):
        return repr(self.file)
140 | |
141 | |
def create_input_source(source=None, publicID=None,
                        location=None, file=None, data=None, format=None):
    """
    Return an appropriate InputSource instance for the given
    parameters.

    Exactly one of ``source``, ``location``, ``file`` and ``data`` must
    be given:

    * ``source`` -- an existing ``InputSource`` (returned as-is), a
      string (treated as ``location``), or an object with a ``read``
      attribute (wrapped in a plain ``InputSource``).
    * ``location`` -- a path or URL; ``file:///`` locations are opened
      locally, anything else goes through ``URLInputSource``.
    * ``file`` -- an open file object, wrapped in ``FileInputSource``.
    * ``data`` -- text (encoded as UTF-8) or bytes, wrapped in
      ``StringInputSource``.

    ``publicID``, when given, becomes the public id of the result;
    otherwise a missing public id is set to the resolved location, or
    ``""`` when there is none. ``format`` is passed on to
    ``URLInputSource`` only.

    Raises ``ValueError`` when the number of given inputs is not one,
    and ``Exception`` for an unsupported ``source`` type or when no
    input source could be built.
    """

    # test that exactly one of source, location, file, and data is not None.
    provided = [arg for arg in (source, location, file, data)
                if arg is not None]
    if len(provided) != 1:
        raise ValueError(
            'exactly one of source, location, file or data must be given'
        )

    result = None

    if source is not None:
        if isinstance(source, InputSource):
            result = source
        elif isinstance(source, string_types):
            # A plain string is handled by the location branch below.
            location = source
        elif hasattr(source, "read") and not isinstance(source, Namespace):
            result = InputSource()
            result.setByteStream(source)
            if source is sys.stdin:
                result.setSystemId("file:///dev/stdin")
            elif hasattr(source, "name"):
                result.setSystemId(source.name)
        else:
            raise Exception("Unexpected type '%s' for source '%s'" %
                            (type(source), source))

    resolved_location = None  # Further to fix for issue 130

    opened_here = False  # make sure we close all file handles we open
    if location is not None:
        # Fix for Windows problem https://github.com/RDFLib/rdflib/issues/145
        if os.path.exists(location):
            location = pathname2url(location)
        cwd_base = urljoin("file:", "%s/" % pathname2url(os.getcwd()))
        resolved_location = URIRef(location, base=cwd_base).defrag()
        if resolved_location.startswith("file:///"):
            local_path = url2pathname(
                resolved_location.replace("file:///", "/"))
            # Picked up by the ``file`` branch just below.
            file = open(local_path, "rb")
        else:
            result = URLInputSource(resolved_location, format)
        opened_here = True
        # publicID = publicID or absolute_location  # Further to fix
        # for issue 130

    if file is not None:
        result = FileInputSource(file)

    if data is not None:
        payload = data.encode('utf-8') if isinstance(data, text_type) else data
        result = StringInputSource(payload)
        opened_here = True

    if result is None:
        raise Exception("could not create InputSource")

    result.auto_close |= opened_here
    if publicID is not None:  # Further to fix for issue 130
        result.setPublicId(publicID)
    # Further to fix for issue 130
    elif result.getPublicId() is None:
        result.setPublicId(resolved_location or "")
    return result