Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/rdflib/parser.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author | shellac |
---|---|
date | Mon, 01 Jun 2020 08:59:25 -0400 |
parents | 79f47841a781 |
children |
comparison
equal
deleted
inserted
replaced
4:79f47841a781 | 5:9b1c78e6ba9c |
---|---|
1 """ | |
2 Parser plugin interface. | |
3 | |
4 This module defines the parser plugin interface and contains other | |
5 related parser support code. | |
6 | |
7 The module is mainly useful for those wanting to write a parser that | |
8 can plug in to rdflib. If you want to invoke a parser, you will most | |
9 likely want to do so through the Graph class's parse method. | |
10 | |
11 """ | |
12 | |
13 import os | |
14 import sys | |
15 from urllib.request import pathname2url, url2pathname | |
16 from urllib.request import urlopen, Request | |
17 from urllib.parse import urljoin | |
18 from rdflib.py3compat import PY3 | |
19 if PY3: | |
20 from io import BytesIO | |
21 assert BytesIO | |
22 else: | |
23 from io import StringIO as BytesIO | |
24 from xml.sax import xmlreader | |
25 | |
26 from rdflib import __version__ | |
27 from rdflib.term import URIRef | |
28 from rdflib.namespace import Namespace | |
29 | |
# Names exported by ``from rdflib.parser import *``.
__all__ = [
    'Parser',
    'InputSource',
    'StringInputSource',
    'URLInputSource',
    'FileInputSource',
]
33 | |
34 | |
class Parser(object):
    """Base class of the parser plugin interface.

    Both methods are no-ops in this class; presumably concrete parser
    plugins override ``parse`` -- confirm against the plugin implementations.
    """

    def __init__(self):
        """Create the parser; no state is set up here."""

    def parse(self, source, sink):
        """Placeholder: ignore ``source`` and ``sink`` and return ``None``."""
        return None
42 | |
43 | |
class InputSource(xmlreader.InputSource, object):
    """SAX ``InputSource`` extended with a content type and a close helper.

    Attributes set by the constructor:

    * ``content_type`` -- initialised to ``None``.
    * ``auto_close`` -- initialised to ``False``; see Graph.parse(), true if
      the stream was opened by us.
    """

    def __init__(self, system_id=None):
        xmlreader.InputSource.__init__(self, system_id=system_id)
        self.auto_close = False  # see Graph.parse(), true if opened by us
        self.content_type = None

    def close(self):
        """Close the byte stream if one is set and it offers ``close()``."""
        stream = self.getByteStream()
        if not stream:
            return
        if hasattr(stream, 'close'):
            stream.close()
58 | |
59 | |
class StringInputSource(InputSource):
    """Input source that serves its content from an in-memory buffer.

    :param value: the document content. ``bytes`` are used as-is; text
        (``str``) is encoded as UTF-8 first, mirroring what
        ``create_input_source`` does for its ``data`` argument. Previously a
        ``str`` value made ``BytesIO`` raise ``TypeError``.
    :param system_id: optional system identifier passed on to ``InputSource``.
    """

    def __init__(self, value, system_id=None):
        super(StringInputSource, self).__init__(system_id)
        if isinstance(value, str):
            # BytesIO only accepts bytes-like objects.
            value = value.encode('utf-8')
        self.setByteStream(BytesIO(value))
        # TODO: record the encoding via self.setEncoding(encoding)
72 | |
73 | |
# Default HTTP request headers; URLInputSource copies this mapping and adds
# an ``Accept`` entry before issuing its request.
headers = {
    'User-agent': (
        'rdflib-%s (http://rdflib.net/; eikeon@eikeon.com)' % __version__
    ),
}
78 | |
79 | |
class URLInputSource(InputSource):
    """Input source whose byte stream is the response of a URL request.

    The constructor performs the request immediately: it sends the module
    level ``headers`` plus an ``Accept`` header chosen from ``format`` and
    installs the response object as the byte stream.

    :param system_id: the URL to open.
    :param format: one of ``'application/rdf+xml'``, ``'n3'``, ``'nt'`` or
        ``'json-ld'`` to request that serialisation; any other value sends
        a generic RDF ``Accept`` header.

    Attributes set by the constructor:

    * ``url`` -- the URL reported by the response (differs from
      ``system_id`` when redirections took place); also used as public id.
    * ``content_type`` -- the ``content-type`` response header with any
      parameters after ``;`` removed, or ``None`` if the header is absent.
    * ``response_info`` -- the header object returned by ``info()``.
    """

    def __init__(self, system_id=None, format=None):
        super(URLInputSource, self).__init__(system_id)
        self.url = system_id

        # Work on a copy so the module-level defaults are never modified.
        myheaders = dict(headers)
        if format == 'application/rdf+xml':
            myheaders['Accept'] = 'application/rdf+xml, */*;q=0.1'
        elif format == 'n3':
            myheaders['Accept'] = 'text/n3, */*;q=0.1'
        elif format == 'nt':
            myheaders['Accept'] = 'text/plain, */*;q=0.1'
        elif format == 'json-ld':
            # The quality parameter is spelled "q"; the former "p=0.9" was
            # not a quality value, so the intended weighting never applied.
            myheaders['Accept'] = (
                'application/ld+json, application/json;q=0.9, */*;q=0.1')
        else:
            myheaders['Accept'] = (
                'application/rdf+xml,text/rdf+n3;q=0.9,' +
                'application/xhtml+xml;q=0.5, */*;q=0.1')

        req = Request(system_id, None, myheaders)
        response = urlopen(req)
        # Fix for issue 130 https://github.com/RDFLib/rdflib/issues/130
        self.url = response.geturl()  # in case redirections took place
        self.setPublicId(self.url)
        info = response.info()  # response headers
        self.content_type = info.get('content-type')
        if self.content_type is not None:
            # Keep only the media type, dropping parameters such as charset.
            self.content_type = self.content_type.split(";", 1)[0]
        self.setByteStream(response)
        # TODO: self.setEncoding(encoding)
        self.response_info = info

    def __repr__(self):
        return self.url
119 | |
120 | |
class FileInputSource(InputSource):
    """Input source wrapping an already opened file object.

    The system id is a ``file:`` URI built from ``file.name`` and resolved
    against the current working directory; the file object itself becomes
    the byte stream and is kept on ``self.file``.
    """

    def __init__(self, file):
        cwd_url = urljoin("file:", pathname2url(os.getcwd()))
        file_url = urljoin("file:", pathname2url(file.name))
        super(FileInputSource, self).__init__(URIRef(file_url, base=cwd_url))
        self.setByteStream(file)
        self.file = file
        # TODO: self.setEncoding(encoding)

    def __repr__(self):
        return repr(self.file)
133 | |
134 | |
def create_input_source(source=None, publicID=None,
                        location=None, file=None, data=None, format=None):
    """
    Build the InputSource matching whichever input argument was supplied.

    Exactly one of ``source``, ``location``, ``file`` and ``data`` must be
    given:

    * ``source`` -- an ``InputSource`` (returned as-is), a string (treated
      as ``location``) or an object with a ``read`` attribute.
    * ``location`` -- a path or URL; ``file:///`` locations are opened in
      binary mode, anything else goes through ``URLInputSource``.
    * ``file`` -- an open file object, wrapped in a ``FileInputSource``.
    * ``data`` -- the content itself; ``str`` is encoded as UTF-8.

    ``publicID``, when given, becomes the public id of the result; otherwise
    a missing public id is set to the absolute location, or ``""``.
    ``format`` is only forwarded to ``URLInputSource``.

    Raises ``ValueError`` unless exactly one input argument is given, and
    ``Exception`` for an unsupported ``source`` type or when no input
    source could be created.
    """

    # Exactly one of source, location, file and data may be supplied.
    supplied = [
        arg for arg in (source, location, file, data) if arg is not None
    ]
    if len(supplied) != 1:
        raise ValueError(
            'exactly one of source, location, file or data must be given'
        )

    input_source = None

    if source is not None:
        if isinstance(source, InputSource):
            input_source = source
        elif isinstance(source, str):
            # A plain string is handled as a location further down.
            location = source
        elif hasattr(source, "read") and not isinstance(source, Namespace):
            stream = source
            input_source = InputSource()
            input_source.setByteStream(stream)
            if stream is sys.stdin:
                input_source.setSystemId("file:///dev/stdin")
            elif hasattr(stream, "name"):
                input_source.setSystemId(stream.name)
        else:
            raise Exception("Unexpected type '%s' for source '%s'" %
                            (type(source), source))

    absolute_location = None  # Further to fix for issue 130

    auto_close = False  # make sure we close all file handles we open
    if location is not None:
        # Fix for Windows problem https://github.com/RDFLib/rdflib/issues/145
        if os.path.exists(location):
            location = pathname2url(location)
        cwd_base = urljoin("file:", "%s/" % pathname2url(os.getcwd()))
        absolute_location = URIRef(location, base=cwd_base).defrag()
        if absolute_location.startswith("file:///"):
            local_path = url2pathname(
                absolute_location.replace("file:///", "/"))
            # Picked up by the ``file`` branch below.
            file = open(local_path, "rb")
        else:
            input_source = URLInputSource(absolute_location, format)
        auto_close = True

    if file is not None:
        input_source = FileInputSource(file)

    if data is not None:
        if isinstance(data, str):
            data = data.encode('utf-8')
        input_source = StringInputSource(data)
        auto_close = True

    if input_source is None:
        raise Exception("could not create InputSource")

    input_source.auto_close |= auto_close
    if publicID is not None:  # Further to fix for issue 130
        input_source.setPublicId(publicID)
    # Further to fix for issue 130
    elif input_source.getPublicId() is None:
        input_source.setPublicId(absolute_location or "")
    return input_source