comparison env/lib/python3.7/site-packages/rdflib/parser.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 """
2 Parser plugin interface.
3
4 This module defines the parser plugin interface and contains other
5 related parser support code.
6
7 The module is mainly useful for those who want to write a parser that
8 can plug in to rdflib. If you only want to invoke a parser, you will most
9 likely want to do so through the Graph class's parse method.
10
11 """
12
13 import os
14 import sys
15 from urllib.request import pathname2url, url2pathname
16 from urllib.request import urlopen, Request
17 from urllib.parse import urljoin
18 from rdflib.py3compat import PY3
19 if PY3:
20 from io import BytesIO
21 assert BytesIO
22 else:
23 from io import StringIO as BytesIO
24 from xml.sax import xmlreader
25
26 from rdflib import __version__
27 from rdflib.term import URIRef
28 from rdflib.namespace import Namespace
29
30 __all__ = [
31 'Parser', 'InputSource', 'StringInputSource',
32 'URLInputSource', 'FileInputSource']
33
34
class Parser(object):
    """Base class of the parser plugin interface.

    Both methods are deliberate no-ops here; concrete parsers are
    presumably expected to override ``parse``.
    """

    def __init__(self):
        """Create a parser; the base class keeps no state."""

    def parse(self, source, sink):
        """Parse ``source`` into ``sink``.

        The base implementation does nothing and returns ``None``.
        """
42
43
class InputSource(xmlreader.InputSource, object):
    """SAX ``InputSource`` extended with a content type and an auto-close flag.

    Attributes:
        content_type: initialised to ``None``; subclasses may set it.
        auto_close: initialised to ``False``; see ``Graph.parse()``, true
            if the underlying stream was opened by us.
    """

    def __init__(self, system_id=None):
        """Initialise the source.

        :param system_id: system identifier forwarded to
            ``xmlreader.InputSource``; may be ``None``.
        """
        xmlreader.InputSource.__init__(self, system_id=system_id)
        self.content_type = None
        self.auto_close = False  # see Graph.parse(), true if opened by us

    def close(self):
        """Close the character stream and the byte stream, if present.

        A stream is closed when it is not ``None`` and exposes a ``close``
        attribute. The byte stream is closed even if closing the character
        stream raises.
        """
        try:
            chars = self.getCharacterStream()
            # Compare against None rather than relying on truthiness so a
            # stream object that happens to evaluate falsy is still closed.
            if chars is not None and hasattr(chars, 'close'):
                chars.close()
        finally:
            raw = self.getByteStream()
            if raw is not None and hasattr(raw, 'close'):
                raw.close()
58
59
class StringInputSource(InputSource):
    """Input source that serves an in-memory value as a byte stream."""

    def __init__(self, value, system_id=None):
        """Wrap ``value`` in a ``BytesIO`` and install it as the byte stream.

        :param value: the data to expose. ``bytes`` are used as-is; ``str``
            is encoded as UTF-8 first.
        :param system_id: optional system identifier forwarded to
            ``InputSource``.
        """
        super(StringInputSource, self).__init__(system_id)
        if isinstance(value, str):
            # BytesIO rejects text on Python 3; encode it the same way
            # create_input_source() encodes its ``data`` argument.
            value = value.encode('utf-8')
        self.setByteStream(BytesIO(value))
        # TODO: record the encoding with self.setEncoding() once it is known.
72
73
# Base HTTP request headers; URLInputSource copies this dict and adds an
# ``Accept`` entry before issuing its request.
headers = {
    'User-agent': 'rdflib-%s (http://rdflib.net/; eikeon@eikeon.com)' % __version__,
}
78
79
class URLInputSource(InputSource):
    """Input source whose byte stream is the response obtained from a URL.

    The request is issued in the constructor. Afterwards ``url`` holds the
    URL reported by the response, ``content_type`` the media type without
    parameters, and ``response_info`` the response's header object.
    """

    def __init__(self, system_id=None, format=None):
        """Open ``system_id`` and attach the response as the byte stream.

        :param system_id: the URL to request.
        :param format: optional format name used to choose the ``Accept``
            header; unrecognised values (including ``None``) get a generic
            RDF-oriented ``Accept`` header.
        :raises: whatever ``urlopen`` raises when the request fails.
        """
        super(URLInputSource, self).__init__(system_id)
        self.url = system_id

        accept_by_format = {
            'application/rdf+xml': 'application/rdf+xml, */*;q=0.1',
            'n3': 'text/n3, */*;q=0.1',
            'nt': 'text/plain, */*;q=0.1',
            # The weight parameter is spelled "q"; the previous "p=0.9"
            # did not express a preference at all.
            'json-ld':
                'application/ld+json, application/json;q=0.9, */*;q=0.1',
        }
        default_accept = (
            'application/rdf+xml,text/rdf+n3;q=0.9,'
            'application/xhtml+xml;q=0.5, */*;q=0.1')

        # copy headers to change
        myheaders = dict(headers)
        myheaders['Accept'] = accept_by_format.get(format, default_accept)

        req = Request(system_id, None, myheaders)
        response = urlopen(req)
        # Fix for issue 130 https://github.com/RDFLib/rdflib/issues/130
        self.url = response.geturl()  # in case redirections took place
        self.setPublicId(self.url)
        info = response.info()
        self.content_type = info.get('content-type')
        if self.content_type is not None:
            # keep only the media type, dropping parameters such as charset
            self.content_type = self.content_type.split(";", 1)[0]
        self.setByteStream(response)
        # TODO: self.setEncoding(encoding)
        self.response_info = info  # header object returned by info()

    def __repr__(self):
        """Return the (possibly redirected) URL of this source."""
        return self.url
119
120
class FileInputSource(InputSource):
    """Input source wrapping an already-open file object."""

    def __init__(self, file):
        """Use ``file`` as the byte stream and derive the system id from it.

        :param file: an open file object; its ``name`` attribute is read to
            build a ``file:`` URI that becomes the system id.
        """
        cwd_uri = urljoin("file:", pathname2url(os.getcwd()))
        # NOTE(review): file.name is joined directly onto "file:", so the
        # cwd-based base looks like it has no effect on the result - confirm.
        file_uri = urljoin("file:", pathname2url(file.name))
        super(FileInputSource, self).__init__(URIRef(file_uri, base=cwd_uri))
        self.file = file
        self.setByteStream(file)
        # TODO: self.setEncoding(encoding)

    def __repr__(self):
        """Return the ``repr`` of the wrapped file object."""
        return repr(self.file)
133
134
def create_input_source(source=None, publicID=None,
                        location=None, file=None, data=None, format=None):
    """
    Build the InputSource that matches the supplied arguments.

    Exactly one of ``source``, ``location``, ``file`` and ``data`` must be
    given:

    - ``source``: an InputSource (returned as-is), a string (treated as a
      location) or an object with a ``read`` attribute.
    - ``location``: a local path or a URL; ``file:///`` locations are
      opened from disk, anything else goes through URLInputSource.
    - ``file``: an open file object, wrapped in a FileInputSource.
    - ``data``: bytes, or a string that is encoded as UTF-8, wrapped in a
      StringInputSource.

    ``publicID``, when given, becomes the public id of the result.
    ``format`` is only passed on to URLInputSource.

    Raises ValueError unless exactly one input argument is given, and
    Exception for an unsupported ``source`` type or when no input source
    could be created.
    """

    # test that exactly one of source, location, file, and data is not None.
    supplied = [
        arg for arg in (source, location, file, data) if arg is not None
    ]
    if len(supplied) != 1:
        raise ValueError(
            'exactly one of source, location, file or data must be given'
        )

    input_source = None

    if source is not None:
        if isinstance(source, InputSource):
            input_source = source
        elif isinstance(source, str):
            # a plain string is handled by the location branch below
            location = source
        elif hasattr(source, "read") and not isinstance(source, Namespace):
            stream = source
            input_source = InputSource()
            input_source.setByteStream(stream)
            if stream is sys.stdin:
                input_source.setSystemId("file:///dev/stdin")
            elif hasattr(stream, "name"):
                input_source.setSystemId(stream.name)
        else:
            raise Exception("Unexpected type '%s' for source '%s'" %
                            (type(source), source))

    absolute_location = None  # Further to fix for issue 130

    auto_close = False  # make sure we close all file handles we open
    if location is not None:
        # Fix for Windows problem https://github.com/RDFLib/rdflib/issues/145
        if os.path.exists(location):
            location = pathname2url(location)
        cwd_url = "%s/" % pathname2url(os.getcwd())
        base = urljoin("file:", cwd_url)
        absolute_location = URIRef(location, base=base).defrag()
        if not absolute_location.startswith("file:///"):
            input_source = URLInputSource(absolute_location, format)
        else:
            # local file: opened here, wrapped by the ``file`` branch below
            local_path = url2pathname(
                absolute_location.replace("file:///", "/"))
            file = open(local_path, "rb")
        auto_close = True

    if file is not None:
        input_source = FileInputSource(file)

    if data is not None:
        if isinstance(data, str):
            data = data.encode('utf-8')
        input_source = StringInputSource(data)
        auto_close = True

    if input_source is None:
        raise Exception("could not create InputSource")

    input_source.auto_close |= auto_close
    if publicID is not None:  # Further to fix for issue 130
        input_source.setPublicId(publicID)
    # Further to fix for issue 130
    elif input_source.getPublicId() is None:
        input_source.setPublicId(absolute_location or "")
    return input_source