comparison env/lib/python3.7/site-packages/rdflib/namespace.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 from rdflib.py3compat import format_doctest_out
2
3 __doc__ = format_doctest_out("""
4 ===================
5 Namespace Utilities
6 ===================
7
8 RDFLib provides mechanisms for managing Namespaces.
9
10 In particular, there is a :class:`~rdflib.namespace.Namespace` class
11 that takes as its argument the base URI of the namespace.
12
13 .. code-block:: pycon
14
15 >>> from rdflib.namespace import Namespace
16 >>> owl = Namespace('http://www.w3.org/2002/07/owl#')
17
18 Fully qualified URIs in the namespace can be constructed either by attribute
19 or by dictionary access on Namespace instances:
20
21 .. code-block:: pycon
22
23 >>> owl.seeAlso
24 rdflib.term.URIRef(%(u)s'http://www.w3.org/2002/07/owl#seeAlso')
25 >>> owl['seeAlso']
26 rdflib.term.URIRef(%(u)s'http://www.w3.org/2002/07/owl#seeAlso')
27
28
29 Automatic handling of unknown predicates
30 -----------------------------------------
31
32 As a programming convenience, a namespace binding is automatically
33 created when :class:`rdflib.term.URIRef` predicates are added to the graph.
34
35 Importable namespaces
36 -----------------------
37
38 The following namespaces are available by directly importing from rdflib:
39
40 * RDF
41 * RDFS
42 * OWL
43 * XSD
44 * FOAF
45 * SKOS
46 * DOAP
47 * DC
48 * DCTERMS
49 * VOID
50
51 .. code-block:: pycon
52
53 >>> from rdflib import OWL
54 >>> OWL.seeAlso
55 rdflib.term.URIRef(%(u)s'http://www.w3.org/2002/07/owl#seeAlso')
56
57 """)
58
59 import logging
60 logger = logging.getLogger(__name__)
61
62 import os
63
64 from urllib.parse import urljoin, urldefrag
65 from urllib.request import pathname2url
66
67 from rdflib.term import URIRef, Variable, _XSD_PFX, _is_valid_uri
68
# Names exported by ``from rdflib.namespace import *``.
__all__ = [
    # helper functions
    'is_ncname',
    'split_uri',
    # namespace classes
    'Namespace',
    'ClosedNamespace',
    'NamespaceManager',
    # predefined namespaces
    'XMLNS',
    'RDF',
    'RDFS',
    'XSD',
    'OWL',
    'SKOS',
    'DOAP',
    'FOAF',
    'DC',
    'DCTERMS',
    'VOID',
]
74
75
class Namespace(str):

    __doc__ = format_doctest_out("""
    Utility class for quickly generating URIRefs with a common prefix

    >>> from rdflib import Namespace
    >>> n = Namespace("http://example.org/")
    >>> n.Person # as attribute
    rdflib.term.URIRef(%(u)s'http://example.org/Person')
    >>> n['first-name'] # as item - for things that are not valid python identifiers
    rdflib.term.URIRef(%(u)s'http://example.org/first-name')

    """)

    def __new__(cls, value):
        """Create the namespace from its base URI.

        ``value`` may be text or UTF-8 encoded bytes.  Bytes are decoded
        explicitly: on Python 3 ``str(b"...")`` does not raise, it returns
        the ``"b'...'"`` representation, so relying on a
        ``UnicodeDecodeError`` fallback would silently produce a bogus URI.
        """
        if isinstance(value, (bytes, bytearray)):
            return str.__new__(cls, value, 'utf-8')
        return str.__new__(cls, value)

    @property
    def title(self):
        """Return the ``title`` term of this namespace.

        ``str.title`` is found by normal attribute lookup, so
        ``__getattr__`` is never consulted for this name; the property
        makes ``ns.title`` behave like every other term.
        """
        return URIRef(self + 'title')

    def term(self, name):
        """Return the URIRef obtained by appending ``name`` to the base URI.

        Anything that is not a ``str`` contributes an empty suffix.
        """
        # need to handle slices explicitly because of __getitem__ override
        return URIRef(self + (name if isinstance(name, str) else ''))

    def __getitem__(self, key, default=None):
        """Item access (``ns['name']``); ``default`` is accepted but unused."""
        return self.term(key)

    def __getattr__(self, name):
        """Attribute access (``ns.name``) for names not found on ``str``.

        Raises AttributeError for dunder names so that Python's protocol
        probing is not answered with a URIRef.
        """
        if name.startswith("__"):  # ignore any special Python names!
            raise AttributeError(name)
        return self.term(name)

    def __repr__(self):
        return "Namespace(%s)" % str.__repr__(self)
118
119
class URIPattern(str):

    __doc__ = format_doctest_out("""
    Utility class for creating URIs according to some pattern
    This supports either new style formatting with .format
    or old-style with %% operator

    >>> u=URIPattern("http://example.org/%%s/%%d/resource")
    >>> u%%('books', 12345)
    rdflib.term.URIRef(%(u)s'http://example.org/books/12345/resource')

    """)

    def __new__(cls, value):
        """Create the pattern from text or UTF-8 encoded bytes.

        Bytes are decoded explicitly; on Python 3 ``str(b"...")`` returns
        the ``"b'...'"`` representation instead of raising, so a
        ``UnicodeDecodeError`` fallback would never run.
        """
        if isinstance(value, (bytes, bytearray)):
            return str.__new__(cls, value, 'utf-8')
        return str.__new__(cls, value)

    def __mod__(self, *args, **kwargs):
        """Apply old-style ``%`` formatting and wrap the result in a URIRef."""
        return URIRef(str(self).__mod__(*args, **kwargs))

    def format(self, *args, **kwargs):
        """Apply ``str.format`` and wrap the result in a URIRef."""
        return URIRef(str.format(self, *args, **kwargs))

    def __repr__(self):
        # str.__repr__ already returns the quoted form; interpolating it
        # with %r would quote it a second time.
        return "URIPattern(%s)" % str.__repr__(self)
148
149
150
class ClosedNamespace(object):
    """
    A namespace with a closed list of members

    Trying to create terms not listed is an error
    """

    def __init__(self, uri, terms):
        """Record the base ``uri`` and pre-build a URIRef for each of ``terms``."""
        self.uri = uri
        self.__uris = {t: URIRef(self.uri + t) for t in terms}

    def term(self, name):
        """Return the URIRef registered for ``name``.

        Raises KeyError when ``name`` is not one of the terms given to the
        constructor.
        """
        uri = self.__uris.get(name)
        if uri is None:
            raise KeyError(
                "term '%s' not in namespace '%s'" % (name, self.uri))
        return uri

    def __getitem__(self, key, default=None):
        """Item access (``ns['name']``); ``default`` is accepted but unused."""
        return self.term(key)

    def __getattr__(self, name):
        """Attribute access (``ns.name``) for names not found normally.

        Unknown names raise AttributeError, as the attribute protocol
        requires: ``hasattr()`` and ``getattr(obj, name, default)`` only
        recognise that exception type.
        """
        if name.startswith("__"):  # ignore any special Python names!
            raise AttributeError(name)
        try:
            return self.term(name)
        except KeyError as e:
            raise AttributeError(e)

    def __str__(self):
        return str(self.uri)

    def __repr__(self):
        return """rdf.namespace.ClosedNamespace('%s')""" % str(self.uri)
186
187
class _RDFNamespace(ClosedNamespace):
    """
    Closed namespace for RDF terms
    """

    def __init__(self):
        syntax_names = [
            "RDF", "Description", "ID", "about", "parseType",
            "resource", "li", "nodeID", "datatype",
        ]
        rdf_classes = [
            "Seq", "Bag", "Alt", "Statement", "Property",
            "List", "PlainLiteral",
        ]
        # plus _n where n is a non-negative integer, handled in term()
        rdf_properties = [
            "subject", "predicate", "object", "type",
            "value", "first", "rest",
        ]
        rdf_resources = ["nil"]
        rdf_1_1_additions = ["XMLLiteral", "HTML", "langString"]

        super().__init__(
            URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
            terms=(syntax_names + rdf_classes + rdf_properties
                   + rdf_resources + rdf_1_1_additions),
        )

    def term(self, name):
        """Return the URIRef for ``name``.

        A name that ``int()`` accepts yields the ``_<n>`` term; any other
        name is looked up in the closed term list.
        """
        try:
            index = int(name)
        except ValueError:
            return super().term(name)
        return URIRef("%s_%s" % (self.uri, index))
222
# Closed namespaces: only the listed terms can be produced.
RDF = _RDFNamespace()

RDFS = ClosedNamespace(
    uri=URIRef("http://www.w3.org/2000/01/rdf-schema#"),
    terms=[
        "Resource",
        "Class",
        "subClassOf",
        "subPropertyOf",
        "comment",
        "label",
        "domain",
        "range",
        "seeAlso",
        "isDefinedBy",
        "Literal",
        "Container",
        "ContainerMembershipProperty",
        "member",
        "Datatype",
    ],
)

# Open namespaces: any attribute or item access yields a URIRef.
OWL = Namespace('http://www.w3.org/2002/07/owl#')
XSD = Namespace(_XSD_PFX)
SKOS = Namespace('http://www.w3.org/2004/02/skos/core#')
DOAP = Namespace('http://usefulinc.com/ns/doap#')
FOAF = Namespace('http://xmlns.com/foaf/0.1/')
DC = Namespace('http://purl.org/dc/elements/1.1/')
DCTERMS = Namespace('http://purl.org/dc/terms/')
VOID = Namespace('http://rdfs.org/ns/void#')
243
244
245
class NamespaceManager(object):
    """

    Class for managing prefix => namespace mappings

    Sample usage from FuXi ...

    .. code-block:: python

        ruleStore = N3RuleStore(additionalBuiltins=additionalBuiltins)
        nsMgr = NamespaceManager(Graph(ruleStore))
        ruleGraph = Graph(ruleStore,namespace_manager=nsMgr)


    and ...

    .. code-block:: pycon

        >>> import rdflib
        >>> from rdflib import Graph
        >>> from rdflib.namespace import Namespace, NamespaceManager
        >>> exNs = Namespace('http://example.com/')
        >>> namespace_manager = NamespaceManager(Graph())
        >>> namespace_manager.bind('ex', exNs, override=False)
        >>> g = Graph()
        >>> g.namespace_manager = namespace_manager
        >>> all_ns = [n for n in g.namespace_manager.namespaces()]
        >>> assert ('ex', rdflib.term.URIRef('http://example.com/')) in all_ns
        >>>

    """

    def __init__(self, graph):
        """Attach to ``graph`` and bind the xml, rdf, rdfs and xsd prefixes."""
        self.graph = graph
        self.__cache = {}  # uri -> (prefix, namespace, name), see compute_qname
        self.__log = None
        self.bind("xml", "http://www.w3.org/XML/1998/namespace")
        self.bind("rdf", RDF)
        self.bind("rdfs", RDFS)
        self.bind("xsd", XSD)

    def reset(self):
        """Discard every qname cached by :meth:`compute_qname`."""
        self.__cache = {}

    @property
    def store(self):
        """The store of the managed graph (read-only)."""
        return self.graph.store

    def qname(self, uri):
        """Return ``prefix:name`` for ``uri``, or just ``name`` when the
        prefix is the empty string."""
        prefix, namespace, name = self.compute_qname(uri)
        if prefix == "":
            return name
        return ":".join((prefix, name))

    def normalizeUri(self, rdfTerm):
        """
        Takes an RDF Term and 'normalizes' it into a QName (using the
        registered prefix) or (unlike compute_qname) the Notation 3
        form for URIs: <...URI...>
        """
        # Form used whenever no qname can be produced.
        if isinstance(rdfTerm, Variable):
            unprefixed = "?%s" % rdfTerm
        else:
            unprefixed = "<%s>" % rdfTerm

        try:
            namespace, name = split_uri(rdfTerm)
            namespace = URIRef(str(namespace))
        except Exception:
            # Not splittable. ``Exception`` rather than a bare ``except`` so
            # that KeyboardInterrupt / SystemExit still propagate.
            return unprefixed

        if self.store.prefix(namespace) is None:
            return unprefixed
        qNameParts = self.compute_qname(rdfTerm)
        return ':'.join([qNameParts[0], qNameParts[-1]])

    def compute_qname(self, uri, generate=True):
        """Return ``(prefix, namespace, name)`` for ``uri``.

        Results are cached per URI.  When the namespace has no prefix in
        the store and ``generate`` is true, the first free ``ns<N>`` prefix
        is bound to it.

        Raises Exception if ``uri`` fails ``_is_valid_uri``, if it cannot
        be split, or if no prefix is known and ``generate`` is false.
        """
        if not _is_valid_uri(uri):
            raise Exception('"%s" does not look like a valid URI, I cannot serialize this. Perhaps you wanted to urlencode it?'%uri)

        if uri not in self.__cache:
            namespace, name = split_uri(uri)
            namespace = URIRef(namespace)
            prefix = self.store.prefix(namespace)
            if prefix is None:
                if not generate:
                    raise Exception(
                        "No known prefix for %s and generate=False"
                        % namespace)
                num = 1
                while True:
                    prefix = "ns%s" % num
                    if not self.store.namespace(prefix):
                        break
                    num += 1
                self.bind(prefix, namespace)
            self.__cache[uri] = (prefix, namespace, name)
        return self.__cache[uri]

    def bind(self, prefix, namespace, override=True, replace=False):

        """bind a given namespace to the prefix

        if override, rebind, even if the given namespace is already
        bound to another prefix.

        if replace, replace any existing prefix with the new namespace

        """

        namespace = URIRef(str(namespace))
        # When documenting explain that override only applies in what cases
        if prefix is None:
            prefix = ''
        bound_namespace = self.store.namespace(prefix)
        # Check if the bound_namespace contains a URI
        # and if so convert it into a URIRef for comparison
        # This is to prevent duplicate namespaces with the
        # same URI
        if bound_namespace:
            bound_namespace = URIRef(bound_namespace)
        if bound_namespace and bound_namespace != namespace:

            if replace:
                self.store.bind(prefix, namespace)
                return

            # prefix already in use for different namespace
            #
            # append number to end of prefix until we find one
            # that's not in use.
            if not prefix:
                prefix = "default"
            num = 1
            while True:
                new_prefix = "%s%s" % (prefix, num)
                tnamespace = self.store.namespace(new_prefix)
                if tnamespace and namespace == URIRef(tnamespace):
                    # the prefix is already bound to the correct
                    # namespace
                    return
                if not tnamespace:
                    break
                num += 1
            self.store.bind(new_prefix, namespace)
        else:
            bound_prefix = self.store.prefix(namespace)
            if bound_prefix is None:
                self.store.bind(prefix, namespace)
            elif bound_prefix == prefix:
                pass  # already bound
            elif override or bound_prefix.startswith("_"):
                # explicit override, or replacing a generated prefix
                self.store.bind(prefix, namespace)

    def namespaces(self):
        """Yield ``(prefix, URIRef)`` for every binding in the store."""
        for prefix, namespace in self.store.namespaces():
            yield prefix, URIRef(namespace)

    def absolutize(self, uri, defrag=1):
        """Resolve ``uri`` against a ``file:`` URL of the current working
        directory and return it as a URIRef.

        With ``defrag`` true any fragment is removed.  Otherwise a trailing
        bare ``#`` on ``uri`` is restored if joining dropped it.
        """
        base = urljoin("file:", pathname2url(os.getcwd()))
        result = urljoin("%s/" % base, uri, allow_fragments=not defrag)
        if defrag:
            result = urldefrag(result)[0]
        elif uri and uri[-1] == "#" and result[-1] != "#":
            result = "%s#" % result
        return URIRef(result)
418
419 # From: http://www.w3.org/TR/REC-xml#NT-CombiningChar
420 #
421 # * Name start characters must have one of the categories Ll, Lu, Lo,
422 # Lt, Nl.
423 #
424 # * Name characters other than Name-start characters must have one of
425 # the categories Mc, Me, Mn, Lm, or Nd.
426 #
427 # * Characters in the compatibility area (i.e. with character code
428 # greater than #xF900 and less than #xFFFE) are not allowed in XML
429 # names.
430 #
431 # * Characters which have a font or compatibility decomposition
432 # (i.e. those with a "compatibility formatting tag" in field 5 of the
433 # database -- marked by field 5 beginning with a "<") are not allowed.
434 #
435 # * The following characters are treated as name-start characters rather
436 # than name characters, because the property file classifies them as
437 # Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.
438 #
439 # * Characters #x20DD-#x20E0 are excluded (in accordance with Unicode
440 # 2.0, section 5.14).
441 #
442 # * Character #x00B7 is classified as an extender, because the property
443 # list so identifies it.
444 #
445 # * Character #x0387 is added as a name character, because #x00B7 is its
446 # canonical equivalent.
447 #
448 # * Characters ':' and '_' are allowed as name-start characters.
449 #
450 # * Characters '-' and '.' are allowed as name characters.
451
452 from unicodedata import category
453
# Unicode general categories permitted for the first character of a name,
# and for any later character (see the XML naming rules quoted above).
NAME_START_CATEGORIES = ["Ll", "Lu", "Lo", "Lt", "Nl"]
NAME_CATEGORIES = NAME_START_CATEGORIES + ["Mc", "Me", "Mn", "Lm", "Nd"]
# Individual characters accepted after the first position regardless of
# their Unicode category.
ALLOWED_NAME_CHARS = ["\u00B7", "\u0387", "-", ".", "_"]

# http://www.w3.org/TR/REC-xml-names/#NT-NCName
#  [4] NCName ::= (Letter | '_') (NCNameChar)* /* An XML Name, minus
#      the ":" */
#  [5] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | CombiningChar
#      | Extender


def is_ncname(name):
    """Return 1 if ``name`` is accepted as an NCName, else 0.

    The first character must be ``_`` or belong to NAME_START_CATEGORIES;
    each later character must belong to NAME_CATEGORIES or be listed in
    ALLOWED_NAME_CHARS.  An empty (or otherwise falsy) name is not an
    NCName and yields 0 instead of raising.
    """
    if not name:
        return 0
    first = name[0]
    if first != "_" and category(first) not in NAME_START_CATEGORIES:
        return 0
    for c in name[1:]:
        if category(c) not in NAME_CATEGORIES and c not in ALLOWED_NAME_CHARS:
            return 0
        # if in compatibility area
        # if decomposition(c)!='':
        #    return 0
    return 1


XMLNS = "http://www.w3.org/XML/1998/namespace"


def split_uri(uri):
    """Split ``uri`` into ``(namespace, local_name)``.

    URIs starting with XMLNS are split right after that prefix.  Otherwise
    the URI is scanned backwards to the last character that cannot be part
    of a name; the local name then begins at the first name-start
    character after it, and everything before that is the namespace.

    Raises Exception when no such split exists: the URI is empty, consists
    only of name characters, or has no name-start character after the last
    separator.
    """
    if uri.startswith(XMLNS):
        # Slice instead of str.split so the local part stays intact even
        # if it contains the XMLNS text again.
        return (XMLNS, uri[len(XMLNS):])
    length = len(uri)
    for pos in range(length - 1, -1, -1):
        c = uri[pos]
        if category(c) in NAME_CATEGORIES or c in ALLOWED_NAME_CHARS:
            continue
        # ``pos`` is the last non-name character.  Only look forward from
        # here to the end; the scan must not wrap round to the start of
        # the string.
        for j in range(pos + 1, length):
            if category(uri[j]) in NAME_START_CATEGORIES or uri[j] == "_":
                return (uri[:j], uri[j:])
        break
    raise Exception("Can't split '%s'" % uri)
493 continue
494 for j in range(-1 - i, length):
495 if category(uri[j]) in NAME_START_CATEGORIES or uri[j] == "_":
496 ns = uri[:j]
497 if not ns:
498 break
499 ln = uri[j:]
500 return (ns, ln)
501 break
502 raise Exception("Can't split '%s'" % uri)