Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/rdflib/namespace.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 from __future__ import absolute_import | |
2 from __future__ import division | |
3 from __future__ import print_function | |
4 | |
5 import logging | |
6 | |
7 import os | |
8 from unicodedata import category | |
9 | |
10 from six import string_types | |
11 from six import text_type | |
12 | |
13 from six.moves.urllib.request import pathname2url | |
14 from six.moves.urllib.parse import urldefrag | |
15 from six.moves.urllib.parse import urljoin | |
16 | |
17 from rdflib.term import URIRef, Variable, _XSD_PFX, _is_valid_uri | |
18 | |
19 __doc__ = """ | |
20 =================== | |
21 Namespace Utilities | |
22 =================== | |
23 | |
24 RDFLib provides mechanisms for managing Namespaces. | |
25 | |
26 In particular, there is a :class:`~rdflib.namespace.Namespace` class | |
27 that takes as its argument the base URI of the namespace. | |
28 | |
29 .. code-block:: pycon | |
30 | |
31 >>> from rdflib.namespace import Namespace | |
32 >>> owl = Namespace('http://www.w3.org/2002/07/owl#') | |
33 | |
34 Fully qualified URIs in the namespace can be constructed either by attribute | |
35 or by dictionary access on Namespace instances: | |
36 | |
37 .. code-block:: pycon | |
38 | |
39 >>> owl.seeAlso | |
40 rdflib.term.URIRef(u'http://www.w3.org/2002/07/owl#seeAlso') | |
41 >>> owl['seeAlso'] | |
42 rdflib.term.URIRef(u'http://www.w3.org/2002/07/owl#seeAlso') | |
43 | |
44 | |
45 Automatic handling of unknown predicates | |
46 ----------------------------------------- | |
47 | |
48 As a programming convenience, a namespace binding is automatically | |
49 created when :class:`rdflib.term.URIRef` predicates are added to the graph. | |
50 | |
51 Importable namespaces | |
52 ----------------------- | |
53 | |
54 The following namespaces are available by directly importing from rdflib: | |
55 | |
56 * RDF | |
57 * RDFS | |
58 * OWL | |
59 * XSD | |
60 * FOAF | |
61 * SKOS | |
62 * DOAP | |
63 * DC | |
64 * DCTERMS | |
65 * VOID | |
66 | |
67 .. code-block:: pycon | |
68 | |
69 >>> from rdflib import OWL | |
70 >>> OWL.seeAlso | |
71 rdflib.term.URIRef(u'http://www.w3.org/2002/07/owl#seeAlso') | |
72 | |
73 """ | |
74 | |
# Names exported by ``from rdflib.namespace import *``.
__all__ = [
    "is_ncname",
    "split_uri",
    "Namespace",
    "ClosedNamespace",
    "NamespaceManager",
    "XMLNS",
    "RDF",
    "RDFS",
    "XSD",
    "OWL",
    "SKOS",
    "DOAP",
    "FOAF",
    "DC",
    "DCTERMS",
    "VOID",
]

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
82 | |
83 | |
class Namespace(text_type):
    """
    Utility class for quickly generating URIRefs with a common prefix

    >>> from rdflib import Namespace
    >>> n = Namespace("http://example.org/")
    >>> n.Person # as attribute
    rdflib.term.URIRef(u'http://example.org/Person')
    >>> n['first-name'] # as item - for things that are not valid python identifiers
    rdflib.term.URIRef(u'http://example.org/first-name')

    """

    def __new__(cls, value):
        """Build the namespace string, decoding byte input as UTF-8 if needed."""
        try:
            return text_type.__new__(cls, value)
        except UnicodeDecodeError:
            return text_type.__new__(cls, value, "utf-8")

    @property
    def title(self):
        """Return the ``title`` term of this namespace.

        Declared explicitly because the inherited string ``title`` method
        would otherwise be found before ``__getattr__`` is consulted.
        """
        return URIRef(self + "title")

    def term(self, name):
        """Return the URIRef made of this namespace followed by *name*.

        Anything that is not a string (for example a slice arriving through
        ``__getitem__``) contributes an empty suffix.
        """
        suffix = name if isinstance(name, string_types) else ""
        return URIRef(self + suffix)

    def __getitem__(self, key, default=None):
        """Item access is an alias for :meth:`term`; *default* is unused."""
        return self.term(key)

    def __getattr__(self, name):
        """Attribute access is an alias for :meth:`term`, except for dunders."""
        if not name.startswith("__"):
            return self.term(name)
        # ignore any special Python names!
        raise AttributeError

    def __repr__(self):
        return "Namespace(%r)" % text_type(self)
124 | |
125 | |
class URIPattern(text_type):
    """
    Utility class for creating URIs according to some pattern
    This supports either new style formatting with .format
    or old-style with % operator

    >>> u=URIPattern("http://example.org/%s/%d/resource")
    >>> u%('books', 12345)
    rdflib.term.URIRef(u'http://example.org/books/12345/resource')

    """

    def __new__(cls, value):
        """Build the pattern string, decoding byte input as UTF-8 if needed."""
        try:
            return text_type.__new__(cls, value)
        except UnicodeDecodeError:
            return text_type.__new__(cls, value, "utf-8")

    def __mod__(self, *args, **kwargs):
        """Apply old-style ``%`` formatting and wrap the result in a URIRef."""
        plain = text_type(self)
        return URIRef(plain.__mod__(*args, **kwargs))

    def format(self, *args, **kwargs):
        """Apply ``str.format`` formatting and wrap the result in a URIRef."""
        formatted = text_type.format(self, *args, **kwargs)
        return URIRef(formatted)

    def __repr__(self):
        return "URIPattern(%r)" % text_type(self)
154 | |
155 | |
class ClosedNamespace(object):
    """
    A namespace with a closed list of members

    Trying to create terms not listed is an error
    """

    def __init__(self, uri, terms):
        """Record the base *uri* and pre-build one URIRef per name in *terms*."""
        self.uri = uri
        self.__uris = {}
        for t in terms:
            self.__uris[t] = URIRef(self.uri + t)

    def term(self, name):
        """Return the URIRef registered for *name*.

        Raises KeyError when *name* is not one of the terms given to the
        constructor.
        """
        uri = self.__uris.get(name)
        if uri is None:
            raise KeyError(
                "term '{}' not in namespace '{}'".format(name, self.uri)
            )
        return uri

    def __getitem__(self, key, default=None):
        """Item access is an alias for :meth:`term`; *default* is unused."""
        return self.term(key)

    def __getattr__(self, name):
        """Attribute access is an alias for :meth:`term`.

        Unknown terms surface as AttributeError (not KeyError) so that
        ``getattr``/``hasattr`` behave as usual.
        """
        if name.startswith("__"):  # ignore any special Python names!
            # Include the name so the failure is diagnosable.
            raise AttributeError(name)
        try:
            return self.term(name)
        except KeyError as e:
            raise AttributeError(e)

    def __str__(self):
        return text_type(self.uri)

    def __repr__(self):
        # NOTE(review): the "rdf.namespace" prefix does not match this
        # module's import path; kept as-is so existing output is unchanged.
        return "rdf.namespace.ClosedNamespace(%r)" % text_type(self.uri)
195 | |
196 | |
class _RDFNamespace(ClosedNamespace):
    """
    Closed namespace for RDF terms
    """

    def __init__(self):
        rdf_terms = [
            # Syntax Names
            "RDF", "Description", "ID", "about", "parseType",
            "resource", "li", "nodeID", "datatype",

            # RDF Classes
            "Seq", "Bag", "Alt", "Statement", "Property",
            "List", "PlainLiteral",

            # RDF Properties
            "subject", "predicate", "object", "type",
            "value", "first", "rest",
            # and _n where n is a positive integer (handled in term())

            # RDF Resources
            "nil",

            # Added in RDF 1.1
            "XMLLiteral", "HTML", "langString",

            # Added in JSON-LD 1.1
            "JSON", "CompoundLiteral", "language", "direction",
        ]
        super(_RDFNamespace, self).__init__(
            URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
            terms=rdf_terms,
        )

    def term(self, name):
        """Return the URIRef for *name*, also accepting ``_1``, ``_2``, ...

        Container membership properties are not in the closed term list, so
        a name of the form ``_<n>`` with n > 0 is built on the fly; anything
        else is resolved through the closed list.
        """
        if name.startswith("_"):
            try:
                index = int(name[1:])
            except ValueError:
                index = None
            if index is not None and index > 0:
                return URIRef("%s_%s" % (self.uri, index))

        return super(_RDFNamespace, self).term(name)
241 | |
242 | |
# Ready-made namespace objects. Closed namespaces reject unknown terms;
# plain Namespace objects build any term on demand.
RDF = _RDFNamespace()


RDFS = ClosedNamespace(
    uri=URIRef("http://www.w3.org/2000/01/rdf-schema#"),
    terms=[
        "Resource", "Class", "subClassOf", "subPropertyOf",
        "comment", "label", "domain", "range",
        "seeAlso", "isDefinedBy", "Literal", "Container",
        "ContainerMembershipProperty", "member", "Datatype",
    ],
)

OWL = Namespace("http://www.w3.org/2002/07/owl#")

XSD = Namespace(_XSD_PFX)

CSVW = Namespace("http://www.w3.org/ns/csvw#")
DC = Namespace("http://purl.org/dc/elements/1.1/")
DCAT = Namespace("http://www.w3.org/ns/dcat#")
DCTERMS = Namespace("http://purl.org/dc/terms/")
DOAP = Namespace("http://usefulinc.com/ns/doap#")
FOAF = ClosedNamespace(
    uri=URIRef("http://xmlns.com/foaf/0.1/"),
    terms=[
        # all taken from http://xmlns.com/foaf/spec/
        "Agent", "Person", "name", "title", "img",
        "depiction", "depicts", "familyName",
        "givenName", "knows", "based_near", "age", "made",
        "maker", "primaryTopic", "primaryTopicOf", "Project", "Organization",
        "Group", "member", "Document", "Image", "nick",
        "mbox", "homepage", "weblog", "openid", "jabberID",
        "mbox_sha1sum", "interest", "topic_interest", "topic", "page",
        "workplaceHomepage", "workInfoHomepage", "schoolHomepage",
        "publications", "currentProject",
        "pastProject", "account", "OnlineAccount", "accountName",
        "accountServiceHomepage",
        "PersonalProfileDocument", "tipjar", "sha1", "thumbnail", "logo",
    ],
)
ODRL2 = Namespace("http://www.w3.org/ns/odrl/2/")
ORG = Namespace("http://www.w3.org/ns/org#")
PROV = ClosedNamespace(
    uri=URIRef("http://www.w3.org/ns/prov#"),
    terms=[
        "Entity", "Activity", "Agent", "wasGeneratedBy", "wasDerivedFrom",
        "wasAttributedTo", "startedAtTime", "used", "wasInformedBy",
        "endedAtTime",
        "wasAssociatedWith", "actedOnBehalfOf", "Collection",
        "EmptyCollection", "Bundle",
        "Person", "SoftwareAgent", "Organization", "Location", "alternateOf",
        "specializationOf", "generatedAtTime", "hadPrimarySource", "value",
        "wasQuotedFrom",
        "wasRevisionOf", "invalidatedAtTime", "wasInvalidatedBy", "hadMember",
        "wasStartedBy",
        "wasEndedBy", "invalidated", "influenced", "atLocation", "generated",
        "Influence", "EntityInfluence", "Usage", "Start", "End",
        "Derivation", "PrimarySource", "Quotation", "Revision",
        "ActivityInfluence",
        "Generation", "Communication", "Invalidation", "AgentInfluence",
        "Attribution", "Association", "Plan", "Delegation",
        "InstantaneousEvent",
        "Role", "wasInfluencedBy", "qualifiedInfluence",
        "qualifiedGeneration", "qualifiedDerivation",
        "qualifiedPrimarySource", "qualifiedQuotation", "qualifiedRevision",
        "qualifiedAttribution",
        "qualifiedInvalidation", "qualifiedStart", "qualifiedUsage",
        "qualifiedCommunication", "qualifiedAssociation",
        "qualifiedEnd", "qualifiedDelegation", "influencer", "entity",
        "hadUsage", "hadGeneration",
        "activity", "agent", "hadPlan", "hadActivity", "atTime", "hadRole",
    ],
)
PROF = Namespace("http://www.w3.org/ns/dx/prof/")
SDO = Namespace("https://schema.org/")
SH = Namespace("http://www.w3.org/ns/shacl#")
SKOS = ClosedNamespace(
    uri=URIRef("http://www.w3.org/2004/02/skos/core#"),
    terms=[
        # all taken from https://www.w3.org/TR/skos-reference/#L1302
        "Concept", "ConceptScheme", "inScheme", "hasTopConcept",
        "topConceptOf",
        "altLabel", "hiddenLabel", "prefLabel", "notation", "changeNote",
        "definition", "editorialNote", "example", "historyNote", "note",
        "scopeNote", "broader", "broaderTransitive", "narrower",
        "narrowerTransitive",
        "related", "semanticRelation", "Collection", "OrderedCollection",
        "member",
        "memberList", "broadMatch", "closeMatch", "exactMatch",
        "mappingRelation",
        "narrowMatch", "relatedMatch",
    ],
)
# SOSA and SSN are two distinct W3C vocabularies; each constant must point at
# the IRI of its own ontology (the two had previously been swapped).
SOSA = Namespace('http://www.w3.org/ns/sosa/')
SSN = Namespace('http://www.w3.org/ns/ssn/')
TIME = Namespace('http://www.w3.org/2006/time#')
VOID = Namespace('http://rdfs.org/ns/void#')
322 | |
323 | |
class NamespaceManager(object):
    """

    Class for managing prefix => namespace mappings

    Sample usage from FuXi ...

    .. code-block:: python

        ruleStore = N3RuleStore(additionalBuiltins=additionalBuiltins)
        nsMgr = NamespaceManager(Graph(ruleStore))
        ruleGraph = Graph(ruleStore,namespace_manager=nsMgr)


    and ...

    .. code-block:: pycon

        >>> import rdflib
        >>> from rdflib import Graph
        >>> from rdflib.namespace import Namespace, NamespaceManager
        >>> exNs = Namespace('http://example.com/')
        >>> namespace_manager = NamespaceManager(Graph())
        >>> namespace_manager.bind('ex', exNs, override=False)
        >>> g = Graph()
        >>> g.namespace_manager = namespace_manager
        >>> all_ns = [n for n in g.namespace_manager.namespaces()]
        >>> assert ('ex', rdflib.term.URIRef('http://example.com/')) in all_ns
        >>>

    """

    def __init__(self, graph):
        """Attach to *graph* and bind the xml, rdf, rdfs and xsd prefixes."""
        self.graph = graph
        self.__cache = {}         # uri -> (prefix, namespace, name)
        self.__cache_strict = {}  # same, for NCName-safe splits
        self.__log = None
        self.__strie = {}         # namespace -> its sub-trie inside __trie
        self.__trie = {}          # nested dict of namespaces keyed by prefix
        for p, n in self.namespaces():  # self.bind is not always called
            insert_trie(self.__trie, str(n))
        self.bind("xml", "http://www.w3.org/XML/1998/namespace")
        self.bind("rdf", RDF)
        self.bind("rdfs", RDFS)
        self.bind("xsd", XSD)

    def reset(self):
        """Drop all cached qname results and rebuild the namespace trie."""
        self.__cache = {}
        # The strict cache holds (prefix, namespace, name) tuples as well, so
        # it must be invalidated together with the regular cache.
        self.__cache_strict = {}
        self.__strie = {}
        self.__trie = {}
        for p, n in self.namespaces():  # repopulate the trie
            insert_trie(self.__trie, str(n))

    def __get_store(self):
        return self.graph.store
    store = property(__get_store)

    def qname(self, uri):
        """Return ``prefix:name`` for *uri* (just ``name`` for the empty prefix)."""
        prefix, namespace, name = self.compute_qname(uri)
        if prefix == "":
            return name
        return ":".join((prefix, name))

    def qname_strict(self, uri):
        """Like :meth:`qname`, but based on :meth:`compute_qname_strict`."""
        prefix, namespace, name = self.compute_qname_strict(uri)
        if prefix == '':
            return name
        return ':'.join((prefix, name))

    def normalizeUri(self, rdfTerm):
        """
        Takes an RDF Term and 'normalizes' it into a QName (using the
        registered prefix) or (unlike compute_qname) the Notation 3
        form for URIs: <...URI...>
        """
        try:
            namespace, name = split_uri(rdfTerm)
            if namespace not in self.__strie:
                insert_strie(self.__strie, self.__trie, str(namespace))
            namespace = URIRef(text_type(namespace))
        except Exception:
            # Best effort: any term that cannot be split is rendered
            # verbatim. ``Exception`` (rather than a bare except) lets
            # KeyboardInterrupt/SystemExit propagate.
            if isinstance(rdfTerm, Variable):
                return "?%s" % rdfTerm
            return "<%s>" % rdfTerm
        prefix = self.store.prefix(namespace)
        if prefix is None and isinstance(rdfTerm, Variable):
            return "?%s" % rdfTerm
        elif prefix is None:
            return "<%s>" % rdfTerm
        else:
            qNameParts = self.compute_qname(rdfTerm)
            return ':'.join([qNameParts[0], qNameParts[-1]])

    def compute_qname(self, uri, generate=True):
        """Return a ``(prefix, namespace, name)`` tuple for *uri*.

        Results are cached per URI. When the namespace has no prefix yet and
        *generate* is true, a fresh ``ns<N>`` prefix is bound.

        Raises ValueError if *uri* fails ``_is_valid_uri`` or cannot be split
        and is not itself a bound namespace; raises KeyError if no prefix is
        known and *generate* is false.
        """
        if not _is_valid_uri(uri):
            raise ValueError(
                '"{}" does not look like a valid URI, cannot serialize this. Did you want to urlencode it?'.format(uri)
            )

        if uri not in self.__cache:
            try:
                namespace, name = split_uri(uri)
            except ValueError:
                namespace = URIRef(uri)
                prefix = self.store.prefix(namespace)
                if not prefix:
                    raise
                # The whole URI is a bound namespace, so the local name is
                # empty. Without this assignment ``name`` could be unbound
                # when the result tuple is built below.
                name = ""
            if namespace not in self.__strie:
                insert_strie(self.__strie, self.__trie, namespace)

            if self.__strie[namespace]:
                # A longer registered namespace may also prefix this URI.
                pl_namespace = get_longest_namespace(self.__strie[namespace], uri)
                if pl_namespace is not None:
                    namespace = pl_namespace
                    name = uri[len(namespace):]

            namespace = URIRef(namespace)
            prefix = self.store.prefix(namespace)  # warning multiple prefixes problem

            if prefix is None:
                if not generate:
                    raise KeyError(
                        "No known prefix for {} and generate=False".format(namespace)
                    )
                num = 1
                while 1:
                    prefix = "ns%s" % num
                    if not self.store.namespace(prefix):
                        break
                    num += 1
                self.bind(prefix, namespace)
            self.__cache[uri] = (prefix, namespace, name)
        return self.__cache[uri]

    def compute_qname_strict(self, uri, generate=True):
        """Like :meth:`compute_qname`, but the local name must be an NCName.

        Raises ValueError when *uri* cannot be split so that the local part
        starts with a name-start character; raises KeyError if no prefix is
        known and *generate* is false.
        """
        # code repeated to avoid branching on strict every time
        # if output needs to be strict (e.g. for xml) then
        # only the strict output should bear the overhead
        #
        # ``generate`` is forwarded so that generate=False is honoured by the
        # non-strict attempt too, instead of silently binding a new prefix.
        prefix, namespace, name = self.compute_qname(uri, generate)
        if is_ncname(text_type(name)):
            return prefix, namespace, name

        if uri not in self.__cache_strict:
            try:
                namespace, name = split_uri(uri, NAME_START_CATEGORIES)
            except ValueError:
                # No "URI is itself a namespace" fallback here: an NCName
                # cannot be empty.
                message = ('This graph cannot be serialized to a strict format '
                           'because there is no valid way to shorten {}'.format(uri))
                raise ValueError(message)

            if namespace not in self.__strie:
                insert_strie(self.__strie, self.__trie, namespace)

            # The longest-namespace refinement used by compute_qname is
            # deliberately not applied for strict output.

            namespace = URIRef(namespace)
            prefix = self.store.prefix(namespace)  # warning multiple prefixes problem

            if prefix is None:
                if not generate:
                    raise KeyError(
                        "No known prefix for {} and generate=False".format(namespace)
                    )
                num = 1
                while 1:
                    prefix = "ns%s" % num
                    if not self.store.namespace(prefix):
                        break
                    num += 1
                self.bind(prefix, namespace)
            self.__cache_strict[uri] = (prefix, namespace, name)

        return self.__cache_strict[uri]

    def bind(self, prefix, namespace, override=True, replace=False):
        """bind a given namespace to the prefix

        if override, rebind, even if the given namespace is already
        bound to another prefix.

        if replace, replace any existing prefix with the new namespace

        """

        namespace = URIRef(text_type(namespace))
        # When documenting explain that override only applies in what cases
        if prefix is None:
            prefix = ''
        bound_namespace = self.store.namespace(prefix)
        # Check if the bound_namespace contains a URI
        # and if so convert it into a URIRef for comparison
        # This is to prevent duplicate namespaces with the
        # same URI
        if bound_namespace:
            bound_namespace = URIRef(bound_namespace)
        if bound_namespace and bound_namespace != namespace:

            if replace:
                self.store.bind(prefix, namespace)
                insert_trie(self.__trie, str(namespace))
                return

            # prefix already in use for different namespace
            #
            # append number to end of prefix until we find one
            # that's not in use.
            if not prefix:
                prefix = "default"
            num = 1
            while 1:
                new_prefix = "%s%s" % (prefix, num)
                tnamespace = self.store.namespace(new_prefix)
                if tnamespace and namespace == URIRef(tnamespace):
                    # the prefix is already bound to the correct
                    # namespace
                    return
                if not self.store.namespace(new_prefix):
                    break
                num += 1
            self.store.bind(new_prefix, namespace)
        else:
            bound_prefix = self.store.prefix(namespace)
            if bound_prefix is None:
                self.store.bind(prefix, namespace)
            elif bound_prefix == prefix:
                pass  # already bound
            else:
                if override or bound_prefix.startswith("_"):  # or a generated prefix
                    self.store.bind(prefix, namespace)
        insert_trie(self.__trie, str(namespace))

    def namespaces(self):
        """Yield ``(prefix, URIRef(namespace))`` for every binding in the store."""
        for prefix, namespace in self.store.namespaces():
            namespace = URIRef(namespace)
            yield prefix, namespace

    def absolutize(self, uri, defrag=1):
        """Resolve *uri* against the current working directory as a file: URL.

        With *defrag* true the fragment is stripped; otherwise a trailing
        ``#`` on *uri* is kept on the result.
        """
        base = urljoin("file:", pathname2url(os.getcwd()))
        result = urljoin("%s/" % base, uri, allow_fragments=not defrag)
        if defrag:
            result = urldefrag(result)[0]
        elif uri and uri[-1] == "#" and result[-1] != "#":
            result = "%s#" % result
        return URIRef(result)
583 | |
584 # From: http://www.w3.org/TR/REC-xml#NT-CombiningChar | |
585 # | |
586 # * Name start characters must have one of the categories Ll, Lu, Lo, | |
587 # Lt, Nl. | |
588 # | |
589 # * Name characters other than Name-start characters must have one of | |
590 # the categories Mc, Me, Mn, Lm, or Nd. | |
591 # | |
592 # * Characters in the compatibility area (i.e. with character code | |
593 # greater than #xF900 and less than #xFFFE) are not allowed in XML | |
594 # names. | |
595 # | |
596 # * Characters which have a font or compatibility decomposition | |
597 # (i.e. those with a "compatibility formatting tag" in field 5 of the | |
598 # database -- marked by field 5 beginning with a "<") are not allowed. | |
599 # | |
600 # * The following characters are treated as name-start characters rather | |
601 # than name characters, because the property file classifies them as | |
602 # Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6. | |
603 # | |
604 # * Characters #x20DD-#x20E0 are excluded (in accordance with Unicode | |
605 # 2.0, section 5.14). | |
606 # | |
607 # * Character #x00B7 is classified as an extender, because the property | |
608 # list so identifies it. | |
609 # | |
610 # * Character #x0387 is added as a name character, because #x00B7 is its | |
611 # canonical equivalent. | |
612 # | |
613 # * Characters ':' and '_' are allowed as name-start characters. | |
614 # | |
615 # * Characters '-' and '.' are allowed as name characters. | |
616 | |
617 | |
# Unicode general categories allowed for the first character of a name.
NAME_START_CATEGORIES = ["Ll", "Lu", "Lo", "Lt", "Nl"]
# Categories at which split_uri may start a local name by default
# (decimal digits are accepted in addition to the name-start categories).
SPLIT_START_CATEGORIES = NAME_START_CATEGORIES + ["Nd"]
# Categories allowed for any character of a name.
NAME_CATEGORIES = NAME_START_CATEGORIES + ["Mc", "Me", "Mn", "Lm", "Nd"]
# Individual characters accepted in names regardless of their category.
ALLOWED_NAME_CHARS = [u"\u00B7", u"\u0387", u"-", u".", u"_", u":"]
622 | |
623 | |
624 # http://www.w3.org/TR/REC-xml-names/#NT-NCName | |
625 # [4] NCName ::= (Letter | '_') (NCNameChar)* /* An XML Name, minus | |
626 # the ":" */ | |
627 # [5] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | CombiningChar | |
628 # | Extender | |
629 | |
630 | |
def is_ncname(name):
    """Return 1 if *name* is usable as an XML NCName, else 0.

    The first character must be ``_`` or belong to NAME_START_CATEGORIES;
    every later character must belong to NAME_CATEGORIES or be one of
    ALLOWED_NAME_CHARS other than ``:``. An empty name is rejected.
    """
    if not name:
        return 0

    head = name[0]
    if head != "_" and category(head) not in NAME_START_CATEGORIES:
        return 0

    for ch in name[1:]:
        if category(ch) in NAME_CATEGORIES:
            continue
        # ':' is a legal XML name character but not a legal NCName character
        if ch != ':' and ch in ALLOWED_NAME_CHARS:
            continue
        return 0

    return 1
648 | |
649 | |
XMLNS = "http://www.w3.org/XML/1998/namespace"


def split_uri(uri, split_start=SPLIT_START_CATEGORIES):
    """Split *uri* into a ``(namespace, local_name)`` pair.

    The URI is scanned from the right for the first character that cannot be
    part of a name. The local name then starts at the first character at or
    after that point whose Unicode category is in *split_start*, or which is
    ``_``.

    URIs starting with XMLNS are special-cased: the namespace is XMLNS and
    the local name is whatever follows it.

    Raises ValueError if no split with a non-empty namespace exists.
    """
    if uri.startswith(XMLNS):
        # Slice instead of str.split(): split() would cut the local part
        # short if the XMLNS string occurred a second time inside the URI.
        return (XMLNS, uri[len(XMLNS):])
    length = len(uri)
    for end in range(length - 1, -1, -1):
        c = uri[end]
        if category(c) in NAME_CATEGORIES or c in ALLOWED_NAME_CHARS:
            continue
        # uri[end] is the right-most character that cannot belong to a name.
        # Look for the start of the local name from here to the end of the
        # string only; the scan must not wrap around to the beginning.
        for j in range(end, length):
            if category(uri[j]) in split_start or uri[j] == "_":
                # _ prevents early split, roundtrip not generate
                ns = uri[:j]
                if not ns:
                    break
                return (ns, uri[j:])
        break
    raise ValueError("Can't split '{}'".format(uri))
672 | |
def insert_trie(trie, value):  # aka get_subtrie_or_insert
    """Ensure *value* is a node of the nested-dict *trie* and return its sub-dict.

    A node's children are the inserted strings that have the node's key as a
    proper prefix. If *value* is already present at this level its existing
    sub-dict is returned unchanged.
    """
    if value in trie:
        return trie[value]

    for key in tuple(trie.keys()):
        if len(value) > len(key) and value.startswith(key):
            # value belongs somewhere beneath an existing shorter key
            return insert_trie(trie[key], value)
        if key.startswith(value):  # we know the value is not in the trie
            # An existing longer key becomes a child of the new node; there
            # can be several, so the node is created once and reused. The
            # moved sub-dict object itself is kept, so references to it
            # held elsewhere stay valid.
            subtree = trie.pop(key)
            trie.setdefault(value, {})[key] = subtree

    return trie.setdefault(value, {})
692 | |
def insert_strie(strie, trie, value):
    """Record in *strie* the sub-dict of *trie* that belongs to *value*.

    Nothing happens when *value* already has an entry in *strie*; otherwise
    *value* is inserted into *trie* and the resulting sub-dict is stored.
    """
    if value in strie:
        return
    strie[value] = insert_trie(trie, value)
696 | |
def get_longest_namespace(trie, value):
    """Return the deepest key in *trie* that is a prefix of *value*.

    The first key at this level that prefixes *value* is followed into its
    sub-dict; if nothing deeper matches, that key itself is the answer.
    Returns None when no key at this level prefixes *value*.
    """
    for key, subtrie in trie.items():
        if not value.startswith(key):
            continue
        deeper = get_longest_namespace(subtrie, value)
        return key if deeper is None else deeper
    return None