Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/rdflib/namespace.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4f3585e2f14b |
|---|---|
| 1 from __future__ import absolute_import | |
| 2 from __future__ import division | |
| 3 from __future__ import print_function | |
| 4 | |
| 5 import logging | |
| 6 | |
| 7 import os | |
| 8 from unicodedata import category | |
| 9 | |
| 10 from six import string_types | |
| 11 from six import text_type | |
| 12 | |
| 13 from six.moves.urllib.request import pathname2url | |
| 14 from six.moves.urllib.parse import urldefrag | |
| 15 from six.moves.urllib.parse import urljoin | |
| 16 | |
| 17 from rdflib.term import URIRef, Variable, _XSD_PFX, _is_valid_uri | |
| 18 | |
| 19 __doc__ = """ | |
| 20 =================== | |
| 21 Namespace Utilities | |
| 22 =================== | |
| 23 | |
| 24 RDFLib provides mechanisms for managing Namespaces. | |
| 25 | |
| 26 In particular, there is a :class:`~rdflib.namespace.Namespace` class | |
| 27 that takes as its argument the base URI of the namespace. | |
| 28 | |
| 29 .. code-block:: pycon | |
| 30 | |
| 31 >>> from rdflib.namespace import Namespace | |
| 32 >>> owl = Namespace('http://www.w3.org/2002/07/owl#') | |
| 33 | |
| 34 Fully qualified URIs in the namespace can be constructed either by attribute | |
| 35 or by dictionary access on Namespace instances: | |
| 36 | |
| 37 .. code-block:: pycon | |
| 38 | |
| 39 >>> owl.seeAlso | |
| 40 rdflib.term.URIRef(u'http://www.w3.org/2002/07/owl#seeAlso') | |
| 41 >>> owl['seeAlso'] | |
| 42 rdflib.term.URIRef(u'http://www.w3.org/2002/07/owl#seeAlso') | |
| 43 | |
| 44 | |
| 45 Automatic handling of unknown predicates | |
| 46 ----------------------------------------- | |
| 47 | |
| 48 As a programming convenience, a namespace binding is automatically | |
| 49 created when :class:`rdflib.term.URIRef` predicates are added to the graph. | |
| 50 | |
| 51 Importable namespaces | |
| 52 ----------------------- | |
| 53 | |
| 54 The following namespaces are available by directly importing from rdflib: | |
| 55 | |
| 56 * RDF | |
| 57 * RDFS | |
| 58 * OWL | |
| 59 * XSD | |
| 60 * FOAF | |
| 61 * SKOS | |
| 62 * DOAP | |
| 63 * DC | |
| 64 * DCTERMS | |
| 65 * VOID | |
| 66 | |
| 67 .. code-block:: pycon | |
| 68 | |
| 69 >>> from rdflib import OWL | |
| 70 >>> OWL.seeAlso | |
| 71 rdflib.term.URIRef(u'http://www.w3.org/2002/07/owl#seeAlso') | |
| 72 | |
| 73 """ | |
| 74 | |
| 75 __all__ = [ | |
| 76 'is_ncname', 'split_uri', 'Namespace', | |
| 77 'ClosedNamespace', 'NamespaceManager', | |
| 78 'XMLNS', 'RDF', 'RDFS', 'XSD', 'OWL', | |
| 79 'SKOS', 'DOAP', 'FOAF', 'DC', 'DCTERMS', 'VOID'] | |
| 80 | |
| 81 logger = logging.getLogger(__name__) | |
| 82 | |
| 83 | |
| 84 class Namespace(text_type): | |
| 85 | |
| 86 __doc__ = """ | |
| 87 Utility class for quickly generating URIRefs with a common prefix | |
| 88 | |
| 89 >>> from rdflib import Namespace | |
| 90 >>> n = Namespace("http://example.org/") | |
| 91 >>> n.Person # as attribute | |
| 92 rdflib.term.URIRef(u'http://example.org/Person') | |
| 93 >>> n['first-name'] # as item - for things that are not valid python identifiers | |
| 94 rdflib.term.URIRef(u'http://example.org/first-name') | |
| 95 | |
| 96 """ | |
| 97 | |
| 98 def __new__(cls, value): | |
| 99 try: | |
| 100 rt = text_type.__new__(cls, value) | |
| 101 except UnicodeDecodeError: | |
| 102 rt = text_type.__new__(cls, value, 'utf-8') | |
| 103 return rt | |
| 104 | |
| 105 @property | |
| 106 def title(self): | |
| 107 return URIRef(self + 'title') | |
| 108 | |
| 109 def term(self, name): | |
| 110 # need to handle slices explicitly because of __getitem__ override | |
| 111 return URIRef(self + (name if isinstance(name, string_types) else '')) | |
| 112 | |
| 113 def __getitem__(self, key, default=None): | |
| 114 return self.term(key) | |
| 115 | |
| 116 def __getattr__(self, name): | |
| 117 if name.startswith("__"): # ignore any special Python names! | |
| 118 raise AttributeError | |
| 119 else: | |
| 120 return self.term(name) | |
| 121 | |
| 122 def __repr__(self): | |
| 123 return "Namespace(%r)" % text_type(self) | |
| 124 | |
| 125 | |
| 126 class URIPattern(text_type): | |
| 127 | |
| 128 __doc__ = """ | |
| 129 Utility class for creating URIs according to some pattern | |
| 130 This supports either new style formatting with .format | |
| 131 or old-style with % operator | |
| 132 | |
| 133 >>> u=URIPattern("http://example.org/%s/%d/resource") | |
| 134 >>> u%('books', 12345) | |
| 135 rdflib.term.URIRef(u'http://example.org/books/12345/resource') | |
| 136 | |
| 137 """ | |
| 138 | |
| 139 def __new__(cls, value): | |
| 140 try: | |
| 141 rt = text_type.__new__(cls, value) | |
| 142 except UnicodeDecodeError: | |
| 143 rt = text_type.__new__(cls, value, 'utf-8') | |
| 144 return rt | |
| 145 | |
| 146 def __mod__(self, *args, **kwargs): | |
| 147 return URIRef(text_type(self).__mod__(*args, **kwargs)) | |
| 148 | |
| 149 def format(self, *args, **kwargs): | |
| 150 return URIRef(text_type.format(self, *args, **kwargs)) | |
| 151 | |
| 152 def __repr__(self): | |
| 153 return "URIPattern(%r)" % text_type(self) | |
| 154 | |
| 155 | |
| 156 class ClosedNamespace(object): | |
| 157 """ | |
| 158 A namespace with a closed list of members | |
| 159 | |
| 160 Trying to create terms not listen is an error | |
| 161 """ | |
| 162 | |
| 163 def __init__(self, uri, terms): | |
| 164 self.uri = uri | |
| 165 self.__uris = {} | |
| 166 for t in terms: | |
| 167 self.__uris[t] = URIRef(self.uri + t) | |
| 168 | |
| 169 def term(self, name): | |
| 170 uri = self.__uris.get(name) | |
| 171 if uri is None: | |
| 172 raise KeyError( | |
| 173 "term '{}' not in namespace '{}'".format(name, self.uri) | |
| 174 ) | |
| 175 else: | |
| 176 return uri | |
| 177 | |
| 178 def __getitem__(self, key, default=None): | |
| 179 return self.term(key) | |
| 180 | |
| 181 def __getattr__(self, name): | |
| 182 if name.startswith("__"): # ignore any special Python names! | |
| 183 raise AttributeError | |
| 184 else: | |
| 185 try: | |
| 186 return self.term(name) | |
| 187 except KeyError as e: | |
| 188 raise AttributeError(e) | |
| 189 | |
| 190 def __str__(self): | |
| 191 return text_type(self.uri) | |
| 192 | |
| 193 def __repr__(self): | |
| 194 return "rdf.namespace.ClosedNamespace(%r)" % text_type(self.uri) | |
| 195 | |
| 196 | |
| 197 class _RDFNamespace(ClosedNamespace): | |
| 198 """ | |
| 199 Closed namespace for RDF terms | |
| 200 """ | |
| 201 | |
| 202 def __init__(self): | |
| 203 super(_RDFNamespace, self).__init__( | |
| 204 URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), | |
| 205 terms=[ | |
| 206 # Syntax Names | |
| 207 "RDF", "Description", "ID", "about", "parseType", | |
| 208 "resource", "li", "nodeID", "datatype", | |
| 209 | |
| 210 # RDF Classes | |
| 211 "Seq", "Bag", "Alt", "Statement", "Property", | |
| 212 "List", "PlainLiteral", | |
| 213 | |
| 214 # RDF Properties | |
| 215 "subject", "predicate", "object", "type", | |
| 216 "value", "first", "rest", | |
| 217 # and _n where n is a non-negative integer | |
| 218 | |
| 219 # RDF Resources | |
| 220 "nil", | |
| 221 | |
| 222 # Added in RDF 1.1 | |
| 223 "XMLLiteral", "HTML", "langString", | |
| 224 | |
| 225 # Added in JSON-LD 1.1 | |
| 226 "JSON", "CompoundLiteral", "language", "direction"] | |
| 227 ) | |
| 228 | |
| 229 def term(self, name): | |
| 230 # Container membership properties | |
| 231 if name.startswith('_'): | |
| 232 try: | |
| 233 i = int(name[1:]) | |
| 234 except ValueError: | |
| 235 pass | |
| 236 else: | |
| 237 if i > 0: | |
| 238 return URIRef("%s_%s" % (self.uri, i)) | |
| 239 | |
| 240 return super(_RDFNamespace, self).term(name) | |
| 241 | |
| 242 | |
| 243 RDF = _RDFNamespace() | |
| 244 | |
| 245 | |
| 246 RDFS = ClosedNamespace( | |
| 247 uri=URIRef("http://www.w3.org/2000/01/rdf-schema#"), | |
| 248 terms=[ | |
| 249 "Resource", "Class", "subClassOf", "subPropertyOf", "comment", "label", | |
| 250 "domain", "range", "seeAlso", "isDefinedBy", "Literal", "Container", | |
| 251 "ContainerMembershipProperty", "member", "Datatype"] | |
| 252 ) | |
| 253 | |
| 254 OWL = Namespace('http://www.w3.org/2002/07/owl#') | |
| 255 | |
| 256 XSD = Namespace(_XSD_PFX) | |
| 257 | |
| 258 CSVW = Namespace('http://www.w3.org/ns/csvw#') | |
| 259 DC = Namespace('http://purl.org/dc/elements/1.1/') | |
| 260 DCAT = Namespace('http://www.w3.org/ns/dcat#') | |
| 261 DCTERMS = Namespace('http://purl.org/dc/terms/') | |
| 262 DOAP = Namespace('http://usefulinc.com/ns/doap#') | |
| 263 FOAF = ClosedNamespace( | |
| 264 uri=URIRef('http://xmlns.com/foaf/0.1/'), | |
| 265 terms=[ | |
| 266 # all taken from http://xmlns.com/foaf/spec/ | |
| 267 'Agent', 'Person', 'name', 'title', 'img', | |
| 268 'depiction', 'depicts', 'familyName', | |
| 269 'givenName', 'knows', 'based_near', 'age', 'made', | |
| 270 'maker', 'primaryTopic', 'primaryTopicOf', 'Project', 'Organization', | |
| 271 'Group', 'member', 'Document', 'Image', 'nick', | |
| 272 'mbox', 'homepage', 'weblog', 'openid', 'jabberID', | |
| 273 'mbox_sha1sum', 'interest', 'topic_interest', 'topic', 'page', | |
| 274 'workplaceHomepage', 'workInfoHomepage', 'schoolHomepage', 'publications', 'currentProject', | |
| 275 'pastProject', 'account', 'OnlineAccount', 'accountName', 'accountServiceHomepage', | |
| 276 'PersonalProfileDocument', 'tipjar', 'sha1', 'thumbnail', 'logo' | |
| 277 ] | |
| 278 ) | |
| 279 ODRL2 = Namespace('http://www.w3.org/ns/odrl/2/') | |
| 280 ORG = Namespace('http://www.w3.org/ns/org#') | |
| 281 PROV = ClosedNamespace( | |
| 282 uri=URIRef('http://www.w3.org/ns/prov#'), | |
| 283 terms=[ | |
| 284 'Entity', 'Activity', 'Agent', 'wasGeneratedBy', 'wasDerivedFrom', | |
| 285 'wasAttributedTo', 'startedAtTime', 'used', 'wasInformedBy', 'endedAtTime', | |
| 286 'wasAssociatedWith', 'actedOnBehalfOf', 'Collection', 'EmptyCollection', 'Bundle', | |
| 287 'Person', 'SoftwareAgent', 'Organization', 'Location', 'alternateOf', | |
| 288 'specializationOf', 'generatedAtTime', 'hadPrimarySource', 'value', 'wasQuotedFrom', | |
| 289 'wasRevisionOf', 'invalidatedAtTime', 'wasInvalidatedBy', 'hadMember', 'wasStartedBy', | |
| 290 'wasEndedBy', 'invalidated', 'influenced', 'atLocation', 'generated', | |
| 291 'Influence', 'EntityInfluence', 'Usage', 'Start', 'End', | |
| 292 'Derivation', 'PrimarySource', 'Quotation', 'Revision', 'ActivityInfluence', | |
| 293 'Generation', 'Communication', 'Invalidation', 'AgentInfluence', | |
| 294 'Attribution', 'Association', 'Plan', 'Delegation', 'InstantaneousEvent', | |
| 295 'Role', 'wasInfluencedBy', 'qualifiedInfluence', 'qualifiedGeneration', 'qualifiedDerivation', | |
| 296 'qualifiedPrimarySource', 'qualifiedQuotation', 'qualifiedRevision', 'qualifiedAttribution', | |
| 297 'qualifiedInvalidation', 'qualifiedStart', 'qualifiedUsage', 'qualifiedCommunication', 'qualifiedAssociation', | |
| 298 'qualifiedEnd', 'qualifiedDelegation', 'influencer', 'entity', 'hadUsage', 'hadGeneration', | |
| 299 'activity', 'agent', 'hadPlan', 'hadActivity', 'atTime', 'hadRole' | |
| 300 ] | |
| 301 ) | |
| 302 PROF = Namespace('http://www.w3.org/ns/dx/prof/') | |
| 303 SDO = Namespace('https://schema.org/') | |
| 304 SH = Namespace('http://www.w3.org/ns/shacl#') | |
| 305 SKOS = ClosedNamespace( | |
| 306 uri=URIRef('http://www.w3.org/2004/02/skos/core#'), | |
| 307 terms=[ | |
| 308 # all taken from https://www.w3.org/TR/skos-reference/#L1302 | |
| 309 'Concept', 'ConceptScheme', 'inScheme', 'hasTopConcept', 'topConceptOf', | |
| 310 'altLabel', 'hiddenLabel', 'prefLabel', 'notation', 'changeNote', | |
| 311 'definition', 'editorialNote', 'example', 'historyNote', 'note', | |
| 312 'scopeNote', 'broader', 'broaderTransitive', 'narrower', 'narrowerTransitive', | |
| 313 'related', 'semanticRelation', 'Collection', 'OrderedCollection', 'member', | |
| 314 'memberList', 'broadMatch', 'closeMatch', 'exactMatch', 'mappingRelation', | |
| 315 'narrowMatch', 'relatedMatch' | |
| 316 ] | |
| 317 ) | |
| 318 SOSA = Namespace('http://www.w3.org/ns/ssn/') | |
| 319 SSN = Namespace('http://www.w3.org/ns/sosa/') | |
| 320 TIME = Namespace('http://www.w3.org/2006/time#') | |
| 321 VOID = Namespace('http://rdfs.org/ns/void#') | |
| 322 | |
| 323 | |
| 324 class NamespaceManager(object): | |
| 325 """ | |
| 326 | |
| 327 Class for managing prefix => namespace mappings | |
| 328 | |
| 329 Sample usage from FuXi ... | |
| 330 | |
| 331 .. code-block:: python | |
| 332 | |
| 333 ruleStore = N3RuleStore(additionalBuiltins=additionalBuiltins) | |
| 334 nsMgr = NamespaceManager(Graph(ruleStore)) | |
| 335 ruleGraph = Graph(ruleStore,namespace_manager=nsMgr) | |
| 336 | |
| 337 | |
| 338 and ... | |
| 339 | |
| 340 .. code-block:: pycon | |
| 341 | |
| 342 >>> import rdflib | |
| 343 >>> from rdflib import Graph | |
| 344 >>> from rdflib.namespace import Namespace, NamespaceManager | |
| 345 >>> exNs = Namespace('http://example.com/') | |
| 346 >>> namespace_manager = NamespaceManager(Graph()) | |
| 347 >>> namespace_manager.bind('ex', exNs, override=False) | |
| 348 >>> g = Graph() | |
| 349 >>> g.namespace_manager = namespace_manager | |
| 350 >>> all_ns = [n for n in g.namespace_manager.namespaces()] | |
| 351 >>> assert ('ex', rdflib.term.URIRef('http://example.com/')) in all_ns | |
| 352 >>> | |
| 353 | |
| 354 """ | |
| 355 | |
| 356 def __init__(self, graph): | |
| 357 self.graph = graph | |
| 358 self.__cache = {} | |
| 359 self.__cache_strict = {} | |
| 360 self.__log = None | |
| 361 self.__strie = {} | |
| 362 self.__trie = {} | |
| 363 for p, n in self.namespaces(): # self.bind is not always called | |
| 364 insert_trie(self.__trie, str(n)) | |
| 365 self.bind("xml", "http://www.w3.org/XML/1998/namespace") | |
| 366 self.bind("rdf", RDF) | |
| 367 self.bind("rdfs", RDFS) | |
| 368 self.bind("xsd", XSD) | |
| 369 | |
| 370 def reset(self): | |
| 371 self.__cache = {} | |
| 372 self.__strie = {} | |
| 373 self.__trie = {} | |
| 374 for p, n in self.namespaces(): # repopulate the trie | |
| 375 insert_trie(self.__trie, str(n)) | |
| 376 | |
| 377 def __get_store(self): | |
| 378 return self.graph.store | |
| 379 store = property(__get_store) | |
| 380 | |
| 381 def qname(self, uri): | |
| 382 prefix, namespace, name = self.compute_qname(uri) | |
| 383 if prefix == "": | |
| 384 return name | |
| 385 else: | |
| 386 return ":".join((prefix, name)) | |
| 387 | |
| 388 def qname_strict(self, uri): | |
| 389 prefix, namespace, name = self.compute_qname_strict(uri) | |
| 390 if prefix == '': | |
| 391 return name | |
| 392 else: | |
| 393 return ':'.join((prefix, name)) | |
| 394 | |
| 395 def normalizeUri(self, rdfTerm): | |
| 396 """ | |
| 397 Takes an RDF Term and 'normalizes' it into a QName (using the | |
| 398 registered prefix) or (unlike compute_qname) the Notation 3 | |
| 399 form for URIs: <...URI...> | |
| 400 """ | |
| 401 try: | |
| 402 namespace, name = split_uri(rdfTerm) | |
| 403 if namespace not in self.__strie: | |
| 404 insert_strie(self.__strie, self.__trie, str(namespace)) | |
| 405 namespace = URIRef(text_type(namespace)) | |
| 406 except: | |
| 407 if isinstance(rdfTerm, Variable): | |
| 408 return "?%s" % rdfTerm | |
| 409 else: | |
| 410 return "<%s>" % rdfTerm | |
| 411 prefix = self.store.prefix(namespace) | |
| 412 if prefix is None and isinstance(rdfTerm, Variable): | |
| 413 return "?%s" % rdfTerm | |
| 414 elif prefix is None: | |
| 415 return "<%s>" % rdfTerm | |
| 416 else: | |
| 417 qNameParts = self.compute_qname(rdfTerm) | |
| 418 return ':'.join([qNameParts[0], qNameParts[-1]]) | |
| 419 | |
| 420 def compute_qname(self, uri, generate=True): | |
| 421 | |
| 422 if not _is_valid_uri(uri): | |
| 423 raise ValueError( | |
| 424 '"{}" does not look like a valid URI, cannot serialize this. Did you want to urlencode it?'.format(uri) | |
| 425 ) | |
| 426 | |
| 427 if uri not in self.__cache: | |
| 428 try: | |
| 429 namespace, name = split_uri(uri) | |
| 430 except ValueError as e: | |
| 431 namespace = URIRef(uri) | |
| 432 prefix = self.store.prefix(namespace) | |
| 433 if not prefix: | |
| 434 raise e | |
| 435 if namespace not in self.__strie: | |
| 436 insert_strie(self.__strie, self.__trie, namespace) | |
| 437 | |
| 438 if self.__strie[namespace]: | |
| 439 pl_namespace = get_longest_namespace(self.__strie[namespace], uri) | |
| 440 if pl_namespace is not None: | |
| 441 namespace = pl_namespace | |
| 442 name = uri[len(namespace):] | |
| 443 | |
| 444 namespace = URIRef(namespace) | |
| 445 prefix = self.store.prefix(namespace) # warning multiple prefixes problem | |
| 446 | |
| 447 if prefix is None: | |
| 448 if not generate: | |
| 449 raise KeyError( | |
| 450 "No known prefix for {} and generate=False".format(namespace) | |
| 451 ) | |
| 452 num = 1 | |
| 453 while 1: | |
| 454 prefix = "ns%s" % num | |
| 455 if not self.store.namespace(prefix): | |
| 456 break | |
| 457 num += 1 | |
| 458 self.bind(prefix, namespace) | |
| 459 self.__cache[uri] = (prefix, namespace, name) | |
| 460 return self.__cache[uri] | |
| 461 | |
| 462 def compute_qname_strict(self, uri, generate=True): | |
| 463 # code repeated to avoid branching on strict every time | |
| 464 # if output needs to be strict (e.g. for xml) then | |
| 465 # only the strict output should bear the overhead | |
| 466 prefix, namespace, name = self.compute_qname(uri) | |
| 467 if is_ncname(text_type(name)): | |
| 468 return prefix, namespace, name | |
| 469 else: | |
| 470 if uri not in self.__cache_strict: | |
| 471 try: | |
| 472 namespace, name = split_uri(uri, NAME_START_CATEGORIES) | |
| 473 except ValueError as e: | |
| 474 message = ('This graph cannot be serialized to a strict format ' | |
| 475 'because there is no valid way to shorten {}'.format(uri)) | |
| 476 raise ValueError(message) | |
| 477 # omitted for strict since NCNames cannot be empty | |
| 478 #namespace = URIRef(uri) | |
| 479 #prefix = self.store.prefix(namespace) | |
| 480 #if not prefix: | |
| 481 #raise e | |
| 482 | |
| 483 if namespace not in self.__strie: | |
| 484 insert_strie(self.__strie, self.__trie, namespace) | |
| 485 | |
| 486 # omitted for strict | |
| 487 #if self.__strie[namespace]: | |
| 488 #pl_namespace = get_longest_namespace(self.__strie[namespace], uri) | |
| 489 #if pl_namespace is not None: | |
| 490 #namespace = pl_namespace | |
| 491 #name = uri[len(namespace):] | |
| 492 | |
| 493 namespace = URIRef(namespace) | |
| 494 prefix = self.store.prefix(namespace) # warning multiple prefixes problem | |
| 495 | |
| 496 if prefix is None: | |
| 497 if not generate: | |
| 498 raise KeyError( | |
| 499 "No known prefix for {} and generate=False".format(namespace) | |
| 500 ) | |
| 501 num = 1 | |
| 502 while 1: | |
| 503 prefix = "ns%s" % num | |
| 504 if not self.store.namespace(prefix): | |
| 505 break | |
| 506 num += 1 | |
| 507 self.bind(prefix, namespace) | |
| 508 self.__cache_strict[uri] = (prefix, namespace, name) | |
| 509 | |
| 510 return self.__cache_strict[uri] | |
| 511 | |
| 512 def bind(self, prefix, namespace, override=True, replace=False): | |
| 513 """bind a given namespace to the prefix | |
| 514 | |
| 515 if override, rebind, even if the given namespace is already | |
| 516 bound to another prefix. | |
| 517 | |
| 518 if replace, replace any existing prefix with the new namespace | |
| 519 | |
| 520 """ | |
| 521 | |
| 522 namespace = URIRef(text_type(namespace)) | |
| 523 # When documenting explain that override only applies in what cases | |
| 524 if prefix is None: | |
| 525 prefix = '' | |
| 526 bound_namespace = self.store.namespace(prefix) | |
| 527 # Check if the bound_namespace contains a URI | |
| 528 # and if so convert it into a URIRef for comparison | |
| 529 # This is to prevent duplicate namespaces with the | |
| 530 # same URI | |
| 531 if bound_namespace: | |
| 532 bound_namespace = URIRef(bound_namespace) | |
| 533 if bound_namespace and bound_namespace != namespace: | |
| 534 | |
| 535 if replace: | |
| 536 self.store.bind(prefix, namespace) | |
| 537 insert_trie(self.__trie, str(namespace)) | |
| 538 return | |
| 539 | |
| 540 # prefix already in use for different namespace | |
| 541 # | |
| 542 # append number to end of prefix until we find one | |
| 543 # that's not in use. | |
| 544 if not prefix: | |
| 545 prefix = "default" | |
| 546 num = 1 | |
| 547 while 1: | |
| 548 new_prefix = "%s%s" % (prefix, num) | |
| 549 tnamespace = self.store.namespace(new_prefix) | |
| 550 if tnamespace and namespace == URIRef(tnamespace): | |
| 551 # the prefix is already bound to the correct | |
| 552 # namespace | |
| 553 return | |
| 554 if not self.store.namespace(new_prefix): | |
| 555 break | |
| 556 num += 1 | |
| 557 self.store.bind(new_prefix, namespace) | |
| 558 else: | |
| 559 bound_prefix = self.store.prefix(namespace) | |
| 560 if bound_prefix is None: | |
| 561 self.store.bind(prefix, namespace) | |
| 562 elif bound_prefix == prefix: | |
| 563 pass # already bound | |
| 564 else: | |
| 565 if override or bound_prefix.startswith("_"): # or a generated prefix | |
| 566 self.store.bind(prefix, namespace) | |
| 567 insert_trie(self.__trie, str(namespace)) | |
| 568 | |
| 569 def namespaces(self): | |
| 570 for prefix, namespace in self.store.namespaces(): | |
| 571 namespace = URIRef(namespace) | |
| 572 yield prefix, namespace | |
| 573 | |
| 574 def absolutize(self, uri, defrag=1): | |
| 575 base = urljoin("file:", pathname2url(os.getcwd())) | |
| 576 result = urljoin("%s/" % base, uri, allow_fragments=not defrag) | |
| 577 if defrag: | |
| 578 result = urldefrag(result)[0] | |
| 579 if not defrag: | |
| 580 if uri and uri[-1] == "#" and result[-1] != "#": | |
| 581 result = "%s#" % result | |
| 582 return URIRef(result) | |
| 583 | |
| 584 # From: http://www.w3.org/TR/REC-xml#NT-CombiningChar | |
| 585 # | |
| 586 # * Name start characters must have one of the categories Ll, Lu, Lo, | |
| 587 # Lt, Nl. | |
| 588 # | |
| 589 # * Name characters other than Name-start characters must have one of | |
| 590 # the categories Mc, Me, Mn, Lm, or Nd. | |
| 591 # | |
| 592 # * Characters in the compatibility area (i.e. with character code | |
| 593 # greater than #xF900 and less than #xFFFE) are not allowed in XML | |
| 594 # names. | |
| 595 # | |
| 596 # * Characters which have a font or compatibility decomposition | |
| 597 # (i.e. those with a "compatibility formatting tag" in field 5 of the | |
| 598 # database -- marked by field 5 beginning with a "<") are not allowed. | |
| 599 # | |
| 600 # * The following characters are treated as name-start characters rather | |
| 601 # than name characters, because the property file classifies them as | |
| 602 # Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6. | |
| 603 # | |
| 604 # * Characters #x20DD-#x20E0 are excluded (in accordance with Unicode | |
| 605 # 2.0, section 5.14). | |
| 606 # | |
| 607 # * Character #x00B7 is classified as an extender, because the property | |
| 608 # list so identifies it. | |
| 609 # | |
| 610 # * Character #x0387 is added as a name character, because #x00B7 is its | |
| 611 # canonical equivalent. | |
| 612 # | |
| 613 # * Characters ':' and '_' are allowed as name-start characters. | |
| 614 # | |
| 615 # * Characters '-' and '.' are allowed as name characters. | |
| 616 | |
| 617 | |
| 618 NAME_START_CATEGORIES = ["Ll", "Lu", "Lo", "Lt", "Nl"] | |
| 619 SPLIT_START_CATEGORIES = NAME_START_CATEGORIES + ['Nd'] | |
| 620 NAME_CATEGORIES = NAME_START_CATEGORIES + ["Mc", "Me", "Mn", "Lm", "Nd"] | |
| 621 ALLOWED_NAME_CHARS = [u"\u00B7", u"\u0387", u"-", u".", u"_", u":"] | |
| 622 | |
| 623 | |
| 624 # http://www.w3.org/TR/REC-xml-names/#NT-NCName | |
| 625 # [4] NCName ::= (Letter | '_') (NCNameChar)* /* An XML Name, minus | |
| 626 # the ":" */ | |
| 627 # [5] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | CombiningChar | |
| 628 # | Extender | |
| 629 | |
| 630 | |
| 631 def is_ncname(name): | |
| 632 if name: | |
| 633 first = name[0] | |
| 634 if first == "_" or category(first) in NAME_START_CATEGORIES: | |
| 635 for i in range(1, len(name)): | |
| 636 c = name[i] | |
| 637 if not category(c) in NAME_CATEGORIES: | |
| 638 if c != ':' and c in ALLOWED_NAME_CHARS: | |
| 639 continue | |
| 640 return 0 | |
| 641 # if in compatibility area | |
| 642 # if decomposition(c)!='': | |
| 643 # return 0 | |
| 644 | |
| 645 return 1 | |
| 646 | |
| 647 return 0 | |
| 648 | |
| 649 | |
| 650 XMLNS = "http://www.w3.org/XML/1998/namespace" | |
| 651 | |
| 652 | |
| 653 def split_uri(uri, split_start=SPLIT_START_CATEGORIES): | |
| 654 if uri.startswith(XMLNS): | |
| 655 return (XMLNS, uri.split(XMLNS)[1]) | |
| 656 length = len(uri) | |
| 657 for i in range(0, length): | |
| 658 c = uri[-i - 1] | |
| 659 if not category(c) in NAME_CATEGORIES: | |
| 660 if c in ALLOWED_NAME_CHARS: | |
| 661 continue | |
| 662 for j in range(-1 - i, length): | |
| 663 if category(uri[j]) in split_start or uri[j] == "_": | |
| 664 # _ prevents early split, roundtrip not generate | |
| 665 ns = uri[:j] | |
| 666 if not ns: | |
| 667 break | |
| 668 ln = uri[j:] | |
| 669 return (ns, ln) | |
| 670 break | |
| 671 raise ValueError("Can't split '{}'".format(uri)) | |
| 672 | |
| 673 def insert_trie(trie, value): # aka get_subtrie_or_insert | |
| 674 """ Insert a value into the trie if it is not already contained in the trie. | |
| 675 Return the subtree for the value regardless of whether it is a new value | |
| 676 or not. """ | |
| 677 if value in trie: | |
| 678 return trie[value] | |
| 679 multi_check = False | |
| 680 for key in tuple(trie.keys()): | |
| 681 if len(value) > len(key) and value.startswith(key): | |
| 682 return insert_trie(trie[key], value) | |
| 683 elif key.startswith(value): # we know the value is not in the trie | |
| 684 if not multi_check: | |
| 685 trie[value] = {} | |
| 686 multi_check = True # there can be multiple longer existing prefixes | |
| 687 dict_ = trie.pop(key) # does not break strie since key<->dict_ remains unchanged | |
| 688 trie[value][key] = dict_ | |
| 689 if value not in trie: | |
| 690 trie[value] = {} | |
| 691 return trie[value] | |
| 692 | |
| 693 def insert_strie(strie, trie, value): | |
| 694 if value not in strie: | |
| 695 strie[value] = insert_trie(trie, value) | |
| 696 | |
| 697 def get_longest_namespace(trie, value): | |
| 698 for key in trie: | |
| 699 if value.startswith(key): | |
| 700 out = get_longest_namespace(trie[key], value) | |
| 701 if out is None: | |
| 702 return key | |
| 703 else: | |
| 704 return out | |
| 705 return None |
