Mercurial > repos > guerler > springsuite
view planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/trix.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
line wrap: on
line source
""" A TriX parser for RDFLib """ from rdflib.namespace import Namespace from rdflib.term import URIRef from rdflib.term import BNode from rdflib.term import Literal from rdflib.graph import Graph, ConjunctiveGraph from rdflib.exceptions import ParserError from rdflib.parser import Parser from xml.sax.saxutils import handler from xml.sax import make_parser from xml.sax.handler import ErrorHandler __all__ = ['create_parser', 'TriXHandler', 'TriXParser'] TRIXNS = Namespace("http://www.w3.org/2004/03/trix/trix-1/") XMLNS = Namespace("http://www.w3.org/XML/1998/namespace") class TriXHandler(handler.ContentHandler): """An Sax Handler for TriX. See http://sw.nokia.com/trix/""" def __init__(self, store): self.store = store self.preserve_bnode_ids = False self.reset() def reset(self): self.bnode = {} self.graph = None self.triple = None self.state = 0 self.lang = None self.datatype = None # ContentHandler methods def setDocumentLocator(self, locator): self.locator = locator def startDocument(self): pass def startPrefixMapping(self, prefix, namespace): pass def endPrefixMapping(self, prefix): pass def startElementNS(self, name, qname, attrs): if name[0] != str(TRIXNS): self.error( "Only elements in the TriX namespace are allowed. %s!=%s" % (name[0], TRIXNS)) if name[1] == "TriX": if self.state == 0: self.state = 1 else: self.error("Unexpected TriX element") elif name[1] == "graph": if self.state == 1: self.state = 2 else: self.error("Unexpected graph element") elif name[1] == "uri": if self.state == 2: # the context uri self.state = 3 elif self.state == 4: # part of a triple pass else: self.error("Unexpected uri element") elif name[1] == "triple": if self.state == 2: if self.graph is None: # anonymous graph, create one with random bnode id self.graph = Graph(store=self.store) # start of a triple self.triple = [] self.state = 4 else: self.error("Unexpected triple element") elif name[1] == "typedLiteral": if self.state == 4: # part of triple self.lang = None self.datatype = None try: self.lang = attrs.getValue((str(XMLNS), "lang")) except: # language not required - ignore pass try: self.datatype = attrs.getValueByQName("datatype") except KeyError: self.error("No required attribute 'datatype'") else: self.error("Unexpected typedLiteral element") elif name[1] == "plainLiteral": if self.state == 4: # part of triple self.lang = None self.datatype = None try: self.lang = attrs.getValue((str(XMLNS), "lang")) except: # language not required - ignore pass else: self.error("Unexpected plainLiteral element") elif name[1] == "id": if self.state == 2: # the context uri self.state = 3 elif self.state == 4: # part of triple pass else: self.error("Unexpected id element") else: self.error("Unknown element %s in TriX namespace" % name[1]) self.chars = "" def endElementNS(self, name, qname): if name[0] != str(TRIXNS): self.error( "Only elements in the TriX namespace are allowed. %s!=%s" % (name[0], TRIXNS)) if name[1] == "uri": if self.state == 3: self.graph = Graph(store=self.store, identifier=URIRef(self.chars.strip())) self.state = 2 elif self.state == 4: self.triple += [URIRef(self.chars.strip())] else: self.error( "Illegal internal self.state - This should never " + "happen if the SAX parser ensures XML syntax correctness") elif name[1] == "id": if self.state == 3: self.graph = Graph(self.store, identifier=self.get_bnode( self.chars.strip())) self.state = 2 elif self.state == 4: self.triple += [self.get_bnode(self.chars.strip())] else: self.error( "Illegal internal self.state - This should never " + "happen if the SAX parser ensures XML syntax correctness") elif name[1] == "plainLiteral" or name[1] == "typedLiteral": if self.state == 4: self.triple += [Literal( self.chars, lang=self.lang, datatype=self.datatype)] else: self.error( "This should never happen if the SAX parser " + "ensures XML syntax correctness") elif name[1] == "triple": if self.state == 4: if len(self.triple) != 3: self.error("Triple has wrong length, got %d elements: %s" % (len(self.triple), self.triple)) self.graph.add(self.triple) # self.store.store.add(self.triple,context=self.graph) # self.store.addN([self.triple+[self.graph]]) self.state = 2 else: self.error( "This should never happen if the SAX parser " + "ensures XML syntax correctness") elif name[1] == "graph": self.graph = None self.state = 1 elif name[1] == "TriX": self.state = 0 else: self.error("Unexpected close element") def get_bnode(self, label): if self.preserve_bnode_ids: bn = BNode(label) else: if label in self.bnode: bn = self.bnode[label] else: bn = BNode(label) self.bnode[label] = bn return bn def characters(self, content): self.chars += content def ignorableWhitespace(self, content): pass def processingInstruction(self, target, data): pass def error(self, message): locator = self.locator info = "%s:%s:%s: " % ( locator.getSystemId(), locator.getLineNumber(), locator.getColumnNumber()) raise ParserError(info + message) def create_parser(store): parser = make_parser() try: # Workaround for bug in expatreader.py. Needed when # expatreader is trying to guess a prefix. parser.start_namespace_decl( "xml", "http://www.w3.org/XML/1998/namespace") except AttributeError: pass # Not present in Jython (at least) parser.setFeature(handler.feature_namespaces, 1) trix = TriXHandler(store) parser.setContentHandler(trix) parser.setErrorHandler(ErrorHandler()) return parser class TriXParser(Parser): """A parser for TriX. See http://sw.nokia.com/trix/""" def __init__(self): pass def parse(self, source, sink, **args): assert sink.store.context_aware, ( "TriXParser must be given a context aware store.") self._parser = create_parser(sink.store) content_handler = self._parser.getContentHandler() preserve_bnode_ids = args.get("preserve_bnode_ids", None) if preserve_bnode_ids is not None: content_handler.preserve_bnode_ids = preserve_bnode_ids # We're only using it once now # content_handler.reset() # self._parser.reset() self._parser.parse(source)