Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/rdflib/plugins/parsers/hturtle.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author | shellac |
---|---|
date | Mon, 01 Jun 2020 08:59:25 -0400 |
parents | 79f47841a781 |
children |
comparison
equal
deleted
inserted
replaced
4:79f47841a781 | 5:9b1c78e6ba9c |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 Extraction parser for RDF embedded verbatim into HTML or XML files. This is based | |
4 on: | |
5 | |
6 * The specification on embedding turtle into html: | |
7 http://www.w3.org/TR/turtle/#in-html | |
8 | |
9 For SVG (and currently SVG only) the method also extracts embedded RDF/XML | |
10 data, per SVG specification | |
11 | |
12 License: W3C Software License, | |
13 http://www.w3.org/Consortium/Legal/copyright-software | |
14 Author: Ivan Herman | |
15 Copyright: W3C | |
16 """ | |
17 | |
18 from rdflib.parser import Parser | |
19 from .pyRdfa import pyRdfa, Options | |
20 from .pyRdfa.state import ExecutionContext | |
21 from .pyRdfa.embeddedRDF import handle_embeddedRDF | |
22 from .structureddata import _get_orig_source, _check_error | |
23 | |
# Probe for the optional html5lib dependency. After this block the module-level
# name ``html5lib`` is a plain boolean flag (True when the import succeeded,
# False otherwise), not the module object itself.
try:
    import html5lib
except ImportError:
    import warnings
    warnings.warn(
        'html5lib not found! RDFa and Microdata parsers '
        'will not be available.')
    html5lib = False
else:
    assert html5lib
    html5lib = True
34 | |
35 | |
class HTurtle(pyRdfa):
    """
    A pyRdfa (RDFa 1.1) processor repurposed so that it only extracts RDF
    embedded in the document (hturtle), instead of doing RDFa processing.
    """
    def __init__(self, options=None, base="", media_type=""):
        """
        Initialise the underlying pyRdfa processor, pinned to RDFa
        version "1.1".
        """
        pyRdfa.__init__(
            self,
            options=options,
            base=base,
            media_type=media_type,
            rdfa_version="1.1",
        )

    def graph_from_DOM(self, dom, graph, pgraph=None):
        """
        Replacement for the parsing entry point of the parent class: walk
        the DOM tree and hand every element to ``handle_embeddedRDF``.

        @param dom: DOM document; its ``documentElement`` is the start of
        the traversal
        @param graph: graph passed on to ``handle_embeddedRDF`` as the
        extraction target
        @keyword pgraph: if not None, receives a copy of the triples and
        namespace bindings of ``self.options.processor_graph.graph``
        """

        def _merge_into(target, origin):
            # Copy the triples first, then the prefix bindings.
            for triple in origin:
                target.add(triple)
            for prefix, namespace in origin.namespaces():
                target.bind(prefix, namespace)

        def _walk(element, out_graph, context):
            # A truthy result means embedded RDF content was extracted
            # from this element; its subtree is not visited any further.
            if handle_embeddedRDF(element, out_graph, context):
                return
            # Otherwise descend into the element children only.
            for child in element.childNodes:
                if child.nodeType == element.ELEMENT_NODE:
                    _walk(child, out_graph, context)

        root = dom.documentElement
        context = ExecutionContext(
            root, graph, base=self.base,
            options=self.options, rdfa_version="1.1")
        _walk(root, graph, context)

        if pgraph is None:
            return
        _merge_into(pgraph, self.options.processor_graph.graph)
73 | |
74 # This is the parser interface as it would look when called from the rest of | |
75 # RDFLib | |
76 | |
77 | |
class HTurtleParser(Parser):
    """
    RDFLib parser plugin that runs the L{HTurtle} processor over an input
    source to extract the RDF embedded in it.
    """
    def parse(self, source, graph, pgraph=None, media_type=""):
        """
        @param source: one of the input sources that the RDFLib package defined
        @type source: InputSource class instance
        @param graph: target graph for the triples; output graph, in RDFa spec.
        parlance
        @type graph: RDFLib Graph
        @keyword pgraph: optional processor graph; handed on unchanged to
        L{_process}
        @keyword media_type: explicit setting of the preferred media type
        (a.k.a. content type) of the RDFa source. None means the content
        type of the HTTP result is used, or a guess is made based on the
        suffix of a file
        @type media_type: string
        @raise ImportError: if html5lib could not be imported
        """
        if html5lib is False:
            raise ImportError(
                'html5lib is not installed, cannot ' +
                'use RDFa and Microdata parsers.')

        (baseURI, orig_source) = _get_orig_source(source)
        # pgraph must be passed by keyword: _process takes
        # (graph, baseURI, orig_source) positionally, so an extra positional
        # argument would collide with media_type and raise TypeError.
        self._process(
            graph, baseURI, orig_source,
            media_type=media_type, pgraph=pgraph)

    def _process(self, graph, baseURI, orig_source, media_type="",
                 pgraph=None):
        """
        Build the processing options, run an L{HTurtle} processor over
        C{orig_source} and check the resulting graph for error triples.

        @param graph: target graph for the extracted triples
        @param baseURI: base URI handed to the processor
        @param orig_source: source handed to C{graph_from_source}
        @keyword media_type: preferred media type; None is treated as ""
        @keyword pgraph: optional processor graph, forwarded to
        C{graph_from_source}; defaults to None, which matches the value
        that was previously hard-coded in that call
        """
        self.options = Options(output_processor_graph=None,
                               embedded_rdf=True,
                               vocab_expansion=False,
                               vocab_cache=False)

        if media_type is None:
            media_type = ""
        processor = HTurtle(
            self.options, base=baseURI, media_type=media_type)
        processor.graph_from_source(
            orig_source, graph=graph, pgraph=pgraph, rdfOutput=False)
        # get possible error triples to raise exceptions
        _check_error(graph)