Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/tools/rdfpipe.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: UTF-8 -*- | |
3 """ | |
4 A commandline tool for parsing RDF in different formats and serializing the | |
5 resulting graph to a chosen format. | |
6 """ | |
7 | |
8 import sys | |
9 from optparse import OptionParser | |
10 import logging | |
11 | |
12 import rdflib | |
13 from rdflib import plugin | |
14 from rdflib.store import Store | |
15 from rdflib.graph import ConjunctiveGraph | |
16 from rdflib.namespace import RDF, RDFS, OWL, XSD | |
17 from rdflib.parser import Parser | |
18 from rdflib.serializer import Serializer | |
19 | |
20 from rdflib.util import guess_format | |
21 from rdflib.py3compat import PY3 | |
22 | |
23 | |
# Parser format used as the fallback when guessing from the file name fails.
DEFAULT_INPUT_FORMAT = 'xml'
# Serializer format used as the default of the -o/--output-format option.
DEFAULT_OUTPUT_FORMAT = 'n3'
26 | |
27 | |
def parse_and_serialize(input_files, input_format, guess,
                        outfile, output_format, ns_bindings,
                        store_conn="", store_type=None):
    """
    Parse every input into one graph and optionally serialize the result.

    :param input_files: iterable of inputs handed to ``graph.parse``; the
        value ``'-'`` is replaced by ``sys.stdin``.
    :param input_format: parser format, optionally followed by parser
        keywords in the syntax understood by ``_format_and_kws``; may be
        ``None``.
    :param guess: when true and no ``input_format`` is given, the format of
        each named input is taken from ``guess_format``, falling back to
        ``DEFAULT_INPUT_FORMAT``.
    :param outfile: destination passed to ``graph.serialize``; nothing is
        serialized when it is falsy.
    :param output_format: serializer format, optionally followed by
        serializer keywords in the same syntax as ``input_format``.
    :param ns_bindings: mapping of prefix to namespace URI; each pair is
        bound on the graph with ``override=False``.
    :param store_conn: configuration string passed to ``store.open``.
    :param store_type: name of a ``Store`` plugin to back the graph; when
        falsy a default ``ConjunctiveGraph()`` is used.
    """
    if store_type:
        store = plugin.get(store_type, Store)()
        store.open(store_conn)
        graph = ConjunctiveGraph(store)
    else:
        store = None
        graph = ConjunctiveGraph()

    for prefix, uri in ns_bindings.items():
        graph.namespace_manager.bind(prefix, uri, override=False)

    for fpath in input_files:
        # Re-parse the format spec per input so each parse call gets its
        # own keyword dict.
        use_format, kws = _format_and_kws(input_format)
        if fpath == '-':
            fpath = sys.stdin
        elif not input_format and guess:
            use_format = guess_format(fpath) or DEFAULT_INPUT_FORMAT
        graph.parse(fpath, format=use_format, **kws)

    if outfile:
        output_format, kws = _format_and_kws(output_format)
        kws.setdefault('base', None)
        graph.serialize(destination=outfile, format=output_format, **kws)

    # Compare with None explicitly instead of testing truthiness: a store
    # object may implement __len__ (NOTE(review): rdflib stores appear to),
    # in which case an empty store would be falsy and the rollback skipped.
    if store is not None:
        store.rollback()
58 | |
59 | |
60 def _format_and_kws(fmt): | |
61 """ | |
62 >>> _format_and_kws("fmt") | |
63 ('fmt', {}) | |
64 >>> _format_and_kws("fmt:+a") | |
65 ('fmt', {'a': True}) | |
66 >>> _format_and_kws("fmt:a") | |
67 ('fmt', {'a': True}) | |
68 >>> _format_and_kws("fmt:+a,-b") #doctest: +SKIP | |
69 ('fmt', {'a': True, 'b': False}) | |
70 >>> _format_and_kws("fmt:c=d") | |
71 ('fmt', {'c': 'd'}) | |
72 >>> _format_and_kws("fmt:a=b:c") | |
73 ('fmt', {'a': 'b:c'}) | |
74 """ | |
75 fmt, kws = fmt, {} | |
76 if fmt and ':' in fmt: | |
77 fmt, kwrepr = fmt.split(':', 1) | |
78 for kw in kwrepr.split(','): | |
79 if '=' in kw: | |
80 k, v = kw.split('=') | |
81 kws[k] = v | |
82 elif kw.startswith('-'): | |
83 kws[kw[1:]] = False | |
84 elif kw.startswith('+'): | |
85 kws[kw[1:]] = True | |
86 else: # same as "+" | |
87 kws[kw] = True | |
88 return fmt, kws | |
89 | |
90 | |
def make_option_parser():
    """
    Build the ``OptionParser`` that describes this tool's command line.

    The help texts of ``-i`` and ``-o`` embed the plugin names returned by
    ``_get_plugin_names`` for ``Parser`` and ``Serializer`` respectively.
    """
    input_formats = _get_plugin_names(Parser)
    output_formats = _get_plugin_names(Serializer)
    kw_example = "FORMAT:(+)KW1,-KW2,KW3=VALUE"

    usage = (
        "%prog [-h] [-i INPUT_FORMAT] [-o OUTPUT_FORMAT]"
        " [--ns=PFX=NS ...] [-] [FILE ...]")
    description = "".join([
        __doc__.strip(),
        " Reads file system paths, URLs or from stdin if '-' is given.",
        " The result is serialized to stdout.",
    ])
    version = "%prog " + "(using rdflib %s)" % (rdflib.__version__,)

    input_help = (
        "Format of the input document(s)."
        " Available input formats are: %s."
        " If no format is given, it will be guessed from the file name"
        " extension."
        " Keywords to parser can be given after format like: %s."
    ) % (input_formats, kw_example)

    # '%default' must reach optparse untouched, so that piece is kept out
    # of any %-formatting.
    output_help = "".join([
        "Format of the graph serialization."
        " Available output formats are: %s." % (output_formats,),
        " Default format is: '%default'.",
        " Keywords to serializer can be given after format like: %s."
        % (kw_example,),
    ])

    oparser = OptionParser(
        usage=usage, description=description, version=version)

    oparser.add_option(
        '-i', '--input-format',
        type=str,
        help=input_help,
        metavar="INPUT_FORMAT")

    oparser.add_option(
        '-o', '--output-format',
        type=str, default=DEFAULT_OUTPUT_FORMAT,
        help=output_help,
        metavar="OUTPUT_FORMAT")

    oparser.add_option(
        '--ns',
        action="append", type=str,
        help=("Register a namespace binding (QName prefix to a base URI)."
              " This can be used more than once."),
        metavar="PREFIX=NAMESPACE")

    oparser.add_option(
        '--no-guess', dest='guess',
        action='store_false', default=True,
        help="Don't guess format based on file suffix.")

    oparser.add_option(
        '--no-out',
        action='store_true', default=False,
        help=("Don't output the resulting graph"
              " (useful for checking validity of input)."))

    oparser.add_option(
        '-w', '--warn',
        action='store_true', default=False,
        help="Output warnings to stderr (by default only critical errors).")

    return oparser
149 | |
def _get_plugin_names(kind):
    """Return the names of all plugins of ``kind``, joined by ``", "``."""
    names = [entry.name for entry in plugin.plugins(kind=kind)]
    return ", ".join(names)
152 | |
153 | |
def main():
    """
    Command line entry point.

    Parses the command line, prints the usage and exits when no input is
    given, configures logging (``WARNING`` with ``-w``, otherwise
    ``CRITICAL``), collects the ``--ns`` bindings and hands everything to
    ``parse_and_serialize``. A ``--ns`` value without ``=`` is reported
    through ``oparser.error``.
    """
    oparser = make_option_parser()
    opts, args = oparser.parse_args()
    if not args:
        oparser.print_usage()
        oparser.exit()

    loglevel = logging.WARNING if opts.warn else logging.CRITICAL
    logging.basicConfig(level=loglevel)

    ns_bindings = {}
    for ns_kw in opts.ns or ():
        # Split on the first '=' only: a namespace URI may itself contain
        # '=' (for instance in a query string).
        pfx, sep, uri = ns_kw.partition('=')
        if not sep:
            oparser.error(
                "invalid --ns value %r (expected PREFIX=NAMESPACE)"
                % (ns_kw,))
        ns_bindings[pfx] = uri

    if opts.no_out:
        outfile = None
    elif PY3:
        # The binary buffer of stdout is used on Python 3.
        outfile = sys.stdout.buffer
    else:
        outfile = sys.stdout

    parse_and_serialize(args, opts.input_format, opts.guess,
                        outfile, opts.output_format, ns_bindings)
182 | |
183 | |
# Run the command line tool only when this module is executed as a script.
if __name__ == "__main__":
    main()