comparison planemo/lib/python3.7/site-packages/rdflib/tools/rdfpipe.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 #!/usr/bin/env python
2 # -*- coding: UTF-8 -*-
3 """
4 A commandline tool for parsing RDF in different formats and serializing the
5 resulting graph to a chosen format.
6 """
7
8 import sys
9 from optparse import OptionParser
10 import logging
11
12 import rdflib
13 from rdflib import plugin
14 from rdflib.store import Store
15 from rdflib.graph import ConjunctiveGraph
16 from rdflib.namespace import RDF, RDFS, OWL, XSD
17 from rdflib.parser import Parser
18 from rdflib.serializer import Serializer
19
20 from rdflib.util import guess_format
21 from rdflib.py3compat import PY3
22
23
# Parser format used when no input format is given and guessing from the
# file name yields nothing (see parse_and_serialize).
DEFAULT_INPUT_FORMAT = 'xml'
# Default value of the -o/--output-format command line option.
DEFAULT_OUTPUT_FORMAT = 'n3'
26
27
def parse_and_serialize(input_files, input_format, guess,
                        outfile, output_format, ns_bindings,
                        store_conn="", store_type=None):
    """
    Parse every input into one graph and optionally serialize the result.

    :param input_files: iterable of sources handed to ``graph.parse``; the
        special value ``'-'`` means read from ``sys.stdin``.
    :param input_format: parser format specifier (optionally followed by
        ``:keywords``, see ``_format_and_kws``) or a false value.
    :param guess: when true and no ``input_format`` is given, the format is
        guessed per file with ``guess_format``, falling back to
        ``DEFAULT_INPUT_FORMAT``.
    :param outfile: destination passed to ``graph.serialize``; a false value
        disables serialization.
    :param output_format: serializer format specifier (same syntax as
        ``input_format``).
    :param ns_bindings: mapping of prefix to namespace URI bound on the
        graph without overriding existing bindings.
    :param store_conn: configuration string passed to ``store.open``.
    :param store_type: name of a ``Store`` plugin to back the graph with; a
        false value uses the default ``ConjunctiveGraph()``.
    """
    if store_type:
        store = plugin.get(store_type, Store)()
        store.open(store_conn)
        graph = ConjunctiveGraph(store)
    else:
        store = None
        graph = ConjunctiveGraph()

    try:
        for prefix, uri in ns_bindings.items():
            graph.namespace_manager.bind(prefix, uri, override=False)

        # The explicit input format is the same for every file, so parse the
        # specifier once instead of once per input.
        base_format, parse_kws = _format_and_kws(input_format)
        for fpath in input_files:
            use_format = base_format
            if fpath == '-':
                fpath = sys.stdin
            elif not input_format and guess:
                use_format = guess_format(fpath) or DEFAULT_INPUT_FORMAT
            graph.parse(fpath, format=use_format, **parse_kws)

        if outfile:
            output_format, kws = _format_and_kws(output_format)
            kws.setdefault('base', None)
            graph.serialize(destination=outfile, format=output_format, **kws)
    finally:
        # Roll back even when parsing or serializing fails. Compare against
        # None rather than relying on truthiness: store objects may define
        # __len__, in which case an empty store would evaluate as false and
        # the rollback would be skipped.
        if store is not None:
            store.rollback()
58
59
60 def _format_and_kws(fmt):
61 """
62 >>> _format_and_kws("fmt")
63 ('fmt', {})
64 >>> _format_and_kws("fmt:+a")
65 ('fmt', {'a': True})
66 >>> _format_and_kws("fmt:a")
67 ('fmt', {'a': True})
68 >>> _format_and_kws("fmt:+a,-b") #doctest: +SKIP
69 ('fmt', {'a': True, 'b': False})
70 >>> _format_and_kws("fmt:c=d")
71 ('fmt', {'c': 'd'})
72 >>> _format_and_kws("fmt:a=b:c")
73 ('fmt', {'a': 'b:c'})
74 """
75 fmt, kws = fmt, {}
76 if fmt and ':' in fmt:
77 fmt, kwrepr = fmt.split(':', 1)
78 for kw in kwrepr.split(','):
79 if '=' in kw:
80 k, v = kw.split('=')
81 kws[k] = v
82 elif kw.startswith('-'):
83 kws[kw[1:]] = False
84 elif kw.startswith('+'):
85 kws[kw[1:]] = True
86 else: # same as "+"
87 kws[kw] = True
88 return fmt, kws
89
90
def make_option_parser():
    """
    Build the ``OptionParser`` describing the command line of this tool.

    The help texts list the names of the registered parser and serializer
    plugins, so those are looked up each time this function is called.
    """
    parser_names = _get_plugin_names(Parser)
    serializer_names = _get_plugin_names(Serializer)
    kw_example = "FORMAT:(+)KW1,-KW2,KW3=VALUE"

    usage_text = ("%prog [-h] [-i INPUT_FORMAT] [-o OUTPUT_FORMAT] "
                  "[--ns=PFX=NS ...] [-] [FILE ...]")
    description_text = "".join([
        __doc__.strip(),
        " Reads file system paths, URLs or from stdin if '-' is given.",
        " The result is serialized to stdout.",
    ])
    version_text = "%prog " + "(using rdflib %s)" % rdflib.__version__

    input_help = "".join([
        "Format of the input document(s)."
        " Available input formats are: %s." % parser_names,
        " If no format is given, it will be ",
        "guessed from the file name extension.",
        " Keywords to parser can be given after format like: %s." % kw_example,
    ])
    # '%default' is left in place on purpose: optparse substitutes it with
    # the option's default when rendering the help.
    output_help = "".join([
        "Format of the graph serialization."
        " Available output formats are: %s." % serializer_names,
        " Default format is: '%default'.",
        " Keywords to serializer can be given after format like: %s."
        % kw_example,
    ])
    ns_help = ("Register a namespace binding (QName prefix to a base URI). "
               "This can be used more than once.")
    no_out_help = ("Don't output the resulting graph "
                   "(useful for checking validity of input).")

    oparser = OptionParser(usage_text,
                           description=description_text,
                           version=version_text)

    # No default for the input format: when it is absent the format is
    # guessed from the file name (unless --no-guess is given).
    oparser.add_option('-i', '--input-format',
                       type=str,
                       help=input_help,
                       metavar="INPUT_FORMAT")
    oparser.add_option('-o', '--output-format',
                       type=str, default=DEFAULT_OUTPUT_FORMAT,
                       help=output_help,
                       metavar="OUTPUT_FORMAT")
    oparser.add_option('--ns',
                       action="append", type=str,
                       help=ns_help,
                       metavar="PREFIX=NAMESPACE")
    oparser.add_option('--no-guess', dest='guess',
                       action='store_false', default=True,
                       help="Don't guess format based on file suffix.")
    oparser.add_option('--no-out',
                       action='store_true', default=False,
                       help=no_out_help)
    oparser.add_option('-w', '--warn',
                       action='store_true', default=False,
                       help="Output warnings to stderr "
                            "(by default only critical errors).")

    return oparser
149
def _get_plugin_names(kind):
    """Return the names of all registered plugins of *kind*, joined by ', '."""
    names = [entry.name for entry in plugin.plugins(kind=kind)]
    return ", ".join(names)
152
153
def main():
    """
    Command line entry point: parse the arguments and run the conversion.

    Prints the usage and exits when no input is given. Each ``--ns`` value
    must have the form ``PREFIX=NAMESPACE``; a value without ``=`` is
    reported through the option parser's error handling instead of
    surfacing as an unhandled ``ValueError``.
    """
    oparser = make_option_parser()
    opts, args = oparser.parse_args()
    if not args:
        oparser.print_usage()
        oparser.exit()

    loglevel = logging.WARNING if opts.warn else logging.CRITICAL
    logging.basicConfig(level=loglevel)

    ns_bindings = {}
    for ns_kw in opts.ns or ():
        # Split on the first '=' only: namespace URIs may themselves contain
        # '=' (e.g. in a query string).
        pfx, sep, uri = ns_kw.partition('=')
        if not sep:
            oparser.error(
                "--ns expects PREFIX=NAMESPACE, got %r" % ns_kw)
        ns_bindings[pfx] = uri

    outfile = sys.stdout
    if PY3:
        # The destination is switched to the underlying binary buffer when
        # PY3 is set.
        outfile = sys.stdout.buffer

    if opts.no_out:
        outfile = None

    parse_and_serialize(args, opts.input_format, opts.guess,
                        outfile, opts.output_format, ns_bindings)


if __name__ == "__main__":
    main()