Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/rdflib/util.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author | shellac |
---|---|
date | Mon, 01 Jun 2020 08:59:25 -0400 |
parents | 79f47841a781 |
children |
comparison
equal
deleted
inserted
replaced
4:79f47841a781 | 5:9b1c78e6ba9c |
---|---|
1 """ | |
2 Some utility functions. | |
3 | |
4 Miscellaneous utilities | |
5 | |
6 * list2set | |
7 * first | |
8 * uniq | |
9 * more_than | |
10 | |
11 Term characterisation and generation | |
12 | |
13 * to_term | |
14 * from_n3 | |
15 | |
16 Date/time utilities | |
17 | |
18 * date_time | |
19 * parse_date_time | |
20 | |
21 Statement and component type checkers | |
22 | |
23 * check_context | |
24 * check_subject | |
25 * check_predicate | |
26 * check_object | |
27 * check_statement | |
28 * check_pattern | |
29 | |
30 """ | |
31 | |
32 from calendar import timegm | |
33 from time import altzone | |
34 # from time import daylight | |
35 from time import gmtime | |
36 from time import localtime | |
37 from time import time | |
38 from time import timezone | |
39 | |
40 from os.path import splitext | |
41 from io import StringIO | |
42 | |
43 from rdflib.exceptions import ContextTypeError | |
44 from rdflib.exceptions import ObjectTypeError | |
45 from rdflib.exceptions import PredicateTypeError | |
46 from rdflib.exceptions import SubjectTypeError | |
47 from rdflib.graph import Graph | |
48 from rdflib.graph import QuotedGraph | |
49 from rdflib.namespace import Namespace | |
50 from rdflib.namespace import NamespaceManager | |
51 from rdflib.term import BNode | |
52 from rdflib.term import Literal | |
53 from rdflib.term import URIRef | |
54 from rdflib.py3compat import sign | |
55 | |
# Names exported by a star-import of this module.
__all__ = [
    'list2set', 'first', 'uniq', 'more_than', 'to_term', 'from_n3',
    'date_time', 'parse_date_time', 'check_context', 'check_subject',
    'check_predicate', 'check_object', 'check_statement', 'check_pattern',
    'guess_format', 'find_roots', 'get_tree']
61 | |
62 | |
def list2set(seq):
    """
    Build a duplicate-free list from ``seq``.

    Only the first occurrence of each item is kept, and the original
    ordering is retained (a plain ``set(seq)`` would not keep it).
    Items must be hashable.
    """
    unique_items = []
    already_seen = set()
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique_items.append(item)
    return unique_items
70 | |
71 | |
def first(seq):
    """
    Give back the leading item of an iterable, or ``None`` when it is empty.

    For graphs, use graph.value instead.
    """
    return next(iter(seq), None)
80 | |
81 | |
def uniq(sequence, strip=0):
    """Collapse ``sequence`` into a set, dropping duplicate strings.

    When ``strip`` is truthy, every item has ``.strip()`` applied before it
    is added to the set.
    """
    if not strip:
        return set(sequence)
    return {entry.strip() for entry in sequence}
88 | |
89 | |
def more_than(sequence, number):
    "Returns 1 as soon as sequence yields more than number items, else 0."
    # Counting starts at 1 so the comparison reads "items seen so far".
    for seen_so_far, _ in enumerate(sequence, 1):
        if seen_so_far > number:
            return 1
    return 0
98 | |
99 | |
def to_term(s, default=None):
    """
    Creates and returns an Identifier of type corresponding
    to the pattern of the given positional argument string ``s``:

    '' returns the ``default`` keyword argument value or ``None``

    '<s>' returns ``URIRef(s)`` (i.e. without angle brackets)

    '"s"' returns ``Literal(s)`` (i.e. without doublequotes)

    '_s' returns ``BNode('_s')`` (the leading underscore is kept as part
    of the identifier)

    :param s: the string to convert; any falsy value yields ``default``.
    :param default: value returned when ``s`` is empty or ``None``.
    :raises ValueError: if ``s`` matches none of the patterns above.
    """
    if not s:
        return default
    elif s.startswith("<") and s.endswith(">"):
        return URIRef(s[1:-1])
    elif s.startswith('"') and s.endswith('"'):
        return Literal(s[1:-1])
    elif s.startswith("_"):
        return BNode(s)
    else:
        # ValueError is a subclass of Exception, so callers that caught the
        # previous bare Exception keep working.
        raise ValueError("Unrecognised term syntax: '%s'" % s)
125 | |
126 | |
def from_n3(s, default=None, backend=None, nsm=None):
    r'''
    Turn a single N3 term given as a string into the matching object.

    The kind of term is decided by inspecting the string, in this order:
    empty (``default`` is returned), ``<...>`` URI reference, quoted
    literal (optionally with ``@lang`` or ``^^datatype``), ``true`` /
    ``false``, a run of digits, ``{...}`` quoted graph, ``[...]`` graph,
    ``_:`` blank node, ``prefix:name`` resolved through ``nsm`` (a default
    ``NamespaceManager`` is created when ``nsm`` is ``None``), and finally
    anything else becomes a blank node.

    >>> from_n3('<http://ex.com/foo>') == URIRef('http://ex.com/foo')
    True
    >>> from_n3('"foo"@de') == Literal('foo', lang='de')
    True
    >>> from_n3('"""multi\nline\nstring"""@en') == Literal(
    ...     'multi\nline\nstring', lang='en')
    True
    >>> from_n3('42') == Literal(42)
    True
    >>> from_n3(Literal(42).n3()) == Literal(42)
    True
    >>> from_n3('"42"^^xsd:integer') == Literal(42)
    True
    >>> from rdflib import RDFS
    >>> from_n3('rdfs:label') == RDFS['label']
    True
    >>> nsm = NamespaceManager(Graph())
    >>> nsm.bind('dbpedia', 'http://dbpedia.org/resource/')
    >>> berlin = URIRef('http://dbpedia.org/resource/Berlin')
    >>> from_n3('dbpedia:Berlin', nsm=nsm) == berlin
    True

    '''
    if not s:
        return default

    if s.startswith('<'):
        # Drop the first and last character (the angle brackets).
        return URIRef(s[1:-1])

    if s.startswith('"'):
        delimiter = '"""' if s.startswith('"""') else '"'
        # Everything after the last closing delimiter is the lang/datatype
        # suffix; everything before it is the (still quoted) lexical form.
        lexical, suffix = s.rsplit(delimiter, 1)
        lexical = lexical[len(delimiter):]  # strip leading quotes
        language = None
        datatype = None

        # A datatype takes precedence over a language tag, so look for it
        # first.  The datatype has to come after any lang-tag, hence rfind.
        # see: http://www.w3.org/TR/2011/WD-turtle-20110809/
        #      #prod-turtle2-RDFLiteral
        marker = suffix.rfind('^^')
        if marker >= 0:
            datatype = from_n3(suffix[marker + 2:], default, backend, nsm)
        elif suffix.startswith("@"):
            language = suffix[1:]  # strip leading at sign

        lexical = lexical.replace(r'\"', '"')
        # Hack: this should correctly handle strings with either native
        # unicode characters, or \u1234 unicode escapes.
        lexical = lexical.encode("raw-unicode-escape").decode("unicode-escape")
        return Literal(lexical, language, datatype)

    if s in ('true', 'false'):
        return Literal(s == 'true')

    if s.isdigit():
        return Literal(int(s))

    if s.startswith('{'):
        return QuotedGraph(backend, from_n3(s[1:-1]))

    if s.startswith('['):
        return Graph(backend, from_n3(s[1:-1]))

    if s.startswith("_:"):
        return BNode(s[2:])

    if ':' not in s:
        return BNode(s)

    if nsm is None:
        # instantiate default NamespaceManager and rely on its defaults
        nsm = NamespaceManager(Graph())
    prefix, local_name = s.split(':', 1)
    namespace_uri = dict(nsm.namespaces())[prefix]
    return Namespace(namespace_uri)[local_name]
206 | |
207 | |
def check_context(c):
    """Raise ``ContextTypeError`` unless ``c`` is a ``URIRef`` or ``BNode``."""
    if isinstance(c, (URIRef, BNode)):
        return
    raise ContextTypeError("%s:%s" % (c, type(c)))
212 | |
213 | |
def check_subject(s):
    """Raise ``SubjectTypeError`` unless ``s`` is a ``URIRef`` or ``BNode``."""
    if isinstance(s, (URIRef, BNode)):
        return
    raise SubjectTypeError(s)
218 | |
219 | |
def check_predicate(p):
    """Raise ``PredicateTypeError`` unless ``p`` is a ``URIRef``."""
    if isinstance(p, URIRef):
        return
    raise PredicateTypeError(p)
224 | |
225 | |
def check_object(o):
    """Raise ``ObjectTypeError`` unless ``o`` is a ``URIRef``, ``Literal``
    or ``BNode``."""
    if isinstance(o, (URIRef, Literal, BNode)):
        return
    raise ObjectTypeError(o)
232 | |
233 | |
def check_statement(triple):
    """Validate every component of a ``(subject, predicate, object)`` triple.

    The components are checked in that order and the first invalid one
    raises ``SubjectTypeError``, ``PredicateTypeError`` or
    ``ObjectTypeError`` respectively.
    """
    subject, predicate, obj = triple
    check_subject(subject)
    check_predicate(predicate)
    check_object(obj)
246 | |
247 | |
def check_pattern(triple):
    """Validate a ``(subject, predicate, object)`` triple pattern.

    Works like a statement check, except that any component that is falsy
    (for instance ``None``) is skipped instead of being type-checked.
    """
    subject, predicate, obj = triple
    if subject:
        check_subject(subject)
    if predicate:
        check_predicate(predicate)
    if obj:
        check_object(obj)
260 | |
261 | |
def date_time(t=None, local_time_zone=False):
    """Format a POSIX timestamp as a W3C date-time string.

    See http://www.w3.org/TR/NOTE-datetime ex: 1997-07-16T19:20:30Z

    :param t: seconds since the epoch; ``None`` (the default) means "now".
    :param local_time_zone: when true, render the local wall-clock time
        followed by its numeric UTC offset (``+hh:mm`` or ``-hh:mm``);
        otherwise render UTC with a ``Z`` suffix.
    :return: the formatted string.

    >>> date_time(1126482850)
    '2005-09-11T23:54:10Z'

    @@ this will change depending on where it is run
    #>>> date_time(1126482850, local_time_zone=True)
    #'2005-09-11T19:54:10-04:00'

    >>> date_time(1)
    '1970-01-01T00:00:01Z'

    >>> date_time(0)
    '1970-01-01T00:00:00Z'
    """
    if t is None:
        t = time()

    if local_time_zone:
        time_tuple = localtime(t)
        # Offset in seconds *east* of UTC that applies at instant ``t``.
        utc_offset = getattr(time_tuple, "tm_gmtoff", None)
        if utc_offset is None:
            # Fall back to the module-level constants, which are expressed
            # in seconds *west* of UTC, hence the negation.
            if time_tuple.tm_isdst > 0:
                utc_offset = -altzone
            else:
                utc_offset = -timezone
        # The sign must follow the offset: zones east of UTC are "+hh:mm".
        tz_sign = "-" if utc_offset < 0 else "+"
        tz_hours, tz_mins = divmod(abs(utc_offset) // 60, 60)
        tzd = "%s%02d:%02d" % (tz_sign, tz_hours, tz_mins)
    else:
        time_tuple = gmtime(t)
        tzd = "Z"

    year, month, day, hh, mm, ss = time_tuple[:6]
    return "%04d-%02d-%02dT%02d:%02d:%02d%s" % (
        year, month, day, hh, mm, ss, tzd)
296 | |
297 | |
def parse_date_time(val):
    """Parse a W3C date-time string; always returns seconds in UTC.

    Accepted shapes are ``YYYY-MM-DD`` (midnight UTC is assumed) and
    ``YYYY-MM-DDThh:mm:ss`` followed by nothing, ``Z``, or a numeric
    offset written as ``+hh:mm``, ``+hhmm`` or ``+hh`` (or with ``-``).

    :param val: the string to parse.
    :return: seconds since the epoch, as an integer.
    :raises ValueError: if the time zone designator or any numeric field
        cannot be parsed.

    # tests are written like this to make any errors easier to understand
    >>> parse_date_time('2005-09-11T23:54:10Z') - 1126482850.0
    0.0

    >>> parse_date_time('2005-09-11T16:54:10-07:00') - 1126482850.0
    0.0

    >>> parse_date_time('1970-01-01T00:00:01Z') - 1.0
    0.0

    >>> parse_date_time('1970-01-01T00:00:00Z') - 0.0
    0.0
    >>> parse_date_time("2005-09-05T10:42:00") - 1125916920.0
    0.0
    >>> parse_date_time('1970-01-01T00:30:00+00:30') - 0.0
    0.0
    """

    if "T" not in val:
        val += "T00:00:00Z"

    ymd, time_part = val.split("T")
    hms, tz_str = time_part[0:8], time_part[8:]

    if not tz_str or tz_str == "Z":
        tz_offset = 0
    else:
        if tz_str[0] not in "+-":
            raise ValueError(
                "Unrecognised time zone designator: %r" % tz_str)
        # Take the direction from the sign character itself: deriving it
        # from the parsed hour loses it whenever the hour field is "00".
        direction = -1 if tz_str[0] == "-" else 1
        digits = tz_str[1:].replace(":", "")
        hours = int(digits[:2])
        minutes = int(digits[2:4] or 0)
        # Local time is ahead of UTC for positive offsets, so subtract.
        tz_offset = -direction * (hours * 60 + minutes) * 60

    year, month, day = ymd.split("-")
    hour, minute, second = hms.split(":")

    t = timegm((int(year), int(month), int(day), int(hour),
                int(minute), int(second), 0, 0, 0))
    return t + tz_offset
339 | |
340 | |
341 | |
342 | |
343 | |
# Default lookup table used by ``guess_format``: maps a lower-case file
# suffix (without the leading dot) to the name of an RDF serialization.
SUFFIX_FORMAT_MAP = {
    'rdf': 'xml',
    'rdfs': 'xml',
    'owl': 'xml',
    'n3': 'n3',
    'ttl': 'turtle',
    'nt': 'nt',
    'trix': 'trix',
    'xhtml': 'rdfa',
    'html': 'rdfa',
    'svg': 'rdfa',
    'nq': 'nquads',
    'trig': 'trig'
}
358 | |
359 | |
def guess_format(fpath, fmap=None):
    """
    Pick an RDF serialization name from the suffix of ``fpath``.

    The lookup table is ``fmap`` when one is given (and non-empty),
    otherwise ``SUFFIX_FORMAT_MAP``.  ``None`` is returned when nothing
    matches.  Examples:

        >>> guess_format('path/to/file.rdf')
        'xml'
        >>> guess_format('path/to/file.owl')
        'xml'
        >>> guess_format('path/to/file.ttl')
        'turtle'
        >>> guess_format('path/to/file.xhtml')
        'rdfa'
        >>> guess_format('path/to/file.svg')
        'rdfa'
        >>> guess_format('path/to/file.xhtml', {'xhtml': 'grddl'})
        'grddl'

    This also works with just the suffixes, with or without leading dot, and
    regardless of letter case::

        >>> guess_format('.rdf')
        'xml'
        >>> guess_format('rdf')
        'xml'
        >>> guess_format('RDF')
        'xml'
    """
    table = fmap if fmap else SUFFIX_FORMAT_MAP
    by_extension = table.get(_get_ext(fpath))
    if by_extension:
        return by_extension
    # No usable extension: treat the whole argument as a bare suffix.
    return table.get(fpath.lower())
390 | |
391 | |
392 def _get_ext(fpath, lower=True): | |
393 """ | |
394 Gets the file extension from a file(path); stripped of leading '.' and in | |
395 lower case. Examples: | |
396 | |
397 >>> _get_ext("path/to/file.txt") | |
398 'txt' | |
399 >>> _get_ext("OTHER.PDF") | |
400 'pdf' | |
401 >>> _get_ext("noext") | |
402 '' | |
403 >>> _get_ext(".rdf") | |
404 'rdf' | |
405 """ | |
406 ext = splitext(fpath)[-1] | |
407 if ext == '' and fpath.startswith("."): | |
408 ext = fpath | |
409 if lower: | |
410 ext = ext.lower() | |
411 if ext.startswith('.'): | |
412 ext = ext[1:] | |
413 return ext | |
414 | |
415 | |
def find_roots(graph, prop, roots=None):
    """
    Collect the roots of a transitive hierarchy expressed through ``prop``.

    find_roots(graph, rdflib.RDFS.subClassOf)
    will return a set of all roots of the sub-class hierarchy

    Triples are assumed to read (child, prop, parent), i.e. the direction
    of RDFS.subClassOf or SKOS.broader.

    If a ``roots`` set is passed in, it is updated in place and returned.
    """
    if roots is None:
        roots = set()
    children = set()
    for child, parent in graph.subject_objects(prop):
        # Anything that appears as a child can never be a root.
        children.add(child)
        roots.discard(child)
        if parent not in children:
            roots.add(parent)
    return roots
438 | |
439 | |
def get_tree(graph,
             root,
             prop,
             mapper=lambda x: x,
             sortkey=None,
             done=None,
             dir='down'):
    """
    Build a nested ``(node, [subtrees])`` structure for the tree spanned by
    the transitive property ``prop``, starting at ``root``.

    i.e.

    get_tree(graph,
       rdflib.URIRef("http://xmlns.com/foaf/0.1/Person"),
       rdflib.RDFS.subClassOf)

    will return the structure for the subClassTree below person.

    dir='down' assumes triple of the form (child, prop, parent),
    i.e. the direction of RDFS.subClassOf or SKOS.broader
    Any other dir traverses in the other direction

    Each node is passed through ``mapper`` and sibling subtrees are sorted
    with ``sortkey``.  ``done`` is the set of nodes already visited; a node
    found in it yields ``None`` and is left out of its parent's subtree list.
    """
    visited = set() if done is None else done
    if root in visited:
        return
    visited.add(root)

    if dir == 'down':
        neighbours = graph.subjects(prop, root)
    else:
        neighbours = graph.objects(root, prop)

    # Recurse lazily, in iteration order, sharing the same visited set.
    candidates = (
        get_tree(graph, node, prop, mapper, sortkey, visited, dir)
        for node in neighbours)
    subtrees = [subtree for subtree in candidates if subtree]

    return (mapper(root), sorted(subtrees, key=sortkey))
483 | |
484 | |
485 | |
486 | |
def test():
    """Run the doctests embedded in this module's docstrings."""
    from doctest import testmod
    testmod()
490 | |
if __name__ == "__main__":
    # Running the module as a script executes its doctests.
    #
    # NOTE: an earlier attempt to make the tests work outside of the time
    # zone they were written in (setting os.environ['TZ'] = 'US/Pacific'
    # and calling time.tzset(), tolerating AttributeError where tzset is
    # missing) is disabled; see
    # http://mail.python.org/pipermail/python-dev/2003-April/034480.html
    test()  # pragma: no cover