Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/rdflib/util.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 """ | |
| 2 Some utility functions. | |
| 3 | |
| 4 Miscellaneous utilities | |
| 5 | |
| 6 * list2set | |
| 7 * first | |
| 8 * uniq | |
| 9 * more_than | |
| 10 | |
| 11 Term characterisation and generation | |
| 12 | |
| 13 * to_term | |
| 14 * from_n3 | |
| 15 | |
| 16 Date/time utilities | |
| 17 | |
| 18 * date_time | |
| 19 * parse_date_time | |
| 20 | |
| 21 Statement and component type checkers | |
| 22 | |
| 23 * check_context | |
| 24 * check_subject | |
| 25 * check_predicate | |
| 26 * check_object | |
| 27 * check_statement | |
| 28 * check_pattern | |
| 29 | |
| 30 """ | |
| 31 | |
| 32 from calendar import timegm | |
| 33 from time import altzone | |
| 34 # from time import daylight | |
| 35 from time import gmtime | |
| 36 from time import localtime | |
| 37 from time import time | |
| 38 from time import timezone | |
| 39 | |
| 40 from os.path import splitext | |
| 41 from io import StringIO | |
| 42 | |
| 43 from rdflib.exceptions import ContextTypeError | |
| 44 from rdflib.exceptions import ObjectTypeError | |
| 45 from rdflib.exceptions import PredicateTypeError | |
| 46 from rdflib.exceptions import SubjectTypeError | |
| 47 from rdflib.graph import Graph | |
| 48 from rdflib.graph import QuotedGraph | |
| 49 from rdflib.namespace import Namespace | |
| 50 from rdflib.namespace import NamespaceManager | |
| 51 from rdflib.term import BNode | |
| 52 from rdflib.term import Literal | |
| 53 from rdflib.term import URIRef | |
| 54 from rdflib.py3compat import sign | |
| 55 | |
# Names exported by ``from rdflib.util import *``.
__all__ = [
    'list2set', 'first', 'uniq', 'more_than', 'to_term', 'from_n3',
    'date_time', 'parse_date_time', 'check_context', 'check_subject',
    'check_predicate', 'check_object', 'check_statement', 'check_pattern',
    'guess_format', 'find_roots', 'get_tree']
| 61 | |
| 62 | |
def list2set(seq):
    """
    Return a new list holding the items of ``seq`` with duplicates dropped.

    Unlike ``set(seq)`` the first occurrence of every item keeps its
    original position.  Items have to be hashable.
    """
    already_seen = set()
    unique_items = []
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique_items.append(item)
    return unique_items
| 70 | |
| 71 | |
def first(seq):
    """
    Return the first item produced by iterating ``seq``.

    ``None`` is returned when ``seq`` yields nothing.
    For graphs, use graph.value instead.
    """
    return next(iter(seq), None)
| 80 | |
| 81 | |
def uniq(sequence, strip=0):
    """
    Return the distinct items of ``sequence`` as a set.

    When ``strip`` is true, ``.strip()`` is applied to every item before
    it is added to the set.
    """
    if not strip:
        return set(sequence)
    return {item.strip() for item in sequence}
| 88 | |
| 89 | |
def more_than(sequence, number):
    """
    Return 1 if ``sequence`` has more items than ``number`` and 0 if not.

    Iteration stops as soon as the answer is known, so the rest of a long
    (or lazy) sequence is not consumed.
    """
    for count, _item in enumerate(sequence, 1):
        if count > number:
            return 1
    return 0
| 98 | |
| 99 | |
def to_term(s, default=None):
    """
    Build the Identifier matching the surface syntax of the string ``s``.

    * a false value (``''``, ``None``) gives back ``default``
    * ``'<s>'`` gives ``URIRef(s)``, angle brackets removed
    * ``'"s"'`` gives ``Literal(s)``, double quotes removed
    * ``'_s'`` gives ``BNode('_s')``; the whole string, leading
      underscore included, is handed to ``BNode``

    Any other string raises ``Exception``.
    """
    if not s:
        return default
    if s.startswith("<") and s.endswith(">"):
        return URIRef(s[1:-1])
    if s.startswith('"') and s.endswith('"'):
        return Literal(s[1:-1])
    if s.startswith("_"):
        return BNode(s)
    raise Exception("Unrecognised term syntax: '%s'" % s)
| 125 | |
| 126 | |
def from_n3(s, default=None, backend=None, nsm=None):
    r'''
    Build the Identifier (or graph) denoted by the n3 string ``s``.

    A false ``s`` gives back ``default``.  ``backend`` is handed to
    ``QuotedGraph`` / ``Graph`` for ``{...}`` / ``[...]`` input, and ``nsm``
    is the NamespaceManager used to expand ``prefix:name`` input (a fresh
    default one is created when it is ``None``).

    >>> from_n3('<http://ex.com/foo>') == URIRef('http://ex.com/foo')
    True
    >>> from_n3('"foo"@de') == Literal('foo', lang='de')
    True
    >>> from_n3('"""multi\nline\nstring"""@en') == Literal(
    ...     'multi\nline\nstring', lang='en')
    True
    >>> from_n3('42') == Literal(42)
    True
    >>> from_n3(Literal(42).n3()) == Literal(42)
    True
    >>> from_n3('"42"^^xsd:integer') == Literal(42)
    True
    >>> from rdflib import RDFS
    >>> from_n3('rdfs:label') == RDFS['label']
    True
    >>> nsm = NamespaceManager(Graph())
    >>> nsm.bind('dbpedia', 'http://dbpedia.org/resource/')
    >>> berlin = URIRef('http://dbpedia.org/resource/Berlin')
    >>> from_n3('dbpedia:Berlin', nsm=nsm) == berlin
    True

    '''
    if not s:
        return default

    # <uri>
    if s.startswith('<'):
        return URIRef(s[1:-1])

    # "literal" / """literal""", optionally followed by @lang or ^^datatype
    if s.startswith('"'):
        delim = '"""' if s.startswith('"""') else '"'
        body, suffix = s.rsplit(delim, 1)
        body = body[len(delim):]  # drop the opening quotes
        lang = None
        dtype = None

        # a datatype overrules a lang-tag, so look for it first; it has to
        # come after the lang-tag, hence everything before it is ignored
        # see: http://www.w3.org/TR/2011/WD-turtle-20110809/
        #      #prod-turtle2-RDFLiteral
        marker = suffix.rfind('^^')
        if marker >= 0:
            dtype = from_n3(suffix[marker + 2:], default, backend, nsm)
        elif suffix.startswith("@"):
            lang = suffix[1:]  # drop the at sign

        body = body.replace(r'\"', '"')
        # Hack: copes with both native unicode characters and \u1234
        # style unicode escapes in the literal text.
        body = body.encode("raw-unicode-escape").decode("unicode-escape")
        return Literal(body, lang, dtype)

    if s in ('true', 'false'):
        return Literal(s == 'true')

    if s.isdigit():
        return Literal(int(s))

    # {identifier} -> quoted graph, [identifier] -> graph
    if s.startswith('{'):
        return QuotedGraph(backend, from_n3(s[1:-1]))
    if s.startswith('['):
        return Graph(backend, from_n3(s[1:-1]))

    if s.startswith("_:"):
        return BNode(s[2:])

    # prefix:local_name
    if ':' in s:
        # without an explicit manager, rely on the defaults of a new one
        manager = NamespaceManager(Graph()) if nsm is None else nsm
        prefix, local_name = s.split(':', 1)
        namespace_uri = dict(manager.namespaces())[prefix]
        return Namespace(namespace_uri)[local_name]

    return BNode(s)
| 206 | |
| 207 | |
def check_context(c):
    """ Test that c is a valid context identifier (URIRef or BNode)."""
    if not isinstance(c, (URIRef, BNode)):
        raise ContextTypeError("%s:%s" % (c, type(c)))
| 212 | |
| 213 | |
def check_subject(s):
    """ Raise SubjectTypeError unless s is a URIRef or a BNode."""
    if not isinstance(s, (URIRef, BNode)):
        raise SubjectTypeError(s)
| 218 | |
| 219 | |
def check_predicate(p):
    """ Raise PredicateTypeError unless p is a URIRef."""
    if isinstance(p, URIRef):
        return
    raise PredicateTypeError(p)
| 224 | |
| 225 | |
def check_object(o):
    """ Raise ObjectTypeError unless o is a URIRef, a Literal or a BNode."""
    if not isinstance(o, (URIRef, Literal, BNode)):
        raise ObjectTypeError(o)
| 232 | |
| 233 | |
def check_statement(triple):
    """
    Test that every component of the (s, p, o) ``triple`` has a valid type.

    Raises SubjectTypeError, PredicateTypeError or ObjectTypeError for the
    first offending component, checked in that order.
    """
    subject, predicate, obj = triple

    if not isinstance(subject, (URIRef, BNode)):
        raise SubjectTypeError(subject)

    if not isinstance(predicate, URIRef):
        raise PredicateTypeError(predicate)

    if not isinstance(obj, (URIRef, Literal, BNode)):
        raise ObjectTypeError(obj)
| 246 | |
| 247 | |
def check_pattern(triple):
    """
    Test the (s, p, o) ``triple`` pattern; false components are wildcards.

    Only components that are true in a boolean context are type checked.
    Raises SubjectTypeError, PredicateTypeError or ObjectTypeError for the
    first offending component, checked in that order.
    """
    subject, predicate, obj = triple

    if subject and not isinstance(subject, (URIRef, BNode)):
        raise SubjectTypeError(subject)

    if predicate and not isinstance(predicate, URIRef):
        raise PredicateTypeError(predicate)

    if obj and not isinstance(obj, (URIRef, Literal, BNode)):
        raise ObjectTypeError(obj)
| 260 | |
| 261 | |
def date_time(t=None, local_time_zone=False):
    """Format a timestamp as a W3C date-time string.

    http://www.w3.org/TR/NOTE-datetime ex: 1997-07-16T19:20:30Z

    :param t: seconds since the epoch; ``None`` means the current time.
    :param local_time_zone: when true, render local time followed by a
        signed ``+hh:mm`` / ``-hh:mm`` UTC offset; otherwise render UTC
        followed by ``Z``.
    :returns: the formatted string.

    >>> date_time(1126482850)
    '2005-09-11T23:54:10Z'

    @@ this will change depending on where it is run
    #>>> date_time(1126482850, local_time_zone=True)
    #'2005-09-11T19:54:10-04:00'

    >>> date_time(1)
    '1970-01-01T00:00:01Z'

    >>> date_time(0)
    '1970-01-01T00:00:00Z'
    """
    if t is None:
        t = time()

    if local_time_zone:
        time_tuple = localtime(t)
        # Prefer the offset (seconds east of UTC) that applied at ``t``.
        utc_offset = getattr(time_tuple, "tm_gmtoff", None)
        if utc_offset is None:
            # altzone/timezone count seconds *west* of UTC, hence the negation
            utc_offset = -(altzone if time_tuple[8] > 0 else timezone)
        # The sign has to follow the offset: zones east of UTC are "+hh:mm".
        sign_char = "-" if utc_offset < 0 else "+"
        off_hours, off_mins = divmod(abs(utc_offset) // 60, 60)
        tzd = "%s%02d:%02d" % (sign_char, off_hours, off_mins)
    else:
        time_tuple = gmtime(t)
        tzd = "Z"

    year, month, day, hh, mm, ss = time_tuple[:6]
    return "%04d-%02d-%02dT%02d:%02d:%02d%s" % (
        year, month, day, hh, mm, ss, tzd)
| 296 | |
| 297 | |
def parse_date_time(val):
    """Parse a W3C date-time string; always returns seconds in UTC.

    ``val`` is ``YYYY-MM-DD`` optionally followed by ``Thh:mm:ss`` and an
    optional zone designator, either ``Z`` or a ``+hh:mm`` / ``-hh:mm``
    offset.  A value without a time part is read as midnight UTC, and a
    time without a zone designator is read as UTC.

    :raises ValueError: if a numeric field cannot be parsed.

    # tests are written like this to make any errors easier to understand
    >>> parse_date_time('2005-09-11T23:54:10Z') - 1126482850.0
    0.0

    >>> parse_date_time('2005-09-11T16:54:10-07:00') - 1126482850.0
    0.0

    >>> parse_date_time('2005-09-11T23:24:10-00:30') - 1126482850.0
    0.0

    >>> parse_date_time('1970-01-01T00:00:01Z') - 1.0
    0.0

    >>> parse_date_time('1970-01-01T00:00:00Z') - 0.0
    0.0
    >>> parse_date_time("2005-09-05T10:42:00") - 1125916920.0
    0.0
    """
    if "T" not in val:
        val += "T00:00:00Z"

    ymd, clock = val.split("T")
    hms, tz_str = clock[0:8], clock[8:]

    if not tz_str or tz_str == "Z":
        tz_offset = 0
    else:
        signed_hrs = int(tz_str[:3])
        mins = int(tz_str[4:6])
        offset_secs = (abs(signed_hrs) * 60 + mins) * 60
        # Read the sign from the text, not from the hour value: for
        # "-00:30" the hours are 0 and carry no sign.
        if tz_str.lstrip().startswith("-"):
            offset_secs = -offset_secs
        tz_offset = -offset_secs

    year, month, day = ymd.split("-")
    hour, minute, second = hms.split(":")

    t = timegm((int(year), int(month), int(day), int(hour),
                int(minute), int(second), 0, 0, 0))
    return t + tz_offset
| 339 | |
| 340 | |
| 341 | |
| 342 | |
| 343 | |
# Lower-case file suffix (without the dot) -> name of the RDF format that
# ``guess_format`` reports for it.
SUFFIX_FORMAT_MAP = {
    # RDF/XML family
    'rdf': 'xml',
    'rdfs': 'xml',
    'owl': 'xml',
    # text based triple formats
    'n3': 'n3',
    'ttl': 'turtle',
    'nt': 'nt',
    'trix': 'trix',
    # markup that may embed RDFa
    'xhtml': 'rdfa',
    'html': 'rdfa',
    'svg': 'rdfa',
    # quad / dataset formats
    'nq': 'nquads',
    'trig': 'trig',
}
| 358 | |
| 359 | |
def guess_format(fpath, fmap=None):
    """
    Guess the RDF serialization of ``fpath`` from its file suffix.

    The lookup table is ``fmap`` when one is given (and non-empty),
    otherwise ``SUFFIX_FORMAT_MAP``.  ``None`` is returned when nothing
    matches.  Examples:

        >>> guess_format('path/to/file.rdf')
        'xml'
        >>> guess_format('path/to/file.owl')
        'xml'
        >>> guess_format('path/to/file.ttl')
        'turtle'
        >>> guess_format('path/to/file.xhtml')
        'rdfa'
        >>> guess_format('path/to/file.svg')
        'rdfa'
        >>> guess_format('path/to/file.xhtml', {'xhtml': 'grddl'})
        'grddl'

    This also works with just the suffixes, with or without leading dot, and
    regardless of letter case::

        >>> guess_format('.rdf')
        'xml'
        >>> guess_format('rdf')
        'xml'
        >>> guess_format('RDF')
        'xml'
    """
    mapping = fmap or SUFFIX_FORMAT_MAP
    by_extension = mapping.get(_get_ext(fpath))
    if by_extension:
        return by_extension
    # no usable extension: treat the whole argument as a bare suffix
    return mapping.get(fpath.lower())
| 390 | |
| 391 | |
| 392 def _get_ext(fpath, lower=True): | |
| 393 """ | |
| 394 Gets the file extension from a file(path); stripped of leading '.' and in | |
| 395 lower case. Examples: | |
| 396 | |
| 397 >>> _get_ext("path/to/file.txt") | |
| 398 'txt' | |
| 399 >>> _get_ext("OTHER.PDF") | |
| 400 'pdf' | |
| 401 >>> _get_ext("noext") | |
| 402 '' | |
| 403 >>> _get_ext(".rdf") | |
| 404 'rdf' | |
| 405 """ | |
| 406 ext = splitext(fpath)[-1] | |
| 407 if ext == '' and fpath.startswith("."): | |
| 408 ext = fpath | |
| 409 if lower: | |
| 410 ext = ext.lower() | |
| 411 if ext.startswith('.'): | |
| 412 ext = ext[1:] | |
| 413 return ext | |
| 414 | |
| 415 | |
def find_roots(graph, prop, roots=None):
    """
    Find the roots in some sort of transitive hierarchy.

    find_roots(graph, rdflib.RDFS.subClassOf)
    will return a set of all roots of the sub-class hierarchy

    Assumes triple of the form (child, prop, parent), i.e. the direction of
    RDFS.subClassOf or SKOS.broader

    A set passed in as ``roots`` is updated in place and returned.
    """
    seen_children = set()
    found = set() if roots is None else roots
    for child, parent in graph.subject_objects(prop):
        seen_children.add(child)
        # anything that has a parent cannot be a root
        found.discard(child)
        if parent not in seen_children:
            found.add(parent)
    return found
| 438 | |
| 439 | |
def get_tree(graph,
             root,
             prop,
             mapper=lambda x: x,
             sortkey=None,
             done=None,
             dir='down'):
    """
    Return a nested ``(mapper(node), [subtrees])`` structure for the tree
    built by the transitive property given, starting from the root given

    i.e.

    get_tree(graph,
       rdflib.URIRef("http://xmlns.com/foaf/0.1/Person"),
       rdflib.RDFS.subClassOf)

    will return the structure for the subClassTree below person.

    dir='down' assumes triple of the form (child, prop, parent),
    i.e. the direction of RDFS.subClassOf or SKOS.broader
    Any other dir traverses in the other direction

    ``done`` holds the nodes already visited; a root found in it yields
    ``None``, so every node shows up at most once.  Sibling subtrees are
    ordered with ``sorted(..., key=sortkey)``.
    """
    visited = set() if done is None else done
    if root in visited:
        return None
    visited.add(root)

    if dir == 'down':
        children = graph.subjects(prop, root)
    else:
        children = graph.objects(root, prop)

    # already-visited children come back as None and are dropped
    subtrees = [
        subtree
        for subtree in (
            get_tree(graph, child, prop, mapper, sortkey, visited, dir)
            for child in children)
        if subtree]

    label = mapper(root)
    subtrees.sort(key=sortkey)
    return (label, subtrees)
| 483 | |
| 484 | |
| 485 | |
| 486 | |
def test():
    """Run the doctests embedded in this module's docstrings."""
    import doctest
    doctest.testmod()
| 490 | |
if __name__ == "__main__":
    # try to make the tests work outside of the time zone they were written in
    # (kept disabled; the snippet below is Python 2 syntax and would need
    # porting before it could be switched back on)
    # import os, time
    # os.environ['TZ'] = 'US/Pacific'
    # try:
    #     time.tzset()
    # except AttributeError, e:
    #     print e
    #     pass
    # tzset missing! see
    # http://mail.python.org/pipermail/python-dev/2003-April/034480.html
    test()  # pragma: no cover
