diff env/lib/python3.7/site-packages/rdflib/util.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
| author   | shellac                         |
|----------|---------------------------------|
| date     | Mon, 01 Jun 2020 08:59:25 -0400 |
| parents  | 79f47841a781                    |
| children |                                 |
--- a/env/lib/python3.7/site-packages/rdflib/util.py	Thu May 14 16:47:39 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,502 +0,0 @@
-"""
-Some utility functions.
-
-Miscellaneous utilities
-
-* list2set
-* first
-* uniq
-* more_than
-
-Term characterisation and generation
-
-* to_term
-* from_n3
-
-Date/time utilities
-
-* date_time
-* parse_date_time
-
-Statement and component type checkers
-
-* check_context
-* check_subject
-* check_predicate
-* check_object
-* check_statement
-* check_pattern
-
-"""
-
-from calendar import timegm
-from time import altzone
-# from time import daylight
-from time import gmtime
-from time import localtime
-from time import time
-from time import timezone
-
-from os.path import splitext
-from io import StringIO
-
-from rdflib.exceptions import ContextTypeError
-from rdflib.exceptions import ObjectTypeError
-from rdflib.exceptions import PredicateTypeError
-from rdflib.exceptions import SubjectTypeError
-from rdflib.graph import Graph
-from rdflib.graph import QuotedGraph
-from rdflib.namespace import Namespace
-from rdflib.namespace import NamespaceManager
-from rdflib.term import BNode
-from rdflib.term import Literal
-from rdflib.term import URIRef
-from rdflib.py3compat import sign
-
-__all__ = [
-    'list2set', 'first', 'uniq', 'more_than', 'to_term', 'from_n3',
-    'date_time', 'parse_date_time', 'check_context', 'check_subject',
-    'check_predicate', 'check_object', 'check_statement', 'check_pattern',
-    'guess_format', 'find_roots', 'get_tree']
-
-
-def list2set(seq):
-    """
-    Return a new list without duplicates.
-    Preserves the order, unlike set(seq)
-    """
-    seen = set()
-    return [x for x in seq if x not in seen and not seen.add(x)]
-
-
-def first(seq):
-    """
-    return the first element in a python sequence
-    for graphs, use graph.value instead
-    """
-    for result in seq:
-        return result
-    return None
-
-
-def uniq(sequence, strip=0):
-    """removes duplicate strings from the sequence."""
-    if strip:
-        return set(s.strip() for s in sequence)
-    else:
-        return set(sequence)
-
-
-def more_than(sequence, number):
-    "Returns 1 if sequence has more items than number and 0 if not."
-    i = 0
-    for item in sequence:
-        i += 1
-        if i > number:
-            return 1
-    return 0
-
-
-def to_term(s, default=None):
-    """
-    Creates and returns an Identifier of type corresponding
-    to the pattern of the given positional argument string ``s``:
-
-    '' returns the ``default`` keyword argument value or ``None``
-
-    '<s>' returns ``URIRef(s)`` (i.e. without angle brackets)
-
-    '"s"' returns ``Literal(s)`` (i.e. without doublequotes)
-
-    '_s' returns ``BNode(s)`` (i.e. without leading underscore)
-
-    """
-    if not s:
-        return default
-    elif s.startswith("<") and s.endswith(">"):
-        return URIRef(s[1:-1])
-    elif s.startswith('"') and s.endswith('"'):
-        return Literal(s[1:-1])
-    elif s.startswith("_"):
-        return BNode(s)
-    else:
-        msg = "Unrecognised term syntax: '%s'" % s
-        raise Exception(msg)
-
-
-def from_n3(s, default=None, backend=None, nsm=None):
-    r'''
-    Creates the Identifier corresponding to the given n3 string.
-
-    >>> from_n3('<http://ex.com/foo>') == URIRef('http://ex.com/foo')
-    True
-    >>> from_n3('"foo"@de') == Literal('foo', lang='de')
-    True
-    >>> from_n3('"""multi\nline\nstring"""@en') == Literal(
-    ...     'multi\nline\nstring', lang='en')
-    True
-    >>> from_n3('42') == Literal(42)
-    True
-    >>> from_n3(Literal(42).n3()) == Literal(42)
-    True
-    >>> from_n3('"42"^^xsd:integer') == Literal(42)
-    True
-    >>> from rdflib import RDFS
-    >>> from_n3('rdfs:label') == RDFS['label']
-    True
-    >>> nsm = NamespaceManager(Graph())
-    >>> nsm.bind('dbpedia', 'http://dbpedia.org/resource/')
-    >>> berlin = URIRef('http://dbpedia.org/resource/Berlin')
-    >>> from_n3('dbpedia:Berlin', nsm=nsm) == berlin
-    True
-
-    '''
-    if not s:
-        return default
-    if s.startswith('<'):
-        return URIRef(s[1:-1])
-    elif s.startswith('"'):
-        if s.startswith('"""'):
-            quotes = '"""'
-        else:
-            quotes = '"'
-        value, rest = s.rsplit(quotes, 1)
-        value = value[len(quotes):]  # strip leading quotes
-        datatype = None
-        language = None
-
-        # as a given datatype overrules lang-tag check for it first
-        dtoffset = rest.rfind('^^')
-        if dtoffset >= 0:
-            # found a datatype
-            # datatype has to come after lang-tag so ignore everything before
-            # see: http://www.w3.org/TR/2011/WD-turtle-20110809/
-            # #prod-turtle2-RDFLiteral
-            datatype = from_n3(rest[dtoffset + 2:], default, backend, nsm)
-        else:
-            if rest.startswith("@"):
-                language = rest[1:]  # strip leading at sign
-
-        value = value.replace(r'\"', '"')
-        # Hack: this should correctly handle strings with either native unicode
-        # characters, or \u1234 unicode escapes.
-        value = value.encode("raw-unicode-escape").decode("unicode-escape")
-        return Literal(value, language, datatype)
-    elif s == 'true' or s == 'false':
-        return Literal(s == 'true')
-    elif s.isdigit():
-        return Literal(int(s))
-    elif s.startswith('{'):
-        identifier = from_n3(s[1:-1])
-        return QuotedGraph(backend, identifier)
-    elif s.startswith('['):
-        identifier = from_n3(s[1:-1])
-        return Graph(backend, identifier)
-    elif s.startswith("_:"):
-        return BNode(s[2:])
-    elif ':' in s:
-        if nsm is None:
-            # instantiate default NamespaceManager and rely on its defaults
-            nsm = NamespaceManager(Graph())
-        prefix, last_part = s.split(':', 1)
-        ns = dict(nsm.namespaces())[prefix]
-        return Namespace(ns)[last_part]
-    else:
-        return BNode(s)
-
-
-def check_context(c):
-    if not (isinstance(c, URIRef) or
-            isinstance(c, BNode)):
-        raise ContextTypeError("%s:%s" % (c, type(c)))
-
-
-def check_subject(s):
-    """ Test that s is a valid subject identifier."""
-    if not (isinstance(s, URIRef) or isinstance(s, BNode)):
-        raise SubjectTypeError(s)
-
-
-def check_predicate(p):
-    """ Test that p is a valid predicate identifier."""
-    if not isinstance(p, URIRef):
-        raise PredicateTypeError(p)
-
-
-def check_object(o):
-    """ Test that o is a valid object identifier."""
-    if not (isinstance(o, URIRef) or
-            isinstance(o, Literal) or
-            isinstance(o, BNode)):
-        raise ObjectTypeError(o)
-
-
-def check_statement(triple):
-    (s, p, o) = triple
-    if not (isinstance(s, URIRef) or isinstance(s, BNode)):
-        raise SubjectTypeError(s)
-
-    if not isinstance(p, URIRef):
-        raise PredicateTypeError(p)
-
-    if not (isinstance(o, URIRef) or
-            isinstance(o, Literal) or
-            isinstance(o, BNode)):
-        raise ObjectTypeError(o)
-
-
-def check_pattern(triple):
-    (s, p, o) = triple
-    if s and not (isinstance(s, URIRef) or isinstance(s, BNode)):
-        raise SubjectTypeError(s)
-
-    if p and not isinstance(p, URIRef):
-        raise PredicateTypeError(p)
-
-    if o and not (isinstance(o, URIRef) or
-                  isinstance(o, Literal) or
-                  isinstance(o, BNode)):
-        raise ObjectTypeError(o)
-
-
-def date_time(t=None, local_time_zone=False):
-    """http://www.w3.org/TR/NOTE-datetime ex: 1997-07-16T19:20:30Z
-
-    >>> date_time(1126482850)
-    '2005-09-11T23:54:10Z'
-
-    @@ this will change depending on where it is run
-    #>>> date_time(1126482850, local_time_zone=True)
-    #'2005-09-11T19:54:10-04:00'
-
-    >>> date_time(1)
-    '1970-01-01T00:00:01Z'
-
-    >>> date_time(0)
-    '1970-01-01T00:00:00Z'
-    """
-    if t is None:
-        t = time()
-
-    if local_time_zone:
-        time_tuple = localtime(t)
-        if time_tuple[8]:
-            tz_mins = altzone // 60
-        else:
-            tz_mins = timezone // 60
-        tzd = "-%02d:%02d" % (tz_mins // 60, tz_mins % 60)
-    else:
-        time_tuple = gmtime(t)
-        tzd = "Z"
-
-    year, month, day, hh, mm, ss, wd, y, z = time_tuple
-    s = "%0004d-%02d-%02dT%02d:%02d:%02d%s" % (
-        year, month, day, hh, mm, ss, tzd)
-    return s
-
-
-def parse_date_time(val):
-    """always returns seconds in UTC
-
-    # tests are written like this to make any errors easier to understand
-    >>> parse_date_time('2005-09-11T23:54:10Z') - 1126482850.0
-    0.0
-
-    >>> parse_date_time('2005-09-11T16:54:10-07:00') - 1126482850.0
-    0.0
-
-    >>> parse_date_time('1970-01-01T00:00:01Z') - 1.0
-    0.0
-
-    >>> parse_date_time('1970-01-01T00:00:00Z') - 0.0
-    0.0
-    >>> parse_date_time("2005-09-05T10:42:00") - 1125916920.0
-    0.0
-    """
-
-    if "T" not in val:
-        val += "T00:00:00Z"
-
-    ymd, time = val.split("T")
-    hms, tz_str = time[0:8], time[8:]
-
-    if not tz_str or tz_str == "Z":
-        time = time[:-1]
-        tz_offset = 0
-    else:
-        signed_hrs = int(tz_str[:3])
-        mins = int(tz_str[4:6])
-        secs = (sign(signed_hrs) * mins + signed_hrs * 60) * 60
-        tz_offset = -secs
-
-    year, month, day = ymd.split("-")
-    hour, minute, second = hms.split(":")
-
-    t = timegm((int(year), int(month), int(day), int(hour),
-                int(minute), int(second), 0, 0, 0))
-    t = t + tz_offset
-    return t
-
-
-SUFFIX_FORMAT_MAP = {
-    'rdf': 'xml',
-    'rdfs': 'xml',
-    'owl': 'xml',
-    'n3': 'n3',
-    'ttl': 'turtle',
-    'nt': 'nt',
-    'trix': 'trix',
-    'xhtml': 'rdfa',
-    'html': 'rdfa',
-    'svg': 'rdfa',
-    'nq': 'nquads',
-    'trig': 'trig'
-}
-
-
-def guess_format(fpath, fmap=None):
-    """
-    Guess RDF serialization based on file suffix. Uses
-    ``SUFFIX_FORMAT_MAP`` unless ``fmap`` is provided. Examples:
-
-        >>> guess_format('path/to/file.rdf')
-        'xml'
-        >>> guess_format('path/to/file.owl')
-        'xml'
-        >>> guess_format('path/to/file.ttl')
-        'turtle'
-        >>> guess_format('path/to/file.xhtml')
-        'rdfa'
-        >>> guess_format('path/to/file.svg')
-        'rdfa'
-        >>> guess_format('path/to/file.xhtml', {'xhtml': 'grddl'})
-        'grddl'
-
-    This also works with just the suffixes, with or without leading dot, and
-    regardless of letter case::
-
-        >>> guess_format('.rdf')
-        'xml'
-        >>> guess_format('rdf')
-        'xml'
-        >>> guess_format('RDF')
-        'xml'
-    """
-    fmap = fmap or SUFFIX_FORMAT_MAP
-    return fmap.get(_get_ext(fpath)) or fmap.get(fpath.lower())
-
-
-def _get_ext(fpath, lower=True):
-    """
-    Gets the file extension from a file(path); stripped of leading '.' and in
-    lower case. Examples:
-
-        >>> _get_ext("path/to/file.txt")
-        'txt'
-        >>> _get_ext("OTHER.PDF")
-        'pdf'
-        >>> _get_ext("noext")
-        ''
-        >>> _get_ext(".rdf")
-        'rdf'
-    """
-    ext = splitext(fpath)[-1]
-    if ext == '' and fpath.startswith("."):
-        ext = fpath
-    if lower:
-        ext = ext.lower()
-    if ext.startswith('.'):
-        ext = ext[1:]
-    return ext
-
-
-def find_roots(graph, prop, roots=None):
-    """
-    Find the roots in some sort of transitive hierarchy.
-
-    find_roots(graph, rdflib.RDFS.subClassOf)
-    will return a set of all roots of the sub-class hierarchy
-
-    Assumes triple of the form (child, prop, parent), i.e. the direction of
-    RDFS.subClassOf or SKOS.broader
-
-    """
-
-    non_roots = set()
-    if roots is None:
-        roots = set()
-    for x, y in graph.subject_objects(prop):
-        non_roots.add(x)
-        if x in roots:
-            roots.remove(x)
-        if y not in non_roots:
-            roots.add(y)
-    return roots
-
-
-def get_tree(graph,
-             root,
-             prop,
-             mapper=lambda x: x,
-             sortkey=None,
-             done=None,
-             dir='down'):
-    """
-    Return a nested list/tuple structure representing the tree
-    built by the transitive property given, starting from the root given
-
-    i.e.
-
-    get_tree(graph,
-             rdflib.URIRef("http://xmlns.com/foaf/0.1/Person"),
-             rdflib.RDFS.subClassOf)
-
-    will return the structure for the subClassTree below person.
-
-    dir='down' assumes triple of the form (child, prop, parent),
-    i.e. the direction of RDFS.subClassOf or SKOS.broader
-    Any other dir traverses in the other direction
-
-    """
-
-    if done is None:
-        done = set()
-    if root in done:
-        return
-    done.add(root)
-    tree = []
-
-    if dir == 'down':
-        branches = graph.subjects(prop, root)
-    else:
-        branches = graph.objects(root, prop)
-
-    for branch in branches:
-        t = get_tree(graph, branch, prop, mapper, sortkey, done, dir)
-        if t:
-            tree.append(t)
-
-    return (mapper(root), sorted(tree, key=sortkey))
-
-
-def test():
-    import doctest
-    doctest.testmod()
-
-
-if __name__ == "__main__":
-    # try to make the tests work outside of the time zone they were written in
-    # import os, time
-    # os.environ['TZ'] = 'US/Pacific'
-    # try:
-    #     time.tzset()
-    # except AttributeError, e:
-    #     print e
-    # pass
-    # tzset missing! see
-    # http://mail.python.org/pipermail/python-dev/2003-April/034480.html
-    test()  # pragma: no cover
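For orientation, the sketch below shows how the helpers documented in the deleted module would typically be used, based on the docstrings and doctests in the diff above. It is illustrative only: the path `data/ontology.ttl` is hypothetical, and it assumes an rdflib release (4.x/5.x) that still ships these functions in `rdflib.util`.

```python
from rdflib import Graph, RDFS
from rdflib.util import from_n3, guess_format, find_roots, get_tree

# guess_format: map a file suffix to a parser name via SUFFIX_FORMAT_MAP
fmt = guess_format("data/ontology.ttl")   # -> 'turtle' (hypothetical path)

g = Graph()
g.parse("data/ontology.ttl", format=fmt)  # load the data before walking it

# from_n3: turn an N3/Turtle token into the corresponding rdflib term
label = from_n3('"Berlin"@de')            # Literal('Berlin', lang='de')
person = from_n3('<http://xmlns.com/foaf/0.1/Person>')  # URIRef

# find_roots / get_tree: walk a (child, prop, parent) hierarchy
# such as rdfs:subClassOf, from its roots downwards
for root in find_roots(g, RDFS.subClassOf):
    tree = get_tree(g, root, RDFS.subClassOf)
    print(tree)
```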