guppy_basecaller: env/lib/python3.7/site-packages/dateutil/parser/

comparison env/lib/python3.7/site-packages/dateutil/parser/_parser.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"

author	shellac
date	Mon, 01 Jun 2020 08:59:25 -0400
parents	79f47841a781
children

comparison

equal deleted inserted replaced

-:79f47841a781
+:9b1c78e6ba9c
-# -*- coding: utf-8 -*-
-"""
-This module offers a generic date/time string parser which is able to parse
-most known formats to represent a date and/or time.
-This module attempts to be forgiving with regards to unlikely input formats,
-returning a datetime object even for dates which are ambiguous. If an element
-of a date/time stamp is omitted, the following rules are applied:
-- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour
-on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is
-specified.
-- If a time zone is omitted, a timezone-naive datetime is returned.
-If any other elements are missing, they are taken from the
-:class:`datetime.datetime` object passed to the parameter ``default``. If this
-results in a day number exceeding the valid number of days per month, the
-value falls back to the end of the month.
-Additional resources about date/time string formats can be found below:
-- `A summary of the international standard date and time notation
-<http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
-- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
-- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
-- `CPAN ParseDate module
-<http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
-- `Java SimpleDateFormat Class
-<https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
-"""
-from __future__ import unicode_literals
-import datetime
-import re
-import string
-import time
-import warnings
-from calendar import monthrange
-from io import StringIO
-import six
-from six import integer_types, text_type
-from decimal import Decimal
-from warnings import warn
-from .. import relativedelta
-from .. import tz
-__all__ = ["parse", "parserinfo", "ParserError"]
-# TODO: pandas.core.tools.datetimes imports this explicitly.  Might be worth
-# making public and/or figuring out if there is something we can
-# take off their plate.
-class _timelex(object):
-# Fractional seconds are sometimes split by a comma
-_split_decimal = re.compile("([.,])")
-def __init__(self, instream):
-if six.PY2:
-# In Python 2, we can't duck type properly because unicode has
-# a 'decode' function, and we'd be double-decoding
-if isinstance(instream, (bytes, bytearray)):
-instream = instream.decode()
-else:
-if getattr(instream, 'decode', None) is not None:
-instream = instream.decode()
-if isinstance(instream, text_type):
-instream = StringIO(instream)
-elif getattr(instream, 'read', None) is None:
-raise TypeError('Parser must be a string or character stream, not '
-'{itype}'.format(itype=instream.__class__.__name__))
-self.instream = instream
-self.charstack = []
-self.tokenstack = []
-self.eof = False
-def get_token(self):
-"""
-This function breaks the time string into lexical units (tokens), which
-can be parsed by the parser. Lexical units are demarcated by changes in
-the character set, so any continuous string of letters is considered
-one unit, any continuous string of numbers is considered one unit.
-The main complication arises from the fact that dots ('.') can be used
-both as separators (e.g. "Sep.20.2009") or decimal points (e.g.
-"4:30:21.447"). As such, it is necessary to read the full context of
-any dot-separated strings before breaking it into tokens; as such, this
-function maintains a "token stack", for when the ambiguous context
-demands that multiple tokens be parsed at once.
-"""
-if self.tokenstack:
-return self.tokenstack.pop(0)
-seenletters = False
-token = None
-state = None
-while not self.eof:
-# We only realize that we've reached the end of a token when we
-# find a character that's not part of the current token - since
-# that character may be part of the next token, it's stored in the
-# charstack.
-if self.charstack:
-nextchar = self.charstack.pop(0)
-else:
-nextchar = self.instream.read(1)
-while nextchar == '\x00':
-nextchar = self.instream.read(1)
-if not nextchar:
-self.eof = True
-break
-elif not state:
-# First character of the token - determines if we're starting
-# to parse a word, a number or something else.
-token = nextchar
-if self.isword(nextchar):
-state = 'a'
-elif self.isnum(nextchar):
-state = '0'
-elif self.isspace(nextchar):
-token = ' '
-break  # emit token
-else:
-break  # emit token
-elif state == 'a':
-# If we've already started reading a word, we keep reading
-# letters until we find something that's not part of a word.
-seenletters = True
-if self.isword(nextchar):
-token += nextchar
-elif nextchar == '.':
-token += nextchar
-state = 'a.'
-else:
-self.charstack.append(nextchar)
-break  # emit token
-elif state == '0':
-# If we've already started reading a number, we keep reading
-# numbers until we find something that doesn't fit.
-if self.isnum(nextchar):
-token += nextchar
-elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):
-token += nextchar
-state = '0.'
-else:
-self.charstack.append(nextchar)
-break  # emit token
-elif state == 'a.':
-# If we've seen some letters and a dot separator, continue
-# parsing, and the tokens will be broken up later.
-seenletters = True
-if nextchar == '.' or self.isword(nextchar):
-token += nextchar
-elif self.isnum(nextchar) and token[-1] == '.':
-token += nextchar
-state = '0.'
-else:
-self.charstack.append(nextchar)
-break  # emit token
-elif state == '0.':
-# If we've seen at least one dot separator, keep going, we'll
-# break up the tokens later.
-if nextchar == '.' or self.isnum(nextchar):
-token += nextchar
-elif self.isword(nextchar) and token[-1] == '.':
-token += nextchar
-state = 'a.'
-else:
-self.charstack.append(nextchar)
-break  # emit token
-if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or
-token[-1] in '.,')):
-l = self._split_decimal.split(token)
-token = l[0]
-for tok in l[1:]:
-if tok:
-self.tokenstack.append(tok)
-if state == '0.' and token.count('.') == 0:
-token = token.replace(',', '.')
-return token
-def __iter__(self):
-return self
-def __next__(self):
-token = self.get_token()
-if token is None:
-raise StopIteration
-return token
-def next(self):
-return self.__next__()  # Python 2.x support
-@classmethod
-def split(cls, s):
-return list(cls(s))
-@classmethod
-def isword(cls, nextchar):
-""" Whether or not the next character is part of a word """
-return nextchar.isalpha()
-@classmethod
-def isnum(cls, nextchar):
-""" Whether the next character is part of a number """
-return nextchar.isdigit()
-@classmethod
-def isspace(cls, nextchar):
-""" Whether the next character is whitespace """
-return nextchar.isspace()
-class _resultbase(object):
-def __init__(self):
-for attr in self.__slots__:
-setattr(self, attr, None)
-def _repr(self, classname):
-l = []
-for attr in self.__slots__:
-value = getattr(self, attr)
-if value is not None:
-l.append("%s=%s" % (attr, repr(value)))
-return "%s(%s)" % (classname, ", ".join(l))
-def __len__(self):
-return (sum(getattr(self, attr) is not None
-for attr in self.__slots__))
-def __repr__(self):
-return self._repr(self.__class__.__name__)
-class parserinfo(object):
-"""
-Class which handles what inputs are accepted. Subclass this to customize
-the language and acceptable values for each parameter.
-:param dayfirst:
-Whether to interpret the first value in an ambiguous 3-integer date
-(e.g. 01/05/09) as the day (``True``) or month (``False``). If
-``yearfirst`` is set to ``True``, this distinguishes between YDM
-and YMD. Default is ``False``.
-:param yearfirst:
-Whether to interpret the first value in an ambiguous 3-integer date
-(e.g. 01/05/09) as the year. If ``True``, the first number is taken
-to be the year, otherwise the last number is taken to be the year.
-Default is ``False``.
-"""
-# m from a.m/p.m, t from ISO T separator
-JUMP = [" ", ".", ",", ";", "-", "/", "'",
-"at", "on", "and", "ad", "m", "t", "of",
-"st", "nd", "rd", "th"]
-WEEKDAYS = [("Mon", "Monday"),
-("Tue", "Tuesday"),     # TODO: "Tues"
-("Wed", "Wednesday"),
-("Thu", "Thursday"),    # TODO: "Thurs"
-("Fri", "Friday"),
-("Sat", "Saturday"),
-("Sun", "Sunday")]
-MONTHS = [("Jan", "January"),
-("Feb", "February"),      # TODO: "Febr"
-("Mar", "March"),
-("Apr", "April"),
-("May", "May"),
-("Jun", "June"),
-("Jul", "July"),
-("Aug", "August"),
-("Sep", "Sept", "September"),
-("Oct", "October"),
-("Nov", "November"),
-("Dec", "December")]
-HMS = [("h", "hour", "hours"),
-("m", "minute", "minutes"),
-("s", "second", "seconds")]
-AMPM = [("am", "a"),
-("pm", "p")]
-UTCZONE = ["UTC", "GMT", "Z", "z"]
-PERTAIN = ["of"]
-TZOFFSET = {}
-# TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
-#              "Anno Domini", "Year of Our Lord"]
-def __init__(self, dayfirst=False, yearfirst=False):
-self._jump = self._convert(self.JUMP)
-self._weekdays = self._convert(self.WEEKDAYS)
-self._months = self._convert(self.MONTHS)
-self._hms = self._convert(self.HMS)
-self._ampm = self._convert(self.AMPM)
-self._utczone = self._convert(self.UTCZONE)
-self._pertain = self._convert(self.PERTAIN)
-self.dayfirst = dayfirst
-self.yearfirst = yearfirst
-self._year = time.localtime().tm_year
-self._century = self._year // 100 * 100
-def _convert(self, lst):
-dct = {}
-for i, v in enumerate(lst):
-if isinstance(v, tuple):
-for v in v:
-dct[v.lower()] = i
-else:
-dct[v.lower()] = i
-return dct
-def jump(self, name):
-return name.lower() in self._jump
-def weekday(self, name):
-try:
-return self._weekdays[name.lower()]
-except KeyError:
-pass
-return None
-def month(self, name):
-try:
-return self._months[name.lower()] + 1
-except KeyError:
-pass
-return None
-def hms(self, name):
-try:
-return self._hms[name.lower()]
-except KeyError:
-return None
-def ampm(self, name):
-try:
-return self._ampm[name.lower()]
-except KeyError:
-return None
-def pertain(self, name):
-return name.lower() in self._pertain
-def utczone(self, name):
-return name.lower() in self._utczone
-def tzoffset(self, name):
-if name in self._utczone:
-return 0
-return self.TZOFFSET.get(name)
-def convertyear(self, year, century_specified=False):
-"""
-Converts two-digit years to year within [-50, 49]
-range of self._year (current local time)
-"""
-# Function contract is that the year is always positive
-assert year >= 0
-if year < 100 and not century_specified:
-# assume current century to start
-year += self._century
-if year >= self._year + 50:  # if too far in future
-year -= 100
-elif year < self._year - 50:  # if too far in past
-year += 100
-return year
-def validate(self, res):
-# move to info
-if res.year is not None:
-res.year = self.convertyear(res.year, res.century_specified)
-if ((res.tzoffset == 0 and not res.tzname) or
-(res.tzname == 'Z' or res.tzname == 'z')):
-res.tzname = "UTC"
-res.tzoffset = 0
-elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
-res.tzoffset = 0
-return True
-class _ymd(list):
-def __init__(self, *args, **kwargs):
-super(self.__class__, self).__init__(*args, **kwargs)
-self.century_specified = False
-self.dstridx = None
-self.mstridx = None
-self.ystridx = None
-@property
-def has_year(self):
-return self.ystridx is not None
-@property
-def has_month(self):
-return self.mstridx is not None
-@property
-def has_day(self):
-return self.dstridx is not None
-def could_be_day(self, value):
-if self.has_day:
-return False
-elif not self.has_month:
-return 1 <= value <= 31
-elif not self.has_year:
-# Be permissive, assume leap year
-month = self[self.mstridx]
-return 1 <= value <= monthrange(2000, month)[1]
-else:
-month = self[self.mstridx]
-year = self[self.ystridx]
-return 1 <= value <= monthrange(year, month)[1]
-def append(self, val, label=None):
-if hasattr(val, '__len__'):
-if val.isdigit() and len(val) > 2:
-self.century_specified = True
-if label not in [None, 'Y']:  # pragma: no cover
-raise ValueError(label)
-label = 'Y'
-elif val > 100:
-self.century_specified = True
-if label not in [None, 'Y']:  # pragma: no cover
-raise ValueError(label)
-label = 'Y'
-super(self.__class__, self).append(int(val))
-if label == 'M':
-if self.has_month:
-raise ValueError('Month is already set')
-self.mstridx = len(self) - 1
-elif label == 'D':
-if self.has_day:
-raise ValueError('Day is already set')
-self.dstridx = len(self) - 1
-elif label == 'Y':
-if self.has_year:
-raise ValueError('Year is already set')
-self.ystridx = len(self) - 1
-def _resolve_from_stridxs(self, strids):
-"""
-Try to resolve the identities of year/month/day elements using
-ystridx, mstridx, and dstridx, if enough of these are specified.
-"""
-if len(self) == 3 and len(strids) == 2:
-# we can back out the remaining stridx value
-missing = [x for x in range(3) if x not in strids.values()]
-key = [x for x in ['y', 'm', 'd'] if x not in strids]
-assert len(missing) == len(key) == 1
-key = key[0]
-val = missing[0]
-strids[key] = val
-assert len(self) == len(strids)  # otherwise this should not be called
-out = {key: self[strids[key]] for key in strids}
-return (out.get('y'), out.get('m'), out.get('d'))
-def resolve_ymd(self, yearfirst, dayfirst):
-len_ymd = len(self)
-year, month, day = (None, None, None)
-strids = (('y', self.ystridx),
-('m', self.mstridx),
-('d', self.dstridx))
-strids = {key: val for key, val in strids if val is not None}
-if (len(self) == len(strids) > 0 or
-(len(self) == 3 and len(strids) == 2)):
-return self._resolve_from_stridxs(strids)
-mstridx = self.mstridx
-if len_ymd > 3:
-raise ValueError("More than three YMD values")
-elif len_ymd == 1 or (mstridx is not None and len_ymd == 2):
-# One member, or two members with a month string
-if mstridx is not None:
-month = self[mstridx]
-# since mstridx is 0 or 1, self[mstridx-1] always
-# looks up the other element
-other = self[mstridx - 1]
-else:
-other = self[0]
-if len_ymd > 1 or mstridx is None:
-if other > 31:
-year = other
-else:
-day = other
-elif len_ymd == 2:
-# Two members with numbers
-if self[0] > 31:
-# 99-01
-year, month = self
-elif self[1] > 31:
-# 01-99
-month, year = self
-elif dayfirst and self[1] <= 12:
-# 13-01
-day, month = self
-else:
-# 01-13
-month, day = self
-elif len_ymd == 3:
-# Three members
-if mstridx == 0:
-if self[1] > 31:
-# Apr-2003-25
-month, year, day = self
-else:
-month, day, year = self
-elif mstridx == 1:
-if self[0] > 31 or (yearfirst and self[2] <= 31):
-# 99-Jan-01
-year, month, day = self
-else:
-# 01-Jan-01
-# Give precedence to day-first, since
-# two-digit years is usually hand-written.
-day, month, year = self
-elif mstridx == 2:
-# WTF!?
-if self[1] > 31:
-# 01-99-Jan
-day, year, month = self
-else:
-# 99-01-Jan
-year, day, month = self
-else:
-if (self[0] > 31 or
-self.ystridx == 0 or
-(yearfirst and self[1] <= 12 and self[2] <= 31)):
-# 99-01-01
-if dayfirst and self[2] <= 12:
-year, day, month = self
-else:
-year, month, day = self
-elif self[0] > 12 or (dayfirst and self[1] <= 12):
-# 13-01-01
-day, month, year = self
-else:
-# 01-13-01
-month, day, year = self
-return year, month, day
-class parser(object):
-def __init__(self, info=None):
-self.info = info or parserinfo()
-def parse(self, timestr, default=None,
-ignoretz=False, tzinfos=None, **kwargs):
-"""
-Parse the date/time string into a :class:`datetime.datetime` object.
-:param timestr:
-Any date/time string using the supported formats.
-:param default:
-The default datetime object, if this is a datetime object and not
-``None``, elements specified in ``timestr`` replace elements in the
-default object.
-:param ignoretz:
-If set ``True``, time zones in parsed strings are ignored and a
-naive :class:`datetime.datetime` object is returned.
-:param tzinfos:
-Additional time zone names / aliases which may be present in the
-string. This argument maps time zone names (and optionally offsets
-from those time zones) to time zones. This parameter can be a
-dictionary with timezone aliases mapping time zone names to time
-zones or a function taking two parameters (``tzname`` and
-``tzoffset``) and returning a time zone.
-The timezones to which the names are mapped can be an integer
-offset from UTC in seconds or a :class:`tzinfo` object.
-.. doctest::
-:options: +NORMALIZE_WHITESPACE
->>> from dateutil.parser import parse
->>> from dateutil.tz import gettz
->>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
->>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
-datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
->>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
-datetime.datetime(2012, 1, 19, 17, 21,
-tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
-This parameter is ignored if ``ignoretz`` is set.
-:param \\*\\*kwargs:
-Keyword arguments as passed to ``_parse()``.
-:return:
-Returns a :class:`datetime.datetime` object or, if the
-``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
-first element being a :class:`datetime.datetime` object, the second
-a tuple containing the fuzzy tokens.
-:raises ParserError:
-Raised for invalid or unknown string format, if the provided
-:class:`tzinfo` is not in a valid format, or if an invalid date
-would be created.
-:raises TypeError:
-Raised for non-string or character stream input.
-:raises OverflowError:
-Raised if the parsed date exceeds the largest valid C integer on
-your system.
-"""
-if default is None:
-default = datetime.datetime.now().replace(hour=0, minute=0,
-second=0, microsecond=0)
-res, skipped_tokens = self._parse(timestr, **kwargs)
-if res is None:
-raise ParserError("Unknown string format: %s", timestr)
-if len(res) == 0:
-raise ParserError("String does not contain a date: %s", timestr)
-try:
-ret = self._build_naive(res, default)
-except ValueError as e:
-six.raise_from(ParserError(e.args[0] + ": %s", timestr), e)
-if not ignoretz:
-ret = self._build_tzaware(ret, res, tzinfos)
-if kwargs.get('fuzzy_with_tokens', False):
-return ret, skipped_tokens
-else:
-return ret
-class _result(_resultbase):
-__slots__ = ["year", "month", "day", "weekday",
-"hour", "minute", "second", "microsecond",
-"tzname", "tzoffset", "ampm","any_unused_tokens"]
-def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
-fuzzy_with_tokens=False):
-"""
-Private method which performs the heavy lifting of parsing, called from
-``parse()``, which passes on its ``kwargs`` to this function.
-:param timestr:
-The string to parse.
-:param dayfirst:
-Whether to interpret the first value in an ambiguous 3-integer date
-(e.g. 01/05/09) as the day (``True``) or month (``False``). If
-``yearfirst`` is set to ``True``, this distinguishes between YDM
-and YMD. If set to ``None``, this value is retrieved from the
-current :class:`parserinfo` object (which itself defaults to
-``False``).
-:param yearfirst:
-Whether to interpret the first value in an ambiguous 3-integer date
-(e.g. 01/05/09) as the year. If ``True``, the first number is taken
-to be the year, otherwise the last number is taken to be the year.
-If this is set to ``None``, the value is retrieved from the current
-:class:`parserinfo` object (which itself defaults to ``False``).
-:param fuzzy:
-Whether to allow fuzzy parsing, allowing for string like "Today is
-January 1, 2047 at 8:21:00AM".
-:param fuzzy_with_tokens:
-If ``True``, ``fuzzy`` is automatically set to True, and the parser
-will return a tuple where the first element is the parsed
-:class:`datetime.datetime` datetimestamp and the second element is
-a tuple containing the portions of the string which were ignored:
-.. doctest::
->>> from dateutil.parser import parse
->>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
-(datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))
-"""
-if fuzzy_with_tokens:
-fuzzy = True
-info = self.info
-if dayfirst is None:
-dayfirst = info.dayfirst
-if yearfirst is None:
-yearfirst = info.yearfirst
-res = self._result()
-l = _timelex.split(timestr)         # Splits the timestr into tokens
-skipped_idxs = []
-# year/month/day list
-ymd = _ymd()
-len_l = len(l)
-i = 0
-try:
-while i < len_l:
-# Check if it's a number
-value_repr = l[i]
-try:
-value = float(value_repr)
-except ValueError:
-value = None
-if value is not None:
-# Numeric token
-i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)
-# Check weekday
-elif info.weekday(l[i]) is not None:
-value = info.weekday(l[i])
-res.weekday = value
-# Check month name
-elif info.month(l[i]) is not None:
-value = info.month(l[i])
-ymd.append(value, 'M')
-if i + 1 < len_l:
-if l[i + 1] in ('-', '/'):
-# Jan-01[-99]
-sep = l[i + 1]
-ymd.append(l[i + 2])
-if i + 3 < len_l and l[i + 3] == sep:
-# Jan-01-99
-ymd.append(l[i + 4])
-i += 2
-i += 2
-elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
-info.pertain(l[i + 2])):
-# Jan of 01
-# In this case, 01 is clearly year
-if l[i + 4].isdigit():
-# Convert it here to become unambiguous
-value = int(l[i + 4])
-year = str(info.convertyear(value))
-ymd.append(year, 'Y')
-else:
-# Wrong guess
-pass
-# TODO: not hit in tests
-i += 4
-# Check am/pm
-elif info.ampm(l[i]) is not None:
-value = info.ampm(l[i])
-val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)
-if val_is_ampm:
-res.hour = self._adjust_ampm(res.hour, value)
-res.ampm = value
-elif fuzzy:
-skipped_idxs.append(i)
-# Check for a timezone name
-elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
-res.tzname = l[i]
-res.tzoffset = info.tzoffset(res.tzname)
-# Check for something like GMT+3, or BRST+3. Notice
-# that it doesn't mean "I am 3 hours after GMT", but
-# "my time +3 is GMT". If found, we reverse the
-# logic so that timezone parsing code will get it
-# right.
-if i + 1 < len_l and l[i + 1] in ('+', '-'):
-l[i + 1] = ('+', '-')[l[i + 1] == '+']
-res.tzoffset = None
-if info.utczone(res.tzname):
-# With something like GMT+3, the timezone
-# is *not* GMT.
-res.tzname = None
-# Check for a numbered timezone
-elif res.hour is not None and l[i] in ('+', '-'):
-signal = (-1, 1)[l[i] == '+']
-len_li = len(l[i + 1])
-# TODO: check that l[i + 1] is integer?
-if len_li == 4:
-# -0300
-hour_offset = int(l[i + 1][:2])
-min_offset = int(l[i + 1][2:])
-elif i + 2 < len_l and l[i + 2] == ':':
-# -03:00
-hour_offset = int(l[i + 1])
-min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
-i += 2
-elif len_li <= 2:
-# -[0]3
-hour_offset = int(l[i + 1][:2])
-min_offset = 0
-else:
-raise ValueError(timestr)
-res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)
-# Look for a timezone name between parenthesis
-if (i + 5 < len_l and
-info.jump(l[i + 2]) and l[i + 3] == '(' and
-l[i + 5] == ')' and
-3 <= len(l[i + 4]) and
-self._could_be_tzname(res.hour, res.tzname,
-None, l[i + 4])):
-# -0300 (BRST)
-res.tzname = l[i + 4]
-i += 4
-i += 1
-# Check jumps
-elif not (info.jump(l[i]) or fuzzy):
-raise ValueError(timestr)
-else:
-skipped_idxs.append(i)
-i += 1
-# Process year/month/day
-year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)
-res.century_specified = ymd.century_specified
-res.year = year
-res.month = month
-res.day = day
-except (IndexError, ValueError):
-return None, None
-if not info.validate(res):
-return None, None
-if fuzzy_with_tokens:
-skipped_tokens = self._recombine_skipped(l, skipped_idxs)
-return res, tuple(skipped_tokens)
-else:
-return res, None
-def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
-# Token is a number
-value_repr = tokens[idx]
-try:
-value = self._to_decimal(value_repr)
-except Exception as e:
-six.raise_from(ValueError('Unknown numeric token'), e)
-len_li = len(value_repr)
-len_l = len(tokens)
-if (len(ymd) == 3 and len_li in (2, 4) and
-res.hour is None and
-(idx + 1 >= len_l or
-(tokens[idx + 1] != ':' and
-info.hms(tokens[idx + 1]) is None))):
-# 19990101T23[59]
-s = tokens[idx]
-res.hour = int(s[:2])
-if len_li == 4:
-res.minute = int(s[2:])
-elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
-# YYMMDD or HHMMSS[.ss]
-s = tokens[idx]
-if not ymd and '.' not in tokens[idx]:
-ymd.append(s[:2])
-ymd.append(s[2:4])
-ymd.append(s[4:])
-else:
-# 19990101T235959[.59]
-# TODO: Check if res attributes already set.
-res.hour = int(s[:2])
-res.minute = int(s[2:4])
-res.second, res.microsecond = self._parsems(s[4:])
-elif len_li in (8, 12, 14):
-# YYYYMMDD
-s = tokens[idx]
-ymd.append(s[:4], 'Y')
-ymd.append(s[4:6])
-ymd.append(s[6:8])
-if len_li > 8:
-res.hour = int(s[8:10])
-res.minute = int(s[10:12])
-if len_li > 12:
-res.second = int(s[12:])
-elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
-# HH[ ]h or MM[ ]m or SS[.ss][ ]s
-hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
-(idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
-if hms is not None:
-# TODO: checking that hour/minute/second are not
-# already set?
-self._assign_hms(res, value_repr, hms)
-elif idx + 2 < len_l and tokens[idx + 1] == ':':
-# HH:MM[:SS[.ss]]
-res.hour = int(value)
-value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
-(res.minute, res.second) = self._parse_min_sec(value)
-if idx + 4 < len_l and tokens[idx + 3] == ':':
-res.second, res.microsecond = self._parsems(tokens[idx + 4])
-idx += 2
-idx += 2
-elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
-sep = tokens[idx + 1]
-ymd.append(value_repr)
-if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
-if tokens[idx + 2].isdigit():
-# 01-01[-01]
-ymd.append(tokens[idx + 2])
-else:
-# 01-Jan[-01]
-value = info.month(tokens[idx + 2])
-if value is not None:
-ymd.append(value, 'M')
-else:
-raise ValueError()
-if idx + 3 < len_l and tokens[idx + 3] == sep:
-# We have three members
-value = info.month(tokens[idx + 4])
-if value is not None:
-ymd.append(value, 'M')
-else:
-ymd.append(tokens[idx + 4])
-idx += 2
-idx += 1
-idx += 1
-elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
-if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
-# 12 am
-hour = int(value)
-res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
-idx += 1
-else:
-# Year, month or day
-ymd.append(value)
-idx += 1
-elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
-# 12am
-hour = int(value)
-res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
-idx += 1
-elif ymd.could_be_day(value):
-ymd.append(value)
-elif not fuzzy:
-raise ValueError()
-return idx
-def _find_hms_idx(self, idx, tokens, info, allow_jump):
-len_l = len(tokens)
-if idx+1 < len_l and info.hms(tokens[idx+1]) is not None:
-# There is an "h", "m", or "s" label following this token.  We take
-# assign the upcoming label to the current token.
-# e.g. the "12" in 12h"
-hms_idx = idx + 1
-elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and
-info.hms(tokens[idx+2]) is not None):
-# There is a space and then an "h", "m", or "s" label.
-# e.g. the "12" in "12 h"
-hms_idx = idx + 2
-elif idx > 0 and info.hms(tokens[idx-1]) is not None:
-# There is a "h", "m", or "s" preceding this token.  Since neither
-# of the previous cases was hit, there is no label following this
-# token, so we use the previous label.
-# e.g. the "04" in "12h04"
-hms_idx = idx-1
-elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and
-info.hms(tokens[idx-2]) is not None):
-# If we are looking at the final token, we allow for a
-# backward-looking check to skip over a space.
-# TODO: Are we sure this is the right condition here?
-hms_idx = idx - 2
-else:
-hms_idx = None
-return hms_idx
-def _assign_hms(self, res, value_repr, hms):
-# See GH issue #427, fixing float rounding
-value = self._to_decimal(value_repr)
-if hms == 0:
-# Hour
-res.hour = int(value)
-if value % 1:
-res.minute = int(60*(value % 1))
-elif hms == 1:
-(res.minute, res.second) = self._parse_min_sec(value)
-elif hms == 2:
-(res.second, res.microsecond) = self._parsems(value_repr)
-def _could_be_tzname(self, hour, tzname, tzoffset, token):
-return (hour is not None and
-tzname is None and
-tzoffset is None and
-len(token) <= 5 and
-(all(x in string.ascii_uppercase for x in token)
-or token in self.info.UTCZONE))
-def _ampm_valid(self, hour, ampm, fuzzy):
-"""
-For fuzzy parsing, 'a' or 'am' (both valid English words)
-may erroneously trigger the AM/PM flag. Deal with that
-here.
-"""
-val_is_ampm = True
-# If there's already an AM/PM flag, this one isn't one.
-if fuzzy and ampm is not None:
-val_is_ampm = False
-# If AM/PM is found and hour is not, raise a ValueError
-if hour is None:
-if fuzzy:
-val_is_ampm = False
-else:
-raise ValueError('No hour specified with AM or PM flag.')
-elif not 0 <= hour <= 12:
-# If AM/PM is found, it's a 12 hour clock, so raise
-# an error for invalid range
-if fuzzy:
-val_is_ampm = False
-else:
-raise ValueError('Invalid hour specified for 12-hour clock.')
-return val_is_ampm
-def _adjust_ampm(self, hour, ampm):
-if hour < 12 and ampm == 1:
-hour += 12
-elif hour == 12 and ampm == 0:
-hour = 0
-return hour
-def _parse_min_sec(self, value):
-# TODO: Every usage of this function sets res.second to the return
-# value. Are there any cases where second will be returned as None and
-# we *don't* want to set res.second = None?
-minute = int(value)
-second = None
-sec_remainder = value % 1
-if sec_remainder:
-second = int(60 * sec_remainder)
-return (minute, second)
-def _parse_hms(self, idx, tokens, info, hms_idx):
-# TODO: Is this going to admit a lot of false-positives for when we
-# just happen to have digits and "h", "m" or "s" characters in non-date
-# text?  I guess hex hashes won't have that problem, but there's plenty
-# of random junk out there.
-if hms_idx is None:
-hms = None
-new_idx = idx
-elif hms_idx > idx:
-hms = info.hms(tokens[hms_idx])
-new_idx = hms_idx
-else:
-# Looking backwards, increment one.
-hms = info.hms(tokens[hms_idx]) + 1
-new_idx = idx
-return (new_idx, hms)
-# ------------------------------------------------------------------
-# Handling for individual tokens.  These are kept as methods instead
-#  of functions for the sake of customizability via subclassing.
-def _parsems(self, value):
-"""Parse a I[.F] seconds value into (seconds, microseconds)."""
-if "." not in value:
-return int(value), 0
-else:
-i, f = value.split(".")
-return int(i), int(f.ljust(6, "0")[:6])
-def _to_decimal(self, val):
-try:
-decimal_value = Decimal(val)
-# See GH 662, edge case, infinite value should not be converted
-#  via `_to_decimal`
-if not decimal_value.is_finite():
-raise ValueError("Converted decimal value is infinite or NaN")
-except Exception as e:
-msg = "Could not convert %s to decimal" % val
-six.raise_from(ValueError(msg), e)
-else:
-return decimal_value
-# ------------------------------------------------------------------
-# Post-Parsing construction of datetime output.  These are kept as
-#  methods instead of functions for the sake of customizability via
-#  subclassing.
-def _build_tzinfo(self, tzinfos, tzname, tzoffset):
-if callable(tzinfos):
-tzdata = tzinfos(tzname, tzoffset)
-else:
-tzdata = tzinfos.get(tzname)
-# handle case where tzinfo is paased an options that returns None
-# eg tzinfos = {'BRST' : None}
-if isinstance(tzdata, datetime.tzinfo) or tzdata is None:
-tzinfo = tzdata
-elif isinstance(tzdata, text_type):
-tzinfo = tz.tzstr(tzdata)
-elif isinstance(tzdata, integer_types):
-tzinfo = tz.tzoffset(tzname, tzdata)
-else:
-raise TypeError("Offset must be tzinfo subclass, tz string, "
-"or int offset.")
-return tzinfo
-def _build_tzaware(self, naive, res, tzinfos):
-if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)):
-tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset)
-aware = naive.replace(tzinfo=tzinfo)
-aware = self._assign_tzname(aware, res.tzname)
-elif res.tzname and res.tzname in time.tzname:
-aware = naive.replace(tzinfo=tz.tzlocal())
-# Handle ambiguous local datetime
-aware = self._assign_tzname(aware, res.tzname)
-# This is mostly relevant for winter GMT zones parsed in the UK
-if (aware.tzname() != res.tzname and
-res.tzname in self.info.UTCZONE):
-aware = aware.replace(tzinfo=tz.UTC)
-elif res.tzoffset == 0:
-aware = naive.replace(tzinfo=tz.UTC)
-elif res.tzoffset:
-aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
-elif not res.tzname and not res.tzoffset:
-# i.e. no timezone information was found.
-aware = naive
-elif res.tzname:
-# tz-like string was parsed but we don't know what to do
-# with it
-warnings.warn("tzname {tzname} identified but not understood.  "
-"Pass `tzinfos` argument in order to correctly "
-"return a timezone-aware datetime.  In a future "
-"version, this will raise an "
-"exception.".format(tzname=res.tzname),
-category=UnknownTimezoneWarning)
-aware = naive
-return aware
-def _build_naive(self, res, default):
-repl = {}
-for attr in ("year", "month", "day", "hour",
-"minute", "second", "microsecond"):
-value = getattr(res, attr)
-if value is not None:
-repl[attr] = value
-if 'day' not in repl:
-# If the default day exceeds the last day of the month, fall back
-# to the end of the month.
-cyear = default.year if res.year is None else res.year
-cmonth = default.month if res.month is None else res.month
-cday = default.day if res.day is None else res.day
-if cday > monthrange(cyear, cmonth)[1]:
-repl['day'] = monthrange(cyear, cmonth)[1]
-naive = default.replace(**repl)
-if res.weekday is not None and not res.day:
-naive = naive + relativedelta.relativedelta(weekday=res.weekday)
-return naive
-def _assign_tzname(self, dt, tzname):
-if dt.tzname() != tzname:
-new_dt = tz.enfold(dt, fold=1)
-if new_dt.tzname() == tzname:
-return new_dt
-return dt
-def _recombine_skipped(self, tokens, skipped_idxs):
-"""
->>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
->>> skipped_idxs = [0, 1, 2, 5]
->>> _recombine_skipped(tokens, skipped_idxs)
-["foo bar", "baz"]
-"""
-skipped_tokens = []
-for i, idx in enumerate(sorted(skipped_idxs)):
-if i > 0 and idx - 1 == skipped_idxs[i - 1]:
-skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
-else:
-skipped_tokens.append(tokens[idx])
-return skipped_tokens
# Shared parser instance used by parse() when no parserinfo is supplied.
DEFAULTPARSER = parser()


def parse(timestr, parserinfo=None, **kwargs):
    """
    Parse a string in one of the supported formats, using the
    ``parserinfo`` parameters.

    :param timestr:
        A string containing a date/time stamp.

    :param parserinfo:
        A :class:`parserinfo` object containing parameters for the parser.
        If ``None``, the default arguments to the :class:`parserinfo`
        constructor are used.

    The ``**kwargs`` parameter takes the following keyword arguments:

    :param default:
        The default datetime object. If this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace elements in
        the default object.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a
        naive :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. This parameter can be a
        dictionary with timezone aliases mapping time zone names to time
        zones or a function taking two parameters (``tzname`` and
        ``tzoffset``) and returning a time zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM and
        YMD. If set to ``None``, this value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        If this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for string like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` datetimestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ValueError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    # A truthy parserinfo gets its own parser; otherwise reuse the shared
    # module-level instance.
    active_parser = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return active_parser.parse(timestr, **kwargs)
class _tzparser(object):
    """Parser for ``TZ``-variable style time zone strings.

    The shapes recognised by :meth:`parse` are, going by the examples in
    the code: ``BRST+3[BRDT[+2]]`` for the abbreviations and offsets,
    optionally followed by transition rules either in the deprecated
    dateutil-specific comma list
    (``GMT0BST,3,0,30,3600,10,0,26,7200[,3600]``) or in the ``J``/``M``
    rule form with optional ``/time`` suffixes.
    """

    class _result(_resultbase):
        """Holds the abbreviations, offsets and start/end rules parsed."""

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            """One transition rule (start or end of DST)."""

            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """Parse ``tzstr`` into a :class:`_result`.

        :param tzstr:
            The time zone string to parse.

        :return:
            A populated ``_result`` whose ``any_unused_tokens`` attribute
            is true when some token other than ``,`` or ``:`` was not
            consumed, or ``None`` if the string could not be parsed.

        Malformed input is reported by returning ``None``; the structural
        checks raise ``ValueError`` internally (not ``assert``, which is
        stripped under ``python -O``) and are caught below.
        """
        res = self._result()
        tokens = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)', tzstr)
                  if x]
        n_tokens = len(tokens)
        used_idxs = list()
        try:
            i = 0
            while i < n_tokens:
                # BRST+3[BRDT[+2]]
                # Collect the run of tokens forming a zone abbreviation:
                # everything up to the first token containing a digit,
                # sign, ':' or ','.
                j = i
                while j < n_tokens and not [x for x in tokens[j]
                                            if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(tokens[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(tokens[i:j])

                    for ii in range(j):
                        used_idxs.append(ii)
                    i = j
                    if (i < n_tokens and (tokens[i] in ('+', '-') or
                                          tokens[i][0] in "0123456789")):
                        if tokens[i] in ('+', '-'):
                            # Yes, that's right.  See the TZ variable
                            # documentation.
                            signal = (1, -1)[tokens[i] == '+']
                            used_idxs.append(i)
                            i += 1
                        else:
                            signal = -1
                        len_li = len(tokens[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr,
                                    (int(tokens[i][:2]) * 3600 +
                                     int(tokens[i][2:]) * 60) * signal)
                        elif i + 1 < n_tokens and tokens[i + 1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(tokens[i]) * 3600 +
                                     int(tokens[i + 2]) * 60) * signal)
                            used_idxs.append(i)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(tokens[i][:2]) * 3600 * signal)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1
                        if res.dstabbr:
                            break
                else:
                    # No abbreviation starts here; leave the header loop.
                    break

            if i < n_tokens:
                # ';' is accepted as an alternative rule separator.
                for j in range(i, n_tokens):
                    if tokens[j] == ';':
                        tokens[j] = ','

                if tokens[i] != ',':
                    raise ValueError("expected ',' before transition rules")

                i += 1

            if i >= n_tokens:
                pass

            elif (8 <= tokens.count(',') <= 9 and
                  not [y for x in tokens[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for rule in (res.start, res.end):
                    rule.month = int(tokens[i])
                    used_idxs.append(i)
                    i += 2
                    if tokens[i] == '-':
                        value = int(tokens[i + 1]) * -1
                        used_idxs.append(i)
                        i += 1
                    else:
                        value = int(tokens[i])
                    used_idxs.append(i)
                    i += 2
                    if value:
                        rule.week = value
                        rule.weekday = (int(tokens[i]) - 1) % 7
                    else:
                        rule.day = int(tokens[i])
                    used_idxs.append(i)
                    i += 2
                    rule.time = int(tokens[i])
                    used_idxs.append(i)
                    i += 2
                if i < n_tokens:
                    if tokens[i] in ('-', '+'):
                        signal = (-1, 1)[tokens[i] == "+"]
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = 1
                    used_idxs.append(i)
                    res.dstoffset = (res.stdoffset +
                                     int(tokens[i]) * signal)

                # This was a made-up format that is not in normal use
                warnings.warn(
                    ('Parsed time zone "%s"' % tzstr) +
                    ' is in a non-standard dateutil-specific format, which ' +
                    'is now deprecated; support for parsing this format ' +
                    'will be removed in future versions. It is recommended ' +
                    'that you switch to a standard format like the GNU ' +
                    'TZ variable format.', tz.DeprecatedTzFormatWarning)
            elif (tokens.count(',') == 2 and tokens[i:].count('/') <= 2 and
                  not [y for x in tokens[i:] if x not in (',', '/', 'J', 'M',
                                                          '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for rule in (res.start, res.end):
                    if tokens[i] == 'J':
                        # non-leap year day (1 based)
                        used_idxs.append(i)
                        i += 1
                        rule.jyday = int(tokens[i])
                    elif tokens[i] == 'M':
                        # month[-.]week[-.]weekday
                        used_idxs.append(i)
                        i += 1
                        rule.month = int(tokens[i])
                        used_idxs.append(i)
                        i += 1
                        if tokens[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after month")
                        used_idxs.append(i)
                        i += 1
                        rule.week = int(tokens[i])
                        if rule.week == 5:
                            rule.week = -1
                        used_idxs.append(i)
                        i += 1
                        if tokens[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after week")
                        used_idxs.append(i)
                        i += 1
                        rule.weekday = (int(tokens[i]) - 1) % 7
                    else:
                        # year day (zero based)
                        rule.yday = int(tokens[i]) + 1

                    used_idxs.append(i)
                    i += 1

                    if i < n_tokens and tokens[i] == '/':
                        used_idxs.append(i)
                        i += 1
                        # start time
                        len_li = len(tokens[i])
                        if len_li == 4:
                            # -0300
                            rule.time = (int(tokens[i][:2]) * 3600 +
                                         int(tokens[i][2:]) * 60)
                        elif i + 1 < n_tokens and tokens[i + 1] == ':':
                            # -03:00
                            rule.time = (int(tokens[i]) * 3600 +
                                         int(tokens[i + 2]) * 60)
                            used_idxs.append(i)
                            i += 2
                            if i + 1 < n_tokens and tokens[i + 1] == ':':
                                used_idxs.append(i)
                                i += 2
                                rule.time += int(tokens[i])
                        elif len_li <= 2:
                            # -[0]3
                            rule.time = (int(tokens[i][:2]) * 3600)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1

                    if i != n_tokens and tokens[i] != ',':
                        raise ValueError("expected ',' or end after rule")

                    i += 1

                if i < n_tokens:
                    raise ValueError("unexpected trailing tokens")

        except (IndexError, ValueError, AssertionError):
            return None

        unused_idxs = set(range(n_tokens)).difference(used_idxs)
        res.any_unused_tokens = not {
            tokens[n] for n in unused_idxs}.issubset({",", ":"})
        return res
# Shared time zone string parser instance used by _parsetz().
DEFAULTTZPARSER = _tzparser()


def _parsetz(tzstr):
    """Parse ``tzstr`` with the shared module-level ``DEFAULTTZPARSER``.

    :param tzstr:
        The time zone string to parse.

    :return:
        Whatever ``DEFAULTTZPARSER.parse`` returns for ``tzstr``.
    """
    shared_parser = DEFAULTTZPARSER
    return shared_parser.parse(tzstr)
class ParserError(ValueError):
    """Error class for representing failure to parse a datetime string.

    The first argument is treated as a ``%``-style format string and the
    remaining arguments as its values, e.g.
    ``ParserError("Unknown string format: %s", timestr)``.
    """

    def __str__(self):
        """Return the formatted message, or the plain one if formatting fails.

        Formatting is best-effort: with no arguments (``IndexError``), a
        non-string first argument or mismatched values (``TypeError``), or
        a message containing a stray ``%`` such as ``"100% wrong"``
        (``ValueError``), the default ``ValueError`` text is returned.
        """
        try:
            return self.args[0] % self.args[1:]
        except (TypeError, IndexError, ValueError):
            return super(ParserError, self).__str__()

    def __repr__(self):
        """Return ``ClassName(message)`` using the formatted message."""
        return "%s(%s)" % (self.__class__.__name__, str(self))
class UnknownTimezoneWarning(RuntimeWarning):
    """Warning issued when a parsed time zone name cannot be converted
    into a ``tzinfo`` object."""
-# vim:ts=4:sw=4:et

Mercurial > repos > shellac > guppy_basecaller

comparison env/lib/python3.7/site-packages/dateutil/parser/_parser.py @ 5:9b1c78e6ba9c draft default tip