Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/dateutil/parser/_parser.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
| author | shellac | 
|---|---|
| date | Mon, 01 Jun 2020 08:59:25 -0400 | 
| parents | 79f47841a781 | 
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 4:79f47841a781 | 5:9b1c78e6ba9c | 
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 """ | |
| 3 This module offers a generic date/time string parser which is able to parse | |
| 4 most known formats to represent a date and/or time. | |
| 5 | |
| 6 This module attempts to be forgiving with regards to unlikely input formats, | |
| 7 returning a datetime object even for dates which are ambiguous. If an element | |
| 8 of a date/time stamp is omitted, the following rules are applied: | |
| 9 | |
| 10 - If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour | |
| 11 on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is | |
| 12 specified. | |
| 13 - If a time zone is omitted, a timezone-naive datetime is returned. | |
| 14 | |
| 15 If any other elements are missing, they are taken from the | |
| 16 :class:`datetime.datetime` object passed to the parameter ``default``. If this | |
| 17 results in a day number exceeding the valid number of days per month, the | |
| 18 value falls back to the end of the month. | |
| 19 | |
| 20 Additional resources about date/time string formats can be found below: | |
| 21 | |
| 22 - `A summary of the international standard date and time notation | |
| 23 <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_ | |
| 24 - `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_ | |
| 25 - `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_ | |
| 26 - `CPAN ParseDate module | |
| 27 <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_ | |
| 28 - `Java SimpleDateFormat Class | |
| 29 <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_ | |
| 30 """ | |
| 31 from __future__ import unicode_literals | |
| 32 | |
| 33 import datetime | |
| 34 import re | |
| 35 import string | |
| 36 import time | |
| 37 import warnings | |
| 38 | |
| 39 from calendar import monthrange | |
| 40 from io import StringIO | |
| 41 | |
| 42 import six | |
| 43 from six import integer_types, text_type | |
| 44 | |
| 45 from decimal import Decimal | |
| 46 | |
| 47 from warnings import warn | |
| 48 | |
| 49 from .. import relativedelta | |
| 50 from .. import tz | |
| 51 | |
| 52 __all__ = ["parse", "parserinfo", "ParserError"] | |
| 53 | |
| 54 | |
| 55 # TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth | |
| 56 # making public and/or figuring out if there is something we can | |
| 57 # take off their plate. | |
| 58 class _timelex(object): | |
| 59 # Fractional seconds are sometimes split by a comma | |
| 60 _split_decimal = re.compile("([.,])") | |
| 61 | |
| 62 def __init__(self, instream): | |
| 63 if six.PY2: | |
| 64 # In Python 2, we can't duck type properly because unicode has | |
| 65 # a 'decode' function, and we'd be double-decoding | |
| 66 if isinstance(instream, (bytes, bytearray)): | |
| 67 instream = instream.decode() | |
| 68 else: | |
| 69 if getattr(instream, 'decode', None) is not None: | |
| 70 instream = instream.decode() | |
| 71 | |
| 72 if isinstance(instream, text_type): | |
| 73 instream = StringIO(instream) | |
| 74 elif getattr(instream, 'read', None) is None: | |
| 75 raise TypeError('Parser must be a string or character stream, not ' | |
| 76 '{itype}'.format(itype=instream.__class__.__name__)) | |
| 77 | |
| 78 self.instream = instream | |
| 79 self.charstack = [] | |
| 80 self.tokenstack = [] | |
| 81 self.eof = False | |
| 82 | |
| 83 def get_token(self): | |
| 84 """ | |
| 85 This function breaks the time string into lexical units (tokens), which | |
| 86 can be parsed by the parser. Lexical units are demarcated by changes in | |
| 87 the character set, so any continuous string of letters is considered | |
| 88 one unit, any continuous string of numbers is considered one unit. | |
| 89 | |
| 90 The main complication arises from the fact that dots ('.') can be used | |
| 91 both as separators (e.g. "Sep.20.2009") or decimal points (e.g. | |
| 92 "4:30:21.447"). As such, it is necessary to read the full context of | |
| 93 any dot-separated strings before breaking it into tokens; as such, this | |
| 94 function maintains a "token stack", for when the ambiguous context | |
| 95 demands that multiple tokens be parsed at once. | |
| 96 """ | |
| 97 if self.tokenstack: | |
| 98 return self.tokenstack.pop(0) | |
| 99 | |
| 100 seenletters = False | |
| 101 token = None | |
| 102 state = None | |
| 103 | |
| 104 while not self.eof: | |
| 105 # We only realize that we've reached the end of a token when we | |
| 106 # find a character that's not part of the current token - since | |
| 107 # that character may be part of the next token, it's stored in the | |
| 108 # charstack. | |
| 109 if self.charstack: | |
| 110 nextchar = self.charstack.pop(0) | |
| 111 else: | |
| 112 nextchar = self.instream.read(1) | |
| 113 while nextchar == '\x00': | |
| 114 nextchar = self.instream.read(1) | |
| 115 | |
| 116 if not nextchar: | |
| 117 self.eof = True | |
| 118 break | |
| 119 elif not state: | |
| 120 # First character of the token - determines if we're starting | |
| 121 # to parse a word, a number or something else. | |
| 122 token = nextchar | |
| 123 if self.isword(nextchar): | |
| 124 state = 'a' | |
| 125 elif self.isnum(nextchar): | |
| 126 state = '0' | |
| 127 elif self.isspace(nextchar): | |
| 128 token = ' ' | |
| 129 break # emit token | |
| 130 else: | |
| 131 break # emit token | |
| 132 elif state == 'a': | |
| 133 # If we've already started reading a word, we keep reading | |
| 134 # letters until we find something that's not part of a word. | |
| 135 seenletters = True | |
| 136 if self.isword(nextchar): | |
| 137 token += nextchar | |
| 138 elif nextchar == '.': | |
| 139 token += nextchar | |
| 140 state = 'a.' | |
| 141 else: | |
| 142 self.charstack.append(nextchar) | |
| 143 break # emit token | |
| 144 elif state == '0': | |
| 145 # If we've already started reading a number, we keep reading | |
| 146 # numbers until we find something that doesn't fit. | |
| 147 if self.isnum(nextchar): | |
| 148 token += nextchar | |
| 149 elif nextchar == '.' or (nextchar == ',' and len(token) >= 2): | |
| 150 token += nextchar | |
| 151 state = '0.' | |
| 152 else: | |
| 153 self.charstack.append(nextchar) | |
| 154 break # emit token | |
| 155 elif state == 'a.': | |
| 156 # If we've seen some letters and a dot separator, continue | |
| 157 # parsing, and the tokens will be broken up later. | |
| 158 seenletters = True | |
| 159 if nextchar == '.' or self.isword(nextchar): | |
| 160 token += nextchar | |
| 161 elif self.isnum(nextchar) and token[-1] == '.': | |
| 162 token += nextchar | |
| 163 state = '0.' | |
| 164 else: | |
| 165 self.charstack.append(nextchar) | |
| 166 break # emit token | |
| 167 elif state == '0.': | |
| 168 # If we've seen at least one dot separator, keep going, we'll | |
| 169 # break up the tokens later. | |
| 170 if nextchar == '.' or self.isnum(nextchar): | |
| 171 token += nextchar | |
| 172 elif self.isword(nextchar) and token[-1] == '.': | |
| 173 token += nextchar | |
| 174 state = 'a.' | |
| 175 else: | |
| 176 self.charstack.append(nextchar) | |
| 177 break # emit token | |
| 178 | |
| 179 if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or | |
| 180 token[-1] in '.,')): | |
| 181 l = self._split_decimal.split(token) | |
| 182 token = l[0] | |
| 183 for tok in l[1:]: | |
| 184 if tok: | |
| 185 self.tokenstack.append(tok) | |
| 186 | |
| 187 if state == '0.' and token.count('.') == 0: | |
| 188 token = token.replace(',', '.') | |
| 189 | |
| 190 return token | |
| 191 | |
| 192 def __iter__(self): | |
| 193 return self | |
| 194 | |
| 195 def __next__(self): | |
| 196 token = self.get_token() | |
| 197 if token is None: | |
| 198 raise StopIteration | |
| 199 | |
| 200 return token | |
| 201 | |
| 202 def next(self): | |
| 203 return self.__next__() # Python 2.x support | |
| 204 | |
| 205 @classmethod | |
| 206 def split(cls, s): | |
| 207 return list(cls(s)) | |
| 208 | |
| 209 @classmethod | |
| 210 def isword(cls, nextchar): | |
| 211 """ Whether or not the next character is part of a word """ | |
| 212 return nextchar.isalpha() | |
| 213 | |
| 214 @classmethod | |
| 215 def isnum(cls, nextchar): | |
| 216 """ Whether the next character is part of a number """ | |
| 217 return nextchar.isdigit() | |
| 218 | |
| 219 @classmethod | |
| 220 def isspace(cls, nextchar): | |
| 221 """ Whether the next character is whitespace """ | |
| 222 return nextchar.isspace() | |
| 223 | |
| 224 | |
| 225 class _resultbase(object): | |
| 226 | |
| 227 def __init__(self): | |
| 228 for attr in self.__slots__: | |
| 229 setattr(self, attr, None) | |
| 230 | |
| 231 def _repr(self, classname): | |
| 232 l = [] | |
| 233 for attr in self.__slots__: | |
| 234 value = getattr(self, attr) | |
| 235 if value is not None: | |
| 236 l.append("%s=%s" % (attr, repr(value))) | |
| 237 return "%s(%s)" % (classname, ", ".join(l)) | |
| 238 | |
| 239 def __len__(self): | |
| 240 return (sum(getattr(self, attr) is not None | |
| 241 for attr in self.__slots__)) | |
| 242 | |
| 243 def __repr__(self): | |
| 244 return self._repr(self.__class__.__name__) | |
| 245 | |
| 246 | |
class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Lower-cased lookup tables built from the class-level vocabularies.
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Reference point used by convertyear() to expand two-digit years.
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Build a ``{lower-cased name: index}`` mapping from ``lst``.

        Each entry of ``lst`` is either a single string or a tuple of
        aliases; all aliases of an entry map to that entry's position.
        """
        dct = {}
        for i, entry in enumerate(lst):
            aliases = entry if isinstance(entry, tuple) else (entry,)
            for alias in aliases:
                dct[alias.lower()] = i
        return dct

    def jump(self, name):
        """ Whether ``name`` (case-insensitive) is a token that may be skipped """
        return name.lower() in self._jump

    def weekday(self, name):
        """ Index of ``name`` in ``WEEKDAYS`` (0-based), or ``None`` """
        return self._weekdays.get(name.lower())

    def month(self, name):
        """ Month number of ``name`` (1-based), or ``None`` """
        idx = self._months.get(name.lower())
        if idx is None:
            return None
        return idx + 1

    def hms(self, name):
        """ Index of ``name`` in ``HMS`` (0=hour, 1=minute, 2=second), or ``None`` """
        return self._hms.get(name.lower())

    def ampm(self, name):
        """ Index of ``name`` in ``AMPM`` (0=am, 1=pm), or ``None`` """
        return self._ampm.get(name.lower())

    def pertain(self, name):
        """ Whether ``name`` (case-insensitive) is listed in ``PERTAIN`` """
        return name.lower() in self._pertain

    def utczone(self, name):
        """ Whether ``name`` (case-insensitive) is listed in ``UTCZONE`` """
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Return the UTC offset in seconds registered for time zone ``name``.

        Names listed in ``UTCZONE`` yield ``0``; any other name is looked up
        in ``TZOFFSET`` and ``None`` is returned when it is unknown.
        """
        # The keys of ``_utczone`` are lower-cased by ``_convert``, so the
        # membership test has to be case-insensitive (as in ``utczone()``);
        # otherwise "UTC", "GMT" and "Z" would never match.
        key = name.lower() if hasattr(name, 'lower') else name
        if key in self._utczone:
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Converts two-digit years to year within [-50, 49]
        range of self._year (current local time)
        """

        # Function contract is that the year is always positive
        assert year >= 0

        if year < 100 and not century_specified:
            # assume current century to start
            year += self._century

            if year >= self._year + 50:  # if too far in future
                year -= 100
            elif year < self._year - 50:  # if too far in past
                year += 100

        return year

    def validate(self, res):
        """
        Normalize a parse result in place and report whether it is acceptable.

        The year (if any) is expanded with :meth:`convertyear`, and the time
        zone fields are made consistent: a zero offset without a name, or
        the name ``Z``/``z``, becomes ``("UTC", 0)``, and a UTC-like name with
        a non-zero offset has its offset reset to ``0``.

        :return:
            Always ``True`` in this implementation.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        if ((res.tzoffset == 0 and not res.tzname) or
             (res.tzname == 'Z' or res.tzname == 'z')):
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            res.tzoffset = 0
        return True
| 398 | |
| 399 | |
| 400 class _ymd(list): | |
| 401 def __init__(self, *args, **kwargs): | |
| 402 super(self.__class__, self).__init__(*args, **kwargs) | |
| 403 self.century_specified = False | |
| 404 self.dstridx = None | |
| 405 self.mstridx = None | |
| 406 self.ystridx = None | |
| 407 | |
| 408 @property | |
| 409 def has_year(self): | |
| 410 return self.ystridx is not None | |
| 411 | |
| 412 @property | |
| 413 def has_month(self): | |
| 414 return self.mstridx is not None | |
| 415 | |
| 416 @property | |
| 417 def has_day(self): | |
| 418 return self.dstridx is not None | |
| 419 | |
| 420 def could_be_day(self, value): | |
| 421 if self.has_day: | |
| 422 return False | |
| 423 elif not self.has_month: | |
| 424 return 1 <= value <= 31 | |
| 425 elif not self.has_year: | |
| 426 # Be permissive, assume leap year | |
| 427 month = self[self.mstridx] | |
| 428 return 1 <= value <= monthrange(2000, month)[1] | |
| 429 else: | |
| 430 month = self[self.mstridx] | |
| 431 year = self[self.ystridx] | |
| 432 return 1 <= value <= monthrange(year, month)[1] | |
| 433 | |
| 434 def append(self, val, label=None): | |
| 435 if hasattr(val, '__len__'): | |
| 436 if val.isdigit() and len(val) > 2: | |
| 437 self.century_specified = True | |
| 438 if label not in [None, 'Y']: # pragma: no cover | |
| 439 raise ValueError(label) | |
| 440 label = 'Y' | |
| 441 elif val > 100: | |
| 442 self.century_specified = True | |
| 443 if label not in [None, 'Y']: # pragma: no cover | |
| 444 raise ValueError(label) | |
| 445 label = 'Y' | |
| 446 | |
| 447 super(self.__class__, self).append(int(val)) | |
| 448 | |
| 449 if label == 'M': | |
| 450 if self.has_month: | |
| 451 raise ValueError('Month is already set') | |
| 452 self.mstridx = len(self) - 1 | |
| 453 elif label == 'D': | |
| 454 if self.has_day: | |
| 455 raise ValueError('Day is already set') | |
| 456 self.dstridx = len(self) - 1 | |
| 457 elif label == 'Y': | |
| 458 if self.has_year: | |
| 459 raise ValueError('Year is already set') | |
| 460 self.ystridx = len(self) - 1 | |
| 461 | |
| 462 def _resolve_from_stridxs(self, strids): | |
| 463 """ | |
| 464 Try to resolve the identities of year/month/day elements using | |
| 465 ystridx, mstridx, and dstridx, if enough of these are specified. | |
| 466 """ | |
| 467 if len(self) == 3 and len(strids) == 2: | |
| 468 # we can back out the remaining stridx value | |
| 469 missing = [x for x in range(3) if x not in strids.values()] | |
| 470 key = [x for x in ['y', 'm', 'd'] if x not in strids] | |
| 471 assert len(missing) == len(key) == 1 | |
| 472 key = key[0] | |
| 473 val = missing[0] | |
| 474 strids[key] = val | |
| 475 | |
| 476 assert len(self) == len(strids) # otherwise this should not be called | |
| 477 out = {key: self[strids[key]] for key in strids} | |
| 478 return (out.get('y'), out.get('m'), out.get('d')) | |
| 479 | |
| 480 def resolve_ymd(self, yearfirst, dayfirst): | |
| 481 len_ymd = len(self) | |
| 482 year, month, day = (None, None, None) | |
| 483 | |
| 484 strids = (('y', self.ystridx), | |
| 485 ('m', self.mstridx), | |
| 486 ('d', self.dstridx)) | |
| 487 | |
| 488 strids = {key: val for key, val in strids if val is not None} | |
| 489 if (len(self) == len(strids) > 0 or | |
| 490 (len(self) == 3 and len(strids) == 2)): | |
| 491 return self._resolve_from_stridxs(strids) | |
| 492 | |
| 493 mstridx = self.mstridx | |
| 494 | |
| 495 if len_ymd > 3: | |
| 496 raise ValueError("More than three YMD values") | |
| 497 elif len_ymd == 1 or (mstridx is not None and len_ymd == 2): | |
| 498 # One member, or two members with a month string | |
| 499 if mstridx is not None: | |
| 500 month = self[mstridx] | |
| 501 # since mstridx is 0 or 1, self[mstridx-1] always | |
| 502 # looks up the other element | |
| 503 other = self[mstridx - 1] | |
| 504 else: | |
| 505 other = self[0] | |
| 506 | |
| 507 if len_ymd > 1 or mstridx is None: | |
| 508 if other > 31: | |
| 509 year = other | |
| 510 else: | |
| 511 day = other | |
| 512 | |
| 513 elif len_ymd == 2: | |
| 514 # Two members with numbers | |
| 515 if self[0] > 31: | |
| 516 # 99-01 | |
| 517 year, month = self | |
| 518 elif self[1] > 31: | |
| 519 # 01-99 | |
| 520 month, year = self | |
| 521 elif dayfirst and self[1] <= 12: | |
| 522 # 13-01 | |
| 523 day, month = self | |
| 524 else: | |
| 525 # 01-13 | |
| 526 month, day = self | |
| 527 | |
| 528 elif len_ymd == 3: | |
| 529 # Three members | |
| 530 if mstridx == 0: | |
| 531 if self[1] > 31: | |
| 532 # Apr-2003-25 | |
| 533 month, year, day = self | |
| 534 else: | |
| 535 month, day, year = self | |
| 536 elif mstridx == 1: | |
| 537 if self[0] > 31 or (yearfirst and self[2] <= 31): | |
| 538 # 99-Jan-01 | |
| 539 year, month, day = self | |
| 540 else: | |
| 541 # 01-Jan-01 | |
| 542 # Give precedence to day-first, since | |
| 543 # two-digit years is usually hand-written. | |
| 544 day, month, year = self | |
| 545 | |
| 546 elif mstridx == 2: | |
| 547 # WTF!? | |
| 548 if self[1] > 31: | |
| 549 # 01-99-Jan | |
| 550 day, year, month = self | |
| 551 else: | |
| 552 # 99-01-Jan | |
| 553 year, day, month = self | |
| 554 | |
| 555 else: | |
| 556 if (self[0] > 31 or | |
| 557 self.ystridx == 0 or | |
| 558 (yearfirst and self[1] <= 12 and self[2] <= 31)): | |
| 559 # 99-01-01 | |
| 560 if dayfirst and self[2] <= 12: | |
| 561 year, day, month = self | |
| 562 else: | |
| 563 year, month, day = self | |
| 564 elif self[0] > 12 or (dayfirst and self[1] <= 12): | |
| 565 # 13-01-01 | |
| 566 day, month, year = self | |
| 567 else: | |
| 568 # 01-13-01 | |
| 569 month, day, year = self | |
| 570 | |
| 571 return year, month, day | |
| 572 | |
| 573 | |
| 574 class parser(object): | |
def __init__(self, info=None):
    """
    :param info:
        The :class:`parserinfo` instance describing the accepted vocabulary;
        a default-constructed one is used when this is omitted (or falsy).
    """
    if not info:
        info = parserinfo()
    self.info = info
| 577 | |
def parse(self, timestr, default=None,
          ignoretz=False, tzinfos=None, **kwargs):
    """
    Parse the date/time string into a :class:`datetime.datetime` object.

    :param timestr:
        Any date/time string using the supported formats.

    :param default:
        A datetime object supplying every element that ``timestr`` does not
        specify. When ``None``, today's date at midnight is used.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a
        naive :class:`datetime.datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. It can be a dictionary
        mapping time zone names to time zones, or a function taking two
        parameters (``tzname`` and ``tzoffset``) and returning a time zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param \\*\\*kwargs:
        Keyword arguments as passed to ``_parse()``.

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ParserError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises TypeError:
        Raised for non-string or character stream input.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    if default is None:
        # Fall back to the start of the current day.
        now = datetime.datetime.now()
        default = now.replace(hour=0, minute=0, second=0, microsecond=0)

    res, skipped_tokens = self._parse(timestr, **kwargs)

    if res is None:
        raise ParserError("Unknown string format: %s", timestr)

    if not len(res):
        # Tokenized fine, but no date/time field was recognised.
        raise ParserError("String does not contain a date: %s", timestr)

    try:
        ret = self._build_naive(res, default)
    except ValueError as e:
        six.raise_from(ParserError(e.args[0] + ": %s", timestr), e)

    if not ignoretz:
        ret = self._build_tzaware(ret, res, tzinfos)

    if kwargs.get('fuzzy_with_tokens', False):
        return ret, skipped_tokens

    return ret
| 666 | |
class _result(_resultbase):
    # Fields that a parse may fill in; each one starts out as None.
    __slots__ = [
        "year", "month", "day", "weekday",
        "hour", "minute", "second", "microsecond",
        "tzname", "tzoffset", "ampm", "any_unused_tokens",
    ]
| 671 | |
def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
           fuzzy_with_tokens=False):
    """
    Private method which performs the heavy lifting of parsing, called from
    ``parse()``, which passes on its ``kwargs`` to this function.

    :param timestr:
        The string to parse.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. If set to ``None``, this value is retrieved from the
        current :class:`parserinfo` object (which itself defaults to
        ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        If this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for string like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` datetimestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at ')) 

    :return:
        A ``(res, skipped_tokens)`` pair. ``res`` is the populated
        ``_result`` object and ``skipped_tokens`` is a tuple of the ignored
        portions when ``fuzzy_with_tokens`` is set, otherwise ``None``.
        ``(None, None)`` is returned when an ``IndexError`` or
        ``ValueError`` occurs while walking the tokens, or when
        ``info.validate`` rejects the result.
    """
    if fuzzy_with_tokens:
        fuzzy = True

    info = self.info

    if dayfirst is None:
        dayfirst = info.dayfirst

    if yearfirst is None:
        yearfirst = info.yearfirst

    res = self._result()
    l = _timelex.split(timestr)         # Splits the timestr into tokens

    # Indexes (into ``l``) of the tokens that were skipped over
    skipped_idxs = []

    # year/month/day list
    ymd = _ymd()

    len_l = len(l)
    i = 0
    try:
        # Each branch below may advance ``i`` past extra tokens it consumed;
        # the ``i += 1`` at the bottom of the loop then steps over the last
        # token handled in this iteration.
        while i < len_l:

            # Check if it's a number
            value_repr = l[i]
            try:
                value = float(value_repr)
            except ValueError:
                value = None

            if value is not None:
                # Numeric token; the helper hands back the token index to
                # continue from.
                i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)

            # Check weekday
            elif info.weekday(l[i]) is not None:
                value = info.weekday(l[i])
                res.weekday = value

            # Check month name
            elif info.month(l[i]) is not None:
                value = info.month(l[i])
                ymd.append(value, 'M')

                if i + 1 < len_l:
                    if l[i + 1] in ('-', '/'):
                        # Jan-01[-99]
                        sep = l[i + 1]
                        # An IndexError here (separator is the last token)
                        # is caught by the enclosing ``try``.
                        ymd.append(l[i + 2])

                        if i + 3 < len_l and l[i + 3] == sep:
                            # Jan-01-99
                            ymd.append(l[i + 4])
                            i += 2

                        i += 2

                    elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
                          info.pertain(l[i + 2])):
                        # Jan of 01
                        # In this case, 01 is clearly year
                        if l[i + 4].isdigit():
                            # Convert it here to become unambiguous
                            value = int(l[i + 4])
                            year = str(info.convertyear(value))
                            ymd.append(year, 'Y')
                        else:
                            # Wrong guess
                            pass
                            # TODO: not hit in tests
                        i += 4

            # Check am/pm
            elif info.ampm(l[i]) is not None:
                value = info.ampm(l[i])
                val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)

                if val_is_ampm:
                    res.hour = self._adjust_ampm(res.hour, value)
                    res.ampm = value

                elif fuzzy:
                    skipped_idxs.append(i)

            # Check for a timezone name
            elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
                res.tzname = l[i]
                res.tzoffset = info.tzoffset(res.tzname)

                # Check for something like GMT+3, or BRST+3. Notice
                # that it doesn't mean "I am 3 hours after GMT", but
                # "my time +3 is GMT". If found, we reverse the
                # logic so that timezone parsing code will get it
                # right.
                if i + 1 < len_l and l[i + 1] in ('+', '-'):
                    # Swap the sign token in place: '+' becomes '-' and
                    # '-' becomes '+'.
                    l[i + 1] = ('+', '-')[l[i + 1] == '+']
                    res.tzoffset = None
                    if info.utczone(res.tzname):
                        # With something like GMT+3, the timezone
                        # is *not* GMT.
                        res.tzname = None

            # Check for a numbered timezone
            elif res.hour is not None and l[i] in ('+', '-'):
                # +1 for '+', -1 for '-'
                signal = (-1, 1)[l[i] == '+']
                len_li = len(l[i + 1])

                # TODO: check that l[i + 1] is integer?
                if len_li == 4:
                    # -0300
                    hour_offset = int(l[i + 1][:2])
                    min_offset = int(l[i + 1][2:])
                elif i + 2 < len_l and l[i + 2] == ':':
                    # -03:00
                    hour_offset = int(l[i + 1])
                    min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
                    i += 2
                elif len_li <= 2:
                    # -[0]3
                    hour_offset = int(l[i + 1][:2])
                    min_offset = 0
                else:
                    raise ValueError(timestr)

                # Offset is stored in seconds
                res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)

                # Look for a timezone name between parenthesis
                if (i + 5 < len_l and
                        info.jump(l[i + 2]) and l[i + 3] == '(' and
                        l[i + 5] == ')' and
                        3 <= len(l[i + 4]) and
                        self._could_be_tzname(res.hour, res.tzname,
                                              None, l[i + 4])):
                    # -0300 (BRST)
                    res.tzname = l[i + 4]
                    i += 4

                i += 1

            # Check jumps
            elif not (info.jump(l[i]) or fuzzy):
                # Unrecognised token that is neither skippable nor
                # tolerated by fuzzy mode.
                raise ValueError(timestr)

            else:
                skipped_idxs.append(i)
            i += 1

        # Process year/month/day
        year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)

        res.century_specified = ymd.century_specified
        res.year = year
        res.month = month
        res.day = day

    except (IndexError, ValueError):
        # Any out-of-range token lookup or failed conversion above means
        # the string could not be parsed.
        return None, None

    if not info.validate(res):
        return None, None

    if fuzzy_with_tokens:
        skipped_tokens = self._recombine_skipped(l, skipped_idxs)
        return res, tuple(skipped_tokens)
    else:
        return res, None
| 880 | |
def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
    """Interpret the numeric token at ``tokens[idx]``.

    Depending on the token's length and on its neighbouring tokens, the
    number is recorded either as date components on ``ymd`` or as time
    fields on ``res``.

    :param tokens:
        The list of string tokens being parsed.

    :param idx:
        Index of the numeric token within ``tokens``.

    :param info:
        Lookup object; its ``hms``, ``jump``, ``month`` and ``ampm``
        methods are consulted here.

    :param ymd:
        Year/month/day accumulator (``append``, ``could_be_day`` and
        ``len`` are used).

    :param res:
        Result object whose ``hour``, ``minute``, ``second`` and
        ``microsecond`` attributes may be assigned.

    :param fuzzy:
        When false, a number that fits none of the recognised shapes
        raises ``ValueError``.

    :return:
        The index of the last token consumed by this call.

    :raises ValueError:
        If the token is not a finite number, if a separator is followed
        by a word that is not a month name, or (non-fuzzy only) if the
        number cannot be interpreted.
    """
    # Token is a number
    value_repr = tokens[idx]
    try:
        value = self._to_decimal(value_repr)
    except Exception as e:
        six.raise_from(ValueError('Unknown numeric token'), e)

    # Length of the token text (decides between the compact formats below)
    len_li = len(value_repr)

    len_l = len(tokens)

    if (len(ymd) == 3 and len_li in (2, 4) and
        res.hour is None and
        (idx + 1 >= len_l or
         (tokens[idx + 1] != ':' and
          info.hms(tokens[idx + 1]) is None))):
        # 19990101T23[59]
        # The date is already complete and no hour is set, so a 2- or
        # 4-digit number is taken as HH or HHMM.
        s = tokens[idx]
        res.hour = int(s[:2])

        if len_li == 4:
            res.minute = int(s[2:])

    elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
        # YYMMDD or HHMMSS[.ss]
        s = tokens[idx]

        if not ymd and '.' not in tokens[idx]:
            # No date components seen yet: treat as a date.
            ymd.append(s[:2])
            ymd.append(s[2:4])
            ymd.append(s[4:])
        else:
            # 19990101T235959[.59]

            # TODO: Check if res attributes already set.
            res.hour = int(s[:2])
            res.minute = int(s[2:4])
            res.second, res.microsecond = self._parsems(s[4:])

    elif len_li in (8, 12, 14):
        # YYYYMMDD
        s = tokens[idx]
        ymd.append(s[:4], 'Y')
        ymd.append(s[4:6])
        ymd.append(s[6:8])

        if len_li > 8:
            # YYYYMMDDhhmm
            res.hour = int(s[8:10])
            res.minute = int(s[10:12])

            if len_li > 12:
                # YYYYMMDDhhmmss
                res.second = int(s[12:])

    elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
        # HH[ ]h or MM[ ]m or SS[.ss][ ]s
        hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
        (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
        if hms is not None:
            # TODO: checking that hour/minute/second are not
            # already set?
            self._assign_hms(res, value_repr, hms)

    elif idx + 2 < len_l and tokens[idx + 1] == ':':
        # HH:MM[:SS[.ss]]
        res.hour = int(value)
        value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
        (res.minute, res.second) = self._parse_min_sec(value)

        if idx + 4 < len_l and tokens[idx + 3] == ':':
            res.second, res.microsecond = self._parsems(tokens[idx + 4])

            # Skip the second ':' and the seconds token.
            idx += 2

        # Skip the first ':' and the minutes token.
        idx += 2

    elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
        sep = tokens[idx + 1]
        ymd.append(value_repr)

        if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
            if tokens[idx + 2].isdigit():
                # 01-01[-01]
                ymd.append(tokens[idx + 2])
            else:
                # 01-Jan[-01]
                value = info.month(tokens[idx + 2])

                if value is not None:
                    ymd.append(value, 'M')
                else:
                    raise ValueError()

            if idx + 3 < len_l and tokens[idx + 3] == sep:
                # We have three members
                # NOTE: tokens[idx + 4] is read without a bounds check; an
                # IndexError here propagates to the caller.
                value = info.month(tokens[idx + 4])

                if value is not None:
                    ymd.append(value, 'M')
                else:
                    ymd.append(tokens[idx + 4])
                idx += 2

            idx += 1
        idx += 1

    elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
        if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
            # 12 am
            hour = int(value)
            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
            idx += 1
        else:
            # Year, month or day
            ymd.append(value)
        idx += 1

    elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
        # 12am
        hour = int(value)
        res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
        idx += 1

    elif ymd.could_be_day(value):
        ymd.append(value)

    elif not fuzzy:
        raise ValueError()

    return idx
| 1011 | |
| 1012 def _find_hms_idx(self, idx, tokens, info, allow_jump): | |
| 1013 len_l = len(tokens) | |
| 1014 | |
| 1015 if idx+1 < len_l and info.hms(tokens[idx+1]) is not None: | |
| 1016 # There is an "h", "m", or "s" label following this token. We take | |
| 1017 # assign the upcoming label to the current token. | |
| 1018 # e.g. the "12" in 12h" | |
| 1019 hms_idx = idx + 1 | |
| 1020 | |
| 1021 elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and | |
| 1022 info.hms(tokens[idx+2]) is not None): | |
| 1023 # There is a space and then an "h", "m", or "s" label. | |
| 1024 # e.g. the "12" in "12 h" | |
| 1025 hms_idx = idx + 2 | |
| 1026 | |
| 1027 elif idx > 0 and info.hms(tokens[idx-1]) is not None: | |
| 1028 # There is a "h", "m", or "s" preceding this token. Since neither | |
| 1029 # of the previous cases was hit, there is no label following this | |
| 1030 # token, so we use the previous label. | |
| 1031 # e.g. the "04" in "12h04" | |
| 1032 hms_idx = idx-1 | |
| 1033 | |
| 1034 elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and | |
| 1035 info.hms(tokens[idx-2]) is not None): | |
| 1036 # If we are looking at the final token, we allow for a | |
| 1037 # backward-looking check to skip over a space. | |
| 1038 # TODO: Are we sure this is the right condition here? | |
| 1039 hms_idx = idx - 2 | |
| 1040 | |
| 1041 else: | |
| 1042 hms_idx = None | |
| 1043 | |
| 1044 return hms_idx | |
| 1045 | |
| 1046 def _assign_hms(self, res, value_repr, hms): | |
| 1047 # See GH issue #427, fixing float rounding | |
| 1048 value = self._to_decimal(value_repr) | |
| 1049 | |
| 1050 if hms == 0: | |
| 1051 # Hour | |
| 1052 res.hour = int(value) | |
| 1053 if value % 1: | |
| 1054 res.minute = int(60*(value % 1)) | |
| 1055 | |
| 1056 elif hms == 1: | |
| 1057 (res.minute, res.second) = self._parse_min_sec(value) | |
| 1058 | |
| 1059 elif hms == 2: | |
| 1060 (res.second, res.microsecond) = self._parsems(value_repr) | |
| 1061 | |
| 1062 def _could_be_tzname(self, hour, tzname, tzoffset, token): | |
| 1063 return (hour is not None and | |
| 1064 tzname is None and | |
| 1065 tzoffset is None and | |
| 1066 len(token) <= 5 and | |
| 1067 (all(x in string.ascii_uppercase for x in token) | |
| 1068 or token in self.info.UTCZONE)) | |
| 1069 | |
| 1070 def _ampm_valid(self, hour, ampm, fuzzy): | |
| 1071 """ | |
| 1072 For fuzzy parsing, 'a' or 'am' (both valid English words) | |
| 1073 may erroneously trigger the AM/PM flag. Deal with that | |
| 1074 here. | |
| 1075 """ | |
| 1076 val_is_ampm = True | |
| 1077 | |
| 1078 # If there's already an AM/PM flag, this one isn't one. | |
| 1079 if fuzzy and ampm is not None: | |
| 1080 val_is_ampm = False | |
| 1081 | |
| 1082 # If AM/PM is found and hour is not, raise a ValueError | |
| 1083 if hour is None: | |
| 1084 if fuzzy: | |
| 1085 val_is_ampm = False | |
| 1086 else: | |
| 1087 raise ValueError('No hour specified with AM or PM flag.') | |
| 1088 elif not 0 <= hour <= 12: | |
| 1089 # If AM/PM is found, it's a 12 hour clock, so raise | |
| 1090 # an error for invalid range | |
| 1091 if fuzzy: | |
| 1092 val_is_ampm = False | |
| 1093 else: | |
| 1094 raise ValueError('Invalid hour specified for 12-hour clock.') | |
| 1095 | |
| 1096 return val_is_ampm | |
| 1097 | |
| 1098 def _adjust_ampm(self, hour, ampm): | |
| 1099 if hour < 12 and ampm == 1: | |
| 1100 hour += 12 | |
| 1101 elif hour == 12 and ampm == 0: | |
| 1102 hour = 0 | |
| 1103 return hour | |
| 1104 | |
| 1105 def _parse_min_sec(self, value): | |
| 1106 # TODO: Every usage of this function sets res.second to the return | |
| 1107 # value. Are there any cases where second will be returned as None and | |
| 1108 # we *don't* want to set res.second = None? | |
| 1109 minute = int(value) | |
| 1110 second = None | |
| 1111 | |
| 1112 sec_remainder = value % 1 | |
| 1113 if sec_remainder: | |
| 1114 second = int(60 * sec_remainder) | |
| 1115 return (minute, second) | |
| 1116 | |
| 1117 def _parse_hms(self, idx, tokens, info, hms_idx): | |
| 1118 # TODO: Is this going to admit a lot of false-positives for when we | |
| 1119 # just happen to have digits and "h", "m" or "s" characters in non-date | |
| 1120 # text? I guess hex hashes won't have that problem, but there's plenty | |
| 1121 # of random junk out there. | |
| 1122 if hms_idx is None: | |
| 1123 hms = None | |
| 1124 new_idx = idx | |
| 1125 elif hms_idx > idx: | |
| 1126 hms = info.hms(tokens[hms_idx]) | |
| 1127 new_idx = hms_idx | |
| 1128 else: | |
| 1129 # Looking backwards, increment one. | |
| 1130 hms = info.hms(tokens[hms_idx]) + 1 | |
| 1131 new_idx = idx | |
| 1132 | |
| 1133 return (new_idx, hms) | |
| 1134 | |
| 1135 # ------------------------------------------------------------------ | |
| 1136 # Handling for individual tokens. These are kept as methods instead | |
| 1137 # of functions for the sake of customizability via subclassing. | |
| 1138 | |
| 1139 def _parsems(self, value): | |
| 1140 """Parse a I[.F] seconds value into (seconds, microseconds).""" | |
| 1141 if "." not in value: | |
| 1142 return int(value), 0 | |
| 1143 else: | |
| 1144 i, f = value.split(".") | |
| 1145 return int(i), int(f.ljust(6, "0")[:6]) | |
| 1146 | |
| 1147 def _to_decimal(self, val): | |
| 1148 try: | |
| 1149 decimal_value = Decimal(val) | |
| 1150 # See GH 662, edge case, infinite value should not be converted | |
| 1151 # via `_to_decimal` | |
| 1152 if not decimal_value.is_finite(): | |
| 1153 raise ValueError("Converted decimal value is infinite or NaN") | |
| 1154 except Exception as e: | |
| 1155 msg = "Could not convert %s to decimal" % val | |
| 1156 six.raise_from(ValueError(msg), e) | |
| 1157 else: | |
| 1158 return decimal_value | |
| 1159 | |
| 1160 # ------------------------------------------------------------------ | |
| 1161 # Post-Parsing construction of datetime output. These are kept as | |
| 1162 # methods instead of functions for the sake of customizability via | |
| 1163 # subclassing. | |
| 1164 | |
| 1165 def _build_tzinfo(self, tzinfos, tzname, tzoffset): | |
| 1166 if callable(tzinfos): | |
| 1167 tzdata = tzinfos(tzname, tzoffset) | |
| 1168 else: | |
| 1169 tzdata = tzinfos.get(tzname) | |
| 1170 # handle case where tzinfo is paased an options that returns None | |
| 1171 # eg tzinfos = {'BRST' : None} | |
| 1172 if isinstance(tzdata, datetime.tzinfo) or tzdata is None: | |
| 1173 tzinfo = tzdata | |
| 1174 elif isinstance(tzdata, text_type): | |
| 1175 tzinfo = tz.tzstr(tzdata) | |
| 1176 elif isinstance(tzdata, integer_types): | |
| 1177 tzinfo = tz.tzoffset(tzname, tzdata) | |
| 1178 else: | |
| 1179 raise TypeError("Offset must be tzinfo subclass, tz string, " | |
| 1180 "or int offset.") | |
| 1181 return tzinfo | |
| 1182 | |
| 1183 def _build_tzaware(self, naive, res, tzinfos): | |
| 1184 if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)): | |
| 1185 tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset) | |
| 1186 aware = naive.replace(tzinfo=tzinfo) | |
| 1187 aware = self._assign_tzname(aware, res.tzname) | |
| 1188 | |
| 1189 elif res.tzname and res.tzname in time.tzname: | |
| 1190 aware = naive.replace(tzinfo=tz.tzlocal()) | |
| 1191 | |
| 1192 # Handle ambiguous local datetime | |
| 1193 aware = self._assign_tzname(aware, res.tzname) | |
| 1194 | |
| 1195 # This is mostly relevant for winter GMT zones parsed in the UK | |
| 1196 if (aware.tzname() != res.tzname and | |
| 1197 res.tzname in self.info.UTCZONE): | |
| 1198 aware = aware.replace(tzinfo=tz.UTC) | |
| 1199 | |
| 1200 elif res.tzoffset == 0: | |
| 1201 aware = naive.replace(tzinfo=tz.UTC) | |
| 1202 | |
| 1203 elif res.tzoffset: | |
| 1204 aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) | |
| 1205 | |
| 1206 elif not res.tzname and not res.tzoffset: | |
| 1207 # i.e. no timezone information was found. | |
| 1208 aware = naive | |
| 1209 | |
| 1210 elif res.tzname: | |
| 1211 # tz-like string was parsed but we don't know what to do | |
| 1212 # with it | |
| 1213 warnings.warn("tzname {tzname} identified but not understood. " | |
| 1214 "Pass `tzinfos` argument in order to correctly " | |
| 1215 "return a timezone-aware datetime. In a future " | |
| 1216 "version, this will raise an " | |
| 1217 "exception.".format(tzname=res.tzname), | |
| 1218 category=UnknownTimezoneWarning) | |
| 1219 aware = naive | |
| 1220 | |
| 1221 return aware | |
| 1222 | |
| 1223 def _build_naive(self, res, default): | |
| 1224 repl = {} | |
| 1225 for attr in ("year", "month", "day", "hour", | |
| 1226 "minute", "second", "microsecond"): | |
| 1227 value = getattr(res, attr) | |
| 1228 if value is not None: | |
| 1229 repl[attr] = value | |
| 1230 | |
| 1231 if 'day' not in repl: | |
| 1232 # If the default day exceeds the last day of the month, fall back | |
| 1233 # to the end of the month. | |
| 1234 cyear = default.year if res.year is None else res.year | |
| 1235 cmonth = default.month if res.month is None else res.month | |
| 1236 cday = default.day if res.day is None else res.day | |
| 1237 | |
| 1238 if cday > monthrange(cyear, cmonth)[1]: | |
| 1239 repl['day'] = monthrange(cyear, cmonth)[1] | |
| 1240 | |
| 1241 naive = default.replace(**repl) | |
| 1242 | |
| 1243 if res.weekday is not None and not res.day: | |
| 1244 naive = naive + relativedelta.relativedelta(weekday=res.weekday) | |
| 1245 | |
| 1246 return naive | |
| 1247 | |
| 1248 def _assign_tzname(self, dt, tzname): | |
| 1249 if dt.tzname() != tzname: | |
| 1250 new_dt = tz.enfold(dt, fold=1) | |
| 1251 if new_dt.tzname() == tzname: | |
| 1252 return new_dt | |
| 1253 | |
| 1254 return dt | |
| 1255 | |
| 1256 def _recombine_skipped(self, tokens, skipped_idxs): | |
| 1257 """ | |
| 1258 >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"] | |
| 1259 >>> skipped_idxs = [0, 1, 2, 5] | |
| 1260 >>> _recombine_skipped(tokens, skipped_idxs) | |
| 1261 ["foo bar", "baz"] | |
| 1262 """ | |
| 1263 skipped_tokens = [] | |
| 1264 for i, idx in enumerate(sorted(skipped_idxs)): | |
| 1265 if i > 0 and idx - 1 == skipped_idxs[i - 1]: | |
| 1266 skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx] | |
| 1267 else: | |
| 1268 skipped_tokens.append(tokens[idx]) | |
| 1269 | |
| 1270 return skipped_tokens | |
| 1271 | |
| 1272 | |
# Shared module-level parser instance built with the default constructor
# arguments; parse() uses it whenever no ``parserinfo`` is supplied.
DEFAULTPARSER = parser()
| 1274 | |
| 1275 | |
def parse(timestr, parserinfo=None, **kwargs):
    """

    Parse a date/time string in any of the supported formats, optionally
    customised by a ``parserinfo`` object.

    :param timestr:
        A string containing a date/time stamp.

    :param parserinfo:
        A :class:`parserinfo` object containing parameters for the parser.
        If ``None``, the default arguments to the :class:`parserinfo`
        constructor are used.

    All remaining keyword arguments are collected in ``**kwargs`` and are
    described below:

    :param default:
        The default datetime object. If it is a datetime object and not
        ``None``, elements specified in ``timestr`` replace elements in
        the default object.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a
        naive :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. It can be a dictionary
        mapping time zone names to time zones, or a function taking two
        parameters (``tzname`` and ``tzoffset``) and returning a time
        zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM and
        YMD. If set to ``None``, this value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        If this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for strings like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` object and the second element is a
        tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ValueError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    # A custom parserinfo needs its own parser; otherwise reuse the shared one.
    active_parser = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return active_parser.parse(timestr, **kwargs)
| 1375 | |
| 1376 | |
class _tzparser(object):
    """Parser for TZ-variable style time zone strings.

    ``parse`` recognises an abbreviation/offset prefix such as
    ``BRST+3[BRDT[+2]]``, optionally followed by DST transition rules in
    either the comma-separated numeric form
    (``GMT0BST,3,0,30,3600,10,0,26,7200[,3600]``) or the ``J``/``M``/
    year-day form with optional ``/time`` suffixes.
    """

    class _result(_resultbase):

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            # Transition rules for the start and the end of DST.
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """Parse ``tzstr`` into a ``_result``.

        :param tzstr:
            The time zone string to parse.

        :return:
            A ``_result`` whose ``any_unused_tokens`` attribute tells
            whether tokens other than ``,`` and ``:`` were left
            unconsumed, or ``None`` if the string does not follow a
            recognised layout.
        """
        res = self._result()
        l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x]
        used_idxs = list()
        try:

            len_l = len(l)

            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                # Advance j over tokens that contain no digit, ':', ',',
                # '-' or '+'; those tokens make up the abbreviation.
                j = i
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(l[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(l[i:j])

                    for ii in range(j):
                        used_idxs.append(ii)
                    i = j
                    if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
                                       "0123456789")):
                        if l[i] in ('+', '-'):
                            # Yes, that's right.  See the TZ variable
                            # documentation.
                            signal = (1, -1)[l[i] == '+']
                            used_idxs.append(i)
                            i += 1
                        else:
                            signal = -1
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr, (int(l[i][:2]) * 3600 +
                                                   int(l[i][2:]) * 60) * signal)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(l[i]) * 3600 +
                                     int(l[i + 2]) * 60) * signal)
                            used_idxs.append(i)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(l[i][:2]) * 3600 * signal)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1
                        if res.dstabbr:
                            break
                else:
                    # No abbreviation starts here; the prefix is finished.
                    break

            if i < len_l:
                for j in range(i, len_l):
                    if l[j] == ';':
                        l[j] = ','

                # Explicit check instead of ``assert`` so that validation
                # is still performed when Python runs with -O.
                if l[i] != ',':
                    raise ValueError("expected ',' after the zone prefix")

                i += 1

            if i >= len_l:
                pass
            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if l[i] == '-':
                        value = int(l[i + 1]) * -1
                        used_idxs.append(i)
                        i += 1
                    else:
                        value = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if value:
                        x.week = value
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        x.day = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    x.time = int(l[i])
                    used_idxs.append(i)
                    i += 2
                if i < len_l:
                    if l[i] in ('-', '+'):
                        signal = (-1, 1)[l[i] == "+"]
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = 1
                    used_idxs.append(i)
                    res.dstoffset = (res.stdoffset + int(l[i]) * signal)

                # This was a made-up format that is not in normal use
                warn(('Parsed time zone "%s" ' % tzstr) +
                     'is in a non-standard dateutil-specific format, which ' +
                     'is now deprecated; support for parsing this format ' +
                     'will be removed in future versions. It is recommended ' +
                     'that you switch to a standard format like the GNU ' +
                     'TZ variable format.', tz.DeprecatedTzFormatWarning)
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
                                                     '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        used_idxs.append(i)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        used_idxs.append(i)
                        i += 1
                        x.month = int(l[i])
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after month")
                        used_idxs.append(i)
                        i += 1
                        x.week = int(l[i])
                        if x.week == 5:
                            x.week = -1
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after week")
                        used_idxs.append(i)
                        i += 1
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i]) + 1

                    used_idxs.append(i)
                    i += 1

                    if i < len_l and l[i] == '/':
                        used_idxs.append(i)
                        i += 1
                        # start time
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(l[i][:2]) * 3600 +
                                      int(l[i][2:]) * 60)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
                            used_idxs.append(i)
                            i += 2
                            if i + 1 < len_l and l[i + 1] == ':':
                                used_idxs.append(i)
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(l[i][:2]) * 3600)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1

                    if i != len_l and l[i] != ',':
                        raise ValueError("expected ',' between rules")

                    i += 1

                if i < len_l:
                    raise ValueError("unexpected tokens after the rules")

        except (IndexError, ValueError, AssertionError):
            return None

        unused_idxs = set(range(len_l)).difference(used_idxs)
        res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"})
        return res
| 1586 | |
| 1587 | |
# Shared module-level _tzparser instance used by _parsetz().
DEFAULTTZPARSER = _tzparser()


def _parsetz(tzstr):
    """Parse ``tzstr`` with the shared ``DEFAULTTZPARSER`` and return its result."""
    return DEFAULTTZPARSER.parse(tzstr)
| 1593 | |
| 1594 | |
class ParserError(ValueError):
    """Error class for representing failure to parse a datetime string.

    The first argument is treated as a %-style format string and the
    remaining arguments as its values, e.g.
    ``ParserError("Unknown string format: %s", timestr)``.
    """
    def __str__(self):
        """Return the formatted message, or the plain ``ValueError`` text.

        Formatting is best-effort: if there are no arguments, or the first
        argument cannot be %-formatted with the rest (wrong type, argument
        count mismatch, or a stray ``%`` in the message), the default
        exception string is used instead of raising.
        """
        try:
            return self.args[0] % self.args[1:]
        except (TypeError, IndexError, ValueError):
            return super(ParserError, self).__str__()

    def __repr__(self):
        """Return ``ClassName(arg1, arg2, ...)`` with each argument repr'd."""
        args = ", ".join(repr(arg) for arg in self.args)
        return "%s(%s)" % (self.__class__.__name__, args)
| 1605 | |
| 1606 | |
class UnknownTimezoneWarning(RuntimeWarning):
    """Warning issued when the parser finds a timezone name it cannot turn into a tzinfo."""
| 1609 # vim:ts=4:sw=4:et | 
