File size: 4,738 Bytes
27867f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
"""ISO 8601 date time string parsing
Basic usage:
>>> import iso8601
>>> iso8601.parse_date("2007-01-25T12:00:00Z")
datetime.datetime(2007, 1, 25, 12, 0, tzinfo=datetime.timezone.utc)
>>>
"""
import datetime
import re
import typing
from decimal import Decimal
# Public API of this module: the names exported by a star-import.
__all__ = ["parse_date", "ParseError", "UTC", "FixedOffset"]
# Adapted from http://delete.me.uk/2005/03/iso8601.html
#
# Compiled verbose-mode pattern for the date/time strings parse_date accepts.
# Named groups: year, month/monthdash, day/daydash, separator, hour, minute,
# second, second_fraction, timezone, tz_sign, tz_hour and tz_minute.
# The "dash" variants capture a 1-2 digit field written after a hyphen; the
# plain variants capture exactly two digits written without one.
# Everything after the four-digit year is optional. The timezone group accepts
# "Z", "R", or a signed hour offset with optional minutes. The trailing ``$``
# anchors the match at the end of the input.
ISO8601_REGEX = re.compile(
r"""
(?P<year>[0-9]{4})
(
(
(-(?P<monthdash>[0-9]{1,2}))
|
(?P<month>[0-9]{2})
(?!$) # Don't allow YYYYMM
)
(
(
(-(?P<daydash>[0-9]{1,2}))
|
(?P<day>[0-9]{2})
)
(
(
(?P<separator>[ T])
(?P<hour>[0-9]{2})
(:{0,1}(?P<minute>[0-9]{2})){0,1}
(
:{0,1}(?P<second>[0-9]{1,2})
([.,](?P<second_fraction>[0-9]+)){0,1}
){0,1}
(?P<timezone>
Z
|
R
|
(
(?P<tz_sign>[-+])
(?P<tz_hour>[0-9]{2})
:{0,1}
(?P<tz_minute>[0-9]{2}){0,1}
)
){0,1}
){0,1}
)
){0,1} # YYYY-MM
){0,1} # YYYY only
$
""",
re.VERBOSE,
)
class ParseError(ValueError):
    """Error signalling that a date string could not be parsed.

    Subclasses ValueError, so callers that already catch ValueError also
    catch this.
    """
# Module-level alias for the standard library's UTC timezone instance.
UTC = datetime.timezone.utc
def FixedOffset(
    offset_hours: float, offset_minutes: float, name: str
) -> datetime.timezone:
    """Build a timezone with a constant offset from UTC.

    :param offset_hours: Hours component of the offset (negative for west
                         of UTC).
    :param offset_minutes: Minutes component of the offset, added to the
                           hours component.
    :param name: Name reported by the resulting timezone's ``tzname()``.
    :returns: A datetime.timezone carrying the combined offset and name.
    """
    utc_delta = datetime.timedelta(hours=offset_hours, minutes=offset_minutes)
    fixed_zone = datetime.timezone(utc_delta, name)
    return fixed_zone
def parse_timezone(
    matches: typing.Dict[str, str],
    default_timezone: typing.Optional[datetime.timezone] = UTC,
) -> typing.Optional[datetime.timezone]:
    """Parses ISO 8601 time zone specs into tzinfo offsets

    :param matches: Named groups from an ISO8601_REGEX match, with groups
                    that did not participate (None values) already removed.
    :param default_timezone: Returned unchanged when ``matches`` has no
                             "timezone" entry. May be None.
    :returns: UTC for "Z", a fixed UTC-05:00 offset for "R", a fixed offset
              built from the tz_sign / tz_hour / tz_minute groups for a
              numeric offset, or ``default_timezone`` when no timezone was
              given.
    :raises ValueError: From datetime.timezone when the numeric offset is
                        not strictly within 24 hours of UTC.
    """
    tz = matches.get("timezone")

    # This isn't strictly correct, but it's common to encounter dates without
    # timezones, so fall back to the caller's default (UTC unless overridden).
    # Addresses issue 4.
    if tz is None:
        return default_timezone

    if tz == "Z":
        return UTC

    if tz == "R":
        # "R" (military "Romeo") is five hours *behind* UTC, as the
        # "-05:00" label says, so the hour offset must be negative.
        return FixedOffset(-5, 0, "-05:00")

    sign = matches.get("tz_sign")
    hours = int(matches.get("tz_hour", 0))
    minutes = int(matches.get("tz_minute", 0))
    # Build the label from the unsigned magnitudes before the sign is applied.
    description = f"{sign}{hours:02d}:{minutes:02d}"
    if sign == "-":
        # Negate both parts so that e.g. -05:30 is -5h30m, not -5h + 30m.
        hours = -hours
        minutes = -minutes
    return FixedOffset(hours, minutes, description)
def parse_date(
    datestring: str, default_timezone: typing.Optional[datetime.timezone] = UTC
) -> datetime.datetime:
    """Parses ISO 8601 dates into datetime objects

    The timezone is parsed from the date string. However it is quite common to
    have dates without a timezone (not strictly correct). In this case the
    default timezone specified in default_timezone is used. This is UTC by
    default.

    Missing month and day fields default to 1; missing time fields default
    to 0. Fractional-second digits beyond microsecond precision are
    truncated.

    :param datestring: The date to parse as a string
    :param default_timezone: A datetime tzinfo instance to use when no timezone
                             is specified in the datestring. If this is set to
                             None then a naive datetime object is returned.
    :returns: A datetime.datetime instance
    :raises ParseError: when there is a problem parsing the date or
                        constructing the datetime instance. The underlying
                        exception, if any, is attached as ``__cause__``.
    """
    try:
        m = ISO8601_REGEX.match(datestring)
    except Exception as e:
        # e.g. a non-string argument makes the regex engine raise TypeError.
        raise ParseError(e) from e

    if not m:
        raise ParseError(f"Unable to parse date string {datestring!r}")

    # Drop any Nones from the regex matches
    # TODO: check if there's a way to omit results in regexes
    groups: typing.Dict[str, str] = {
        k: v for k, v in m.groupdict().items() if v is not None
    }

    try:
        return datetime.datetime(
            year=int(groups.get("year", 0)),
            # The dashed ("-MM") and compact ("MM") forms land in different
            # groups; use whichever matched.
            month=int(groups.get("month", groups.get("monthdash", 1))),
            day=int(groups.get("day", groups.get("daydash", 1))),
            hour=int(groups.get("hour", 0)),
            minute=int(groups.get("minute", 0)),
            second=int(groups.get("second", 0)),
            # Decimal keeps the fraction exact; int() then truncates anything
            # finer than one microsecond.
            microsecond=int(
                Decimal(f"0.{groups.get('second_fraction', 0)}") * Decimal("1000000.0")
            ),
            tzinfo=parse_timezone(groups, default_timezone=default_timezone),
        )
    except Exception as e:
        # Out-of-range fields (month 13, hour 25, offset >= 24h, ...) are
        # reported as ParseError, with the original error kept as the cause.
        raise ParseError(e) from e
|