"""ISO 8601 date time string parsing

Basic usage:
>>> import iso8601
>>> iso8601.parse_date("2007-01-25T12:00:00Z")
datetime.datetime(2007, 1, 25, 12, 0, tzinfo=datetime.timezone.utc)
>>>

"""
|
|
|
import datetime
import re
import typing
from decimal import Decimal
|
|
|
# Names exported by ``from <module> import *``.
__all__ = ["parse_date", "ParseError", "UTC", "FixedOffset"]
|
|
|
|
|
# Compiled pattern for the date/time strings this module accepts.
#
# A four-digit year is mandatory; month, day, time and timezone are each
# optional and nest inside one another (a day requires a month, a time
# requires a day, a timezone requires a time). Month and day may be written
# with a leading dash (1-2 digits) or without one (exactly 2 digits).
# Every component is exposed as a named group; groups that did not take part
# in a match are reported as None by ``groupdict()``.
ISO8601_REGEX = re.compile(
    r"""
    (?P<year>[0-9]{4})
    (
        (
            (-(?P<monthdash>[0-9]{1,2}))
            |
            (?P<month>[0-9]{2})
            (?!$)  # Don't allow YYYYMM
        )
        (
            (
                (-(?P<daydash>[0-9]{1,2}))
                |
                (?P<day>[0-9]{2})
            )
            (
                (
                    (?P<separator>[ T])
                    (?P<hour>[0-9]{2})
                    (:{0,1}(?P<minute>[0-9]{2})){0,1}
                    (
                        :{0,1}(?P<second>[0-9]{1,2})
                        ([.,](?P<second_fraction>[0-9]+)){0,1}
                    ){0,1}
                    (?P<timezone>
                        Z
                        |
                        R
                        |
                        (
                            (?P<tz_sign>[-+])
                            (?P<tz_hour>[0-9]{2})
                            :{0,1}
                            (?P<tz_minute>[0-9]{2}){0,1}
                        )
                    ){0,1}
                ){0,1}
            )
        ){0,1}  # YYYY-MM
    ){0,1}  # YYYY only
    $
    """,
    re.VERBOSE,
)
|
|
|
|
|
class ParseError(ValueError):
    """Raised when there is a problem parsing a date string"""
|
|
|
|
|
# Module-level alias for the standard library's UTC timezone singleton.
UTC = datetime.timezone.utc
|
|
|
|
|
def FixedOffset(
    offset_hours: float, offset_minutes: float, name: str
) -> datetime.timezone:
    """Build a timezone with a fixed offset from UTC

    :param offset_hours: Hours component of the offset (negative for zones
                         behind UTC)
    :param offset_minutes: Minutes component of the offset; it is added to
                           the hours, so it should carry the same sign
    :param name: The name the returned timezone reports from ``tzname()``
    :returns: A datetime.timezone instance
    :raises: ValueError when datetime.timezone rejects the combined offset
             (it must be strictly between -24 and +24 hours)

    """
    return datetime.timezone(
        datetime.timedelta(hours=offset_hours, minutes=offset_minutes), name
    )
|
|
|
|
|
def parse_timezone(
    matches: typing.Dict[str, str],
    default_timezone: typing.Optional[datetime.timezone] = UTC,
) -> typing.Optional[datetime.timezone]:
    """Parses ISO 8601 time zone specs into tzinfo offsets

    :param matches: Named groups from an ISO8601_REGEX match. Groups that did
                    not match must be absent from the mapping rather than
                    mapped to None, because the values are passed to int().
    :param default_timezone: Returned unchanged when ``matches`` has no
                             "timezone" entry. May be None.
    :returns: UTC for "Z", a fixed -05:00 offset for "R", otherwise a fixed
              offset built from the "tz_sign", "tz_hour" and "tz_minute"
              entries (a missing hour or minute counts as 0).

    """
    tz = matches.get("timezone", None)
    if tz is None:
        return default_timezone
    if tz == "Z":
        return UTC
    if tz == "R":
        # The zone is labelled "-05:00", so the offset must be negative;
        # a positive 5 here would contradict the zone's own name.
        return FixedOffset(-5, 0, "-05:00")

    sign = matches.get("tz_sign", None)
    hours = int(matches.get("tz_hour", 0))
    minutes = int(matches.get("tz_minute", 0))
    # Build the name before negating so it shows the digits as written.
    description = f"{sign}{hours:02d}:{minutes:02d}"
    if sign == "-":
        # Negate both parts: FixedOffset adds hours and minutes together.
        hours = -hours
        minutes = -minutes
    return FixedOffset(hours, minutes, description)
|
|
|
|
|
def parse_date(
    datestring: str, default_timezone: typing.Optional[datetime.timezone] = UTC
) -> datetime.datetime:
    """Parses ISO 8601 dates into datetime objects

    The timezone is parsed from the date string. However it is quite common to
    have dates without a timezone (not strictly correct). In this case the
    default timezone specified in default_timezone is used. This is UTC by
    default.

    Components missing from the string fall back to month 1, day 1 and zero
    for every time field.

    :param datestring: The date to parse as a string
    :param default_timezone: A datetime tzinfo instance to use when no timezone
                             is specified in the datestring. If this is set to
                             None then a naive datetime object is returned.
    :returns: A datetime.datetime instance
    :raises: ParseError when there is a problem parsing the date or
             constructing the datetime instance.

    """
    try:
        m = ISO8601_REGEX.match(datestring)
    except Exception as e:
        # Any failure inside the regex call (for instance a non-string
        # argument) is reported as a ParseError, with the cause chained.
        raise ParseError(e) from e

    if not m:
        raise ParseError(f"Unable to parse date string {datestring!r}")

    # Keep only the groups that took part in the match, so the .get()
    # defaults below apply to every component absent from the string.
    groups: typing.Dict[str, str] = {
        k: v for k, v in m.groupdict().items() if v is not None
    }

    try:
        return datetime.datetime(
            year=int(groups.get("year", 0)),
            month=int(groups.get("month", groups.get("monthdash", 1))),
            day=int(groups.get("day", groups.get("daydash", 1))),
            hour=int(groups.get("hour", 0)),
            minute=int(groups.get("minute", 0)),
            second=int(groups.get("second", 0)),
            # Read the fraction digits as "0.<digits>" seconds and scale to
            # microseconds; int() truncates anything finer than that.
            microsecond=int(
                Decimal(f"0.{groups.get('second_fraction', 0)}") * Decimal("1000000.0")
            ),
            tzinfo=parse_timezone(groups, default_timezone=default_timezone),
        )
    except Exception as e:
        # Out-of-range fields (month 13, hour 25, ...) make the datetime
        # constructor raise; report those as ParseError too.
        raise ParseError(e) from e
|
|