Source code for ae.parse_date

"""
parse date strings more flexible and less strict
================================================

this module is pure python and has no namespace external dependencies.

the :func:`parse_date` helper function converts a wide range of date and datetime string literal formats into the
built-in types :class:`datetime.datetime` and :class:`datetime.date`.

this function extends (and fully replaces) Python's standard method :meth:`~datetime.datetime.strptime` and supports
multiple date formats which are interpreted much more flexibly.
"""
import datetime
from typing import Any, Dict, Optional, Tuple, Union

from ae.base import DATE_ISO, DATE_TIME_ISO                 # type: ignore


__version__ = '0.3.5'


def parse_date(literal: str, *additional_formats: str, replace: Optional[Dict[str, Any]] = None,
               ret_date: Optional[bool] = False,
               dt_seps: Tuple[str, ...] = ('T', ' '), ti_sep: str = ':', ms_sep: str = '.', tz_sep: str = '+',
               ) -> Optional[Union[datetime.date, datetime.datetime]]:
    """ parse a date literal string, returning the represented date/datetime or None if date literal is invalid.

    this function checks/corrects the passed date/time literals to support a wider range of ISO and additional
    date/time formats than Python's :func:`~datetime.datetime.strptime`.

    .. hint::
        Python's :meth:`~datetime.datetime.strptime` to parse date and time strings into :class:`datetime.date` or
        :class:`datetime.datetime` objects is very strict and does not respect the formatting alternatives of ISO8601
        (see https://bugs.python.org/issue15873 and https://github.com/boxed/iso8601).

        additionally a :class:`datetime.date` object can be created/returned automatically if no time info is
        specified in the date string/literal (see :paramref:`~parse_date.ret_date` parameter).

    :param literal:             date literal string in the format of :data:`DATE_ISO`, :data:`DATE_TIME_ISO` or in
                                one of the additional formats passed into the
                                :paramref:`~parse_date.additional_formats` arguments.
    :param additional_formats:  additional date literal format string masks (supported mask characters are documented
                                at the `format` argument of the python method :meth:`~datetime.datetime.strptime`).
                                these masks are tried first (in the passed order), followed by the ISO masks.
    :param replace:             dict of replace keyword arguments for :meth:`datetime.datetime.replace` call.
                                pass e.g. dict(microsecond=0, tzinfo=None) to set the microseconds of the
                                resulting date to zero and to remove the timezone info. a ValueError raised by
                                the replace call is treated like a non-matching mask.
    :param ret_date:            request return value type: True=datetime.date, False=datetime.datetime (the default)
                                or None=determine type from literal (short date if dt_seps are not in literal).
    :param dt_seps:             tuple of supported separator characters between the date and time literal parts.
    :param ti_sep:              separator character of the time parts (hours/minutes/seconds) in literal.
    :param ms_sep:              microseconds separator character.
    :param tz_sep:              time-zone separator character.
    :return:                    represented date/datetime or None if date literal is invalid (no mask matches or
                                the literal contains more than two time separators).
    """
    lp_tz_sep = literal.rfind(tz_sep)
    lp_ms_sep = literal.rfind(ms_sep)
    # default=-1: an empty dt_seps tuple means "no date/time separator found" instead of a ValueError from max()
    lp_dt_sep = max((literal.find(_) for _ in dt_seps), default=-1)

    if ret_date and lp_dt_sep != -1:
        literal = literal[:lp_dt_sep]       # cut time part if caller requested the return of a short date
        l_dt_sep = None
        l_time_sep_cnt = 0
    else:
        l_dt_sep = literal[lp_dt_sep] if lp_dt_sep != -1 else None
        l_time_sep_cnt = literal.count(ti_sep)
        if not 0 <= l_time_sep_cnt <= 2:
            return None

    # the caller's masks are tried first, the ISO masks are the fallbacks
    if l_dt_sep:
        additional_formats += (DATE_TIME_ISO,)
    additional_formats += (DATE_ISO,)

    for mask in additional_formats:
        # both values are determined from the untouched mask, before any of the truncations underneath
        mp_dt_sep = max((mask.find(_) for _ in dt_seps), default=-1)
        m_time_sep_cnt = mask.count(ti_sep)

        # slice instead of index: masks shorter than 3 characters (e.g. '%Y') must not raise an IndexError
        if lp_tz_sep == -1 and mask[-3:-2] == tz_sep:
            mask = mask[:-3]                # no timezone specified in literal, then remove '+%z' from mask

        ms_pos = mask.rfind(ms_sep)
        if lp_ms_sep == -1 and ms_pos != -1:
            mask = mask[:ms_pos]            # no microseconds specified in literal, then remove '.%f' from mask

        if 1 <= l_time_sep_cnt < m_time_sep_cnt:
            ti_pos = mask.rfind(ti_sep)
            if ti_pos != -1:                # an earlier truncation may already have removed all time separators
                mask = mask[:ti_pos]        # no seconds specified in literal, then remove ':%S' from mask

        if mp_dt_sep != -1:
            if not l_dt_sep:
                mask = mask[:mp_dt_sep]     # if no date-time-sep in literal, then remove time part from mask
            elif mp_dt_sep < len(mask):     # skip if the truncations above already cut off the time part
                m_dt_sep = mask[mp_dt_sep]
                if l_dt_sep != m_dt_sep:    # if literal uses different date-time-sep
                    mask = mask.replace(m_dt_sep, l_dt_sep)     # .. then replace in mask

        try:
            ret_val = datetime.datetime.strptime(literal, mask)
            if replace:
                ret_val = ret_val.replace(**replace)
        except ValueError:
            continue                        # mask does not match the literal, so try the next one

        if ret_date or (ret_date is None and l_dt_sep is None):
            return ret_val.date()
        return ret_val

    return None