diff --git a/guessit/rules/common/date.py b/guessit/rules/common/date.py index f6b56ea..779e4b9 100644 --- a/guessit/rules/common/date.py +++ b/guessit/rules/common/date.py @@ -26,7 +26,48 @@ def valid_year(year): return 1920 <= year < 2030 -def search_date(string, year_first=None, day_first=True): +def _is_int(string): + """ + Check if the input string is an integer + + :param string: + :type string: + :return: + :rtype: + """ + try: + int(string) + return True + except ValueError: + return False + + +def _guess_day_first_parameter(groups): + """ + If day_first is not defined, use some heuristics to guess it. + It helps to solve issues with python-dateutil 2.5.3 parser changes. + + :param groups: match groups found for the date + :type groups: tuple of str + :return: guessed day_first value, or None when no heuristic applies + :rtype: bool or None + """ + + # If match starts with a long year, then day_first is forced to false. + if _is_int(groups[0]) and valid_year(int(groups[0][:4])): + return False + # If match ends with a long year, then day_first is forced to true. + elif _is_int(groups[-1]) and valid_year(int(groups[-1][-4:])): + return True + # If match starts with a short year, then day_first is forced to false. + elif _is_int(groups[0]) and int(groups[0][:2]) > 31: + return False + # If match ends with a short year, then day_first is forced to true. + elif _is_int(groups[-1]) and int(groups[-1][-2:]) > 31: + return True + + +def search_date(string, year_first=None, day_first=None): """Looks for date patterns, and if found return the date and group span.
Assumes there are sentinels at the beginning and end of the string that @@ -45,15 +86,23 @@ def search_date(string, year_first=None, day_first=True): """ start, end = None, None match = None + groups = None for date_re in date_regexps: search_match = date_re.search(string) if search_match and (match is None or search_match.end() - search_match.start() > len(match)): start, end = search_match.start(1), search_match.end(1) - match = '-'.join(search_match.groups()[1:]) + groups = search_match.groups()[1:] + match = '-'.join(groups) if match is None: return + if year_first and day_first is None: + day_first = False + + if day_first is None: + day_first = _guess_day_first_parameter(groups) + # If day_first/year_first is undefined, parse is made using both possible values. yearfirst_opts = [False, True] if year_first is not None: diff --git a/setup.py b/setup.py index ed1eab3..c7fd923 100644 --- a/setup.py +++ b/setup.py @@ -1,12 +1,12 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from setuptools import setup, find_packages - -import sys +import io import os import re -import io +import sys + +from setuptools import setup, find_packages here = os.path.abspath(os.path.dirname(__file__)) @@ -16,7 +16,7 @@ with io.open(os.path.join(here, 'README.rst'), encoding='utf-8') as f: with io.open(os.path.join(here, 'HISTORY.rst'), encoding='utf-8') as f: history = f.read() -install_requires = ['rebulk>=0.7.3', 'babelfish>=0.5.5', 'python-dateutil<2.5.2'] +install_requires = ['rebulk>=0.7.3', 'babelfish>=0.5.5', 'python-dateutil'] # python-dateutil 2.5.2 introduced a change with month position in ambiguous day/month dates when year is first. # https://github.com/dateutil/dateutil/commit/2d42e046d55b9fbbc0a2f41ce83fb8ec5de2d28b#commitcomment-17032106 if sys.version_info < (2, 7):