From 22a954082c30b11b9ff9537bcb14d3e56f1b7f42 Mon Sep 17 00:00:00 2001 From: ohadmata Date: Thu, 27 Jun 2024 15:44:00 +0300 Subject: [PATCH] include static patterns in timestamp + added formats --- src/shmessy/types/date.py | 15 ++++++++++++--- src/shmessy/types/datetime_.py | 5 ++++- tests/unit/test_date_type.py | 11 +++++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/shmessy/types/date.py b/src/shmessy/types/date.py index 2866102..77b68a9 100644 --- a/src/shmessy/types/date.py +++ b/src/shmessy/types/date.py @@ -15,7 +15,11 @@ class DateType(BaseType): weight = 2 delimiters: list[str] = {"/", ".", "-", " "} - static_patterns: list[str] = ["%B %d, %Y"] # January 23, 2024 + static_patterns: list[str] = [ + "%B %d, %Y", # January 23, 2024 + "%b %d, %Y", # Feb 17, 1995 + "%d %b %y, %a", # 10 Apr 24, Wed + ] date_only_patterns: list[list[str]] = [ # Do not attach time combinations to these patterns ["%Y", "%m"], # 2022-07 | 2022 07 | 2022/07 | 2022.07 @@ -38,7 +42,9 @@ class DateType(BaseType): @classmethod def get_patterns( - cls, include_date_only_patterns: Optional[bool] = True + cls, + include_date_only_patterns: Optional[bool] = True, + include_static_date_patterns: Optional[bool] = True, ) -> list[str]: # The value returned cannot be set since the order is important! input_patterns: list[list[str]] = cls.dynamic_patterns.copy() @@ -49,7 +55,10 @@ def get_patterns( for pattern in input_patterns: for delimiter in cls.delimiters: results.append(delimiter.join(pattern)) - return results + cls.static_patterns + + if include_static_date_patterns: + return results + cls.static_patterns + return results def validate(self, data: ndarray) -> Optional[InferredField]: return validate( diff --git a/src/shmessy/types/datetime_.py b/src/shmessy/types/datetime_.py index 6382596..dee69ea 100644 --- a/src/shmessy/types/datetime_.py +++ b/src/shmessy/types/datetime_.py @@ -21,6 +21,7 @@ class DatetimeType(BaseType): "%Y-%m-%d %H:%M:%S.%fZ", # 2022-12-30 00:00:00.000Z "%Y-%m-%dT%H:%M:%S.%fZ", # 2022-12-30T00:00:00.000Z "%m/%d/%Y %H:%M:%S %p", # 11/23/2024 00:00:00 AM + "%Y-%m-%dT%H:%MZ", # 2024-01-20T11:30Z ] date_time_delimiters: set[str] = {" ", "T"} dynamic_patterns: list[str] = [ @@ -34,7 +35,9 @@ class DatetimeType(BaseType): @classmethod def get_patterns(cls) -> list[str]: result: list[str] = [] - for date in DateType.get_patterns(include_date_only_patterns=False): + for date in DateType.get_patterns( + include_date_only_patterns=False, include_static_date_patterns=False + ): for date_time_delimiter in cls.date_time_delimiters: for dynamic_pattern in cls.dynamic_patterns: result.append(date + date_time_delimiter + dynamic_pattern) diff --git a/tests/unit/test_date_type.py b/tests/unit/test_date_type.py index ef058b9..71cfd2c 100644 --- a/tests/unit/test_date_type.py +++ b/tests/unit/test_date_type.py @@ -239,3 +239,14 @@ def test_get_patterns_with_date_only(): for p in date_only_patterns: assert p in result_all_patterns assert p not in result_date_only_patterns + + +def test_include_date_static_patterns(): + input_static_patterns: list[str] = ["%B %d, %Y"] + date_type = DateType() + result_all_patterns = date_type.get_patterns() + result_without_static_patterns = date_type.get_patterns(include_static_date_patterns=False) + + for p in input_static_patterns: + assert p in result_all_patterns + assert p not in result_without_static_patterns