Skip to content

Commit

Permalink
Merge pull request #145 from ohadmata/add-break-to-date-casting-to-im…
Browse files Browse the repository at this point in the history
…prove-infer-speed

Improve date/datetime inference speed
  • Loading branch information
ohadmata authored May 28, 2024
2 parents cb52718 + 36c90c1 commit 90dbaac
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 11 deletions.
3 changes: 3 additions & 0 deletions examples/read_csv.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from src.shmessy import Shmessy
from utils import pretty_print_df, init_logger
import time


if __name__ == "__main__":
    # Example: read a CSV through Shmessy, print the frame together with the
    # inferred schema, and report how long the read + inference + print took.
    init_logger()
    shmessy = Shmessy()

    # perf_counter() is a monotonic, high-resolution clock; unlike time.time()
    # it is not affected by system clock adjustments, so it is the right tool
    # for measuring an elapsed interval.
    start_time = time.perf_counter()
    df = shmessy.read_csv('../tests/data/data_1.csv')
    inferred_schema = shmessy.get_inferred_schema()
    pretty_print_df(df=df, inferred_schema=inferred_schema)

    # Seconds -> whole milliseconds.
    elapsed_ms = int((time.perf_counter() - start_time) * 1000)
    print(f"Duration: {elapsed_ms}")
19 changes: 8 additions & 11 deletions src/shmessy/date_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,13 @@ def is_empty_value(value: Any) -> bool:


def cast_value(value: Any, pattern: Optional[Any] = None) -> Optional[Any]:
    """Cast a single value to a date/datetime using a ``strptime`` pattern.

    Args:
        value: The raw value to cast.
        pattern: Format string passed to ``datetime.strptime`` when
            ``value`` is a string.

    Returns:
        ``None`` when ``is_empty_value(value)`` is true; ``value`` itself
        when it is already a ``datetime64`` or ``Timestamp``; otherwise the
        ``datetime`` parsed from the string.

    Raises:
        ValueError: Propagated from ``datetime.strptime`` when the string
            does not match ``pattern``.
        Exception: If ``value`` is non-empty and is neither a string nor an
            existing ``datetime64`` / ``Timestamp``.
    """
    if is_empty_value(value):
        return None
    if isinstance(value, (datetime64, Timestamp)):
        # Already a datetime-like object; nothing to parse.
        return value
    if isinstance(value, str):
        # Only strings are ever handed to strptime. A mismatch raises
        # ValueError, which is deliberately left to propagate without a
        # per-value log/re-raise wrapper here.
        return datetime.strptime(value, pattern)
    raise Exception("Input type for date/datetime casting must be string.")


def validate(
Expand All @@ -45,6 +41,7 @@ def validate(
except ValueError as e: # Not match the pattern
logger.debug(e)
valid_pattern = False
break
except Exception as e: # Any other exception
logger.debug(e)
return None
Expand Down

0 comments on commit 90dbaac

Please sign in to comment.