Skip to content

Commit

Permalink
Cache CSV stream schema to avoid regenerating it since the 'schema' property is accessed multiple times per record.
Browse files: browse the repository at this point in the history.
  • Loading branch information
atl-ggregson committed Jan 16, 2025
1 parent dfd07ea commit ff37910
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "tap-csv"
version = "1.1.0"
version = "1.2.0"
description = "Singer tap for CSV, built with the Meltano SDK for Singer Taps."
authors = ["Pat Nadolny"]
keywords = [
Expand Down
11 changes: 9 additions & 2 deletions tap_csv/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def __init__(self, *args, **kwargs):
"""Init CSVStream."""
# cache file_config so we don't need to iterate the config list again later
self.file_config = kwargs.pop("file_config")
self.stream_schema = None
super().__init__(*args, **kwargs)

def get_records(self, context: Context | None) -> t.Iterable[dict]:
Expand Down Expand Up @@ -126,8 +127,13 @@ def schema(self) -> dict:
"""Return dictionary of record schema.
Dynamically detect the json schema for the stream.
This is evaluated prior to any records being retrieved.
This property is accessed multiple times for each record
so it's important to cache the schema.
"""
if self.stream_schema:
return self.stream_schema

properties: list[th.Property] = []
self.primary_keys = self.file_config.get("keys", [])

Expand Down Expand Up @@ -156,4 +162,5 @@ def schema(self) -> dict:
# Cache header for future use
self.header = header

return th.PropertiesList(*properties).to_dict()
self.stream_schema = th.PropertiesList(*properties).to_dict()
return self.stream_schema

0 comments on commit ff37910

Please sign in to comment.