diff --git a/pyproject.toml b/pyproject.toml
index 5aed6b9..fdadda8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "tap-csv"
-version = "1.1.0"
+version = "1.2.0"
 description = "Singer tap for CSV, built with the Meltano SDK for Singer Taps."
 authors = ["Pat Nadolny"]
 keywords = [
diff --git a/tap_csv/client.py b/tap_csv/client.py
index cae3892..6ddce71 100644
--- a/tap_csv/client.py
+++ b/tap_csv/client.py
@@ -28,6 +28,7 @@ def __init__(self, *args, **kwargs):
         """Init CSVStram."""
         # cache file_config so we dont need to go iterating the config list again later
         self.file_config = kwargs.pop("file_config")
+        self.stream_schema = None
         super().__init__(*args, **kwargs)
 
     def get_records(self, context: Context | None) -> t.Iterable[dict]:
@@ -126,8 +127,13 @@ def schema(self) -> dict:
         """Return dictionary of record schema.
 
         Dynamically detect the json schema for the stream.
-        This is evaluated prior to any records being retrieved.
+
+        This property is accessed multiple times for each record
+        so it's important to cache the schema.
         """
+        if self.stream_schema:
+            return self.stream_schema
+
         properties: list[th.Property] = []
         self.primary_keys = self.file_config.get("keys", [])
 
@@ -156,4 +162,5 @@ def schema(self) -> dict:
         # Cache header for future use
         self.header = header
 
-        return th.PropertiesList(*properties).to_dict()
+        self.stream_schema = th.PropertiesList(*properties).to_dict()
+        return self.stream_schema