From b211452fc552aedd7417f12f24a08dbec53b1c5c Mon Sep 17 00:00:00 2001 From: Denis Date: Thu, 30 Mar 2023 17:43:50 +0300 Subject: [PATCH 1/2] Add new `object` type --- tap_spreadsheets_anywhere/configuration.py | 4 ++-- tap_spreadsheets_anywhere/conversion.py | 10 ++++++++++ tap_spreadsheets_anywhere/test/test_conversion.py | 14 ++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/tap_spreadsheets_anywhere/configuration.py b/tap_spreadsheets_anywhere/configuration.py index 652c25f..10e77c2 100644 --- a/tap_spreadsheets_anywhere/configuration.py +++ b/tap_spreadsheets_anywhere/configuration.py @@ -31,8 +31,8 @@ Optional('prefer_schema_as_string'): bool, Optional('schema_overrides'): { str: { - Required('type'): Any(Any('null','string','integer','number','date-time'), - [Any('null','string','integer','number','date-time')]) + Required('type'): Any(Any('null','string','integer','number','date-time','object'), + [Any('null','string','integer','number','date-time','object')]) } } }] diff --git a/tap_spreadsheets_anywhere/conversion.py b/tap_spreadsheets_anywhere/conversion.py index 93889e7..fa4c5e5 100644 --- a/tap_spreadsheets_anywhere/conversion.py +++ b/tap_spreadsheets_anywhere/conversion.py @@ -2,6 +2,7 @@ import pytz import logging import pickle +from collections.abc import MutableMapping LOGGER = logging.getLogger(__name__) @@ -71,6 +72,13 @@ def convert(datum, desired_type=None): except (ValueError, TypeError): pass + if desired_type in (None, 'object'): + try: + if isinstance(datum, MutableMapping): + return datum, 'object' + except (ValueError, TypeError): + pass + return str(datum), 'string', @@ -117,6 +125,8 @@ def pick_datatype(counts,prefer_number_vs_integer=False): to_return = 'number' elif counts.get('date-time', 0) > 0: to_return = 'date-time' + elif counts.get('object', 0) > 0: + to_return = 'object' elif counts.get('string', 0) <= 0: LOGGER.warning(f"Unexpected data type encountered in histogram {counts}. Defaulting type to String.") diff --git a/tap_spreadsheets_anywhere/test/test_conversion.py b/tap_spreadsheets_anywhere/test/test_conversion.py index 4def7ab..fde47f6 100644 --- a/tap_spreadsheets_anywhere/test/test_conversion.py +++ b/tap_spreadsheets_anywhere/test/test_conversion.py @@ -38,6 +38,11 @@ def test_convert(self): # strings self.assertEqual(convert('4 o clock'), ('4 o clock', 'string')) + # objects + self.assertEqual(convert("{'k': 'v','k': 'v'}"), ("{'k': 'v','k': 'v'}", 'string')) + self.assertEqual(convert({'k': 'v','k': 'v'}), ({'k': 'v','k': 'v'}, 'object')) + self.assertEqual(convert({'k': 'v','k': 'v'}, 'object'), ({'k': 'v','k': 'v'}, 'object')) + def test_count_sample(self): self.assertEqual( count_sample({'id': '1', 'first_name': 'Connor'}), @@ -54,6 +59,7 @@ def test_pick_datatype(self): self.assertEqual(pick_datatype({'string': 1}), 'string') self.assertEqual(pick_datatype({'integer': 1}), 'integer') self.assertEqual(pick_datatype({'number': 1}), 'number') + self.assertEqual(pick_datatype({'object': 1}), 'object') self.assertEqual(pick_datatype({'number': 1, 'integer': 1}), 'number') @@ -62,6 +68,8 @@ def test_pick_datatype(self): 'integer': 1}), 'string') self.assertEqual(pick_datatype({'string': 1, 'number': 1}), 'string') + self.assertEqual(pick_datatype({'string': 1, + 'object': 1}), 'string') self.assertEqual(pick_datatype({}), 'string') def test_generate_schema(self): @@ -88,3 +96,9 @@ def test_generate_schema(self): {'id': '2', 'date': '2017-01-02'}]), {'id': {'type': ['null', 'integer'],}, 'date': {'type': ['null', 'string'],}}) + + self.assertEqual( + generate_schema([{'id': '1', 'obj': { 'date': '2017-01-01', 'count': 100 }}, + {'id': '2', 'obj': { 'date': '2017-01-01', 'count': 0 }}]), + {'id': {'type': ['null', 'integer'],}, + 'obj': {'type': ['null', 'object'],}}) From 58aecbdcbba48d6ac3cde1d7d6767721286d698f Mon Sep 17 00:00:00 2001 From: Denis Date: Thu, 30 Mar 2023 20:04:35 +0300 Subject: [PATCH 2/2] fix: reorganize tests for `object` type --- tap_spreadsheets_anywhere/test/test_conversion.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tap_spreadsheets_anywhere/test/test_conversion.py b/tap_spreadsheets_anywhere/test/test_conversion.py index fde47f6..49b4686 100644 --- a/tap_spreadsheets_anywhere/test/test_conversion.py +++ b/tap_spreadsheets_anywhere/test/test_conversion.py @@ -38,7 +38,7 @@ def test_convert(self): # strings self.assertEqual(convert('4 o clock'), ('4 o clock', 'string')) - # objects + def test_convert_objects(self): self.assertEqual(convert("{'k': 'v','k': 'v'}"), ("{'k': 'v','k': 'v'}", 'string')) self.assertEqual(convert({'k': 'v','k': 'v'}), ({'k': 'v','k': 'v'}, 'object')) self.assertEqual(convert({'k': 'v','k': 'v'}, 'object'), ({'k': 'v','k': 'v'}, 'object')) @@ -59,7 +59,6 @@ def test_pick_datatype(self): self.assertEqual(pick_datatype({'string': 1}), 'string') self.assertEqual(pick_datatype({'integer': 1}), 'integer') self.assertEqual(pick_datatype({'number': 1}), 'number') - self.assertEqual(pick_datatype({'object': 1}), 'object') self.assertEqual(pick_datatype({'number': 1, 'integer': 1}), 'number') @@ -68,10 +67,13 @@ def test_pick_datatype(self): 'integer': 1}), 'string') self.assertEqual(pick_datatype({'string': 1, 'number': 1}), 'string') - self.assertEqual(pick_datatype({'string': 1, - 'object': 1}), 'string') self.assertEqual(pick_datatype({}), 'string') + def test_pick_datatype_objects(self): + self.assertEqual(pick_datatype({'object': 1}), 'object') + self.assertEqual(pick_datatype({'string': 1, + 'object': 1}), 'string') + def test_generate_schema(self): self.assertEqual( generate_schema([{'id': '1', 'first_name': 'Connor'}, @@ -97,6 +99,7 @@ def test_generate_schema(self): {'id': {'type': ['null', 'integer'],}, 'date': {'type': ['null', 'string'],}}) + def test_generate_schema_objects(self): self.assertEqual( generate_schema([{'id': '1', 'obj': { 'date': '2017-01-01', 'count': 100 }}, {'id': '2', 'obj': { 'date': '2017-01-01', 'count': 0 }}]),