Patch: accept 'object' (mapping) values as a column type.

  * configuration.py   - 'object' becomes a valid schema_overrides type.
  * conversion.py      - convert() returns MutableMapping values unchanged,
                         tagged 'object', when desired_type is None or
                         'object'; pick_datatype() gains an 'object' branch
                         after the 'date-time' branch.
  * test_conversion.py - tests for convert, pick_datatype and
                         generate_schema with object values.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Hunk line counts were checked against the
@@ headers, but the leading whitespace on every line is reconstructed, not
original. Confirm against the target tree before applying.
NOTE(review): the LOGGER.warning context line was split across two physical
lines in that copy; it is joined with a single space here. Confirm the exact
message text.

diff --git a/tap_spreadsheets_anywhere/configuration.py b/tap_spreadsheets_anywhere/configuration.py
index 507b000..d131f25 100644
--- a/tap_spreadsheets_anywhere/configuration.py
+++ b/tap_spreadsheets_anywhere/configuration.py
@@ -32,8 +32,8 @@
         Optional('prefer_schema_as_string'): bool,
         Optional('schema_overrides'): {
             str: {
-                Required('type'): Any(Any('null','string','integer','number','date-time'),
-                                      [Any('null','string','integer','number','date-time')])
+                Required('type'): Any(Any('null','string','integer','number','date-time','object'),
+                                      [Any('null','string','integer','number','date-time','object')])
             }
         }
     }]
diff --git a/tap_spreadsheets_anywhere/conversion.py b/tap_spreadsheets_anywhere/conversion.py
index 93889e7..fa4c5e5 100644
--- a/tap_spreadsheets_anywhere/conversion.py
+++ b/tap_spreadsheets_anywhere/conversion.py
@@ -2,6 +2,7 @@
 import pytz
 import logging
 import pickle
+from collections.abc import MutableMapping
 
 LOGGER = logging.getLogger(__name__)
 
@@ -71,6 +72,13 @@ def convert(datum, desired_type=None):
         except (ValueError, TypeError):
             pass
 
+    if desired_type in (None, 'object'):
+        try:
+            if isinstance(datum, MutableMapping):
+                return datum, 'object'
+        except (ValueError, TypeError):
+            pass
+
     return str(datum), 'string',
 
 
@@ -117,6 +125,8 @@ def pick_datatype(counts,prefer_number_vs_integer=False):
             to_return = 'number'
         elif counts.get('date-time', 0) > 0:
             to_return = 'date-time'
+        elif counts.get('object', 0) > 0:
+            to_return = 'object'
         elif counts.get('string', 0) <= 0:
             LOGGER.warning(f"Unexpected data type encountered in histogram {counts}. Defaulting type to String.")
 
diff --git a/tap_spreadsheets_anywhere/test/test_conversion.py b/tap_spreadsheets_anywhere/test/test_conversion.py
index 4def7ab..49b4686 100644
--- a/tap_spreadsheets_anywhere/test/test_conversion.py
+++ b/tap_spreadsheets_anywhere/test/test_conversion.py
@@ -38,6 +38,11 @@ def test_convert(self):
         # strings
         self.assertEqual(convert('4 o clock'), ('4 o clock', 'string'))
 
+    def test_convert_objects(self):
+        self.assertEqual(convert("{'k': 'v','k': 'v'}"), ("{'k': 'v','k': 'v'}", 'string'))
+        self.assertEqual(convert({'k': 'v','k': 'v'}), ({'k': 'v','k': 'v'}, 'object'))
+        self.assertEqual(convert({'k': 'v','k': 'v'}, 'object'), ({'k': 'v','k': 'v'}, 'object'))
+
     def test_count_sample(self):
         self.assertEqual(
             count_sample({'id': '1', 'first_name': 'Connor'}),
@@ -64,6 +69,11 @@ def test_pick_datatype(self):
                                         'number': 1}), 'string')
         self.assertEqual(pick_datatype({}), 'string')
 
+    def test_pick_datatype_objects(self):
+        self.assertEqual(pick_datatype({'object': 1}), 'object')
+        self.assertEqual(pick_datatype({'string': 1,
+                                        'object': 1}), 'string')
+
     def test_generate_schema(self):
         self.assertEqual(
             generate_schema([{'id': '1', 'first_name': 'Connor'},
@@ -88,3 +98,10 @@ def test_generate_schema(self):
                              {'id': '2', 'date': '2017-01-02'}]),
             {'id': {'type': ['null', 'integer'],},
              'date': {'type': ['null', 'string'],}})
+
+    def test_generate_schema_objects(self):
+        self.assertEqual(
+            generate_schema([{'id': '1', 'obj': { 'date': '2017-01-01', 'count': 100 }},
+                             {'id': '2', 'obj': { 'date': '2017-01-01', 'count': 0 }}]),
+            {'id': {'type': ['null', 'integer'],},
+             'obj': {'type': ['null', 'object'],}})