Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move all fields test to new framework #239

Merged
merged 12 commits into from
Dec 18, 2023
Merged
150 changes: 150 additions & 0 deletions tests/base_hubspot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import os
import unittest
from datetime import datetime as dt
from datetime import timedelta

import tap_tester.menagerie as menagerie
import tap_tester.connections as connections
import tap_tester.runner as runner
from tap_tester.base_suite_tests.base_case import BaseCase
from tap_tester import LOGGER


class HubspotBaseCase(BaseCase):
    """Base class shared by the Hubspot tap-tester suites.

    Centralizes connection configuration (credentials, properties) and the
    expected per-stream metadata so individual test implementations only
    declare what differs.
    """

    # set the default start date which can be overridden in the tests.
    # NOTE(review): dt.utcnow() is deprecated as of Python 3.12 — consider
    # dt.now(timezone.utc) once BaseCase.timedelta_formatted accepts aware
    # datetimes; confirm before changing.
    start_date = BaseCase.timedelta_formatted(dt.utcnow(), delta=timedelta(days=-1))

    # Extra fields expected in replicated records that are not part of the
    # discovered catalog.  Used by the all-fields test.
    EXTRA_FIELDS = {
        'deals': {
            # BUG_TDL-14993 | https://jira.talendforge.org/browse/TDL-14993
            # Has a value of object with key 'value' and value 'Null'
            'property_hs_date_entered_1258834',
            'property_hs_time_in_example_stage1660743867503491_315775040'
        },
        "contacts": {"versionTimestamp"}
    }

    def setUp(self):
        """Fail fast if any required OAuth environment variable is unset."""
        required_envs = [
            'TAP_HUBSPOT_REDIRECT_URI',
            'TAP_HUBSPOT_CLIENT_ID',
            'TAP_HUBSPOT_CLIENT_SECRET',
            'TAP_HUBSPOT_REFRESH_TOKEN',
        ]
        missing_envs = [name for name in required_envs if os.getenv(name) is None]
        if missing_envs:
            raise Exception("Missing environment variables: {}".format(missing_envs))

    @staticmethod
    def get_type():
        """Return the Stitch connection type identifier for this tap."""
        return "platform.hubspot"

    @staticmethod
    def tap_name():
        """Return the tap's package name."""
        return "tap-hubspot"

    def get_properties(self):
        """Return the connection properties used to configure the tap."""
        return {'start_date': self.start_date}

    def get_credentials(self):
        """Return the tap's OAuth credentials, read from the environment."""
        return {'refresh_token': os.getenv('TAP_HUBSPOT_REFRESH_TOKEN'),
                'client_secret': os.getenv('TAP_HUBSPOT_CLIENT_SECRET'),
                'redirect_uri': os.getenv('TAP_HUBSPOT_REDIRECT_URI'),
                'client_id': os.getenv('TAP_HUBSPOT_CLIENT_ID')}

    @classmethod
    def expected_metadata(cls):  # DOCS_BUG https://stitchdata.atlassian.net/browse/DOC-1523)
        """The expected streams and metadata about the streams"""

        return {
            "campaigns": {
                BaseCase.PRIMARY_KEYS: {"id"},
                BaseCase.REPLICATION_METHOD: BaseCase.FULL_TABLE,
                BaseCase.OBEYS_START_DATE: False
            },
            "companies": {
                BaseCase.PRIMARY_KEYS: {"companyId"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"property_hs_lastmodifieddate"},
                BaseCase.API_LIMIT: 250,
                BaseCase.OBEYS_START_DATE: True
            },
            "contact_lists": {
                BaseCase.PRIMARY_KEYS: {"listId"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"updatedAt"},
                BaseCase.API_LIMIT: 250,
                BaseCase.OBEYS_START_DATE: True
            },
            "contacts": {
                BaseCase.PRIMARY_KEYS: {"vid"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"versionTimestamp"},
                BaseCase.API_LIMIT: 100,
                BaseCase.OBEYS_START_DATE: True
            },
            "contacts_by_company": {
                BaseCase.PRIMARY_KEYS: {"company-id", "contact-id"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.API_LIMIT: 100,
                BaseCase.OBEYS_START_DATE: True,
                BaseCase.PARENT_STREAM: 'companies'
            },
            "deal_pipelines": {
                BaseCase.PRIMARY_KEYS: {"pipelineId"},
                BaseCase.REPLICATION_METHOD: BaseCase.FULL_TABLE,
                BaseCase.OBEYS_START_DATE: False,
            },
            "deals": {
                BaseCase.PRIMARY_KEYS: {"dealId"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"property_hs_lastmodifieddate"},
                BaseCase.OBEYS_START_DATE: True
            },
            "email_events": {
                BaseCase.PRIMARY_KEYS: {"id"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"startTimestamp"},
                BaseCase.API_LIMIT: 1000,
                BaseCase.OBEYS_START_DATE: True
            },
            "engagements": {
                BaseCase.PRIMARY_KEYS: {"engagement_id"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"lastUpdated"},
                BaseCase.API_LIMIT: 250,
                BaseCase.OBEYS_START_DATE: True
            },
            "forms": {
                BaseCase.PRIMARY_KEYS: {"guid"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"updatedAt"},
                BaseCase.OBEYS_START_DATE: True
            },
            "owners": {
                BaseCase.PRIMARY_KEYS: {"ownerId"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"updatedAt"},
                BaseCase.OBEYS_START_DATE: True  # TODO is this a BUG?
            },
            "subscription_changes": {
                BaseCase.PRIMARY_KEYS: {"timestamp", "portalId", "recipient"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"startTimestamp"},
                BaseCase.API_LIMIT: 1000,
                BaseCase.OBEYS_START_DATE: True
            },
            "workflows": {
                BaseCase.PRIMARY_KEYS: {"id"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"updatedAt"},
                BaseCase.OBEYS_START_DATE: True
            },
            "tickets": {
                BaseCase.PRIMARY_KEYS: {"id"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"updatedAt"},
                BaseCase.API_LIMIT: 100,
                BaseCase.OBEYS_START_DATE: True
            }
        }
112 changes: 112 additions & 0 deletions tests/test_hubspot_newfw_all_fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import unittest
from tap_tester.base_suite_tests.all_fields_test import AllFieldsTest
from tap_tester.logger import LOGGER
from base_hubspot import HubspotBaseCase
from client import TestClient

class HubspotAllFieldsTest(AllFieldsTest, HubspotBaseCase):
    """Hubspot all fields test implementation """
    EXTRA_FIELDS = HubspotBaseCase.EXTRA_FIELDS

    @staticmethod
    def name():
        """Name under which tap-tester registers this test run."""
        return "tt_hubspot_all_fields"

    def streams_to_test(self):
        """expected streams minus the streams not under test"""
        return self.expected_stream_names().difference({
            'owners',
            'subscription_changes',  # BUG_TDL-14938 https://jira.talendforge.org/browse/TDL-14938
        })

    def setUp(self):
        """Collect expected records with the test client, then run the
        framework setup (AllFieldsTest.setUp) and normalize the data."""
        self.maxDiff = None  # see all output in failure

        test_client = TestClient(start_date=self.get_properties()['start_date'])

        self.expected_records = dict()
        streams = self.streams_to_test()
        # contacts_by_company needs the companyIds collected while reading
        # 'companies', so make sure it is always read last.
        stream_to_run_last = 'contacts_by_company'
        if stream_to_run_last in streams:
            streams.remove(stream_to_run_last)
            streams = list(streams)
            streams.append(stream_to_run_last)

        for stream in streams:
            # Get all records
            if stream == 'contacts_by_company':
                company_ids = [company['companyId'] for company in self.expected_records['companies']]
                self.expected_records[stream] = test_client.read(stream, parent_ids=company_ids)
            else:
                self.expected_records[stream] = test_client.read(stream)

        for stream, records in self.expected_records.items():
            LOGGER.info("The test client found %s %s records.", len(records), stream)

        super().setUp()
        self.convert_datatype(self.expected_records)

    def convert_datatype(self, expected_records):
        """Convert epoch-millisecond 'timestamp' values to formatted datetime
        strings, and record which selected fields actually carry data.
        """
        # Convert the time stamp data type, Get keys with data and with no data
        self.keys_with_data = {}
        for stream, records in expected_records.items():
            expected_keys = set()
            for record in records:
                expected_keys = expected_keys.union(record.keys())
                # convert timestamps to string formatted datetime
                timestamp_keys = {'timestamp'}
                for key in timestamp_keys:
                    timestamp = record.get(key)
                    if timestamp:
                        record[key] = self.datetime_from_timestamp(
                            timestamp / 1000, str_format=self.BASIC_DATE_FORMAT)

            self.keys_with_data[stream] = expected_keys
            # NOTE(review): assumes self.keys_with_no_data and
            # self.selected_fields were initialized by AllFieldsTest.setUp —
            # confirm against the base suite.
            self.keys_with_no_data[stream] = self.selected_fields.get(stream).difference(expected_keys)

        return expected_records

    def remove_bad_keys(self, stream):
        """Drop untrackable dynamic 'deals' fields from both the expected
        keys and the replicated fields before they are compared.
        """
        # NB: The following workaround is for dynamic fields on the `deals` stream that we just can't track.
        # At the time of implementation there is no customer feedback indicating that these dynamic fields
        # would prove useful to an end user. The ones that we replicated with the test client are specific
        # to our test data. We have determined that the filtering of these fields is an expected behavior.
        # deals workaround for 'property_hs_date_entered_<property>' fields
        if stream == 'deals':
            # str.startswith accepts a tuple of prefixes, so one call per key
            # replaces the nested prefix loop.
            bad_key_prefixes = ('property_hs_date_entered_', 'property_hs_date_exited_', 'property_hs_time_in')
            # Collect first, remove second: removing from a set while
            # iterating it raises RuntimeError.
            bad_keys = set()
            for key_collection in (self.expected_all_keys, self.fields_replicated):
                for key in key_collection:
                    if key.startswith(bad_key_prefixes):
                        bad_keys.add(key)
            for key in bad_keys:
                if key in self.expected_all_keys:
                    self.expected_all_keys.remove(key)
                if key in self.fields_replicated:
                    self.fields_replicated.remove(key)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two comments:

  • I'm not sure why we need to build bad keys and then go through them. It appears we could just remove them at the same time.
  • If the key is in both the expected_all_keys and the fields_replicated this would pass. So I am uncertain as to why we would need to remove this from both our expectations and our actual results. Can we log if a key is removed and where it is removed from when we do this. I would expect it to only be in the actual results (fields_replicated) and not necessary in the expectations. If this isn't a good assumption I would like to figure out what I'm not understanding.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the first comment — I felt the same and initially removed the keys directly instead of collecting them into bad_keys first, but Python raised a RuntimeError because the set changed size while being iterated.
For the second comment — let me log what is being removed and from where. It could be a side effect of adding the extra fields discussed in the comment above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed the method to include only the bad keys that are not in both the lists.


##########################################################################
# Tests To Skip
##########################################################################

@unittest.skip("Skip till all cards of missing fields are fixed. TDL-16145 ")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be nice place to use the tap-tester @skipUntilDone to ensure this gets picked up as soon as TDL-16145 is completed.

def test_values_of_all_fields(self):
for stream in self.test_streams:
with self.subTest(stream=stream):

# gather expectations
expected_all_keys = self.selected_fields.get(stream, set()) - set(self.MISSING_FIELDS.get(stream, {}))

# gather results
fields_replicated = self.actual_fields.get(stream, set())

# verify that all fields are sent to the target
# test the combination of all records
self.assertSetEqual(fields_replicated, expected_all_keys,
logging=f"verify all fields are replicated for stream {stream}")