Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move all fields test to new framework #239

Merged
merged 12 commits into from
Dec 18, 2023
Merged
150 changes: 150 additions & 0 deletions tests/base_hubspot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import os
import unittest
from datetime import datetime as dt
from datetime import timedelta

import tap_tester.menagerie as menagerie
import tap_tester.connections as connections
import tap_tester.runner as runner
from tap_tester.base_suite_tests.base_case import BaseCase
from tap_tester import LOGGER


class HubspotBaseCase(BaseCase):
    """Base class shared by the Hubspot tap-tester suites.

    Centralizes connection configuration (credentials, properties) and the
    expected per-stream metadata so individual test implementations only
    declare what differs.
    """

    # set the default start date which can be overridden in the tests.
    # NOTE(review): dt.utcnow() is deprecated as of Python 3.12 — consider
    # dt.now(timezone.utc) once BaseCase.timedelta_formatted accepts aware
    # datetimes; confirm before changing.
    start_date = BaseCase.timedelta_formatted(dt.utcnow(), delta=timedelta(days=-1))

    # Extra fields expected in replicated records that are not part of the
    # discovered catalog.  Used by the all-fields test.
    EXTRA_FIELDS = {
        'deals': {
            # BUG_TDL-14993 | https://jira.talendforge.org/browse/TDL-14993
            # Has a value of object with key 'value' and value 'Null'
            'property_hs_date_entered_1258834',
            'property_hs_time_in_example_stage1660743867503491_315775040'
        },
        "contacts": {"versionTimestamp"}
    }

    def setUp(self):
        """Fail fast if any required OAuth environment variable is unset."""
        required_envs = [
            'TAP_HUBSPOT_REDIRECT_URI',
            'TAP_HUBSPOT_CLIENT_ID',
            'TAP_HUBSPOT_CLIENT_SECRET',
            'TAP_HUBSPOT_REFRESH_TOKEN',
        ]
        missing_envs = [name for name in required_envs if os.getenv(name) is None]
        if missing_envs:
            raise Exception("Missing environment variables: {}".format(missing_envs))

    @staticmethod
    def get_type():
        """Return the Stitch connection type identifier for this tap."""
        return "platform.hubspot"

    @staticmethod
    def tap_name():
        """Return the tap's package name."""
        return "tap-hubspot"

    def get_properties(self):
        """Return the connection properties used to configure the tap."""
        return {'start_date': self.start_date}

    def get_credentials(self):
        """Return the tap's OAuth credentials, read from the environment."""
        return {'refresh_token': os.getenv('TAP_HUBSPOT_REFRESH_TOKEN'),
                'client_secret': os.getenv('TAP_HUBSPOT_CLIENT_SECRET'),
                'redirect_uri': os.getenv('TAP_HUBSPOT_REDIRECT_URI'),
                'client_id': os.getenv('TAP_HUBSPOT_CLIENT_ID')}

    @classmethod
    def expected_metadata(cls):  # DOCS_BUG https://stitchdata.atlassian.net/browse/DOC-1523)
        """The expected streams and metadata about the streams"""

        return {
            "campaigns": {
                BaseCase.PRIMARY_KEYS: {"id"},
                BaseCase.REPLICATION_METHOD: BaseCase.FULL_TABLE,
                BaseCase.OBEYS_START_DATE: False
            },
            "companies": {
                BaseCase.PRIMARY_KEYS: {"companyId"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"property_hs_lastmodifieddate"},
                BaseCase.API_LIMIT: 250,
                BaseCase.OBEYS_START_DATE: True
            },
            "contact_lists": {
                BaseCase.PRIMARY_KEYS: {"listId"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"updatedAt"},
                BaseCase.API_LIMIT: 250,
                BaseCase.OBEYS_START_DATE: True
            },
            "contacts": {
                BaseCase.PRIMARY_KEYS: {"vid"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"versionTimestamp"},
                BaseCase.API_LIMIT: 100,
                BaseCase.OBEYS_START_DATE: True
            },
            "contacts_by_company": {
                BaseCase.PRIMARY_KEYS: {"company-id", "contact-id"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.API_LIMIT: 100,
                BaseCase.OBEYS_START_DATE: True,
                BaseCase.PARENT_STREAM: 'companies'
            },
            "deal_pipelines": {
                BaseCase.PRIMARY_KEYS: {"pipelineId"},
                BaseCase.REPLICATION_METHOD: BaseCase.FULL_TABLE,
                BaseCase.OBEYS_START_DATE: False,
            },
            "deals": {
                BaseCase.PRIMARY_KEYS: {"dealId"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"property_hs_lastmodifieddate"},
                BaseCase.OBEYS_START_DATE: True
            },
            "email_events": {
                BaseCase.PRIMARY_KEYS: {"id"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"startTimestamp"},
                BaseCase.API_LIMIT: 1000,
                BaseCase.OBEYS_START_DATE: True
            },
            "engagements": {
                BaseCase.PRIMARY_KEYS: {"engagement_id"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"lastUpdated"},
                BaseCase.API_LIMIT: 250,
                BaseCase.OBEYS_START_DATE: True
            },
            "forms": {
                BaseCase.PRIMARY_KEYS: {"guid"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"updatedAt"},
                BaseCase.OBEYS_START_DATE: True
            },
            "owners": {
                BaseCase.PRIMARY_KEYS: {"ownerId"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"updatedAt"},
                BaseCase.OBEYS_START_DATE: True  # TODO is this a BUG?
            },
            "subscription_changes": {
                BaseCase.PRIMARY_KEYS: {"timestamp", "portalId", "recipient"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"startTimestamp"},
                BaseCase.API_LIMIT: 1000,
                BaseCase.OBEYS_START_DATE: True
            },
            "workflows": {
                BaseCase.PRIMARY_KEYS: {"id"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"updatedAt"},
                BaseCase.OBEYS_START_DATE: True
            },
            "tickets": {
                BaseCase.PRIMARY_KEYS: {"id"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"updatedAt"},
                BaseCase.API_LIMIT: 100,
                BaseCase.OBEYS_START_DATE: True
            }
        }
112 changes: 112 additions & 0 deletions tests/test_hubspot_newfw_all_fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import unittest
from tap_tester.base_suite_tests.all_fields_test import AllFieldsTest
from tap_tester.logger import LOGGER
from base_hubspot import HubspotBaseCase
from client import TestClient

class HubspotAllFieldsTest(AllFieldsTest, HubspotBaseCase):
    """Hubspot all fields test implementation """
    EXTRA_FIELDS = HubspotBaseCase.EXTRA_FIELDS

    @staticmethod
    def name():
        """Name under which tap-tester registers this test run."""
        return "tt_hubspot_all_fields"

    def streams_to_test(self):
        """expected streams minus the streams not under test"""
        return self.expected_stream_names().difference({
            'owners',
            'subscription_changes',  # BUG_TDL-14938 https://jira.talendforge.org/browse/TDL-14938
        })

    def setUp(self):
        """Collect expected records with the test client, then run the
        framework setup (AllFieldsTest.setUp) and normalize the data."""
        self.maxDiff = None  # see all output in failure

        test_client = TestClient(start_date=self.get_properties()['start_date'])

        self.expected_records = dict()
        streams = self.streams_to_test()
        # contacts_by_company needs the companyIds collected while reading
        # 'companies', so make sure it is always read last.
        stream_to_run_last = 'contacts_by_company'
        if stream_to_run_last in streams:
            streams.remove(stream_to_run_last)
            streams = list(streams)
            streams.append(stream_to_run_last)

        for stream in streams:
            # Get all records
            if stream == 'contacts_by_company':
                company_ids = [company['companyId'] for company in self.expected_records['companies']]
                self.expected_records[stream] = test_client.read(stream, parent_ids=company_ids)
            else:
                self.expected_records[stream] = test_client.read(stream)

        for stream, records in self.expected_records.items():
            LOGGER.info("The test client found %s %s records.", len(records), stream)

        super().setUp()
        self.convert_datatype(self.expected_records)

    def convert_datatype(self, expected_records):
        """Convert epoch-millisecond 'timestamp' values to formatted datetime
        strings, and record which selected fields actually carry data.
        """
        # Convert the time stamp data type, Get keys with data and with no data
        self.keys_with_data = {}
        for stream, records in expected_records.items():
            expected_keys = set()
            for record in records:
                expected_keys = expected_keys.union(record.keys())
                # convert timestamps to string formatted datetime
                timestamp_keys = {'timestamp'}
                for key in timestamp_keys:
                    timestamp = record.get(key)
                    if timestamp:
                        record[key] = self.datetime_from_timestamp(
                            timestamp / 1000, str_format=self.BASIC_DATE_FORMAT)

            self.keys_with_data[stream] = expected_keys
            # NOTE(review): assumes self.keys_with_no_data and
            # self.selected_fields were initialized by AllFieldsTest.setUp —
            # confirm against the base suite.
            self.keys_with_no_data[stream] = self.selected_fields.get(stream).difference(expected_keys)

        return expected_records

    def remove_bad_keys(self, stream):
        """Drop untrackable dynamic 'deals' fields from both the expected
        keys and the replicated fields before they are compared.
        """
        # NB: The following workaround is for dynamic fields on the `deals` stream that we just can't track.
        # At the time of implementation there is no customer feedback indicating that these dynamic fields
        # would prove useful to an end user. The ones that we replicated with the test client are specific
        # to our test data. We have determined that the filtering of these fields is an expected behavior.
        # deals workaround for 'property_hs_date_entered_<property>' fields
        if stream == 'deals':
            # str.startswith accepts a tuple of prefixes, so one call per key
            # replaces the nested prefix loop.
            bad_key_prefixes = ('property_hs_date_entered_', 'property_hs_date_exited_', 'property_hs_time_in')
            # Collect first, remove second: removing from a set while
            # iterating it raises RuntimeError.
            bad_keys = set()
            for key_collection in (self.expected_all_keys, self.fields_replicated):
                for key in key_collection:
                    if key.startswith(bad_key_prefixes):
                        bad_keys.add(key)
            for key in bad_keys:
                if key in self.expected_all_keys:
                    self.expected_all_keys.remove(key)
                if key in self.fields_replicated:
                    self.fields_replicated.remove(key)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two comments:

  • I'm not sure why we need to build bad keys and then go through them. It appears we could just remove them at the same time.
  • If the key is in both the expected_all_keys and the fields_replicated this would pass. So I am uncertain as to why we would need to remove this from both our expectations and our actual results. Can we log if a key is removed and where it is removed from when we do this. I would expect it to only be in the actual results (fields_replicated) and not necessary in the expectations. If this isn't a good assumption I would like to figure out what I'm not understanding.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the first comment — I felt the same and initially removed the keys directly instead of collecting them into bad_keys first, but Python raised a RuntimeError because the set changed size while being iterated.
For the second comment — let me log what is being removed and from where. It could be a side effect of adding the extra fields discussed in the comment above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed the method to include only the bad keys that are not in both the lists.


##########################################################################
# Tests To Skip
##########################################################################

@unittest.skip("Skip till all cards of missing fields are fixed. TDL-16145 ")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be nice place to use the tap-tester @skipUntilDone to ensure this gets picked up as soon as TDL-16145 is completed.

def test_values_of_all_fields(self):
for stream in self.test_streams:
with self.subTest(stream=stream):

# gather expectations
expected_all_keys = self.selected_fields.get(stream, set()) - set(self.MISSING_FIELDS.get(stream, {}))

# gather results
fields_replicated = self.actual_fields.get(stream, set())

# verify that all fields are sent to the target
# test the combination of all records
self.assertSetEqual(fields_replicated, expected_all_keys,
logging=f"verify all fields are replicated for stream {stream}")