-
Notifications
You must be signed in to change notification settings - Fork 141
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
3 changed files
with
237 additions
and
115 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
|
||
import os | ||
from datetime import timedelta | ||
from tap_tester import connections, menagerie, runner, LOGGER | ||
from tap_tester.base_suite_tests.base_case import BaseCase | ||
|
||
|
||
class FacebookBaseTest(BaseCase):
    """
    Setup expectations for test sub classes.
    Metadata describing streams.
    A bunch of shared methods that are used in tap-tester tests.
    Shared tap-specific methods (as needed).

    Insights Test Data by Date Ranges
      "ads_insights":
          "2019-08-02T00:00:00.000000Z" -> "2019-10-30T00:00:00.000000Z"
          "2021-04-07T00:00:00.000000Z" -> "2021-04-08T00:00:00.000000Z"
      "ads_insights_age_and_gender":
          "2019-08-02T00:00:00.000000Z" -> "2019-10-30T00:00:00.000000Z"
          "2021-04-07T00:00:00.000000Z" -> "2021-04-08T00:00:00.000000Z"
      "ads_insights_country":
          "2019-08-02T00:00:00.000000Z" -> "2019-10-30T00:00:00.000000Z"
          "2021-04-07T00:00:00.000000Z" -> "2021-04-08T00:00:00.000000Z"
      "ads_insights_platform_and_device":
          "2019-08-02T00:00:00.000000Z" -> "2019-10-30T00:00:00.000000Z"
          "2021-04-07T00:00:00.000000Z" -> "2021-04-08T00:00:00.000000Z"
      "ads_insights_region":
          "2019-08-03T00:00:00.000000Z" -> "2019-10-30T00:00:00.000000Z"
          "2021-04-07T00:00:00.000000Z" -> "2021-04-08T00:00:00.000000Z"
      "ads_insights_dma":
          "2019-08-03T00:00:00.000000Z" -> "2019-10-30T00:00:00.000000Z"
          "2021-04-07T00:00:00.000000Z" -> "2021-04-08T00:00:00.000000Z"
      "ads_insights_hourly_advertiser":
          "2019-08-03T00:00:00.000000Z" -> "2019-10-30T00:00:00.000000Z"
          "2021-04-07T00:00:00.000000Z" -> "2021-04-08T00:00:00.000000Z"
    """
    FULL_TABLE = "FULL_TABLE"
    # Insights bookmarks are truncated to midnight UTC.
    BOOKMARK_COMPARISON_FORMAT = "%Y-%m-%dT00:00:00+00:00"

    start_date = ""
    end_date = ""

    @staticmethod
    def tap_name():
        """The name of the tap"""
        return "tap-facebook"

    @staticmethod
    def get_type():
        """The expected url route ending"""
        return "platform.facebook"

    def get_properties(self):
        """Configuration properties required for the tap."""
        return {
            'account_id': os.getenv('TAP_FACEBOOK_ACCOUNT_ID'),
            'start_date': '2021-04-07T00:00:00Z',
            'end_date': '2021-04-09T00:00:00Z',
            'insights_buffer_days': '1',
        }

    @staticmethod
    def get_credentials():
        """Authentication information for the test account"""
        return {'access_token': os.getenv('TAP_FACEBOOK_ACCESS_TOKEN')}

    @staticmethod
    def expected_metadata():
        """The expected streams and metadata about the streams"""
        return {
            "ads": {
                BaseCase.PRIMARY_KEYS: {"id", "updated_time"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"updated_time"}
            },
            "adcreative": {
                BaseCase.PRIMARY_KEYS: {"id"},
                BaseCase.REPLICATION_METHOD: BaseCase.FULL_TABLE,
            },
            "adsets": {
                BaseCase.PRIMARY_KEYS: {"id", "updated_time"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"updated_time"}
            },
            "campaigns": {
                BaseCase.PRIMARY_KEYS: {"id"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"updated_time"}
            },
            "ads_insights": {
                BaseCase.PRIMARY_KEYS: {"campaign_id", "adset_id", "ad_id", "date_start"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"date_start"}
            },
            "ads_insights_age_and_gender": {
                BaseCase.PRIMARY_KEYS: {
                    "campaign_id", "adset_id", "ad_id", "date_start", "age", "gender"
                },
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"date_start"}
            },
            "ads_insights_country": {
                BaseCase.PRIMARY_KEYS: {"campaign_id", "adset_id", "ad_id", "date_start", "country"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"date_start"}
            },
            "ads_insights_platform_and_device": {
                BaseCase.PRIMARY_KEYS: {
                    "campaign_id", "adset_id", "ad_id", "date_start",
                    "publisher_platform", "platform_position", "impression_device"
                },
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"date_start"}
            },
            "ads_insights_region": {
                BaseCase.PRIMARY_KEYS: {"region", "campaign_id", "adset_id", "ad_id", "date_start"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"date_start"}
            },
            "ads_insights_dma": {
                BaseCase.PRIMARY_KEYS: {"dma", "campaign_id", "adset_id", "ad_id", "date_start"},
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"date_start"}
            },
            "ads_insights_hourly_advertiser": {
                BaseCase.PRIMARY_KEYS: {
                    "hourly_stats_aggregated_by_advertiser_time_zone",
                    "campaign_id", "adset_id", "ad_id", "date_start"
                },
                BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
                BaseCase.REPLICATION_KEYS: {"date_start"}
            },
            # "leads": {
            #     BaseCase.PRIMARY_KEYS: {"id"},
            #     BaseCase.REPLICATION_METHOD: BaseCase.INCREMENTAL,
            #     BaseCase.REPLICATION_KEYS: {"created_time"}
            # },
        }

    def set_replication_methods(self, conn_id, catalogs, replication_methods):
        """Select streams by writing non-discoverable metadata.

        INCREMENTAL streams are marked selected; all other streams get
        "selected": None (deselected).

        Args:
            conn_id: the tap-tester connection id.
            catalogs: discovered catalog entries to update.
            replication_methods: mapping of stream name -> replication method.
        """
        for catalog in catalogs:
            replication_method = replication_methods.get(catalog['stream_name'])
            # NOTE(review): the original also computed the stream's replication
            # key here and fetched the annotated schema twice; both results were
            # unused, so that dead code has been removed.
            if replication_method == self.INCREMENTAL:
                replication_md = [{"breadcrumb": [], "metadata": {"selected": True}}]
            else:
                replication_md = [{"breadcrumb": [], "metadata": {"selected": None}}]
            connections.set_non_discoverable_metadata(
                conn_id, catalog,
                menagerie.get_annotated_schema(conn_id, catalog['stream_id']),
                replication_md)

    @classmethod
    def setUpClass(cls, logging="Ensuring environment variables are sourced."):
        """Fail fast if the required Facebook credentials are not in the environment."""
        super().setUpClass(logging=logging)
        required_envs = ['TAP_FACEBOOK_ACCESS_TOKEN', 'TAP_FACEBOOK_ACCOUNT_ID']
        missing_envs = [name for name in required_envs if os.getenv(name) is None]
        if missing_envs:
            raise Exception(
                "Missing required environment variables: {}".format(", ".join(missing_envs)))

    ##########################################################################
    ### Tap Specific Methods
    ##########################################################################

    @staticmethod
    def is_insight(stream):
        """Return True if the stream is one of the ads_insights* streams."""
        return stream.startswith('ads_insights')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,122 +1,14 @@ | ||
"""Test tap discovery mode and metadata.""" | ||
import re | ||
import unittest | ||
from tap_tester.base_suite_tests.discovery_test import DiscoveryTest | ||
|
||
from tap_tester import menagerie, connections | ||
from base_new_frmwrk import FacebookBaseTest | ||
|
||
from base import FacebookBaseTest | ||
|
||
class FacebookDiscoveryTest(DiscoveryTest, FacebookBaseTest):
    """Standard Discovery Test.

    All discovery assertions (stream names, primary/replication keys,
    replication method, inclusion metadata) are inherited from the shared
    DiscoveryTest suite; this class only supplies the tap-specific hooks.
    """

    @staticmethod
    def name():
        # Connection name used by the test runner.
        return "tt_facebook_discovery"

    def streams_to_test(self):
        """Run discovery assertions against every expected stream."""
        return self.expected_stream_names()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import os | ||
import dateutil.parser | ||
import datetime | ||
from base_new_frmwrk import FacebookBaseTest | ||
from tap_tester.base_suite_tests.table_reset_test import TableResetTest | ||
|
||
|
||
class FacebookTableResetTest(TableResetTest, FacebookBaseTest):
    """tap-facebook table reset test implementation.

    Currently tests only the stream with Incremental replication method.
    """

    @staticmethod
    def name():
        # Connection name used by the test runner.
        return "tt_facebook_table_reset"

    def streams_to_test(self):
        return self.expected_stream_names()

    @property
    def reset_stream(self):
        # The stream whose bookmark is reset by the test.
        return 'ads_insights_dma'

    def calculated_states_by_stream(self, current_state):
        """Walk each bookmark back by a per-stream timedelta.

        The following streams barely make the cut:

        campaigns "2021-02-09T18:17:30.000000Z"
                  "2021-02-09T16:24:58.000000Z"

        adsets    "2021-02-09T18:17:41.000000Z"
                  "2021-02-09T17:10:09.000000Z"

        leads     '2021-04-07T20:09:39+0000',
                  '2021-04-07T20:08:27+0000',

        Args:
            current_state: tap state dict with a 'bookmarks' mapping of
                stream name -> {replication_key: bookmark_value}.

        Returns:
            dict mapping stream name -> {replication_key: rewound_bookmark}.
        """
        # {stream_name: [days, hours, minutes], ...} — default is no rewind.
        timedelta_by_stream = {stream: [0, 0, 0]
                               for stream in self.expected_stream_names()}
        timedelta_by_stream['campaigns'] = [0, 1, 0]
        timedelta_by_stream['adsets'] = [0, 1, 0]
        timedelta_by_stream['leads'] = [0, 0, 1]

        stream_to_calculated_state = {}
        for stream, state in current_state['bookmarks'].items():
            # Each stream's state holds a single {replication_key: value} pair.
            state_key, state_value = next(iter(state.items()))
            state_as_datetime = dateutil.parser.parse(state_value)
            days, hours, minutes = timedelta_by_stream[stream]
            calculated_state_as_datetime = state_as_datetime - datetime.timedelta(
                days=days, hours=hours, minutes=minutes)

            # Insights bookmarks are date-level (midnight UTC); other streams
            # keep the full timestamp.
            state_format = ('%Y-%m-%dT00:00:00+00:00' if self.is_insight(stream)
                            else '%Y-%m-%dT%H:%M:%S-00:00')
            calculated_state_formatted = calculated_state_as_datetime.strftime(state_format)

            stream_to_calculated_state[stream] = {state_key: calculated_state_formatted}

        return stream_to_calculated_state

    def manipulate_state(self, current_state):
        """Return a new state whose bookmarks have been rewound per stream."""
        return {'bookmarks': dict(self.calculated_states_by_stream(current_state))}