Skip to content

Commit

Permalink
feat: all db migrations handled by one router (openedx-unsupported#543)
Browse files Browse the repository at this point in the history
  • Loading branch information
zacharis278 authored Apr 11, 2022
1 parent b58dd45 commit 432f329
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 114 deletions.
19 changes: 4 additions & 15 deletions analytics_data_api/tests/test_middleware.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from unittest import mock

from django.conf import settings
from django.test import override_settings

Expand All @@ -12,19 +10,10 @@
@set_databases
class RequestVersionMiddleware(TestCaseWithAuthentication):
def test_request_version_middleware_v1(self):
# Because the AnalyticsModelsRouter prevents migrations from running on the ANALYTICS_DATABASE_V1
# database, and the AnalyticsDevelopmentRouter is not configured to be used in the test environment,
# the analytics tables don't exist in the ANALYTICS_DATABASE_V1 database. This causes errors like
# "django.db.utils.OperationalError: no such table: course_activity" to be thrown when the
# CourseActivityWeeklyView view is run. Therefore, we mock out the get_queryset method, because all we care
# about is the middleware correctly setting the analyticsapi_database attribute of the thread local data
# correctly.
with mock.patch('analytics_data_api.v0.views.courses.CourseActivityWeeklyView.get_queryset') as qs:
qs.return_value = None
self.authenticated_get('/api/v1/courses/{}/activity'.format(
CourseSamples.course_ids[0]))

self.assertEqual(thread_data.analyticsapi_database, getattr(settings, 'ANALYTICS_DATABASE_V1'))
self.authenticated_get('/api/v1/courses/{}/activity'.format(
CourseSamples.course_ids[0]))

self.assertEqual(thread_data.analyticsapi_database, getattr(settings, 'ANALYTICS_DATABASE_V1'))

@override_settings(ANALYTICS_DATABASE_V1=None)
def test_request_version_middleware_v1_no_setting(self):
Expand Down
125 changes: 55 additions & 70 deletions analyticsdataserver/router.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
# This file contains database routers used for routing database operations to the appropriate databases.
# Below is a digest of the routers in this file.
# AnalyticsDevelopmentRouter: This router routes database operations in a development environment. Currently, it only
# effects migrations. For example, in development, we want to be able to migrate the
# v0 and v1 apps into the v0 and v1 databases, respectively.
# AnalyticsAPIRouter: This router routes database operations based on the version of the analytics data API being used.
# It allows us to route database traffic to the v1 database when using the v1 API, for example.
# AnalyticsModelsRouter: This router routes database operations based on the model that is being requested.
#
# The DATABASE_ROUTERS Django setting defines what database routers are used and in what order in a given environment.
# AnalyticsModelsRouter: This router routes database operations based on the model that is being requested. Its
# primary purpose is to handle database migrations based on the multiple database settings
# in the environment.
#
# Note that if a database router returns None for a router method or does not implement the method, the master router
# delegates to the next router in the list until a value is returned. The master router falls back to a default behavior
Expand All @@ -17,60 +14,9 @@

from analytics_data_api.middleware import thread_data

ANALYTICS_APP_LABELS = ['v0', 'v1', 'enterprise_data']


class AnalyticsDevelopmentRouter:
"""
This router's role is to route database operations in the development environment. It is meant, in part, to simulate
the way databases are structured in the production environment. For example, the ANALYTICS_DATABASE
and ANALYTICS_DATABASE_V1 databases in production do not contain models from "non-analytics" apps,
like the auth app. This is because said databases are populated by other means (e.g. EMR jobs, prefect flows).
Therefore, we attempt to only allow these apps to be migrated where necessary; see inline comment for more details.
This router also ensures that analytics apps are not migrated into the default database unless the default
database is the only configured database.
This router also handles an edge case with the enterprise_data app and the ANALYTICS_DATABASE_V1 database;
see inline comment for more details.
"""
# pylint: disable=unused-argument
def allow_migrate(self, database, app_label, model_name=None, **hints):
if app_label in ANALYTICS_APP_LABELS:
databases = getattr(settings, 'DATABASES').keys()
# We don't want to migrate enterprise_data into the ANALYTICS_DATABASE_V1 database. The reason is two-fold.
# 1) The ANALYTICS_DATABASE_V1 tables in production do not include enterprise_data tables.
# 2) The migration 0018_enterprisedatafeaturerole_enterprisedataroleassignment in the enterprise_data app
# creates models. When running migrations on the v1 database, this migration successfully creates those
# models in the v1 database. The migration 0019_add_enterprise_data_feature_roles creates model instances
# for those models created in the previous migration. The write process triggers the db_for_write method
# of Django routers. The db_for_write method returns the value of
# getattr(settings, 'ANALYTICS_DATABASE', 'default'), which writes the data to the wrong database. There
# is no straight forward way to force the db_for_write method to return the value of
# getattr(settings, 'ANALYTICS_DATABASE_V1') instead. We forgo migrating that app entirely
# in this database.
if app_label == 'enterprise_data' and database == getattr(settings, 'ANALYTICS_DATABASE_V1'):
return False

# If the requested database for migrations is the default database, but other analytics
# databases are set up, they should be favored over the default database, so prevent these
# migrations from being run on the default database.
# We only allow migrations on analytics application models on the default database when analytics
# is not set up.
if database == 'default' and 'default' in databases and len(databases) > 1:
return False

return True

# Ideally, we'd only want to migrate other applications into the default database to better
# mimic production. However, the enterprise_data application has the migration
# 0018_enterprisedatafeaturerole_enterprisedataroleassignment, which creates a model with a ForeignKey field
# pointing to the auth_user model. MySQL does not allow cross-database relations, because they violate
# referential integrity, so we cannot only migrate auth_user into the default database.
# For that reason, we need to allow the migration of the auth_user table into the ANALYTICS_DATABASE
# as well.
# Please see https://docs.djangoproject.com/en/3.2/topics/db/multi-db/#limitations-of-multiple-databases.
return database in ['default', getattr(settings, 'ANALYTICS_DATABASE')]
ANALYTICS_APP_LABELS = ['v0', 'v1']
ENTERPRISE_APP_LABELS = ['enterprise_data']
DJANGO_AUTH_MODELS = ['enterprisedatafeaturerole', 'enterprisedataroleassignment']


class AnalyticsAPIRouter:
Expand Down Expand Up @@ -98,30 +44,69 @@ def db_for_write(self, model, **hints):

class AnalyticsModelsRouter:
"""
This router's role is to route database operations for all other database operations.
This router's role is to route database operations. It is meant, in part, to mirror
the way databases are structured in the production environment. For example, the ANALYTICS_DATABASE
and ANALYTICS_DATABASE_V1 databases in production do not contain models from "non-analytics" apps,
like the auth app. This is because said databases are populated by other means (e.g. EMR jobs, prefect flows).
This router also ensures that analytics apps are not migrated into the default database unless the default
database is the only configured database.
This router also handles an edge case with the enterprise_data app where migrations exist for models that have
been moved to a different app that is now in the default database.
Details:
The enterprise_data application has the migration 0018_enterprisedatafeaturerole_enterprisedataroleassignment,
which creates a model with a ForeignKey field pointing to the auth_user model. This does not work in a multiple
DB environment since MySQL does not allow cross-database relations. This model has since been moved to a
different application that will migrate into the default database where auth_user exists.
We do not define an allow_relation method. We fall back to Django's default behavior, which is to allow relations
between model instances that were loaded from the same database.
"""
def _get_database(self, app_label, model_name):
# select first available database if there are multiple options
return self._get_databases(app_label, model_name)[0]

def _get_database(self, app_label):
def _get_databases(self, app_label, model_name):
databases = []
if app_label in ANALYTICS_APP_LABELS:
return getattr(settings, 'ANALYTICS_DATABASE', 'default')
return None
databases = [
getattr(settings, 'ANALYTICS_DATABASE', None),
getattr(settings, 'ANALYTICS_DATABASE_V1', None)
]

# checking against None is an unfortunate bit of code here. There are migrations in
# the enterprise app that run python against auth related models which will fail on
# anything other than the default database. There's no way to identify these migrations
# specifically because there is no value for model.
# This is brittle, however tables in the analytic databases are created by other means today.
# (e.g. EMR jobs, prefect flows) We shouldn't expect migrations of this type in the
# future that do need to run against the analytics database.
elif (
app_label in ENTERPRISE_APP_LABELS and
model_name is not None and
model_name not in DJANGO_AUTH_MODELS
):
databases = [getattr(settings, 'ANALYTICS_DATABASE', None)]

databases = list(filter(None, databases))
if len(databases) > 0:
return databases

return ['default']

# pylint: disable=unused-argument
# pylint: disable=protected-access
def db_for_read(self, model, **hints):
return self._get_database(model._meta.app_label)
return self._get_database(model._meta.app_label, model._meta.model_name)

# pylint: disable=unused-argument
# pylint: disable=protected-access
def db_for_write(self, model, **hints):
return self._get_database(model._meta.app_label)
return self._get_database(model._meta.app_label, model._meta.model_name)

# pylint: disable=unused-argument
def allow_migrate(self, database, app_label, model_name=None, **hints):
dest_db = self._get_database(app_label)
if dest_db is not None:
return database == dest_db
return None
databases = self._get_databases(app_label, model_name)
return database in databases
2 changes: 1 addition & 1 deletion analyticsdataserver/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@
########## ANALYTICS DATA API CONFIGURATION

ANALYTICS_DATABASE = 'reports'
# Currently unused, V1 database will support migration to new backend data source
# V1 database supports migration to new backend data source
ANALYTICS_DATABASE_V1 = None
DATABASE_ROUTERS = ['analyticsdataserver.router.AnalyticsAPIRouter', 'analyticsdataserver.router.AnalyticsModelsRouter']
ENTERPRISE_REPORTING_DB_ALIAS = 'enterprise'
Expand Down
2 changes: 1 addition & 1 deletion analyticsdataserver/settings/devstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
DATABASES['analytics'][override] = value
DATABASES['analytics_v1'][override] = value

DATABASE_ROUTERS = ['analyticsdataserver.router.AnalyticsDevelopmentRouter', 'analyticsdataserver.router.AnalyticsAPIRouter', 'analyticsdataserver.router.AnalyticsModelsRouter']
DATABASE_ROUTERS = ['analyticsdataserver.router.AnalyticsAPIRouter', 'analyticsdataserver.router.AnalyticsModelsRouter']

########## END DATABASE CONFIGURATION

Expand Down
2 changes: 1 addition & 1 deletion analyticsdataserver/settings/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
}

ANALYTICS_DATABASE = 'analytics'
ANALYTICS_DATABASE_V1 = None
ANALYTICS_DATABASE_V1 = 'analytics_v1'
ENTERPRISE_REPORTING_DB_ALIAS = 'default'

# Silence elasticsearch during tests
Expand Down
64 changes: 39 additions & 25 deletions analyticsdataserver/tests/test_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from django.test import TestCase, override_settings

from analytics_data_api.v0.models import CourseEnrollmentDaily
from analyticsdataserver.router import AnalyticsAPIRouter, AnalyticsDevelopmentRouter, AnalyticsModelsRouter
from analyticsdataserver.router import AnalyticsAPIRouter, AnalyticsModelsRouter


class AnalyticsAPIRouterTests(TestCase):
Expand All @@ -28,6 +28,7 @@ def test_db_for_read_not_analytics_app_thread_data_without_data(self):
self.assertEqual(self.router.db_for_read(get_user_model()), None)


@ddt.ddt
class AnalyticsModelsRouterTests(TestCase):
def setUp(self):
self.router = AnalyticsModelsRouter()
Expand All @@ -36,45 +37,32 @@ def setUp(self):
def test_db_for_read_analytics_app(self):
self.assertEqual(self.router.db_for_read(CourseEnrollmentDaily), self.analytics_database_slug)

@override_settings()
@override_settings(
ANALYTICS_DATABASE=None,
ANALYTICS_DATABASE_V1=None
)
def test_db_for_read_analytics_app_no_setting(self):
del settings.ANALYTICS_DATABASE
self.assertEqual(self.router.db_for_read(CourseEnrollmentDaily), 'default')

def test_db_for_read_not_analytics_app(self):
self.assertEqual(self.router.db_for_read(get_user_model()), None)
self.assertEqual(self.router.db_for_read(get_user_model()), 'default')

def test_db_for_write_analytics_app(self):
self.assertEqual(self.router.db_for_write(CourseEnrollmentDaily), self.analytics_database_slug)

@override_settings()
@override_settings(
ANALYTICS_DATABASE=None,
ANALYTICS_DATABASE_V1=None
)
def test_db_for_write_analytics_app_no_setting(self):
del settings.ANALYTICS_DATABASE
self.assertEqual(self.router.db_for_write(CourseEnrollmentDaily), 'default')

def test_db_for_write_not_analytics_app(self):
self.assertEqual(self.router.db_for_write(get_user_model()), None)

def test_allow_migrate_not_analytics_app(self):
self.assertEqual(self.router.allow_migrate(self.analytics_database_slug, get_user_model()._meta.app_label), None)


@ddt.ddt
class AnalyticsDevelopmentRouterTests(TestCase):
""""
Note that it's not currently possible to test the case where the default database is the only configured database.
Databases are configured during Django internal initialization, which means modifying the DATABASES Django setting
from a test will not work as expected.
The Django docs explicitly caution against doing this: "We do not recommend altering the DATABASES setting."
Please see here: https://docs.djangoproject.com/en/3.2/topics/testing/tools/#overriding-settings.
For this reason, coverage is incomplete.
"""
def setUp(self):
self.router = AnalyticsDevelopmentRouter()
self.assertEqual(self.router.db_for_write(get_user_model()), 'default')

@ddt.data(
('default', True),
(getattr(settings, 'ANALYTICS_DATABASE', 'analytics'), True),
(getattr(settings, 'ANALYTICS_DATABASE', 'analytics'), False),
(getattr(settings, 'ANALYTICS_DATABASE_V1', 'analytics_v1'), False),
)
@ddt.unpack
Expand All @@ -90,3 +78,29 @@ def test_allow_migrate_not_analytics_app(self, database, expected_allow_migrate)
@ddt.unpack
def test_allow_migrate_analytics_app_multiple_dbs(self, database, app_label, expected_allow_migrate):
self.assertEqual(self.router.allow_migrate(database, app_label), expected_allow_migrate)

@ddt.data(
(getattr(settings, 'ANALYTICS_DATABASE', 'analytics'), True),
(getattr(settings, 'ANALYTICS_DATABASE_V1', 'analytics_v1'), False),
)
@ddt.unpack
@override_settings(ANALYTICS_DATABASE_V1=None)
def test_does_not_migrate_database_with_no_env_setting(self, database, expected_allow_migrate):
self.assertEqual(self.router.allow_migrate(database, 'v0'), expected_allow_migrate)
self.assertEqual(self.router.allow_migrate(database, 'v1'), expected_allow_migrate)

@ddt.data(
(getattr(settings, 'default', 'default'), 'auth', True),
(getattr(settings, 'default', 'default'), 'v0', True),
(getattr(settings, 'default', 'default'), 'v1', True),
(getattr(settings, 'ANALYTICS_DATABASE', 'analytics'), 'v0', False),
(getattr(settings, 'ANALYTICS_DATABASE_V1', 'analytics_v1'), 'v0', False),
(getattr(settings, 'ANALYTICS_DATABASE_V1', 'analytics_v1'), 'v1', False),
)
@ddt.unpack
@override_settings(
ANALYTICS_DATABASE=None,
ANALYTICS_DATABASE_V1=None
)
def test_migrate_single_database_environment(self, database, app_label, expected_allow_migrate):
self.assertEqual(self.router.allow_migrate(database, app_label), expected_allow_migrate)
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[pycodestyle]
ignore=E501,E741
ignore=W504,E501,E741
max_line_length=119
exclude=settings,analyticsdataserver/wsgi.py

Expand Down

0 comments on commit 432f329

Please sign in to comment.