Skip to content

Commit

Permalink
Merge pull request #2642 from onaio/optimize-project-date-modified-up…
Browse files Browse the repository at this point in the history
…date

Optimize updates to project `date_modified` field when submitting data
  • Loading branch information
KipSigei authored Jul 12, 2024
2 parents 5ac0861 + 25e8408 commit bf1f34e
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 8 deletions.
5 changes: 0 additions & 5 deletions onadata/apps/api/tests/viewsets/test_project_viewset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1944,11 +1944,6 @@ def test_last_date_modified_changes_when_adding_new_form(self):

self.assertNotEqual(last_date, current_last_date)

self._make_submissions()

self.project.refresh_from_db()
self.assertNotEqual(current_last_date, self.project.date_modified)

def test_anon_project_form_endpoint(self):
self._project_create()
self._publish_xls_form_to_project()
Expand Down
9 changes: 8 additions & 1 deletion onadata/apps/logger/models/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from onadata.celeryapp import app
from onadata.libs.data.query import get_numeric_fields
from onadata.libs.utils.cache_tools import (
PROJECT_DATE_MODIFIED_CACHE,
DATAVIEW_COUNT,
IS_ORG,
PROJ_NUM_DATASET_CACHE,
Expand Down Expand Up @@ -391,7 +392,13 @@ def update_project_date_modified(instance_id, _):
if current_task.request.id:
raise e
else:
instance.xform.project.save(update_fields=["date_modified"])
timeout = getattr(settings, "PROJECT_IDS_CACHE_TIMEOUT", 3600)
project_id = instance.xform.project_id

# Log project id and date modified in cache with timeout
project_ids = cache.get(PROJECT_DATE_MODIFIED_CACHE, {})
project_ids[project_id] = instance.date_modified
cache.set(PROJECT_DATE_MODIFIED_CACHE, project_ids, timeout=timeout)


def convert_to_serializable_date(date):
Expand Down
23 changes: 22 additions & 1 deletion onadata/apps/logger/tasks.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
# pylint: disable=import-error,ungrouped-imports
"""Module for logger tasks"""
import logging

from django.core.cache import cache
from django.db import DatabaseError

from onadata.apps.logger.models import EntityList
from onadata.apps.logger.models import EntityList, Project
from onadata.celeryapp import app
from onadata.libs.utils.cache_tools import PROJECT_DATE_MODIFIED_CACHE, safe_delete
from onadata.libs.utils.project_utils import set_project_perms_to_object


Expand All @@ -25,3 +29,20 @@ def set_entity_list_perms_async(entity_list_id):
return

set_project_perms_to_object(entity_list, entity_list.project)


@app.task(retry_backoff=3, autoretry_for=(DatabaseError, ConnectionError))
def apply_project_date_modified_async():
    """
    Batch update projects date_modified field periodically

    Reads the ``{project_id: timestamp}`` mapping stored in the cache under
    ``PROJECT_DATE_MODIFIED_CACHE``, writes each timestamp to the matching
    project and then deletes the cache key. Returns early, touching nothing,
    when the mapping is missing or empty.

    A timestamp is only applied when it is newer than the value currently
    stored on the project, so a project that was saved after the timestamp
    was cached never has its ``date_modified`` moved backwards.
    """
    project_ids = cache.get(PROJECT_DATE_MODIFIED_CACHE, {})
    if not project_ids:
        return

    # Update project date_modified field in batches
    for project_id, timestamp in project_ids.items():
        if timestamp is None:
            # Nothing meaningful to write; a ``None`` value is also rejected
            # by the ``__lt`` lookup below.
            continue

        # Restricting the queryset to rows with an older date_modified keeps
        # the field monotonic: a stale cached value cannot overwrite a more
        # recent modification made directly on the project.
        Project.objects.filter(
            pk=project_id, date_modified__lt=timestamp
        ).update(date_modified=timestamp)

    # Clear cache after updating
    # NOTE(review): entries added to the cache between the read at the top of
    # this task and this delete are dropped without being applied. The cache
    # API used here offers no atomic read-and-clear, so this is left as is.
    safe_delete(PROJECT_DATE_MODIFIED_CACHE)
47 changes: 46 additions & 1 deletion onadata/apps/logger/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,17 @@

from celery.exceptions import Retry

from django.core.cache import cache
from django.db import DatabaseError
from django.utils import timezone

from onadata.apps.logger.models import EntityList
from onadata.apps.logger.tasks import set_entity_list_perms_async
from onadata.apps.logger.tasks import (
set_entity_list_perms_async,
apply_project_date_modified_async,
)
from onadata.apps.main.tests.test_base import TestBase
from onadata.libs.utils.cache_tools import PROJECT_DATE_MODIFIED_CACHE
from onadata.libs.utils.user_auth import get_user_default_project


Expand Down Expand Up @@ -61,3 +67,42 @@ def test_invalid_pk(self, mock_logger, mock_set_perms):
set_entity_list_perms_async.delay(sys.maxsize)
mock_set_perms.assert_not_called()
mock_logger.assert_called_once()


class UpdateProjectDateModified(TestBase):
    """Tests for apply_project_date_modified_async"""

    def setUp(self):
        super().setUp()
        self.project = get_user_default_project(self.user)
        # Never leak pending updates into other tests, even when an assertion
        # fails before the task has had a chance to clear the cache key.
        self.addCleanup(cache.delete, PROJECT_DATE_MODIFIED_CACHE)

    def test_update_project_date_modified(self):
        """Test project date_modified field is updated"""
        timestamp = timezone.now()
        initial_date_modified = self.project.date_modified
        project_ids = cache.get(PROJECT_DATE_MODIFIED_CACHE, {})
        project_ids[self.project.pk] = timestamp
        cache.set(PROJECT_DATE_MODIFIED_CACHE, project_ids, timeout=300)

        # NOTE(review): assumes the test settings run Celery tasks eagerly so
        # the task has finished by the time ``delay()`` returns - confirm.
        apply_project_date_modified_async.delay()
        self.project.refresh_from_db()
        current_date_modified = self.project.date_modified

        # check that date_modified has changed
        self.assertNotEqual(initial_date_modified, current_date_modified)

        # check if current date modified is greater than initial
        self.assertGreater(current_date_modified, initial_date_modified)

        # check that the value written is exactly the cached timestamp
        self.assertEqual(current_date_modified, timestamp)

        # assert that cache is cleared once task completes
        self.assertIsNone(cache.get(PROJECT_DATE_MODIFIED_CACHE))

    def test_update_project_date_modified_empty_cache(self):
        """Test task leaves projects untouched when the cache is empty"""
        # Ensure the cache is empty, meaning there are no pending updates
        cache.delete(PROJECT_DATE_MODIFIED_CACHE)
        self.project.refresh_from_db()
        initial_date_modified = self.project.date_modified

        # Run the periodic task
        apply_project_date_modified_async.delay()

        # Verify that the project was not updated
        self.project.refresh_from_db()
        self.assertEqual(initial_date_modified, self.project.date_modified)

        # Cache should remain empty
        self.assertIsNone(cache.get(PROJECT_DATE_MODIFIED_CACHE))
3 changes: 3 additions & 0 deletions onadata/libs/utils/cache_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@
# Cache timeouts used in XForm model
XFORM_REGENERATE_INSTANCE_JSON_TASK_TTL = 24 * 60 * 60 # 24 hrs converted to seconds

# Project date modified cache
PROJECT_DATE_MODIFIED_CACHE = "project_date_modified"


def safe_delete(key):
"""Safely deletes a given key from the cache."""
Expand Down

0 comments on commit bf1f34e

Please sign in to comment.