Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CSV export optimisation #2755

Merged
merged 53 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from 49 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
ef39a64
add utility to update xform export repeat columns from instance
kelvin-muchiri Dec 11, 2024
9d4414b
refactor code
kelvin-muchiri Dec 11, 2024
f8efbb6
reset repeat_instances[repeat] if incoming repeat max is greater
kelvin-muchiri Dec 13, 2024
a85e61a
update code comments
kelvin-muchiri Dec 13, 2024
95f3fda
refactor code
kelvin-muchiri Dec 13, 2024
aaf8e17
refactor code
kelvin-muchiri Dec 13, 2024
ec721eb
refactor comment
kelvin-muchiri Dec 13, 2024
e73f1b4
refactor code
kelvin-muchiri Dec 13, 2024
aa85453
update code comment
kelvin-muchiri Dec 13, 2024
02746c2
register repeats on Instance create or update
kelvin-muchiri Dec 16, 2024
5b3f23f
add tests
kelvin-muchiri Dec 16, 2024
17da60b
add test
kelvin-muchiri Dec 16, 2024
cd10e94
disregard form version when registering repeats
kelvin-muchiri Dec 16, 2024
3360f55
use registered repeats when generating CSV export
kelvin-muchiri Dec 17, 2024
12941b2
fix registered repeats invalid export
kelvin-muchiri Dec 20, 2024
ad7b74c
enhance use of registered repeats to generate CSV
kelvin-muchiri Dec 20, 2024
e0af364
handle missing repeats in registered repeats
kelvin-muchiri Dec 20, 2024
ff4f090
refactor code
kelvin-muchiri Dec 20, 2024
18326e6
refactor code
kelvin-muchiri Dec 20, 2024
cfbd8a7
consider previous submissions when creating new register
kelvin-muchiri Jan 6, 2025
a852466
refactor code
kelvin-muchiri Jan 7, 2025
a8d73d9
create repeat register if Instance has no repeats
kelvin-muchiri Jan 7, 2025
cfb6ffa
enhance tests
kelvin-muchiri Jan 7, 2025
e6c16b1
fix cyclic dependency error
kelvin-muchiri Jan 8, 2025
f39f7c5
fix lint warning
kelvin-muchiri Jan 8, 2025
ad89018
suppress lint warning
kelvin-muchiri Jan 8, 2025
8fad42e
do not fall back to data if repeat unregistered
kelvin-muchiri Jan 8, 2025
81724cc
rename export_repeat_columns
kelvin-muchiri Jan 8, 2025
2eccd0d
fix cyclic dependency
kelvin-muchiri Jan 8, 2025
bdb0e34
refactor code
kelvin-muchiri Jan 8, 2025
0f5b640
refactor code
kelvin-muchiri Jan 8, 2025
2498c6d
fix flaky test
kelvin-muchiri Jan 8, 2025
14a3be7
fix failing tests
kelvin-muchiri Jan 9, 2025
dc940ce
refactor test
kelvin-muchiri Jan 9, 2025
ea17483
refactor test
kelvin-muchiri Jan 9, 2025
96d3ac5
remove lint warning ignore
kelvin-muchiri Jan 9, 2025
f03c8d0
rename symbol
kelvin-muchiri Jan 9, 2025
154fa6a
refactor code
kelvin-muchiri Jan 10, 2025
aa037f6
refactor code
kelvin-muchiri Jan 10, 2025
a445f51
fix flaky test
kelvin-muchiri Jan 10, 2025
0e7d71f
add doc strings
kelvin-muchiri Jan 10, 2025
32b7388
add doc strings
kelvin-muchiri Jan 10, 2025
17e4cbb
fix failing tests
kelvin-muchiri Jan 10, 2025
51e31a5
do not register repeat if instance not approved
kelvin-muchiri Jan 14, 2025
7ce4775
refactor code
kelvin-muchiri Jan 14, 2025
32cefb4
rename symbol
kelvin-muchiri Jan 14, 2025
d9131da
rename symbol
kelvin-muchiri Jan 14, 2025
af20142
feat: add export_data management command
ukanga Aug 19, 2024
b7d546c
add codetiming dependency
kelvin-muchiri Jan 14, 2025
2851718
address lint warning
kelvin-muchiri Jan 14, 2025
bb2fbf6
fix lint warning line too long
kelvin-muchiri Jan 14, 2025
beb8165
add missing class docstring
kelvin-muchiri Jan 14, 2025
e53c97a
fix incorrect command output
kelvin-muchiri Jan 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,5 @@ tags
.eggs
sonar-project.properties
.scannerwork

*.er
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
"""
Test /orgs API endpoint implementation.
"""

import json
from builtins import str as text
from unittest.mock import patch

from django.contrib.auth.models import User, timezone, AnonymousUser
from django.contrib.auth.models import AnonymousUser, User, timezone
from django.core.cache import cache
from django.test.utils import override_settings

Expand All @@ -15,8 +16,8 @@

from onadata.apps.api.models.organization_profile import (
OrganizationProfile,
get_organization_members_team,
Team,
get_organization_members_team,
)
from onadata.apps.api.tests.viewsets.test_abstract_viewset import TestAbstractViewSet
from onadata.apps.api.tools import (
Expand Down Expand Up @@ -840,8 +841,13 @@ def test_add_members_to_owner_role(self):
request = self.factory.get("/", **self.extra)
response = view(request, user="denoinc")
self.assertEqual(response.status_code, 200)
self.assertEqual(response.data["users"][1]["user"], "aboy")
self.assertEqual(response.data["users"][1]["role"], "owner")
aboy_data = {"user": "aboy", "role": "owner"}
self.assertTrue(
any( # Order doesn't matter. aboy can be first or last
all(item.get(key) == value for key, value in aboy_data.items())
for item in response.data["users"]
)
)

owner_team = get_or_create_organization_owners_team(self.organization)

Expand Down
42 changes: 42 additions & 0 deletions onadata/apps/logger/management/commands/export_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
from django.core.management.base import BaseCommand

from onadata.apps.logger.models.xform import XForm
from onadata.apps.viewer.models.export import Export
from onadata.libs.utils.export_tools import generate_export

from codetiming import Timer


class Command(BaseCommand):
help = "Exports data from a form in CSV format"

def add_arguments(self, parser):
parser.add_argument("form_id", type=int)

def handle(self, *args: str, **options: str):
self.stdout.write(self.style.SUCCESS("Exporting ..."))
form_id = options["form_id"]
try:
xform = XForm.objects.get(pk=form_id)
except XForm.DoesNotExist:
self.stderr.write(
self.style.ERROR(f"There is no form with id {form_id} present.")
)
else:
with Timer() as t:
export = generate_export(Export.CSV_EXPORT, xform)
elapsed_time = t.last
self.stdout.write(
self.style.NOTICE(
f"The file {export.full_filepath} was exported in {elapsed_time:.2f} seconds."
)
)
plural_or_singular = (
"submission" if xform.num_of_submissions == 1 else "submissions"
)
self.stdout.write(
self.style.SUCCESS(
f'{export.pk}: Exporting {xform.num_of_submissions} {plural_or_singular} of the form "{xform.title}"'
)
)
21 changes: 19 additions & 2 deletions onadata/apps/logger/models/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
"""
Instance model class
"""
# pylint: disable=too-many-lines

import importlib
import math
import sys
from datetime import datetime
Expand All @@ -18,10 +21,10 @@
from django.urls import reverse
from django.utils import timezone
from django.utils.translation import gettext as _
from multidb.pinning import use_master

from celery import current_task
from deprecated import deprecated
from multidb.pinning import use_master
from taggit.managers import TaggableManager

from onadata.apps.logger.models.submission_review import SubmissionReview
Expand All @@ -35,11 +38,11 @@
from onadata.celeryapp import app
from onadata.libs.data.query import get_numeric_fields
from onadata.libs.utils.cache_tools import (
PROJECT_DATE_MODIFIED_CACHE,
DATAVIEW_COUNT,
IS_ORG,
PROJ_NUM_DATASET_CACHE,
PROJ_SUB_DATE_CACHE,
PROJECT_DATE_MODIFIED_CACHE,
XFORM_COUNT,
XFORM_DATA_VERSIONS,
XFORM_SUBMISSION_COUNT_FOR_DAY,
Expand Down Expand Up @@ -875,6 +878,14 @@ def permanently_delete_attachments(sender, instance=None, created=False, **kwarg
)


@use_master
def register_export_repeats(sender, instance, created=False, **kwargs):
# Avoid cyclic dependency errors
logger_tasks = importlib.import_module("onadata.apps.logger.tasks")

logger_tasks.register_instance_export_repeats_async.delay(instance.pk)


post_save.connect(
post_save_submission, sender=Instance, dispatch_uid="post_save_submission"
)
Expand All @@ -891,6 +902,12 @@ def permanently_delete_attachments(sender, instance=None, created=False, **kwarg
dispatch_uid="permanently_delete_attachments",
)

post_save.connect(
register_export_repeats,
sender=Instance,
dispatch_uid="register_export_repeats",
)


class InstanceHistory(models.Model, InstanceBaseClass):
"""Stores deleted submission XML to maintain a history of edits."""
Expand Down
63 changes: 45 additions & 18 deletions onadata/apps/logger/tasks.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,27 @@
# pylint: disable=import-error,ungrouped-imports
"""
Asynchronous tasks for the logger app
"""

import logging

from django.core.cache import cache
from django.contrib.auth import get_user_model
from django.core.cache import cache
from django.db import DatabaseError

from multidb.pinning import use_master

from onadata.apps.logger.models import Entity, EntityList, Project
from onadata.apps.logger.models import Entity, EntityList, Instance, Project, XForm
from onadata.celeryapp import app
from onadata.libs.utils.cache_tools import (
PROJECT_DATE_MODIFIED_CACHE,
safe_delete,
)
from onadata.libs.utils.project_utils import set_project_perms_to_object
from onadata.libs.utils.cache_tools import PROJECT_DATE_MODIFIED_CACHE, safe_delete
from onadata.libs.utils.logger_tools import (
commit_cached_elist_num_entities,
dec_elist_num_entities,
inc_elist_num_entities,
register_instance_export_repeats,
register_xform_export_repeats,
soft_delete_entities_bulk,
)

from onadata.libs.utils.project_utils import set_project_perms_to_object

logger = logging.getLogger(__name__)
User = get_user_model()
Expand All @@ -32,8 +31,7 @@
def set_entity_list_perms_async(entity_list_id):
"""Set permissions for EntityList asynchronously

Args:
pk (int): Primary key for EntityList
:param entity_list_id: Primary key for EntityList
"""
with use_master:
try:
Expand Down Expand Up @@ -67,9 +65,8 @@ def apply_project_date_modified_async():
def delete_entities_bulk_async(entity_pks: list[int], username: str | None = None):
"""Delete Entities asynchronously

Args:
entity_pks (list(int)): Primary keys of Entities to be deleted
username (str): Username of the user initiating the delete
:param entity_pks: Primary keys of Entities to be deleted
:param username: Username of the user initiating the delete operation
"""
with use_master:
entity_qs = Entity.objects.filter(pk__in=entity_pks, deleted_at__isnull=True)
Expand Down Expand Up @@ -104,8 +101,7 @@ def commit_cached_elist_num_entities_async():
def inc_elist_num_entities_async(elist_pk: int):
"""Increment EntityList `num_entities` counter asynchronously

Args:
elist_pk (int): Primary key for EntityList
:param elist_pk: Primary key for EntityList
"""
inc_elist_num_entities(elist_pk)

Expand All @@ -114,7 +110,38 @@ def inc_elist_num_entities_async(elist_pk: int):
def dec_elist_num_entities_async(elist_pk: int) -> None:
"""Decrement EntityList `num_entities` counter asynchronously

Args:
elist_pk (int): Primary key for EntityList
:param elist_pk: Primary key for EntityList
"""
dec_elist_num_entities(elist_pk)


@app.task(retry_backoff=3, autoretry_for=(DatabaseError, ConnectionError))
def register_instance_export_repeats_async(instance_pk: int) -> None:
"""Register export repeats asynchronously

:param instance_pk: Primary key for Instance
"""
try:
instance = Instance.objects.get(pk=instance_pk)

except Instance.DoesNotExist as exc:
logger.exception(exc)

else:
register_instance_export_repeats(instance)


@app.task(retry_backoff=3, autoretry_for=(DatabaseError, ConnectionError))
def register_xform_export_repeats_async(xform_id: int) -> None:
"""Register export repeats for an XForm asynchronously

:param xform_id: Primary key for XForm
"""
try:
xform = XForm.objects.get(pk=xform_id)

except XForm.DoesNotExist as exc:
logger.exception(exc)

else:
register_xform_export_repeats(xform)
43 changes: 43 additions & 0 deletions onadata/apps/logger/tests/management/commands/test_export_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-

from io import StringIO

from django.core.management import call_command
from django.core.management.base import CommandError
from onadata.apps.main.tests.test_base import TestBase
from onadata.apps.viewer.models.export import Export


class ExportDataTest(TestBase):
def test_command_output(self):
output = StringIO()
error_output = StringIO()
with self.assertRaisesMessage(
CommandError,
expected_message="Error: the following arguments are required: form_id",
):
_ = call_command("export_data", stdout=output)

_ = call_command("export_data", 12300, stdout=output, stderr=error_output)
self.assertIn("Exporting ...", output.getvalue())
self.assertIn(
"There is no form with id 12300 present.", error_output.getvalue()
)
self._publish_transportation_form_and_submit_instance()
export_count = Export.objects.filter().count()
_ = call_command(
"export_data", self.xform.pk, stdout=output, stderr=error_output
)
self.assertIn(
f'Exporting 1 submission of the form "{self.xform.title}"',
output.getvalue(),
)
# confirm a new export record was created.
self.assertEqual(Export.objects.filter().count(), export_count + 1)
export = Export.objects.filter(
xform_id=self.xform.pk, export_type=Export.CSV_EXPORT
).latest("created_on")
self.assertIn(
f"The file {export.full_filepath} was exported in",
output.getvalue(),
)
Loading
Loading