diff --git a/h/celery.py b/h/celery.py index 44a62904a76..78e9342849d 100644 --- a/h/celery.py +++ b/h/celery.py @@ -74,7 +74,6 @@ # task's @app.task() arguments. task_time_limit=240, imports=( - "h.tasks.annotations", "h.tasks.cleanup", "h.tasks.indexer", "h.tasks.mailer", diff --git a/h/tasks/annotations.py b/h/tasks/annotations.py deleted file mode 100644 index 853d893c3ee..00000000000 --- a/h/tasks/annotations.py +++ /dev/null @@ -1,39 +0,0 @@ -from sqlalchemy import func, update - -from h.celery import celery, get_task_logger -from h.models import Annotation, User - -log = get_task_logger(__name__) - - -@celery.task -def fill_pk_and_user_id(batch_size=1000): - """ - Task to fill the new annotation.pk and annotation.user_id in batches. - - Once most of the existing rows are done we'll make the code changes - to keep these up to date, make the column not nullable and remove this task. - """ - # pylint: disable=no-member - db = celery.request.db - - annotations = ( - db.query(Annotation.id.label("annotation_id"), User.id.label("user_id")) - .join( - User, - User.username - == func.split_part(func.split_part(Annotation.userid, "@", 1), ":", 2), - ) - .where(Annotation.pk.is_(None)) - .order_by(Annotation.created.asc()) - .with_for_update(skip_locked=True) - .limit(batch_size) - ).cte("annotations") - - db.execute( - update(Annotation) - .values(pk=Annotation.pk_sequence.next_value(), user_id=annotations.c.user_id) - .where(Annotation.id == annotations.c.annotation_id) - # just update the rows in the DB, we don't need to refresh the objects in the session - .execution_options(synchronize_session=False) - ) diff --git a/tests/h/tasks/annotations_test.py b/tests/h/tasks/annotations_test.py deleted file mode 100644 index c0970092721..00000000000 --- a/tests/h/tasks/annotations_test.py +++ /dev/null @@ -1,50 +0,0 @@ -import pytest - -from h.models import Annotation -from h.tasks.annotations import fill_pk_and_user_id - - -class TestFillPKAndUserId: - AUTHORITY_1 = "AUTHORITY_1" - AUTHORITY_2 = "AUTHORITY_2" - - USERNAME_1 = "USERNAME_1" - USERNAME_2 = "USERNAME_2" - - def test_it(self, factories, db_session): - author_1 = factories.User(authority=self.AUTHORITY_1, username=self.USERNAME_1) - author_2 = factories.User(authority=self.AUTHORITY_2, username=self.USERNAME_2) - - annos_1 = factories.Annotation.create_batch( - 5, - userid=author_1.userid, - ) - annos_2 = factories.Annotation.create_batch( - 5, - userid=author_2.userid, - ) - factories.Annotation.create_batch( - 5, - userid=author_2.userid, - ) - - fill_pk_and_user_id(batch_size=10) - - # Only one batch of 10 was processed, those have PKs now - assert ( - db_session.query(Annotation).filter(Annotation.pk.is_not(None)).count() - == 10 - ) - assert db_session.query(Annotation).filter(Annotation.pk.is_(None)).count() == 5 - - # Refresh data for the annotations - _ = [db_session.refresh(anno) for anno in annos_1 + annos_2] - # user_id was updated with the right value - assert {anno.user_id for anno in annos_1} == {author_1.id} - assert {anno.user_id for anno in annos_2} == {author_2.id} - - @pytest.fixture(autouse=True) - def celery(self, patch, db_session): - cel = patch("h.tasks.annotations.celery", autospec=False) - cel.request.db = db_session - return cel