Skip to content

Commit

Permalink
Add cron job and service to redact comments (#1492)
Browse files Browse the repository at this point in the history
* Add comment redact job and service

* update jsonb syntax to be for postgres

* remove unused import

* fix import blocks

* revert to 14 days

* update Filter

* comment status IS rejected

* update filters and dynamically get component_types

* remove return type

* Add logging

* update comment
  • Loading branch information
jadmsaadaot authored Apr 12, 2023
1 parent 96e2587 commit 31a7930
Show file tree
Hide file tree
Showing 10 changed files with 157 additions and 13 deletions.
9 changes: 5 additions & 4 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"makefile.extensionOutputFolder": "./.vscode",
"python.pythonPath": "met-api/venv/bin/python",
"python.envFile": "${workspaceFolder}/met-api/.env"
}
"makefile.extensionOutputFolder": "./.vscode",
"python.pythonPath": "met-api/venv/bin/python",
"python.envFile": "${workspaceFolder}/met-api/.env",
"python.analysis.extraPaths": ["./met-api/src", "./met-cron/src"]
}
4 changes: 4 additions & 0 deletions met-cron/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ class _Config(): # pylint: disable=too-few-public-methods
ENGAGEMENT_CLOSEOUT_EMAIL_SUBJECT = \
os.getenv('ENGAGEMENT_CLOSEOUT_EMAIL_SUBJECT', '{engagement_name} - What we heard')
NOTIFICATIONS_EMAIL_ENDPOINT = os.getenv('NOTIFICATIONS_EMAIL_ENDPOINT')

# config for comment_redact_service
N_DAYS = os.getenv('N_DAYS', 14)
REDACTION_TEXT = os.getenv('REDACTION_TEXT', '[Comment Redacted]')


class MigrationConfig(): # pylint: disable=too-few-public-methods
Expand Down
2 changes: 2 additions & 0 deletions met-cron/cron/crontab
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@
*/5 * * * * default cd /met-cron && ./run_met_publish.sh
# PURGE Runs at 00:00 every Sunday (weekly).
0 0 * * 0 default cd /met-cron && ./run_met_purge.sh
# REDACT COMMENTS Runs every day at 00:00.
0 0 */1 * * default cd /met-cron && ./run_met_comment_redact.sh
# An empty line is required at the end of this file for a valid cron file
4 changes: 4 additions & 0 deletions met-cron/invoke_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def run(job_name):
from tasks.met_closeout import MetEngagementCloseout
from tasks.met_publish import MetEngagementPublish
from tasks.met_purge import MetPurge
from tasks.met_comment_redact import MetCommentRedact
application = create_app()

application.app_context().push()
Expand All @@ -79,6 +80,9 @@ def run(job_name):
elif job_name == 'PURGE':
MetPurge.do_purge()
application.logger.info('<<<< Completed MET Purge >>>>')
elif job_name == 'COMMENT_REDACT':
MetCommentRedact.do_redact()
application.logger.info('<<<< Completed MET COMMENT_REDACT >>>>')
else:
application.logger.debug('No valid args passed.Exiting job without running any ***************')

Expand Down
17 changes: 9 additions & 8 deletions met-cron/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@ MarkupSafe==2.1.2
SQLAlchemy-Utils==0.40.0
SQLAlchemy==1.3.24
Werkzeug==2.2.3
alembic==1.9.4
alembic==1.10.3
aniso8601==9.0.1
attrs==22.2.0
bcrypt==4.0.1
blinker==1.5
blinker==1.6.1
cachelib==0.9.0
certifi==2022.12.7
charset-normalizer==3.0.1
charset-normalizer==3.1.0
click==8.1.3
ecdsa==0.18.0
flask-jwt-oidc==0.3.0
flask-marshmallow==0.11.0
flask-restx==1.0.6
flask-restx==1.1.0
gunicorn==20.1.0
idna==3.4
importlib-metadata==6.0.0
importlib-metadata==6.3.0
importlib-resources==5.12.0
itsdangerous==2.0.1
jaeger-client==4.8.0
Expand All @@ -37,19 +37,20 @@ marshmallow==3.19.0
opentracing==2.4.0
packaging==23.0
pkgutil_resolve_name==1.3.10
psycopg2-binary==2.9.5
psycopg2-binary==2.9.6
pyasn1==0.4.8
pyrsistent==0.19.3
python-dotenv==1.0.0
python-jose==3.3.0
pytz==2022.7.1
pytz==2023.3
requests==2.28.2
rsa==4.9
secure==0.3.0
six==1.16.0
threadloop==1.0.2
thrift==0.16.0
tornado==6.2
urllib3==1.26.14
typing_extensions==4.5.0
urllib3==1.26.15
zipp==3.15.0
-e git+https://github.com/bcgov/met-public.git#egg=met-api&subdirectory=met-api
3 changes: 3 additions & 0 deletions met-cron/run_met_comment_redact.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#! /bin/sh
# Wrapper script: announces and then runs the COMMENT_REDACT job via invoke_jobs.py.
# invoke_jobs.py is referenced by relative path, so the script must be started
# from the directory that contains it.
echo 'run invoke_jobs.py COMMENT_REDACT'
python3 invoke_jobs.py COMMENT_REDACT
15 changes: 15 additions & 0 deletions met-cron/src/met_cron/models/db.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Initilizations for db, migration and marshmallow."""

from contextlib import contextmanager
from flask import current_app
from flask_marshmallow import Marshmallow
from flask_migrate import Migrate
from flask_sqlalchemy import SQLAlchemy
Expand All @@ -14,3 +16,16 @@

# Marshmallow for database model schema
ma = Marshmallow()

@contextmanager
def session_scope():
    """Yield ``db.session`` inside a commit-or-rollback boundary.

    The session is committed once the ``with`` body finishes normally. If the
    body (or the commit itself) raises an ``Exception``, the error is logged,
    the session is rolled back and the exception is re-raised to the caller.
    """
    # The default session is reused rather than creating a new one.
    active_session = db.session
    try:
        yield active_session
        active_session.commit()
    except Exception as error:  # noqa: B901, E722
        current_app.logger.error(f'Error in session_scope: {error}')
        active_session.rollback()
        raise
87 changes: 87 additions & 0 deletions met-cron/src/met_cron/services/comment_redact_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from datetime import datetime, timedelta
from typing import List

from flask import current_app

from met_api.constants.engagement_status import Status as MetEngagementStatus
from met_api.constants.comment_status import Status as CommentStatus
from met_api.constants.user import SYSTEM_USER
from met_api.models.comment import Comment as MetCommentModel
from met_api.models.engagement import Engagement as MetEngagementModel
from met_api.models.submission import Submission as MetSubmissionModel
from met_cron.models.db import db, session_scope
from sqlalchemy import and_


class CommentRedactService:  # pylint: disable=too-few-public-methods
    """Redaction Service on Comments.

    Overwrites the text of rejected comments, and the matching answers stored
    in ``submission_json``, with the configured ``REDACTION_TEXT`` for
    engagements that are closed and whose end date is at least ``N_DAYS`` days
    in the past.
    """

    @staticmethod
    def do_redact_comments():
        """Perform the redaction on rejected comments.

        1. Get submissions for engagements closed for N_DAYS
        2. Redact comments in comments table by submission_ids
        3. Redact comments in submission_json by submission_ids

        Steps 2 and 3 run inside a single ``session_scope`` so they are
        committed (or rolled back) together.

        Raises:
            ValueError: if the configured ``N_DAYS`` is not an integer value.
        """
        # N_DAYS is read from the environment and is therefore a string
        # whenever the variable is set; coerce it once before date arithmetic.
        days = int(current_app.config.get('N_DAYS', 14))
        submissions = CommentRedactService._find_submissions_for_n_days_closed_engagements(days=days)
        if not submissions:
            current_app.logger.info(f'>>>>>No Submissions for Engagements closed for {days} days found.')
            return
        current_app.logger.info('>>>>>Total Submissions to redact found: %s.', len(submissions))
        submissions_ids = [submission.id for submission in submissions]
        with session_scope() as session:
            CommentRedactService._redact_comments_by_submission_ids(submissions_ids, session)
            CommentRedactService._redact_submission_json_comments(submissions_ids, session)

    @staticmethod
    def _find_submissions_for_n_days_closed_engagements(days) -> List[MetSubmissionModel]:
        """Return the rejected, non-threat submissions of long-closed engagements.

        A submission is selected when its engagement has status ``Closed`` and
        an ``end_date`` on or before today (UTC) minus ``days``, its comment
        status is ``Rejected`` and ``has_threat`` is false.

        Args:
            days: number of days since the engagement end date; an ``int`` or
                a string holding one (as delivered by environment-based config).
        """
        current_app.logger.info(f'>>>>>Finding submissions for Engagements closed for {days} days.')
        # int() guards against a string value, which timedelta would reject.
        n_days_ago = datetime.utcnow().date() - timedelta(days=int(days))
        return db.session.query(MetSubmissionModel)\
            .join(MetEngagementModel, MetEngagementModel.id == MetSubmissionModel.engagement_id)\
            .filter(and_(
                MetEngagementModel.end_date <= n_days_ago,
                MetEngagementModel.status_id == MetEngagementStatus.Closed.value,
                MetSubmissionModel.comment_status_id == CommentStatus.Rejected.value,
                MetSubmissionModel.has_threat.is_(False)))\
            .all()

    @staticmethod
    def _redact_comments_by_submission_ids(submission_ids: List[int], session):
        """Replace the text of every comment belonging to ``submission_ids``.

        Issues one bulk UPDATE through ``session``; the caller is responsible
        for committing.
        """
        current_app.logger.info(f'>>>>>Redacting comments for submissions: {submission_ids}')
        session.query(MetCommentModel)\
            .filter(MetCommentModel.submission_id.in_(submission_ids))\
            .update(
                {
                    MetCommentModel.text: current_app.config.get('REDACTION_TEXT', '[Comment Redacted]'),
                    MetCommentModel.updated_by: SYSTEM_USER,
                    MetCommentModel.updated_date: datetime.utcnow(),
                },
                synchronize_session=False)

    @staticmethod
    def _redact_submission_json_comments(submission_ids: List[int], session):
        """Redact comment answers stored in ``submission_json``.

        The keys to redact are the ``component_id`` values of all comments that
        belong to ``submission_ids``; every matching key in each of those
        submissions has its value replaced by the redaction text. The caller is
        responsible for committing.
        """
        current_app.logger.info(f'>>>>>Fetching keys to redact aka component_types from comments for submissions: {submission_ids}')
        comments = session.query(MetCommentModel)\
            .filter(MetCommentModel.submission_id.in_(submission_ids))\
            .all()
        # e.g. {'simpletextarea', 'simpletextarea1', 'simpletextfield'}
        # A set removes duplicates and gives constant-time membership checks.
        keys_to_redact = {comment.component_id for comment in comments}

        # Loop invariants: look up the replacement text and timestamp once.
        redaction_text = current_app.config.get('REDACTION_TEXT', '[Comment Redacted]')
        redacted_at = datetime.utcnow()

        current_app.logger.info(f'>>>>>Redacting comments in submission_json for submissions: {submission_ids}')
        for submission in session.query(MetSubmissionModel).filter(MetSubmissionModel.id.in_(submission_ids)):
            # Assign a new dict rather than mutating the existing one in place.
            submission.submission_json = {
                key: redaction_text if key in keys_to_redact else value
                for key, value in submission.submission_json.items()
            }
            submission.updated_by = SYSTEM_USER
            submission.updated_date = redacted_at

28 changes: 28 additions & 0 deletions met-cron/tasks/met_comment_redact.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright © 2019 Province of British Columbia
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MET Comments Redact Event Logs."""
from datetime import datetime

from met_cron.services.comment_redact_service import CommentRedactService


class MetCommentRedact:  # pylint:disable=too-few-public-methods
    """Cron task that triggers comment redaction."""

    @classmethod
    def do_redact(cls):
        """Print a start marker with the current time, then run the redaction service."""
        started_at = datetime.now()
        print('Starting comments redaction event logs at------------------------', started_at)

        CommentRedactService.do_redact_comments()
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ const SubmissionListing = () => {
allowSort: true,
getValue: (row) => row.reviewed_by,
},

{
key: 'review_date',
numeric: true,
Expand Down

0 comments on commit 31a7930

Please sign in to comment.