feat(api): weekly stamp data dump (#324)
* feat(api): create scheduled task definition

* chore(api): model updates for data dump

* feat(api): data dump export script

* fix(infra): fix staging deployment of scheduled task

* fix(infra): fix review deployment of scheduled task

* chore(api): update pipfile lock after resolving pipfile merge conflict

* chore(infra): add aws key values

* chore(api): further environment configuration for weekly data dumps

* chore(infra): comment out scheduled task until script is merged

* refactor(api): paginate stamp query and save count and last_export

---------

Co-authored-by: Gerald Iakobinyi-Pich <[email protected]>
2 people authored and lucianHymer committed Jul 25, 2023
1 parent e23adaf commit bc82b3a
Showing 12 changed files with 619 additions and 396 deletions.
759 changes: 363 additions & 396 deletions api/Pipfile.lock

Large diffs are not rendered by default.

Empty file.
59 changes: 59 additions & 0 deletions api/ceramic_cache/management/commands/dump_stamp_data.py
@@ -0,0 +1,59 @@
import datetime
import json
import os

import boto3
from ceramic_cache.models import CeramicCache, StampExports
from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.paginator import Paginator
from django.utils import timezone

s3 = boto3.client(
    "s3",
    aws_access_key_id=settings.S3_DATA_AWS_SECRET_KEY_ID,
    aws_secret_access_key=settings.S3_DATA_AWS_SECRET_ACCESS_KEY,
)


class Command(BaseCommand):
    help = "Weekly data dump of new Stamp data since the last dump."

    def handle(self, *args, **options):
        print("Starting dump_stamp_data.py")

        latest_export = StampExports.objects.order_by("-last_export_ts").first()

        if not latest_export:
            print("No previous exports found. Exporting all data.")
            latest_export = StampExports.objects.create(
                last_export_ts=timezone.now() - datetime.timedelta(days=7)
            )

        paginator = Paginator(
            CeramicCache.objects.filter(
                created_at__gt=latest_export.last_export_ts
            ).values_list("stamp", flat=True),
            1000,
        )

        # Generate the dump file name
        file_name = f'stamps_{latest_export.last_export_ts.strftime("%Y%m%d_%H%M%S")}_{timezone.now().strftime("%Y%m%d_%H%M%S")}.jsonl'

        # Write serialized data to the file
        with open(file_name, "w") as f:
            for page in paginator.page_range:
                for stamp in paginator.page(page).object_list:
                    f.write(json.dumps({"stamp": stamp}) + "\n")

        # Upload to S3 bucket
        s3.upload_file(file_name, settings.S3_WEEKLY_BACKUP_BUCKET_NAME, file_name)

        # Delete local file after upload
        os.remove(file_name)

        StampExports.objects.create(
            last_export_ts=timezone.now(), stamp_total=paginator.count
        )

        print(f"Data dump completed and uploaded to S3 as {file_name}")
29 changes: 29 additions & 0 deletions api/ceramic_cache/migrations/0009_stampexports.py
@@ -0,0 +1,29 @@
# Generated by Django 4.2.3 on 2023-07-21 22:00

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ("ceramic_cache", "0008_remove_ceramiccache_deleted_at"),
    ]

    operations = [
        migrations.CreateModel(
            name="StampExports",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("last_export_ts", models.DateTimeField(auto_now_add=True)),
                ("stamp_total", models.IntegerField(default=0)),
            ],
        ),
    ]
18 changes: 18 additions & 0 deletions api/ceramic_cache/migrations/0010_ceramiccache_created_at.py
@@ -0,0 +1,18 @@
# Generated by Django 4.2.3 on 2023-07-21 22:30

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ("ceramic_cache", "0009_stampexports"),
    ]

    operations = [
        migrations.AddField(
            model_name="ceramiccache",
            name="created_at",
            field=models.DateTimeField(blank=True, null=True),
        ),
    ]
18 changes: 18 additions & 0 deletions api/ceramic_cache/migrations/0011_alter_ceramiccache_created_at.py
@@ -0,0 +1,18 @@
# Generated by Django 4.2.3 on 2023-07-21 22:31

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ("ceramic_cache", "0010_ceramiccache_created_at"),
    ]

    operations = [
        migrations.AlterField(
            model_name="ceramiccache",
            name="created_at",
            field=models.DateTimeField(auto_now_add=True, null=True),
        ),
    ]
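
Applying the three new migrations is standard Django; a minimal sketch, equivalent to running `python manage.py migrate ceramic_cache`:

from django.core.management import call_command

# Apply the ceramic_cache migrations added in this commit (0009 through 0011).
call_command("migrate", "ceramic_cache")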
6 changes: 6 additions & 0 deletions api/ceramic_cache/models.py
@@ -10,6 +10,12 @@ class CeramicCache(models.Model):
        null=False, blank=False, default="", max_length=256, db_index=True
    )
    stamp = models.JSONField(default=dict)
    created_at = models.DateTimeField(auto_now_add=True, blank=True, null=True)

    class Meta:
        unique_together = ["address", "provider"]


class StampExports(models.Model):
    last_export_ts = models.DateTimeField(auto_now_add=True)
    stamp_total = models.IntegerField(default=0)
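
One caveat worth flagging: auto_now_add only fills created_at for rows inserted after migration 0010, so pre-existing stamps keep created_at = NULL and are skipped by the created_at__gt filter in dump_stamp_data. A hypothetical backfill sketch, if those rows should be included in a future export:

from django.utils import timezone
from ceramic_cache.models import CeramicCache

# Hypothetical backfill (not part of this commit): stamp pre-existing rows so
# the created_at__gt filter can pick them up on the next export.
CeramicCache.objects.filter(created_at__isnull=True).update(created_at=timezone.now())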
8 changes: 8 additions & 0 deletions api/scorer/settings/s3.py
@@ -0,0 +1,8 @@
from .env import env

# These configuration settings will be used when accessing data
# on S3 URIs. This should typically be the case when importing or exporting
# data. See the `import_allo_votes` command for an example.
S3_DATA_AWS_SECRET_KEY_ID = env("S3_DATA_AWS_SECRET_KEY_ID", default=None)
S3_DATA_AWS_SECRET_ACCESS_KEY = env("S3_DATA_AWS_SECRET_ACCESS_KEY", default=None)
S3_WEEKLY_BACKUP_BUCKET_NAME = env("S3_WEEKLY_BACKUP_BUCKET_NAME", default=None)
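
Since each setting defaults to None, a deployment that omits these variables constructs the boto3 client with null credentials (boto3 then falls back to its default credential chain), and the weekly dump's upload will fail on the missing bucket name. A hypothetical guard along these lines would make that explicit:

from django.conf import settings

# Hypothetical check (not in the commit): fail fast if the weekly dump
# cannot upload because the bucket name is unset.
if not settings.S3_WEEKLY_BACKUP_BUCKET_NAME:
    raise RuntimeError("S3_WEEKLY_BACKUP_BUCKET_NAME is unset; dump_stamp_data cannot upload.")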
12 changes: 12 additions & 0 deletions infra/prod/index.ts
@@ -384,6 +384,18 @@ const secrets = [
name: "CGRANTS_API_TOKEN",
valueFrom: `${SCORER_SERVER_SSM_ARN}:CGRANTS_API_TOKEN::`,
},
{
name: "S3_DATA_AWS_SECRET_KEY_ID",
valueFrom: `${SCORER_SERVER_SSM_ARN}:S3_DATA_AWS_SECRET_KEY_ID::`,
},
{
name: "S3_DATA_AWS_SECRET_ACCESS_KEY",
valueFrom: `${SCORER_SERVER_SSM_ARN}:S3_DATA_AWS_SECRET_ACCESS_KEY::`,
},
{
name: "S3_WEEKLY_BACKUP_BUCKET_NAME",
valueFrom: `${SCORER_SERVER_SSM_ARN}:S3_WEEKLY_BACKUP_BUCKET_NAME::`,
},
];
const environment = [
{
54 changes: 54 additions & 0 deletions infra/review/index.ts
@@ -312,6 +312,18 @@ const secrets = [
name: "CGRANTS_API_TOKEN",
valueFrom: `${SCORER_SERVER_SSM_ARN}:CGRANTS_API_TOKEN::`,
},
{
name: "S3_DATA_AWS_SECRET_KEY_ID",
valueFrom: `${SCORER_SERVER_SSM_ARN}:S3_DATA_AWS_SECRET_KEY_ID::`,
},
{
name: "S3_DATA_AWS_SECRET_ACCESS_KEY",
valueFrom: `${SCORER_SERVER_SSM_ARN}:S3_DATA_AWS_SECRET_ACCESS_KEY::`,
},
{
name: "S3_WEEKLY_BACKUP_BUCKET_NAME",
valueFrom: `${SCORER_SERVER_SSM_ARN}:S3_WEEKLY_BACKUP_BUCKET_NAME::`,
},
];
const environment = [
{
@@ -693,6 +705,48 @@ const secgrp = new aws.ec2.SecurityGroup(`scorer-run-migrations-task`, {

export const securityGroupForTaskDefinition = secgrp.id;

//////////////////////////////////////////////////////////////
// ECS Scheduled Task
//////////////////////////////////////////////////////////////
// const weeklyDataDump = new awsx.ecs.FargateTaskDefinition("weekly-data-dump", {
//   containers: {
//     web: {
//       image: dockerGtcPassportScorerImage,
//       cpu: 256,
//       memory: 2048,
//       secrets,
//       command: ["python", "manage.py", "dump_stamp_data"],
//     },
//   },
// });

// const scheduledEventRule = new aws.cloudwatch.EventRule("scheduledEventRule", {
//   // scheduleExpression: "cron(0 12 * * ? *)", // Run the task every day at 12 UTC
//   scheduleExpression: "cron(0/5 * ? * * *)", // Run the task every 5 min
//   // scheduleExpression: "cron(0 12 ? * FRI *)", // Run the task every Friday at 12 UTC
// });

// // const serviceLinkRoler = new aws.iam.ServiceLinkedRole("ecs_service_link_roler", {
// //   customSuffix: "ecs_scheduled_event",
// //   awsServiceName: "ecs.amazonaws.com",
// // })

// new aws.cloudwatch.EventTarget("scheduledEventTarget", {
//   rule: scheduledEventRule.name,
//   arn: cluster.cluster.arn,
//   roleArn: dpoppEcsRole.arn,
//   ecsTarget: {
//     taskCount: 1,
//     taskDefinitionArn: weeklyDataDump.taskDefinition.arn,
//     launchType: "FARGATE",
//     networkConfiguration: {
//       assignPublicIp: true,
//       subnets: vpcPublicSubnetIds,
//       securityGroups: [secgrp.id],
//     },
//   },
// });

//////////////////////////////////////////////////////////////
// Set up EC2 instance
// - it is intended to be used for troubleshooting
45 changes: 45 additions & 0 deletions infra/staging/index.ts
@@ -316,6 +316,18 @@ const secrets = [
name: "CGRANTS_API_TOKEN",
valueFrom: `${SCORER_SERVER_SSM_ARN}:CGRANTS_API_TOKEN::`,
},
{
name: "S3_DATA_AWS_SECRET_KEY_ID",
valueFrom: `${SCORER_SERVER_SSM_ARN}:S3_DATA_AWS_SECRET_KEY_ID::`,
},
{
name: "S3_DATA_AWS_SECRET_ACCESS_KEY",
valueFrom: `${SCORER_SERVER_SSM_ARN}:S3_DATA_AWS_SECRET_ACCESS_KEY::`,
},
{
name: "S3_WEEKLY_BACKUP_BUCKET_NAME",
valueFrom: `${SCORER_SERVER_SSM_ARN}:S3_WEEKLY_BACKUP_BUCKET_NAME::`,
},
];
const environment = [
{
@@ -748,6 +760,39 @@ const flower = new awsx.ecs.FargateService("flower", {
  },
});

//////////////////////////////////////////////////////////////
// ECS Scheduled Task
//////////////////////////////////////////////////////////////
// const weeklyDataDump = new awsx.ecs.FargateTaskDefinition("weekly-data-dump", {
//   containers: {
//     web: {
//       image: dockerGtcPassportScorerImage,
//       cpu: 256,
//       memory: 2048,
//       secrets,
//       command: ["python", "manage.py", "dump_stamp_data"],
//     },
//   },
// });

// const scheduledEventRule = new aws.cloudwatch.EventRule("scheduledEventRule", {
//   scheduleExpression: "rate(1 minute)", // Run the task every minute (testing cadence)
// });

// new aws.cloudwatch.EventTarget("scheduledEventTarget", {
//   rule: scheduledEventRule.name,
//   arn: cluster.cluster.arn,
//   ecsTarget: {
//     taskCount: 1,
//     taskDefinitionArn: weeklyDataDump.taskDefinition.arn,
//     launchType: "FARGATE",
//     networkConfiguration: {
//       subnets: vpc.publicSubnetIds,
//       securityGroups: [cluster.securityGroups[0].id],
//     },
//   },
// });
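
When the task is re-enabled after the export script merges, the weekly cadence in the commit title presumably corresponds to the Friday-at-noon cron commented out in infra/review. A hedged sketch of that schedule expression via boto3 (the rule name is hypothetical; the repo itself provisions this through Pulumi):

import boto3

events = boto3.client("events")

# Hypothetical EventBridge rule: run every Friday at 12:00 UTC, matching the
# weekly cadence described in the commit message.
events.put_rule(
    Name="weekly-data-dump",  # hypothetical name
    ScheduleExpression="cron(0 12 ? * FRI *)",
)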

//////////////////////////////////////////////////////////////
// Set up task to run migrations
//////////////////////////////////////////////////////////////
7 changes: 7 additions & 0 deletions interface/.env.example
@@ -13,3 +13,10 @@ NEXT_PUBLIC_PASSPORT_SCORER_TESTING_CYPRESS="on"
NEXT_PUBLIC_MAINTENANCE_MODE_ON=["2023-06-07T21:00:00.000Z", "2023-06-08T22:15:00.000Z"]
NEXT_PUBLIC_WALLET_CONNECT_PROJECT_ID=YOUR_WALLET_CONNECT_PROJECT_ID
NEXT_PUBLIC_WEB3_ONBOARD_EXPLORE_URL=http://localhost:3000/

# The following credentials are used for accessing files in the AWS S3 bucket when
# importing (or exporting) data from the database.
# See the `import_allo_votes` command for an example.
S3_DATA_AWS_SECRET_KEY_ID=
S3_DATA_AWS_SECRET_ACCESS_KEY=
S3_WEEKLY_BACKUP_BUCKET_NAME=

2 comments on commit bc82b3a

@Cxiane commented on bc82b3a Jul 29, 2023

Nice

@raufgh1401

👌👌