Skip to content

Commit

Permalink
Use X-Accel-Redirect on Django with NGINX (#294)
Browse files Browse the repository at this point in the history
* use x accel redirect on django with nginx

* add user file model and uploader config

* add task to clean UserFile

* fix store to object storage

* fix test

* add tests

* fix failed tests

* fix test

* fix user file path

* add missing env to test yml
  • Loading branch information
danangmassandy authored Dec 9, 2024
1 parent 797904b commit 404fe83
Show file tree
Hide file tree
Showing 24 changed files with 973 additions and 70 deletions.
2 changes: 2 additions & 0 deletions deployment/docker-compose.test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ services:
- MINIO_AWS_ENDPOINT_URL=http://minio:9000/
- MINIO_AWS_BUCKET_NAME=tomorrownow
- MINIO_AWS_DIR_PREFIX=dev/media
- MINIO_GAP_AWS_BUCKET_NAME=tngap-products
- MINIO_GAP_AWS_DIR_PREFIX=dev
entrypoint: [ ]
ports:
# for django test server
Expand Down
1 change: 1 addition & 0 deletions deployment/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ RUN groupadd --gid $USER_GID $USERNAME \
# ********************************************************
# * Anything else you want to do like clean up goes here *
# ********************************************************
RUN apt-get update && apt-get install -y gnupg2

# [Optional] Set the default user. Omit if you want to keep the default as root.
USER $USERNAME
Expand Down
32 changes: 32 additions & 0 deletions deployment/nginx/sites-enabled/default.conf
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,38 @@ server {
expires 21d; # cache for 21 days
}

# Internal-only location used for X-Accel-Redirect downloads: Django
# replies with "X-Accel-Redirect: /userfiles/<protocol>/<host>/<path>"
# and NGINX streams the remote file through to the client.
location ~ ^/userfiles/(.*?)/(.*?)/(.*) {
# "internal" means this location can only be reached via an internal
# redirect (e.g. X-Accel-Redirect), never directly by a client.
internal;
# How to resolve remote URLs; you may want to update this depending
# on your setup. NOTE(review): 127.0.0.11 is the DNS resolver inside
# a Docker container network — confirm for non-Docker deployments.
resolver 127.0.0.11 ipv6=off;

# Extract the remote URL parts captured by the location regex above.
set $download_protocol $1;
set $download_host $2;
set $download_path $3;
# Reconstruct the remote URL from its captured parts.
set $download_url $download_protocol://$download_host/$download_path;
# Headers for the remote server; unset Authorization and Cookie so the
# client's credentials are never forwarded upstream.
proxy_set_header Host $download_host;
proxy_set_header Authorization '';
proxy_set_header Cookie '';
# Headers for the response: $upstream_http_... lets us inject headers
# set by Django, while proxy_hide_header ensures the same header from
# the remote server isn't passed through.
proxy_hide_header Content-Disposition;
add_header Content-Disposition $upstream_http_content_disposition;
# Stops the local disk from being written to (just forwards data through)
proxy_max_temp_file_size 0;
# Set timeout; the remote URL should stay readable at least this long.
proxy_read_timeout 3600s;
proxy_send_timeout 3600s;
# Proxy the remote file through to the client.
proxy_pass $download_url$is_args$args;

}

# Finally, send all non-media requests to the Django server.
location / {
uwsgi_pass django;
Expand Down
7 changes: 6 additions & 1 deletion django_project/core/celery.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,12 @@
'task': 'cleanup_user_locations',
# Run every week at 00:00 UTC
'schedule': crontab(minute='0', hour='0', day_of_week='0'),
}
},
'cleanup-user-files': {
'task': 'cleanup_user_files',
# Run every day at 00:15 UTC
'schedule': crontab(minute='15', hour='00'),
},
}


Expand Down
12 changes: 12 additions & 0 deletions django_project/core/settings/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,18 @@
"django.contrib.staticfiles.storage.ManifestStaticFilesStorage"
)
},
"gap_products": {
"BACKEND": "storages.backends.s3.S3Storage",
"OPTIONS": {
"access_key": MINIO_AWS_ACCESS_KEY_ID,
"secret_key": MINIO_AWS_SECRET_ACCESS_KEY,
"bucket_name": os.environ.get("MINIO_GAP_AWS_BUCKET_NAME"),
"file_overwrite": False,
"max_memory_size": 500 * MB, # 500MB
"transfer_config": AWS_TRANSFER_CONFIG,
"endpoint_url": MINIO_AWS_ENDPOINT_URL
},
}
}

STORAGE_DIR_PREFIX = os.environ.get("MINIO_AWS_DIR_PREFIX", "media")
Expand Down
4 changes: 4 additions & 0 deletions django_project/core/tests/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
.. note:: Test runner.
"""

import os
from django.conf import settings
from django.test.runner import DiscoverRunner

Expand Down Expand Up @@ -33,4 +34,7 @@ def setup_test_environment(self, **kwargs):
"""Prepare test env."""
CustomTestRunner.__disable_celery()
create_s3_bucket(settings.MINIO_AWS_BUCKET_NAME)
create_s3_bucket(
os.environ.get("MINIO_GAP_AWS_BUCKET_NAME", "tngap-products")
)
super(CustomTestRunner, self).setup_test_environment(**kwargs)
2 changes: 2 additions & 0 deletions django_project/gap/admin/preferences.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ class PreferencesAdmin(admin.ModelAdmin):
'fields': (
'dask_threads_num_api',
'api_log_batch_size',
'api_use_x_accel_redirect',
'user_file_uploader_config'
)
}
),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 4.2.7 on 2024-12-08 15:55

from django.db import migrations, models
import gap.models.preferences


class Migration(migrations.Migration):
    """Add the X-Accel-Redirect toggle and UserFile uploader config.

    Introduces two new fields on the singleton ``preferences`` model.
    """

    dependencies = [
        ('gap', '0040_datasourcefilecache_size'),
    ]

    operations = [
        migrations.AddField(
            model_name='preferences',
            name='api_use_x_accel_redirect',
            field=models.BooleanField(
                default=True,
                help_text=(
                    'When set to True, Django will send X-Accel-Redirect '
                    'header to the NGINX to offload the download process '
                    'to NGINX.'
                ),
            ),
        ),
        migrations.AddField(
            model_name='preferences',
            name='user_file_uploader_config',
            field=models.JSONField(
                blank=True,
                null=True,
                default=gap.models.preferences.user_file_uploader_config_default,
                help_text='Config for fsspec uploader to s3 for UserFile.',
            ),
        ),
    ]
26 changes: 26 additions & 0 deletions django_project/gap/models/preferences.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ def crop_plan_config_default() -> dict:
}


def user_file_uploader_config_default() -> dict:
    """Return the default fsspec S3 uploader settings for UserFile.

    Used as the ``default`` callable of a JSONField so every row gets a
    fresh dict instead of a shared mutable default.
    """
    # Upload chunk size: 500 MB.
    block_size_bytes = 500 * 1024 * 1024
    return {
        'max_concurrency': 2,
        'default_block_size': block_size_bytes
    }


class Preferences(SingletonModel):
"""Preference settings specifically for gap."""

Expand Down Expand Up @@ -134,6 +143,23 @@ class Preferences(SingletonModel):
help_text='Number of API Request logs to be saved in a batch.'
)

# api use x-accel-redirect
api_use_x_accel_redirect = models.BooleanField(
default=True,
help_text=(
'When set to True, Django will send X-Accel-Redirect header '
'to the NGINX to offload the download process to NGINX.'
)
)

# UserFile Uploader s3 config
user_file_uploader_config = models.JSONField(
default=user_file_uploader_config_default,
blank=True,
null=True,
help_text='Config for fsspec uploader to s3 for UserFile.'
)

class Meta: # noqa: D106
verbose_name_plural = "preferences"

Expand Down
71 changes: 63 additions & 8 deletions django_project/gap/providers/observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from django.contrib.gis.geos import Polygon, Point
from django.contrib.gis.db.models.functions import Distance, GeoFunc
from typing import List, Tuple, Union
from fsspec.core import OpenFile

from gap.models import (
Dataset,
Expand Down Expand Up @@ -195,20 +196,54 @@ def to_csv_stream(self, suffix='.csv', separator=','):
:rtype: bytes
"""
headers, _ = self._get_headers(use_station_id=True)

# write headers
yield bytes(','.join(headers) + '\n', 'utf-8')

# get dataframe
df_pivot = self._get_data_frame(use_station_id=True)

# write headers
yield bytes(separator.join(headers) + '\n', 'utf-8')

# Write the data in chunks
for start in range(0, len(df_pivot), self.csv_chunk_size):
chunk = df_pivot.iloc[start:start + self.csv_chunk_size]
yield chunk.to_csv(index=False, header=False, float_format='%g')
yield chunk.to_csv(
index=False, header=False, float_format='%g',
sep=separator
)

def to_netcdf_stream(self):
"""Generate NetCDF."""
def to_csv(self, suffix='.csv', separator=',', **kwargs):
    """Generate a csv file in object storage from the query result.

    :param suffix: file extension of the generated file, default '.csv'
    :param separator: column separator for the csv, default ','
    :param kwargs: extra options forwarded to ``_get_fsspec_remote_url``
    :return: the remote output location returned by
        ``_get_fsspec_remote_url``
    """
    headers, _ = self._get_headers(use_station_id=True)

    # get dataframe
    df_pivot = self._get_data_frame(use_station_id=True)

    outfile, output = self._get_fsspec_remote_url(
        suffix, mode='w', **kwargs
    )

    with outfile as tmp_file:
        # only the first chunk carries the header row
        write_headers = True

        # Write the data in chunks to keep memory usage bounded.
        for start in range(0, len(df_pivot), self.csv_chunk_size):
            chunk = df_pivot.iloc[start:start + self.csv_chunk_size]
            if write_headers:
                # rename columns on a copy so the .iloc slice view of
                # df_pivot is not mutated (avoids pandas
                # SettingWithCopyWarning); only the first chunk's
                # column names are ever written out
                chunk = chunk.copy()
                chunk.columns = headers

            # NOTE(review): appends via the local temp path
            # (tmp_file.name) while the fsspec handle is open; the
            # upload happens when the context closes — confirm
            # _get_fsspec_remote_url returns a temp-file-backed handle.
            chunk.to_csv(
                tmp_file.name, index=False, header=write_headers,
                float_format='%g',
                sep=separator, mode='a'
            )

            write_headers = False

    return output

def _get_xarray_dataset(self):
time_col_exists = self.has_time_column

# if time column exists, in netcdf we should use datetime
Expand All @@ -234,7 +269,12 @@ def to_netcdf_stream(self):

# Convert to xarray Dataset
df_pivot.set_index(field_indices, inplace=True)
ds = df_pivot.to_xarray()

return df_pivot.to_xarray()

def to_netcdf_stream(self):
"""Generate NetCDF."""
ds = self._get_xarray_dataset()

# write to netcdf
with (
Expand All @@ -253,6 +293,21 @@ def to_netcdf_stream(self):
break
yield chunk

def to_netcdf(self, **kwargs):
    """Generate netcdf file to object storage.

    Builds the xarray Dataset, serializes it lazily
    (``compute=False``) to the fsspec-backed temp file, then runs the
    deferred write through ``execute_dask_compute``.
    """
    dataset = self._get_xarray_dataset()

    remote_file, output_location = self._get_fsspec_remote_url(
        '.nc', **kwargs
    )

    with remote_file as tmp_file:
        # compute=False defers the actual write to the dask compute
        # call below.
        delayed_write = dataset.to_netcdf(
            tmp_file.name, format='NETCDF4', engine='h5netcdf',
            compute=False
        )
        execute_dask_compute(delayed_write)

    return output_location

def _to_dict(self) -> dict:
"""Convert into dict.
Expand Down
Loading

0 comments on commit 404fe83

Please sign in to comment.