-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #150 from multinet-app/upload-downloads
Make the network JSON uploader more flexible and allow JSON table uploads
- Loading branch information
Showing
14 changed files
with
744 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Generated by Django 3.2.18 on 2023-02-28 22:04 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Alter Upload.data_type to a wider set of format choices.

    NOTE(review): presumably this adds the JSON/NESTED_JSON choices for the
    new JSON table/network uploaders — confirm against the previous migration.
    """

    # Must be applied after the AQL query bind_vars migration.
    dependencies = [
        ('api', '0012_aqlquery_bind_vars'),
    ]

    operations = [
        migrations.AlterField(
            model_name='upload',
            name='data_type',
            # Supported upload formats; max_length=20 fits the longest value.
            field=models.CharField(choices=[('CSV', 'Csv'), ('JSON', 'Json'), ('D3_JSON', 'D3 Json'), ('NESTED_JSON', 'Nested Json'), ('NEWICK', 'Newick')], max_length=20),
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from .common import ProcessUploadTask | ||
from .csv import process_csv | ||
from .d3_json import process_d3_json | ||
from .json_table import process_json_table | ||
|
||
__all__ = ['ProcessUploadTask', 'process_csv', 'process_d3_json'] | ||
__all__ = ['ProcessUploadTask', 'process_csv', 'process_d3_json', 'process_json_table'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
class DataFormatError(Exception):
    """Raised when uploaded data does not match the expected format.

    Constructed with a human-readable message describing the problem,
    e.g. ``DataFormatError('expected a JSON array of objects')``.

    The previous explicit ``__init__`` only forwarded its single argument to
    ``Exception.__init__`` and is therefore omitted: the inherited constructor
    provides identical behavior while also accepting the standard variadic
    ``Exception`` signature.
    """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import json | ||
from typing import Any, BinaryIO, Dict | ||
|
||
from celery import shared_task | ||
|
||
from multinet.api.models import Table, TableTypeAnnotation, Upload | ||
|
||
from .common import ProcessUploadTask | ||
from .utils import processor_dict | ||
|
||
|
||
def process_row(
    row: Dict[str, Any],
    cols: Dict[str, 'TableTypeAnnotation.Type'],
) -> Optional[Dict[str, Any]]:
    """Type-coerce the annotated columns of a single uploaded JSON row.

    Returns a copy of ``row`` with each column listed in ``cols`` converted by
    its registered processor function, or ``None`` when the row has neither a
    truthy ``_key`` nor a truthy ``id`` (such rows cannot be addressed in the
    table and are skipped by the caller).

    Values that fail conversion are left unchanged rather than aborting the
    whole upload.

    (Fix: the original annotated the return as ``Dict`` even though ``None``
    is a possible return value; it is now ``Optional``.)
    """
    new_row = dict(row)

    # A row must carry a (truthy) _key or id to be stored; otherwise skip it.
    if not (new_row.get('_key') or new_row.get('id')):
        return None

    for col_key, col_type in cols.items():
        entry = row.get(col_key)

        # Nothing to convert for absent or null entries.
        if entry is None:
            continue

        process_func = processor_dict.get(col_type)
        if process_func is not None:
            try:
                new_row[col_key] = process_func(entry)
            except ValueError:
                # Conversion failed; keep the original (string) value.
                pass

    return new_row
|
||
|
||
@shared_task(base=ProcessUploadTask)
def process_json_table(
    task_id: int,
    table_name: str,
    edge: bool,
    columns: Dict[str, TableTypeAnnotation.Type],
) -> None:
    """Celery task: ingest an uploaded JSON array of row objects into a new table.

    Creates ``table_name`` in the uploading workspace, records the declared
    column types as ``TableTypeAnnotation`` rows, then type-coerces each row
    via ``process_row`` and inserts every row that carries a ``_key``/``id``.

    task_id: primary key of the ``Upload`` record tracking this job.
    table_name: name of the table to create.
    edge: whether the new table stores edges.
    columns: mapping of column name to its declared type annotation.
    """
    upload: Upload = Upload.objects.get(id=task_id)

    # Create the destination table in the same workspace as the upload.
    table: Table = Table.objects.create(
        name=table_name,
        edge=edge,
        workspace=upload.workspace,
    )

    # Persist the declared column types in one query.
    TableTypeAnnotation.objects.bulk_create(
        [
            TableTypeAnnotation(table=table, column=col_key, type=col_type)
            for col_key, col_type in columns.items()
        ]
    )

    # Download the raw blob from S3/MinIO and parse it. json.loads accepts
    # bytes directly (UTF-8/16/32 are auto-detected per RFC 8259), so the
    # explicit .decode('utf-8') of the original was redundant.
    with upload.blob as blob_file:
        imported_json = json.loads(blob_file.read())

    # Coerce each row; drop rows rejected by process_row (missing _key/id).
    processed_rows = [
        new_row
        for new_row in (process_row(row, columns) for row in imported_json)
        if new_row is not None
    ]

    # Insert all surviving rows into the new table.
    table.put_rows(processed_rows)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
[ | ||
{ | ||
"_key": "0", | ||
"name": "Myriel", | ||
"group": "1" | ||
}, | ||
{ | ||
"_key": "1", | ||
"name": "Napoleon", | ||
"group": "1" | ||
}, | ||
{ | ||
"_key": "2", | ||
"name": "Mlle.Baptistine", | ||
"group": "1" | ||
}, | ||
{ | ||
"_key": "3", | ||
"name": "Mme.Magloire", | ||
"group": "1" | ||
}, | ||
{ | ||
"_key": "4", | ||
"name": "CountessdeLo", | ||
"group": "1" | ||
} | ||
] |
Oops, something went wrong.