Skip to content

Commit

Permalink
Merge branch 'develop' into develop-filepath2-manifest-gen-FDS-2278
Browse files Browse the repository at this point in the history
  • Loading branch information
GiaJordan committed Aug 27, 2024
2 parents d0628b2 + 34d1760 commit 2711064
Show file tree
Hide file tree
Showing 4 changed files with 223 additions and 192 deletions.
42 changes: 28 additions & 14 deletions schematic/store/synapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,8 @@ def getFilesInStorageDataset(
Raises:
ValueError: Dataset ID not found.
"""
# select all files within a given storage dataset folder (top level folder in a Synapse storage project or folder marked with contentType = 'dataset')
# select all files within a given storage dataset folder (top level folder in
# a Synapse storage project or folder marked with contentType = 'dataset')
walked_path = synapseutils.walk(
self.syn, datasetId, includeTypes=["folder", "file"]
)
Expand All @@ -568,25 +569,36 @@ def getFilesInStorageDataset(
file_list = []

# iterate over all results
for dirpath, dirname, filenames in walked_path:
for dirpath, _, path_filenames in walked_path:
# iterate over all files in a folder
for filename in filenames:
if (not "manifest" in filename[0] and not fileNames) or (
fileNames and filename[0] in fileNames
for path_filename in path_filenames:
if ("manifest" not in path_filename[0] and not fileNames) or (
fileNames and path_filename[0] in fileNames
):
# don't add manifest to list of files unless it is specified in the list of specified fileNames; return all found files
# don't add manifest to list of files unless it is specified in the
# list of specified fileNames; return all found files
# except the manifest if no fileNames have been specified
# TODO: refactor for clarity/maintainability

if fullpath:
# append directory path to filename
filename = (
project_name + "/" + dirpath[0] + "/" + filename[0],
filename[1],
)
if dirpath[0].startswith(f"{project_name}/"):
path_filename = (
dirpath[0] + "/" + path_filename[0],
path_filename[1],
)
else:
path_filename = (
project_name
+ "/"
+ dirpath[0]
+ "/"
+ path_filename[0],
path_filename[1],
)

# add file name file id tuple, rearranged so that id is first and name follows
file_list.append(filename[::-1])
file_list.append(path_filename[::-1])

return file_list

Expand Down Expand Up @@ -666,8 +678,8 @@ def getDatasetManifest(
manifest_data = ManifestDownload.download_manifest(
md, newManifestName=newManifestName, manifest_df=manifest
)
## TO DO: revisit how downstream code handle manifest_data. If the downstream code would break when manifest_data is an empty string,
## then we should catch the error here without returning an empty string.
# TODO: revisit how downstream code handles manifest_data. If the downstream code would break when manifest_data is an empty string,
# then we should catch the error here rather than returning an empty string.
if not manifest_data:
logger.debug(
f"No manifest data returned. Please check if you have successfully downloaded manifest: {manifest_syn_id}"
Expand Down Expand Up @@ -3066,6 +3078,8 @@ def _fix_int_columns(self):
for col in int_columns:
# Coercing to string because NaN is a floating point value
# and cannot exist alongside integers in a column
to_int_fn = lambda x: "" if np.isnan(x) else str(int(x))
def to_int_fn(x):
    """Render a numeric cell as an integer string, or "" when it is NaN.

    NaN is a floating point value and cannot sit alongside integers in a
    column, so values are coerced to strings instead of ints.

    Args:
        x: A numeric value accepted by ``np.isnan`` and ``int``.

    Returns:
        An empty string if ``x`` is NaN; otherwise ``str(int(x))`` (the
        fractional part, if any, is truncated toward zero by ``int``).
    """
    return "" if np.isnan(x) else str(int(x))

self.table[col] = self.table[col].apply(to_int_fn)
return self.table
19 changes: 5 additions & 14 deletions schematic_api/api/routes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
import logging
import os
import pathlib
Expand All @@ -8,12 +7,10 @@
import time
import urllib.request
from functools import wraps
from json.decoder import JSONDecodeError
from typing import Any, List, Optional
from typing import List, Tuple

import connexion
import pandas as pd
from connexion.decorators.uri_parsing import Swagger2URIParser
from flask import current_app as app
from flask import request, send_from_directory
from flask_cors import cross_origin
Expand All @@ -28,14 +25,6 @@
Span,
)
from opentelemetry.sdk.trace.sampling import ALWAYS_OFF
from synapseclient.core.exceptions import (
SynapseAuthenticationError,
SynapseHTTPError,
SynapseNoCredentialsError,
SynapseTimeoutError,
SynapseUnmetAccessRestrictions,
)
from werkzeug.debug import DebuggedApplication

from schematic.configuration.configuration import CONFIG
from schematic.manifest.generator import ManifestGenerator
Expand Down Expand Up @@ -457,7 +446,7 @@ def validate_manifest_route(
return res_dict


#####profile validate manifest route function
# profile validate manifest route function
@trace_function_params()
def submit_manifest_route(
schema_url,
Expand Down Expand Up @@ -596,7 +585,9 @@ def get_storage_projects_datasets(asset_view, project_id):
return sorted_dataset_lst


def get_files_storage_dataset(asset_view, dataset_id, full_path, file_names=None):
def get_files_storage_dataset(
asset_view: str, dataset_id: str, full_path: bool, file_names: List[str] = None
) -> List[Tuple[str, str]]:
# Access token now stored in request header
access_token = get_access_token()

Expand Down
Loading

0 comments on commit 2711064

Please sign in to comment.