From 0a908dd23c3b977dfef8d47eada82584c139067d Mon Sep 17 00:00:00 2001
From: kaeldai
Date: Tue, 3 Dec 2024 12:03:27 -0800
Subject: [PATCH] adding ability to download directly from dandi

---
Review notes:
- get_dandiset(): the streaming branch bound the NWBHDF5IO object to `io`, which
  made `io` a local name for the whole function and broke the earlier
  `io.debug()` call (UnboundLocalError); the local is now `nwb_io`.
- MappingStrategy.nwb_files: the error message named "input_type" but the option
  read in __init__ is "input_file"; leftover debug print() calls were dropped.
- get_dandiset() gained a docstring; DandiFileWrapper is marked as a stub.
- Line breaks and indentation were reconstructed from a whitespace-collapsed copy;
  whitespace-only -/+ pairs are kept as recorded, and the post-image blob hash on
  the ecephys_module.py index line is stale.

 bmtk/simulator/core/modules/ecephys_module.py | 90 +++++++++++++++++--
 .../config.simulation.dandi.json              | 87 ++++++++++++++++++
 2 files changed, 171 insertions(+), 6 deletions(-)
 create mode 100644 examples/bio_neuropixels/config.simulation.dandi.json

diff --git a/bmtk/simulator/core/modules/ecephys_module.py b/bmtk/simulator/core/modules/ecephys_module.py
index 42c4023bf..8f2cd9f8a 100644
--- a/bmtk/simulator/core/modules/ecephys_module.py
+++ b/bmtk/simulator/core/modules/ecephys_module.py
@@ -2,6 +2,8 @@
 from typing import Any
 import numpy as np
 import pandas as pd
+import h5py
+import fnmatch
 
 from .simulator_module import SimulatorMod
 from bmtk.simulator.core.io_tools import io
@@ -21,11 +23,13 @@
     bcast = comm.bcast
     MPI_rank = comm.Get_rank()
     MPI_size = comm.Get_size()
+    barrier = comm.Barrier
     has_mpi = True
 except:
     MPI_rank = 0
     MPI_size = 1
     bcast = lambda v, n: v
+    barrier = lambda : True
     has_mpi = False
 
 
@@ -88,6 +92,16 @@ def uuid(self):
     def __getattr__(self, name):
         return getattr(self.__dict__['_io'], name)
 
+class DandiFileWrapper(object):
+    """Stub wrapper for NWB files hosted on DANDI; defines no __init__, so _id and _io are never set here."""
+
+    @property
+    def uuid(self):
+        return self._id
+
+    def __getattr__(self, name):
+        return getattr(self.__dict__['_io'], name)
+
 
 class TimeWindow(object):
     """
@@ -220,7 +234,8 @@ def __getitem__(self, unit_info):
 
 class MappingStrategy(object):
     def __init__(self, **kwargs):
-        self._nwb_paths = kwargs['input_file']
+        self._nwb_paths = kwargs.get('input_file', None)
+        self._dandi_repo = kwargs.get('dandi_repo', None)
         self._filters = kwargs.get('units', {})
         self._simulation_onset = kwargs.get('interval_offset', 0.0)/1000.0
         self._missing_ids = kwargs.get('missing_ids', 'fail')
@@ -234,14 +249,27 @@ def __init__(self, **kwargs):
         self._units2nodes_map = None
 
     @lazy_property
-    def nwb_files(self):
-        if not isinstance(self._nwb_paths, (list, tuple)):
-            self._nwb_paths = [self._nwb_paths]
+    def nwb_files(self):
+        if not (self._nwb_paths or self._dandi_repo):
+            io.log_exception('ecephys_probe module missing "input_file" and/or "dandi_repo" parameter, please specify location of NWB files or repo containing sorted spike units.')
+
+        if self._nwb_paths is None:
+            self._nwb_paths = []
+        elif not isinstance(self._nwb_paths, (list, tuple)):
+            self._nwb_paths = [self._nwb_paths]
 
         nwb_files = []
-        for nwb_path in self._nwb_paths:
+        for nwb_path in self._nwb_paths:
             nwb_files.append(NWBFileWrapper(nwb_path))
 
+        if self._dandi_repo is not None:
+            if isinstance(self._dandi_repo, dict):
+                dandi_nwbs = get_dandiset(**self._dandi_repo)
+            else:
+                dandi_nwbs = get_dandiset(dandi_repo=self._dandi_repo)
+
+            nwb_files.extend(dandi_nwbs)
+
         return nwb_files
 
     @property
@@ -396,4 +424,54 @@ def filter_table(table_df, filters_dict):
 
     table_df = table_df[mask]
 
-    return table_df
\ No newline at end of file
+    return table_df
+
+
+def get_dandiset(dandi_repo, version_id="draft", variable_measured='Units',
+                 download=True, download_dir='dandisets', overwrite=False):
+    """Collect the NWB files of a DANDI dandiset, either downloaded or streamed.
+
+    :param dandi_repo: str of the form "[dandi:]DANDISET_ID[:PATH_PATTERN]"; the pattern is matched
+        against each asset path with fnmatch and defaults to '*'.
+    :param version_id: dandiset version passed to the DANDI API client.
+    :param variable_measured: only use assets whose 'variableMeasured' metadata has a value containing this.
+    :param download: if True save assets to {download_dir}/{dandiset_id}/{asset.path} (rank 0 downloads,
+        every rank then waits at a barrier), otherwise stream each asset over http.
+    :param download_dir: root directory for downloaded assets.
+    :param overwrite: if True re-download assets that already exist on disk.
+    :return: list of NWBFileWrapper objects, one per matching asset.
+    """
+    from dandi.dandiapi import DandiAPIClient
+    import fsspec
+
+    repo_url = dandi_repo.split(':')
+    if repo_url[0] == 'dandi':
+        del repo_url[0]
+
+    dandiset_id = repo_url[0]
+    filepath_pattern = repo_url[1] if len(repo_url) > 1 else '*'
+
+    nwb_files = []
+    with DandiAPIClient() as client:
+        dandiset = client.get_dandiset(dandiset_id, version_id)
+        for asset in dandiset.get_assets():
+            asset_measured_vars = [v['value'] for v in asset.get_raw_metadata().get('variableMeasured', []) if variable_measured in v['value']]
+            if asset_measured_vars and fnmatch.fnmatch(asset.path, filepath_pattern):
+                if download:
+                    nwb_path = Path(download_dir) / Path(dandiset_id) / Path(asset.path)
+                    nwb_path.parent.mkdir(parents=True, exist_ok=True)
+                    if (not nwb_path.exists() or overwrite) and MPI_rank == 0:
+                        asset.download(nwb_path)
+                    barrier()
+                    nwb_files.append(NWBFileWrapper(nwb_path))
+
+                else:
+                    io.debug(f'Streaming {asset.path}')
+                    s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)
+                    fs = fsspec.filesystem("http")
+                    f = fs.open(s3_url, 'rb')
+                    file_h5 = h5py.File(f)
+                    nwb_io = pynwb.NWBHDF5IO(file=file_h5)
+                    nwb_files.append(NWBFileWrapper(nwb_io.read()))
+
+    return nwb_files
diff --git a/examples/bio_neuropixels/config.simulation.dandi.json b/examples/bio_neuropixels/config.simulation.dandi.json
new file mode 100644
index 000000000..ea1dde328
--- /dev/null
+++ b/examples/bio_neuropixels/config.simulation.dandi.json
@@ -0,0 +1,87 @@
+{
+  "manifest": {
+    "$BASE_DIR": ".",
+    "$OUTPUT_DIR": "$BASE_DIR/output",
+    "$INPUT_DIR": "$BASE_DIR/inputs",
+    "$NETWORK_DIR": "$BASE_DIR/network",
+    "$COMPONENT_DIR": "$BASE_DIR/../bio_components"
+  },
+
+  "run": {
+    "tstop": 2000.0,
+    "dt": 0.1,
+    "dL": 20.0,
+    "spike_threshold": -15,
+    "nsteps_block": 5000
+  },
+
+  "target_simulator":"NEURON",
+
+  "conditions": {
+    "celsius": 34.0,
+    "v_init": -80
+  },
+
+  "inputs": {
+    "lgn_spikes": {
+      "input_type": "spikes",
+      "module": "ecephys_probe",
+      "dandi_repo": "000021",
+      "node_set": "LGN",
+      "mapping": "sample",
+      "interval": {
+        "interval_name": "drifting_gratings",
+        "interval_index": 0,
+        "temporal_frequency": 4.0,
+        "orientation": 90
+      },
+      "units": {
+        "location": "LGd",
+        "isi_violations": {"operation": "<", "value": 0.5},
+        "amplitude_cutoff": {"operation": "<", "value": 0.1},
+        "presence_ratio": {"column": "presence_ratio", "operation": ">", "value": 0.9}
+      }
+    }
+  },
+
+  "output":{
+    "log_file": "log.txt",
+    "log_level": "DEBUG",
+    "output_dir": "$OUTPUT_DIR",
+    "spikes_file": "spikes.h5",
+    "spikes_file_csv": "spikes.csv"
+  },
+
+  "components": {
+    "morphologies_dir": "$COMPONENT_DIR/morphologies",
+    "synaptic_models_dir": "$COMPONENT_DIR/synaptic_models",
+    "mechanisms_dir":"$COMPONENT_DIR/mechanisms",
+    "biophysical_neuron_models_dir": "$COMPONENT_DIR/biophysical_neuron_templates/ctdb",
+    "point_neuron_models_dir": "$COMPONENT_DIR/point_neuron_templates"
+  },
+
+  "networks": {
+    "nodes": [
+      {
+        "nodes_file": "$NETWORK_DIR/VISp_nodes.h5",
+        "node_types_file": "$NETWORK_DIR/VISp_node_types.csv"
+      },
+      {
+        "nodes_file": "$NETWORK_DIR/LGN_nodes.h5",
+        "node_types_file": "$NETWORK_DIR/LGN_node_types.csv"
+      }
+    ],
+
+    "edges": [
+      {
+        "edges_file": "$NETWORK_DIR/VISp_VISp_edges.h5",
+        "edge_types_file": "$NETWORK_DIR/VISp_VISp_edge_types.csv",
+        "enabled": false
+      },
+      {
+        "edges_file": "$NETWORK_DIR/LGN_VISp_edges.h5",
+        "edge_types_file": "$NETWORK_DIR/LGN_VISp_edge_types.csv"
+      }
+    ]
+  }
+}