From 0a908dd23c3b977dfef8d47eada82584c139067d Mon Sep 17 00:00:00 2001
From: kaeldai <kael.dai@gmail.com>
Date: Tue, 3 Dec 2024 12:03:27 -0800
Subject: [PATCH] Add ability to download NWB files directly from DANDI

---
 bmtk/simulator/core/modules/ecephys_module.py | 80 +++++++++++++++--
 .../config.simulation.dandi.json              | 87 +++++++++++++++++++
 2 files changed, 161 insertions(+), 6 deletions(-)
 create mode 100644 examples/bio_neuropixels/config.simulation.dandi.json

diff --git a/bmtk/simulator/core/modules/ecephys_module.py b/bmtk/simulator/core/modules/ecephys_module.py
index 42c4023bf..8f2cd9f8a 100644
--- a/bmtk/simulator/core/modules/ecephys_module.py
+++ b/bmtk/simulator/core/modules/ecephys_module.py
@@ -2,6 +2,8 @@
 from typing import Any
 import numpy as np
 import pandas as pd
+import h5py
+import fnmatch
 
 from .simulator_module import SimulatorMod
 from bmtk.simulator.core.io_tools import io
@@ -21,11 +23,13 @@
     bcast = comm.bcast
     MPI_rank = comm.Get_rank()
     MPI_size = comm.Get_size()
+    barrier = comm.Barrier
     has_mpi = True
 except:
     MPI_rank = 0
     MPI_size = 1
     bcast = lambda v, n: v
+    barrier = lambda : True
     has_mpi = False
 
 
@@ -88,6 +92,16 @@ def uuid(self):
     def __getattr__(self, name):
         return getattr(self.__dict__['_io'], name)
 
+class DandiFileWrapper(object):
+    """Wrapper that forwards attribute lookups not found on the instance to the object stored as _io."""
+    # NOTE(review): nothing in this class assigns _id or _io; confirm where they are meant to be set.
+    @property
+    def uuid(self):
+        return self._id
+    
+    def __getattr__(self, name):
+        return getattr(self.__dict__['_io'], name)
+
 
 class TimeWindow(object):
     """
@@ -220,7 +234,8 @@ def __getitem__(self, unit_info):
 
 class MappingStrategy(object):
     def __init__(self, **kwargs):
-        self._nwb_paths = kwargs['input_file']
+        self._nwb_paths = kwargs.get('input_file', None)
+        self._dandi_repo = kwargs.get('dandi_repo', None)
         self._filters = kwargs.get('units', {})       
         self._simulation_onset = kwargs.get('interval_offset', 0.0)/1000.0
         self._missing_ids = kwargs.get('missing_ids', 'fail')
@@ -234,14 +249,29 @@ def __init__(self, **kwargs):
         self._units2nodes_map = None
 
     @lazy_property
     def nwb_files(self):
-        if not isinstance(self._nwb_paths, (list, tuple)):
-            self._nwb_paths = [self._nwb_paths] 
+        """Open every configured NWB source: local "input_file" paths first, then any "dandi_repo" assets."""
+        if not (self._nwb_paths or self._dandi_repo):
+            io.log_exception('ecephys_probe module missing "input_file" and/or "dandi_repo" parameter, please specify location of NWB files or repo containing sorted spike units.')
+
+        if self._nwb_paths is None:
+            self._nwb_paths = []
+        elif not isinstance(self._nwb_paths, (list, tuple)):
+            self._nwb_paths = [self._nwb_paths]
 
         nwb_files = []
-        for nwb_path in self._nwb_paths:            
+        for nwb_path in self._nwb_paths:
             nwb_files.append(NWBFileWrapper(nwb_path))
 
+        if self._dandi_repo is not None:
+            # "dandi_repo" is either a dict of get_dandiset() keyword arguments or a plain repo string.
+            if isinstance(self._dandi_repo, dict):
+                dandi_nwbs = get_dandiset(**self._dandi_repo)
+            else:
+                dandi_nwbs = get_dandiset(dandi_repo=self._dandi_repo)
+
+            nwb_files.extend(dandi_nwbs)
+
         return nwb_files
     
     @property
@@ -396,4 +426,65 @@ def filter_table(table_df, filters_dict):
 
         table_df = table_df[mask]
 
-    return table_df
\ No newline at end of file
+    return table_df
+
+
+def get_dandiset(dandi_repo, version_id="draft", variable_measured='Units',
+                 download=True, download_dir='dandisets', overwrite=False):
+    """Find the NWB assets of a DANDI dandiset and wrap each one for use as an ecephys input.
+
+    :param dandi_repo: dandiset to read, written as "<id>" or "<id>:<path pattern>" with an optional leading
+        "dandi:". The pattern is matched against each asset path with fnmatch and defaults to "*".
+    :param version_id: dandiset version handed to the DANDI API client.
+    :param variable_measured: text that must occur in one of the asset's "variableMeasured" values.
+    :param download: if True, save matching assets as <download_dir>/<dandiset id>/<asset path> and open the
+        local copy; if False, open each asset remotely over HTTP instead.
+    :param download_dir: root directory for downloaded assets.
+    :param overwrite: if True, download an asset again even when the local file already exists.
+    :return: list with one NWBFileWrapper per matching asset.
+    """
+    from dandi.dandiapi import DandiAPIClient
+
+    repo_url = dandi_repo.split(':')
+    if repo_url[0] == 'dandi':
+        del repo_url[0]
+
+    dandiset_id = repo_url[0]
+    filepath_pattern = repo_url[1] if len(repo_url) > 1 else '*'
+
+    nwb_files = []
+    with DandiAPIClient() as client:
+        dandiset = client.get_dandiset(dandiset_id, version_id)
+        for asset in dandiset.get_assets():
+            measured_vars = asset.get_raw_metadata().get('variableMeasured', [])
+            has_variable = any(variable_measured in (v.get('value') or '') for v in measured_vars)
+            if not (has_variable and fnmatch.fnmatch(asset.path, filepath_pattern)):
+                continue
+
+            if download:
+                nwb_path = Path(download_dir) / Path(dandiset_id) / Path(asset.path)
+                nwb_path.parent.mkdir(parents=True, exist_ok=True)
+                # Only rank 0 downloads; every rank then waits at the barrier before opening the file.
+                if (not nwb_path.exists() or overwrite) and MPI_rank == 0:
+                    asset.download(nwb_path)
+                barrier()
+                nwb_files.append(NWBFileWrapper(nwb_path))
+            else:
+                # Streaming-only packages are imported here so download mode does not depend on them.
+                import fsspec
+                import pynwb
+
+                # NOTE(review): confirm the logger has debug(); elsewhere this file calls io.log_exception().
+                io.debug(f'Streaming {asset.path}')
+                s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)
+                fs = fsspec.filesystem("http")
+                # The remote file and HDF5 handles are left open here; nothing in this function closes them.
+                f = fs.open(s3_url, 'rb')
+                file_h5 = h5py.File(f)
+                # Use a separate name: assigning to "io" would make it local to the whole function and
+                # break the io.debug() call above with an UnboundLocalError.
+                nwb_io = pynwb.NWBHDF5IO(file=file_h5)
+                # NOTE(review): other call sites pass a path to NWBFileWrapper; confirm it accepts an NWBFile.
+                nwb_files.append(NWBFileWrapper(nwb_io.read()))
+
+    return nwb_files
diff --git a/examples/bio_neuropixels/config.simulation.dandi.json b/examples/bio_neuropixels/config.simulation.dandi.json
new file mode 100644
index 000000000..ea1dde328
--- /dev/null
+++ b/examples/bio_neuropixels/config.simulation.dandi.json
@@ -0,0 +1,87 @@
+{
+  "manifest": {
+    "$BASE_DIR": ".",
+    "$OUTPUT_DIR": "$BASE_DIR/output",
+    "$INPUT_DIR": "$BASE_DIR/inputs",
+    "$NETWORK_DIR": "$BASE_DIR/network",
+    "$COMPONENT_DIR": "$BASE_DIR/../bio_components"
+  },
+
+  "run": {
+    "tstop": 2000.0,
+    "dt": 0.1,
+    "dL": 20.0,
+    "spike_threshold": -15,
+    "nsteps_block": 5000
+  },
+
+  "target_simulator":"NEURON",
+
+  "conditions": {
+    "celsius": 34.0,
+    "v_init": -80
+  },
+
+  "inputs": {
+    "lgn_spikes": {
+      "input_type": "spikes",
+      "module": "ecephys_probe",
+      "dandi_repo": "000021",
+      "node_set": "LGN",
+      "mapping": "sample",
+      "interval": {
+        "interval_name": "drifting_gratings",
+        "interval_index": 0,
+        "temporal_frequency": 4.0,
+        "orientation": 90
+      },
+      "units": {
+        "location": "LGd",
+        "isi_violations": {"operation": "<", "value": 0.5},
+        "amplitude_cutoff": {"operation": "<", "value": 0.1},
+        "presence_ratio": {"column": "presence_ratio", "operation": ">", "value": 0.9}
+      }
+    }
+  },
+
+  "output":{
+    "log_file": "log.txt",
+    "log_level": "DEBUG",
+    "output_dir": "$OUTPUT_DIR",
+    "spikes_file": "spikes.h5",
+    "spikes_file_csv": "spikes.csv"
+  },
+
+  "components": {
+    "morphologies_dir": "$COMPONENT_DIR/morphologies",
+    "synaptic_models_dir": "$COMPONENT_DIR/synaptic_models",
+    "mechanisms_dir":"$COMPONENT_DIR/mechanisms",
+    "biophysical_neuron_models_dir": "$COMPONENT_DIR/biophysical_neuron_templates/ctdb",
+    "point_neuron_models_dir": "$COMPONENT_DIR/point_neuron_templates"
+  },
+
+  "networks": {
+    "nodes": [
+      {
+        "nodes_file": "$NETWORK_DIR/VISp_nodes.h5",
+        "node_types_file": "$NETWORK_DIR/VISp_node_types.csv"
+      },
+      {
+        "nodes_file": "$NETWORK_DIR/LGN_nodes.h5",
+        "node_types_file": "$NETWORK_DIR/LGN_node_types.csv"
+      }
+    ],
+
+    "edges": [
+      {
+        "edges_file": "$NETWORK_DIR/VISp_VISp_edges.h5",
+        "edge_types_file": "$NETWORK_DIR/VISp_VISp_edge_types.csv",
+        "enabled": false
+      },
+      {
+        "edges_file": "$NETWORK_DIR/LGN_VISp_edges.h5",
+        "edge_types_file": "$NETWORK_DIR/LGN_VISp_edge_types.csv"
+      }
+    ]
+  }
+}