Merge pull request #923 from datajoint/nd2support

nd2 support
MouseLand · Apr 2, 2023 · 399e38e · 399e38e
2 parents d007f4f + 74b96b8
commit 399e38e
Show file tree

Hide file tree

Showing 6 changed files with 167 additions and 1 deletion.
diff --git a/docs/inputs.rst b/docs/inputs.rst
@@ -148,6 +148,14 @@ Scanbox binary files (*.sbx) work out of the box if you set ``ops['input_format'
 When recording in bidirectional mode some columns might have every other line saturated; to trim these during loading set ``ops['sbx_ndeadcols']``. Set this option to ``-1`` to let suite2p compute the number of columns automatically, a positive integer to specify the number of columns to trim.
 Joao Couto (@jcouto) wrote the binary sbx parser.
 
+
+Nikon nd2 files
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Suite2p reads nd2 files using the nd2 package, which returns a numpy array representing the data with a minimum of two dimensions (Height, Width). The data can also have additional dimensions for Time, Depth, and Channel. If any of these dimensions are missing, Suite2p adds them so that the array is always 5-dimensional, in the order Time, Depth, Channel, Height, Width. To use Suite2p with nd2 files, simply set ``ops['input_format'] = "nd2"``.
+
+
+
 BinaryRWFile
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/setup.py b/setup.py
@@ -14,7 +14,8 @@
         'h5py',
         'sbxreader',
         'scikit-learn',
-        'cellpose']
+        'cellpose',
+        'nd2']
 
 gui_deps = [
         "pyqt5",

diff --git a/suite2p/io/__init__.py b/suite2p/io/__init__.py
@@ -3,5 +3,6 @@
 from .save import combined, compute_dydx, save_mat
 from .sbx import sbx_to_binary
 from .tiff import mesoscan_to_binary, ome_to_binary, tiff_to_binary, generate_tiff_filename, save_tiff
+from .nd2 import nd2_to_binary
 from .binary import BinaryFile, BinaryRWFile, BinaryFileCombined
 from .server import send_jobs
diff --git a/suite2p/io/nd2.py b/suite2p/io/nd2.py
@@ -0,0 +1,124 @@
+import os
+import gc
+import math
+import time
+import numpy as np
+from . import utils
+import nd2
+
+
+def nd2_to_binary(ops):
+    """finds nd2 files and writes them to binaries
+
+    Parameters
+    ----------
+    ops: dictionary
+        'nplanes', 'data_path', 'save_path', 'save_folder', 'fast_disk',
+        'nchannels', 'keep_movie_raw', 'look_one_level_down'
+
+    Returns
+    -------
+        ops : dictionary of first plane
+            ops['reg_file'] or ops['raw_file'] is created binary
+            assigns keys 'Ly', 'Lx', 'tiffreader', 'first_tiffs',
+            'nframes', 'meanImg', 'meanImg_chan2'
+    """
+
+    t0 = time.time()
+    # copy ops to list where each element is ops for each plane
+    ops1 = utils.init_ops(ops)
+
+    # open all binary files for writing
+    # look for nd2s in all requested folders
+    ops1, fs, reg_file, reg_file_chan2 = utils.find_files_open_binaries(ops1, False)
+    ops = ops1[0]
+
+    # loop over all nd2 files
+    iall = 0
+    ik = 0
+    for file_name in fs:
+        # open nd2
+        nd2_file = nd2.ND2File(file_name)
+        nd2_dims = {k: i for i, k in enumerate(nd2_file.sizes)}
+
+        valid_dimensions = "TZCYX"
+        assert set(nd2_dims) <= set(
+            valid_dimensions
+        ), f"Unknown dimensions {set(nd2_dims)-set(valid_dimensions)} in file {file_name}."
+
+        # Sort the dimensions in the order of TZCYX, skipping the missing ones.
+        im = nd2_file.asarray().transpose(
+            [nd2_dims[x] for x in valid_dimensions if x in nd2_dims]
+        )
+
+        # Expand array to include the missing dimensions.
+        for i, dim in enumerate("TZC"):
+            if dim not in nd2_dims:
+                im = np.expand_dims(im, i)
+
+        nplanes = nd2_file.sizes["Z"] if "Z" in nd2_file.sizes else 1
+        nchannels = nd2_file.sizes["C"] if "C" in nd2_file.sizes else 1
+        nframes = nd2_file.sizes["T"] if "T" in nd2_file.sizes else 1
+
+        iblocks = np.arange(0, nframes, ops1[0]["batch_size"])
+        if iblocks[-1] < nframes:
+            iblocks = np.append(iblocks, nframes)
+
+        if nchannels > 1:
+            nfunc = ops1[0]["functional_chan"] - 1
+        else:
+            nfunc = 0
+
+        assert im.max() < 32768 and im.min() >= -32768, "image data is out of range"
+        im = im.astype(np.int16)
+
+        # loop over all frames
+        for ichunk, onset in enumerate(iblocks[:-1]):
+            offset = iblocks[ichunk + 1]
+            im_p = np.array(im[onset:offset, :, :, :, :])
+            im2mean = im_p.mean(axis=0).astype(np.float32) / len(iblocks)
+            for ichan in range(nchannels):
+                nframes = im_p.shape[0]
+                im2write = im_p[:, :, ichan, :, :]
+                for j in range(0, nplanes):
+                    if iall == 0:
+                        ops1[j]["meanImg"] = np.zeros(
+                            (im_p.shape[3], im_p.shape[4]), np.float32
+                        )
+                        if nchannels > 1:
+                            ops1[j]["meanImg_chan2"] = np.zeros(
+                                (im_p.shape[3], im_p.shape[4]), np.float32
+                            )
+                        ops1[j]["nframes"] = 0
+                    if ichan == nfunc:
+                        ops1[j]["meanImg"] += np.squeeze(im2mean[j, ichan, :, :])
+                        reg_file[j].write(
+                            bytearray(im2write[:, j, :, :].astype("int16"))
+                        )
+                    else:
+                        ops1[j]["meanImg_chan2"] += np.squeeze(im2mean[j, ichan, :, :])
+                        reg_file_chan2[j].write(
+                            bytearray(im2write[:, j, :, :].astype("int16"))
+                        )
+
+                    ops1[j]["nframes"] += im2write.shape[0]
+            ik += nframes
+            iall += nframes
+
+        nd2_file.close()
+
+    # write ops files
+    do_registration = ops1[0]["do_registration"]
+    for ops in ops1:
+        ops["Ly"] = im.shape[3]
+        ops["Lx"] = im.shape[4]
+        if not do_registration:
+            ops["yrange"] = np.array([0, ops["Ly"]])
+            ops["xrange"] = np.array([0, ops["Lx"]])
+        np.save(ops["ops_path"], ops)
+    # close all binary files and write ops files
+    for j in range(0, nplanes):
+        reg_file[j].close()
+        if nchannels > 1:
+            reg_file_chan2[j].close()
+    return ops1[0]
diff --git a/suite2p/io/utils.py b/suite2p/io/utils.py
@@ -161,6 +161,30 @@ def get_tif_list(ops):
             print('** Found %d tifs - converting to binary **'%(len(fsall)))
     return fsall, ops
 
+
+def get_nd2_list(ops):
+    """ make list of nd2 files to process
+    if ops['look_one_level_down'], then all nd2's in all folders + one level down
+    """
+    froot = ops['data_path']
+    fold_list = ops['data_path']
+    fsall = []
+    nfs = 0
+    first_tiffs = []
+    for k,fld in enumerate(fold_list):
+        fs, ftiffs = list_files(fld, ops['look_one_level_down'],
+                                ["*.nd2"])
+        fsall.extend(fs)
+        first_tiffs.extend(list(ftiffs))
+    if len(fs)==0:
+        print('Could not find any nd2 files')
+        raise Exception('no nd2s')
+    else:
+        ops['first_tiffs'] = np.array(first_tiffs).astype('bool')
+        print('** Found %d nd2 files - converting to binary **'%(len(fsall)))
+    return fsall, ops
+
+
 def find_files_open_binaries(ops1, ish5=False):
     """  finds tiffs or h5 files and opens binaries for writing
 
@@ -216,6 +240,11 @@ def find_files_open_binaries(ops1, ish5=False):
         fs, ops2 = get_sbx_list(ops1[0])
         print('Scanbox files:')
         print('\n'.join(fs))
+    elif input_format == 'nd2':
+        # find nd2s
+        fs, ops2 = get_nd2_list(ops1[0])
+        print('Nikon files:')
+        print('\n'.join(fs))
     else:
         # find tiffs
         fs, ops2 = get_tif_list(ops1[0])

diff --git a/suite2p/run_s2p.py b/suite2p/run_s2p.py
@@ -385,6 +385,8 @@ def run_s2p(ops={}, db={}, server={}):
             ops['input_format'] = 'nwb'
         elif ops.get('mesoscan'):
             ops['input_format'] = 'mesoscan'
+        elif ops.get('nd2'):
+            ops['input_format'] = 'nd2'
         elif HAS_HAUS:
             ops['input_format'] = 'haus'
         elif not 'input_format' in ops:
@@ -396,6 +398,7 @@ def run_s2p(ops={}, db={}, server={}):
             'h5': io.h5py_to_binary,
             'nwb': io.nwb_to_binary,
             'sbx': io.sbx_to_binary,
+            'nd2': io.nd2_to_binary,
             'mesoscan': io.mesoscan_to_binary,
             'haus': lambda ops: haussio.load_haussio(ops['data_path'][0]).tosuite2p(ops.copy()),
             'bruker': io.ome_to_binary,