From 51c477c4c3fae254b30ec25f158045ad2eefa107 Mon Sep 17 00:00:00 2001
From: BalzaniEdoardo <edoardo.balzani87@gmail.com>
Date: Tue, 28 May 2024 09:04:32 -0400
Subject: [PATCH 01/11] run a fit

---
 docs/neural_modeling/plot_04_v1_cells.py | 371 ++++++++++++++++-------
 1 file changed, 255 insertions(+), 116 deletions(-)

diff --git a/docs/neural_modeling/plot_04_v1_cells.py b/docs/neural_modeling/plot_04_v1_cells.py
index d78ae616..b414a414 100644
--- a/docs/neural_modeling/plot_04_v1_cells.py
+++ b/docs/neural_modeling/plot_04_v1_cells.py
@@ -8,13 +8,14 @@
     To run this notebook locally, please download the [utility functions](https://github.com/flatironinstitute/nemos/tree/main/docs/neural_modeling/examples_utils) in the same folder as the example notebook.
 
 """
-
+import jax
 import matplotlib.pyplot as plt
 import numpy as np
 import pynapple as nap
 from examples_utils import data
 
 import nemos as nmo
+import jax.numpy as jnp
 
 # configure plots some
 plt.style.use("examples_utils/nemos.mplstyle")
@@ -55,7 +56,7 @@
 # There are 73 neurons recorded together in V1. To fit the GLM faster, we will focus on one neuron.
 print(units)
 # this returns TsGroup with one neuron only
-spikes = units[[34]]
+spikes = units[units.rate >= 5.0]
 
 # %%
 # How could we predict neuron's response to white noise stimulus?
@@ -110,13 +111,13 @@
 sta[1, 0]
 
 # %%
-# we can easily plot this
-
-fig, axes = plt.subplots(1, len(sta), figsize=(3*len(sta),3))
-for i, t in enumerate(sta.t):
-    axes[i].imshow(sta[i,0], vmin = np.min(sta), vmax = np.max(sta),
-                   cmap='Greys_r')
-    axes[i].set_title(str(t)+" s")
+# # we can easily plot this
+#
+# fig, axes = plt.subplots(1, len(sta), figsize=(3*len(sta),3))
+# for i, t in enumerate(sta.t):
+#     axes[i].imshow(sta[i,0], vmin = np.min(sta), vmax = np.max(sta),
+#                    cmap='Greys_r')
+#     axes[i].set_title(str(t)+" s")
 
 
 # %%
@@ -129,9 +130,9 @@
 
 # mkdocs_gallery_thumbnail_number = 3
 receptive_field = np.mean(sta.get(-0.125, -0.05), axis=0)[0]
-
-fig, ax = plt.subplots(1, 1, figsize=(4,4))
-ax.imshow(receptive_field, cmap='Greys_r')
+#
+# fig, ax = plt.subplots(1, 1, figsize=(4,4))
+# ax.imshow(receptive_field, cmap='Greys_r')
 
 # %%
 #
@@ -145,111 +146,249 @@
 print(np.dot(receptive_field.flatten(), stimulus[0].flatten()))
 
 # %%
-#
-# When performing this operation on multiple stimuli, things become slightly
-# more complicated. For loops on the above methods would work, but would be
-# slow. Reshaping and using the dot product is one common method, as are
-# methods like `np.tensordot`.
-#
-# We'll use einsum to do this, which is a convenient way of representing many
-# different matrix operations:
-
-filtered_stimulus = np.einsum('t h w, h w -> t', stimulus, receptive_field)
-
-# %%
-#
-# This notation says: take these arrays with dimensions `(t,h,w)` and `(h,w)`
-# and multiply and sum to get an array of shape `(t,)`. This performs the same
-# operations as above.
-#
-# And this remains a pynapple object, so we can easily visualize it!
-
-fig, ax = plt.subplots(1, 1, figsize=(12,4))
-ax.plot(filtered_stimulus)
-
-# %%
-#
-# But what is this? It's how much each frame in the video should drive our
-# neuron, based on the receptive field we fit using the spike-triggered
-# average.
-#
-# This, then, is the spatial component of our input, as described above.
-#
-# ## Preparing data for nemos
-#
-# We'll now use the GLM to fit the temporal component. To do that, let's get
-# this and our spike counts into the proper format for nemos:
-
-# grab spikes from when we were showing our stimulus, and bin at 1 msec
-# resolution
-bin_size = .001
-counts = spikes[34].restrict(filtered_stimulus.time_support).count(bin_size)
-print(counts.rate)
-print(filtered_stimulus.rate)
-
-# %%
-#
-# Hold on, our stimulus is at a much lower rate than what we want for our rates
-# -- in previous neural_modeling, our input has been at a higher rate than our spikes,
-# and so we used `bin_average` to down-sample to the appropriate rate. When the
-# input is at a lower rate, we need to think a little more carefully about how
-# to up-sample.
-
-print(counts[:5])
-print(filtered_stimulus[:5])
-
-# %%
-#
-# What was the visual input to the neuron at time 0.005? It was the same input
-# as time 0. At time 0.0015? Same thing, up until we pass time 0.025017. Thus,
-# we want to "fill forward" the values of our input, and we have pynapple
-# convenience function to do so:
-filtered_stimulus = data.fill_forward(counts, filtered_stimulus)
-filtered_stimulus
-
-# %%
-#
-# We can see that the time points are now aligned, and we've filled forward the
-# values the way we'd like.
-#
-# Now, similar to the [head direction tutorial](../02_head_direction), we'll
-# use the log-stretched raised cosine basis to create the predictor for our
-# GLM:
-
-window_size = 100
-basis = nmo.basis.RaisedCosineBasisLog(8, mode="conv", window_size=window_size)
-
-convolved_input = basis.compute_features(filtered_stimulus)
+# ## Firing rate model
+# What we want is to model the log-firing rate as a linear combination of the past
+# stimuli $\bm{x}_t$ over a fixed window, here $x_t$ is an array representing the
+# flattened image of shape `(nm, )`, where n and m are the pixel of the x and y axes
+# of the noise stimuli.
+# Mathematically, this can be expressed as,
+# $$
+# \log \mu_t = \sum \beta_{i} \bm{x}_{t-i}
+# $$
+# Where beta is a vector of coefficients, also of shape `(nm, )`. This is quite a lot of coefficients.
+# For example, if you want to use a window of 150ms at 10 ms resolution on a 51x51 image,
+# you'll end up with 51^2 x 10 = 26010 coefficients.
+# We can use a basis set to reduce the dimensionality: first we create a bank of basis with 51x51
+# elements.
+n_bas = 14
+basis = nmo.basis.RaisedCosineBasisLinear(n_basis_funcs=n_bas)**2
+
+basis_coupling = nmo.basis.RaisedCosineBasisLog(3, mode="conv", window_size=20)
+X, Y, basis_eval = basis.evaluate_on_grid(51, 51)
+
+# plot the basis set
+fig, axs = plt.subplots(n_bas,n_bas, figsize=(10, 8))
+for i in range(n_bas):
+    for j in range(n_bas):
+        axs[i, j].contourf(X, Y, basis_eval[..., i*n_bas + j])
+        axs[i, j].set_xticks([])
+        axs[i, j].set_yticks([])
 
 # %%
+# Let's create step-by-step the predictor starting from a single sample.
+
+bin_size = 0.01  # 10 ms
+
+# fix the window size used for prediction (125 ms of video)
+prediction_window = 0.13  # duration of the window in sec
+
+# number of lags
+lags = int(np.ceil(prediction_window / bin_size))
+
+# batch size in sec
+batch_size = 2000
+
+def batcher(time: float):
+
+    # get the stimulus in a 10 sec interval plus a window
+    ep = nap.IntervalSet(start=time, end=time + batch_size + prediction_window)
+
+    y = spikes[34].count(0.01, ep)
+    # up-sample the stimulus
+    x = data.fill_forward(y, stimulus.restrict(ep))
+
+    # vectorize roll
+    def roll_and_crop(x, lag):
+        return jnp.roll(x, lag, axis=0)[:-lags]
+
+    roll = jax.vmap(roll_and_crop, in_axes=(None, 0), out_axes=0)
+    rolled_stim = roll(x.d, -jnp.arange(lags))
+    features = jnp.einsum("ltnm, nmk -> tlk", rolled_stim, basis_eval).reshape(rolled_stim.shape[1], -1)
+    coupling = basis_coupling.compute_features(spikes.count(0.01, ep))[lags:]
+    return rolled_stim, np.hstack((coupling, features)), y[lags:]
+
+model = nmo.glm.GLM(
+    regularizer=nmo.regularizer.Ridge(
+        regularizer_strength=0.01,
+        solver_kwargs={"stepsize": 0.001, "acceleration": False}
+    )
+)
+rstim, X, Y = batcher(0)
+n_coupling_coef = len(spikes) * basis_coupling.n_basis_funcs
+
+model = nmo.glm.GLM(
+    observation_model=nmo.observation_models.PoissonObservations(inverse_link_function=jax.nn.softplus),
+    regularizer=nmo.regularizer.UnRegularized()
+)
+model.fit(X, Y)
+
+model_unc = nmo.glm.GLM(
+    observation_model=nmo.observation_models.PoissonObservations(inverse_link_function=jax.nn.softplus),
+    regularizer=nmo.regularizer.UnRegularized()
+)
+model_unc.fit(X[:, n_coupling_coef:], Y)
+# params, state = model.initialize_solver(*batcher(0))
+# np.random.seed(123)
+# for k in range(500):
+#     print(k)
+#     time = np.random.uniform(0, 2400)
+#     X, Y = batcher(time)
+#     params, state = model.update(params, state, X, Y)
+#     print(k, np.any(np.isnan(params[0])))
+#     if np.any(np.isnan(params[0])):
+#         break
+#
+#
+# resp = np.einsum("tj, nmj->tnm", params[0].reshape(13,-1), basis_eval)
+#
+# # plot the basis set
+aa = np.einsum("ltij, t", rstim, Y)
+bb = np.einsum("lk, ijk->ijl", np.einsum("tlk,t->lk", X[:, n_coupling_coef:].reshape((-1, lags, n_bas**2)), Y), basis_eval)
+cc = np.einsum("lk,ijk->ijl", model.coef_[n_coupling_coef:].reshape(lags, -1), basis_eval)
+fig, axs = plt.subplots(3,lags, figsize=(10, 3.5))
+mn0,mx0 = aa.min(), aa.max()
+mn1,mx1 = bb.min(), bb.max()
+mn2,mx2 = cc.min(), cc.max()
+for i in range(13):
+    axs[0, i].imshow(aa[..., i], vmin=mn0, vmax=mx0)
+    axs[0, i].set_xticks([])
+    axs[0, i].set_yticks([])
+    axs[1, i].imshow(bb[..., i], vmin=mn1, vmax=mx1)
+    axs[1, i].set_xticks([])
+    axs[1, i].set_yticks([])
+
+    axs[2, i].imshow(cc[..., i], vmin=mn2, vmax=mx2)
+    axs[2, i].set_xticks([])
+    axs[2, i].set_yticks([])
+
+rate = nap.Tsd(t=Y.t, d=model.predict(X), time_support=Y.time_support)
+rate_unc = nap.Tsd(t=Y.t, d=model_unc.predict(X[:, n_coupling_coef:]), time_support=Y.time_support)
+cc_model = nap.compute_event_trigger_average(spikes, rate/0.01, binsize=0.01, windowsize=(-8, 0))
+cc_model_unc = nap.compute_event_trigger_average(spikes, rate_unc/0.01, binsize=0.01, windowsize=(-8, 0))
+cc_spks = nap.compute_event_trigger_average(spikes, Y/0.01, binsize=0.01, windowsize=(-8, 0))
+
+fig, axs = plt.subplots(2,10, figsize=(10, 3.5))
+
+for k in range(len(spikes)):
+    axs[k // 10, k % 10].plot(cc_model_unc[:, k], "b")
+    axs[k // 10, k % 10].plot(cc_model[:, k], "r")
+    axs[k // 10, k % 10].plot(cc_spks[:, k], "k")
+plt.tight_layout()
+
+
+cross_corr = nap.compute_crosscorrelogram(group=spikes, binsize=0.01, windowsize=8)
+for k in range(19):
+    plt.plot(cross_corr.iloc[:, k])
+
+# The resulting predictor will be of shape `(13, 64)`, i.e. number of frames in the past
+# by number of basis.
+
+# # %%
+# #
+# # When performing this operation on multiple stimuli, things become slightly
+# # more complicated. For loops on the above methods would work, but would be
+# # slow. Reshaping and using the dot product is one common method, as are
+# # methods like `np.tensordot`.
+# #
+# # We'll use einsum to do this, which is a convenient way of representing many
+# # different matrix operations:
+#
+# filtered_stimulus = np.einsum('t h w, h w -> t', stimulus, receptive_field)
+#
+# # %%
+# #
+# # This notation says: take these arrays with dimensions `(t,h,w)` and `(h,w)`
+# # and multiply and sum to get an array of shape `(t,)`. This performs the same
+# # operations as above.
+# #
+# # And this remains a pynapple object, so we can easily visualize it!
+#
+# fig, ax = plt.subplots(1, 1, figsize=(12,4))
+# ax.plot(filtered_stimulus)
+#
+# # %%
+# #
+# # But what is this? It's how much each frame in the video should drive our
+# # neuron, based on the receptive field we fit using the spike-triggered
+# # average.
+# #
+# # This, then, is the spatial component of our input, as described above.
+# #
+# # ## Preparing data for nemos
+# #
+# # We'll now use the GLM to fit the temporal component. To do that, let's get
+# # this and our spike counts into the proper format for nemos:
+#
+# # grab spikes from when we were showing our stimulus, and bin at 1 msec
+# # resolution
+# bin_size = .001
+# counts = spikes[34].restrict(filtered_stimulus.time_support).count(bin_size)
+# print(counts.rate)
+# print(filtered_stimulus.rate)
+#
+# # %%
+# #
+# # Hold on, our stimulus is at a much lower rate than what we want for our rates
+# # -- in previous neural_modeling, our input has been at a higher rate than our spikes,
+# # and so we used `bin_average` to down-sample to the appropriate rate. When the
+# # input is at a lower rate, we need to think a little more carefully about how
+# # to up-sample.
+#
+# print(counts[:5])
+# print(filtered_stimulus[:5])
+#
+# # %%
+# #
+# # What was the visual input to the neuron at time 0.005? It was the same input
+# # as time 0. At time 0.0015? Same thing, up until we pass time 0.025017. Thus,
+# # we want to "fill forward" the values of our input, and we have pynapple
+# # convenience function to do so:
+# filtered_stimulus = data.fill_forward(counts, filtered_stimulus)
+# filtered_stimulus
+#
+# # %%
+# #
+# # We can see that the time points are now aligned, and we've filled forward the
+# # values the way we'd like.
+# #
+# # Now, similar to the [head direction tutorial](../02_head_direction), we'll
+# # use the log-stretched raised cosine basis to create the predictor for our
+# # GLM:
+#
+# window_size = 100
+# basis = nmo.basis.RaisedCosineBasisLog(8, mode="conv", window_size=window_size)
+#
+# convolved_input = basis.compute_features(filtered_stimulus)
+#
+# # %%
+# #
+# # convolved_input has shape (n_time_pts, n_features * n_basis_funcs), because
+# # n_features is the singleton dimension from filtered_stimulus.
+# #
+# # ## Fitting the GLM
+# #
+# # Now we're ready to fit the model! Let's do it, same as before:
+#
+#
+# model = nmo.glm.GLM(regularizer=nmo.regularizer.UnRegularized(solver_name="LBFGS"))
+# model.fit(convolved_input, counts)
+#
+# # %%
+# #
+# # We have our coefficients for each of our 8 basis functions, let's combine
+# # them to get the temporal time course of our input:
+#
+# time, basis_kernels = basis.evaluate_on_grid(window_size)
+# time *= bin_size * window_size
+# temp_weights = np.einsum('b, t b -> t', model.coef_, basis_kernels)
+# plt.plot(time, temp_weights)
+# plt.xlabel("time[sec]")
+# plt.ylabel("amplitude")
+#
+# # %%
+# #
+# # When taken together, the results of the GLM and the spike-triggered average
+# # give us the linear component of our LNP model: the separable spatio-temporal
+# # filter.
 #
-# convolved_input has shape (n_time_pts, n_features * n_basis_funcs), because
-# n_features is the singleton dimension from filtered_stimulus.
-#
-# ## Fitting the GLM
 #
-# Now we're ready to fit the model! Let's do it, same as before:
-
-
-model = nmo.glm.GLM(regularizer=nmo.regularizer.UnRegularized(solver_name="LBFGS"))
-model.fit(convolved_input, counts)
-
-# %%
-#
-# We have our coefficients for each of our 8 basis functions, let's combine
-# them to get the temporal time course of our input:
-
-time, basis_kernels = basis.evaluate_on_grid(window_size)
-time *= bin_size * window_size
-temp_weights = np.einsum('b, t b -> t', model.coef_, basis_kernels)
-plt.plot(time, temp_weights)
-plt.xlabel("time[sec]")
-plt.ylabel("amplitude")
-
-# %%
-#
-# When taken together, the results of the GLM and the spike-triggered average
-# give us the linear component of our LNP model: the separable spatio-temporal
-# filter.
-
-

From 07dc7c978d2b699dab4529689d9150ee7fe1e2ae Mon Sep 17 00:00:00 2001
From: BalzaniEdoardo <edoardo.balzani87@gmail.com>
Date: Tue, 28 May 2024 12:27:38 -0400
Subject: [PATCH 02/11] removed magic designer

---
 docs/api_guide/_plot_06_magic_designer.py | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 docs/api_guide/_plot_06_magic_designer.py

diff --git a/docs/api_guide/_plot_06_magic_designer.py b/docs/api_guide/_plot_06_magic_designer.py
deleted file mode 100644
index 49c95a65..00000000
--- a/docs/api_guide/_plot_06_magic_designer.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Model design made easy."""
\ No newline at end of file

From de83652cf83d34ccd502990564e07ff48858c20c Mon Sep 17 00:00:00 2001
From: BalzaniEdoardo <edoardo.balzani87@gmail.com>
Date: Tue, 28 May 2024 14:01:55 -0400
Subject: [PATCH 03/11] uniform NeMoS label

---
 docs/api_guide/plot_03_glm_pytree.py          |  3 +--
 docs/background/plot_00_conceptual_intro.py   |  6 +++---
 docs/index.md                                 | 16 +++++++--------
 docs/installation.md                          | 10 +++++-----
 .../plot_01_current_injection.py              | 20 +++++++++----------
 .../neural_modeling/plot_02_head_direction.py | 14 ++++++-------
 docs/neural_modeling/plot_03_grid_cells.py    |  4 ++--
 docs/neural_modeling/plot_05_place_cells.py   | 10 +++++-----
 8 files changed, 41 insertions(+), 42 deletions(-)

diff --git a/docs/api_guide/plot_03_glm_pytree.py b/docs/api_guide/plot_03_glm_pytree.py
index a5cc442c..9b5a037d 100644
--- a/docs/api_guide/plot_03_glm_pytree.py
+++ b/docs/api_guide/plot_03_glm_pytree.py
@@ -23,7 +23,7 @@
 # %%
 # ## FeaturePytrees
 #
-# A FeaturePytree is a custom nemos object used to represent design matrices,
+# A FeaturePytree is a custom NeMoS object used to represent design matrices,
 # GLM coefficients, and other similar variables. It is a simple
 # [pytree](https://jax.readthedocs.io/en/latest/pytrees.html), a dictionary
 # with strings as keys and arrays as values. These arrays must all have the
@@ -314,7 +314,6 @@
 #
 # We could do all this with matrices as well, but we have to pay attention to
 # indices in a way that is annoying:
-from nemos.type_casting import support_pynapple
 
 X_mat = nmo.utils.pynapple_concatenate_jax([X['head_direction'], X['spatial_position']], -1)
 
diff --git a/docs/background/plot_00_conceptual_intro.py b/docs/background/plot_00_conceptual_intro.py
index a854b2bb..b2ff658c 100644
--- a/docs/background/plot_00_conceptual_intro.py
+++ b/docs/background/plot_00_conceptual_intro.py
@@ -2,7 +2,7 @@
 
 r"""# Generalized Linear Models: An Introduction
 
-Before we dive into using nemos, you might wonder: why model at all? Why not
+Before we dive into using NeMoS, you might wonder: why model at all? Why not
  just make a bunch of tuning curves and submit to *Science*? Modeling is
  helpful because:
 
@@ -136,7 +136,7 @@
 # %%
 # !!! info
 #
-#     In nemos, the non-linearity is kept fixed. We default to the exponential,
+#     In NeMoS, the non-linearity is kept fixed. We default to the exponential,
 #     but a small number of other choices, such as soft-plus, are allowed. The
 #     allowed choices guarantee both the non-negativity constraint described
 #     above, as well as convexity, i.e. a single optimal solution. In
@@ -184,7 +184,7 @@
 #
 # !!! info
 #
-#     In nemos, the log-likelihood can be computed directly by calling the
+#     In NeMoS, the log-likelihood can be computed directly by calling the
 #     `score` method, passing the predictors and the counts. The method first
 #     computes the rate $\lambda(t)$ using (2) and then the likelihood using
 #     (4). This method is used under the hood during optimization.
diff --git a/docs/index.md b/docs/index.md
index 3ca49702..422b9d90 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,4 +1,4 @@
-# nemos 
+# NeMoS 
 
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/flatironinstitute/nemos/blob/main/LICENSE)
 ![Python version](https://img.shields.io/badge/python-3.10-blue.svg)
@@ -10,10 +10,10 @@
 
 
 
-`nemos` (NEural MOdelS) is a statistical modeling framework optimized for systems neuroscience and powered by [JAX](https://jax.readthedocs.io/en/latest/). 
+NeMoS (NEural MOdelS) is a statistical modeling framework optimized for systems neuroscience and powered by [JAX](https://jax.readthedocs.io/en/latest/). 
 It streamlines the process of defining and selecting models, through a collection of easy-to-use methods for feature design.
 
-The core of `nemos` includes GPU-accelerated, well-tested implementations of standard statistical models, currently 
+The core of NeMoS includes GPU-accelerated, well-tested implementations of standard statistical models, currently 
 focusing on the Generalized Linear Model (GLM). 
 
 We provide a **Poisson GLM** for analyzing spike counts, and a **Gamma GLM** for calcium or voltage imaging traces.
@@ -26,7 +26,7 @@ from Cosyne 2018 [here](https://www.youtube.com/watch?v=NFeGW5ljUoI&t=424s).
 
 ## Overview
 
-At his core, `nemos` consists of two primary modules: the `basis` and the `glm` module.
+At his core, NeMoS consists of two primary modules: the `basis` and the `glm` module.
 
 The `basis` module focuses on designing model features (inputs) for the GLM. It includes a suite of composable feature 
 constructors that accept time-series data as inputs. These inputs can be any observed variables, such as presented 
@@ -51,7 +51,7 @@ evaluating the model performance, and explore its behavior on new input.
 
 ## Examples
 
-Here's a brief demonstration of how the `basis` and `glm` modules work together within nemos.
+Here's a brief demonstration of how the `basis` and `glm` modules work together within NeMoS.
 
 ### Poisson GLM for features analysis
 
@@ -100,7 +100,7 @@ ll = glm.score(X, y)
 <img src="assets/glm_population_scheme.svg" width="84%">
 
 This second example demonstrates feature construction by convolving the simultaneously recorded population spike counts with a bank of filters, utilizing the basis in `conv` mode.
-The figure above show the GLM scheme for a single neuron, however in `nemos` you can fit jointly the whole population with the [`PopulationGLM`](generated/api_guide/plot_04_population_glm) object.
+The figure above show the GLM scheme for a single neuron, however in NeMoS you can fit jointly the whole population with the [`PopulationGLM`](generated/api_guide/plot_04_population_glm) object.
 
 #### Feature Representation
 
@@ -148,7 +148,7 @@ Run the following `pip` command in your virtual environment.
  python -m pip install nemos
  ```
 
-For more details, including specifics for GPU users and developers, refer to `nemos` [docs](https://nemos.readthedocs.io/en/latest/installation/).
+For more details, including specifics for GPU users and developers, refer to NeMoS [docs](https://nemos.readthedocs.io/en/latest/installation/).
 
 
 ## Disclaimer
@@ -164,7 +164,7 @@ We communicate via several channels on Github:
 - To report a bug, open an
   [issue](https://github.com/flatironinstitute/nemos/issues).
 - To ask usage questions, discuss broad issues, or show off what you’ve made
-  with nemos, go to
+  with NeMoS, go to
   [Discussions](https://github.com/flatironinstitute/nemos/discussions).
 - To send suggestions for extensions or enhancements, please post in the
   [ideas](https://github.com/flatironinstitute/nemos/discussions/categories/ideas)
diff --git a/docs/installation.md b/docs/installation.md
index 333c0f01..8fc8f003 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -9,7 +9,7 @@
     We suggest downloading an installer from [Anaconda](https://docs.anaconda.com/free/anaconda/install/) or [Miniconda](https://docs.anaconda.com/free/miniconda/). The former includes a comprehensive selection of Python packages for data-science and machine learning. The latter is a minimal installer which includes only few packages. 
     If you are not sure which one works best for you, take a look at the [Anaconda guidelines](https://docs.anaconda.com/free/distro-or-miniconda/).
 
-2. **Create and activate a virtual environment:** Once your updated python is up and running, we recommend creating and activating a Python virtual environment using [`venv`](https://docs.python.org/3/library/venv.html) before installing `nemos`. This practice helps manage dependencies and avoid conflicts with other Python packages. 
+2. **Create and activate a virtual environment:** Once your updated python is up and running, we recommend creating and activating a Python virtual environment using [`venv`](https://docs.python.org/3/library/venv.html) before installing NeMoS. This practice helps manage dependencies and avoid conflicts with other Python packages. 
 
     For `venv`, create your virtual environment in a specific directory. This example uses `~/python_venvs/nemos` for Linux/Mac and `C:%HOMEPATH%\python_venvs\nemos` for Windows; in general, free to use whatever directory works best for you.
 
@@ -41,7 +41,7 @@
 After creating you virtual environment, follow one of the following sections below, depending on whether you need GPU support or not:
 ### CPU Installation
 
-To install `nemos` on a system without a GPU, run this command from within your activated environment, 
+To install NeMoS on a system without a GPU, run this command from within your activated environment, 
 
 **For macOS/Linux users:**
  ```bash
@@ -58,7 +58,7 @@ To install `nemos` on a system without a GPU, run this command from within your
 !!! warning
     JAX does not guarantee GPU support for Windows, see [here](https://jax.readthedocs.io/en/latest/installation.html#supported-platforms) for updates.
 
-For systems equipped with a GPU, you need to specifically install the GPU-enabled versions of `jax` and `jaxlib` before installing `nemos`.
+For systems equipped with a GPU, you need to specifically install the GPU-enabled versions of `jax` and `jaxlib` before installing NeMoS.
 
 1. **Install `jax` and `jaxlib` for GPU:** Follow the [JAX documentation](https://jax.readthedocs.io/en/latest/installation.html) instructions to install `jax` and `jaxlib` with GPU support.
 
@@ -70,11 +70,11 @@ For systems equipped with a GPU, you need to specifically install the GPU-enable
 
     If your GPU is listed among the devices, the installation was successful.
 
-3. **Install `nemos`:** After successfully installing and configuring `jax` for GPU support, install `nemos` using the same command as in the [CPU installation](#cpu-installation).
+3. **Install NeMoS:** After successfully installing and configuring `jax` for GPU support, install NeMoS using the same command as in the [CPU installation](#cpu-installation).
 
 ### Installation For Developers
 
-Developers should clone the repository and install `nemos` in editable mode, including developer dependencies. Follow these steps:
+Developers should clone the repository and install NeMoS in editable mode, including developer dependencies. Follow these steps:
 
 1. **Clone the repo:** From your environment, execute the following commands to clone the repository and navigate to its directory:
     ```bash
diff --git a/docs/neural_modeling/plot_01_current_injection.py b/docs/neural_modeling/plot_01_current_injection.py
index 1b73fc43..73c2d42c 100644
--- a/docs/neural_modeling/plot_01_current_injection.py
+++ b/docs/neural_modeling/plot_01_current_injection.py
@@ -36,13 +36,13 @@
 we'll do using [pynapple](https://pynapple-org.github.io/pynapple/), which
 we'll use throughout this workshop, as it simplifies handling this type of
 data. After we've explored the data some, we'll introduce the Generalized
-Linear Model and how to fit it with nemos.
+Linear Model and how to fit it with NeMoS.
 
 ## Learning objectives {.keep-text}
 
 - Learn how to explore spiking data and do basic analyses using pynapple
-- Learn how to structure data for nemos
-- Learn how to fit a basic Generalized Linear Model using nemos
+- Learn how to structure data for NeMoS
+- Learn how to fit a basic Generalized Linear Model using NeMoS
 - Learn how to retrieve the parameters and predictions from a fit GLM for
   intrepetation.
 
@@ -365,7 +365,7 @@
 # current remains on.
 
 # %%
-# ## Nemos {.strip-code}
+# ## NeMoS {.strip-code}
 #
 # ### Preparing data
 #
@@ -373,7 +373,7 @@
 # Before we construct it, however, we need to get the data into the right
 # format.
 #
-# Nemos requires that the predictors and spike counts it operates on have the
+# NeMoS requires that the predictors and spike counts it operates on have the
 # following properties:
 #
 # - predictors and spike counts must have the same number of time points.
@@ -393,7 +393,7 @@
 #
 #     [jax](https://github.com/google/jax) is a Google-supported python library
 #     for automatic differentiation. It has all sorts of neat features, but the
-#     most relevant of which for nemos is its GPU-compatibility and
+#     most relevant of which for NeMoS is its GPU-compatibility and
 #     just-in-time compilation (both of which make code faster with little
 #     overhead!), as well as the collection of optimizers present in
 #     [jaxopt](https://jaxopt.github.io/stable/).
@@ -429,7 +429,7 @@
 # add two dimensions for axis 1.
 predictor = np.expand_dims(binned_current, 1)
 
-# check that the dimensionality matches nemos expectation
+# check that the dimensionality matches NeMoS expectation
 print(f"predictor shape: {predictor.shape}")
 print(f"count shape: {count.shape}")
 
@@ -441,7 +441,7 @@
 #     neuron -- do you add an extra dimension? or concatenate neurons along one
 #     of the existing dimensions?
 #
-#     In nemos, we always fit Generalized Linear Models to a single neuron at a
+#     In NeMoS, we always fit Generalized Linear Models to a single neuron at a
 #     time. We'll discuss this more in the [following
 #     tutorial](../02_head_direction/), but briefly: you get the same answer
 #     whether you fit the neurons separately or simultaneously, and fitting
@@ -450,7 +450,7 @@
 # !!! info
 #
 #     In this example, we're being very explicit about this conversion to
-#     jax.numpy arrays. However, in general, nemos is able to properly convert
+#     jax.numpy arrays. However, in general, NeMoS is able to properly convert
 #     from pynapple objects to jax.numpy arrays without any additional steps
 #     (it can similarly convert from numpy arrays to jax.numpy arrays). Thus,
 #     in later background we will omit this step.
@@ -671,7 +671,7 @@
 #
 # !!! info
 #
-#     Under the hood, nemos is minimizing the negative log-likelihood, as is
+#     Under the hood, NeMoS is minimizing the negative log-likelihood, as is
 #     typical in many optimization contexts. `score` returns the real
 #     log-likelihood, however, and thus higher is better.
 #
diff --git a/docs/neural_modeling/plot_02_head_direction.py b/docs/neural_modeling/plot_02_head_direction.py
index 130a0316..41ccb0b6 100644
--- a/docs/neural_modeling/plot_02_head_direction.py
+++ b/docs/neural_modeling/plot_02_head_direction.py
@@ -8,8 +8,8 @@
 
 ## Learning objectives
 
-- Learn how to add history-related predictors to nemos GLM
-- Learn about nemos `Basis` objects
+- Learn how to add history-related predictors to NeMoS GLM
+- Learn about NeMoS `Basis` objects
 - Learn how to use `Basis` objects with convolution
 
 """
@@ -99,7 +99,7 @@
 plt.tight_layout()
 
 # %%
-# Before using Nemos, let's explore the data at the population level.
+# Before using NeMoS, let's explore the data at the population level.
 #
 # Let's plot the preferred heading
 #
@@ -138,8 +138,8 @@
 )
 
 # %%
-# ## Nemos {.strip-code}
-# It's time to use nemos. Our goal is to estimate the pairwise interaction between neurons.
+# ## NeMoS {.strip-code}
+# It's time to use NeMoS. Our goal is to estimate the pairwise interaction between neurons.
 # This can be quantified with a GLM if we use the recent population spike history to predict the current time step.
 # ### Self-Connected Single Neuron
 # To simplify our life, let's see first how we can model spike history effects in a single neuron.
@@ -341,7 +341,7 @@
 #     analytical step. We will eventually provide guidance on this choice, but
 #     for now we'll give you a decent choice.
 #
-# nemos includes `Basis` objects to handle the construction and use of these
+# NeMoS includes `Basis` objects to handle the construction and use of these
 # basis functions.
 #
 # When we instantiate this object, the only arguments we need to specify is the
@@ -397,7 +397,7 @@
 #
 # Let's see our basis in action. We can "compress" spike history feature by convolving the basis
 # with the counts (without creating the large spike history feature matrix).
-# This can be performed in nemos by calling the "compute_features" method of basis.
+# This can be performed in NeMoS by calling the "compute_features" method of basis.
 
 
 # equivalent to
diff --git a/docs/neural_modeling/plot_03_grid_cells.py b/docs/neural_modeling/plot_03_grid_cells.py
index 69db5be8..31bf843c 100644
--- a/docs/neural_modeling/plot_03_grid_cells.py
+++ b/docs/neural_modeling/plot_03_grid_cells.py
@@ -81,8 +81,8 @@
 plt.tight_layout()
 
 # %%
-# ## Nemos
-# It's time to use nemos.
+# ## NeMoS
+# It's time to use NeMoS.
 # Let's try to predict the spikes as a function of position and see if we can generate better tuning curves
 # First we start by binning the spike trains in 10 ms bins.
 
diff --git a/docs/neural_modeling/plot_05_place_cells.py b/docs/neural_modeling/plot_05_place_cells.py
index d7377998..db5a414c 100644
--- a/docs/neural_modeling/plot_05_place_cells.py
+++ b/docs/neural_modeling/plot_05_place_cells.py
@@ -215,7 +215,7 @@
 plt.tight_layout()
 
 # %%
-# This neurons show a strong modulation of firing rate as a function of speed but we can also notice that the animal, on average, accelerates when travering the field. Is the speed tuning we observe a true modulation or spurious correlation caused by traversing the place field at different speed and for different theta phase? We can use `nemos` to model the activity and give the position, the phase and the speed as input variable.
+# This neurons show a strong modulation of firing rate as a function of speed but we can also notice that the animal, on average, accelerates when travering the field. Is the speed tuning we observe a true modulation or spurious correlation caused by traversing the place field at different speed and for different theta phase? We can use NeMoS to model the activity and give the position, the phase and the speed as input variable.
 #
 # We will use speed, phase and position to model the activity of the neuron.
 # All the feature have already been brought to the same dimension thanks to `pynapple`.
@@ -243,12 +243,12 @@
 speed_basis = nmo.basis.MSplineBasis(n_basis_funcs=15)
 
 # %%
-# In addition, we will consider position and phase to be a joint variable. In `nemos`, we can combine basis by multiplying them and adding them. In this case the final basis object for our model can be made in one line :
+# In addition, we will consider position and phase to be a joint variable. In NeMoS, we can combine basis by multiplying them and adding them. In this case the final basis object for our model can be made in one line :
 
 basis = position_basis * phase_basis + speed_basis
 
 # %%
-# The object basis only tell us how each basis covers the feature space. For each timestep, we need to _evaluate_ what are the features value. For that we can call `nemos` basis:
+# The object basis only tell us how each basis covers the feature space. For each timestep, we need to _evaluate_ what are the features value. For that we can call NeMoS basis:
 
 X = basis(position, theta, speed)
 
@@ -262,7 +262,7 @@
 # %%
 # ## Model learning
 #
-# We can now use the Poisson GLM from nemos to learn the model.
+# We can now use the Poisson GLM from NeMoS to learn the model.
 
 glm = nmo.glm.GLM(
     regularizer=nmo.regularizer.UnRegularized("LBFGS", solver_kwargs=dict(tol=10**-12))
@@ -273,7 +273,7 @@
 # %%
 # ## Prediction
 #
-# Let's check first if our model can accurately predict the different tuning curves we displayed above. We can use the `predict` function of nemos and then compute new tuning curves
+# Let's check first if our model can accurately predict the different tuning curves we displayed above. We can use the `predict` function of NeMoS and then compute new tuning curves
 
 predicted_rate = glm.predict(X) / bin_size
 

From 826e0e7496c1cb62a39a38edb5aced6443c7bf2c Mon Sep 17 00:00:00 2001
From: BalzaniEdoardo <edoardo.balzani87@gmail.com>
Date: Tue, 28 May 2024 14:02:23 -0400
Subject: [PATCH 04/11] uniform NeMoS label

---
 docs/quickstart.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/quickstart.md b/docs/quickstart.md
index 7cf6203b..cc80bf3f 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -189,4 +189,4 @@ Now we can print the best coefficient.
 {'regularizer__regularizer_strength': 0.001}
 ```
 
-Enjoy modeling with `nemos`!
+Enjoy modeling with NeMoS`!

From 2afc8a2df89d5aa73aff8c5f7e64cefb6570acb8 Mon Sep 17 00:00:00 2001
From: BalzaniEdoardo <edoardo.balzani87@gmail.com>
Date: Tue, 28 May 2024 14:02:44 -0400
Subject: [PATCH 05/11] uniform NeMoS label

---
 docs/quickstart.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/quickstart.md b/docs/quickstart.md
index cc80bf3f..6cc5d5b5 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -189,4 +189,4 @@ Now we can print the best coefficient.
 {'regularizer__regularizer_strength': 0.001}
 ```
 
-Enjoy modeling with NeMoS`!
+Enjoy modeling with NeMoS!

From f4c28fcbfe36bd4a4458b3f3a16a43eb8835a85e Mon Sep 17 00:00:00 2001
From: BalzaniEdoardo <edoardo.balzani87@gmail.com>
Date: Thu, 30 May 2024 17:15:41 -0400
Subject: [PATCH 06/11] modified v1 tutorial

---
 .gitignore                               |   3 -
 docs/api_guide/plot_03_glm_pytree.py     |   2 +-
 docs/neural_modeling/plot_04_v1_cells.py | 390 +++++++++--------------
 3 files changed, 158 insertions(+), 237 deletions(-)

diff --git a/.gitignore b/.gitignore
index 2579acdd..970ea314 100644
--- a/.gitignore
+++ b/.gitignore
@@ -133,9 +133,6 @@ dmypy.json
 .idea/
 
 
-# IntelliJ's project specific settings 
-.idea/
-
 .DS_Store
 
 # mkdocs generated folder
diff --git a/docs/api_guide/plot_03_glm_pytree.py b/docs/api_guide/plot_03_glm_pytree.py
index 9b5a037d..5e16ba9c 100644
--- a/docs/api_guide/plot_03_glm_pytree.py
+++ b/docs/api_guide/plot_03_glm_pytree.py
@@ -164,7 +164,7 @@
 file = h5py.File(fs.open(s3_url, "rb"))
 io = NWBHDF5IO(file=file, load_namespaces=True)
 
-nwb = nap.NWBFile(io.read())
+nwb = nap.NWBFile(io.read(), lazy_loading=False)
 
 print(nwb)
 
diff --git a/docs/neural_modeling/plot_04_v1_cells.py b/docs/neural_modeling/plot_04_v1_cells.py
index b414a414..8b7f3330 100644
--- a/docs/neural_modeling/plot_04_v1_cells.py
+++ b/docs/neural_modeling/plot_04_v1_cells.py
@@ -17,6 +17,9 @@
 import nemos as nmo
 import jax.numpy as jnp
 
+# suppress jax to numpy conversion warning in pynapple
+nap.nap_config.suppress_conversion_warnings = True
+
 # configure plots some
 plt.style.use("examples_utils/nemos.mplstyle")
 
@@ -54,7 +57,9 @@
 
 # %%
 # There are 73 neurons recorded together in V1. To fit the GLM faster, we will focus on one neuron.
+
 print(units)
+
 # this returns TsGroup with one neuron only
 spikes = units[units.rate >= 5.0]
 
@@ -62,15 +67,20 @@
 # How could we predict neuron's response to white noise stimulus?
 # 
 # - we could fit the instantaneous spatial response. that is, just predict
-#   neuron's response to a given frame of white noise. this will give an x by y
+#   neuron's response to a given frame of white noise. this will give an x-pixel by y-pixel
 #   filter. implicitly assumes that there's no temporal info: only matters what
 #   we've just seen
 #
 # - could fit spatiotemporal filter. instead of an x by y that we use
-#   independently on each frame, fit (x, y, t) over, say 100 msecs. and then
+#   independently on each frame, fit (x, y, t) over, say 130 msecs. and then
 #   fit each of these independently (like in head direction example)
 #
-# - that's a lot of parameters! can simplify by assumping that the response is
+# - could reduce the dimensionality by using a bank of k two-dimensional basis
+#   functions of shape (x-pixel, y-pixel, k). We can "project" each (x-pixel, y-pixel) stimulus
+#   image over the basis by computing the dot product of the two, leaving us with a k-dimensional
+#   vector, where k can be much smaller than the original pixel size.
+#
+# - that's still a lot of parameters! can simplify by assuming that the response is
 #   separable: fit a single (x, y) filter and then modulate it over time. this
 #   wouldn't catch e.g., direction-selectivity because it assumes that phase
 #   preference is constant over time
@@ -78,11 +88,7 @@
 # - could make use of our knowledge of V1 and try to fit a more complex
 #   functional form, e.g., a Gabor.
 #
-# That last one is very non-linear and thus non-convex. we'll do the third one.
-#
-# in this example, we'll fit the spatial filter outside of the GLM framework,
-# using spike-triggered average, and then we'll use the GLM to fit the temporal
-# timecourse.
+# That last one is very non-linear and thus non-convex. We'll do the third one.
 #
 # ## Spike-triggered average
 #
@@ -111,284 +117,202 @@
 sta[1, 0]
 
 # %%
-# # we can easily plot this
-#
-# fig, axes = plt.subplots(1, len(sta), figsize=(3*len(sta),3))
-# for i, t in enumerate(sta.t):
-#     axes[i].imshow(sta[i,0], vmin = np.min(sta), vmax = np.max(sta),
-#                    cmap='Greys_r')
-#     axes[i].set_title(str(t)+" s")
+# We can easily plot this.
 
+fig, axes = plt.subplots(1, len(sta), figsize=(3*len(sta),3))
+for i, t in enumerate(sta.t):
+    axes[i].imshow(sta[i,0], vmin=np.min(sta), vmax=np.max(sta),
+                   cmap='Greys_r')
+    axes[i].set_title(str(t)+" s")
 
-# %%
-#
-# that looks pretty reasonable for a V1 simple cell: localized in space,
-# orientation, and spatial frequency. that is, looks Gabor-ish
-#
-# To convert this to the spatial filter we'll use for the GLM, let's take the
-# average across the bins that look informative: -.125 to -.05
-
-# mkdocs_gallery_thumbnail_number = 3
-receptive_field = np.mean(sta.get(-0.125, -0.05), axis=0)[0]
-#
-# fig, ax = plt.subplots(1, 1, figsize=(4,4))
-# ax.imshow(receptive_field, cmap='Greys_r')
 
 # %%
 #
+# That looks pretty reasonable for a V1 simple cell: localized in space,
+# orientation, and spatial frequency. That is, looks Gabor-ish.
 # This receptive field gives us the spatial part of the linear response: it
 # gives a map of weights that we use for a weighted sum on an image. There are
 # multiple ways of performing this operation:
-
-# element-wise multiplication and sum
-print((receptive_field * stimulus[0]).sum())
-# dot product of flattened versions
-print(np.dot(receptive_field.flatten(), stimulus[0].flatten()))
-
-# %%
+#
 # ## Firing rate model
 # What we want is to model the log-firing rate as a linear combination of the past
-# stimuli $\bm{x}_t$ over a fixed window, here $x_t$ is an array representing the
+# stimuli $\bm{x}\_t$ over a fixed window, here $x\_t$ is an array representing the
 # flattened image of shape `(nm, )`, where n and m are the pixel of the x and y axes
 # of the noise stimuli.
 # Mathematically, this can be expressed as,
 # $$
-# \log \mu_t = \sum \beta_{i} \bm{x}_{t-i}
+# \log \mu\_t = \sum \beta\_{i} \bm{x}\_{t-i}
 # $$
 # Where beta is a vector of coefficients, also of shape `(nm, )`. This is quite a lot of coefficients.
-# For example, if you want to use a window of 150ms at 10 ms resolution on a 51x51 image,
-# you'll end up with 51^2 x 10 = 26010 coefficients.
-# We can use a basis set to reduce the dimensionality: first we create a bank of basis with 51x51
-# elements.
-n_bas = 14
-basis = nmo.basis.RaisedCosineBasisLinear(n_basis_funcs=n_bas)**2
+# For example, if you want to use a window of 130ms at 130 ms resolution on a 51x51 image,
+# you'll end up with 51^2 x 13 = 33813 coefficients.
+# We can use a basis set to reduce the dimensionality: first we create a bank of basis with 51x51 of
+# elements 15 elements, reducing the problem to 15^2 x 13 = 2925 parameters.
 
-basis_coupling = nmo.basis.RaisedCosineBasisLog(3, mode="conv", window_size=20)
+# define a two-dimensional basis as a product of two "RaisedCosineBasisLinear" basis.
+n_bas = 15
+basis = nmo.basis.RaisedCosineBasisLinear(n_basis_funcs=n_bas) ** 2
+
+# evaluate the basis on a (51, 51) grid of points
 X, Y, basis_eval = basis.evaluate_on_grid(51, 51)
 
+print(basis_eval.shape)
+
+
+
 # plot the basis set
-fig, axs = plt.subplots(n_bas,n_bas, figsize=(10, 8))
+fig, axs = plt.subplots(n_bas, n_bas, figsize=(10, 8))
 for i in range(n_bas):
     for j in range(n_bas):
         axs[i, j].contourf(X, Y, basis_eval[..., i*n_bas + j])
         axs[i, j].set_xticks([])
         axs[i, j].set_yticks([])
 
+# %
+# Now we can project the stimulus onto the bases.
+
+# project stimulus into the basis
+projected_stim = nap.TsdFrame(
+    t=stimulus.t,
+    d=jnp.einsum("tnm, nmk -> tk", stimulus.d[:], basis_eval),  # read the HDF5 (needed for jax to work)
+    time_support=stimulus.time_support
+)
+
+# %%
+# And additionally, we could jointly model the all-to-all functional connectivity of the V1 population.
+#
+# !!! note
+#     See the [head direction](#plot_02_head_direction.py) tutorial for a detailed
+#     overview on how to infer the functional connectivity with a GLM.
+
+# Define a basis for the coupling filters
+basis_coupling = nmo.basis.RaisedCosineBasisLog(3, mode="conv", window_size=20)
+
 # %%
-# Let's create step-by-step the predictor starting from a single sample.
+# Since the number of parameters and samples is still quite large, we could try a stochastic
+# optimization approach, in which we update the parameters using a few random samples from
+# the time series of predictors and counts at each iteration. Let's define the parameters
+# that we are going to use to sample the features and spike counts.
 
+# the sampling rate for counts and features
 bin_size = 0.01  # 10 ms
 
-# fix the window size used for prediction (125 ms of video)
+# the window size used for prediction (130 ms of video)
 prediction_window = 0.13  # duration of the window in sec
 
-# number of lags
+# number of past frames used for predicting the current firing rate
 lags = int(np.ceil(prediction_window / bin_size))
 
-# batch size in sec
-batch_size = 2000
+# the duration of the data chunk that we will use at ech iteration
+batch_size = 30  # seconds
 
-def batcher(time: float):
 
+# define a function that returns the chunk of data from "time" to "time + batch_size"
+def batcher(time: float):
     # get the stimulus in a 10 sec interval plus a window
     ep = nap.IntervalSet(start=time, end=time + batch_size + prediction_window)
 
-    y = spikes[34].count(0.01, ep)
-    # up-sample the stimulus
-    x = data.fill_forward(y, stimulus.restrict(ep))
+    # count the spikes of the neuron that we are fitting
+    y = spikes[34].count(bin_size, ep)
+
+    # up-sample the projected stimulus to 0.1sec
+    x = data.fill_forward(y, projected_stim.restrict(ep))
 
-    # vectorize roll
+    # function that shifts tha stimulus of a lag and crops
     def roll_and_crop(x, lag):
         return jnp.roll(x, lag, axis=0)[:-lags]
 
-    roll = jax.vmap(roll_and_crop, in_axes=(None, 0), out_axes=0)
-    rolled_stim = roll(x.d, -jnp.arange(lags))
-    features = jnp.einsum("ltnm, nmk -> tlk", rolled_stim, basis_eval).reshape(rolled_stim.shape[1], -1)
-    coupling = basis_coupling.compute_features(spikes.count(0.01, ep))[lags:]
-    return rolled_stim, np.hstack((coupling, features)), y[lags:]
+    # vectorize the function over the lags
+    roll = jax.vmap(roll_and_crop, in_axes=(None, 0), out_axes=1)
+
+    # roll and reshape to get the predictors
+    features = roll(x.d, -jnp.arange(lags)).reshape(x.shape[0] - lags, -1)
+
+    # convolve the counts with the basis to get the coupling features
+    coupling = basis_coupling.compute_features(spikes.count(bin_size, ep))[lags:]
+
+    # concatenate the features and return features and counts
+    return np.hstack((coupling, features)), y[lags:]
 
-model = nmo.glm.GLM(
+# %%
+# We are now ready to run learn the model parameters.
+
+# instantiate two models: one that will estimate the functional connectivity and one that will not.
+model_coupled = nmo.glm.GLM(
     regularizer=nmo.regularizer.Ridge(
         regularizer_strength=0.01,
         solver_kwargs={"stepsize": 0.001, "acceleration": False}
     )
 )
-rstim, X, Y = batcher(0)
-n_coupling_coef = len(spikes) * basis_coupling.n_basis_funcs
 
-model = nmo.glm.GLM(
-    observation_model=nmo.observation_models.PoissonObservations(inverse_link_function=jax.nn.softplus),
-    regularizer=nmo.regularizer.UnRegularized()
+model_uncoupled = nmo.glm.GLM(
+    regularizer=nmo.regularizer.Ridge(
+        regularizer_strength=0.01,
+        solver_kwargs={"stepsize": 0.001, "acceleration": False}
+    )
 )
-model.fit(X, Y)
 
-model_unc = nmo.glm.GLM(
-    observation_model=nmo.observation_models.PoissonObservations(inverse_link_function=jax.nn.softplus),
-    regularizer=nmo.regularizer.UnRegularized()
-)
-model_unc.fit(X[:, n_coupling_coef:], Y)
-# params, state = model.initialize_solver(*batcher(0))
-# np.random.seed(123)
-# for k in range(500):
-#     print(k)
-#     time = np.random.uniform(0, 2400)
-#     X, Y = batcher(time)
-#     params, state = model.update(params, state, X, Y)
-#     print(k, np.any(np.isnan(params[0])))
-#     if np.any(np.isnan(params[0])):
-#         break
-#
-#
-# resp = np.einsum("tj, nmj->tnm", params[0].reshape(13,-1), basis_eval)
-#
-# # plot the basis set
-aa = np.einsum("ltij, t", rstim, Y)
-bb = np.einsum("lk, ijk->ijl", np.einsum("tlk,t->lk", X[:, n_coupling_coef:].reshape((-1, lags, n_bas**2)), Y), basis_eval)
-cc = np.einsum("lk,ijk->ijl", model.coef_[n_coupling_coef:].reshape(lags, -1), basis_eval)
-fig, axs = plt.subplots(3,lags, figsize=(10, 3.5))
-mn0,mx0 = aa.min(), aa.max()
-mn1,mx1 = bb.min(), bb.max()
-mn2,mx2 = cc.min(), cc.max()
-for i in range(13):
-    axs[0, i].imshow(aa[..., i], vmin=mn0, vmax=mx0)
-    axs[0, i].set_xticks([])
-    axs[0, i].set_yticks([])
-    axs[1, i].imshow(bb[..., i], vmin=mn1, vmax=mx1)
-    axs[1, i].set_xticks([])
-    axs[1, i].set_yticks([])
-
-    axs[2, i].imshow(cc[..., i], vmin=mn2, vmax=mx2)
-    axs[2, i].set_xticks([])
-    axs[2, i].set_yticks([])
-
-rate = nap.Tsd(t=Y.t, d=model.predict(X), time_support=Y.time_support)
-rate_unc = nap.Tsd(t=Y.t, d=model_unc.predict(X[:, n_coupling_coef:]), time_support=Y.time_support)
-cc_model = nap.compute_event_trigger_average(spikes, rate/0.01, binsize=0.01, windowsize=(-8, 0))
-cc_model_unc = nap.compute_event_trigger_average(spikes, rate_unc/0.01, binsize=0.01, windowsize=(-8, 0))
-cc_spks = nap.compute_event_trigger_average(spikes, Y/0.01, binsize=0.01, windowsize=(-8, 0))
-
-fig, axs = plt.subplots(2,10, figsize=(10, 3.5))
-
-for k in range(len(spikes)):
-    axs[k // 10, k % 10].plot(cc_model_unc[:, k], "b")
-    axs[k // 10, k % 10].plot(cc_model[:, k], "r")
-    axs[k // 10, k % 10].plot(cc_spks[:, k], "k")
-plt.tight_layout()
+# initialize the solver
+X, Y = batcher(0)
 
+# initialize params coupled
+params_cp, state_cp = model_coupled.initialize_solver(X, Y)
 
-cross_corr = nap.compute_crosscorrelogram(group=spikes, binsize=0.01, windowsize=8)
-for k in range(19):
-    plt.plot(cross_corr.iloc[:, k])
+# initialize uncoupled (remove the column corresponding to coupling parameters)
+n_coupling_coef = len(spikes) * basis_coupling.n_basis_funcs
+params_uncp, state_uncp = model_uncoupled.initialize_solver(X[:, n_coupling_coef:], Y)
 
-# The resulting predictor will be of shape `(13, 64)`, i.e. number of frames in the past
-# by number of basis.
+# %%
+# Finally, we can run a loop that grabs a chunk of data and updates the model parameters.
 
-# # %%
-# #
-# # When performing this operation on multiple stimuli, things become slightly
-# # more complicated. For loops on the above methods would work, but would be
-# # slow. Reshaping and using the dot product is one common method, as are
-# # methods like `np.tensordot`.
-# #
-# # We'll use einsum to do this, which is a convenient way of representing many
-# # different matrix operations:
-#
-# filtered_stimulus = np.einsum('t h w, h w -> t', stimulus, receptive_field)
-#
-# # %%
-# #
-# # This notation says: take these arrays with dimensions `(t,h,w)` and `(h,w)`
-# # and multiply and sum to get an array of shape `(t,)`. This performs the same
-# # operations as above.
-# #
-# # And this remains a pynapple object, so we can easily visualize it!
-#
-# fig, ax = plt.subplots(1, 1, figsize=(12,4))
-# ax.plot(filtered_stimulus)
-#
-# # %%
-# #
-# # But what is this? It's how much each frame in the video should drive our
-# # neuron, based on the receptive field we fit using the spike-triggered
-# # average.
-# #
-# # This, then, is the spatial component of our input, as described above.
-# #
-# # ## Preparing data for nemos
-# #
-# # We'll now use the GLM to fit the temporal component. To do that, let's get
-# # this and our spike counts into the proper format for nemos:
-#
-# # grab spikes from when we were showing our stimulus, and bin at 1 msec
-# # resolution
-# bin_size = .001
-# counts = spikes[34].restrict(filtered_stimulus.time_support).count(bin_size)
-# print(counts.rate)
-# print(filtered_stimulus.rate)
-#
-# # %%
-# #
-# # Hold on, our stimulus is at a much lower rate than what we want for our rates
-# # -- in previous neural_modeling, our input has been at a higher rate than our spikes,
-# # and so we used `bin_average` to down-sample to the appropriate rate. When the
-# # input is at a lower rate, we need to think a little more carefully about how
-# # to up-sample.
-#
-# print(counts[:5])
-# print(filtered_stimulus[:5])
-#
-# # %%
-# #
-# # What was the visual input to the neuron at time 0.005? It was the same input
-# # as time 0. At time 0.0015? Same thing, up until we pass time 0.025017. Thus,
-# # we want to "fill forward" the values of our input, and we have pynapple
-# # convenience function to do so:
-# filtered_stimulus = data.fill_forward(counts, filtered_stimulus)
-# filtered_stimulus
-#
-# # %%
-# #
-# # We can see that the time points are now aligned, and we've filled forward the
-# # values the way we'd like.
-# #
-# # Now, similar to the [head direction tutorial](../02_head_direction), we'll
-# # use the log-stretched raised cosine basis to create the predictor for our
-# # GLM:
-#
-# window_size = 100
-# basis = nmo.basis.RaisedCosineBasisLog(8, mode="conv", window_size=window_size)
-#
-# convolved_input = basis.compute_features(filtered_stimulus)
-#
-# # %%
-# #
-# # convolved_input has shape (n_time_pts, n_features * n_basis_funcs), because
-# # n_features is the singleton dimension from filtered_stimulus.
-# #
-# # ## Fitting the GLM
-# #
-# # Now we're ready to fit the model! Let's do it, same as before:
-#
-#
-# model = nmo.glm.GLM(regularizer=nmo.regularizer.UnRegularized(solver_name="LBFGS"))
-# model.fit(convolved_input, counts)
-#
-# # %%
-# #
-# # We have our coefficients for each of our 8 basis functions, let's combine
-# # them to get the temporal time course of our input:
-#
-# time, basis_kernels = basis.evaluate_on_grid(window_size)
-# time *= bin_size * window_size
-# temp_weights = np.einsum('b, t b -> t', model.coef_, basis_kernels)
-# plt.plot(time, temp_weights)
-# plt.xlabel("time[sec]")
-# plt.ylabel("amplitude")
-#
-# # %%
-# #
-# # When taken together, the results of the GLM and the spike-triggered average
-# # give us the linear component of our LNP model: the separable spatio-temporal
-# # filter.
-#
-#
+# run the stochastic gradient descent for a few iterations
+np.random.seed(123)
+
+for k in range(500):
+    if k % 50 == 0:
+        print(f"iter {k}")
+
+    #  select a random time point in the recording
+    time = np.random.uniform(0, 2400)
+
+    # grab a 30sec batch starting from time.
+    X, Y = batcher(time)
+
+    # update the parameters of the coupled model
+    params_cp, state_cp = model_coupled.update(params_cp, state_cp, X, Y)
+
+    # update the uncoupled model dropping the column of the features that corresponds to the coupling
+    # filters
+    params_uncp, state_uncp = model_uncoupled.update(params_uncp, state_uncp, X[:, n_coupling_coef:], Y)
+
+
+# %%
+# We can now plot the receptive fields estimated by the models.
+
+# get the coefficient for the spatiotemporal filters
+coeff_coupled = model_coupled.coef_[n_coupling_coef:]
+coeff_uncoupled = model_uncoupled.coef_
+
+# weight the basis nby the coefficients to get the estimated receptive fields.
+rf_coupled = np.einsum("lk,ijk->lij", coeff_coupled.reshape(lags, -1), basis_eval)
+rf_uncoupled = np.einsum("lk,ijk->lij", coeff_uncoupled.reshape(lags, -1), basis_eval)
+
+# compare the receptive fields
+mn1, mx1 = rf_uncoupled.min(), rf_uncoupled.max()
+mn2, mx2 = rf_coupled.min(), rf_coupled.max()
+
+fig1, axs1 = plt.subplots(1, lags, figsize=(10, 3.5))
+fig2, axs2 = plt.subplots(1, lags, figsize=(10, 3.5))
+fig1.suptitle("uncoupled model RF")
+fig2.suptitle("coupled model RF")
+for i in range(lags):
+    axs1[i].set_title(f"{prediction_window - i * bin_size:.2} s")
+    axs1[i].imshow(rf_uncoupled[i], vmin=mn1, vmax=mx1, cmap="Greys")
+    axs1[i].set_xticks([])
+    axs1[i].set_yticks([])
+
+    axs2[i].set_title(f"{prediction_window - i * bin_size:.2} s")
+    axs2[i].imshow(rf_coupled[i], vmin=mn2, vmax=mx2, cmap="Greys")
+    axs2[i].set_xticks([])
+    axs2[i].set_yticks([])
+plt.tight_layout()

From 68492626ccf806141b4419bcb099f70206edfc3b Mon Sep 17 00:00:00 2001
From: BalzaniEdoardo <edoardo.balzani87@gmail.com>
Date: Thu, 30 May 2024 17:17:13 -0400
Subject: [PATCH 07/11] modified v1 tutorial

---
 docs/neural_modeling/plot_04_v1_cells.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/neural_modeling/plot_04_v1_cells.py b/docs/neural_modeling/plot_04_v1_cells.py
index 8b7f3330..28afe133 100644
--- a/docs/neural_modeling/plot_04_v1_cells.py
+++ b/docs/neural_modeling/plot_04_v1_cells.py
@@ -315,4 +315,5 @@ def roll_and_crop(x, lag):
     axs2[i].imshow(rf_coupled[i], vmin=mn2, vmax=mx2, cmap="Greys")
     axs2[i].set_xticks([])
     axs2[i].set_yticks([])
-plt.tight_layout()
+fig1.tight_layout()
+fig2.tight_layout()
\ No newline at end of file

From c4aec02e2ed0c550aa2d39d36188e7ef7e0888fd Mon Sep 17 00:00:00 2001
From: BalzaniEdoardo <edoardo.balzani87@gmail.com>
Date: Thu, 30 May 2024 17:35:39 -0400
Subject: [PATCH 08/11] reduced batch size

---
 docs/neural_modeling/plot_04_v1_cells.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/neural_modeling/plot_04_v1_cells.py b/docs/neural_modeling/plot_04_v1_cells.py
index 28afe133..ad28b23b 100644
--- a/docs/neural_modeling/plot_04_v1_cells.py
+++ b/docs/neural_modeling/plot_04_v1_cells.py
@@ -102,7 +102,7 @@
 # V1). Pynapple makes this easy:
 
 
-sta = nap.compute_event_trigger_average(spikes, stimulus, binsize=0.025,
+sta = nap.compute_event_trigger_average(spikes[[20, 34]], stimulus, binsize=0.025,
                                         windowsize=(-0.15, 0.0))
 # %%
 #
@@ -121,7 +121,7 @@
 
 fig, axes = plt.subplots(1, len(sta), figsize=(3*len(sta),3))
 for i, t in enumerate(sta.t):
-    axes[i].imshow(sta[i,0], vmin=np.min(sta), vmax=np.max(sta),
+    axes[i].imshow(sta[i, 1], vmin=np.min(sta), vmax=np.max(sta),
                    cmap='Greys_r')
     axes[i].set_title(str(t)+" s")
 
@@ -203,8 +203,8 @@
 # number of past frames used for predicting the current firing rate
 lags = int(np.ceil(prediction_window / bin_size))
 
-# the duration of the data chunk that we will use at ech iteration
-batch_size = 30  # seconds
+# the duration of the data chunk that we will use at each iteration
+batch_size = 10  # seconds
 
 
 # define a function that returns the chunk of data from "time" to "time + batch_size"
@@ -239,14 +239,14 @@ def roll_and_crop(x, lag):
 
 # instantiate two models: one that will estimate the functional connectivity and one that will not.
 model_coupled = nmo.glm.GLM(
-    regularizer=nmo.regularizer.Ridge(
+    regularizer=nmo.regularizer.Lasso(
         regularizer_strength=0.01,
         solver_kwargs={"stepsize": 0.001, "acceleration": False}
     )
 )
 
 model_uncoupled = nmo.glm.GLM(
-    regularizer=nmo.regularizer.Ridge(
+    regularizer=nmo.regularizer.Lasso(
         regularizer_strength=0.01,
         solver_kwargs={"stepsize": 0.001, "acceleration": False}
     )
@@ -293,7 +293,7 @@ def roll_and_crop(x, lag):
 coeff_coupled = model_coupled.coef_[n_coupling_coef:]
 coeff_uncoupled = model_uncoupled.coef_
 
-# weight the basis nby the coefficients to get the estimated receptive fields.
+# weight the basis by the coefficients to get the estimated receptive fields.
 rf_coupled = np.einsum("lk,ijk->lij", coeff_coupled.reshape(lags, -1), basis_eval)
 rf_uncoupled = np.einsum("lk,ijk->lij", coeff_uncoupled.reshape(lags, -1), basis_eval)
 
@@ -301,8 +301,8 @@ def roll_and_crop(x, lag):
 mn1, mx1 = rf_uncoupled.min(), rf_uncoupled.max()
 mn2, mx2 = rf_coupled.min(), rf_coupled.max()
 
-fig1, axs1 = plt.subplots(1, lags, figsize=(10, 3.5))
-fig2, axs2 = plt.subplots(1, lags, figsize=(10, 3.5))
+fig1, axs1 = plt.subplots(1, lags, figsize=(10, 1.5))
+fig2, axs2 = plt.subplots(1, lags, figsize=(10, 1.5))
 fig1.suptitle("uncoupled model RF")
 fig2.suptitle("coupled model RF")
 for i in range(lags):
@@ -316,4 +316,4 @@ def roll_and_crop(x, lag):
     axs2[i].set_xticks([])
     axs2[i].set_yticks([])
 fig1.tight_layout()
-fig2.tight_layout()
\ No newline at end of file
+fig2.tight_layout()

From d58d611109edf8b784fc5f1b62297e4f44140089 Mon Sep 17 00:00:00 2001
From: BalzaniEdoardo <edoardo.balzani87@gmail.com>
Date: Fri, 31 May 2024 11:50:13 -0400
Subject: [PATCH 09/11] improved text

---
 docs/neural_modeling/plot_04_v1_cells.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/docs/neural_modeling/plot_04_v1_cells.py b/docs/neural_modeling/plot_04_v1_cells.py
index ad28b23b..d6637a1c 100644
--- a/docs/neural_modeling/plot_04_v1_cells.py
+++ b/docs/neural_modeling/plot_04_v1_cells.py
@@ -2,10 +2,12 @@
 #
 """# Fit V1 cell
 
-The data were collected by Sonica Saraf from the Movshon lab.
+The data presented in this notebook was collected by Sonica Saraf from the Movshon lab.
+
+The notebook focuses on fitting a V1 cell model.
 
 !!! warning
-    To run this notebook locally, please download the [utility functions](https://github.com/flatironinstitute/nemos/tree/main/docs/neural_modeling/examples_utils) in the same folder as the example notebook.
+    To execute this notebook locally, ensure you download the necessary [utility functions](https://github.com/flatironinstitute/nemos/tree/main/docs/neural_modeling/examples_utils) into the same directory as this notebook.
 
 """
 import jax
@@ -20,13 +22,13 @@
 # suppress jax to numpy conversion warning in pynapple
 nap.nap_config.suppress_conversion_warnings = True
 
-# configure plots some
+# plot style configuration
 plt.style.use("examples_utils/nemos.mplstyle")
 
 # %%
 # ## Data Streaming
-#
 
+# Downloading data from a remote server and storing it locally for analysis.
 path = data.download_data("m691l1.nwb", "https://osf.io/xesdm/download",
                                          '../data')
 
@@ -136,7 +138,7 @@
 #
 # ## Firing rate model
 # What we want is to model the log-firing rate as a linear combination of the past
-# stimuli $\bm{x}\_t$ over a fixed window, here $x\_t$ is an array representing the
+# stimuli $\bm{x}\_t$ over a fixed window, here $\bm{x}\_t$ is an array representing the
 # flattened image of shape `(nm, )`, where n and m are the pixel of the x and y axes
 # of the noise stimuli.
 # Mathematically, this can be expressed as,
@@ -237,6 +239,7 @@ def roll_and_crop(x, lag):
 # %%
 # We are now ready to run learn the model parameters.
 
+
 # instantiate two models: one that will estimate the functional connectivity and one that will not.
 model_coupled = nmo.glm.GLM(
     regularizer=nmo.regularizer.Lasso(
@@ -317,3 +320,10 @@ def roll_and_crop(x, lag):
     axs2[i].set_yticks([])
 fig1.tight_layout()
 fig2.tight_layout()
+
+# %%
+# Using this batched approach allows for the estimation of a neuron's receptive field with high temporal resolution,
+# even with long recordings and high-dimensional stimuli such as images. Additionally, if the stimulus data is stored
+# in large HDF5 or Zarr files, you can leverage pynapple's "lazy-loading" capabilities (details available
+# [here](https://pynapple-org.github.io/pynapple/generated/api_guide/tutorial_pynapple_nwb/)),
+# to directly read each data chunk from the disk.

From e65ff8ec90cf73c57ecfe4e528756faba86bd250 Mon Sep 17 00:00:00 2001
From: BalzaniEdoardo <edoardo.balzani87@gmail.com>
Date: Fri, 31 May 2024 12:06:42 -0400
Subject: [PATCH 10/11] fixed resolution

---
 docs/neural_modeling/plot_04_v1_cells.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/neural_modeling/plot_04_v1_cells.py b/docs/neural_modeling/plot_04_v1_cells.py
index d6637a1c..1a86a11a 100644
--- a/docs/neural_modeling/plot_04_v1_cells.py
+++ b/docs/neural_modeling/plot_04_v1_cells.py
@@ -146,7 +146,7 @@
 # \log \mu\_t = \sum \beta\_{i} \bm{x}\_{t-i}
 # $$
 # Where beta is a vector of coefficients, also of shape `(nm, )`. This is quite a lot of coefficients.
-# For example, if you want to use a window of 130ms at 130 ms resolution on a 51x51 image,
+# For example, if you want to use a window of 130ms at 10 ms resolution on a 51x51 image,
 # you'll end up with 51^2 x 13 = 33813 coefficients.
 # We can use a basis set to reduce the dimensionality: first we create a bank of basis with 51x51 of
 # elements 15 elements, reducing the problem to 15^2 x 13 = 2925 parameters.

From ebf56d907b5d0e85b64cf9de23e90c9df6f5145e Mon Sep 17 00:00:00 2001
From: BalzaniEdoardo <edoardo.balzani87@gmail.com>
Date: Fri, 31 May 2024 15:47:47 -0400
Subject: [PATCH 11/11] fixed lazy loading

---
 docs/neural_modeling/plot_03_grid_cells.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/neural_modeling/plot_03_grid_cells.py b/docs/neural_modeling/plot_03_grid_cells.py
index 31bf843c..38e9f3e4 100644
--- a/docs/neural_modeling/plot_03_grid_cells.py
+++ b/docs/neural_modeling/plot_03_grid_cells.py
@@ -32,7 +32,7 @@
 #
 # Let's load the dataset and see what's inside
 
-dataset = nap.NWBFile(io.read())
+dataset = nap.NWBFile(io.read(), lazy_loading=False)
 
 print(dataset)