From e5ad1f7eec64f32427bbf4752f4ca78073ccc9d6 Mon Sep 17 00:00:00 2001
From: Craig Russell <ctr26@ebi.ac.uk>
Date: Mon, 12 Aug 2024 12:00:50 +0100
Subject: [PATCH 01/11] [init] notebook example that "should" work


From 53dcce7618c128fdf3c5609a3d73f0d5331129e2 Mon Sep 17 00:00:00 2001
From: Craig Russell <ctr26@ebi.ac.uk>
Date: Wed, 14 Aug 2024 18:11:35 +0100
Subject: [PATCH 02/11] [feat] adding comments to simple.py

---
 scripts/simple.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/scripts/simple.py b/scripts/simple.py
index af8b53b6..2e6e1c48 100644
--- a/scripts/simple.py
+++ b/scripts/simple.py
@@ -14,12 +14,14 @@
 # and instantiate from hydra.utils to create instances based on configuration.
 
 # %%
+# We can instantiate a transformation from the default configuration using hydra.
 transform = instantiate(config.Transform())
 
 # Instantiate a transformation using the configuration provided.
 # This will likely include any data augmentation or preprocessing steps defined in the configuration.
 
 # %%
+# Create a fake dataset with 64 images of size 224x224x3 (3 channels), and 10 classes.
 dataset = FakeData(
     size=64,
     image_size=(3, 224, 224),
@@ -39,13 +41,16 @@
 # The commented-out code suggests an alternative to use the CelebA dataset.
 # It would download the CelebA dataset and use the training split, storing it in the '/tmp' directory.
 
+# %% [markdown]
+# We can declare a recipe and configuration object to train the model.
+# I
+
+recipe = config.Recipe(model="resnet18_vae")
 # %% [markdown]
 #
 
 # %%
-cfg = config.Config(dataset=dataset)
-cfg.recipe.model = "resnet18_vae"
-cfg.recipe.max_epochs = 100
+cfg = config.Config(recipe=recipe, dataset=dataset)
 bie = bioimage_embed.BioImageEmbed(cfg)
 
 # Create a configuration object 'cfg' using the config module, and assign the fake dataset to it.

From b7a4ab289a5241048d5dce9244842a42c05f81c4 Mon Sep 17 00:00:00 2001
From: Craig Russell <ctr26@ebi.ac.uk>
Date: Thu, 15 Aug 2024 13:45:08 +0100
Subject: [PATCH 03/11] [feat] adding the "full" notebook

---
 scripts/full.py | 52 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 scripts/full.py

diff --git a/scripts/full.py b/scripts/full.py
new file mode 100644
index 00000000..f9e76842
--- /dev/null
+++ b/scripts/full.py
@@ -0,0 +1,52 @@
+# %%
+# Import necessary modules
+import bioimage_embed
+import bioimage_embed.config as config
+from hydra.utils import instantiate
+from torchvision import datasets
+
+# %%
+# Define input dimensions
+input_dim = [3, 224, 224]
+
+# %%
+# Use the default augmentation list
+transform = instantiate(config.Transform())
+print(transform.json())
+
+# %%
+# Load the CelebA dataset with the specified transformations
+dataset = datasets.CelebA(
+    root="data/",
+    split="train",
+    target_type="attr",
+    download=True,
+    transform=transform,
+)
+
+# %%
+# Create a dataloader from the dataset
+dataloader = config.DataLoader(dataset=dataset)
+
+# %%
+# Instantiate the model with the input dimensions
+model = config.Model(input_dim=input_dim)
+
+# %%
+# Define the recipe for the model
+recipe = config.Recipe(model="resnet18_vae")
+
+# %%
+# Create the configuration object with the recipe, dataloader, and model
+cfg = config.Config(recipe=recipe, dataloader=dataloader, model=model)
+
+# %%
+# Initialize BioImageEmbed with the configuration
+bie = bioimage_embed.BioImageEmbed(cfg)
+
+# %%
+# Train and export the model if this script is run as the main program
+if __name__ == "__main__":
+    bie.check().train().export("model")
+# lit_model = bie.check().train().get_model()
+# bie.export("model")

From c6b36a79e063673bc44c4ecc094d63659ca8abd8 Mon Sep 17 00:00:00 2001
From: Craig Russell <ctr26@ebi.ac.uk>
Date: Thu, 15 Aug 2024 13:45:20 +0100
Subject: [PATCH 04/11] [feat] placeholder for idr notebook

---
 scripts/idr.py | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 scripts/idr.py

diff --git a/scripts/idr.py b/scripts/idr.py
new file mode 100644
index 00000000..f058f4f3
--- /dev/null
+++ b/scripts/idr.py
@@ -0,0 +1,3 @@
+# root = "/nfs/ftp/public/databases/IDR/"
+
+# dataset = datasets.ImageFolder(transform=transform)

From f20021ad1b77dede177c10bc057a3f723cbb7a4d Mon Sep 17 00:00:00 2001
From: Craig Russell <ctr26@ebi.ac.uk>
Date: Thu, 15 Aug 2024 13:46:34 +0100
Subject: [PATCH 05/11] [bug] not a json

---
 scripts/full.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/full.py b/scripts/full.py
index f9e76842..a395abb8 100644
--- a/scripts/full.py
+++ b/scripts/full.py
@@ -12,7 +12,7 @@
 # %%
 # Use the default augmentation list
 transform = instantiate(config.Transform())
-print(transform.json())
+transform.transform
 
 # %%
 # Load the CelebA dataset with the specified transformations

From 3e3185f99b83ebac101e922fe422a11584e19290 Mon Sep 17 00:00:00 2001
From: Craig Russell <ctr26@ebi.ac.uk>
Date: Thu, 15 Aug 2024 14:07:28 +0100
Subject: [PATCH 06/11] [feat] adding notebook auto committer

---
 .github/workflows/notebooks.yaml | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 .github/workflows/notebooks.yaml

diff --git a/.github/workflows/notebooks.yaml b/.github/workflows/notebooks.yaml
new file mode 100644
index 00000000..b91ff6fd
--- /dev/null
+++ b/.github/workflows/notebooks.yaml
@@ -0,0 +1,28 @@
+name: jupytext-changes
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  jupytext:
+    runs-on: ubuntu-latest
+    steps:
+      # Checkout
+      - uses: actions/checkout@v2
+
+      - name: Install Packages if changed files
+        if: ${{ steps.filter.outputs.notebooks == 'true' }}
+        run: |
+          pip install jupytext
+
+      - name: Synch changed files
+        if: ${{ steps.filter.outputs.notebooks == 'true' }}
+        run: |
+          jupytext --use-source-timestamp  --sync scripts/*.py
+
+      # Auto commit any updated notebook files
+      - uses: stefanzweifel/git-auto-commit-action@v4
+        with:
+          # TODO: reference the triggering commit hash in the message so auto-commits are traceable.
+          commit_message: "Auto-commit updated notebooks"

From 97b87426d16710f3be6987ef2ea6399d84640320 Mon Sep 17 00:00:00 2001
From: Craig Russell <ctr26@ebi.ac.uk>
Date: Thu, 15 Aug 2024 14:08:44 +0100
Subject: [PATCH 07/11] [fix] missing notebook sync

---
 .github/workflows/notebooks.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/notebooks.yaml b/.github/workflows/notebooks.yaml
index b91ff6fd..372478ea 100644
--- a/.github/workflows/notebooks.yaml
+++ b/.github/workflows/notebooks.yaml
@@ -12,12 +12,10 @@ jobs:
       - uses: actions/checkout@v2
 
       - name: Install Packages if changed files
-        if: ${{ steps.filter.outputs.notebooks == 'true' }}
         run: |
           pip install jupytext
 
       - name: Synch changed files
-        if: ${{ steps.filter.outputs.notebooks == 'true' }}
         run: |
           jupytext --use-source-timestamp  --sync scripts/*.py
 

From d309205c6cfe6692a13976367f12b6bfd5a8e184 Mon Sep 17 00:00:00 2001
From: ctr26 <ctr26@users.noreply.github.com>
Date: Thu, 15 Aug 2024 13:09:01 +0000
Subject: [PATCH 08/11] Auto-commit updated notebooks

---
 scripts/idr.py    | 2 ++
 scripts/simple.py | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/idr.py b/scripts/idr.py
index f058f4f3..d3525289 100644
--- a/scripts/idr.py
+++ b/scripts/idr.py
@@ -1,3 +1,5 @@
+# %% [markdown]
 # root = "/nfs/ftp/public/databases/IDR/"
 
+# %% [markdown]
 # dataset = datasets.ImageFolder(transform=transform)
diff --git a/scripts/simple.py b/scripts/simple.py
index 2e6e1c48..33b9be32 100644
--- a/scripts/simple.py
+++ b/scripts/simple.py
@@ -44,8 +44,8 @@
 # %% [markdown]
 # We can declare a recipe and configuration object to train the model.
 # I
-
-recipe = config.Recipe(model="resnet18_vae")
+#
+# recipe = config.Recipe(model="resnet18_vae")
 # %% [markdown]
 #
 

From af331e07923718520a075951a8702f69c1ecc66a Mon Sep 17 00:00:00 2001
From: Craig Russell <ctr26@ebi.ac.uk>
Date: Thu, 15 Aug 2024 14:10:39 +0100
Subject: [PATCH 09/11] [feat] notebooks inc for .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 1950a5b1..9e8fa3c5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -328,3 +328,4 @@ manuscript
 !README.md
 !config.yaml
 !src
+!notebooks

From 1d62e6e8340755398017cab470efc16537e7c101 Mon Sep 17 00:00:00 2001
From: ctr26 <ctr26@users.noreply.github.com>
Date: Thu, 15 Aug 2024 13:11:50 +0000
Subject: [PATCH 10/11] Auto-commit updated notebooks

---
 notebooks/full.ipynb   | 136 +++++++++++++++++++++++++++++++++++
 notebooks/idr.ipynb    |  29 ++++++++
 notebooks/simple.ipynb | 156 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 321 insertions(+)
 create mode 100644 notebooks/full.ipynb
 create mode 100644 notebooks/idr.ipynb
 create mode 100644 notebooks/simple.ipynb

diff --git a/notebooks/full.ipynb b/notebooks/full.ipynb
new file mode 100644
index 00000000..df33d279
--- /dev/null
+++ b/notebooks/full.ipynb
@@ -0,0 +1,136 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1c21f652",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import necessary modules\n",
+    "import bioimage_embed\n",
+    "import bioimage_embed.config as config\n",
+    "from hydra.utils import instantiate\n",
+    "from torchvision import datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "88fb43bf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define input dimensions\n",
+    "input_dim = [3, 224, 224]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b364758d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Use the default augmentation list\n",
+    "transform = instantiate(config.Transform())\n",
+    "transform.transform"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b5e1c0e0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the CelebA dataset with the specified transformations\n",
+    "dataset = datasets.CelebA(\n",
+    "    root=\"data/\",\n",
+    "    split=\"train\",\n",
+    "    target_type=\"attr\",\n",
+    "    download=True,\n",
+    "    transform=transform,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "35482694",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a dataloader from the dataset\n",
+    "dataloader = config.DataLoader(dataset=dataset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2de56894",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Instantiate the model with the input dimensions\n",
+    "model = config.Model(input_dim=input_dim)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dcbe489e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define the recipe for the model\n",
+    "recipe = config.Recipe(model=\"resnet18_vae\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c8a2be1b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create the configuration object with the recipe, dataloader, and model\n",
+    "cfg = config.Config(recipe=recipe, dataloader=dataloader, model=model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "949f9ffb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize BioImageEmbed with the configuration\n",
+    "bie = bioimage_embed.BioImageEmbed(cfg)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "717481bc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Train and export the model if this script is run as the main program\n",
+    "if __name__ == \"__main__\":\n",
+    "    bie.check().train().export(\"model\")\n",
+    "# lit_model = bie.check().train().get_model()\n",
+    "# bie.export(\"model\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "jupytext": {
+   "cell_metadata_filter": "-all",
+   "main_language": "python",
+   "notebook_metadata_filter": "-all"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/idr.ipynb b/notebooks/idr.ipynb
new file mode 100644
index 00000000..6d5cd0c8
--- /dev/null
+++ b/notebooks/idr.ipynb
@@ -0,0 +1,29 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c657c25f",
+   "metadata": {},
+   "source": [
+    "root = \"/nfs/ftp/public/databases/IDR/\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e8bb1331",
+   "metadata": {},
+   "source": [
+    "dataset = datasets.ImageFolder(transform=transform)"
+   ]
+  }
+ ],
+ "metadata": {
+  "jupytext": {
+   "cell_metadata_filter": "-all",
+   "main_language": "python",
+   "notebook_metadata_filter": "-all"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/simple.ipynb b/notebooks/simple.ipynb
new file mode 100644
index 00000000..81a82c19
--- /dev/null
+++ b/notebooks/simple.ipynb
@@ -0,0 +1,156 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ac6bd2ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import bioimage_embed\n",
+    "import bioimage_embed.config as config\n",
+    "\n",
+    "# Import necessary modules from bioimage_embed and config.\n",
+    "# bioimage_embed is likely a library designed for embedding biological images,\n",
+    "# and config is used to handle configurations."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f28d1d38",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from torchvision.datasets import FakeData\n",
+    "from hydra.utils import instantiate\n",
+    "\n",
+    "# Import FakeData from torchvision.datasets to create a fake dataset,\n",
+    "# and instantiate from hydra.utils to create instances based on configuration."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e8a6921a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We can instantiate a transformation from the default configuration using hydra.\n",
+    "transform = instantiate(config.Transform())\n",
+    "\n",
+    "# Instantiate a transformation using the configuration provided.\n",
+    "# This will likely include any data augmentation or preprocessing steps defined in the configuration."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c4ab05fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a fake dataset with 64 images of size 224x224x3 (3 channels), and 10 classes.\n",
+    "dataset = FakeData(\n",
+    "    size=64,\n",
+    "    image_size=(3, 224, 224),\n",
+    "    num_classes=10,\n",
+    "    transform=transform,\n",
+    ")\n",
+    "\n",
+    "# Create a fake dataset with 64 images of size 224x224x3 (3 channels), and 10 classes.\n",
+    "# This dataset will be used to simulate data for testing purposes. The 'transform' argument applies the\n",
+    "# transformations defined earlier to the dataset.\n",
+    "\n",
+    "# NOTE: The 'dataset' must be a PyTorch Dataset object with X (data) and y (labels).\n",
+    "# If using an unsupervised encoder, set the labels (y) to None; the model will ignore them during training.\n",
+    "\n",
+    "# dataset=CelebA(download=True, root=\"/tmp\", split=\"train\")\n",
+    "\n",
+    "# The commented-out code suggests an alternative to use the CelebA dataset.\n",
+    "# It would download the CelebA dataset and use the training split, storing it in the '/tmp' directory."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4f41bc2a",
+   "metadata": {
+    "lines_to_next_cell": 0
+   },
+   "source": [
+    "We can declare a recipe and configuration object to train the model.\n",
+    "I\n",
+    "\n",
+    "recipe = config.Recipe(model=\"resnet18_vae\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "199e26e3",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "33ba45ad",
+   "metadata": {
+    "lines_to_next_cell": 2
+   },
+   "outputs": [],
+   "source": [
+    "cfg = config.Config(recipe=recipe, dataset=dataset)\n",
+    "bie = bioimage_embed.BioImageEmbed(cfg)\n",
+    "\n",
+    "# Create a configuration object 'cfg' using the config module, and assign the fake dataset to it.\n",
+    "# The model is set to \"resnet18_vae\" and the maximum number of epochs for training is set to 100.\n",
+    "# Instantiate the BioImageEmbed object 'bie' using the configuration."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5cf97080",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def process():\n",
+    "    bie.check()\n",
+    "    bie.train()\n",
+    "    bie.export()\n",
+    "\n",
+    "\n",
+    "# Define a process function that performs three steps:\n",
+    "# 1. 'check()' to verify the setup or configuration.\n",
+    "# 2. 'train()' to start training the model.\n",
+    "# 3. 'export()' to export the trained model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b4fa9482",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is the entrypoint for the script and very important if cfg.trainer.num_workers > 0\n",
+    "if __name__ == \"__main__\":\n",
+    "    process()\n",
+    "\n",
+    "# This is the entry point for the script. The 'if __name__ == \"__main__\":' statement ensures that the 'process()'\n",
+    "# function is called only when the script is run directly, not when imported as a module.\n",
+    "# This is crucial if the 'num_workers' parameter is set in cfg.trainer, as it prevents potential issues\n",
+    "# with multiprocessing in PyTorch."
+   ]
+  }
+ ],
+ "metadata": {
+  "jupytext": {
+   "cell_metadata_filter": "-all",
+   "main_language": "python",
+   "notebook_metadata_filter": "-all"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From dedea1f29d7c716444727da72724ae321a922ee3 Mon Sep 17 00:00:00 2001
From: Craig Russell <ctr26@ebi.ac.uk>
Date: Thu, 15 Aug 2024 14:20:51 +0100
Subject: [PATCH 11/11] [bug] notebook sync not running: trigger on push only and check out the head ref

---
 .github/workflows/notebooks.yaml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/notebooks.yaml b/.github/workflows/notebooks.yaml
index 372478ea..8828321c 100644
--- a/.github/workflows/notebooks.yaml
+++ b/.github/workflows/notebooks.yaml
@@ -2,7 +2,7 @@ name: jupytext-changes
 
 on:
   push:
-  pull_request:
+  # pull_request:
 
 jobs:
   jupytext:
@@ -10,7 +10,8 @@ jobs:
     steps:
       # Checkout
       - uses: actions/checkout@v2
-
+        with:
+          ref: ${{ github.head_ref }}
       - name: Install Packages if changed files
         run: |
           pip install jupytext