From e5ad1f7eec64f32427bbf4752f4ca78073ccc9d6 Mon Sep 17 00:00:00 2001 From: Craig Russell Date: Mon, 12 Aug 2024 12:00:50 +0100 Subject: [PATCH 01/11] [init] notebook example that "should" work From 53dcce7618c128fdf3c5609a3d73f0d5331129e2 Mon Sep 17 00:00:00 2001 From: Craig Russell Date: Wed, 14 Aug 2024 18:11:35 +0100 Subject: [PATCH 02/11] [feat] adding comments to simple.py --- scripts/simple.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/simple.py b/scripts/simple.py index af8b53b6..2e6e1c48 100644 --- a/scripts/simple.py +++ b/scripts/simple.py @@ -14,12 +14,14 @@ # and instantiate from hydra.utils to create instances based on configuration. # %% +# We can instantiate a transformation from the default configuration using hydra. transform = instantiate(config.Transform()) # Instantiate a transformation using the configuration provided. # This will likely include any data augmentation or preprocessing steps defined in the configuration. # %% +# Create a fake dataset with 64 images of size 224x224x3 (3 channels), and 10 classes. dataset = FakeData( size=64, image_size=(3, 224, 224), @@ -39,13 +41,16 @@ # The commented-out code suggests an alternative to use the CelebA dataset. # It would download the CelebA dataset and use the training split, storing it in the '/tmp' directory. +# %% [markdown] +# We can declare a recipe and configuration object to train the model. +# I + +recipe = config.Recipe(model="resnet18_vae") # %% [markdown] # # %% -cfg = config.Config(dataset=dataset) -cfg.recipe.model = "resnet18_vae" -cfg.recipe.max_epochs = 100 +cfg = config.Config(recipe=recipe, dataset=dataset) bie = bioimage_embed.BioImageEmbed(cfg) # Create a configuration object 'cfg' using the config module, and assign the fake dataset to it. From b7a4ab289a5241048d5dce9244842a42c05f81c4 Mon Sep 17 00:00:00 2001 From: Craig Russell Date: Thu, 15 Aug 2024 13:45:08 +0100 Subject: [PATCH 03/11] [feat] adding the "full" notebook --- scripts/full.py | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 scripts/full.py diff --git a/scripts/full.py b/scripts/full.py new file mode 100644 index 00000000..f9e76842 --- /dev/null +++ b/scripts/full.py @@ -0,0 +1,52 @@ +# %% +# Import necessary modules +import bioimage_embed +import bioimage_embed.config as config +from hydra.utils import instantiate +from torchvision import datasets + +# %% +# Define input dimensions +input_dim = [3, 224, 224] + +# %% +# Use the default augmentation list +transform = instantiate(config.Transform()) +print(transform.json()) + +# %% +# Load the CelebA dataset with the specified transformations +dataset = datasets.CelebA( + root="data/", + split="train", + target_type="attr", + download=True, + transform=transform, +) + +# %% +# Create a dataloader from the dataset +dataloader = config.DataLoader(dataset=dataset) + +# %% +# Instantiate the model with the input dimensions +model = config.Model(input_dim=input_dim) + +# %% +# Define the recipe for the model +recipe = config.Recipe(model="resnet18_vae") + +# %% +# Create the configuration object with the recipe, dataloader, and model +cfg = config.Config(recipe=recipe, dataloader=dataloader, model=model) + +# %% +# Initialize BioImageEmbed with the configuration +bie = bioimage_embed.BioImageEmbed(cfg) + +# %% +# Train and export the model if this script is run as the main program +if __name__ == "__main__": + bie.check().train().export("model") +# lit_model = bie.check().train().get_model() +# bie.export("model") From c6b36a79e063673bc44c4ecc094d63659ca8abd8 Mon Sep 17 00:00:00 2001 From: Craig Russell Date: Thu, 15 Aug 2024 13:45:20 +0100 Subject: [PATCH 04/11] [feat] placeholder for idr notebook --- scripts/idr.py | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 scripts/idr.py diff --git a/scripts/idr.py b/scripts/idr.py new file mode 100644 index 00000000..f058f4f3 --- /dev/null +++ b/scripts/idr.py @@ -0,0 +1,3 @@ +# root = "/nfs/ftp/public/databases/IDR/" + +# dataset = datasets.ImageFolder(transform=transform) From f20021ad1b77dede177c10bc057a3f723cbb7a4d Mon Sep 17 00:00:00 2001 From: Craig Russell Date: Thu, 15 Aug 2024 13:46:34 +0100 Subject: [PATCH 05/11] [bug] not a json --- scripts/full.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/full.py b/scripts/full.py index f9e76842..a395abb8 100644 --- a/scripts/full.py +++ b/scripts/full.py @@ -12,7 +12,7 @@ # %% # Use the default augmentation list transform = instantiate(config.Transform()) -print(transform.json()) +transform.transform # %% # Load the CelebA dataset with the specified transformations From 3e3185f99b83ebac101e922fe422a11584e19290 Mon Sep 17 00:00:00 2001 From: Craig Russell Date: Thu, 15 Aug 2024 14:07:28 +0100 Subject: [PATCH 06/11] [feat] adding notebook auto committer --- .github/workflows/notebooks.yaml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .github/workflows/notebooks.yaml diff --git a/.github/workflows/notebooks.yaml b/.github/workflows/notebooks.yaml new file mode 100644 index 00000000..b91ff6fd --- /dev/null +++ b/.github/workflows/notebooks.yaml @@ -0,0 +1,28 @@ +name: jupytext-changes + +on: + push: + pull_request: + +jobs: + jupytext: + runs-on: ubuntu-latest + steps: + # Checkout + - uses: actions/checkout@v2 + + - name: Install Packages if changed files + if: ${{ steps.filter.outputs.notebooks == 'true' }} + run: | + pip install jupytext + + - name: Synch changed files + if: ${{ steps.filter.outputs.notebooks == 'true' }} + run: | + jupytext --use-source-timestamp --sync scripts/*.py + + # Auto commit any updated notebook files + - uses: stefanzweifel/git-auto-commit-action@v4 + with: + # This would be more useful if the git hash were referenced? + commit_message: "Auto-commit updated notebooks" From 97b87426d16710f3be6987ef2ea6399d84640320 Mon Sep 17 00:00:00 2001 From: Craig Russell Date: Thu, 15 Aug 2024 14:08:44 +0100 Subject: [PATCH 07/11] [fix] missing notebook sync --- .github/workflows/notebooks.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/notebooks.yaml b/.github/workflows/notebooks.yaml index b91ff6fd..372478ea 100644 --- a/.github/workflows/notebooks.yaml +++ b/.github/workflows/notebooks.yaml @@ -12,12 +12,10 @@ jobs: - uses: actions/checkout@v2 - name: Install Packages if changed files - if: ${{ steps.filter.outputs.notebooks == 'true' }} run: | pip install jupytext - name: Synch changed files - if: ${{ steps.filter.outputs.notebooks == 'true' }} run: | jupytext --use-source-timestamp --sync scripts/*.py From d309205c6cfe6692a13976367f12b6bfd5a8e184 Mon Sep 17 00:00:00 2001 From: ctr26 Date: Thu, 15 Aug 2024 13:09:01 +0000 Subject: [PATCH 08/11] Auto-commit updated notebooks --- scripts/idr.py | 2 ++ scripts/simple.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/idr.py b/scripts/idr.py index f058f4f3..d3525289 100644 --- a/scripts/idr.py +++ b/scripts/idr.py @@ -1,3 +1,5 @@ +# %% [markdown] # root = "/nfs/ftp/public/databases/IDR/" +# %% [markdown] # dataset = datasets.ImageFolder(transform=transform) diff --git a/scripts/simple.py b/scripts/simple.py index 2e6e1c48..33b9be32 100644 --- a/scripts/simple.py +++ b/scripts/simple.py @@ -44,8 +44,8 @@ # %% [markdown] # We can declare a recipe and configuration object to train the model. # I - -recipe = config.Recipe(model="resnet18_vae") +# +# recipe = config.Recipe(model="resnet18_vae") # %% [markdown] # From af331e07923718520a075951a8702f69c1ecc66a Mon Sep 17 00:00:00 2001 From: Craig Russell Date: Thu, 15 Aug 2024 14:10:39 +0100 Subject: [PATCH 09/11] [feat] notebooks inc for .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1950a5b1..9e8fa3c5 100644 --- a/.gitignore +++ b/.gitignore @@ -328,3 +328,4 @@ manuscript !README.md !config.yaml !src +!notebooks From 1d62e6e8340755398017cab470efc16537e7c101 Mon Sep 17 00:00:00 2001 From: ctr26 Date: Thu, 15 Aug 2024 13:11:50 +0000 Subject: [PATCH 10/11] Auto-commit updated notebooks --- notebooks/full.ipynb | 136 +++++++++++++++++++++++++++++++++++ notebooks/idr.ipynb | 29 ++++++++ notebooks/simple.ipynb | 156 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 321 insertions(+) create mode 100644 notebooks/full.ipynb create mode 100644 notebooks/idr.ipynb create mode 100644 notebooks/simple.ipynb diff --git a/notebooks/full.ipynb b/notebooks/full.ipynb new file mode 100644 index 00000000..df33d279 --- /dev/null +++ b/notebooks/full.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "1c21f652", + "metadata": {}, + "outputs": [], + "source": [ + "# Import necessary modules\n", + "import bioimage_embed\n", + "import bioimage_embed.config as config\n", + "from hydra.utils import instantiate\n", + "from torchvision import datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88fb43bf", + "metadata": {}, + "outputs": [], + "source": [ + "# Define input dimensions\n", + "input_dim = [3, 224, 224]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b364758d", + "metadata": {}, + "outputs": [], + "source": [ + "# Use the default augmentation list\n", + "transform = instantiate(config.Transform())\n", + "transform.transform" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5e1c0e0", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the CelebA dataset with the specified transformations\n", + "dataset = datasets.CelebA(\n", + " root=\"data/\",\n", + " split=\"train\",\n", + " target_type=\"attr\",\n", + " download=True,\n", + " transform=transform,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35482694", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a dataloader from the dataset\n", + "dataloader = config.DataLoader(dataset=dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2de56894", + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate the model with the input dimensions\n", + "model = config.Model(input_dim=input_dim)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dcbe489e", + "metadata": {}, + "outputs": [], + "source": [ + "# Define the recipe for the model\n", + "recipe = config.Recipe(model=\"resnet18_vae\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8a2be1b", + "metadata": {}, + "outputs": [], + "source": [ + "# Create the configuration object with the recipe, dataloader, and model\n", + "cfg = config.Config(recipe=recipe, dataloader=dataloader, model=model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "949f9ffb", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize BioImageEmbed with the configuration\n", + "bie = bioimage_embed.BioImageEmbed(cfg)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "717481bc", + "metadata": {}, + "outputs": [], + "source": [ + "# Train and export the model if this script is run as the main program\n", + "if __name__ == \"__main__\":\n", + " bie.check().train().export(\"model\")\n", + "# lit_model = bie.check().train().get_model()\n", + "# bie.export(\"model\")" + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "-all", + "main_language": "python", + "notebook_metadata_filter": "-all" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/idr.ipynb b/notebooks/idr.ipynb new file mode 100644 index 00000000..6d5cd0c8 --- /dev/null +++ b/notebooks/idr.ipynb @@ -0,0 +1,29 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c657c25f", + "metadata": {}, + "source": [ + "root = \"/nfs/ftp/public/databases/IDR/\"" + ] + }, + { + "cell_type": "markdown", + "id": "e8bb1331", + "metadata": {}, + "source": [ + "dataset = datasets.ImageFolder(transform=transform)" + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "-all", + "main_language": "python", + "notebook_metadata_filter": "-all" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/simple.ipynb b/notebooks/simple.ipynb new file mode 100644 index 00000000..81a82c19 --- /dev/null +++ b/notebooks/simple.ipynb @@ -0,0 +1,156 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "ac6bd2ea", + "metadata": {}, + "outputs": [], + "source": [ + "import bioimage_embed\n", + "import bioimage_embed.config as config\n", + "\n", + "# Import necessary modules from bioimage_embed and config.\n", + "# bioimage_embed is likely a library designed for embedding biological images,\n", + "# and config is used to handle configurations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f28d1d38", + "metadata": {}, + "outputs": [], + "source": [ + "from torchvision.datasets import FakeData\n", + "from hydra.utils import instantiate\n", + "\n", + "# Import FakeData from torchvision.datasets to create a fake dataset,\n", + "# and instantiate from hydra.utils to create instances based on configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8a6921a", + "metadata": {}, + "outputs": [], + "source": [ + "# We can instantiate a transformation from the default configuration using hydra.\n", + "transform = instantiate(config.Transform())\n", + "\n", + "# Instantiate a transformation using the configuration provided.\n", + "# This will likely include any data augmentation or preprocessing steps defined in the configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4ab05fb", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a fake dataset with 64 images of size 224x224x3 (3 channels), and 10 classes.\n", + "dataset = FakeData(\n", + " size=64,\n", + " image_size=(3, 224, 224),\n", + " num_classes=10,\n", + " transform=transform,\n", + ")\n", + "\n", + "# Create a fake dataset with 64 images of size 224x224x3 (3 channels), and 10 classes.\n", + "# This dataset will be used to simulate data for testing purposes. The 'transform' argument applies the\n", + "# transformations defined earlier to the dataset.\n", + "\n", + "# NOTE: The 'dataset' must be a PyTorch Dataset object with X (data) and y (labels).\n", + "# If using an unsupervised encoder, set the labels (y) to None; the model will ignore them during training.\n", + "\n", + "# dataset=CelebA(download=True, root=\"/tmp\", split=\"train\")\n", + "\n", + "# The commented-out code suggests an alternative to use the CelebA dataset.\n", + "# It would download the CelebA dataset and use the training split, storing it in the '/tmp' directory." + ] + }, + { + "cell_type": "markdown", + "id": "4f41bc2a", + "metadata": { + "lines_to_next_cell": 0 + }, + "source": [ + "We can declare a recipe and configuration object to train the model.\n", + "I\n", + "\n", + "recipe = config.Recipe(model=\"resnet18_vae\")" + ] + }, + { + "cell_type": "markdown", + "id": "199e26e3", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33ba45ad", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "cfg = config.Config(recipe=recipe, dataset=dataset)\n", + "bie = bioimage_embed.BioImageEmbed(cfg)\n", + "\n", + "# Create a configuration object 'cfg' using the config module, and assign the fake dataset to it.\n", + "# The model is set to \"resnet18_vae\" and the maximum number of epochs for training is set to 100.\n", + "# Instantiate the BioImageEmbed object 'bie' using the configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5cf97080", + "metadata": {}, + "outputs": [], + "source": [ + "def process():\n", + " bie.check()\n", + " bie.train()\n", + " bie.export()\n", + "\n", + "\n", + "# Define a process function that performs three steps:\n", + "# 1. 'check()' to verify the setup or configuration.\n", + "# 2. 'train()' to start training the model.\n", + "# 3. 'export()' to export the trained model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4fa9482", + "metadata": {}, + "outputs": [], + "source": [ + "# This is the entrypoint for the script and very important if cfg.trainer.num_workers > 0\n", + "if __name__ == \"__main__\":\n", + " process()\n", + "\n", + "# This is the entry point for the script. The 'if __name__ == \"__main__\":' statement ensures that the 'process()'\n", + "# function is called only when the script is run directly, not when imported as a module.\n", + "# This is crucial if the 'num_workers' parameter is set in cfg.trainer, as it prevents potential issues\n", + "# with multiprocessing in PyTorch." + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "-all", + "main_language": "python", + "notebook_metadata_filter": "-all" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From dedea1f29d7c716444727da72724ae321a922ee3 Mon Sep 17 00:00:00 2001 From: Craig Russell Date: Thu, 15 Aug 2024 14:20:51 +0100 Subject: [PATCH 11/11] [bug] not running for some reason? --- .github/workflows/notebooks.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/notebooks.yaml b/.github/workflows/notebooks.yaml index 372478ea..8828321c 100644 --- a/.github/workflows/notebooks.yaml +++ b/.github/workflows/notebooks.yaml @@ -2,7 +2,7 @@ name: jupytext-changes on: push: - pull_request: + # pull_request: jobs: jupytext: @@ -10,7 +10,8 @@ jobs: steps: # Checkout - uses: actions/checkout@v2 - + with: + ref: ${{ github.head_ref }} - name: Install Packages if changed files run: | pip install jupytext