Merge pull request #58 from ctr26/notebooks

Notebooks
uhlmanngroup · Aug 15, 2024 · 26366df · 26366df
2 parents 124b576 + dedea1f
commit 26366df
Show file tree

Hide file tree

Showing 8 changed files with 414 additions and 3 deletions.
diff --git a/.github/workflows/notebooks.yaml b/.github/workflows/notebooks.yaml
@@ -0,0 +1,27 @@
+# Re-syncs the jupytext-paired files under scripts/ on every push and commits
+# whatever files the sync changed back to the branch.
+name: jupytext-changes
+
+on:
+  push:
+  # pull_request:
+
+jobs:
+  jupytext:
+    runs-on: ubuntu-latest
+    # The auto-commit step pushes to the repository, so the job's GITHUB_TOKEN
+    # needs write access to contents (the default token may be read-only).
+    permissions:
+      contents: write
+    steps:
+      # Checkout
+      - uses: actions/checkout@v4
+        with:
+          # Empty on push events, in which case checkout falls back to the
+          # ref that triggered the workflow.
+          ref: ${{ github.head_ref }}
+      # Use a runner-managed interpreter instead of relying on whatever
+      # system Python/pip the runner image happens to ship.
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+      - name: Install jupytext
+        run: |
+          pip install jupytext
+
+      - name: Sync changed files
+        run: |
+          jupytext --use-source-timestamp --sync scripts/*.py
+
+      # Auto commit any updated notebook files
+      - uses: stefanzweifel/git-auto-commit-action@v5
+        with:
+          # This would be more useful if the git hash were referenced?
+          commit_message: "Auto-commit updated notebooks"
diff --git a/.gitignore b/.gitignore
@@ -328,3 +328,4 @@ manuscript
 !README.md
 !config.yaml
 !src
+!notebooks
diff --git a/notebooks/full.ipynb b/notebooks/full.ipynb
@@ -0,0 +1,136 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1c21f652",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import necessary modules\n",
+    "import bioimage_embed\n",
+    "import bioimage_embed.config as config\n",
+    "from hydra.utils import instantiate\n",
+    "from torchvision import datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "88fb43bf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define input dimensions\n",
+    "input_dim = [3, 224, 224]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b364758d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Use the default augmentation list\n",
+    "transform = instantiate(config.Transform())\n",
+    "transform.transform"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b5e1c0e0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the CelebA dataset with the specified transformations\n",
+    "dataset = datasets.CelebA(\n",
+    "    root=\"data/\",\n",
+    "    split=\"train\",\n",
+    "    target_type=\"attr\",\n",
+    "    download=True,\n",
+    "    transform=transform,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "35482694",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a dataloader from the dataset\n",
+    "dataloader = config.DataLoader(dataset=dataset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2de56894",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Instantiate the model with the input dimensions\n",
+    "model = config.Model(input_dim=input_dim)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dcbe489e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define the recipe for the model\n",
+    "recipe = config.Recipe(model=\"resnet18_vae\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c8a2be1b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create the configuration object with the recipe, dataloader, and model\n",
+    "cfg = config.Config(recipe=recipe, dataloader=dataloader, model=model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "949f9ffb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize BioImageEmbed with the configuration\n",
+    "bie = bioimage_embed.BioImageEmbed(cfg)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "717481bc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Train and export the model if this script is run as the main program\n",
+    "if __name__ == \"__main__\":\n",
+    "    bie.check().train().export(\"model\")\n",
+    "# lit_model = bie.check().train().get_model()\n",
+    "# bie.export(\"model\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "jupytext": {
+   "cell_metadata_filter": "-all",
+   "main_language": "python",
+   "notebook_metadata_filter": "-all"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/idr.ipynb b/notebooks/idr.ipynb
@@ -0,0 +1,29 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c657c25f",
+   "metadata": {},
+   "source": [
+    "root = \"/nfs/ftp/public/databases/IDR/\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e8bb1331",
+   "metadata": {},
+   "source": [
+    "dataset = datasets.ImageFolder(transform=transform)"
+   ]
+  }
+ ],
+ "metadata": {
+  "jupytext": {
+   "cell_metadata_filter": "-all",
+   "main_language": "python",
+   "notebook_metadata_filter": "-all"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/simple.ipynb b/notebooks/simple.ipynb
@@ -0,0 +1,156 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ac6bd2ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import bioimage_embed\n",
+    "import bioimage_embed.config as config\n",
+    "\n",
+    "# Import necessary modules from bioimage_embed and config.\n",
+    "# bioimage_embed is likely a library designed for embedding biological images,\n",
+    "# and config is used to handle configurations."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f28d1d38",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from torchvision.datasets import FakeData\n",
+    "from hydra.utils import instantiate\n",
+    "\n",
+    "# Import FakeData from torchvision.datasets to create a fake dataset,\n",
+    "# and instantiate from hydra.utils to create instances based on configuration."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e8a6921a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We can instantiate a transformation from the default configuration using hydra.\n",
+    "transform = instantiate(config.Transform())\n",
+    "\n",
+    "# Instantiate a transformation using the configuration provided.\n",
+    "# This will likely include any data augmentation or preprocessing steps defined in the configuration."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c4ab05fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a fake dataset with 64 images of size 224x224x3 (3 channels), and 10 classes.\n",
+    "dataset = FakeData(\n",
+    "    size=64,\n",
+    "    image_size=(3, 224, 224),\n",
+    "    num_classes=10,\n",
+    "    transform=transform,\n",
+    ")\n",
+    "\n",
+    "# Create a fake dataset with 64 images of size 224x224x3 (3 channels), and 10 classes.\n",
+    "# This dataset will be used to simulate data for testing purposes. The 'transform' argument applies the\n",
+    "# transformations defined earlier to the dataset.\n",
+    "\n",
+    "# NOTE: The 'dataset' must be a PyTorch Dataset object with X (data) and y (labels).\n",
+    "# If using an unsupervised encoder, set the labels (y) to None; the model will ignore them during training.\n",
+    "\n",
+    "# dataset=CelebA(download=True, root=\"/tmp\", split=\"train\")\n",
+    "\n",
+    "# The commented-out code suggests an alternative to use the CelebA dataset.\n",
+    "# It would download the CelebA dataset and use the training split, storing it in the '/tmp' directory."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4f41bc2a",
+   "metadata": {},
+   "source": [
+    "We can declare a recipe and configuration object to train the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "199e26e3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "recipe = config.Recipe(model=\"resnet18_vae\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "33ba45ad",
+   "metadata": {
+    "lines_to_next_cell": 2
+   },
+   "outputs": [],
+   "source": [
+    "cfg = config.Config(recipe=recipe, dataset=dataset)\n",
+    "bie = bioimage_embed.BioImageEmbed(cfg)\n",
+    "\n",
+    "# Create a configuration object 'cfg' using the config module, and assign the fake dataset to it.\n",
+    "# The recipe selects the \"resnet18_vae\" model; no epoch count is passed here, so the number of training epochs is left to the configuration defaults.\n",
+    "# Instantiate the BioImageEmbed object 'bie' using the configuration."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5cf97080",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def process():\n",
+    "    bie.check()\n",
+    "    bie.train()\n",
+    "    bie.export()\n",
+    "\n",
+    "\n",
+    "# Define a process function that performs three steps:\n",
+    "# 1. 'check()' to verify the setup or configuration.\n",
+    "# 2. 'train()' to start training the model.\n",
+    "# 3. 'export()' to export the trained model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b4fa9482",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is the entrypoint for the script and very important if cfg.trainer.num_workers > 0\n",
+    "if __name__ == \"__main__\":\n",
+    "    process()\n",
+    "\n",
+    "# This is the entry point for the script. The 'if __name__ == \"__main__\":' statement ensures that the 'process()'\n",
+    "# function is called only when the script is run directly, not when imported as a module.\n",
+    "# This is crucial if the 'num_workers' parameter is set in cfg.trainer, as it prevents potential issues\n",
+    "# with multiprocessing in PyTorch."
+   ]
+  }
+ ],
+ "metadata": {
+  "jupytext": {
+   "cell_metadata_filter": "-all",
+   "main_language": "python",
+   "notebook_metadata_filter": "-all"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}