
Commit

Merge pull request #58 from ctr26/notebooks
Notebooks
ctr26 authored Aug 15, 2024
2 parents 124b576 + dedea1f commit 26366df
Showing 8 changed files with 414 additions and 3 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/notebooks.yaml
@@ -0,0 +1,27 @@
name: jupytext-changes

on:
  push:
  # pull_request:

jobs:
  jupytext:
    runs-on: ubuntu-latest
    steps:
      # Checkout
      - uses: actions/checkout@v2
        with:
          ref: ${{ github.head_ref }}
      - name: Install packages if files changed
        run: |
          pip install jupytext
      - name: Sync changed files
        run: |
          jupytext --use-source-timestamp --sync scripts/*.py
      # Auto commit any updated notebook files
      - uses: stefanzweifel/git-auto-commit-action@v4
        with:
          # This would be more useful if it referenced the git hash of the triggering commit
          commit_message: "Auto-commit updated notebooks"
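
For reference, a minimal sketch of what the sync step above does for each paired file, written against jupytext's Python API instead of the CLI. The scripts/-to-notebooks/ mapping is an assumption based on this repository's layout; the real --sync command reads the pairing from jupytext metadata in each file.

import glob

import jupytext

for script in glob.glob("scripts/*.py"):
    # Parse the jupytext-formatted script into a notebook object.
    notebook = jupytext.read(script)
    # Assumed pairing: scripts/foo.py <-> notebooks/foo.ipynb.
    target = script.replace("scripts/", "notebooks/").replace(".py", ".ipynb")
    # Write (or update) the paired .ipynb, which the auto-commit step then commits.
    jupytext.write(notebook, target)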
1 change: 1 addition & 0 deletions .gitignore
@@ -328,3 +328,4 @@ manuscript
!README.md
!config.yaml
!src
!notebooks
136 changes: 136 additions & 0 deletions notebooks/full.ipynb
@@ -0,0 +1,136 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "1c21f652",
"metadata": {},
"outputs": [],
"source": [
"# Import necessary modules\n",
"import bioimage_embed\n",
"import bioimage_embed.config as config\n",
"from hydra.utils import instantiate\n",
"from torchvision import datasets"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88fb43bf",
"metadata": {},
"outputs": [],
"source": [
"# Define input dimensions\n",
"input_dim = [3, 224, 224]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b364758d",
"metadata": {},
"outputs": [],
"source": [
"# Use the default augmentation list\n",
"transform = instantiate(config.Transform())\n",
"transform.transform"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5e1c0e0",
"metadata": {},
"outputs": [],
"source": [
"# Load the CelebA dataset with the specified transformations\n",
"dataset = datasets.CelebA(\n",
" root=\"data/\",\n",
" split=\"train\",\n",
" target_type=\"attr\",\n",
" download=True,\n",
" transform=transform,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "35482694",
"metadata": {},
"outputs": [],
"source": [
"# Create a dataloader from the dataset\n",
"dataloader = config.DataLoader(dataset=dataset)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2de56894",
"metadata": {},
"outputs": [],
"source": [
"# Instantiate the model with the input dimensions\n",
"model = config.Model(input_dim=input_dim)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dcbe489e",
"metadata": {},
"outputs": [],
"source": [
"# Define the recipe for the model\n",
"recipe = config.Recipe(model=\"resnet18_vae\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c8a2be1b",
"metadata": {},
"outputs": [],
"source": [
"# Create the configuration object with the recipe, dataloader, and model\n",
"cfg = config.Config(recipe=recipe, dataloader=dataloader, model=model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "949f9ffb",
"metadata": {},
"outputs": [],
"source": [
"# Initialize BioImageEmbed with the configuration\n",
"bie = bioimage_embed.BioImageEmbed(cfg)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "717481bc",
"metadata": {},
"outputs": [],
"source": [
"# Train and export the model if this script is run as the main program\n",
"if __name__ == \"__main__\":\n",
" bie.check().train().export(\"model\")\n",
"# lit_model = bie.check().train().get_model()\n",
"# bie.export(\"model\")"
]
}
],
"metadata": {
"jupytext": {
"cell_metadata_filter": "-all",
"main_language": "python",
"notebook_metadata_filter": "-all"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
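
Since the workflow above generates notebooks from scripts/*.py, full.ipynb presumably has a paired script. A condensed sketch of what that scripts/full.py might look like in jupytext's py:percent format follows; the filename and cell layout are assumptions, while the statements themselves are taken from the cells above.

# %%
# Import necessary modules
import bioimage_embed
import bioimage_embed.config as config
from hydra.utils import instantiate
from torchvision import datasets

# %%
# Define input dimensions and the default augmentation pipeline
input_dim = [3, 224, 224]
transform = instantiate(config.Transform())

# %%
# Load the CelebA dataset with the specified transformations
dataset = datasets.CelebA(
    root="data/",
    split="train",
    target_type="attr",
    download=True,
    transform=transform,
)

# %%
# Assemble dataloader, model, recipe, and config, then wrap them in BioImageEmbed
dataloader = config.DataLoader(dataset=dataset)
model = config.Model(input_dim=input_dim)
recipe = config.Recipe(model="resnet18_vae")
cfg = config.Config(recipe=recipe, dataloader=dataloader, model=model)
bie = bioimage_embed.BioImageEmbed(cfg)

# %%
# Train and export the model when run as a script
if __name__ == "__main__":
    bie.check().train().export("model")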
29 changes: 29 additions & 0 deletions notebooks/idr.ipynb
@@ -0,0 +1,29 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c657c25f",
"metadata": {},
"source": [
"root = \"/nfs/ftp/public/databases/IDR/\""
]
},
{
"cell_type": "markdown",
"id": "e8bb1331",
"metadata": {},
"source": [
"dataset = datasets.ImageFolder(transform=transform)"
]
}
],
"metadata": {
"jupytext": {
"cell_metadata_filter": "-all",
"main_language": "python",
"notebook_metadata_filter": "-all"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
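
The two markdown cells above are only a stub. A hedged sketch of how the IDR images could actually be loaded, reusing the default transform from the other notebooks; note that torchvision's ImageFolder expects one subdirectory per class under root, which the raw IDR FTP tree does not provide, so a curated local copy of the images is assumed.

import bioimage_embed.config as config
from hydra.utils import instantiate
from torchvision import datasets

# Path taken from the stub cell above; assumed to point at a class-per-subdirectory layout.
root = "/nfs/ftp/public/databases/IDR/"

# Same default transform instantiated in full.ipynb and simple.ipynb.
transform = instantiate(config.Transform())

dataset = datasets.ImageFolder(root=root, transform=transform)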
156 changes: 156 additions & 0 deletions notebooks/simple.ipynb
@@ -0,0 +1,156 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "ac6bd2ea",
"metadata": {},
"outputs": [],
"source": [
"import bioimage_embed\n",
"import bioimage_embed.config as config\n",
"\n",
"# Import necessary modules from bioimage_embed and config.\n",
"# bioimage_embed is likely a library designed for embedding biological images,\n",
"# and config is used to handle configurations."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f28d1d38",
"metadata": {},
"outputs": [],
"source": [
"from torchvision.datasets import FakeData\n",
"from hydra.utils import instantiate\n",
"\n",
"# Import FakeData from torchvision.datasets to create a fake dataset,\n",
"# and instantiate from hydra.utils to create instances based on configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e8a6921a",
"metadata": {},
"outputs": [],
"source": [
"# We can instantiate a transformation from the default configuration using hydra.\n",
"transform = instantiate(config.Transform())\n",
"\n",
"# Instantiate a transformation using the configuration provided.\n",
"# This will likely include any data augmentation or preprocessing steps defined in the configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4ab05fb",
"metadata": {},
"outputs": [],
"source": [
"# Create a fake dataset with 64 images of size 224x224x3 (3 channels), and 10 classes.\n",
"dataset = FakeData(\n",
" size=64,\n",
" image_size=(3, 224, 224),\n",
" num_classes=10,\n",
" transform=transform,\n",
")\n",
"\n",
"# Create a fake dataset with 64 images of size 224x224x3 (3 channels), and 10 classes.\n",
"# This dataset will be used to simulate data for testing purposes. The 'transform' argument applies the\n",
"# transformations defined earlier to the dataset.\n",
"\n",
"# NOTE: The 'dataset' must be a PyTorch Dataset object with X (data) and y (labels).\n",
"# If using an unsupervised encoder, set the labels (y) to None; the model will ignore them during training.\n",
"\n",
"# dataset=CelebA(download=True, root=\"/tmp\", split=\"train\")\n",
"\n",
"# The commented-out code suggests an alternative to use the CelebA dataset.\n",
"# It would download the CelebA dataset and use the training split, storing it in the '/tmp' directory."
]
},
{
"cell_type": "markdown",
"id": "4f41bc2a",
"metadata": {
"lines_to_next_cell": 0
},
"source": [
"We can declare a recipe and configuration object to train the model.\n",
"I\n",
"\n",
"recipe = config.Recipe(model=\"resnet18_vae\")"
]
},
{
"cell_type": "markdown",
"id": "199e26e3",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "33ba45ad",
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"cfg = config.Config(recipe=recipe, dataset=dataset)\n",
"bie = bioimage_embed.BioImageEmbed(cfg)\n",
"\n",
"# Create a configuration object 'cfg' using the config module, and assign the fake dataset to it.\n",
"# The model is set to \"resnet18_vae\" and the maximum number of epochs for training is set to 100.\n",
"# Instantiate the BioImageEmbed object 'bie' using the configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5cf97080",
"metadata": {},
"outputs": [],
"source": [
"def process():\n",
" bie.check()\n",
" bie.train()\n",
" bie.export()\n",
"\n",
"\n",
"# Define a process function that performs three steps:\n",
"# 1. 'check()' to verify the setup or configuration.\n",
"# 2. 'train()' to start training the model.\n",
"# 3. 'export()' to export the trained model."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4fa9482",
"metadata": {},
"outputs": [],
"source": [
"# This is the entrypoint for the script and very important if cfg.trainer.num_workers > 0\n",
"if __name__ == \"__main__\":\n",
" process()\n",
"\n",
"# This is the entry point for the script. The 'if __name__ == \"__main__\":' statement ensures that the 'process()'\n",
"# function is called only when the script is run directly, not when imported as a module.\n",
"# This is crucial if the 'num_workers' parameter is set in cfg.trainer, as it prevents potential issues\n",
"# with multiprocessing in PyTorch."
]
}
],
"metadata": {
"jupytext": {
"cell_metadata_filter": "-all",
"main_language": "python",
"notebook_metadata_filter": "-all"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
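
As the NOTE in simple.ipynb points out, FakeData is just a stand-in for any PyTorch image dataset. A minimal end-to-end sketch with a real image folder swapped in; the root path is hypothetical, everything else mirrors the cells above.

import bioimage_embed
import bioimage_embed.config as config
from hydra.utils import instantiate
from torchvision import datasets

transform = instantiate(config.Transform())
# Hypothetical path: any folder of images arranged one subdirectory per class.
dataset = datasets.ImageFolder(root="data/my_images", transform=transform)

recipe = config.Recipe(model="resnet18_vae")
cfg = config.Config(recipe=recipe, dataset=dataset)
bie = bioimage_embed.BioImageEmbed(cfg)

# Keep the __main__ guard, as above, in case multiprocessing workers are enabled.
if __name__ == "__main__":
    bie.check().train().export("model")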
