Dev (ashleve#78)
* add neptune integration

* improve readme

* quick fix

* improve datamodule

* remove unnecessary folders from .gitignore

* change folder structure

* add optuna config

* add bash tests

* redesign readme.md

* change wandb callbacks names

* fix wandb callbacks

* resolve issues with sweeping alongside wandb

* improve rich config printing

* update readme

* Update README.md
ashleve authored Mar 1, 2021
1 parent bcf4146 commit 036aec4
Showing 56 changed files with 1,285 additions and 828 deletions.
7 changes: 3 additions & 4 deletions .gitignore
@@ -78,6 +78,7 @@ fabric.properties
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
**/.vscode

### Python template
# Byte-compiled / optimized / DLL files
@@ -225,7 +226,5 @@ ipython_config.py
# git rm -r .ipynb_checkpoints/

.idea/
project/data/
project/lightning_logs/
project/wandb/
project/logs/
data/
logs/
331 changes: 216 additions & 115 deletions README.md

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions conda_env_cpu.yaml
@@ -0,0 +1,14 @@
#name: conda_env_name

channels:
- pytorch
- conda-forge
- defaults

dependencies:
- python=3.8
- pip
- notebook
- pytorch
- torchvision
- torchaudio
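Usage note (not part of the commit): since the name field above is commented out, an environment can be created from this file with a name supplied on the command line, e.g. `conda env create -f conda_env_cpu.yaml -n my_env` followed by `conda activate my_env`, where my_env is any name you choose.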
File renamed without changes.
@@ -1,16 +1,16 @@
model_checkpoint:
_target_: pytorch_lightning.callbacks.ModelCheckpoint
monitor: "val_acc" # name of the logged metric which determines when model is improving
monitor: "val/acc" # name of the logged metric which determines when model is improving
save_top_k: 2 # save k best models (determined by above metric)
save_last: True # additionally always save model from last epoch
mode: "max" # can be "max" or "min"
dirpath: 'checkpoints/'
filename: 'sample-mnist-{epoch:02d}'
filename: '{epoch:02d}'


early_stopping:
_target_: pytorch_lightning.callbacks.EarlyStopping
monitor: "val_acc" # name of the logged metric which determines when model is improving
monitor: "val/acc" # name of the logged metric which determines when model is improving
patience: 100 # how many epochs of not improving until training stops
mode: "max" # can be "max" or "min"
min_delta: 0.0 # minimum change in the monitored metric needed to qualify as an improvement
File renamed without changes.
34 changes: 34 additions & 0 deletions configs/callbacks/wandb_callbacks.yaml
@@ -0,0 +1,34 @@
defaults:
- default_callbacks.yaml


upload_code_to_wandb_as_artifact:
_target_: src.callbacks.wandb_callbacks.UploadCodeToWandbAsArtifact
code_dir: ${work_dir}


upload_ckpts_to_wandb_as_artifact:
_target_: src.callbacks.wandb_callbacks.UploadCheckpointsToWandbAsArtifact
ckpt_dir: "checkpoints/"
upload_best_only: False


watch_model_with_wandb:
_target_: src.callbacks.wandb_callbacks.WatchModelWithWandb
log: "all"
log_freq: 100


# BUGGED :(
# save_best_metric_scores_to_wandb:
# _target_: src.callbacks.wandb_callbacks.LogBestMetricScoresToWandb


save_f1_precision_recall_heatmap_to_wandb:
_target_: src.callbacks.wandb_callbacks.LogF1PrecisionRecallHeatmapToWandb
class_names: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']


save_confusion_matrix_to_wandb:
_target_: src.callbacks.wandb_callbacks.LogConfusionMatrixToWandb
class_names: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
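The _target_ paths above point at callback classes in src/callbacks/wandb_callbacks.py, which is not shown in this diff. Purely as an illustrative sketch (an assumption about what such a class could look like, not the commit's actual code), WatchModelWithWandb might be implemented roughly like this:

import wandb
from pytorch_lightning import Callback


class WatchModelWithWandb(Callback):
    """Ask wandb to watch the model at the start of training (illustrative sketch)."""

    def __init__(self, log: str = "all", log_freq: int = 100):
        self.log = log
        self.log_freq = log_freq

    def on_train_start(self, trainer, pl_module):
        # wandb.watch hooks into the module and logs gradients and/or parameters
        wandb.watch(pl_module, log=self.log, log_freq=self.log_freq)

The config keys log and log_freq map directly onto the constructor arguments, which is how hydra's _target_ instantiation works for every entry in this file.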
17 changes: 10 additions & 7 deletions project/configs/config.yaml → configs/config.yaml
@@ -5,24 +5,27 @@ defaults:
- trainer: default_trainer.yaml
- model: mnist_model.yaml
- datamodule: mnist_datamodule.yaml
- seeds: default_seeds.yaml # set this to null if you don't want to use seeds
- callbacks: default_callbacks.yaml # set this to null if you don't want to use callbacks
- logger: null # set logger here or use command line (e.g. `python train.py logger=wandb`)

# we add this just to enable color logging
# - hydra/hydra_logging: colorlog
# - hydra/job_logging: colorlog
# enable color logging
# - override hydra/hydra_logging: colorlog
# - override hydra/job_logging: colorlog


# path to original working directory (the directory that `train.py` was executed from in command line)
# path to original working directory (that `train.py` was executed from in command line)
# hydra hijacks working directory by changing it to the current log directory,
# so it's useful to have path to original working directory as a special variable
# read more here: https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory
original_work_dir: ${hydra:runtime.cwd}
work_dir: ${hydra:runtime.cwd}


# path to folder with data
data_dir: ${original_work_dir}/data/
data_dir: ${work_dir}/data/


# pretty print config at the start of the run using Rich library
print_config: True


# output paths for hydra logs
65 changes: 65 additions & 0 deletions configs/config_optuna.yaml
@@ -0,0 +1,65 @@
# @package _global_

# example hyperparameter optimization of some experiment with optuna:
# python train.py -m --config-name config_optuna.yaml +experiment=exp_example_simple logger=wandb

defaults:
# load everything from main config file
- config.yaml

# override sweeper to optuna!
- override hydra/sweeper: optuna


# choose metric which will be optimized by optuna
optimized_metric: "val/acc_best"


hydra:
# here we define optuna objective
# it optimizes for value returned from function with @hydra.main decorator
# learn more here: https://hydra.cc/docs/next/plugins/optuna_sweeper
sweeper:
optuna_config:
study_name: null
storage: null
n_jobs: 1
seed: 12345

# 'minimize' or 'maximize' the objective
direction: maximize

# number of experiments that will be executed
n_trials: 30

# choose optuna hyperparameter sampler ('tpe', 'random', 'cmaes' or 'nsgaii', 'motpe')
# learn more here: https://optuna.readthedocs.io/en/stable/reference/samplers.html
sampler: tpe

# define range of hyperparameters
search_space:
datamodule.batch_size:
type: categorical
choices: [32, 64, 128]
model.lr:
type: float
low: 0.0001
high: 0.2
model.lin1_size:
type: categorical
choices: [64, 128, 256, 512]
model.dropout1:
type: categorical
choices: [0.05, 0.1, 0.25, 0.5]
model.lin2_size:
type: categorical
choices: [64, 128, 256, 512]
model.dropout2:
type: categorical
choices: [0.05, 0.1, 0.25, 0.5]
model.lin3_size:
type: categorical
choices: [32, 64, 128, 256]
model.dropout3:
type: categorical
choices: [0.05, 0.1, 0.25, 0.5]
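As the comments above say, the optuna sweeper optimizes the value returned by the function decorated with @hydra.main. A minimal sketch of how train.py might produce that value (an assumption about the surrounding code, not part of this diff):

import hydra
from hydra.utils import instantiate
from omegaconf import DictConfig


@hydra.main(config_path="configs/", config_name="config.yaml")
def main(cfg: DictConfig) -> float:
    # hypothetical entry point: instantiate the pieces declared in the config
    datamodule = instantiate(cfg.datamodule)
    model = instantiate(cfg.model)
    trainer = instantiate(cfg.trainer)
    trainer.fit(model=model, datamodule=datamodule)

    # return the metric named by `optimized_metric` so the sweeper can maximize it
    return float(trainer.callback_metrics[cfg.optimized_metric])


if __name__ == "__main__":
    main()

Each sweep trial then runs this function with a different set of values drawn from the search_space above.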
File renamed without changes.
74 changes: 74 additions & 0 deletions configs/experiment/exp_example_full.yaml
@@ -0,0 +1,74 @@
# @package _global_

# to execute this experiment run:
# python train.py +experiment=exp_example_full

defaults:
- override /trainer: null # override trainer to null so it's not loaded from main config defaults...
- override /model: null
- override /datamodule: null
- override /callbacks: null
- override /logger: null

# we override default configurations with nulls to prevent them from loading at all
# instead we define all modules and their paths directly in this config,
# so everything is stored in one place for more readability

seed: 12345

trainer:
_target_: pytorch_lightning.Trainer
gpus: 0
min_epochs: 1
max_epochs: 10
gradient_clip_val: 0.5
accumulate_grad_batches: 2
weights_summary: null
# resume_from_checkpoint: ${work_dir}/last.ckpt

model:
_target_: src.models.mnist_model.LitModelMNIST
optimizer: adam
lr: 0.001
weight_decay: 0.00005
architecture: SimpleDenseNet
input_size: 784
lin1_size: 256
dropout1: 0.30
lin2_size: 256
dropout2: 0.25
lin3_size: 128
dropout3: 0.20
output_size: 10

datamodule:
_target_: src.datamodules.mnist_datamodule.MNISTDataModule
data_dir: ${data_dir}
batch_size: 64
train_val_test_split: [55_000, 5_000, 10_000]
num_workers: 0
pin_memory: False

callbacks:
model_checkpoint:
_target_: pytorch_lightning.callbacks.ModelCheckpoint
monitor: "val/acc"
save_top_k: 2
save_last: True
mode: "max"
dirpath: 'checkpoints/'
filename: 'sample-mnist-{epoch:02d}'
early_stopping:
_target_: pytorch_lightning.callbacks.EarlyStopping
monitor: "val/acc"
patience: 100
mode: "max"

logger:
wandb:
tags: ["best_model", "uwu"]
notes: "Description of this model."
neptune:
tags: ["best_model"]
csv_logger:
save_dir: "."
@@ -7,19 +7,18 @@ defaults:
- override /trainer: default_trainer.yaml # choose trainer from 'configs/trainer/' folder or set to null
- override /model: mnist_model.yaml # choose model from 'configs/model/' folder or set to null
- override /datamodule: mnist_datamodule.yaml # choose datamodule from 'configs/datamodule/' folder or set to null
- override /seeds: default_seeds.yaml # choose seeds from 'configs/seeds/' folder or set to null
- override /callbacks: default_callbacks.yaml # choose callback set from 'configs/callbacks/' folder or set to null
- override /logger: null # choose logger from 'configs/logger/' folder or set it from console when running experiment:
# `python train.py +experiment=exp_example_simple logger=wandb`
- override /logger: null # choose logger from 'configs/logger/' folder or set to null

# all parameters below will be merged with parameters from default configurations set above
# this allows you to overwrite only specified parameters

seeds:
pytorch_seed: 12345
seed: 12345

trainer:
min_epochs: 1
max_epochs: 10
gradient_clip_val: 0.5

model:
lr: 0.001
@@ -1,5 +1,7 @@
# Comet logger config
# https://www.comet.ml

comet:
_target_: pytorch_lightning.loggers.comet.CometLogger
api_key: ???
project_name: "project_template_test"
experiment_name: null
@@ -1,5 +1,6 @@
# Csv logger config
csv_logger:
# CSVLogger built in PyTorch Lightning

csv:
_target_: pytorch_lightning.loggers.csv_logs.CSVLogger
save_dir: "."
name: "csv_logger/"
name: "csv/"
8 changes: 8 additions & 0 deletions configs/logger/many_loggers.yaml
@@ -0,0 +1,8 @@
# train with many loggers at once

defaults:
- csv.yaml
- wandb.yaml
# - neptune.yaml
# - comet.yaml
# - tensorboard.yaml
@@ -1,6 +1,7 @@
# Neptune logger config
# https://neptune.ai

neptune:
_target_: pytorch_lightning.loggers.neptune.NeptuneLogger
project_name: "hobogalaxy/lightning-hydra-template-test"
project_name: "your_name/lightning-hydra-template-test"
api_key: ${env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable
# experiment_name: "some_experiment"
# experiment_name: "some_experiment"
6 changes: 6 additions & 0 deletions configs/logger/tensorboard.yaml
@@ -0,0 +1,6 @@
# TensorBoard

tensorboard:
_target_: pytorch_lightning.loggers.tensorboard.TensorBoardLogger
save_dir: "tensorboard/"
name: "default"
10 changes: 10 additions & 0 deletions configs/logger/wandb.yaml
@@ -0,0 +1,10 @@
# https://wandb.ai (Weights&Biases)

wandb:
_target_: pytorch_lightning.loggers.wandb.WandbLogger
project: "env_tests"
# entity: "" # set to name of your wandb team or just remove it
# offline: False # set True to store all logs only locally
job_type: "train"
group: ""
save_dir: "."
@@ -1,7 +1,7 @@
_target_: src.models.mnist_model.LitModelMNIST
optimizer: adam
lr: 0.001
weight_decay: 0.000001
weight_decay: 0.00005
architecture: SimpleDenseNet
input_size: 784
lin1_size: 256
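The optimizer, lr and weight_decay fields presumably feed the optimizer setup inside LitModelMNIST (src/models/mnist_model.py, not shown in this diff). A rough, hypothetical reconstruction of that part only:

import torch
from pytorch_lightning import LightningModule


class LitModelMNIST(LightningModule):
    # hypothetical sketch of the optimizer setup; the real class also defines
    # the network (SimpleDenseNet) and the training/validation steps
    def __init__(self, optimizer: str = "adam", lr: float = 0.001,
                 weight_decay: float = 0.00005, **kwargs):
        super().__init__()
        # store all init args (the config values) under self.hparams
        self.save_hyperparameters()

    def configure_optimizers(self):
        if self.hparams.optimizer == "adam":
            return torch.optim.Adam(
                self.parameters(),
                lr=self.hparams.lr,
                weight_decay=self.hparams.weight_decay,
            )
        raise ValueError(f"Unsupported optimizer: {self.hparams.optimizer}")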