Dev (ashleve#78)
* add neptune integration

* improve readme

* quick fix

* improve datamodule

* remove unnecessary folders from .gitignore

* change folder structure

* add optuna config

* add bash tests

* redesign readme.md

* change wandb callbacks names

* fix wandb callbacks

* resolve issues with sweeping alongside wandb

* improve rich config printing

* update readme

* Update README.md
ashleve authored Mar 1, 2021
1 parent bcf4146 commit 036aec4
Showing 56 changed files with 1,285 additions and 828 deletions.
7 changes: 3 additions & 4 deletions .gitignore
@@ -78,6 +78,7 @@ fabric.properties
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
**/.vscode

### Python template
# Byte-compiled / optimized / DLL files
@@ -225,7 +226,5 @@ ipython_config.py
# git rm -r .ipynb_checkpoints/

.idea/
project/data/
project/lightning_logs/
project/wandb/
project/logs/
data/
logs/
331 changes: 216 additions & 115 deletions README.md

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions conda_env_cpu.yaml
@@ -0,0 +1,14 @@
#name: conda_env_name

channels:
- pytorch
- conda-forge
- defaults

dependencies:
- python=3.8
- pip
- notebook
- pytorch
- torchvision
- torchaudio
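Usage note (not part of the commit): since the name field above is commented out, an environment can be created from this file with a name supplied on the command line, e.g. `conda env create -f conda_env_cpu.yaml -n my_env` followed by `conda activate my_env`, where my_env is any name you choose.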
File renamed without changes.
@@ -1,16 +1,16 @@
model_checkpoint:
_target_: pytorch_lightning.callbacks.ModelCheckpoint
monitor: "val_acc" # name of the logged metric which determines when model is improving
monitor: "val/acc" # name of the logged metric which determines when model is improving
save_top_k: 2 # save k best models (determined by above metric)
save_last: True # additionally always save model from last epoch
mode: "max" # can be "max" or "min"
dirpath: 'checkpoints/'
filename: 'sample-mnist-{epoch:02d}'
filename: '{epoch:02d}'


early_stopping:
_target_: pytorch_lightning.callbacks.EarlyStopping
monitor: "val_acc" # name of the logged metric which determines when model is improving
monitor: "val/acc" # name of the logged metric which determines when model is improving
patience: 100 # how many epochs of not improving until training stops
mode: "max" # can be "max" or "min"
min_delta: 0.0 # minimum change in the monitored metric needed to qualify as an improvement
File renamed without changes.
34 changes: 34 additions & 0 deletions configs/callbacks/wandb_callbacks.yaml
@@ -0,0 +1,34 @@
defaults:
- default_callbacks.yaml


upload_code_to_wandb_as_artifact:
_target_: src.callbacks.wandb_callbacks.UploadCodeToWandbAsArtifact
code_dir: ${work_dir}


upload_ckpts_to_wandb_as_artifact:
_target_: src.callbacks.wandb_callbacks.UploadCheckpointsToWandbAsArtifact
ckpt_dir: "checkpoints/"
upload_best_only: False


watch_model_with_wandb:
_target_: src.callbacks.wandb_callbacks.WatchModelWithWandb
log: "all"
log_freq: 100


# BUGGED :(
# save_best_metric_scores_to_wandb:
# _target_: src.callbacks.wandb_callbacks.LogBestMetricScoresToWandb


save_f1_precision_recall_heatmap_to_wandb:
_target_: src.callbacks.wandb_callbacks.LogF1PrecisionRecallHeatmapToWandb
class_names: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']


save_confusion_matrix_to_wandb:
_target_: src.callbacks.wandb_callbacks.LogConfusionMatrixToWandb
class_names: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
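The _target_ paths above point at callback classes in src/callbacks/wandb_callbacks.py, which is not shown in this diff. Purely as an illustrative sketch (an assumption about what such a class could look like, not the commit's actual code), WatchModelWithWandb might be implemented roughly like this:

import wandb
from pytorch_lightning import Callback


class WatchModelWithWandb(Callback):
    """Ask wandb to watch the model at the start of training (illustrative sketch)."""

    def __init__(self, log: str = "all", log_freq: int = 100):
        self.log = log
        self.log_freq = log_freq

    def on_train_start(self, trainer, pl_module):
        # wandb.watch hooks into the module and logs gradients and/or parameters
        wandb.watch(pl_module, log=self.log, log_freq=self.log_freq)

The config keys log and log_freq map directly onto the constructor arguments, which is how hydra's _target_ instantiation works for every entry in this file.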
17 changes: 10 additions & 7 deletions project/configs/config.yaml → configs/config.yaml
@@ -5,24 +5,27 @@ defaults:
- trainer: default_trainer.yaml
- model: mnist_model.yaml
- datamodule: mnist_datamodule.yaml
- seeds: default_seeds.yaml # set this to null if you don't want to use seeds
- callbacks: default_callbacks.yaml # set this to null if you don't want to use callbacks
- logger: null # set logger here or use command line (e.g. `python train.py logger=wandb`)

# we add this just to enable color logging
# - hydra/hydra_logging: colorlog
# - hydra/job_logging: colorlog
# enable color logging
# - override hydra/hydra_logging: colorlog
# - override hydra/job_logging: colorlog


# path to original working directory (the directory that `train.py` was executed from in command line)
# path to original working directory (that `train.py` was executed from in command line)
# hydra hijacks working directory by changing it to the current log directory,
# so it's useful to have path to original working directory as a special variable
# read more here: https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory
original_work_dir: ${hydra:runtime.cwd}
work_dir: ${hydra:runtime.cwd}


# path to folder with data
data_dir: ${original_work_dir}/data/
data_dir: ${work_dir}/data/


# pretty print config at the start of the run using Rich library
print_config: True


# output paths for hydra logs
65 changes: 65 additions & 0 deletions configs/config_optuna.yaml
@@ -0,0 +1,65 @@
# @package _global_

# example hyperparameter optimization of some experiment with optuna:
# python train.py -m --config-name config_optuna.yaml +experiment=exp_example_simple logger=wandb

defaults:
# load everything from main config file
- config.yaml

# override sweeper to optuna!
- override hydra/sweeper: optuna


# choose metric which will be optimized by optuna
optimized_metric: "val/acc_best"


hydra:
# here we define optuna objective
# it optimizes for value returned from function with @hydra.main decorator
# learn more here: https://hydra.cc/docs/next/plugins/optuna_sweeper
sweeper:
optuna_config:
study_name: null
storage: null
n_jobs: 1
seed: 12345

# 'minimize' or 'maximize' the objective
direction: maximize

# number of experiments that will be executed
n_trials: 30

# choose optuna hyperparameter sampler ('tpe', 'random', 'cmaes' or 'nsgaii', 'motpe')
# learn more here: https://optuna.readthedocs.io/en/stable/reference/samplers.html
sampler: tpe

# define range of hyperparameters
search_space:
datamodule.batch_size:
type: categorical
choices: [32, 64, 128]
model.lr:
type: float
low: 0.0001
high: 0.2
model.lin1_size:
type: categorical
choices: [64, 128, 256, 512]
model.dropout1:
type: categorical
choices: [0.05, 0.1, 0.25, 0.5]
model.lin2_size:
type: categorical
choices: [64, 128, 256, 512]
model.dropout2:
type: categorical
choices: [0.05, 0.1, 0.25, 0.5]
model.lin3_size:
type: categorical
choices: [32, 64, 128, 256]
model.dropout3:
type: categorical
choices: [0.05, 0.1, 0.25, 0.5]
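As the comments above say, the optuna sweeper optimizes the value returned by the function decorated with @hydra.main. A minimal sketch of how train.py might produce that value (an assumption about the surrounding code, not part of this diff):

import hydra
from hydra.utils import instantiate
from omegaconf import DictConfig


@hydra.main(config_path="configs/", config_name="config.yaml")
def main(cfg: DictConfig) -> float:
    # hypothetical entry point: instantiate the pieces declared in the config
    datamodule = instantiate(cfg.datamodule)
    model = instantiate(cfg.model)
    trainer = instantiate(cfg.trainer)
    trainer.fit(model=model, datamodule=datamodule)

    # return the metric named by `optimized_metric` so the sweeper can maximize it
    return float(trainer.callback_metrics[cfg.optimized_metric])


if __name__ == "__main__":
    main()

Each sweep trial then runs this function with a different set of values drawn from the search_space above.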
File renamed without changes.
74 changes: 74 additions & 0 deletions configs/experiment/exp_example_full.yaml
@@ -0,0 +1,74 @@
# @package _global_

# to execute this experiment run:
# python train.py +experiment=exp_example_full

defaults:
- override /trainer: null # override trainer to null so it's not loaded from main config defaults...
- override /model: null
- override /datamodule: null
- override /callbacks: null
- override /logger: null

# we override default configurations with nulls to prevent them from loading at all
# instead we define all modules and their paths directly in this config,
# so everything is stored in one place for more readability

seed: 12345

trainer:
_target_: pytorch_lightning.Trainer
gpus: 0
min_epochs: 1
max_epochs: 10
gradient_clip_val: 0.5
accumulate_grad_batches: 2
weights_summary: null
# resume_from_checkpoint: ${work_dir}/last.ckpt

model:
_target_: src.models.mnist_model.LitModelMNIST
optimizer: adam
lr: 0.001
weight_decay: 0.00005
architecture: SimpleDenseNet
input_size: 784
lin1_size: 256
dropout1: 0.30
lin2_size: 256
dropout2: 0.25
lin3_size: 128
dropout3: 0.20
output_size: 10

datamodule:
_target_: src.datamodules.mnist_datamodule.MNISTDataModule
data_dir: ${data_dir}
batch_size: 64
train_val_test_split: [55_000, 5_000, 10_000]
num_workers: 0
pin_memory: False

callbacks:
model_checkpoint:
_target_: pytorch_lightning.callbacks.ModelCheckpoint
monitor: "val/acc"
save_top_k: 2
save_last: True
mode: "max"
dirpath: 'checkpoints/'
filename: 'sample-mnist-{epoch:02d}'
early_stopping:
_target_: pytorch_lightning.callbacks.EarlyStopping
monitor: "val/acc"
patience: 100
mode: "max"

logger:
wandb:
tags: ["best_model", "uwu"]
notes: "Description of this model."
neptune:
tags: ["best_model"]
csv_logger:
save_dir: "."
@@ -7,19 +7,18 @@ defaults:
- override /trainer: default_trainer.yaml # choose trainer from 'configs/trainer/' folder or set to null
- override /model: mnist_model.yaml # choose model from 'configs/model/' folder or set to null
- override /datamodule: mnist_datamodule.yaml # choose datamodule from 'configs/datamodule/' folder or set to null
- override /seeds: default_seeds.yaml # choose seeds from 'configs/seeds/' folder or set to null
- override /callbacks: default_callbacks.yaml # choose callback set from 'configs/callbacks/' folder or set to null
- override /logger: null # choose logger from 'configs/logger/' folder or set it from console when running experiment:
# `python train.py +experiment=exp_example_simple logger=wandb`
- override /logger: null # choose logger from 'configs/logger/' folder or set to null

# all parameters below will be merged with parameters from default configurations set above
# this allows you to overwrite only specified parameters

seeds:
pytorch_seed: 12345
seed: 12345

trainer:
min_epochs: 1
max_epochs: 10
gradient_clip_val: 0.5

model:
lr: 0.001
@@ -1,5 +1,7 @@
# Comet logger config
# https://www.comet.ml

comet:
_target_: pytorch_lightning.loggers.comet.CometLogger
api_key: ???
project_name: "project_template_test"
experiment_name: null
@@ -1,5 +1,6 @@
# Csv logger config
csv_logger:
# CSVLogger built in PyTorch Lightning

csv:
_target_: pytorch_lightning.loggers.csv_logs.CSVLogger
save_dir: "."
name: "csv_logger/"
name: "csv/"
8 changes: 8 additions & 0 deletions configs/logger/many_loggers.yaml
@@ -0,0 +1,8 @@
# train with many loggers at once

defaults:
- csv.yaml
- wandb.yaml
# - neptune.yaml
# - comet.yaml
# - tensorboard.yaml
@@ -1,6 +1,7 @@
# Neptune logger config
# https://neptune.ai

neptune:
_target_: pytorch_lightning.loggers.neptune.NeptuneLogger
project_name: "hobogalaxy/lightning-hydra-template-test"
project_name: "your_name/lightning-hydra-template-test"
api_key: ${env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable
# experiment_name: "some_experiment"
# experiment_name: "some_experiment"
6 changes: 6 additions & 0 deletions configs/logger/tensorboard.yaml
@@ -0,0 +1,6 @@
# TensorBoard

tensorboard:
_target_: pytorch_lightning.loggers.tensorboard.TensorBoardLogger
save_dir: "tensorboard/"
name: "default"
10 changes: 10 additions & 0 deletions configs/logger/wandb.yaml
@@ -0,0 +1,10 @@
# https://wandb.ai (Weights&Biases)

wandb:
_target_: pytorch_lightning.loggers.wandb.WandbLogger
project: "env_tests"
# entity: "" # set to name of your wandb team or just remove it
# offline: False # set True to store all logs only locally
job_type: "train"
group: ""
save_dir: "."
@@ -1,7 +1,7 @@
_target_: src.models.mnist_model.LitModelMNIST
optimizer: adam
lr: 0.001
weight_decay: 0.000001
weight_decay: 0.00005
architecture: SimpleDenseNet
input_size: 784
lin1_size: 256
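The optimizer, lr and weight_decay fields presumably feed the optimizer setup inside LitModelMNIST (src/models/mnist_model.py, not shown in this diff). A rough, hypothetical reconstruction of that part only:

import torch
from pytorch_lightning import LightningModule


class LitModelMNIST(LightningModule):
    # hypothetical sketch of the optimizer setup; the real class also defines
    # the network (SimpleDenseNet) and the training/validation steps
    def __init__(self, optimizer: str = "adam", lr: float = 0.001,
                 weight_decay: float = 0.00005, **kwargs):
        super().__init__()
        # store all init args (the config values) under self.hparams
        self.save_hyperparameters()

    def configure_optimizers(self):
        if self.hparams.optimizer == "adam":
            return torch.optim.Adam(
                self.parameters(),
                lr=self.hparams.lr,
                weight_decay=self.hparams.weight_decay,
            )
        raise ValueError(f"Unsupported optimizer: {self.hparams.optimizer}")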