Merge pull request #76 from invoke-ai/flat-configs
Flatten configs
RyanJDick authored Jan 24, 2024
2 parents a64403d + e280194 commit 2f74cf3
Show file tree
Hide file tree
Showing 33 changed files with 253 additions and 348 deletions.
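The net effect on an example config, as a rough before/after sketch reconstructed from the diffs below (values taken from the DPO example configs shown further down):

Before (nested):

    output:
      base_output_dir: output/dpo

    optimizer:
      learning_rate: 1e-4
      lr_warmup_steps: 500
      lr_scheduler: cosine

      optimizer:
        optimizer_type: AdamW
        weight_decay: 1e-2

    data_loader:
      image_transforms:
        resolution: 512

After (flattened):

    base_output_dir: output/dpo

    optimizer:
      optimizer_type: AdamW
      learning_rate: 1e-4
      weight_decay: 1e-2

    lr_warmup_steps: 500
    lr_scheduler: cosine

    data_loader:
      resolution: 512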
@@ -4,25 +4,22 @@

 type: DIRECT_PREFERENCE_OPTIMIZATION_LORA_SD
 seed: 1
-output:
-  base_output_dir: output/dpo
+base_output_dir: output/dpo
 
 optimizer:
+  optimizer_type: AdamW
   learning_rate: 1e-4
-  lr_warmup_steps: 500
-  lr_scheduler: cosine
+  weight_decay: 1e-2
 
-  optimizer:
-    optimizer_type: AdamW
-    weight_decay: 1e-2
+lr_warmup_steps: 500
+lr_scheduler: cosine
 
 data_loader:
   type: IMAGE_PAIR_PREFERENCE_SD_DATA_LOADER
   dataset:
     type: IMAGE_PAIR_PREFERENCE_DATASET
     dataset_dir: output/pokemon_pairs
-  image_transforms:
-    resolution: 512
+  resolution: 512
   dataloader_num_workers: 4
 
 # General
15 changes: 6 additions & 9 deletions configs/_experimental/dpo_lora_sd_pickapic_1x24gb_example.yaml
@@ -7,24 +7,21 @@

 type: DIRECT_PREFERENCE_OPTIMIZATION_LORA_SD
 seed: 1
-output:
-  base_output_dir: output/dpo
+base_output_dir: output/dpo
 
 optimizer:
+  optimizer_type: AdamW
   learning_rate: 1e-4
-  lr_warmup_steps: 200
-  lr_scheduler: cosine
+  weight_decay: 1e-2
 
-  optimizer:
-    optimizer_type: AdamW
-    weight_decay: 1e-2
+lr_warmup_steps: 200
+lr_scheduler: cosine
 
 data_loader:
   type: IMAGE_PAIR_PREFERENCE_SD_DATA_LOADER
   dataset:
     type: HF_HUB_IMAGE_PAIR_PREFERENCE_DATASET
-  image_transforms:
-    resolution: 512
+  resolution: 512
 
 # General
 model: runwayml/stable-diffusion-v1-5
16 changes: 6 additions & 10 deletions configs/finetune_lora_sd_pokemon_1x8gb_example.yaml
@@ -9,25 +9,21 @@

 type: FINETUNE_LORA_SD
 seed: 1
-output:
-  base_output_dir: output/finetune_lora_sd_pokemon/
+base_output_dir: output/finetune_lora_sd_pokemon/
 
 optimizer:
+  optimizer_type: Prodigy
   learning_rate: 1.0
-
-  optimizer:
-    optimizer_type: Prodigy
-    weight_decay: 0.01
-    use_bias_correction: True
-    safeguard_warmup: True
+  weight_decay: 0.01
+  use_bias_correction: True
+  safeguard_warmup: True
 
 data_loader:
   type: IMAGE_CAPTION_SD_DATA_LOADER
   dataset:
     type: HF_HUB_IMAGE_CAPTION_DATASET
     dataset_name: lambdalabs/pokemon-blip-captions
-  image_transforms:
-    resolution: 512
+  resolution: 512
   dataloader_num_workers: 4
 
 # General
16 changes: 6 additions & 10 deletions configs/finetune_lora_sdxl_pokemon_1x24gb_example.yaml
@@ -8,25 +8,21 @@
 # purposes.
 type: FINETUNE_LORA_SDXL
 seed: 1
-output:
-  base_output_dir: output/finetune_lora_sdxl_pokemon/
+base_output_dir: output/finetune_lora_sdxl_pokemon/
 
 optimizer:
+  optimizer_type: Prodigy
   learning_rate: 1.0
-
-  optimizer:
-    optimizer_type: Prodigy
-    weight_decay: 0.01
-    use_bias_correction: True
-    safeguard_warmup: True
+  weight_decay: 0.01
+  use_bias_correction: True
+  safeguard_warmup: True
 
 data_loader:
   type: IMAGE_CAPTION_SD_DATA_LOADER
   dataset:
     type: HF_HUB_IMAGE_CAPTION_DATASET
     dataset_name: lambdalabs/pokemon-blip-captions
-  image_transforms:
-    resolution: 512
+  resolution: 512
 
 # General
 model: stabilityai/stable-diffusion-xl-base-1.0
16 changes: 6 additions & 10 deletions configs/finetune_lora_sdxl_pokemon_1x8gb_example.yaml
@@ -9,25 +9,21 @@
 # - Achieve reasonable results *quickly* (<15mins) for demo purposes.
 type: FINETUNE_LORA_SDXL
 seed: 1
-output:
-  base_output_dir: output/finetune_lora_sdxl_pokemon/
+base_output_dir: output/finetune_lora_sdxl_pokemon/
 
 optimizer:
+  optimizer_type: Prodigy
   learning_rate: 1.0
-
-  optimizer:
-    optimizer_type: Prodigy
-    weight_decay: 0.01
-    use_bias_correction: True
-    safeguard_warmup: True
+  weight_decay: 0.01
+  use_bias_correction: True
+  safeguard_warmup: True
 
 data_loader:
   type: IMAGE_CAPTION_SD_DATA_LOADER
   dataset:
     type: HF_HUB_IMAGE_CAPTION_DATASET
     dataset_name: lambdalabs/pokemon-blip-captions
-  image_transforms:
-    resolution: 512
+  resolution: 512
 
 # General
 model: stabilityai/stable-diffusion-xl-base-1.0
23 changes: 9 additions & 14 deletions configs/textual_inversion_sd_gnome_1x8gb_example.yaml
@@ -4,31 +4,26 @@

 type: TEXTUAL_INVERSION_SD
 seed: 1
-output:
-  base_output_dir: output/ti_sd_bruce_the_gnome
+base_output_dir: output/ti_sd_bruce_the_gnome
 
 optimizer:
+  optimizer_type: AdamW
   learning_rate: 4e-3
-  lr_warmup_steps: 200
-  lr_scheduler: cosine
 
-  optimizer:
-    optimizer_type: AdamW
+lr_warmup_steps: 200
+lr_scheduler: cosine
 
 data_loader:
   type: TEXTUAL_INVERSION_SD_DATA_LOADER
   dataset:
     type: IMAGE_DIR_DATASET
     dataset_dir: "sample_data/bruce_the_gnome"
     keep_in_memory: True
-  captions:
-    type: TEXTUAL_INVERSION_PRESET_CAPTION_TRANSFORM
-    preset: object
-  image_transforms:
-    resolution: 512
-    center_crop: True
-    random_flip: False
-  shuffle_caption_transform: null
+  caption_preset: object
+  resolution: 512
+  center_crop: True
+  random_flip: False
+  shuffle_caption_delimiter: null
   aspect_ratio_buckets:
     target_resolution: 512
     start_dim: 256
23 changes: 9 additions & 14 deletions configs/textual_inversion_sdxl_gnome_1x24gb_example.yaml
@@ -4,31 +4,26 @@

 type: TEXTUAL_INVERSION_SDXL
 seed: 1
-output:
-  base_output_dir: output/ti_sdxl_bruce_the_gnome
+base_output_dir: output/ti_sdxl_bruce_the_gnome
 
 optimizer:
+  optimizer_type: AdamW
   learning_rate: 2e-3
-  lr_warmup_steps: 200
-  lr_scheduler: cosine
 
-  optimizer:
-    optimizer_type: AdamW
+lr_warmup_steps: 200
+lr_scheduler: cosine
 
 data_loader:
   type: TEXTUAL_INVERSION_SD_DATA_LOADER
   dataset:
     type: IMAGE_DIR_DATASET
     dataset_dir: "sample_data/bruce_the_gnome"
     keep_in_memory: True
-  captions:
-    type: TEXTUAL_INVERSION_PRESET_CAPTION_TRANSFORM
-    preset: object
-  image_transforms:
-    resolution: 1024
-    center_crop: True
-    random_flip: False
-  shuffle_caption_transform: null
+  caption_preset: object
+  resolution: 1024
+  center_crop: True
+  random_flip: False
+  shuffle_caption_delimiter: null
   dataloader_num_workers: 4
 
 # General
1 change: 0 additions & 1 deletion docs/reference/config/shared/data/transform_config.md

This file was deleted.

1 change: 0 additions & 1 deletion docs/reference/config/shared/training_output_config.md

This file was deleted.

7 changes: 2 additions & 5 deletions mkdocs.yml
@@ -48,11 +48,8 @@ nav:
     - Textual Inversion SD Config: reference/config/pipelines/textual_inversion_sd_config.md
     - Textual Inversion SDXL Config: reference/config/pipelines/textual_inversion_sdxl_config.md
   - shared:
-    - data:
-      - data_loader_config: reference/config/shared/data/data_loader_config.md
-      - dataset_config: reference/config/shared/data/dataset_config.md
-      - transform_config: reference/config/shared/data/transform_config.md
-    - training_output_config: reference/config/shared/training_output_config.md
+    - data_loader_config: reference/config/shared/data/data_loader_config.md
+    - dataset_config: reference/config/shared/data/dataset_config.md
     - optimizer_config: reference/config/shared/optimizer_config.md
 - Contributing:
   - contributing/development_environment.md
19 changes: 15 additions & 4 deletions src/invoke_training/config/_experimental/dpo/config.py
@@ -4,8 +4,6 @@

 from invoke_training.config.pipelines.finetune_lora_config import LoRATrainingConfig
 from invoke_training.config.shared.config_base_model import ConfigBaseModel
-from invoke_training.config.shared.data.transform_config import SDImageTransformConfig
-from invoke_training.config.shared.optimizer.optimizer_config import OptimizerConfig
 
 
 class HFHubImagePairPreferenceDatasetConfig(ConfigBaseModel):
@@ -28,7 +26,20 @@ class ImagePairPreferenceSDDataLoaderConfig(ConfigBaseModel):
         Union[HFHubImagePairPreferenceDatasetConfig, ImagePairPreferenceDatasetConfig], Field(discriminator="type")
     ]
 
-    image_transforms: SDImageTransformConfig
+    resolution: int | tuple[int, int] = 512
+    """The resolution for input images. Either a scalar integer representing the square resolution height and width, or
+    a (height, width) tuple. All of the images in the dataset will be resized to this resolution unless the
+    `aspect_ratio_buckets` config is set.
+    """
+
+    center_crop: bool = True
+    """If True, input images will be center-cropped to the target resolution.
+    If False, input images will be randomly cropped to the target resolution.
+    """
+
+    random_flip: bool = False
+    """Whether random flip augmentations should be applied to input images.
+    """
 
     dataloader_num_workers: int = 0
     """Number of subprocesses to use for data loading. 0 means that the data will be loaded in the main process.
@@ -37,7 +48,7 @@ class ImagePairPreferenceSDDataLoaderConfig(ConfigBaseModel):

 class DirectPreferenceOptimizationLoRASDConfig(LoRATrainingConfig):
     type: Literal["DIRECT_PREFERENCE_OPTIMIZATION_LORA_SD"] = "DIRECT_PREFERENCE_OPTIMIZATION_LORA_SD"
-    optimizer: OptimizerConfig
+
     data_loader: ImagePairPreferenceSDDataLoaderConfig
 
     initial_lora: str | None = None
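In the YAML configs, the image transform options that previously sat under image_transforms now go directly on the data loader. A minimal sketch of the flattened section (values are illustrative; per the new resolution docstring, either a scalar or a (height, width) pair should work, the list form assuming the usual YAML-list-to-tuple coercion):

    data_loader:
      type: IMAGE_PAIR_PREFERENCE_SD_DATA_LOADER
      dataset:
        type: IMAGE_PAIR_PREFERENCE_DATASET
        dataset_dir: output/pokemon_pairs
      resolution: 512            # or a (height, width) pair such as [768, 512]
      center_crop: True
      random_flip: False
      dataloader_num_workers: 4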
12 changes: 8 additions & 4 deletions src/invoke_training/config/pipelines/base_pipeline_config.py
@@ -1,7 +1,7 @@
+import typing
 from typing import Optional
 
 from invoke_training.config.shared.config_base_model import ConfigBaseModel
-from invoke_training.config.shared.training_output_config import TrainingOutputConfig
 
 
 class BasePipelineConfig(ConfigBaseModel):
@@ -14,8 +14,12 @@ class BasePipelineConfig(ConfigBaseModel):
     set to `null`, training will be non-deterministic.
     """
 
-    output: TrainingOutputConfig
-    """Configuration for the training run outputs (output directory, log format, checkpoint format, etc.).
+    base_output_dir: str
+    """The output directory where the training outputs (model checkpoints, logs, intermediate predictions) will be
+    written. A subdirectory will be created with a timestamp for each new training run.
+    """
 
-    See [`TrainingOutputConfig`][invoke_training.config.shared.training_output_config.TrainingOutputConfig] for details.
+    report_to: typing.Literal["all", "tensorboard", "wandb", "comet_ml"] = "tensorboard"
+    """The integration to report results and logs to. This value is passed to Hugging Face Accelerate. See
+    `accelerate.Accelerator.log_with` for more details.
     """
18 changes: 11 additions & 7 deletions src/invoke_training/config/pipelines/finetune_lora_config.py
@@ -1,3 +1,4 @@
+import typing
 from typing import Annotated, Literal, Optional, Union
 
 from pydantic import Field
@@ -7,7 +8,7 @@
     DreamboothSDDataLoaderConfig,
     ImageCaptionSDDataLoaderConfig,
 )
-from invoke_training.config.shared.optimizer.optimizer_config import OptimizerConfig
+from invoke_training.config.shared.optimizer.optimizer_config import AdamOptimizerConfig, ProdigyOptimizerConfig
 
 
 class LoRATrainingConfig(BasePipelineConfig):
@@ -51,6 +52,8 @@ class LoRATrainingConfig(BasePipelineConfig):
"""Whether to add LoRA layers to the text encoder and train it.
"""

optimizer: AdamOptimizerConfig | ProdigyOptimizerConfig = AdamOptimizerConfig()

text_encoder_learning_rate: Optional[float] = None
"""The learning rate to use for the text encoder model. If set, this overrides the optimizer's default learning
rate.
@@ -60,10 +63,13 @@ class LoRATrainingConfig(BasePipelineConfig):
"""The learning rate to use for the UNet model. If set, this overrides the optimizer's default learning rate.
"""

train_unet_non_attention_blocks: bool = False
"""Whether to inject LoRA layers into the non-attention UNet blocks for training. Enabling will produce a more
expressive LoRA model at the cost of slower training, higher training VRAM requirements, and a larger LoRA weight
file.
lr_scheduler: typing.Literal[
"linear", "cosine", "cosine_with_restarts", "polynomial", "constant", "constant_with_warmup"
] = "constant"

lr_warmup_steps: int = 0
"""The number of warmup steps in the learning rate scheduler. Only applied to schedulers that support warmup.
See lr_scheduler.
"""

lora_rank_dim: int = 4
@@ -159,15 +165,13 @@ class LoRATrainingConfig(BasePipelineConfig):

 class FinetuneLoRASDConfig(LoRATrainingConfig):
     type: Literal["FINETUNE_LORA_SD"] = "FINETUNE_LORA_SD"
-    optimizer: OptimizerConfig
     data_loader: Annotated[
         Union[ImageCaptionSDDataLoaderConfig, DreamboothSDDataLoaderConfig], Field(discriminator="type")
     ]
 
 
 class FinetuneLoRASDXLConfig(LoRATrainingConfig):
     type: Literal["FINETUNE_LORA_SDXL"] = "FINETUNE_LORA_SDXL"
-    optimizer: OptimizerConfig
     data_loader: Annotated[
         Union[ImageCaptionSDDataLoaderConfig, DreamboothSDDataLoaderConfig], Field(discriminator="type")
     ]
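With these fields, a LoRA pipeline config now picks the optimizer type inside optimizer: and sets the learning-rate schedule beside it rather than inside it. A minimal sketch (values mirror the Prodigy examples above; lr_scheduler defaults to constant and lr_warmup_steps to 0):

    optimizer:
      optimizer_type: Prodigy    # or AdamW
      learning_rate: 1.0
      weight_decay: 0.01
      use_bias_correction: True
      safeguard_warmup: True

    lr_scheduler: constant       # only schedulers that support warmup use lr_warmup_steps
    lr_warmup_steps: 0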