Merge pull request #116 from invoke-ai/vae-mixed-precision
Improve mixed_precision docs
RyanJDick authored Apr 25, 2024
2 parents f0963fc + d0abb4e commit d8a954b
Showing 7 changed files with 102 additions and 38 deletions.
32 changes: 12 additions & 20 deletions src/invoke_training/_shared/stable_diffusion/validation.py
@@ -38,8 +38,6 @@ def generate_validation_images_sd(  # noqa: C901
     """Generate validation images for the purpose of tracking image generation behaviour on fixed prompts throughout
     training.
     """
-    logger.info("Generating validation images.")
-
     # Record original model devices so that we can restore this state after running the pipeline with CPU model
     # offloading.
     unet_device = unet.device
@@ -68,6 +66,9 @@ def generate_validation_images_sd(  # noqa: C901
 
     validation_images = ValidationImages(images=[], epoch=epoch, step=step)
 
+    validation_step_dir = os.path.join(out_dir, "validation", f"epoch_{epoch:0>8}-step_{step:0>8}")
+    logger.info(f"Generating validation images ({validation_step_dir}).")
+
     # Run inference.
     with torch.no_grad():
         for prompt_idx in range(len(config.validation_prompts)):
@@ -96,15 +97,10 @@ def generate_validation_images_sd(  # noqa: C901
             )
 
             # Save images to disk.
-            validation_dir = os.path.join(
-                out_dir,
-                "validation",
-                f"epoch_{epoch:0>8}-step_{step:0>8}",
-                f"prompt_{prompt_idx:0>4}",
-            )
-            os.makedirs(validation_dir)
+            validation_prompt_dir = os.path.join(validation_step_dir, f"prompt_{prompt_idx:0>4}")
+            os.makedirs(validation_prompt_dir)
             for image_idx, image in enumerate(images):
-                image_path = os.path.join(validation_dir, f"{image_idx:0>4}.jpg")
+                image_path = os.path.join(validation_prompt_dir, f"{image_idx:0>4}.jpg")
                 validation_images.images.append(
                     ValidationImage(file_path=image_path, prompt=positive_prompt, image_idx=image_idx)
                 )
@@ -160,8 +156,6 @@ def generate_validation_images_sdxl(  # noqa: C901
     """Generate validation images for the purpose of tracking image generation behaviour on fixed prompts throughout
     training.
     """
-    logger.info("Generating validation images.")
-
     # Record original model devices so that we can restore this state after running the pipeline with CPU model
     # offloading.
     unet_device = unet.device
@@ -189,6 +183,9 @@ def generate_validation_images_sdxl(  # noqa: C901
 
     validation_images = ValidationImages(images=[], epoch=epoch, step=step)
 
+    validation_step_dir = os.path.join(out_dir, "validation", f"epoch_{epoch:0>8}-step_{step:0>8}")
+    logger.info(f"Generating validation images ({validation_step_dir}).")
+
     # Run inference.
     with torch.no_grad():
         for prompt_idx in range(len(config.validation_prompts)):
@@ -217,15 +214,10 @@ def generate_validation_images_sdxl(  # noqa: C901
             )
 
             # Save images to disk.
-            validation_dir = os.path.join(
-                out_dir,
-                "validation",
-                f"epoch_{epoch:0>8}-step_{step:0>8}",
-                f"prompt_{prompt_idx:0>4}",
-            )
-            os.makedirs(validation_dir)
+            validation_prompt_dir = os.path.join(validation_step_dir, f"prompt_{prompt_idx:0>4}")
+            os.makedirs(validation_prompt_dir)
             for image_idx, image in enumerate(images):
-                image_path = os.path.join(validation_dir, f"{image_idx:0>4}.jpg")
+                image_path = os.path.join(validation_prompt_dir, f"{image_idx:0>4}.jpg")
                 validation_images.images.append(
                     ValidationImage(file_path=image_path, prompt=positive_prompt, image_idx=image_idx)
                 )
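The refactor above replaces the per-prompt path construction with a shared `validation_step_dir` that is also logged up front, so the log now points at the exact output directory instead of the previous generic "Generating validation images." message. A minimal sketch of the resulting on-disk layout (the `out_dir`, epoch, step, and index values below are hypothetical):

```python
import os

# Hypothetical run values, for illustration only.
out_dir = "output/my_run"
epoch, step = 3, 1250
prompt_idx, image_idx = 0, 0

# Mirrors the refactored logic: one directory per validation step, with a
# subdirectory per validation prompt.
validation_step_dir = os.path.join(out_dir, "validation", f"epoch_{epoch:0>8}-step_{step:0>8}")
validation_prompt_dir = os.path.join(validation_step_dir, f"prompt_{prompt_idx:0>4}")
image_path = os.path.join(validation_prompt_dir, f"{image_idx:0>4}.jpg")

print(image_path)
# output/my_run/validation/epoch_00000003-step_00001250/prompt_0000/0000.jpg
```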
17 changes: 14 additions & 3 deletions src/invoke_training/pipelines/_experimental/sd_dpo_lora/config.py
@@ -151,9 +151,20 @@ class SdDirectPreferenceOptimizationLoraConfig(BasePipelineConfig):
     """
 
     mixed_precision: Literal["no", "fp16", "bf16", "fp8"] = "no"
-    """The mixed precision mode to use ('no','fp16','bf16 or 'fp8'). This value is passed to Hugging Face Accelerate.
-    See accelerate.Accelerator for more details.
-    """
+    """The mixed precision mode to use.
+
+    If mixed precision is enabled, then all non-trainable parameters will be cast to the specified precision. The
+    trainable parameters are always kept in float32 precision to avoid issues with numerical stability.
+
+    Recommendations:
+
+    - `"no"`: Use this mode if you have plenty of VRAM available.
+    - `"bf16"`: Use this mode if you have limited VRAM and a GPU that supports bfloat16.
+    - `"fp16"`: Use this mode if you have limited VRAM and a GPU that does not support bfloat16.
+    - `"fp8"`: You are likely to run into numerical stability issues with this mode. Only use this mode if you know what you are doing and are willing to work through some issues.
+
+    This value is passed to Hugging Face Accelerate. See `accelerate.Accelerator` for more details.
+    """  # noqa: E501
 
     xformers: bool = False
     """If true, use xformers for more efficient attention blocks.
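For reference, this value is forwarded to Hugging Face Accelerate, which then drives the dtype used for the frozen models. A minimal sketch of the common pattern (the mode string and the dtype mapping below are illustrative, not this repository's exact code):

```python
import torch
from accelerate import Accelerator

mixed_precision = "bf16"  # one of "no", "fp16", "bf16", "fp8"; assumes a bf16-capable GPU
accelerator = Accelerator(mixed_precision=mixed_precision)

# Map the mode to a torch dtype for casting the non-trainable models; the
# trainable parameters stay in float32 for numerical stability.
weight_dtype = torch.float32
if accelerator.mixed_precision == "fp16":
    weight_dtype = torch.float16
elif accelerator.mixed_precision == "bf16":
    weight_dtype = torch.bfloat16
```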
19 changes: 16 additions & 3 deletions src/invoke_training/pipelines/stable_diffusion/lora/config.py
@@ -111,9 +111,22 @@ class SdLoraConfig(BasePipelineConfig):
     """
 
     mixed_precision: Literal["no", "fp16", "bf16", "fp8"] = "no"
-    """The mixed precision mode to use ('no','fp16','bf16 or 'fp8'). This value is passed to Hugging Face Accelerate.
-    See accelerate.Accelerator for more details.
-    """
+    """The mixed precision mode to use.
+
+    If mixed precision is enabled, then all non-trainable parameters will be cast to the specified precision. The
+    trainable parameters are always kept in float32 precision to avoid issues with numerical stability.
+
+    Recommendations:
+
+    - `"no"`: Use this mode if you have plenty of VRAM available.
+    - `"bf16"`: Use this mode if you have limited VRAM and a GPU that supports bfloat16.
+    - `"fp16"`: Use this mode if you have limited VRAM and a GPU that does not support bfloat16.
+    - `"fp8"`: You are likely to run into numerical stability issues with this mode. Only use this mode if you know what you are doing and are willing to work through some issues.
+
+    This value is passed to Hugging Face Accelerate. See
+    [`accelerate.Accelerator.mixed_precision`](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.mixed_precision)
+    for more details.
+    """  # noqa: E501
 
     xformers: bool = False
     """If true, use xformers for more efficient attention blocks.
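The cast-the-frozen-models, keep-trainables-in-float32 split that the new docstring describes can be illustrated as follows. A self-contained sketch; the two `nn.Linear` modules are stand-ins for the pipeline's real models, not its actual code:

```python
import torch

def cast_non_trainable(module: torch.nn.Module, dtype: torch.dtype) -> None:
    """Cast a fully frozen model to a reduced-precision dtype in place."""
    assert not any(p.requires_grad for p in module.parameters()), "module has trainable params"
    module.to(dtype=dtype)

# Frozen components (e.g. the VAE and text encoder) run in reduced precision...
frozen_model = torch.nn.Linear(4, 4).requires_grad_(False)  # stand-in for a frozen model
cast_non_trainable(frozen_model, torch.bfloat16)

# ...while trainable weights (e.g. the LoRA layers) remain float32.
trainable_model = torch.nn.Linear(4, 4)  # stand-in for trainable weights
assert all(p.dtype == torch.float32 for p in trainable_model.parameters())
```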
17 changes: 14 additions & 3 deletions src/invoke_training/pipelines/stable_diffusion/textual_inversion/config.py
@@ -117,11 +117,22 @@ class SdTextualInversionConfig(BasePipelineConfig):
     """
 
     mixed_precision: Literal["no", "fp16", "bf16", "fp8"] = "no"
-    """The mixed precision mode to use. This value is passed to Hugging Face Accelerate.
-    See
+    """The mixed precision mode to use.
+
+    If mixed precision is enabled, then all non-trainable parameters will be cast to the specified precision. The
+    trainable parameters are always kept in float32 precision to avoid issues with numerical stability.
+
+    Recommendations:
+
+    - `"no"`: Use this mode if you have plenty of VRAM available.
+    - `"bf16"`: Use this mode if you have limited VRAM and a GPU that supports bfloat16.
+    - `"fp16"`: Use this mode if you have limited VRAM and a GPU that does not support bfloat16.
+    - `"fp8"`: You are likely to run into numerical stability issues with this mode. Only use this mode if you know what you are doing and are willing to work through some issues.
+
+    This value is passed to Hugging Face Accelerate. See
     [`accelerate.Accelerator.mixed_precision`](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.mixed_precision)
     for more details.
-    """
+    """  # noqa: E501
 
     xformers: bool = False
     """If `True`, use xformers for more efficient attention blocks.
19 changes: 16 additions & 3 deletions src/invoke_training/pipelines/stable_diffusion_xl/lora/config.py
@@ -111,9 +111,22 @@ class SdxlLoraConfig(BasePipelineConfig):
     """
 
     mixed_precision: Literal["no", "fp16", "bf16", "fp8"] = "no"
-    """The mixed precision mode to use ('no','fp16','bf16 or 'fp8'). This value is passed to Hugging Face Accelerate.
-    See accelerate.Accelerator for more details.
-    """
+    """The mixed precision mode to use.
+
+    If mixed precision is enabled, then all non-trainable parameters will be cast to the specified precision. The
+    trainable parameters are always kept in float32 precision to avoid issues with numerical stability.
+
+    Recommendations:
+
+    - `"no"`: Use this mode if you have plenty of VRAM available.
+    - `"bf16"`: Use this mode if you have limited VRAM and a GPU that supports bfloat16.
+    - `"fp16"`: Use this mode if you have limited VRAM and a GPU that does not support bfloat16.
+    - `"fp8"`: You are likely to run into numerical stability issues with this mode. Only use this mode if you know what you are doing and are willing to work through some issues.
+
+    This value is passed to Hugging Face Accelerate. See
+    [`accelerate.Accelerator.mixed_precision`](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.mixed_precision)
+    for more details.
+    """  # noqa: E501
 
     xformers: bool = False
     """If true, use xformers for more efficient attention blocks.
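Following the recommendations repeated in these docstrings, a caller could choose the mode by probing the GPU rather than hard-coding it. A hedged sketch, not part of this commit:

```python
import torch

def pick_mixed_precision() -> str:
    """Heuristic: prefer bf16 where supported, fall back to fp16 on CUDA, else disable."""
    if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
        return "bf16"
    if torch.cuda.is_available():
        return "fp16"
    return "no"

print(pick_mixed_precision())  # e.g. "bf16" on Ampere or newer, "fp16" on older CUDA GPUs, "no" on CPU
```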
19 changes: 16 additions & 3 deletions src/invoke_training/pipelines/stable_diffusion_xl/lora_and_textual_inversion/config.py
@@ -145,9 +145,22 @@ class SdxlLoraAndTextualInversionConfig(BasePipelineConfig):
     """
 
     mixed_precision: Literal["no", "fp16", "bf16", "fp8"] = "no"
-    """The mixed precision mode to use ('no','fp16','bf16 or 'fp8'). This value is passed to Hugging Face Accelerate.
-    See accelerate.Accelerator for more details.
-    """
+    """The mixed precision mode to use.
+
+    If mixed precision is enabled, then all non-trainable parameters will be cast to the specified precision. The
+    trainable parameters are always kept in float32 precision to avoid issues with numerical stability.
+
+    Recommendations:
+
+    - `"no"`: Use this mode if you have plenty of VRAM available.
+    - `"bf16"`: Use this mode if you have limited VRAM and a GPU that supports bfloat16.
+    - `"fp16"`: Use this mode if you have limited VRAM and a GPU that does not support bfloat16.
+    - `"fp8"`: You are likely to run into numerical stability issues with this mode. Only use this mode if you know what you are doing and are willing to work through some issues.
+
+    This value is passed to Hugging Face Accelerate. See
+    [`accelerate.Accelerator.mixed_precision`](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.mixed_precision)
+    for more details.
+    """  # noqa: E501
 
     xformers: bool = False
     """If true, use xformers for more efficient attention blocks.
17 changes: 14 additions & 3 deletions src/invoke_training/pipelines/stable_diffusion_xl/textual_inversion/config.py
@@ -117,11 +117,22 @@ class SdxlTextualInversionConfig(BasePipelineConfig):
     """
 
     mixed_precision: Literal["no", "fp16", "bf16", "fp8"] = "no"
-    """The mixed precision mode to use. This value is passed to Hugging Face Accelerate.
-    See
+    """The mixed precision mode to use.
+
+    If mixed precision is enabled, then all non-trainable parameters will be cast to the specified precision. The
+    trainable parameters are always kept in float32 precision to avoid issues with numerical stability.
+
+    Recommendations:
+
+    - `"no"`: Use this mode if you have plenty of VRAM available.
+    - `"bf16"`: Use this mode if you have limited VRAM and a GPU that supports bfloat16.
+    - `"fp16"`: Use this mode if you have limited VRAM and a GPU that does not support bfloat16.
+    - `"fp8"`: You are likely to run into numerical stability issues with this mode. Only use this mode if you know what you are doing and are willing to work through some issues.
+
+    This value is passed to Hugging Face Accelerate. See
     [`accelerate.Accelerator.mixed_precision`](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.mixed_precision)
     for more details.
-    """
+    """  # noqa: E501
 
     xformers: bool = False
     """If `True`, use xformers for more efficient attention blocks.
