From d0abb4e86329b96670177c2232b8231f91f42cdd Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 25 Apr 2024 11:46:25 -0400 Subject: [PATCH] Improve docs for mixed_precision setting. --- .../_experimental/sd_dpo_lora/config.py | 17 ++++++++++++++--- .../pipelines/stable_diffusion/lora/config.py | 19 ++++++++++++++++--- .../textual_inversion/config.py | 17 ++++++++++++++--- .../stable_diffusion_xl/lora/config.py | 19 ++++++++++++++++--- .../lora_and_textual_inversion/config.py | 19 ++++++++++++++++--- .../textual_inversion/config.py | 17 ++++++++++++++--- 6 files changed, 90 insertions(+), 18 deletions(-) diff --git a/src/invoke_training/pipelines/_experimental/sd_dpo_lora/config.py b/src/invoke_training/pipelines/_experimental/sd_dpo_lora/config.py index ad2ceb03..e961cee3 100644 --- a/src/invoke_training/pipelines/_experimental/sd_dpo_lora/config.py +++ b/src/invoke_training/pipelines/_experimental/sd_dpo_lora/config.py @@ -151,9 +151,20 @@ class SdDirectPreferenceOptimizationLoraConfig(BasePipelineConfig): """ mixed_precision: Literal["no", "fp16", "bf16", "fp8"] = "no" - """The mixed precision mode to use ('no','fp16','bf16 or 'fp8'). This value is passed to Hugging Face Accelerate. - See accelerate.Accelerator for more details. - """ + """The mixed precision mode to use. + + If mixed precision is enabled, then all non-trainable parameters will be cast to the specified precision. The + trainable parameters are always kept in float32 precision to avoid issues with numerical stability. + + Recommendations: + + - `"no"`: Use this mode if you have plenty of VRAM available. + - `"bf16"`: Use this mode if you have limited VRAM and a GPU that supports bfloat16. + - `"fp16"`: Use this mode if you have limited VRAM and a GPU that does not support bfloat16. + - `"fp8"`: You are likely to run into numerical stability issues with this mode. Only use this mode if you know what you are doing and are willing to work through some issues. 
+ + This value is passed to Hugging Face Accelerate. See [`accelerate.Accelerator.mixed_precision`](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.mixed_precision) for more details. + """ # noqa: E501 xformers: bool = False """If true, use xformers for more efficient attention blocks. diff --git a/src/invoke_training/pipelines/stable_diffusion/lora/config.py b/src/invoke_training/pipelines/stable_diffusion/lora/config.py index 53ce4698..9d1ba7e7 100644 --- a/src/invoke_training/pipelines/stable_diffusion/lora/config.py +++ b/src/invoke_training/pipelines/stable_diffusion/lora/config.py @@ -111,9 +111,22 @@ class SdLoraConfig(BasePipelineConfig): """ mixed_precision: Literal["no", "fp16", "bf16", "fp8"] = "no" - """The mixed precision mode to use ('no','fp16','bf16 or 'fp8'). This value is passed to Hugging Face Accelerate. - See accelerate.Accelerator for more details. - """ + """The mixed precision mode to use. + + If mixed precision is enabled, then all non-trainable parameters will be cast to the specified precision. The + trainable parameters are always kept in float32 precision to avoid issues with numerical stability. + + Recommendations: + + - `"no"`: Use this mode if you have plenty of VRAM available. + - `"bf16"`: Use this mode if you have limited VRAM and a GPU that supports bfloat16. + - `"fp16"`: Use this mode if you have limited VRAM and a GPU that does not support bfloat16. + - `"fp8"`: You are likely to run into numerical stability issues with this mode. Only use this mode if you know what you are doing and are willing to work through some issues. + + This value is passed to Hugging Face Accelerate. See + [`accelerate.Accelerator.mixed_precision`](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.mixed_precision) + for more details. + """ # noqa: E501 xformers: bool = False """If true, use xformers for more efficient attention blocks.
diff --git a/src/invoke_training/pipelines/stable_diffusion/textual_inversion/config.py b/src/invoke_training/pipelines/stable_diffusion/textual_inversion/config.py index 7cdf3b7d..3bd6309b 100644 --- a/src/invoke_training/pipelines/stable_diffusion/textual_inversion/config.py +++ b/src/invoke_training/pipelines/stable_diffusion/textual_inversion/config.py @@ -117,11 +117,22 @@ class SdTextualInversionConfig(BasePipelineConfig): """ mixed_precision: Literal["no", "fp16", "bf16", "fp8"] = "no" - """The mixed precision mode to use. This value is passed to Hugging Face Accelerate. - See + """The mixed precision mode to use. + + If mixed precision is enabled, then all non-trainable parameters will be cast to the specified precision. The + trainable parameters are always kept in float32 precision to avoid issues with numerical stability. + + Recommendations: + + - `"no"`: Use this mode if you have plenty of VRAM available. + - `"bf16"`: Use this mode if you have limited VRAM and a GPU that supports bfloat16. + - `"fp16"`: Use this mode if you have limited VRAM and a GPU that does not support bfloat16. + - `"fp8"`: You are likely to run into numerical stability issues with this mode. Only use this mode if you know what you are doing and are willing to work through some issues. + + This value is passed to Hugging Face Accelerate. See [`accelerate.Accelerator.mixed_precision`](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.mixed_precision) for more details. - """ + """ # noqa: E501 xformers: bool = False """If `True`, use xformers for more efficient attention blocks. 
diff --git a/src/invoke_training/pipelines/stable_diffusion_xl/lora/config.py b/src/invoke_training/pipelines/stable_diffusion_xl/lora/config.py index 04302bb3..6d612e25 100644 --- a/src/invoke_training/pipelines/stable_diffusion_xl/lora/config.py +++ b/src/invoke_training/pipelines/stable_diffusion_xl/lora/config.py @@ -111,9 +111,22 @@ class SdxlLoraConfig(BasePipelineConfig): """ mixed_precision: Literal["no", "fp16", "bf16", "fp8"] = "no" - """The mixed precision mode to use ('no','fp16','bf16 or 'fp8'). This value is passed to Hugging Face Accelerate. - See accelerate.Accelerator for more details. - """ + """The mixed precision mode to use. + + If mixed precision is enabled, then all non-trainable parameters will be cast to the specified precision. The + trainable parameters are always kept in float32 precision to avoid issues with numerical stability. + + Recommendations: + + - `"no"`: Use this mode if you have plenty of VRAM available. + - `"bf16"`: Use this mode if you have limited VRAM and a GPU that supports bfloat16. + - `"fp16"`: Use this mode if you have limited VRAM and a GPU that does not support bfloat16. + - `"fp8"`: You are likely to run into numerical stability issues with this mode. Only use this mode if you know what you are doing and are willing to work through some issues. + + This value is passed to Hugging Face Accelerate. See + [`accelerate.Accelerator.mixed_precision`](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.mixed_precision) + for more details. + """ # noqa: E501 xformers: bool = False """If true, use xformers for more efficient attention blocks. 
diff --git a/src/invoke_training/pipelines/stable_diffusion_xl/lora_and_textual_inversion/config.py b/src/invoke_training/pipelines/stable_diffusion_xl/lora_and_textual_inversion/config.py index 125d9e95..83d81cda 100644 --- a/src/invoke_training/pipelines/stable_diffusion_xl/lora_and_textual_inversion/config.py +++ b/src/invoke_training/pipelines/stable_diffusion_xl/lora_and_textual_inversion/config.py @@ -145,9 +145,22 @@ class SdxlLoraAndTextualInversionConfig(BasePipelineConfig): """ mixed_precision: Literal["no", "fp16", "bf16", "fp8"] = "no" - """The mixed precision mode to use ('no','fp16','bf16 or 'fp8'). This value is passed to Hugging Face Accelerate. - See accelerate.Accelerator for more details. - """ + """The mixed precision mode to use. + + If mixed precision is enabled, then all non-trainable parameters will be cast to the specified precision. The + trainable parameters are always kept in float32 precision to avoid issues with numerical stability. + + Recommendations: + + - `"no"`: Use this mode if you have plenty of VRAM available. + - `"bf16"`: Use this mode if you have limited VRAM and a GPU that supports bfloat16. + - `"fp16"`: Use this mode if you have limited VRAM and a GPU that does not support bfloat16. + - `"fp8"`: You are likely to run into numerical stability issues with this mode. Only use this mode if you know what you are doing and are willing to work through some issues. + + This value is passed to Hugging Face Accelerate. See + [`accelerate.Accelerator.mixed_precision`](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.mixed_precision) + for more details. + """ # noqa: E501 xformers: bool = False """If true, use xformers for more efficient attention blocks. 
diff --git a/src/invoke_training/pipelines/stable_diffusion_xl/textual_inversion/config.py b/src/invoke_training/pipelines/stable_diffusion_xl/textual_inversion/config.py index 5158d7dd..d801b2f8 100644 --- a/src/invoke_training/pipelines/stable_diffusion_xl/textual_inversion/config.py +++ b/src/invoke_training/pipelines/stable_diffusion_xl/textual_inversion/config.py @@ -117,11 +117,22 @@ class SdxlTextualInversionConfig(BasePipelineConfig): """ mixed_precision: Literal["no", "fp16", "bf16", "fp8"] = "no" - """The mixed precision mode to use. This value is passed to Hugging Face Accelerate. - See + """The mixed precision mode to use. + + If mixed precision is enabled, then all non-trainable parameters will be cast to the specified precision. The + trainable parameters are always kept in float32 precision to avoid issues with numerical stability. + + Recommendations: + + - `"no"`: Use this mode if you have plenty of VRAM available. + - `"bf16"`: Use this mode if you have limited VRAM and a GPU that supports bfloat16. + - `"fp16"`: Use this mode if you have limited VRAM and a GPU that does not support bfloat16. + - `"fp8"`: You are likely to run into numerical stability issues with this mode. Only use this mode if you know what you are doing and are willing to work through some issues. + + This value is passed to Hugging Face Accelerate. See [`accelerate.Accelerator.mixed_precision`](https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.mixed_precision) for more details. - """ + """ # noqa: E501 xformers: bool = False """If `True`, use xformers for more efficient attention blocks.