From ab1b7b208076814f492826e0d0c35aabd1b72821 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Somoza?= Date: Wed, 23 Oct 2024 13:21:56 -0300 Subject: [PATCH 1/5] [Official callbacks] SDXL Controlnet CFG Cutoff (#9311) * initial proposal * style --- src/diffusers/callbacks.py | 59 ++++++++++++++++++- .../controlnet/pipeline_controlnet_sd_xl.py | 2 + 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/src/diffusers/callbacks.py b/src/diffusers/callbacks.py index 38542407e31f..4b8b15368c47 100644 --- a/src/diffusers/callbacks.py +++ b/src/diffusers/callbacks.py @@ -97,13 +97,17 @@ def callback_fn(self, pipeline, step_index, timestep, callback_kwargs) -> Dict[s class SDXLCFGCutoffCallback(PipelineCallback): """ - Callback function for Stable Diffusion XL Pipelines. After certain number of steps (set by `cutoff_step_ratio` or - `cutoff_step_index`), this callback will disable the CFG. + Callback function for the base Stable Diffusion XL Pipelines. After certain number of steps (set by + `cutoff_step_ratio` or `cutoff_step_index`), this callback will disable the CFG. Note: This callback mutates the pipeline by changing the `_guidance_scale` attribute to 0.0 after the cutoff step. """ - tensor_inputs = ["prompt_embeds", "add_text_embeds", "add_time_ids"] + tensor_inputs = [ + "prompt_embeds", + "add_text_embeds", + "add_time_ids", + ] def callback_fn(self, pipeline, step_index, timestep, callback_kwargs) -> Dict[str, Any]: cutoff_step_ratio = self.config.cutoff_step_ratio @@ -129,6 +133,55 @@ def callback_fn(self, pipeline, step_index, timestep, callback_kwargs) -> Dict[s callback_kwargs[self.tensor_inputs[0]] = prompt_embeds callback_kwargs[self.tensor_inputs[1]] = add_text_embeds callback_kwargs[self.tensor_inputs[2]] = add_time_ids + + return callback_kwargs + + +class SDXLControlnetCFGCutoffCallback(PipelineCallback): + """ + Callback function for the Controlnet Stable Diffusion XL Pipelines. After certain number of steps (set by + `cutoff_step_ratio` or `cutoff_step_index`), this callback will disable the CFG. + + Note: This callback mutates the pipeline by changing the `_guidance_scale` attribute to 0.0 after the cutoff step. + """ + + tensor_inputs = [ + "prompt_embeds", + "add_text_embeds", + "add_time_ids", + "image", + ] + + def callback_fn(self, pipeline, step_index, timestep, callback_kwargs) -> Dict[str, Any]: + cutoff_step_ratio = self.config.cutoff_step_ratio + cutoff_step_index = self.config.cutoff_step_index + + # Use cutoff_step_index if it's not None, otherwise use cutoff_step_ratio + cutoff_step = ( + cutoff_step_index if cutoff_step_index is not None else int(pipeline.num_timesteps * cutoff_step_ratio) + ) + + if step_index == cutoff_step: + prompt_embeds = callback_kwargs[self.tensor_inputs[0]] + prompt_embeds = prompt_embeds[-1:] # "-1" denotes the embeddings for conditional text tokens. + + add_text_embeds = callback_kwargs[self.tensor_inputs[1]] + add_text_embeds = add_text_embeds[-1:] # "-1" denotes the embeddings for conditional pooled text tokens + + add_time_ids = callback_kwargs[self.tensor_inputs[2]] + add_time_ids = add_time_ids[-1:] # "-1" denotes the embeddings for conditional added time vector + + # For Controlnet + image = callback_kwargs[self.tensor_inputs[3]] + image = image[-1:] + + pipeline._guidance_scale = 0.0 + + callback_kwargs[self.tensor_inputs[0]] = prompt_embeds + callback_kwargs[self.tensor_inputs[1]] = add_text_embeds + callback_kwargs[self.tensor_inputs[2]] = add_time_ids + callback_kwargs[self.tensor_inputs[3]] = image + return callback_kwargs diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py index 0f3a15172843..7a9433e1d357 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py @@ -242,6 +242,7 @@ class StableDiffusionXLControlNetPipeline( "add_time_ids", "negative_pooled_prompt_embeds", "negative_add_time_ids", + "image", ] def __init__( @@ -1540,6 +1541,7 @@ def __call__( ) add_time_ids = callback_outputs.pop("add_time_ids", add_time_ids) negative_add_time_ids = callback_outputs.pop("negative_add_time_ids", negative_add_time_ids) + image = callback_outputs.pop("image", image) # call the callback, if provided if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): From bfa0aa4ff2a59a1ce4d3dd9e1fc6683e8d7ea33c Mon Sep 17 00:00:00 2001 From: Linoy Tsaban <57615435+linoytsaban@users.noreply.github.com> Date: Wed, 23 Oct 2024 23:16:53 +0300 Subject: [PATCH 2/5] [SD3-5 dreambooth lora] update model cards (#9749) * improve readme * style --------- Co-authored-by: Sayak Paul --- .../dreambooth/train_dreambooth_lora_sd3.py | 19 ++++++++++++++----- examples/dreambooth/train_dreambooth_sd3.py | 16 ++++++++++++---- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 8e33a5d32074..4b39dcfe41b0 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -86,6 +86,15 @@ def save_model_card( validation_prompt=None, repo_folder=None, ): + if "large" in base_model: + model_variant = "SD3.5-Large" + license_url = "https://huggingface.co/stabilityai/stable-diffusion-3.5-large/blob/main/LICENSE.md" + variant_tags = ["sd3.5-large", "sd3.5", "sd3.5-diffusers"] + else: + model_variant = "SD3" + license_url = "https://huggingface.co/stabilityai/stable-diffusion-3-medium/blob/main/LICENSE.md" + variant_tags = ["sd3", "sd3-diffusers"] + widget_dict = [] if images is not None: for i, image in enumerate(images): @@ -95,7 +104,7 @@ def save_model_card( ) model_description = f""" -# SD3 DreamBooth LoRA - {repo_id} +# {model_variant} DreamBooth LoRA - {repo_id} @@ -120,7 +129,7 @@ def save_model_card( ```py from diffusers import AutoPipelineForText2Image import torch -pipeline = AutoPipelineForText2Image.from_pretrained('stabilityai/stable-diffusion-3-medium-diffusers', torch_dtype=torch.float16).to('cuda') +pipeline = AutoPipelineForText2Image.from_pretrained({base_model}, torch_dtype=torch.float16).to('cuda') pipeline.load_lora_weights('{repo_id}', weight_name='pytorch_lora_weights.safetensors') image = pipeline('{validation_prompt if validation_prompt else instance_prompt}').images[0] ``` @@ -135,7 +144,7 @@ def save_model_card( ## License -Please adhere to the licensing terms as described [here](https://huggingface.co/stabilityai/stable-diffusion-3-medium/blob/main/LICENSE). +Please adhere to the licensing terms as described [here]({license_url}). """ model_card = load_or_create_model_card( repo_id_or_path=repo_id, @@ -151,11 +160,11 @@ def save_model_card( "diffusers-training", "diffusers", "lora", - "sd3", - "sd3-diffusers", "template:sd-lora", ] + tags += variant_tags + model_card = populate_model_card(model_card, tags=tags) model_card.save(os.path.join(repo_folder, "README.md")) diff --git a/examples/dreambooth/train_dreambooth_sd3.py b/examples/dreambooth/train_dreambooth_sd3.py index d5dfdfa218bc..5d10345304ab 100644 --- a/examples/dreambooth/train_dreambooth_sd3.py +++ b/examples/dreambooth/train_dreambooth_sd3.py @@ -77,6 +77,15 @@ def save_model_card( validation_prompt=None, repo_folder=None, ): + if "large" in base_model: + model_variant = "SD3.5-Large" + license_url = "https://huggingface.co/stabilityai/stable-diffusion-3.5-large/blob/main/LICENSE.md" + variant_tags = ["sd3.5-large", "sd3.5", "sd3.5-diffusers"] + else: + model_variant = "SD3" + license_url = "https://huggingface.co/stabilityai/stable-diffusion-3-medium/blob/main/LICENSE.md" + variant_tags = ["sd3", "sd3-diffusers"] + widget_dict = [] if images is not None: for i, image in enumerate(images): @@ -86,7 +95,7 @@ def save_model_card( ) model_description = f""" -# SD3 DreamBooth - {repo_id} +# {model_variant} DreamBooth - {repo_id} @@ -113,7 +122,7 @@ def save_model_card( ## License -Please adhere to the licensing terms as described `[here](https://huggingface.co/stabilityai/stable-diffusion-3-medium/blob/main/LICENSE)`. +Please adhere to the licensing terms as described `[here]({license_url})`. """ model_card = load_or_create_model_card( repo_id_or_path=repo_id, @@ -128,10 +137,9 @@ def save_model_card( "text-to-image", "diffusers-training", "diffusers", - "sd3", - "sd3-diffusers", "template:sd-lora", ] + tags += variant_tags model_card = populate_model_card(model_card, tags=tags) model_card.save(os.path.join(repo_folder, "README.md")) From 24c7d578baf6a8b79890101dd280278fff031d12 Mon Sep 17 00:00:00 2001 From: Rachit Shah Date: Thu, 24 Oct 2024 02:03:29 +0530 Subject: [PATCH 3/5] config attribute not foud error for FluxImagetoImage Pipeline for multi controlnet solved (#9586) Co-authored-by: YiYi Xu --- .../flux/pipeline_flux_controlnet_image_to_image.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py b/src/diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py index 7b40ddfca79a..8d636feeae05 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +++ b/src/diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py @@ -903,9 +903,12 @@ def __call__( timestep = t.expand(latents.shape[0]).to(latents.dtype) - guidance = ( - torch.tensor([guidance_scale], device=device) if self.controlnet.config.guidance_embeds else None - ) + if isinstance(self.controlnet, FluxMultiControlNetModel): + use_guidance = self.controlnet.nets[0].config.guidance_embeds + else: + use_guidance = self.controlnet.config.guidance_embeds + + guidance = torch.tensor([guidance_scale], device=device) if use_guidance else None guidance = guidance.expand(latents.shape[0]) if guidance is not None else None if isinstance(controlnet_keep[i], list): From 1d1e1a2888bd65b51f13272de2f709fd91e0beb1 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Thu, 24 Oct 2024 20:19:09 +0530 Subject: [PATCH 4/5] Some minor updates to the nightly and push workflows (#9759) * move lora integration tests to nightly./ * remove slow marker in the workflow where not needed. --- .github/workflows/push_tests.yml | 6 +++--- tests/lora/test_lora_layers_flux.py | 4 +++- tests/lora/test_lora_layers_sd.py | 2 ++ tests/lora/test_lora_layers_sdxl.py | 1 + 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index f07e6cda0d59..2289d1b5cad1 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -81,7 +81,7 @@ jobs: - name: Environment run: | python utils/print_env.py - - name: Slow PyTorch CUDA checkpoint tests on Ubuntu + - name: PyTorch CUDA checkpoint tests on Ubuntu env: HF_TOKEN: ${{ secrets.HF_TOKEN }} # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms @@ -184,7 +184,7 @@ jobs: run: | python utils/print_env.py - - name: Run slow Flax TPU tests + - name: Run Flax TPU tests env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | @@ -232,7 +232,7 @@ jobs: run: | python utils/print_env.py - - name: Run slow ONNXRuntime CUDA tests + - name: Run ONNXRuntime CUDA tests env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index 3bc46d1e9b13..b58525cc7a6f 100644 --- a/tests/lora/test_lora_layers_flux.py +++ b/tests/lora/test_lora_layers_flux.py @@ -27,6 +27,7 @@ from diffusers.utils.testing_utils import ( floats_tensor, is_peft_available, + nightly, numpy_cosine_similarity_distance, require_peft_backend, require_torch_gpu, @@ -165,9 +166,10 @@ def test_modify_padding_mode(self): @slow +@nightly @require_torch_gpu @require_peft_backend -# @unittest.skip("We cannot run inference on this model with the current CI hardware") +@unittest.skip("We cannot run inference on this model with the current CI hardware") # TODO (DN6, sayakpaul): move these tests to a beefier GPU class FluxLoRAIntegrationTests(unittest.TestCase): """internal note: The integration slices were obtained on audace. diff --git a/tests/lora/test_lora_layers_sd.py b/tests/lora/test_lora_layers_sd.py index 50187e50a912..e91b0689b4ce 100644 --- a/tests/lora/test_lora_layers_sd.py +++ b/tests/lora/test_lora_layers_sd.py @@ -34,6 +34,7 @@ from diffusers.utils.import_utils import is_accelerate_available from diffusers.utils.testing_utils import ( load_image, + nightly, numpy_cosine_similarity_distance, require_peft_backend, require_torch_gpu, @@ -207,6 +208,7 @@ def test_integration_move_lora_dora_cpu(self): @slow +@nightly @require_torch_gpu @require_peft_backend class LoraIntegrationTests(unittest.TestCase): diff --git a/tests/lora/test_lora_layers_sdxl.py b/tests/lora/test_lora_layers_sdxl.py index 94a44ed8f9ec..30238c74873b 100644 --- a/tests/lora/test_lora_layers_sdxl.py +++ b/tests/lora/test_lora_layers_sdxl.py @@ -113,6 +113,7 @@ def tearDown(self): @slow +@nightly @require_torch_gpu @require_peft_backend class LoraSDXLIntegrationTests(unittest.TestCase): From 435f6b7e47c031f98b8374b1689e1abeb17bfdb6 Mon Sep 17 00:00:00 2001 From: Zhiyang Shen <1003151222@qq.com> Date: Fri, 25 Oct 2024 19:03:35 +0800 Subject: [PATCH 5/5] [Docs] fix docstring typo in SD3 pipeline (#9765) * fix docstring typo in SD3 pipeline * fix docstring typo in SD3 pipeline --- .../stable_diffusion_3/pipeline_stable_diffusion_3.py | 4 ++-- .../stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py | 4 ++-- .../stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py index 4b9df578bc4a..43cb40e6e733 100644 --- a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +++ b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py @@ -762,8 +762,8 @@ def __call__( The output format of the generate image. Choose between [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. return_dict (`bool`, *optional*, defaults to `True`): - Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead - of a plain tuple. + Whether or not to return a [`~pipelines.stable_diffusion_3.StableDiffusion3PipelineOutput`] instead of + a plain tuple. joint_attention_kwargs (`dict`, *optional*): A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under `self.processor` in diff --git a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py index 794716303394..a07a056ec851 100644 --- a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py @@ -800,8 +800,8 @@ def __call__( The output format of the generate image. Choose between [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. return_dict (`bool`, *optional*, defaults to `True`): - Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead - of a plain tuple. + Whether or not to return a [`~pipelines.stable_diffusion_3.StableDiffusion3PipelineOutput`] instead of + a plain tuple. joint_attention_kwargs (`dict`, *optional*): A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under `self.processor` in diff --git a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py index 7401be39d6f9..d3e0ecf9c3a7 100644 --- a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py @@ -921,8 +921,8 @@ def __call__( The output format of the generate image. Choose between [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. return_dict (`bool`, *optional*, defaults to `True`): - Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead - of a plain tuple. + Whether or not to return a [`~pipelines.stable_diffusion_3.StableDiffusion3PipelineOutput`] instead of + a plain tuple. callback_on_step_end (`Callable`, *optional*): A function that calls at the end of each denoising steps during the inference. The function is called with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,