diff --git a/swarm/diffusion/diffusion_func.py b/swarm/diffusion/diffusion_func.py index 184fcb3..8e6c01d 100644 --- a/swarm/diffusion/diffusion_func.py +++ b/swarm/diffusion/diffusion_func.py @@ -19,7 +19,7 @@ def diffusion_callback(device_identifier, model_name, **kwargs): lora = kwargs.pop("lora", None) cross_attention_scale = kwargs.pop("cross_attention_scale", 1.0) - # set output_type if already there or upscale is selected (we use the latent upscaler) + # set output_type if already there or upscale is selected (we use the latent upscaler) output_type = kwargs.pop("output_type", "latent" if upscale else None) if output_type is not None: kwargs["output_type"] = output_type @@ -43,14 +43,22 @@ def diffusion_callback(device_identifier, model_name, **kwargs): "preprocessed_input", [kwargs.get("control_image")] ) - pipeline = pipeline_type.from_pretrained( - model_name, - revision=kwargs.pop("revision", "main"), - variant=kwargs.pop("variant", None), - torch_dtype=torch.float16, - controlnet=controlnet if "controlnet" in locals() else None, - use_safe_tensors=use_safe_tensors, - ) + pipeline = pipeline_type.from_pretrained( + model_name, + revision=kwargs.pop("revision", "main"), + variant=kwargs.pop("variant", None), + torch_dtype=torch.float16, + controlnet=controlnet if "controlnet" in locals() else None, + use_safe_tensors=use_safe_tensors, + ) + else: + pipeline = pipeline_type.from_pretrained( + model_name, + revision=kwargs.pop("revision", "main"), + variant=kwargs.pop("variant", None), + torch_dtype=torch.float16, + use_safe_tensors=use_safe_tensors, + ) if textual_inversion is not None: try: diff --git a/swarm/diffusion/kandinsky.py b/swarm/diffusion/kandinsky.py deleted file mode 100644 index a8ff0e0..0000000 --- a/swarm/diffusion/kandinsky.py +++ /dev/null @@ -1,73 +0,0 @@ -from diffusers import DiffusionPipeline -import torch -from ..post_processors.upscale import upscale_image -from ..post_processors.output_processor import OutputProcessor - - -def 
kandinsky_callback(device_identifier, model_name, **kwargs): - pipeline_type = kwargs.pop("pipeline_type", DiffusionPipeline) - pipeline_prior_type = kwargs.pop("pipeline_prior_type", DiffusionPipeline) - model_name_prior = kwargs.pop( - "model_name_prior", "kandinsky-community/kandinsky-2-1-prior" - ) - upscale = kwargs.pop("upscale", False) - num_images_per_prompt = kwargs.get("num_images_per_prompt", 1) - - guidance_scale = kwargs.get( - "guidance_scale", 1.0 - ) # both pipelines need this so don't pop it - - output_processor = OutputProcessor( - kwargs.pop("outputs", ["primary"]), - kwargs.pop("content_type", "image/jpeg"), - ) - - pipe_prior = pipeline_prior_type.from_pretrained( - model_name_prior, torch_dtype=torch.float16 - ) - pipe_prior.to(device_identifier) - - prompt = kwargs.pop("prompt", "") - negative_prompt = kwargs.pop("negative_prompt", "") - - generator = kwargs["generator"] - - if "image" in kwargs and "image2" in kwargs: - images_texts = [prompt, kwargs.pop("image"), kwargs.pop("image2")] - weights = [0.2, 0.3, 0.5] - image_embeds, negative_image_embeds = pipe_prior.interpolate( - images_texts, weights - ) - prompt = "" - else: - image_embeds, negative_image_embeds = pipe_prior( - prompt, negative_prompt, guidance_scale=1.0, generator=generator - ).to_tuple() - - pipe = pipeline_type.from_pretrained(model_name, torch_dtype=torch.float16) - pipe.to(device_identifier) - - height = kwargs.pop("height", 768) - width = kwargs.pop("width", 768) - images = pipe( - prompt, - negative_prompt=negative_prompt, - image_embeds=image_embeds, - negative_image_embeds=negative_image_embeds, - height=height, - width=width, - **kwargs, - ).images - - if upscale: - images = upscale_image( - images, - device_identifier, - kwargs.get("prompt", ""), - kwargs.get("negative_prompt", ""), - num_images_per_prompt, - kwargs["generator"], - ) - - output_processor.add_outputs(images) - return (output_processor.get_results(), {}) diff --git a/swarm/job_arguments.py 
b/swarm/job_arguments.py index 4a1cbe9..f78c106 100644 --- a/swarm/job_arguments.py +++ b/swarm/job_arguments.py @@ -6,7 +6,6 @@ from .audio.audioldm import txt2audio_diffusion_callback from .audio.bark import bark_diffusion_callback from .diffusion.diffusion_func_if import diffusion_if_callback -from .diffusion.kandinsky import kandinsky_callback from .type_helpers import get_type from .pre_processors.controlnet import scale_to_size from .external_resources import get_image, get_control_image, max_size, download_images @@ -37,9 +36,6 @@ async def format_args(job): if args["model_name"].startswith("DeepFloyd/"): return diffusion_if_callback, args - if args["model_name"].startswith("kandinsky-"): - return await format_kandinsky_args(args) - return await format_stable_diffusion_args(args, workflow)