How to configure inference settings to generate images with the Stable Diffusion XL pipeline?

35 Views Asked by At

I'm working with the Stable Diffusion XL (SDXL) model from Hugging Face's diffusers library and I want to set these inference parameters:

  • width: Width of the image in pixels.
  • height: Height of the image in pixels.
  • steps: Number of inference steps performed during image generation.
  • cfg_scale: How strictly the diffusion process adheres to the prompt text (higher values keep your image closer to your prompt).

Here's a minimal example of my current implementation:

import os
import datetime

from diffusers import DiffusionPipeline
import torch

if __name__ == "__main__":
    # Generated images are written here; create the folder if it is missing.
    output_dir = "output_images"
    os.makedirs(output_dir, exist_ok=True)

    # SDXL base 1.0 checkpoint, loaded with float16 weights:
    # https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
    model_id = "stabilityai/stable-diffusion-xl-base-1.0"
    load_options = {
        "torch_dtype": torch.float16,
        "use_safetensors": True,
        "variant": "fp16",
    }
    pipe = DiffusionPipeline.from_pretrained(model_id, **load_options)
    pipe.to("cuda")
    # enabling xformers for memory efficiency
    pipe.enable_xformers_memory_efficient_attention()

    prompt = "Extreme close up of a slice a lemon with splashing green cocktail, alcohol,  healthy food photography"

    # Run the pipeline with its default inference settings.
    result = pipe(prompt=prompt)
    generated = result.images

    # Name the file after the time the generation finished.
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    image_path = os.path.join(output_dir, "output_" + stamp + ".jpg")
    generated[0].save(image_path)

    print(f"Image saved at: {image_path}")

How can I set the inference parameters?

1

There is 1 best solution below

0
LeMoussel On BEST ANSWER

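Here is my solution: pass the settings as keyword arguments when calling the pipeline. `width` and `height` keep their names, `steps` corresponds to `num_inference_steps`, and `cfg_scale` corresponds to `guidance_scale`.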

import os
import datetime

from diffusers import DiffusionPipeline
import torch

if __name__ == "__main__":
    # Generated images are written here; create the folder if it is missing.
    output_dir = "output_images"
    os.makedirs(output_dir, exist_ok=True)

    pipe = DiffusionPipeline.from_pretrained(
        # https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16",
    )
    pipe.to("cuda")
    # enabling xformers for memory efficiency
    pipe.enable_xformers_memory_efficient_attention()

    prompt = "Extreme close up of a slice a lemon with splashing green cocktail, alcohol,  healthy food photography"

    # Inference settings. These must be defined before the pipeline call;
    # the values below are example starting points -- tune them as needed.
    width = 1024                # Width of the image in pixels.
    height = 1024               # Height of the image in pixels.
    guidance_scale = 5.0        # "cfg_scale": how strictly the diffusion process adheres to the prompt text (higher values keep your image closer to your prompt).
    num_inference_steps = 50    # "steps": number of inference steps performed during image generation.

    images = pipe(
        prompt=prompt,
        negative_prompt='',
        width=width,
        height=height,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        num_images_per_prompt=1,
    ).images

    # Name the file after the time the generation finished.
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    image_path = os.path.join(output_dir, f"output_{timestamp}.jpg")
    images[0].save(image_path)

    print(f"Image saved at: {image_path}")