adds the pipeline for pixart alpha controlnet by raulc0399 · Pull Request #8857 · huggingface/diffusers (original) (raw)

The following example uses HED edge detection to control the generation.

import torch
import torchvision.transforms as T
import torchvision.transforms.functional as TF

from diffusers.models import PixArtControlNetAdapterModel
from diffusers.pipelines import PixArtAlphaControlnetPipeline, get_closest_hw
import PIL.Image as Image

from controlnet_aux import HEDdetector

# Example: generate an image with PixArt-alpha, conditioned on the HED edge
# map of a reference picture through a ControlNet adapter.

# --- Inputs and settings ---
input_image_path = "asset/images/controlnet/car.jpg"
given_image = Image.open(input_image_path)

path_to_controlnet = "raulc0399/pixart-alpha-hed-controlnet"
prompt = "modern car, city in background, clear sky, suny day"

weight_dtype = torch.float16
image_size = 1024

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Models ---
# Load the ControlNet adapter first so it can be handed to the pipeline.
controlnet = PixArtControlNetAdapterModel.from_pretrained(
    path_to_controlnet,
    torch_dtype=weight_dtype,
    use_safetensors=True,
).to(device)

pipe = PixArtAlphaControlnetPipeline.from_pretrained(
    "PixArt-alpha/PixArt-XL-2-1024-MS",
    controlnet=controlnet,
    torch_dtype=weight_dtype,
    use_safetensors=True,
).to(device)

# --- Preprocess the image and generate the HED edge map ---
hed = HEDdetector.from_pretrained("lllyasviel/Annotators")

# NOTE(review): confirm the order of the pair returned by get_closest_hw;
# it is unpacked here as (width, height), unchanged from the original example.
width, height = get_closest_hw(given_image.size[0], given_image.size[1], image_size)

# The transform intentionally stops at a PIL image (no ToTensor): the HED
# detector is fed the image itself, not a CHW float tensor.
condition_transform = T.Compose([
    T.Lambda(lambda img: img.convert("RGB")),
    T.Resize(int(min(height, width))),
    T.CenterCrop([int(height), int(width)]),
])

# Transform the image that was loaded above; the original example referenced
# `control_image` here before it was ever assigned.
control_image = condition_transform(given_image)
hed_edge = hed(control_image, detect_resolution=image_size, image_resolution=image_size)

# --- Generation ---
# Inference only, so gradient tracking is disabled.
with torch.no_grad():
    out = pipe(
        prompt=prompt,
        image=hed_edge,
        num_inference_steps=14,
        guidance_scale=4.5,
        height=image_size,
        width=image_size,
    )

out.images[0].save("./output.jpg")