# FLUX.1-dev / app.py
# Hugging Face Space source file (5.18 kB); latest commit 1d2a6dd ("Fix") by hysts (HF Staff).
import random
import gradio as gr
import numpy as np
import PIL.Image
import spaces
import torch
from diffusers import AutoencoderTiny, DiffusionPipeline
# Upper bounds used by the seed and width/height sliders in the UI.
MAX_IMAGE_SIZE = 2048
MAX_SEED = np.iinfo(np.int32).max

# Weights are loaded in bfloat16 and moved to the GPU when one is visible,
# otherwise they stay on the CPU.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# AutoencoderTiny checkpoint that is handed to the pipeline as its `vae`.
taef1 = AutoencoderTiny.from_pretrained(
    "madebyollin/taef1",
    torch_dtype=dtype,
).to(device)

# FLUX.1 [dev] pipeline, built with the autoencoder loaded above.
pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    vae=taef1,
    torch_dtype=dtype,
).to(device)
@spaces.GPU(duration=75)
def infer(
    prompt: str,
    seed: int,
    randomize_seed: bool,
    width: int = 1024,
    height: int = 1024,
    guidance_scale: float = 3.5,
    num_inference_steps: int = 28,
    progress: gr.Progress = gr.Progress(track_tqdm=True),  # noqa: ARG001, B008
) -> tuple[PIL.Image.Image, int]:
    """Generate an image from a text prompt using the FLUX.1 [dev] model.

    Note:
        - Prompts must be written in English. Other languages are not currently supported.
        - Prompts are limited to 77 tokens due to CLIP tokenizer constraints.

    Args:
        prompt: A text prompt in English to guide the image generation. Limited to 77 tokens.
        seed: The seed value used for reproducible image generation.
        randomize_seed: If True, overrides the seed with a randomly generated one.
        width: Width of the output image in pixels. Defaults to 1024.
        height: Height of the output image in pixels. Defaults to 1024.
        guidance_scale: Controls how strongly the model follows the prompt.
            Higher values lead to images more closely aligned with the prompt. Defaults to 3.5.
        num_inference_steps: Number of denoising steps during generation. Higher values can improve quality. Defaults to 28.
        progress: (Internal) Progress tracker for UI integration; should not be manually set by users.

    Returns:
        A tuple containing the generated image and the seed value used.
    """
    # The integer-valued inputs come from sliders, and the app is also launched
    # as an MCP server, so a caller may deliver them as floats such as 42.0
    # (NOTE(review): assumes numeric inputs are not validated upstream -- confirm).
    # torch.Generator.manual_seed only accepts an int, so normalise here instead
    # of failing with a TypeError deep inside the call.
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)  # noqa: S311

    generator = torch.Generator().manual_seed(seed)
    image = pipe(
        prompt=prompt,
        width=int(width),
        height=int(height),
        num_inference_steps=int(num_inference_steps),
        generator=generator,
        guidance_scale=guidance_scale,
    ).images[0]

    # The seed is returned alongside the image so the caller can show or reuse
    # the value that was actually applied (relevant when it was randomized).
    return image, seed
def run_example(prompt: str) -> tuple[PIL.Image.Image, int]:
    """Run `infer` for an example prompt with a fixed seed.

    Seed randomization is switched off and every other setting is left at
    the defaults declared by `infer`.

    Args:
        prompt: The example prompt to render.

    Returns:
        Whatever `infer` returns: the generated image and the seed used.
    """
    fixed_seed = 42
    return infer(prompt, seed=fixed_seed, randomize_seed=False)
# Sample prompts; passed to `gr.Examples` as its `examples` argument below.
examples = [
"a tiny astronaut hatching from an egg on the moon",
"a cat holding a sign that says hello world",
"an anime illustration of a wiener schnitzel",
]
# Custom stylesheet passed to `gr.Blocks(css=...)`. The `#col-container`
# selector matches the `elem_id` given to the main column: it centers that
# column and caps its width.
css = """
#col-container {
margin: 0 auto;
max-width: 520px;
}
"""
# UI definition. Components are created in the same order as before; only the
# layout of the source (indentation, keyword grouping, comments) differs.
with gr.Blocks(css=css) as demo:
    # Single column targeted by the `#col-container` rule in `css`.
    with gr.Column(elem_id="col-container"):
        # Header text; the literal is kept flush-left so its content is unchanged.
        gr.Markdown("""# FLUX.1 [dev]
12B param rectified flow transformer guidance-distilled from [FLUX.1 [pro]](https://blackforestlabs.ai/)
[[non-commercial license](https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md)] [[blog](https://blackforestlabs.ai/announcing-black-forest-labs/)] [[model](https://huggingface.co/black-forest-labs/FLUX.1-dev)]
""")

        # Prompt box; its `submit` event is wired to `infer` at the bottom.
        with gr.Row():
            prompt = gr.Text(
                placeholder="Enter your prompt",
                max_lines=1,
                label="Prompt",
                show_label=False,
                submit_btn=True,
            )

        # Output slot for the generated image.
        result = gr.Image(show_label=False, label="Result")

        # Generation parameters, collapsed by default.
        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(
                label="Seed",
                value=0,
                minimum=0,
                maximum=MAX_SEED,
                step=1,
            )
            randomize_seed = gr.Checkbox(value=True, label="Randomize seed")

            # Output resolution.
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    value=1024,
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                )
                height = gr.Slider(
                    label="Height",
                    value=1024,
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                )

            # Sampler settings.
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    value=3.5,
                    minimum=1,
                    maximum=15,
                    step=0.1,
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    value=28,
                    minimum=1,
                    maximum=50,
                    step=1,
                )

        # Example prompts; `run_example` is the function associated with them
        # and writes to the image and the seed slider.
        gr.Examples(
            fn=run_example,
            examples=examples,
            inputs=prompt,
            outputs=[result, seed],
        )

    # Submitting the prompt runs `infer` with every control as input. The seed
    # slider is also an output, so it receives the seed that `infer` returns.
    prompt.submit(
        fn=infer,
        inputs=[prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
        outputs=[result, seed],
    )
if __name__ == "__main__":
    # Start the app only when this file is executed as a script;
    # `mcp_server=True` additionally asks Gradio to serve it as an MCP server.
    demo.launch(mcp_server=True)