# NOTE: removed Hugging Face Space page scaffolding (status lines, commit
# hashes, and a line-number gutter) that was captured along with this file
# during extraction — it was not part of the source code.
# This is a Gradio app that integrates a chat interface with a text-to-image and image editing model.
import gradio as gr
import numpy as np
import random
import os
from huggingface_hub import InferenceClient
# --- Constants ---
# Largest 32-bit signed integer (2**31 - 1): upper bound for seed values.
MAX_SEED = np.iinfo(np.int32).max
# --- Initialize Inference Client ---
# Serverless inference routed through the "fal-ai" provider, billed to the
# "huggingface" organization.
# NOTE(review): os.environ["HF_TOKEN"] raises KeyError at import time if the
# variable is unset — confirm the deployment environment always provides it.
client = InferenceClient(
provider="fal-ai",
api_key=os.environ["HF_TOKEN"],
bill_to="huggingface",
)
# --- Core Inference Function for ChatInterface ---
def chat_fn(message, chat_history, seed, randomize_seed, guidance_scale, steps):
    """Generate or edit an image from a multimodal chat message.

    A message carrying an uploaded file is treated as an image-editing
    request; a text-only message is treated as text-to-image. Either way
    the model output is returned wrapped in a ``gr.Image`` component.

    Raises:
        gr.Error: when the message has neither text nor files, or when the
            uploaded file cannot be read.
    """
    prompt, files = message["text"], message["files"]

    # Guard clause: at least one of prompt / uploaded image is required.
    if not (prompt or files):
        raise gr.Error("Please provide a prompt and/or upload an image.")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Read the first uploaded file (if any) as raw bytes for the editing path.
    input_image = None
    if files:
        print(f"Received image: {files[0]}")
        try:
            with open(files[0], "rb") as image_file:
                input_image = image_file.read()
        except Exception as e:
            raise gr.Error(f"Could not process the uploaded image: {str(e)}. Please try uploading a different image format (JPEG, PNG, WebP).")

    # Both endpoints share the same generation parameters.
    generation_kwargs = dict(
        prompt=prompt,
        model="black-forest-labs/FLUX.1-Kontext-dev",
        guidance_scale=guidance_scale,
        num_inference_steps=steps,
        seed=seed,
    )
    if not input_image:
        print(f"Received prompt for text-to-image: {prompt}")
        image = client.text_to_image(**generation_kwargs)
    else:
        print(f"Received prompt for image editing: {prompt}")
        image = client.image_to_image(input_image, **generation_kwargs)

    # Return the PIL Image as a Gradio Image component
    return gr.Image(value=image)
# --- UI Definition using gr.ChatInterface ---
# Controls rendered as "additional inputs" under the chat box; they are passed
# positionally to chat_fn as (seed, randomize_seed, guidance_scale, steps).
seed_slider = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
randomize_checkbox = gr.Checkbox(label="Randomize seed", value=False)
guidance_slider = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=2.5)
steps_slider = gr.Slider(label="Steps", minimum=1, maximum=30, value=28, step=1)
# --- Examples without external URLs ---
# Remove examples temporarily to avoid format issues
examples = None
# Chat UI: a multimodal textbox lets the user combine a text prompt with an
# optional image upload in a single message, dispatched to chat_fn.
demo = gr.ChatInterface(
fn=chat_fn,
title="FLUX.1 Kontext [dev]",
description="""<p style='text-align: center;'>
A simple chat UI for the <b>FLUX.1 Kontext</b> model running on ZeroGPU.
<br>
To edit an image, upload it and type your instructions (e.g., "Add a hat").
<br>
To generate an image, just type a prompt (e.g., "A photo of an astronaut on a horse").
<br>
Find the model on <a href='https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev' target='_blank'>Hugging Face</a>.
</p>""",
multimodal=True, # This is important for MultimodalTextbox to work
textbox=gr.MultimodalTextbox(
file_types=["image"],
placeholder="Type a prompt and/or upload an image...",
render=False
),
# Order must match chat_fn's trailing parameters.
additional_inputs=[
seed_slider,
randomize_checkbox,
guidance_slider,
steps_slider
],
examples=examples,
theme="soft"
)
# --- Entry point ---
if __name__ == "__main__":
    # show_error surfaces exceptions raised inside event handlers in the UI.
    # Fixed: the original line carried a stray trailing " |" (page-extraction
    # residue) and was unindented, both of which broke the file's syntax.
    demo.launch(show_error=True)