Adding HF inference image generation to MCP tools (Flux Krea by default)
Browse files
app.py
CHANGED
@@ -9,6 +9,8 @@ from __future__ import annotations
|
|
9 |
import re
|
10 |
import json
|
11 |
import sys
|
|
|
|
|
12 |
from io import StringIO
|
13 |
from typing import List, Dict, Tuple, Annotated
|
14 |
|
@@ -18,6 +20,8 @@ from bs4 import BeautifulSoup
|
|
18 |
from readability import Document
|
19 |
from urllib.parse import urljoin, urldefrag, urlparse
|
20 |
from duckduckgo_search import DDGS
|
|
|
|
|
21 |
|
22 |
# Optional imports for Kokoro TTS (loaded lazily)
|
23 |
import numpy as np
|
@@ -626,7 +630,7 @@ CSS_STYLES = """
|
|
626 |
}
|
627 |
/* Default: add subtitle under titles */
|
628 |
.gradio-container h1::after {
|
629 |
-
|
630 |
display: block;
|
631 |
font-size: 1rem;
|
632 |
font-weight: 500;
|
@@ -660,14 +664,136 @@ kokoro_interface = gr.Interface(
|
|
660 |
allow_flagging="never",
|
661 |
)
|
662 |
|
663 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
664 |
demo = gr.TabbedInterface(
|
665 |
-
interface_list=[fetch_interface, concise_interface, code_interface, kokoro_interface],
|
666 |
tab_names=[
|
667 |
"Fetch Webpage",
|
668 |
"DuckDuckGo Search",
|
669 |
"Python Code Executor",
|
670 |
"Kokoro TTS",
|
|
|
671 |
],
|
672 |
title="Tools MCP",
|
673 |
theme="Nymbo/Nymbo_Theme",
|
|
|
9 |
import re
|
10 |
import json
|
11 |
import sys
|
12 |
+
import os
|
13 |
+
import random
|
14 |
from io import StringIO
|
15 |
from typing import List, Dict, Tuple, Annotated
|
16 |
|
|
|
20 |
from readability import Document
|
21 |
from urllib.parse import urljoin, urldefrag, urlparse
|
22 |
from duckduckgo_search import DDGS
|
23 |
+
from PIL import Image
|
24 |
+
from huggingface_hub import InferenceClient
|
25 |
|
26 |
# Optional imports for Kokoro TTS (loaded lazily)
|
27 |
import numpy as np
|
|
|
630 |
}
|
631 |
/* Default: add subtitle under titles */
|
632 |
.gradio-container h1::after {
|
633 |
+
content: "Fetch Webpage | Search DuckDuckGo | Code Interpreter | Kokoro TTS | Flux Krea (Serverless)";
|
634 |
display: block;
|
635 |
font-size: 1rem;
|
636 |
font-weight: 500;
|
|
|
664 |
allow_flagging="never",
|
665 |
)
|
666 |
|
667 |
+
# ==========================
# Flux Krea (Serverless)
# ==========================

# Token handed to InferenceClient below; None when HF_READ_TOKEN is not set.
HF_API_TOKEN = os.getenv("HF_READ_TOKEN")


def Flux_Krea_Generate(  # <-- MCP tool #5 (Flux Krea - Serverless)
    prompt: Annotated[str, "Text description of the image to generate."],
    negative_prompt: Annotated[str, "What should NOT appear in the image."] = (
        "(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
        "missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
        "mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
    ),
    steps: Annotated[int, "Number of denoising steps (1–100). Higher = slower, potentially higher quality."] = 35,
    cfg_scale: Annotated[float, "Classifier-free guidance scale (1–20). Higher = follow the prompt more closely."] = 7.0,
    sampler: Annotated[str, "Sampling method label (UI only). Common options: 'DPM++ 2M Karras', 'DPM++ SDE Karras', 'Euler', 'Euler a', 'Heun', 'DDIM'."] = "DPM++ 2M Karras",
    seed: Annotated[int, "Random seed for reproducibility. Use -1 for a random seed per call."] = -1,
    width: Annotated[int, "Output width in pixels (64–1216, multiple of 32 recommended)."] = 1024,
    height: Annotated[int, "Output height in pixels (64–1216, multiple of 32 recommended)."] = 1024,
) -> Image.Image:
    """
    Generate a single image from a text prompt using the black-forest-labs/FLUX.1-Krea-dev
    model via Hugging Face Inference (serverless). Returns a PIL image.

    Notes (MCP):
        - Per the latest Gradio MCP docs, images returned from tools are handled by the server and
          converted to file URLs automatically for MCP clients. Ensure type hints and this docstring
          "Args:" block are present so the tool schema is accurate.
        - Providers are tried in order ("auto", "replicate", "fal-ai"); the first one that
          succeeds wins. The same seed is sent to every provider attempted within one call.

    Args:
        prompt (str): Text description of the image to generate.
        negative_prompt (str): What should NOT appear in the image.
        steps (int): Number of denoising steps (1–100). Higher can improve quality.
        cfg_scale (float): Guidance scale (1–20). Higher = follow the prompt more closely.
        sampler (str): Sampling method label for UI; not all providers expose this control.
            It is accepted for interface compatibility and is not forwarded to the provider.
        seed (int): Random seed. Use -1 to randomize on each call.
        width (int): Output width in pixels (64–1216; multiples of 32 recommended).
        height (int): Output height in pixels (64–1216; multiples of 32 recommended).

    Returns:
        PIL.Image.Image: The generated image.

    Error modes:
        - Raises gr.Error with a user-friendly message on an empty prompt or when every
          provider fails (auth/model/load errors). The last provider exception is chained
          as the cause.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Please provide a non-empty prompt.")

    # Slightly enhance prompt for quality (kept consistent with Serverless space)
    enhanced_prompt = f"{prompt} | ultra detail, ultra elaboration, ultra quality, perfect."

    # Resolve the seed once, outside the loop, so a fallback provider retries the
    # very same request instead of silently drawing a new random seed.
    resolved_seed = random.randint(1, 1_000_000_000) if seed == -1 else seed

    # Try multiple providers for resilience
    providers = ["auto", "replicate", "fal-ai"]
    last_error: Exception | None = None

    for provider in providers:
        try:
            client = InferenceClient(api_key=HF_API_TOKEN, provider=provider)
            return client.text_to_image(
                prompt=enhanced_prompt,
                negative_prompt=negative_prompt,
                model="black-forest-labs/FLUX.1-Krea-dev",
                width=width,
                height=height,
                num_inference_steps=steps,
                guidance_scale=cfg_scale,
                seed=resolved_seed,
            )
        except Exception as e:  # deliberate catch-all: any failure means "try the next provider"
            last_error = e

    # If we reach here, all providers failed; translate the last failure.
    msg = str(last_error) if last_error else "Unknown error"
    # HTTP errors normally expose the response object; other exception types
    # (timeouts, value errors, ...) do not, hence the defensive getattr chain.
    status = getattr(getattr(last_error, "response", None), "status_code", None)

    def failed_with(*codes: int) -> bool:
        # Prefer the structured status code; scanning the message text is only a
        # fallback because digits such as "404" can appear in ids or URLs.
        if isinstance(status, int):
            return status in codes
        return any(str(code) in msg for code in codes)

    if failed_with(404):
        raise gr.Error("Model not found. Ensure 'FLUX.1-Krea-dev' is accessible with your HF token.") from last_error
    if failed_with(503):
        raise gr.Error("The model is warming up. Please try again shortly.") from last_error
    if failed_with(401, 403):
        raise gr.Error("Authentication failed. Set HF_READ_TOKEN environment variable with access to the model.") from last_error
    raise gr.Error(f"Image generation failed: {msg}") from last_error
|
750 |
+
|
751 |
+
|
752 |
+
# Gradio tab for the Flux Krea tool. The input components below are created in
# the same order as Flux_Krea_Generate's parameters (prompt, negative prompt,
# steps, CFG scale, sampler, seed, width, height).
_flux_prompt_box = gr.Textbox(label="Prompt", placeholder="Enter a prompt", lines=2)
_flux_negative_box = gr.Textbox(
    label="Negative Prompt",
    value=(
        "(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
        "missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
        "mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
    ),
    lines=2,
)
_flux_steps_slider = gr.Slider(label="Steps", minimum=1, maximum=100, step=1, value=35)
_flux_cfg_slider = gr.Slider(label="CFG Scale", minimum=1.0, maximum=20.0, step=0.1, value=7.0)
_flux_sampler_radio = gr.Radio(
    label="Sampler",
    choices=["DPM++ 2M Karras", "DPM++ SDE Karras", "Euler", "Euler a", "Heun", "DDIM"],
    value="DPM++ 2M Karras",
)
_flux_seed_slider = gr.Slider(label="Seed (-1 = random)", minimum=-1, maximum=1_000_000_000, step=1, value=-1)
_flux_width_slider = gr.Slider(label="Width", minimum=64, maximum=1216, step=32, value=1024)
_flux_height_slider = gr.Slider(label="Height", minimum=64, maximum=1216, step=32, value=1024)

# Human-facing blurb shown on the tab.
_flux_description = (
    "<div style=\"text-align:center\">Generate images with FLUX.1-Krea-dev via Hugging Face Inference. "
    "No input image required. 'Strength' is omitted. Set HF_READ_TOKEN as needed.</div>"
)

# Description passed to Gradio as `api_description`.
_flux_api_description = (
    "Generate a single image from a text prompt using black-forest-labs/FLUX.1-Krea-dev on Hugging Face Inference. "
    "Parameters: prompt (str), negative_prompt (str), steps (int, 1–100), cfg_scale (float, 1–20), "
    "sampler (str, label only), seed (int, -1=random), width/height (int, 64–1216). Returns a PIL.Image."
)

flux_krea_interface = gr.Interface(
    fn=Flux_Krea_Generate,
    inputs=[
        _flux_prompt_box,
        _flux_negative_box,
        _flux_steps_slider,
        _flux_cfg_slider,
        _flux_sampler_radio,
        _flux_seed_slider,
        _flux_width_slider,
        _flux_height_slider,
    ],
    outputs=gr.Image(label="Generated Image"),
    title="Flux Krea (Serverless)",
    description=_flux_description,
    api_description=_flux_api_description,
    allow_flagging="never",
)
|
787 |
+
|
788 |
+
# Build tabbed app including Flux Krea
|
789 |
demo = gr.TabbedInterface(
|
790 |
+
interface_list=[fetch_interface, concise_interface, code_interface, kokoro_interface, flux_krea_interface],
|
791 |
tab_names=[
|
792 |
"Fetch Webpage",
|
793 |
"DuckDuckGo Search",
|
794 |
"Python Code Executor",
|
795 |
"Kokoro TTS",
|
796 |
+
"Flux Krea (Serverless)",
|
797 |
],
|
798 |
title="Tools MCP",
|
799 |
theme="Nymbo/Nymbo_Theme",
|