akhaliq (HF Staff) committed on
Commit 9ab45e8 · verified · 1 Parent(s): 01bf5a7

Update app.py

Files changed (1)
  1. app.py +66 -189
app.py CHANGED
@@ -1,166 +1,51 @@
 import gradio as gr
 import numpy as np
+import spaces
+import torch
 import random
 import os
 import tempfile
-import subprocess
-import json
 from PIL import Image, ImageOps
 import pillow_heif # For HEIF/AVIF support
-import io
+
+# Import the pipeline from diffusers
+from diffusers import FluxKontextPipeline
 
 # --- Constants ---
 MAX_SEED = np.iinfo(np.int32).max
 
-def setup_node_environment():
-    """Setup Node.js environment and install required packages"""
-    try:
-        # Check if node is available
-        result = subprocess.run(['node', '--version'], capture_output=True, text=True)
-        if result.returncode != 0:
-            raise gr.Error("Node.js is not installed. Please install Node.js to use this feature.")
-
-        # Check if @huggingface/inference is installed, if not install it
-        package_check = subprocess.run(['npm', 'list', '@huggingface/inference'], capture_output=True, text=True)
-        if package_check.returncode != 0:
-            print("Installing @huggingface/inference package...")
-            install_result = subprocess.run(['npm', 'install', '@huggingface/inference'], capture_output=True, text=True)
-            if install_result.returncode != 0:
-                raise gr.Error(f"Failed to install @huggingface/inference: {install_result.stderr}")
-
-        return True
-    except FileNotFoundError:
-        raise gr.Error("Node.js or npm not found. Please install Node.js and npm.")
+# --- Global pipeline variable ---
+pipe = None
 
-def create_js_inference_script(image_path, prompt, hf_token):
-    """Create JavaScript inference script"""
-    js_code = f"""
-const {{ InferenceClient }} = require("@huggingface/inference");
-const fs = require("fs");
-
-async function runInference() {{
-    try {{
-        const client = new InferenceClient("{hf_token}");
-        const data = fs.readFileSync("{image_path}");
-
-        const image = await client.imageToImage({{
-            provider: "replicate",
-            model: "black-forest-labs/FLUX.1-Kontext-dev",
-            inputs: data,
-            parameters: {{ prompt: "{prompt}" }},
-        }}, {{
-            billTo: "huggingface",
-        }});
-
-        // Convert blob to buffer
-        const arrayBuffer = await image.arrayBuffer();
-        const buffer = Buffer.from(arrayBuffer);
-
-        // Output as base64 for Python to read
-        const base64 = buffer.toString('base64');
-        console.log(JSON.stringify({{
-            success: true,
-            image_base64: base64,
-            content_type: image.type || 'image/jpeg'
-        }}));
-
-    }} catch (error) {{
-        console.log(JSON.stringify({{
-            success: false,
-            error: error.message
-        }}));
-        process.exit(1);
-    }}
-}}
-
-runInference();
-"""
-    return js_code
-
-def query_api_js(image_bytes, prompt, seed, guidance_scale, steps, progress_callback=None):
-    """Send request using JavaScript HF Inference Client"""
-
-    # Get token from environment variable
-    hf_token = os.getenv("HF_TOKEN")
-    if not hf_token:
-        raise gr.Error("HF_TOKEN environment variable not found. Please add your Hugging Face token to the environment.")
-
-    if progress_callback:
-        progress_callback(0.1, "Setting up Node.js environment...")
-
-    # Setup Node.js environment
-    setup_node_environment()
-
-    if progress_callback:
-        progress_callback(0.2, "Preparing image...")
-
-    # Create a temporary file for the image
-    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as temp_file:
-        temp_file.write(image_bytes)
-        temp_image_path = temp_file.name
-
-    # Create temporary JavaScript file
-    with tempfile.NamedTemporaryFile(mode='w', suffix='.js', delete=False) as js_file:
-        js_code = create_js_inference_script(temp_image_path, prompt.replace('"', '\\"'), hf_token)
-        js_file.write(js_code)
-        js_file_path = js_file.name
-
-    try:
-        if progress_callback:
-            progress_callback(0.3, "Running JavaScript inference...")
-
-        # Run the JavaScript code
-        result = subprocess.run(
-            ['node', js_file_path],
-            capture_output=True,
-            text=True,
-            timeout=300  # 5 minute timeout
-        )
-
-        if progress_callback:
-            progress_callback(0.8, "Processing result...")
-
-        if result.returncode != 0:
-            raise gr.Error(f"JavaScript inference failed: {result.stderr}")
-
-        # Parse the JSON output
-        try:
-            output = json.loads(result.stdout.strip())
-        except json.JSONDecodeError:
-            raise gr.Error(f"Failed to parse JavaScript output: {result.stdout}")
-
-        if not output.get('success'):
-            raise gr.Error(f"Inference error: {output.get('error', 'Unknown error')}")
-
-        if progress_callback:
-            progress_callback(0.9, "Decoding image...")
-
-        # Decode base64 image
-        import base64
-        image_data = base64.b64decode(output['image_base64'])
-
-        if progress_callback:
-            progress_callback(1.0, "Complete!")
-
-        return image_data
-
-    except subprocess.TimeoutExpired:
-        raise gr.Error("Inference timed out. Please try again.")
-    except Exception as e:
-        raise gr.Error(f"Error running JavaScript inference: {str(e)}")
-    finally:
-        # Clean up temporary files
-        try:
-            os.unlink(temp_image_path)
-            os.unlink(js_file_path)
-        except:
-            pass
+def load_model():
+    """Load the model on CPU first, then move to GPU when needed"""
+    global pipe
+    if pipe is None:
+        # Register HEIF opener with PIL for AVIF/HEIF support
+        pillow_heif.register_heif_opener()
+
+        # Get token from environment variable
+        hf_token = os.getenv("HF_TOKEN")
+        if hf_token:
+            pipe = FluxKontextPipeline.from_pretrained(
+                "black-forest-labs/FLUX.1-Kontext-dev",
+                torch_dtype=torch.bfloat16,
+                token=hf_token,
+            )
+        else:
+            raise gr.Error("HF_TOKEN environment variable not found. Please add your Hugging Face token to the Space settings.")
+    return pipe
 
 # --- Core Inference Function for ChatInterface ---
-def chat_fn(message, chat_history, seed, randomize_seed, guidance_scale, steps, progress=gr.Progress()):
+@spaces.GPU(duration=120)  # Set duration based on expected inference time
+def chat_fn(message, chat_history, seed, randomize_seed, guidance_scale, steps, progress=gr.Progress(track_tqdm=True)):
     """
     Performs image generation or editing based on user input from the chat interface.
     """
+    # Load and move model to GPU within the decorated function
+    pipe = load_model()
+    pipe = pipe.to("cuda")
+
     prompt = message["text"]
     files = message["files"]
 
@@ -170,12 +55,12 @@ def chat_fn(message, chat_history, seed, randomize_seed, guidance_scale, steps,
     if randomize_seed:
        seed = random.randint(0, MAX_SEED)
 
+    generator = torch.Generator(device="cuda").manual_seed(int(seed))
+
+    input_image = None
     if files:
         print(f"Received image: {files[0]}")
         try:
-            # Register HEIF opener with PIL for AVIF/HEIF support
-            pillow_heif.register_heif_opener()
-
             # Try to open and convert the image
             input_image = Image.open(files[0])
             # Convert to RGB if needed (handles RGBA, P, etc.)
@@ -183,42 +68,31 @@ def chat_fn(message, chat_history, seed, randomize_seed, guidance_scale, steps,
             input_image = input_image.convert("RGB")
             # Auto-orient the image based on EXIF data
             input_image = ImageOps.exif_transpose(input_image)
-
-            # Convert PIL image to bytes
-            img_byte_arr = io.BytesIO()
-            input_image.save(img_byte_arr, format='PNG')
-            img_byte_arr.seek(0)
-            image_bytes = img_byte_arr.getvalue()
-
         except Exception as e:
             raise gr.Error(f"Could not process the uploaded image: {str(e)}. Please try uploading a different image format (JPEG, PNG, WebP).")
 
-        progress(0.1, desc="Processing image...")
+        image = pipe(
+            image=input_image,
+            prompt=prompt,
+            guidance_scale=guidance_scale,
+            num_inference_steps=steps,
+            generator=generator,
+        ).images[0]
     else:
-        # For text-to-image, we need a placeholder image or handle differently
-        # FLUX.1 Kontext is primarily an image-to-image model
-        raise gr.Error("This model (FLUX.1 Kontext) requires an input image. Please upload an image to edit.")
-
-    try:
-        # Make API request using JavaScript
-        result_bytes = query_api_js(image_bytes, prompt, seed, guidance_scale, steps, progress_callback=progress)
-
-        # Try to convert response bytes to PIL Image
-        try:
-            image = Image.open(io.BytesIO(result_bytes))
-        except Exception as img_error:
-            print(f"Failed to open image: {img_error}")
-            print(f"Image bytes type: {type(result_bytes)}, length: {len(result_bytes) if hasattr(result_bytes, '__len__') else 'unknown'}")
-            raise gr.Error(f"Could not process API response as image. Response length: {len(result_bytes) if hasattr(result_bytes, '__len__') else 'unknown'}")
-
-        progress(1.0, desc="Complete!")
-        return gr.Image(value=image)
-
-    except gr.Error:
-        # Re-raise gradio errors as-is
-        raise
-    except Exception as e:
-        raise gr.Error(f"Failed to generate image: {str(e)}")
+        print(f"Received prompt for text-to-image: {prompt}")
+        image = pipe(
+            prompt=prompt,
+            guidance_scale=guidance_scale,
+            num_inference_steps=steps,
+            generator=generator,
+        ).images[0]
+
+    # Move model back to CPU to free GPU memory
+    pipe = pipe.to("cpu")
+    torch.cuda.empty_cache()
+
+    # Return the PIL Image as a Gradio Image component
+    return gr.Image(value=image)
 
 # --- UI Definition using gr.ChatInterface ---
 
@@ -227,24 +101,26 @@ randomize_checkbox = gr.Checkbox(label="Randomize seed", value=False)
 guidance_slider = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=2.5)
 steps_slider = gr.Slider(label="Steps", minimum=1, maximum=30, value=28, step=1)
 
+# --- Examples without external URLs ---
+# Remove examples temporarily to avoid format issues
+examples = None
+
 demo = gr.ChatInterface(
     fn=chat_fn,
-    title="FLUX.1 Kontext [dev] - HF Inference Client (JS)",
+    title="FLUX.1 Kontext [dev]",
     description="""<p style='text-align: center;'>
-    A simple chat UI for the <b>FLUX.1 Kontext [dev]</b> model using Hugging Face Inference Client via JavaScript.
+    A simple chat UI for the <b>FLUX.1 Kontext</b> model running on ZeroGPU.
     <br>
-    <b>Upload an image</b> and type your editing instructions (e.g., "Turn the cat into a tiger", "Add a hat").
+    To edit an image, upload it and type your instructions (e.g., "Add a hat").
     <br>
-    This model specializes in understanding context and making precise edits to your images.
+    To generate an image, just type a prompt (e.g., "A photo of an astronaut on a horse").
     <br>
     Find the model on <a href='https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev' target='_blank'>Hugging Face</a>.
-    <br>
-    <b>Requirements:</b> Node.js and npm must be installed. Uses HF_TOKEN environment variable.
     </p>""",
-    multimodal=True,
+    multimodal=True,  # This is important for MultimodalTextbox to work
     textbox=gr.MultimodalTextbox(
         file_types=["image"],
-        placeholder="Upload an image and type your editing instructions...",
+        placeholder="Type a prompt and/or upload an image...",
         render=False
     ),
     additional_inputs=[
@@ -253,6 +129,7 @@ demo = gr.ChatInterface(
        guidance_slider,
        steps_slider
    ],
+    examples=examples,
    theme="soft"
 )
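
For quick sanity-checking outside the Space, the new diffusers code path can be exercised on its own. The sketch below mirrors the pipeline calls in the updated app.py; it assumes a CUDA device, a diffusers build that provides FluxKontextPipeline, an HF_TOKEN with access to the gated model, and a placeholder input file input.png:

    import os
    import torch
    from PIL import Image
    from diffusers import FluxKontextPipeline

    # Same model, dtype, and call signature as the updated app.py
    pipe = FluxKontextPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-Kontext-dev",
        torch_dtype=torch.bfloat16,
        token=os.getenv("HF_TOKEN"),
    ).to("cuda")

    # Fixed seed for a reproducible run, as chat_fn does with its seed input
    generator = torch.Generator(device="cuda").manual_seed(42)

    # Image-editing branch; drop image= for plain text-to-image
    image = pipe(
        image=Image.open("input.png").convert("RGB"),  # placeholder file name
        prompt="Add a hat",
        guidance_scale=2.5,
        num_inference_steps=28,
        generator=generator,
    ).images[0]
    image.save("output.png")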