prithivMLmods committed
Commit 0c8e12c · verified · 1 Parent(s): 15dbdac

Update app.py

Files changed (1)
  1. app.py +419 -378
app.py CHANGED
@@ -1,401 +1,442 @@
1
- import os
2
- import random
3
- import uuid
4
- import json
5
- import time
6
- import asyncio
7
- from threading import Thread
8
-
9
  import gradio as gr
 
10
  import spaces
11
  import torch
12
- import numpy as np
 
 
13
  from PIL import Image
14
- import cv2
15
-
16
- from transformers import (
17
- Qwen2_5_VLForConditionalGeneration,
18
- AutoProcessor,
19
- TextIteratorStreamer,
20
- )
21
- from transformers.image_utils import load_image
22
 
23
- # Constants for text generation
24
- MAX_MAX_NEW_TOKENS = 2048
25
- DEFAULT_MAX_NEW_TOKENS = 1024
26
- MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
27
 
28
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
29
 
30
- # Load Vision-Matters-7B
31
- MODEL_ID_M = "Yuting6/Vision-Matters-7B"
32
- processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
33
- model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
34
- MODEL_ID_M, trust_remote_code=True,
35
- torch_dtype=torch.float16).to(device).eval()
36
-
37
- # Load ViGaL-7B
38
- MODEL_ID_X = "yunfeixie/ViGaL-7B"
39
- processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
40
- model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
41
- MODEL_ID_X, trust_remote_code=True,
42
- torch_dtype=torch.float16).to(device).eval()
 
43
 
44
- # Load prithivMLmods/WR30a-Deep-7B-0711
45
- MODEL_ID_T = "prithivMLmods/WR30a-Deep-7B-0711"
46
- processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
47
- model_t = Qwen2_5_VLForConditionalGeneration.from_pretrained(
48
- MODEL_ID_T, trust_remote_code=True,
49
- torch_dtype=torch.float16).to(device).eval()
 
50
 
51
- # Load Visionary-R1
52
- MODEL_ID_O = "maifoundations/Visionary-R1"
53
- processor_o = AutoProcessor.from_pretrained(MODEL_ID_O, trust_remote_code=True)
54
- model_o = Qwen2_5_VLForConditionalGeneration.from_pretrained(
55
- MODEL_ID_O, trust_remote_code=True,
56
- torch_dtype=torch.float16).to(device).eval()
57
 
58
- #-----------------------------subfolder-----------------------------#
59
- # Load MonkeyOCR-pro-1.2B
60
- MODEL_ID_W = "echo840/MonkeyOCR-pro-1.2B"
61
- SUBFOLDER = "Recognition"
62
- processor_w = AutoProcessor.from_pretrained(MODEL_ID_W, trust_remote_code=True, subfolder=SUBFOLDER)
63
- model_w = Qwen2_5_VLForConditionalGeneration.from_pretrained(
64
- MODEL_ID_W, trust_remote_code=True,
65
- subfolder=SUBFOLDER,
66
- torch_dtype=torch.float16).to(device).eval()
67
- #-----------------------------subfolder-----------------------------#
68
 
69
- # Function to downsample video frames
70
- def downsample_video(video_path):
71
- """
72
- Downsamples the video to evenly spaced frames.
73
- Each frame is returned as a PIL image along with its timestamp.
74
- """
75
- vidcap = cv2.VideoCapture(video_path)
76
- total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
77
- fps = vidcap.get(cv2.CAP_PROP_FPS)
78
- frames = []
79
- frame_indices = np.linspace(0, total_frames - 1, 10, dtype=int)
80
- for i in frame_indices:
81
- vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
82
- success, image = vidcap.read()
83
- if success:
84
- image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
85
- pil_image = Image.fromarray(image)
86
- timestamp = round(i / fps, 2)
87
- frames.append((pil_image, timestamp))
88
- vidcap.release()
89
- return frames
90
 
91
- # Function to generate text responses based on image input
92
- @spaces.GPU
93
- def generate_image(model_name: str,
94
- text: str,
95
- image: Image.Image,
96
- max_new_tokens: int = 1024,
97
- temperature: float = 0.6,
98
- top_p: float = 0.9,
99
- top_k: int = 50,
100
- repetition_penalty: float = 1.2):
101
- """
102
- Generates responses using the selected model for image input.
103
- """
104
- if model_name == "Vision-Matters-7B":
105
- processor = processor_m
106
- model = model_m
107
- elif model_name == "ViGaL-7B":
108
- processor = processor_x
109
- model = model_x
110
- elif model_name == "Visionary-R1-3B":
111
- processor = processor_o
112
- model = model_o
113
- elif model_name == "WR30a-Deep-7B-0711":
114
- processor = processor_t
115
- model = model_t
116
- elif model_name == "MonkeyOCR-pro-1.2B":
117
- processor = processor_w
118
- model = model_w
119
- else:
120
- yield "Invalid model selected.", "Invalid model selected."
121
- return
122
 
123
- if image is None:
124
- yield "Please upload an image.", "Please upload an image."
125
- return
126
 
127
- messages = [{
128
- "role": "user",
129
- "content": [
130
- {"type": "image", "image": image},
131
- {"type": "text", "text": text},
132
- ]
133
- }]
134
- prompt_full = processor.apply_chat_template(messages,
135
- tokenize=False,
136
- add_generation_prompt=True)
137
- inputs = processor(text=[prompt_full],
138
- images=[image],
139
- return_tensors="pt",
140
- padding=True,
141
- truncation=False,
142
- max_length=MAX_INPUT_TOKEN_LENGTH).to(device)
143
- streamer = TextIteratorStreamer(processor,
144
- skip_prompt=True,
145
- skip_special_tokens=True)
146
- generation_kwargs = {
147
- **inputs, "streamer": streamer,
148
- "max_new_tokens": max_new_tokens
149
- }
150
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
151
- thread.start()
152
- buffer = ""
153
- for new_text in streamer:
154
- buffer += new_text
155
- time.sleep(0.01)
156
- yield buffer, buffer
157
 
158
- # Function to generate text responses based on video input
159
  @spaces.GPU
160
- def generate_video(model_name: str,
161
- text: str,
162
- video_path: str,
163
- max_new_tokens: int = 1024,
164
- temperature: float = 0.6,
165
- top_p: float = 0.9,
166
- top_k: int = 50,
167
- repetition_penalty: float = 1.2):
168
- """
169
- Generates responses using the selected model for video input.
170
- """
171
- if model_name == "Vision-Matters-7B":
172
- processor = processor_m
173
- model = model_m
174
- elif model_name == "ViGaL-7B":
175
- processor = processor_x
176
- model = model_x
177
- elif model_name == "Visionary-R1-3B":
178
- processor = processor_o
179
- model = model_o
180
- elif model_name == "WR30a-Deep-7B-0711":
181
- processor = processor_t
182
- model = model_t
183
- elif model_name == "MonkeyOCR-pro-1.2B":
184
- processor = processor_w
185
- model = model_w
186
  else:
187
- yield "Invalid model selected.", "Invalid model selected."
188
- return
189
-
190
- if video_path is None:
191
- yield "Please upload a video.", "Please upload a video."
192
- return
193
-
194
- frames = downsample_video(video_path)
195
- messages = [{
196
- "role": "system",
197
- "content": [{"type": "text", "text": "You are a helpful assistant."}]
198
- }, {
199
- "role": "user",
200
- "content": [{"type": "text", "text": text}]
201
- }]
202
- for frame in frames:
203
- image, timestamp = frame
204
- messages[1]["content"].append({"type": "text", "text": f"Frame {timestamp}:"})
205
- messages[1]["content"].append({"type": "image", "image": image})
206
- inputs = processor.apply_chat_template(
207
- messages,
208
- tokenize=True,
209
- add_generation_prompt=True,
210
- return_dict=True,
211
- return_tensors="pt",
212
- truncation=False,
213
- max_length=MAX_INPUT_TOKEN_LENGTH).to(device)
214
- streamer = TextIteratorStreamer(processor,
215
- skip_prompt=True,
216
- skip_special_tokens=True)
217
- generation_kwargs = {
218
- **inputs,
219
- "streamer": streamer,
220
- "max_new_tokens": max_new_tokens,
221
- "do_sample": True,
222
- "temperature": temperature,
223
- "top_p": top_p,
224
- "top_k": top_k,
225
- "repetition_penalty": repetition_penalty,
226
- }
227
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
228
- thread.start()
229
- buffer = ""
230
- for new_text in streamer:
231
- buffer += new_text
232
- buffer = buffer.replace("<|im_end|>", "")
233
- time.sleep(0.01)
234
- yield buffer, buffer
235
-
236
- # Define examples for image and video inference
237
- image_examples = [
238
- ["Extract the content.", "images/7.png"],
239
- ["Solve the problem to find the value.", "images/1.jpg"],
240
- ["Explain the scene.", "images/6.JPG"],
241
- ["Solve the problem step by step.", "images/2.jpg"],
242
- ["Find the value of 'X'.", "images/3.jpg"],
243
- ["Simplify the expression.", "images/4.jpg"],
244
- ["Solve for the value.", "images/5.png"]
245
- ]
246
-
247
- video_examples = [
248
- ["Explain the video in detail.", "videos/1.mp4"],
249
- ["Explain the video in detail.", "videos/2.mp4"]
250
- ]
251
 
252
- # Updated CSS with the new submit button theme
253
- css = """
254
- .submit-btn {
255
- --clr-font-main: hsla(0 0% 20% / 100);
256
- --btn-bg-1: hsla(194 100% 69% / 1);
257
- --btn-bg-2: hsla(217 100% 56% / 1);
258
- --btn-bg-color: hsla(360 100% 100% / 1);
259
- --radii: 0.5em;
260
- cursor: pointer;
261
- padding: 0.9em 1.4em;
262
- min-width: 120px;
263
- min-height: 44px;
264
- font-size: var(--size, 1rem);
265
- font-weight: 500;
266
- transition: 0.8s;
267
- background-size: 280% auto;
268
- background-image: linear-gradient(
269
- 325deg,
270
- var(--btn-bg-2) 0%,
271
- var(--btn-bg-1) 55%,
272
- var(--btn-bg-2) 90%
273
- );
274
- border: none;
275
- border-radius: var(--radii);
276
- color: var(--btn-bg-color);
277
- box-shadow:
278
- 0px 0px 20px rgba(71, 184, 255, 0.5),
279
- 0px 5px 5px -1px rgba(58, 125, 233, 0.25),
280
- inset 4px 4px 8px rgba(175, 230, 255, 0.5),
281
- inset -4px -4px 8px rgba(19, 95, 216, 0.35);
282
- }
283
- .submit-btn:hover {
284
- background-position: right top;
285
- }
286
- .submit-btn:is(:focus, :focus-visible, :active) {
287
- outline: none;
288
- box-shadow:
289
- 0 0 0 3px var(--btn-bg-color),
290
- 0 0 0 6px var(--btn-bg-2);
291
- }
292
- @media (prefers-reduced-motion: reduce) {
293
- .submit-btn {
294
- transition: linear;
295
- }
296
- }
297
- .canvas-output {
298
- border: 2px solid #4682B4;
299
- border-radius: 10px;
300
- padding: 20px;
301
- }
302
- """
303
 
304
- # Create the Gradio Interface
305
- with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
306
- gr.Markdown(
307
- "# **[Multimodal VLMs [OCR | VQA]](https://huggingface.co/collections/prithivMLmods/multimodal-implementations-67c9982ea04b39f0608badb0)**"
 
 
308
  )
309
- with gr.Row():
310
- with gr.Column():
311
- with gr.Tabs():
312
- with gr.TabItem("Image Inference"):
313
- image_query = gr.Textbox(
314
- label="Query Input",
315
- placeholder="Enter your query here...")
316
- image_upload = gr.Image(type="pil", label="Image")
317
- image_submit = gr.Button("Submit",
318
- elem_classes="submit-btn")
319
- gr.Examples(examples=image_examples,
320
- inputs=[image_query, image_upload])
321
- with gr.TabItem("Video Inference"):
322
- video_query = gr.Textbox(
323
- label="Query Input",
324
- placeholder="Enter your query here...")
325
- video_upload = gr.Video(label="Video")
326
- video_submit = gr.Button("Submit",
327
- elem_classes="submit-btn")
328
- gr.Examples(examples=video_examples,
329
- inputs=[video_query, video_upload])
330
-
331
- with gr.Accordion("Advanced options", open=False):
332
- max_new_tokens = gr.Slider(label="Max new tokens",
333
- minimum=1,
334
- maximum=MAX_MAX_NEW_TOKENS,
335
- step=1,
336
- value=DEFAULT_MAX_NEW_TOKENS)
337
- temperature = gr.Slider(label="Temperature",
338
- minimum=0.1,
339
- maximum=4.0,
340
- step=0.1,
341
- value=0.6)
342
- top_p = gr.Slider(label="Top-p (nucleus sampling)",
343
- minimum=0.05,
344
- maximum=1.0,
345
- step=0.05,
346
- value=0.9)
347
- top_k = gr.Slider(label="Top-k",
348
- minimum=1,
349
- maximum=1000,
350
- step=1,
351
- value=50)
352
- repetition_penalty = gr.Slider(label="Repetition penalty",
353
- minimum=1.0,
354
- maximum=2.0,
355
- step=0.05,
356
- value=1.2)
357
-
358
- with gr.Column():
359
- with gr.Column(elem_classes="canvas-output"):
360
- gr.Markdown("## Output")
361
- output = gr.Textbox(label="Raw Output Stream",
362
- interactive=False,
363
- lines=2, show_copy_button=True)
364
- with gr.Accordion("(Result.md)", open=False):
365
- markdown_output = gr.Markdown(
366
- label="markup.md")
367
- #download_btn = gr.Button("Download Result.md")
368
 
369
- model_choice = gr.Radio(choices=[
370
- "Vision-Matters-7B", "WR30a-Deep-7B-0711",
371
- "ViGaL-7B", "MonkeyOCR-pro-1.2B", "Visionary-R1-3B"
372
- ],
373
- label="Select Model",
374
- value="Vision-Matters-7B")
375
-
376
- gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Multimodal-VLMs-5x/discussions)")
377
- gr.Markdown("> [WR30a-Deep-7B-0711](https://huggingface.co/prithivMLmods/WR30a-Deep-7B-0711): wr30a-deep-7b-0711 model is a fine-tuned version of qwen2.5-vl-7b-instruct, optimized for image captioning, visual analysis, and image reasoning. Built on top of the qwen2.5-vl architecture, this experimental model enhances visual comprehension capabilities with focused training on 1,500k image pairs for superior image understanding.")
378
- gr.Markdown("> [MonkeyOCR-pro-1.2B](https://huggingface.co/echo840/MonkeyOCR-pro-1.2B): MonkeyOCR adopts a structure-recognition-relation (SRR) triplet paradigm, which simplifies the multi-tool pipeline of modular approaches while avoiding the inefficiency of using large multimodal models for full-page document processing.")
379
- gr.Markdown("> [Vision Matters 7B](https://huggingface.co/Yuting6/Vision-Matters-7B): vision-matters is a simple visual perturbation framework that can be easily integrated into existing post-training pipelines including sft, dpo, and grpo. our findings highlight the critical role of visual perturbation: better reasoning begins with better seeing.")
380
- gr.Markdown("> [ViGaL 7B](https://huggingface.co/yunfeixie/ViGaL-7B): vigal-7b shows that training a 7b mllm on simple games like snake using reinforcement learning boosts performance on benchmarks like mathvista and mmmu without needing worked solutions or diagrams indicating transferable reasoning skills.")
381
- gr.Markdown("> [Visionary-R1](https://huggingface.co/maifoundations/Visionary-R1): visionary-r1 is a novel framework for training visual language models (vlms) to perform robust visual reasoning using reinforcement learning (rl). unlike traditional approaches that rely heavily on (sft) or (cot) annotations, visionary-r1 leverages only visual question-answer pairs and rl, making the process more scalable and accessible.")
382
- gr.Markdown(">⚠️note: all the models in space are not guaranteed to perform well in video inference use cases.")
383
 
384
- # Define the submit button actions
385
- image_submit.click(fn=generate_image,
386
- inputs=[
387
- model_choice, image_query, image_upload,
388
- max_new_tokens, temperature, top_p, top_k,
389
- repetition_penalty
390
- ],
391
- outputs=[output, markdown_output])
392
- video_submit.click(fn=generate_video,
393
- inputs=[
394
- model_choice, video_query, video_upload,
395
- max_new_tokens, temperature, top_p, top_k,
396
- repetition_penalty
397
- ],
398
- outputs=[output, markdown_output])
399
 
400
- if __name__ == "__main__":
401
- demo.queue(max_size=30).launch(share=True, mcp_server=True, ssr_mode=False, show_error=True)
1
  import gradio as gr
2
+ import numpy as np
3
  import spaces
4
  import torch
5
+ import random
6
+ import json
7
+ import os
8
  from PIL import Image
9
+ from diffusers import FluxKontextPipeline
10
+ from diffusers.utils import load_image
11
+ from huggingface_hub import hf_hub_download, HfFileSystem, ModelCard, list_repo_files
12
+ from safetensors.torch import load_file
13
+ import requests
14
+ import re
 
 
15
 
16
+ # Load Kontext model
17
+ MAX_SEED = np.iinfo(np.int32).max
 
 
18
 
19
+ pipe = FluxKontextPipeline.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16).to("cuda")
20
 
21
+ # Load LoRA data
22
+ flux_loras_raw = [
23
+ {
24
+ "image": "https://huggingface.co/prithivMLmods/FLUX.1-Kontext-Cinematic-Relighting/resolve/main/images/1.png",
25
+ "title": "Kontext Cinematic Relighting",
26
+ "repo": "prithivMLmods/FLUX.1-Kontext-Cinematic-Relighting",
27
+ "trigger_word": "Cinematic Relighting, Relight this portrait with warm, cinematic indoor lighting. Add soft amber highlights and gentle shadows to the face mimicking golden-hour light through a cozy room. Maintain natural skin texture and soft facial shadows, while enhancing eye catchlights for a vivid, lifelike look. Adjust white balance to a warmer tone, and slightly boost exposure to soften the darker midtones. Preserve the subject's pose and expression, and enhance the depth with gentle background bokeh and subtle filmic glow.",
28
+ "weights": "FLUX.1-Kontext-Cinematic-Relighting.safetensors"
29
+ },
30
+ ]
31
+ print(f"Loaded {len(flux_loras_raw)} LoRAs")
32
+ # Global variables for LoRA management
33
+ current_lora = None
34
+ lora_cache = {}
35
 
36
+ def load_lora_weights(repo_id, weights_filename):
37
+ """Load LoRA weights from HuggingFace"""
38
+ try:
39
+ # First try with the specified filename
40
+ try:
41
+ lora_path = hf_hub_download(repo_id=repo_id, filename=weights_filename)
42
+ if repo_id not in lora_cache:
43
+ lora_cache[repo_id] = lora_path
44
+ return lora_path
45
+ except Exception as e:
46
+ print(f"Failed to load {weights_filename}, trying to find alternative LoRA files...")
47
+
48
+ # If the specified file doesn't exist, try to find any .safetensors file
49
+ from huggingface_hub import list_repo_files
50
+ try:
51
+ files = list_repo_files(repo_id)
52
+ safetensors_files = [f for f in files if f.endswith(('.safetensors', '.bin')) and 'lora' in f.lower()]
53
+
54
+ if not safetensors_files:
55
+ # Try without 'lora' in filename
56
+ safetensors_files = [f for f in files if f.endswith('.safetensors')]
57
+
58
+ if safetensors_files:
59
+ # Try the first available file
60
+ for file in safetensors_files:
61
+ try:
62
+ print(f"Trying alternative file: {file}")
63
+ lora_path = hf_hub_download(repo_id=repo_id, filename=file)
64
+ if repo_id not in lora_cache:
65
+ lora_cache[repo_id] = lora_path
66
+ print(f"Successfully loaded alternative LoRA file: {file}")
67
+ return lora_path
68
+ except:
69
+ continue
70
+
71
+ print(f"No suitable LoRA files found in {repo_id}")
72
+ return None
73
+
74
+ except Exception as list_error:
75
+ print(f"Error listing files in repo {repo_id}: {list_error}")
76
+ return None
77
+
78
+ except Exception as e:
79
+ print(f"Error loading LoRA from {repo_id}: {e}")
80
+ return None
81
 
82
+ def update_selection(selected_state: gr.SelectData, flux_loras):
83
+ """Update UI when a LoRA is selected"""
84
+ if selected_state.index >= len(flux_loras):
85
+ return "### No LoRA selected", gr.update(), None
86
+
87
+ lora = flux_loras[selected_state.index]
88
+ lora_title = lora["title"]
89
+ lora_repo = lora["repo"]
90
+ trigger_word = lora["trigger_word"]
91
+
92
+ # Create a more informative selected text
93
+ updated_text = f"### 🎨 Selected Style: {lora_title}"
94
+ new_placeholder = f"Describe additional details, e.g., 'wearing a red hat' or 'smiling'"
95
+
96
+ return updated_text, gr.update(placeholder=new_placeholder), selected_state.index
97
 
98
+ def get_huggingface_lora(link):
99
+ """Download LoRA from HuggingFace link"""
100
+ split_link = link.split("/")
101
+ if len(split_link) == 2:
102
+ try:
103
+ model_card = ModelCard.load(link)
104
+ trigger_word = model_card.data.get("instance_prompt", "")
105
+
106
+ # Try to find the correct safetensors file
107
+ files = list_repo_files(link)
108
+ safetensors_files = [f for f in files if f.endswith('.safetensors')]
109
+
110
+ # Prioritize files with 'lora' in the name
111
+ lora_files = [f for f in safetensors_files if 'lora' in f.lower()]
112
+ if lora_files:
113
+ safetensors_file = lora_files[0]
114
+ elif safetensors_files:
115
+ safetensors_file = safetensors_files[0]
116
+ else:
117
+ # Try .bin files as fallback
118
+ bin_files = [f for f in files if f.endswith('.bin') and 'lora' in f.lower()]
119
+ if bin_files:
120
+ safetensors_file = bin_files[0]
121
+ else:
122
+ safetensors_file = "pytorch_lora_weights.safetensors" # Default fallback
123
+
124
+ print(f"Found LoRA file: {safetensors_file} in {link}")
125
+ return split_link[1], safetensors_file, trigger_word
126
+
127
+ except Exception as e:
128
+ print(f"Error in get_huggingface_lora: {e}")
129
+ # Try basic detection
130
+ try:
131
+ files = list_repo_files(link)
132
+ safetensors_file = next((f for f in files if f.endswith('.safetensors')), "pytorch_lora_weights.safetensors")
133
+ return split_link[1], safetensors_file, ""
134
+ except:
135
+ raise Exception(f"Error loading LoRA: {e}")
136
+ else:
137
+ raise Exception("Invalid HuggingFace repository format")
138
 
139
+ def load_custom_lora(link):
140
+ """Load custom LoRA from user input"""
141
+ if not link:
142
+ return gr.update(visible=False), "", gr.update(visible=False), None, gr.Gallery(selected_index=None), "### 🎨 Select an art style from the gallery", None
143
+
144
+ try:
145
+ repo_name, weights_file, trigger_word = get_huggingface_lora(link)
146
+
147
+ card = f'''
148
+ <div class="custom_lora_card">
149
+ <div style="display: flex; align-items: center; margin-bottom: 12px;">
150
+ <span style="font-size: 18px; margin-right: 8px;">✅</span>
151
+ <strong style="font-size: 16px;">Custom LoRA Loaded!</strong>
152
+ </div>
153
+ <div style="background: rgba(255, 255, 255, 0.8); padding: 12px; border-radius: 8px;">
154
+ <h4 style="margin: 0 0 8px 0; color: #333;">{repo_name}</h4>
155
+ <small style="color: #666;">{"Trigger: <code style='background: #f0f0f0; padding: 2px 6px; border-radius: 4px;'><b>"+trigger_word+"</b></code>" if trigger_word else "No trigger word found"}</small>
156
+ </div>
157
+ </div>
158
+ '''
159
+
160
+ custom_lora_data = {
161
+ "repo": link,
162
+ "weights": weights_file,
163
+ "trigger_word": trigger_word
164
+ }
165
+
166
+ return gr.update(visible=True), card, gr.update(visible=True), custom_lora_data, gr.Gallery(selected_index=None), f"🎨 Custom Style: {repo_name}", None
167
+
168
+ except Exception as e:
169
+ return gr.update(visible=True), f"Error: {str(e)}", gr.update(visible=False), None, gr.update(), "### 🎨 Select an art style from the gallery", None
170
 
171
+ def remove_custom_lora():
172
+ """Remove custom LoRA"""
173
+ return "", gr.update(visible=False), gr.update(visible=False), None, None
174
 
175
+ def classify_gallery(flux_loras):
176
+ """Sort gallery by likes"""
177
+ try:
178
+ sorted_gallery = sorted(flux_loras, key=lambda x: x.get("likes", 0), reverse=True)
179
+ gallery_items = []
180
+
181
+ for item in sorted_gallery:
182
+ if "image" in item and "title" in item:
183
+ image_path = item["image"]
184
+ title = item["title"]
185
+
186
+ # Simply use the path as-is for Gradio to handle
187
+ gallery_items.append((image_path, title))
188
+ print(f"Added to gallery: {image_path} - {title}")
189
+
190
+ print(f"Total gallery items: {len(gallery_items)}")
191
+ return gallery_items, sorted_gallery
192
+ except Exception as e:
193
+ print(f"Error in classify_gallery: {e}")
194
+ import traceback
195
+ traceback.print_exc()
196
+ return [], []
197
 
198
+ def infer_with_lora_wrapper(input_image, prompt, selected_index, custom_lora, seed=42, randomize_seed=False, guidance_scale=2.5, lora_scale=1.0, flux_loras=None, progress=gr.Progress(track_tqdm=True)):
199
+ """Wrapper function to handle state serialization"""
200
+ return infer_with_lora(input_image, prompt, selected_index, custom_lora, seed, randomize_seed, guidance_scale, lora_scale, flux_loras, progress)
201
 
 
202
  @spaces.GPU
203
+ def infer_with_lora(input_image, prompt, selected_index, custom_lora, seed=42, randomize_seed=False, guidance_scale=2.5, lora_scale=1.0, flux_loras=None, progress=gr.Progress(track_tqdm=True)):
204
+ """Generate image with selected LoRA"""
205
+ global current_lora, pipe
206
+
207
+ # Check if input image is provided
208
+ if input_image is None:
209
+ gr.Warning("Please upload your portrait photo first! 📸")
210
+ return None, seed, gr.update(visible=False)
211
+
212
+ if randomize_seed:
213
+ seed = random.randint(0, MAX_SEED)
214
+
215
+ # Determine which LoRA to use
216
+ lora_to_use = None
217
+ if custom_lora:
218
+ lora_to_use = custom_lora
219
+ elif selected_index is not None and flux_loras and selected_index < len(flux_loras):
220
+ lora_to_use = flux_loras[selected_index]
221
+ # Load LoRA if needed
222
+ if lora_to_use and lora_to_use != current_lora:
223
+ try:
224
+ # Unload current LoRA
225
+ if current_lora:
226
+ pipe.unload_lora_weights()
227
+ print(f"Unloaded previous LoRA")
228
+
229
+ # Load new LoRA
230
+ repo_id = lora_to_use.get("repo", "unknown")
231
+ weights_file = lora_to_use.get("weights", "pytorch_lora_weights.safetensors")
232
+ print(f"Loading LoRA: {repo_id} with weights: {weights_file}")
233
+
234
+ lora_path = load_lora_weights(repo_id, weights_file)
235
+ if lora_path:
236
+ pipe.load_lora_weights(lora_path, adapter_name="selected_lora")
237
+ pipe.set_adapters(["selected_lora"], adapter_weights=[lora_scale])
238
+ print(f"Successfully loaded: {lora_path} with scale {lora_scale}")
239
+ current_lora = lora_to_use
240
+ else:
241
+ print(f"Failed to load LoRA from {repo_id}")
242
+ gr.Warning(f"Failed to load {lora_to_use.get('title', 'style')}. Please try a different art style.")
243
+ return None, seed, gr.update(visible=False)
244
+
245
+ except Exception as e:
246
+ print(f"Error loading LoRA: {e}")
247
+ # Continue without LoRA
248
  else:
249
+ if lora_to_use:
250
+ print(f"Using already loaded LoRA: {lora_to_use.get('repo', 'unknown')}")
251
+
252
+ try:
253
+ # Convert image to RGB
254
+ input_image = input_image.convert("RGB")
255
+ except Exception as e:
256
+ print(f"Error processing image: {e}")
257
+ gr.Warning("Error processing the uploaded image. Please try a different photo. 📸")
258
+ return None, seed, gr.update(visible=False)
259
+
260
+ # Check if LoRA is selected
261
+ if lora_to_use is None:
262
+ gr.Warning("Please select an art style from the gallery first! 🎨")
263
+ return None, seed, gr.update(visible=False)
264
+
265
+ # Add trigger word to prompt
266
+ trigger_word = lora_to_use.get("trigger_word", "")
267
+
268
+ # Special handling for different art styles
269
+ if trigger_word == "ghibli":
270
+ prompt = f"Create a Studio Ghibli anime style portrait of the person in the photo, {prompt}. Maintain the facial identity while transforming into whimsical anime art style."
271
+ elif trigger_word == "homer":
272
+ prompt = f"Paint the person in Winslow Homer's American realist style, {prompt}. Keep facial features while applying watercolor and marine art techniques."
273
+ elif trigger_word == "gogh":
274
+ prompt = f"Transform the portrait into Van Gogh's post-impressionist style with swirling brushstrokes, {prompt}. Maintain facial identity with expressive colors."
275
+ elif trigger_word == "Cezanne":
276
+ prompt = f"Render the person in Paul Cézanne's geometric post-impressionist style, {prompt}. Keep facial structure while applying structured brushwork."
277
+ elif trigger_word == "Renoir":
278
+ prompt = f"Paint the portrait in Pierre-Auguste Renoir's impressionist style with soft light, {prompt}. Maintain identity with luminous skin tones."
279
+ elif trigger_word == "claude monet":
280
+ prompt = f"Create an impressionist portrait in Claude Monet's style with visible brushstrokes, {prompt}. Keep facial features while using light and color."
281
+ elif trigger_word == "fantasy":
282
+ prompt = f"Transform into an epic fantasy character portrait, {prompt}. Maintain facial identity while adding magical and fantastical elements."
283
+ elif trigger_word == ", How2Draw":
284
+ prompt = f"create a How2Draw sketch of the person of the photo {prompt}, maintain the facial identity of the person and general features"
285
+ elif trigger_word == ", video game screenshot in the style of THSMS":
286
+ prompt = f"create a video game screenshot in the style of THSMS with the person from the photo, {prompt}. maintain the facial identity of the person and general features"
287
+ else:
288
+ prompt = f"convert the style of this portrait photo to {trigger_word} while maintaining the identity of the person. {prompt}. Make sure to maintain the person's facial identity and features, while still changing the overall style to {trigger_word}."
289
+
290
+ try:
291
+ image = pipe(
292
+ image=input_image,
293
+ prompt=prompt,
294
+ guidance_scale=guidance_scale,
295
+ generator=torch.Generator().manual_seed(seed),
296
+ ).images[0]
297
+
298
+ return image, seed, gr.update(visible=True)
299
+
300
+ except Exception as e:
301
+ print(f"Error during inference: {e}")
302
+ return None, seed, gr.update(visible=False)
303
 
304
+ # CSS styling with beautiful gradient pastel design
305
+ css = '''
306
+ #gen_btn{height: 100%}
307
+ #gen_column{align-self: stretch}
308
+ #title{text-align: center}
309
+ #title h1{font-size: 3em; display:inline-flex; align-items:center}
310
+ #title img{width: 100px; margin-right: 0.5em}
311
+ #gallery .grid-wrap{height: 10vh}
312
+ #lora_list{background: var(--block-background-fill);padding: 0 1em .3em; font-size: 90%}
313
+ .card_internal{display: flex;height: 100px;margin-top: .5em}
314
+ .card_internal img{margin-right: 1em}
315
+ .styler{--form-gap-width: 0px !important}
316
+ #progress{height:30px}
317
+ #progress .generating{display:none}
318
+ .progress-container {width: 100%;height: 30px;background-color: #f0f0f0;border-radius: 15px;overflow: hidden;margin-bottom: 20px}
319
+ .progress-bar {height: 100%;background-color: #4f46e5;width: calc(var(--current) / var(--total) * 100%);transition: width 0.5s ease-in-out}
320
+ '''
321
 
322
+ # Create Gradio interface
323
+ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
324
+ gr_flux_loras = gr.State(value=flux_loras_raw)
325
+
326
+ title = gr.HTML(
327
+ """<h1>Flux Kontext DLC 🎈</h1>""",
328
  )
329
+
330
+ selected_state = gr.State(value=None)
331
+ custom_loaded_lora = gr.State(value=None)
332
+
333
+ with gr.Row(elem_id="main_app"):
334
+ with gr.Column(scale=4, elem_id="box_column"):
335
+ with gr.Group(elem_id="gallery_box"):
336
+ input_image = gr.Image(label="Upload an image for editing", type="pil", height=260)
337
+
338
+ gallery = gr.Gallery(
339
+ label="Choose the Flux Kontext LoRA",
340
+ allow_preview=False,
341
+ columns=3,
342
+ elem_id="gallery",
343
+ show_share_button=False,
344
+ height=400
345
+ )
346
+
347
+ custom_model = gr.Textbox(
348
+ label="🔗 Or use a custom LoRA from HuggingFace",
349
+ placeholder="e.g., username/lora-name",
350
+ visible=True
351
+ )
352
+ custom_model_card = gr.HTML(visible=False)
353
+ custom_model_button = gr.Button("Remove custom LoRA", visible=False)
354
+
355
+ with gr.Column(scale=5):
356
+ with gr.Row():
357
+ prompt = gr.Textbox(
358
+ label="Additional Details (optional)",
359
+ show_label=False,
360
+ lines=1,
361
+ max_lines=1,
362
+ placeholder="Describe additional details, e.g., 'wearing a red hat' or 'smiling'",
363
+ elem_id="prompt"
364
+ )
365
+ run_button = gr.Button("Edit Image", elem_id="run_button")
366
+
367
+ result = gr.Image(label="Your Kontext Edited Image", interactive=False)
368
+ reuse_button = gr.Button("Reuse this image", visible=False)
369
+
370
+ with gr.Accordion("Advanced Settings", open=False):
371
+ lora_scale = gr.Slider(
372
+ label="Style Strength",
373
+ minimum=0,
374
+ maximum=2,
375
+ step=0.1,
376
+ value=1.0,
377
+ info="How strongly to apply the art style (1.0 = balanced)"
378
+ )
379
+ seed = gr.Slider(
380
+ label="Random Seed",
381
+ minimum=0,
382
+ maximum=MAX_SEED,
383
+ step=1,
384
+ value=0,
385
+ info="Set to 0 for random results"
386
+ )
387
+ randomize_seed = gr.Checkbox(label="Randomize seed for each generation", value=True)
388
+ guidance_scale = gr.Slider(
389
+ label="Image Guidance",
390
+ minimum=1,
391
+ maximum=10,
392
+ step=0.1,
393
+ value=2.5,
394
+ info="How closely to follow the input image (lower = more creative)"
395
+ )
396
+
397
+ prompt_title = gr.Markdown(
398
+ value="### Select an art style from the gallery",
399
+ visible=True,
400
+ elem_id="selected_lora",
401
+ )
402
 
403
+ # Event handlers
404
+ custom_model.input(
405
+ fn=load_custom_lora,
406
+ inputs=[custom_model],
407
+ outputs=[custom_model_card, custom_model_card, custom_model_button, custom_loaded_lora, gallery, prompt_title, selected_state],
408
+ )
409
+
410
+ custom_model_button.click(
411
+ fn=remove_custom_lora,
412
+ outputs=[custom_model, custom_model_button, custom_model_card, custom_loaded_lora, selected_state]
413
+ )
414
+
415
+ gallery.select(
416
+ fn=update_selection,
417
+ inputs=[gr_flux_loras],
418
+ outputs=[prompt_title, prompt, selected_state],
419
+ show_progress=False
420
+ )
421
+
422
+ gr.on(
423
+ triggers=[run_button.click, prompt.submit],
424
+ fn=infer_with_lora_wrapper,
425
+ inputs=[input_image, prompt, selected_state, custom_loaded_lora, seed, randomize_seed, guidance_scale, lora_scale, gr_flux_loras],
426
+ outputs=[result, seed, reuse_button]
427
+ )
428
+
429
+ reuse_button.click(
430
+ fn=lambda image: image,
431
+ inputs=[result],
432
+ outputs=[input_image]
433
+ )
434
 
435
+ demo.load(
436
+ fn=classify_gallery,
437
+ inputs=[gr_flux_loras],
438
+ outputs=[gallery, gr_flux_loras]
439
+ )
440
 
441
+ demo.queue(default_concurrency_limit=None)
442
+ demo.launch()
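
For reference, the updated app.py centers on the FLUX.1 Kontext image-editing flow shown above: a base FluxKontextPipeline plus an optional Kontext LoRA. Below is a minimal standalone sketch of that flow, assuming the same base checkpoint and the Cinematic Relighting LoRA listed in flux_loras_raw; the input file name "portrait.jpg" is hypothetical.

import torch
from diffusers import FluxKontextPipeline
from diffusers.utils import load_image
from huggingface_hub import hf_hub_download

# Base FLUX.1 Kontext editing pipeline (same checkpoint as the Space).
pipe = FluxKontextPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16
).to("cuda")

# Attach one Kontext LoRA (repo and filename taken from flux_loras_raw above).
lora_path = hf_hub_download(
    repo_id="prithivMLmods/FLUX.1-Kontext-Cinematic-Relighting",
    filename="FLUX.1-Kontext-Cinematic-Relighting.safetensors",
)
pipe.load_lora_weights(lora_path, adapter_name="selected_lora")
pipe.set_adapters(["selected_lora"], adapter_weights=[1.0])

# Edit an input image; the prompt should include the LoRA's trigger phrase.
source = load_image("portrait.jpg").convert("RGB")  # hypothetical local file
edited = pipe(
    image=source,
    prompt="Cinematic Relighting, relight this portrait with warm, cinematic indoor lighting.",
    guidance_scale=2.5,
    generator=torch.Generator().manual_seed(42),
).images[0]
edited.save("edited.png")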