prithivMLmods committed on
Commit 0f2e032 · verified · 1 Parent(s): e1299f3

Update app.py

Files changed (1): app.py (+453, -339)
app.py CHANGED
@@ -1,360 +1,474 @@
- import os
- import random
- import uuid
- import json
- import time
- import asyncio
- from threading import Thread
-
  import gradio as gr
  import spaces
  import torch
- import numpy as np
  from PIL import Image
- import cv2
-
- from transformers import (
-     Qwen2_5_VLForConditionalGeneration,
-     AutoModelForCausalLM,
-     AutoProcessor,
-     TextIteratorStreamer,
- )
- from transformers.image_utils import load_image
-
- # Constants for text generation
- MAX_MAX_NEW_TOKENS = 2048
- DEFAULT_MAX_NEW_TOKENS = 1024
- MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
-
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-
- # Load Camel-Doc-OCR-080125
- MODEL_ID_M = "prithivMLmods/Camel-Doc-OCR-080125"
- processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
- model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-     MODEL_ID_M, trust_remote_code=True,
-     torch_dtype=torch.float16).to(device).eval()
-
- # Load OCRFlux-3B
- MODEL_ID_X = "ChatDOC/OCRFlux-3B"
- processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
- model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-     MODEL_ID_X, trust_remote_code=True,
-     torch_dtype=torch.float16).to(device).eval()
-
- # Load Behemoth-3B-070225
- MODEL_ID_T = "prithivMLmods/Behemoth-3B-070225-post0.1"
- processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
- model_t = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-     MODEL_ID_T, trust_remote_code=True,
-     torch_dtype=torch.float16).to(device).eval()
-
- # Load MonkeyOCR-pro-1.2B
- MODEL_ID_O = "echo840/MonkeyOCR-pro-1.2B"
- SUBFOLDER = "Recognition"
- processor_o = AutoProcessor.from_pretrained(MODEL_ID_O, trust_remote_code=True, subfolder=SUBFOLDER)
- model_o = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-     MODEL_ID_O, trust_remote_code=True, subfolder=SUBFOLDER,
-     torch_dtype=torch.float16).to(device).eval()
-
- # Load ViGoRL-MCTS-SFT-7b-Spatial
- MODEL_ID_A = "gsarch/ViGoRL-MCTS-SFT-7b-Spatial"
- processor_a = AutoProcessor.from_pretrained(MODEL_ID_A, trust_remote_code=True)
- model_a = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-     MODEL_ID_A, trust_remote_code=True,
-     torch_dtype=torch.float16).to(device).eval()
-
- # Function to downsample video frames
- def downsample_video(video_path):
-     """
-     Downsamples the video to evenly spaced frames.
-     Each frame is returned as a PIL image along with its timestamp.
-     """
-     vidcap = cv2.VideoCapture(video_path)
-     total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
-     fps = vidcap.get(cv2.CAP_PROP_FPS)
-     frames = []
-     frame_indices = np.linspace(0, total_frames - 1, 10, dtype=int)
-     for i in frame_indices:
-         vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
-         success, image = vidcap.read()
-         if success:
-             image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-             pil_image = Image.fromarray(image)
-             timestamp = round(i / fps, 2)
-             frames.append((pil_image, timestamp))
-     vidcap.release()
-     return frames
-
- # Function to generate text responses based on image input
- @spaces.GPU
- def generate_image(model_name: str,
-                    text: str,
-                    image: Image.Image,
-                    max_new_tokens: int = 1024,
-                    temperature: float = 0.6,
-                    top_p: float = 0.9,
-                    top_k: int = 50,
-                    repetition_penalty: float = 1.2):
-     """
-     Generates responses using the selected model for image input.
-     """
-     if model_name == "Camel-Doc-OCR-080125(v2)":
-         processor = processor_m
-         model = model_m
-     elif model_name == "OCRFlux-3B":
-         processor = processor_x
-         model = model_x
-     elif model_name == "Behemoth-3B-070225":
-         processor = processor_o
-         model = model_o
-     elif model_name == "MonkeyOCR-pro-1.2B":
-         processor = processor_t
-         model = model_t
-     elif model_name == "ViGoRL-MCTS-SFT-7B":
-         processor = processor_a
-         model = model_a
-     else:
-         yield "Invalid model selected.", "Invalid model selected."
-         return
-
-     if image is None:
-         yield "Please upload an image.", "Please upload an image."
-         return
-
-     messages = [{
-         "role": "user",
-         "content": [
-             {"type": "image", "image": image},
-             {"type": "text", "text": text},
-         ]
-     }]
-     prompt_full = processor.apply_chat_template(messages,
-                                                 tokenize=False,
-                                                 add_generation_prompt=True)
-     inputs = processor(text=[prompt_full],
-                        images=[image],
-                        return_tensors="pt",
-                        padding=True,
-                        truncation=False,
-                        max_length=MAX_INPUT_TOKEN_LENGTH).to(device)
-     streamer = TextIteratorStreamer(processor,
-                                     skip_prompt=True,
-                                     skip_special_tokens=True)
-     generation_kwargs = {
-         **inputs, "streamer": streamer,
-         "max_new_tokens": max_new_tokens
-     }
-     thread = Thread(target=model.generate, kwargs=generation_kwargs)
-     thread.start()
-     buffer = ""
-     for new_text in streamer:
-         buffer += new_text
-         time.sleep(0.01)
-         yield buffer, buffer
-
- # Function to generate text responses based on video input
  @spaces.GPU
- def generate_video(model_name: str,
-                    text: str,
-                    video_path: str,
-                    max_new_tokens: int = 1024,
-                    temperature: float = 0.6,
-                    top_p: float = 0.9,
-                    top_k: int = 50,
-                    repetition_penalty: float = 1.2):
-     """
-     Generates responses using the selected model for video input.
-     """
-     if model_name == "Camel-Doc-OCR-080125(v2)":
-         processor = processor_m
-         model = model_m
-     elif model_name == "OCRFlux-3B":
-         processor = processor_x
-         model = model_x
-     elif model_name == "Behemoth-3B-070225":
-         processor = processor_o
-         model = model_o
-     elif model_name == "MonkeyOCR-pro-1.2B":
-         processor = processor_t
-         model = model_t
-     elif model_name == "ViGoRL-MCTS-SFT-7B":
-         processor = processor_a
-         model = model_a
      else:
-         yield "Invalid model selected.", "Invalid model selected."
-         return
-
-     if video_path is None:
-         yield "Please upload a video.", "Please upload a video."
-         return
-
-     frames = downsample_video(video_path)
-     messages = [{
-         "role": "system",
-         "content": [{"type": "text", "text": "You are a helpful assistant."}]
-     }, {
-         "role": "user",
-         "content": [{"type": "text", "text": text}]
-     }]
-     for frame in frames:
-         image, timestamp = frame
-         messages[1]["content"].append({"type": "text", "text": f"Frame {timestamp}:"})
-         messages[1]["content"].append({"type": "image", "image": image})
-     inputs = processor.apply_chat_template(
-         messages,
-         tokenize=True,
-         add_generation_prompt=True,
-         return_dict=True,
-         return_tensors="pt",
-         truncation=False,
-         max_length=MAX_INPUT_TOKEN_LENGTH).to(device)
-     streamer = TextIteratorStreamer(processor,
-                                     skip_prompt=True,
-                                     skip_special_tokens=True)
-     generation_kwargs = {
-         **inputs,
-         "streamer": streamer,
-         "max_new_tokens": max_new_tokens,
-         "do_sample": True,
-         "temperature": temperature,
-         "top_p": top_p,
-         "top_k": top_k,
-         "repetition_penalty": repetition_penalty,
-     }
-     thread = Thread(target=model.generate, kwargs=generation_kwargs)
-     thread.start()
-     buffer = ""
-     for new_text in streamer:
-         buffer += new_text
-         buffer = buffer.replace("<|im_end|>", "")
-         time.sleep(0.01)
-         yield buffer, buffer
-
- # Define examples for image and video inference
- image_examples = [
-     ["Explain the essence of the image.", "assets/images/B.jpg"],
-     ["Extract the content.", "assets/images/1.png"],
-     ["Describe the safety of the action shown in the image.", "assets/images/C.jpg"],
-     ["Caption the image.", "assets/images/A.jpg"],
-     ["Make this into a table for the README.md file.", "assets/images/2.jpg"],
-     ["Extract the table content from the image.", "assets/images/3.png"],
-     ["Perform OCR on the image.", "assets/images/4.jpg"]
- ]
-
- video_examples = [
-     ["Explain the video in detail.", "assets/videos/a.mp4"],
-     ["Explain the video in detail.", "assets/videos/b.mp4"]
- ]
-
- # css
- css = """
- .submit-btn {
-     background-color: #2980b9 !important;
-     color: white !important;
- }
- .submit-btn:hover {
-     background-color: #3498db !important;
- }
- .canvas-output {
-     border: 2px solid #4682B4;
-     border-radius: 10px;
-     padding: 20px;
- }
- """
-
- # Create the Gradio Interface
- with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
-     gr.Markdown(
-         "# **[Multimodal OCR Outpost](https://huggingface.co/collections/prithivMLmods/multimodal-implementations-67c9982ea04b39f0608badb0)**"
      )
-     with gr.Row():
-         with gr.Column():
-             with gr.Tabs():
-                 with gr.TabItem("Image Inference"):
-                     image_query = gr.Textbox(
-                         label="Query Input",
-                         placeholder="Enter your query here...")
-                     image_upload = gr.Image(type="pil", label="Image")
-                     image_submit = gr.Button("Submit",
-                                              elem_classes="submit-btn")
-                     gr.Examples(examples=image_examples,
-                                 inputs=[image_query, image_upload])
-                 with gr.TabItem("Video Inference"):
-                     video_query = gr.Textbox(
-                         label="Query Input",
-                         placeholder="Enter your query here...")
-                     video_upload = gr.Video(label="Video")
-                     video_submit = gr.Button("Submit",
-                                              elem_classes="submit-btn")
-                     gr.Examples(examples=video_examples,
-                                 inputs=[video_query, video_upload])
-
-             with gr.Accordion("Advanced options", open=False):
-                 max_new_tokens = gr.Slider(label="Max new tokens",
-                                            minimum=1,
-                                            maximum=MAX_MAX_NEW_TOKENS,
-                                            step=1,
-                                            value=DEFAULT_MAX_NEW_TOKENS)
-                 temperature = gr.Slider(label="Temperature",
-                                         minimum=0.1,
-                                         maximum=4.0,
-                                         step=0.1,
-                                         value=0.6)
-                 top_p = gr.Slider(label="Top-p (nucleus sampling)",
-                                   minimum=0.05,
-                                   maximum=1.0,
-                                   step=0.05,
-                                   value=0.9)
-                 top_k = gr.Slider(label="Top-k",
-                                   minimum=1,
-                                   maximum=1000,
-                                   step=1,
-                                   value=50)
-                 repetition_penalty = gr.Slider(label="Repetition penalty",
-                                                minimum=1.0,
-                                                maximum=2.0,
-                                                step=0.05,
-                                                value=1.2)
-
-         with gr.Column():
-             with gr.Column(elem_classes="canvas-output"):
-                 gr.Markdown("## Output")
-                 output = gr.Textbox(label="Raw Output Stream",
-                                     interactive=False,
-                                     lines=2, show_copy_button=True)
-                 with gr.Accordion("(Result.md)", open=False):
-                     markdown_output = gr.Markdown(
-                         label="markup.md")
-
-             model_choice = gr.Radio(choices=[
-                 "Camel-Doc-OCR-080125(v2)", "OCRFlux-3B",
-                 "ViGoRL-MCTS-SFT-7B", "Behemoth-3B-070225",
-                 "MonkeyOCR-pro-1.2B"],
-                 label="Select Model",
-                 value="Camel-Doc-OCR-080125(v2)")
-
-             gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Multimodal-OCR-Outpost/discussions)")
-             gr.Markdown("> Camel-Doc-OCR-080125 is a specialized vision-language model, fine-tuned from Qwen2.5-VL-7B-Instruct, and excels at document retrieval, content extraction, and analysis recognition for both structured and unstructured digital documents. OCRFlux-3B is a 3B-parameter vision-language model optimized for high-quality OCR on PDFs and images, excelling in converting documents to clean Markdown text and supporting features like cross-page table/paragraph merging.")
-             gr.Markdown("> Both ViGoRL-MCTS-SFT-3b-Spatial and 7b-Spatial are vision-language models that use multi-turn visually grounded reinforcement learning for precise spatial reasoning and visual grounding, with the 3b and 7b variants differing mainly in their architectural size for fine-grained visual tasks.")
-             gr.Markdown("> Behemoth-3B-070225-post0.1 is an advanced 3B parameter model tailored for extensive multimodal comprehension, document parsing, and possibly generalized OCR/vision-language tasks. MonkeyOCR-pro-1.2B is a lightweight OCR model focusing on high-accuracy text extraction from images and scanned documents, suitable for resource-constrained environments.")
-             gr.Markdown("> ⚠️ Note: Models in this space may not perform well on video inference tasks.")
-
-     # Define the submit button actions
-     image_submit.click(fn=generate_image,
-                        inputs=[
-                            model_choice, image_query, image_upload,
-                            max_new_tokens, temperature, top_p, top_k,
-                            repetition_penalty
-                        ],
-                        outputs=[output, markdown_output])
-     video_submit.click(fn=generate_video,
-                        inputs=[
-                            model_choice, video_query, video_upload,
-                            max_new_tokens, temperature, top_p, top_k,
-                            repetition_penalty
-                        ],
-                        outputs=[output, markdown_output])
-
- if __name__ == "__main__":
-     demo.queue(max_size=30).launch(share=True, mcp_server=True, ssr_mode=False, show_error=True)
  import gradio as gr
+ import numpy as np
  import spaces
  import torch
+ import random
+ import json
+ import os
  from PIL import Image
+ from diffusers import FluxKontextPipeline
+ from diffusers.utils import load_image
+ from huggingface_hub import hf_hub_download, HfFileSystem, ModelCard, list_repo_files
+ from safetensors.torch import load_file
+ import requests
+ import re
+
+ # Load Kontext model
+ MAX_SEED = np.iinfo(np.int32).max
+
+ pipe = FluxKontextPipeline.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16).to("cuda")
+
+ # Load LoRA data
+ flux_loras_raw = [
+     {
+         "image": "examples/1.png",
+         "title": "Studio Ghibli",
+         "repo": "openfree/flux-chatgpt-ghibli-lora",
+         "trigger_word": "ghibli",
+         "weights": "pytorch_lora_weights.safetensors",
+         "likes": 0
+     },
+     {
+         "image": "examples/2.png",
+         "title": "Winslow Homer",
+         "repo": "openfree/winslow-homer",
+         "trigger_word": "homer",
+         "weights": "pytorch_lora_weights.safetensors",
+         "likes": 0
+     },
+     {
+         "image": "examples/3.png",
+         "title": "Van Gogh",
+         "repo": "openfree/van-gogh",
+         "trigger_word": "gogh",
+         "weights": "pytorch_lora_weights.safetensors",
+         "likes": 0
+     },
+     {
+         "image": "examples/4.png",
+         "title": "Paul Cézanne",
+         "repo": "openfree/paul-cezanne",
+         "trigger_word": "Cezanne",
+         "weights": "pytorch_lora_weights.safetensors",
+         "likes": 0
+     },
+     {
+         "image": "examples/5.png",
+         "title": "Renoir",
+         "repo": "openfree/pierre-auguste-renoir",
+         "trigger_word": "Renoir",
+         "weights": "pytorch_lora_weights.safetensors",
+         "likes": 0
+     },
+     {
+         "image": "examples/6.png",
+         "title": "Claude Monet",
+         "repo": "openfree/claude-monet",
+         "trigger_word": "claude monet",
+         "weights": "pytorch_lora_weights.safetensors",
+         "likes": 0
+     },
+     {
+         "image": "examples/7.png",
+         "title": "Fantasy Art",
+         "repo": "openfree/myt-flux-fantasy",
+         "trigger_word": "fantasy",
+         "weights": "pytorch_lora_weights.safetensors",
+         "likes": 0
+     }
+ ]
+ print(f"Loaded {len(flux_loras_raw)} LoRAs")
+
+ # Global variables for LoRA management
+ current_lora = None
+ lora_cache = {}
+
+ def load_lora_weights(repo_id, weights_filename):
+     """Load LoRA weights from HuggingFace"""
+     try:
+         # First try with the specified filename
+         try:
+             lora_path = hf_hub_download(repo_id=repo_id, filename=weights_filename)
+             if repo_id not in lora_cache:
+                 lora_cache[repo_id] = lora_path
+             return lora_path
+         except Exception as e:
+             print(f"Failed to load {weights_filename}, trying to find alternative LoRA files...")
+
+             # If the specified file doesn't exist, try to find any .safetensors file
+             from huggingface_hub import list_repo_files
+             try:
+                 files = list_repo_files(repo_id)
+                 safetensors_files = [f for f in files if f.endswith(('.safetensors', '.bin')) and 'lora' in f.lower()]
+
+                 if not safetensors_files:
+                     # Try without 'lora' in filename
+                     safetensors_files = [f for f in files if f.endswith('.safetensors')]
+
+                 if safetensors_files:
+                     # Try the first available file
+                     for file in safetensors_files:
+                         try:
+                             print(f"Trying alternative file: {file}")
+                             lora_path = hf_hub_download(repo_id=repo_id, filename=file)
+                             if repo_id not in lora_cache:
+                                 lora_cache[repo_id] = lora_path
+                             print(f"Successfully loaded alternative LoRA file: {file}")
+                             return lora_path
+                         except:
+                             continue
+
+                 print(f"No suitable LoRA files found in {repo_id}")
+                 return None
+
+             except Exception as list_error:
+                 print(f"Error listing files in repo {repo_id}: {list_error}")
+                 return None
+
+     except Exception as e:
+         print(f"Error loading LoRA from {repo_id}: {e}")
+         return None
+
+ def update_selection(selected_state: gr.SelectData, flux_loras):
+     """Update UI when a LoRA is selected"""
+     if selected_state.index >= len(flux_loras):
+         return "### No LoRA selected", gr.update(), None
+
+     lora = flux_loras[selected_state.index]
+     lora_title = lora["title"]
+     lora_repo = lora["repo"]
+     trigger_word = lora["trigger_word"]
+
+     # Create a more informative selected text
+     updated_text = f"### 🎨 Selected Style: {lora_title}"
+     new_placeholder = f"Describe additional details, e.g., 'wearing a red hat' or 'smiling'"
+
+     return updated_text, gr.update(placeholder=new_placeholder), selected_state.index
+
+ def get_huggingface_lora(link):
+     """Download LoRA from HuggingFace link"""
+     split_link = link.split("/")
+     if len(split_link) == 2:
+         try:
+             model_card = ModelCard.load(link)
+             trigger_word = model_card.data.get("instance_prompt", "")
+
+             # Try to find the correct safetensors file
+             files = list_repo_files(link)
+             safetensors_files = [f for f in files if f.endswith('.safetensors')]
+
+             # Prioritize files with 'lora' in the name
+             lora_files = [f for f in safetensors_files if 'lora' in f.lower()]
+             if lora_files:
+                 safetensors_file = lora_files[0]
+             elif safetensors_files:
+                 safetensors_file = safetensors_files[0]
+             else:
+                 # Try .bin files as fallback
+                 bin_files = [f for f in files if f.endswith('.bin') and 'lora' in f.lower()]
+                 if bin_files:
+                     safetensors_file = bin_files[0]
+                 else:
+                     safetensors_file = "pytorch_lora_weights.safetensors"  # Default fallback
+
+             print(f"Found LoRA file: {safetensors_file} in {link}")
+             return split_link[1], safetensors_file, trigger_word
+
+         except Exception as e:
+             print(f"Error in get_huggingface_lora: {e}")
+             # Try basic detection
+             try:
+                 files = list_repo_files(link)
+                 safetensors_file = next((f for f in files if f.endswith('.safetensors')), "pytorch_lora_weights.safetensors")
+                 return split_link[1], safetensors_file, ""
+             except:
+                 raise Exception(f"Error loading LoRA: {e}")
+     else:
+         raise Exception("Invalid HuggingFace repository format")
+
+ def load_custom_lora(link):
+     """Load custom LoRA from user input"""
+     if not link:
+         return gr.update(visible=False), "", gr.update(visible=False), None, gr.Gallery(selected_index=None), "### 🎨 Select an art style from the gallery", None
+
+     try:
+         repo_name, weights_file, trigger_word = get_huggingface_lora(link)
+
+         card = f'''
+         <div class="custom_lora_card">
+             <div style="display: flex; align-items: center; margin-bottom: 12px;">
+                 <span style="font-size: 18px; margin-right: 8px;">✅</span>
+                 <strong style="font-size: 16px;">Custom LoRA Loaded!</strong>
+             </div>
+             <div style="background: rgba(255, 255, 255, 0.8); padding: 12px; border-radius: 8px;">
+                 <h4 style="margin: 0 0 8px 0; color: #333;">{repo_name}</h4>
+                 <small style="color: #666;">{"Trigger: <code style='background: #f0f0f0; padding: 2px 6px; border-radius: 4px;'><b>"+trigger_word+"</b></code>" if trigger_word else "No trigger word found"}</small>
+             </div>
+         </div>
+         '''
+
+         custom_lora_data = {
+             "repo": link,
+             "weights": weights_file,
+             "trigger_word": trigger_word
+         }
+
+         return gr.update(visible=True), card, gr.update(visible=True), custom_lora_data, gr.Gallery(selected_index=None), f"🎨 Custom Style: {repo_name}", None
+
+     except Exception as e:
+         return gr.update(visible=True), f"Error: {str(e)}", gr.update(visible=False), None, gr.update(), "### 🎨 Select an art style from the gallery", None
+
+ def remove_custom_lora():
+     """Remove custom LoRA"""
+     return "", gr.update(visible=False), gr.update(visible=False), None, None
+
+ def classify_gallery(flux_loras):
+     """Sort gallery by likes"""
+     try:
+         sorted_gallery = sorted(flux_loras, key=lambda x: x.get("likes", 0), reverse=True)
+         gallery_items = []
+
+         for item in sorted_gallery:
+             if "image" in item and "title" in item:
+                 image_path = item["image"]
+                 title = item["title"]
+
+                 # Simply use the path as-is for Gradio to handle
+                 gallery_items.append((image_path, title))
+                 print(f"Added to gallery: {image_path} - {title}")
+
+         print(f"Total gallery items: {len(gallery_items)}")
+         return gallery_items, sorted_gallery
+     except Exception as e:
+         print(f"Error in classify_gallery: {e}")
+         import traceback
+         traceback.print_exc()
+         return [], []
+
+ def infer_with_lora_wrapper(input_image, prompt, selected_index, custom_lora, seed=42, randomize_seed=False, guidance_scale=2.5, lora_scale=1.0, flux_loras=None, progress=gr.Progress(track_tqdm=True)):
+     """Wrapper function to handle state serialization"""
+     return infer_with_lora(input_image, prompt, selected_index, custom_lora, seed, randomize_seed, guidance_scale, lora_scale, flux_loras, progress)
+
  @spaces.GPU
+ def infer_with_lora(input_image, prompt, selected_index, custom_lora, seed=42, randomize_seed=False, guidance_scale=2.5, lora_scale=1.0, flux_loras=None, progress=gr.Progress(track_tqdm=True)):
+     """Generate image with selected LoRA"""
+     global current_lora, pipe
+
+     # Check if input image is provided
+     if input_image is None:
+         gr.Warning("Please upload your portrait photo first! 📸")
+         return None, seed, gr.update(visible=False)
+
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+
+     # Determine which LoRA to use
+     lora_to_use = None
+     if custom_lora:
+         lora_to_use = custom_lora
+     elif selected_index is not None and flux_loras and selected_index < len(flux_loras):
+         lora_to_use = flux_loras[selected_index]
+
+     # Load LoRA if needed
+     if lora_to_use and lora_to_use != current_lora:
+         try:
+             # Unload current LoRA
+             if current_lora:
+                 pipe.unload_lora_weights()
+                 print(f"Unloaded previous LoRA")
+
+             # Load new LoRA
+             repo_id = lora_to_use.get("repo", "unknown")
+             weights_file = lora_to_use.get("weights", "pytorch_lora_weights.safetensors")
+             print(f"Loading LoRA: {repo_id} with weights: {weights_file}")
+
+             lora_path = load_lora_weights(repo_id, weights_file)
+             if lora_path:
+                 pipe.load_lora_weights(lora_path, adapter_name="selected_lora")
+                 pipe.set_adapters(["selected_lora"], adapter_weights=[lora_scale])
+                 print(f"Successfully loaded: {lora_path} with scale {lora_scale}")
+                 current_lora = lora_to_use
+             else:
+                 print(f"Failed to load LoRA from {repo_id}")
+                 gr.Warning(f"Failed to load {lora_to_use.get('title', 'style')}. Please try a different art style.")
+                 return None, seed, gr.update(visible=False)
+
+         except Exception as e:
+             print(f"Error loading LoRA: {e}")
+             # Continue without LoRA
      else:
+         if lora_to_use:
+             print(f"Using already loaded LoRA: {lora_to_use.get('repo', 'unknown')}")
+
+     try:
+         # Convert image to RGB
+         input_image = input_image.convert("RGB")
+     except Exception as e:
+         print(f"Error processing image: {e}")
+         gr.Warning("Error processing the uploaded image. Please try a different photo. 📸")
+         return None, seed, gr.update(visible=False)
+
+     # Check if LoRA is selected
+     if lora_to_use is None:
+         gr.Warning("Please select an art style from the gallery first! 🎨")
+         return None, seed, gr.update(visible=False)
+
+     # Add trigger word to prompt
+     trigger_word = lora_to_use.get("trigger_word", "")
+
+     # Special handling for different art styles
+     if trigger_word == "ghibli":
+         prompt = f"Create a Studio Ghibli anime style portrait of the person in the photo, {prompt}. Maintain the facial identity while transforming into whimsical anime art style."
+     elif trigger_word == "homer":
+         prompt = f"Paint the person in Winslow Homer's American realist style, {prompt}. Keep facial features while applying watercolor and marine art techniques."
+     elif trigger_word == "gogh":
+         prompt = f"Transform the portrait into Van Gogh's post-impressionist style with swirling brushstrokes, {prompt}. Maintain facial identity with expressive colors."
+     elif trigger_word == "Cezanne":
+         prompt = f"Render the person in Paul Cézanne's geometric post-impressionist style, {prompt}. Keep facial structure while applying structured brushwork."
+     elif trigger_word == "Renoir":
+         prompt = f"Paint the portrait in Pierre-Auguste Renoir's impressionist style with soft light, {prompt}. Maintain identity with luminous skin tones."
+     elif trigger_word == "claude monet":
+         prompt = f"Create an impressionist portrait in Claude Monet's style with visible brushstrokes, {prompt}. Keep facial features while using light and color."
+     elif trigger_word == "fantasy":
+         prompt = f"Transform into an epic fantasy character portrait, {prompt}. Maintain facial identity while adding magical and fantastical elements."
+     elif trigger_word == ", How2Draw":
+         prompt = f"create a How2Draw sketch of the person of the photo {prompt}, maintain the facial identity of the person and general features"
+     elif trigger_word == ", video game screenshot in the style of THSMS":
+         prompt = f"create a video game screenshot in the style of THSMS with the person from the photo, {prompt}. maintain the facial identity of the person and general features"
+     else:
+         prompt = f"convert the style of this portrait photo to {trigger_word} while maintaining the identity of the person. {prompt}. Make sure to maintain the person's facial identity and features, while still changing the overall style to {trigger_word}."
+
+     try:
+         image = pipe(
+             image=input_image,
+             prompt=prompt,
+             guidance_scale=guidance_scale,
+             generator=torch.Generator().manual_seed(seed),
+         ).images[0]
+
+         return image, seed, gr.update(visible=True)
+
+     except Exception as e:
+         print(f"Error during inference: {e}")
+         return None, seed, gr.update(visible=False)
+
+ # Create Gradio interface
+ with gr.Blocks(css=css) as demo:
+     gr_flux_loras = gr.State(value=flux_loras_raw)
+
+     title = gr.HTML(
+         """<h1>FLUX Kontex Super LoRAs🖖</h1>""",
      )
+
+     selected_state = gr.State(value=None)
+     custom_loaded_lora = gr.State(value=None)
+
+     with gr.Row(elem_id="main_app"):
+         with gr.Column(scale=4, elem_id="box_column"):
+             with gr.Group(elem_id="gallery_box"):
+                 input_image = gr.Image(label="Upload your portrait photo 📸", type="pil", height=300)
+
+                 gallery = gr.Gallery(
+                     label="Choose Your Art Style",
+                     allow_preview=False,
+                     columns=3,
+                     elem_id="gallery",
+                     show_share_button=False,
+                     height=400
+                 )
+
+                 custom_model = gr.Textbox(
+                     label="🔗 Or use a custom LoRA from HuggingFace",
+                     placeholder="e.g., username/lora-name",
+                     visible=True
+                 )
+                 custom_model_card = gr.HTML(visible=False)
+                 custom_model_button = gr.Button("❌ Remove custom LoRA", visible=False)
+
+         with gr.Column(scale=5):
+             with gr.Row():
+                 prompt = gr.Textbox(
+                     label="Additional Details (optional)",
+                     show_label=False,
+                     lines=1,
+                     max_lines=1,
+                     placeholder="Describe additional details, e.g., 'wearing a red hat' or 'smiling'",
+                     elem_id="prompt"
+                 )
+                 run_button = gr.Button("Generate ✨", elem_id="run_button")
+
+             result = gr.Image(label="Your Artistic Portrait", interactive=False)
+             reuse_button = gr.Button("🔄 Reuse this image", visible=False)
+
+             with gr.Accordion("⚙️ Advanced Settings", open=False):
+                 lora_scale = gr.Slider(
+                     label="Style Strength",
+                     minimum=0,
+                     maximum=2,
+                     step=0.1,
+                     value=1.0,
+                     info="How strongly to apply the art style (1.0 = balanced)"
+                 )
+                 seed = gr.Slider(
+                     label="Random Seed",
+                     minimum=0,
+                     maximum=MAX_SEED,
+                     step=1,
+                     value=0,
+                     info="Set to 0 for random results"
+                 )
+                 randomize_seed = gr.Checkbox(label="🎲 Randomize seed for each generation", value=True)
+                 guidance_scale = gr.Slider(
+                     label="Image Guidance",
+                     minimum=1,
+                     maximum=10,
+                     step=0.1,
+                     value=2.5,
+                     info="How closely to follow the input image (lower = more creative)"
+                 )
+
+     prompt_title = gr.Markdown(
+         value="### 🎨 Select an art style from the gallery",
+         visible=True,
+         elem_id="selected_lora",
+     )
+
+     # Event handlers
+     custom_model.input(
+         fn=load_custom_lora,
+         inputs=[custom_model],
+         outputs=[custom_model_card, custom_model_card, custom_model_button, custom_loaded_lora, gallery, prompt_title, selected_state],
+     )
+
+     custom_model_button.click(
+         fn=remove_custom_lora,
+         outputs=[custom_model, custom_model_button, custom_model_card, custom_loaded_lora, selected_state]
+     )
+
+     gallery.select(
+         fn=update_selection,
+         inputs=[gr_flux_loras],
+         outputs=[prompt_title, prompt, selected_state],
+         show_progress=False
+     )
+
+     gr.on(
+         triggers=[run_button.click, prompt.submit],
+         fn=infer_with_lora_wrapper,
+         inputs=[input_image, prompt, selected_state, custom_loaded_lora, seed, randomize_seed, guidance_scale, lora_scale, gr_flux_loras],
+         outputs=[result, seed, reuse_button]
+     )
+
+     reuse_button.click(
+         fn=lambda image: image,
+         inputs=[result],
+         outputs=[input_image]
+     )
+
+     # Initialize gallery
+     demo.load(
+         fn=classify_gallery,
+         inputs=[gr_flux_loras],
+         outputs=[gallery, gr_flux_loras]
+     )
+
+ demo.queue(default_concurrency_limit=None)
+ demo.launch()
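
The core pattern the new app.py adds is: download a LoRA adapter from the Hub, attach it to the FLUX.1 Kontext pipeline, and run an image-to-image edit whose prompt carries the adapter's trigger word. The following is a minimal standalone sketch of that flow, assembled only from calls that appear in the diff above; it assumes a CUDA device, the openfree/flux-chatgpt-ghibli-lora adapter from the gallery list, and any local portrait image (the example path "examples/1.png" is just a placeholder).

    import torch
    from diffusers import FluxKontextPipeline
    from diffusers.utils import load_image
    from huggingface_hub import hf_hub_download

    # Base pipeline, same checkpoint and dtype as in app.py (CUDA GPU assumed).
    pipe = FluxKontextPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16
    ).to("cuda")

    # Fetch one of the gallery adapters and attach it at full strength.
    lora_path = hf_hub_download(
        repo_id="openfree/flux-chatgpt-ghibli-lora",
        filename="pytorch_lora_weights.safetensors",
    )
    pipe.load_lora_weights(lora_path, adapter_name="selected_lora")
    pipe.set_adapters(["selected_lora"], adapter_weights=[1.0])

    # Image-to-image edit: the prompt embeds the adapter's trigger word ("ghibli"),
    # mirroring the prompt template used in infer_with_lora.
    portrait = load_image("examples/1.png").convert("RGB")  # placeholder portrait photo
    result = pipe(
        image=portrait,
        prompt=(
            "Create a Studio Ghibli anime style portrait of the person in the photo, smiling. "
            "Maintain the facial identity while transforming into whimsical anime art style."
        ),
        guidance_scale=2.5,
        generator=torch.Generator().manual_seed(42),
    ).images[0]
    result.save("ghibli_portrait.png")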