ginipick committed on
Commit 7d4bf39 · verified · 1 Parent(s): 1b3b25e

Update app.py

Files changed (1)
  1. app.py +236 -269

app.py CHANGED
@@ -82,6 +82,8 @@ def inference(
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device=device).manual_seed(seed)
+
+    print(f"Running inference with prompt: {prompt}")

     try:
         image = pipeline(
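
For reference, the seeding logic in this hunk is the standard torch.Generator recipe: a fixed seed fed to a generator makes a run reproducible, and randomize_seed simply draws a fresh seed per call. A minimal standalone sketch (CPU-only for illustration; the value of MAX_SEED is an assumption, since the constant is defined outside this diff):

import random
import torch

MAX_SEED = 2**32 - 1  # assumption: the app defines its own MAX_SEED elsewhere
seed = random.randint(0, MAX_SEED)

# Two generators seeded identically produce identical draws, which is
# why reusing the reported seed reproduces an image.
g1 = torch.Generator(device="cpu").manual_seed(seed)
g2 = torch.Generator(device="cpu").manual_seed(seed)
assert torch.equal(torch.randn(4, generator=g1), torch.randn(4, generator=g2))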
@@ -102,70 +104,123 @@ def inference(

 # ----------------------------- Florence-2 Captioner ---------------------------
 import subprocess
-subprocess.run(
-    'pip install flash-attn --no-build-isolation',
-    env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
-    shell=True
-)
+try:
+    subprocess.run(
+        'pip install flash-attn --no-build-isolation',
+        env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
+        shell=True
+    )
+except Exception as e:
+    print(f"Warning: Could not install flash-attn: {e}")

 from transformers import AutoProcessor, AutoModelForCausalLM

+# Function to safely load models
+def load_caption_model(model_name):
+    try:
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name, trust_remote_code=True
+        ).eval()
+        processor = AutoProcessor.from_pretrained(
+            model_name, trust_remote_code=True
+        )
+        return model, processor
+    except Exception as e:
+        print(f"Error loading caption model {model_name}: {e}")
+        return None, None
+
 # Pre-load models and processors
-models = {
-    'gokaygokay/Florence-2-Flux-Large': AutoModelForCausalLM.from_pretrained(
-        'gokaygokay/Florence-2-Flux-Large', trust_remote_code=True
-    ).eval(),
-    'gokaygokay/Florence-2-Flux': AutoModelForCausalLM.from_pretrained(
-        'gokaygokay/Florence-2-Flux', trust_remote_code=True
-    ).eval(),
-}
+print("Loading captioning models...")
+default_caption_model = 'gokaygokay/Florence-2-Flux-Large'
+models = {}
+processors = {}

-processors = {
-    'gokaygokay/Florence-2-Flux-Large': AutoProcessor.from_pretrained(
-        'gokaygokay/Florence-2-Flux-Large', trust_remote_code=True
-    ),
-    'gokaygokay/Florence-2-Flux': AutoProcessor.from_pretrained(
-        'gokaygokay/Florence-2-Flux', trust_remote_code=True
-    ),
-}
+# Try to load the default model
+default_model, default_processor = load_caption_model(default_caption_model)
+if default_model is not None and default_processor is not None:
+    models[default_caption_model] = default_model
+    processors[default_caption_model] = default_processor
+    print(f"Successfully loaded default caption model: {default_caption_model}")
+else:
+    # Fallback to simpler model
+    fallback_model = 'gokaygokay/Florence-2-Flux'
+    fallback_model_obj, fallback_processor = load_caption_model(fallback_model)
+    if fallback_model_obj is not None and fallback_processor is not None:
+        models[fallback_model] = fallback_model_obj
+        processors[fallback_model] = fallback_processor
+        default_caption_model = fallback_model
+        print(f"Loaded fallback caption model: {fallback_model}")
+    else:
+        print("WARNING: Failed to load any caption model!")

 @spaces.GPU
-def caption_image(image, model_name='gokaygokay/Florence-2-Flux-Large'):
+def caption_image(image, model_name=default_caption_model):
     """
     Runs the selected Florence-2 model to generate a detailed caption.
     """
     from PIL import Image as PILImage
+    import numpy as np
+
+    print(f"Starting caption generation with model: {model_name}")
+
+    # Handle case where image is already a PIL image
+    if isinstance(image, PILImage.Image):
+        pil_image = image
+    else:
+        # Convert numpy array to PIL
+        if isinstance(image, np.ndarray):
+            pil_image = PILImage.fromarray(image)
+        else:
+            print(f"Unexpected image type: {type(image)}")
+            return "Error: Unsupported image type"

-    task_prompt = "<DESCRIPTION>"
-    user_prompt = task_prompt + "Describe this image in great detail."
-
     # Convert input to RGB if needed
-    image = PILImage.fromarray(image)
-    if image.mode != "RGB":
-        image = image.convert("RGB")
-
+    if pil_image.mode != "RGB":
+        pil_image = pil_image.convert("RGB")
+
+    # Check if model is available
+    if model_name not in models or model_name not in processors:
+        available_models = list(models.keys())
+        if available_models:
+            model_name = available_models[0]
+            print(f"Requested model not available, using: {model_name}")
+        else:
+            return "Error: No caption models available"
+
     model = models[model_name]
     processor = processors[model_name]
+
+    task_prompt = "<DESCRIPTION>"
+    user_prompt = task_prompt + "Describe this image in great detail."
+
+    try:
+        inputs = processor(text=user_prompt, images=pil_image, return_tensors="pt")
+
+        generated_ids = model.generate(
+            input_ids=inputs["input_ids"],
+            pixel_values=inputs["pixel_values"],
+            max_new_tokens=1024,
+            num_beams=3,
+            repetition_penalty=1.10,
+        )
+
+        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+        parsed_answer = processor.post_process_generation(
+            generated_text, task=task_prompt, image_size=(pil_image.width, pil_image.height)
+        )
+
+        # Extract the caption
+        caption = parsed_answer.get("<DESCRIPTION>", "")
+        print(f"Generated caption: {caption}")
+        return caption
+    except Exception as e:
+        print(f"Error during captioning: {e}")
+        return f"Error generating caption: {str(e)}"

-    inputs = processor(text=user_prompt, images=image, return_tensors="pt")
-    generated_ids = model.generate(
-        input_ids=inputs["input_ids"],
-        pixel_values=inputs["pixel_values"],
-        max_new_tokens=1024,
-        num_beams=3,
-        repetition_penalty=1.10,
-    )
-    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-    parsed_answer = processor.post_process_generation(
-        generated_text, task=task_prompt, image_size=(image.width, image.height)
-    )
-    return parsed_answer["<DESCRIPTION>"]
-
-# --------- NEW FUNCTION: Process uploaded image and generate Ghibli style image ---------
+# --------- Process uploaded image and generate Ghibli style image ---------
 @spaces.GPU(duration=120)
 def process_uploaded_image(
     image,
-    model_name,
     seed,
     randomize_seed,
     width,
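
The reworked captioning path above now accepts either a PIL image or a numpy array, falls back to whichever model actually loaded, and returns an "Error: ..." string instead of raising. A hypothetical smoke test of caption_image as defined above (the file name is a placeholder):

import numpy as np
from PIL import Image

pil_img = Image.open("example.jpg")  # hypothetical local test image
print(caption_image(pil_img))                  # PIL input path
print(caption_image(np.asarray(pil_img)))      # numpy input path
print(caption_image(pil_img, model_name="no-such-model"))  # rerouted to a loaded model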
@@ -174,245 +229,157 @@ def process_uploaded_image(
     num_inference_steps,
     lora_scale
 ):
+    if image is None:
+        print("No image provided")
+        return None, None, "No image provided", "No image provided"
+
+    print("Starting image processing workflow")
+
     # Step 1: Generate caption from the uploaded image
-    caption = caption_image(image, model_name)
+    try:
+        caption = caption_image(image)
+        if caption.startswith("Error:"):
+            print(f"Captioning failed: {caption}")
+            # Use a default caption as fallback
+            caption = "A beautiful scene"
+    except Exception as e:
+        print(f"Exception during captioning: {e}")
+        caption = "A beautiful scene"

     # Step 2: Append "ghibli style" to the caption
     ghibli_prompt = f"{caption}, ghibli style"
+    print(f"Final prompt for Ghibli generation: {ghibli_prompt}")

     # Step 3: Generate Ghibli-style image based on the caption
-    generated_image, used_seed = inference(
-        prompt=ghibli_prompt,
-        seed=seed,
-        randomize_seed=randomize_seed,
-        width=width,
-        height=height,
-        guidance_scale=guidance_scale,
-        num_inference_steps=num_inference_steps,
-        lora_scale=lora_scale
-    )
-
-    return generated_image, used_seed, caption, ghibli_prompt
+    try:
+        generated_image, used_seed = inference(
+            prompt=ghibli_prompt,
+            seed=seed,
+            randomize_seed=randomize_seed,
+            width=width,
+            height=height,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_inference_steps,
+            lora_scale=lora_scale
+        )
+
+        print(f"Image generation complete with seed: {used_seed}")
+        return generated_image, used_seed, caption, ghibli_prompt
+    except Exception as e:
+        print(f"Error generating image: {e}")
+        error_img = Image.new('RGB', (width, height), color='red')
+        return error_img, seed, caption, ghibli_prompt

 # ----------------------------- Gradio UI --------------------------------------
 with gr.Blocks(analytics_enabled=False) as demo:
-    with gr.Tabs():
-        # ------------------ TAB 1: Image Generation ----------------------------
-        with gr.TabItem("FLUX Ghibli LoRA Generator"):
-            gr.Markdown("## Generate an image with the FLUX Ghibli LoRA")
-
-            with gr.Row():
-                with gr.Column():
-                    prompt = gr.Textbox(
-                        label="Prompt",
-                        placeholder="Describe your Ghibli-style image...",
-                        lines=3
-                    )
-                    with gr.Row():
-                        seed = gr.Slider(
-                            label="Seed",
-                            minimum=0,
-                            maximum=MAX_SEED,
-                            step=1,
-                            value=42
-                        )
-                        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-                    with gr.Row():
-                        width = gr.Slider(
-                            label="Width",
-                            minimum=256,
-                            maximum=MAX_IMAGE_SIZE,
-                            step=32,
-                            value=512
-                        )
-                        height = gr.Slider(
-                            label="Height",
-                            minimum=256,
-                            maximum=MAX_IMAGE_SIZE,
-                            step=32,
-                            value=512
-                        )
-                    with gr.Row():
-                        guidance_scale = gr.Slider(
-                            label="Guidance scale",
-                            minimum=0.0,
-                            maximum=10.0,
-                            step=0.1,
-                            value=3.5
-                        )
-                        num_inference_steps = gr.Slider(
-                            label="Steps",
-                            minimum=1,
-                            maximum=50,
-                            step=1,
-                            value=30
-                        )
-                        lora_scale = gr.Slider(
-                            label="LoRA scale",
-                            minimum=0.0,
-                            maximum=1.0,
-                            step=0.1,
-                            value=1.0
-                        )
-                    generate_button = gr.Button("Generate Image")
-
-                with gr.Column():
-                    output_image = gr.Image(label="Generated Image")
-                    output_seed = gr.Number(label="Seed Used")
-
-            # Link the button to the inference function
-            generate_button.click(
-                inference,
-                inputs=[
-                    prompt,
-                    seed,
-                    randomize_seed,
-                    width,
-                    height,
-                    guidance_scale,
-                    num_inference_steps,
-                    lora_scale,
-                ],
-                outputs=[output_image, output_seed]
-            )
-
-        # ------------------ TAB 2: Image Captioning ---------------------------
-        with gr.TabItem("Florence-2 Captioner"):
-            gr.Markdown("## Generate a caption for an uploaded image using Florence-2")
-
-            with gr.Row():
-                with gr.Column():
-                    input_img = gr.Image(label="Upload an Image")
-                    model_selector = gr.Dropdown(
-                        choices=list(models.keys()),
-                        value='gokaygokay/Florence-2-Flux-Large',
-                        label="Select Model"
-                    )
-                    caption_button = gr.Button("Generate Caption")
-                with gr.Column():
-                    caption_output = gr.Textbox(label="Caption")
-
-            caption_button.click(
-                caption_image,
-                inputs=[input_img, model_selector],
-                outputs=[caption_output]
-            )
-
-        # ------------------ NEW TAB 3: Image to Ghibli Style ---------------------------
-        with gr.TabItem("Image to Ghibli Style"):
-            gr.Markdown("## Upload an image and transform it to Ghibli style")
-
-            with gr.Row():
-                with gr.Column():
-                    upload_img = gr.Image(label="Upload an Image")
-                    caption_model_selector = gr.Dropdown(
-                        choices=list(models.keys()),
-                        value='gokaygokay/Florence-2-Flux-Large',
-                        label="Caption Model",
-                        visible=False  # Hidden as requested
-                    )
-                    with gr.Row():
-                        img2img_seed = gr.Slider(
-                            label="Seed",
-                            minimum=0,
-                            maximum=MAX_SEED,
-                            step=1,
-                            value=42
-                        )
-                        img2img_randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-                    with gr.Row():
-                        img2img_width = gr.Slider(
-                            label="Width",
-                            minimum=256,
-                            maximum=MAX_IMAGE_SIZE,
-                            step=32,
-                            value=512
-                        )
-                        img2img_height = gr.Slider(
-                            label="Height",
-                            minimum=256,
-                            maximum=MAX_IMAGE_SIZE,
-                            step=32,
-                            value=512
-                        )
-                    with gr.Row():
-                        img2img_guidance_scale = gr.Slider(
-                            label="Guidance scale",
-                            minimum=0.0,
-                            maximum=10.0,
-                            step=0.1,
-                            value=3.5
-                        )
-                        img2img_steps = gr.Slider(
-                            label="Steps",
-                            minimum=1,
-                            maximum=50,
-                            step=1,
-                            value=30
-                        )
-                        img2img_lora_scale = gr.Slider(
-                            label="LoRA scale",
-                            minimum=0.0,
-                            maximum=1.0,
-                            step=0.1,
-                            value=1.0
-                        )
-                    transform_button = gr.Button("Transform to Ghibli Style")
-
-                with gr.Column():
-                    ghibli_output_image = gr.Image(label="Generated Ghibli Image")
-                    ghibli_output_seed = gr.Number(label="Seed Used")
-                    extracted_caption = gr.Textbox(
-                        label="Extracted Description",
-                        visible=False  # Hidden as requested
-                    )
-                    ghibli_prompt = gr.Textbox(
-                        label="Generated Prompt",
-                        visible=False  # Hidden as requested
-                    )
-
-            # Auto-process when image is uploaded
-            upload_img.upload(
-                process_uploaded_image,
-                inputs=[
-                    upload_img,
-                    caption_model_selector,
-                    img2img_seed,
-                    img2img_randomize_seed,
-                    img2img_width,
-                    img2img_height,
-                    img2img_guidance_scale,
-                    img2img_steps,
-                    img2img_lora_scale,
-                ],
-                outputs=[
-                    ghibli_output_image,
-                    ghibli_output_seed,
-                    extracted_caption,
-                    ghibli_prompt,
-                ]
-            )
-
-            # Manual process button
-            transform_button.click(
-                process_uploaded_image,
-                inputs=[
-                    upload_img,
-                    caption_model_selector,
-                    img2img_seed,
-                    img2img_randomize_seed,
-                    img2img_width,
-                    img2img_height,
-                    img2img_guidance_scale,
-                    img2img_steps,
-                    img2img_lora_scale,
-                ],
-                outputs=[
-                    ghibli_output_image,
-                    ghibli_output_seed,
-                    extracted_caption,
-                    ghibli_prompt,
-                ]
-            )
+    gr.Markdown("# Image to Ghibli Style Conversion")
+    gr.Markdown("When you upload an image, a description is extracted automatically and the image is converted to Ghibli style.")
+
+    with gr.Row():
+        with gr.Column():
+            upload_img = gr.Image(label="Upload Image", type="pil")
+
+            with gr.Row():
+                img2img_seed = gr.Slider(
+                    label="Seed",
+                    minimum=0,
+                    maximum=MAX_SEED,
+                    step=1,
+                    value=42
+                )
+                img2img_randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+
+            with gr.Row():
+                img2img_width = gr.Slider(
+                    label="Width",
+                    minimum=256,
+                    maximum=MAX_IMAGE_SIZE,
+                    step=32,
+                    value=512
+                )
+                img2img_height = gr.Slider(
+                    label="Height",
+                    minimum=256,
+                    maximum=MAX_IMAGE_SIZE,
+                    step=32,
+                    value=512
+                )
+
+            with gr.Row():
+                img2img_guidance_scale = gr.Slider(
+                    label="Guidance scale",
+                    minimum=0.0,
+                    maximum=10.0,
+                    step=0.1,
+                    value=3.5
+                )
+                img2img_steps = gr.Slider(
+                    label="Steps",
+                    minimum=1,
+                    maximum=50,
+                    step=1,
+                    value=30
+                )
+
+            img2img_lora_scale = gr.Slider(
+                label="LoRA scale",
+                minimum=0.0,
+                maximum=1.0,
+                step=0.1,
+                value=1.0
+            )
+
+            transform_button = gr.Button("Transform to Ghibli Style")
+
+        with gr.Column():
+            ghibli_output_image = gr.Image(label="Generated Ghibli Style Image")
+            ghibli_output_seed = gr.Number(label="Seed Used")
+
+            # Debug elements (hidden by default)
+            with gr.Accordion("Debug Info", open=False):
+                extracted_caption = gr.Textbox(label="Extracted Image Description")
+                ghibli_prompt = gr.Textbox(label="Prompt Used for Generation")
+
+    # Auto-process when image is uploaded
+    upload_img.upload(
+        process_uploaded_image,
+        inputs=[
+            upload_img,
+            img2img_seed,
+            img2img_randomize_seed,
+            img2img_width,
+            img2img_height,
+            img2img_guidance_scale,
+            img2img_steps,
+            img2img_lora_scale,
+        ],
+        outputs=[
+            ghibli_output_image,
+            ghibli_output_seed,
+            extracted_caption,
+            ghibli_prompt,
+        ]
+    )
+
+    # Manual process button
+    transform_button.click(
+        process_uploaded_image,
+        inputs=[
+            upload_img,
+            img2img_seed,
+            img2img_randomize_seed,
+            img2img_width,
+            img2img_height,
+            img2img_guidance_scale,
+            img2img_steps,
+            img2img_lora_scale,
+        ],
+        outputs=[
+            ghibli_output_image,
+            ghibli_output_seed,
+            extracted_caption,
+            ghibli_prompt,
+        ]
+    )

 demo.launch(debug=True)
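
Because the upload event and the manual button are now wired to the same handler with the same eight inputs, the pipeline can also be exercised without the UI. A sketch, assuming the module's globals are in scope and a local test image exists (both assumptions):

from PIL import Image

img = Image.open("photo.jpg")  # hypothetical test image
result_img, used_seed, caption, prompt = process_uploaded_image(
    img,
    seed=42,
    randomize_seed=False,  # a fixed seed keeps the run reproducible
    width=512,
    height=512,
    guidance_scale=3.5,
    num_inference_steps=30,
    lora_scale=1.0,
)
print(used_seed, prompt)  # also surfaced in the UI's debug accordion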
 