webtoon-gen

Sleeping

App Files Community

ginipick committed on Dec 4, 2024

Commit

015ec9d

verified ·

1 Parent(s): e7fc396

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -193

app.py CHANGED Viewed

@@ -2,6 +2,8 @@ import tempfile
 import time
 from collections.abc import Sequence
 from typing import Any, cast
 import gradio as gr
 import numpy as np
@@ -15,21 +17,17 @@ from pymatting.foreground.estimate_foreground_ml import estimate_foreground_ml
 from refiners.fluxion.utils import no_grad
 from refiners.solutions import BoxSegmenter
 from transformers import GroundingDinoForObjectDetection, GroundingDinoProcessor
-import spaces
-import argparse
-import os
-from os import path
-import shutil
-from datetime import datetime
-from safetensors.torch import load_file
-from huggingface_hub import hf_hub_download
-import gradio as gr
 from diffusers import FluxPipeline
-from PIL import Image
-from huggingface_hub import login
-# HF 토큰 인증 처리
 HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN is None:
     raise ValueError("Please set the HF_TOKEN environment variable")
@@ -39,40 +37,7 @@ try:
 except Exception as e:
     raise ValueError(f"Failed to login to Hugging Face: {str(e)}")
-# FLUX 파이프라인 초기화 수정
-def initialize_pipeline():
-    try:
-        pipe = FluxPipeline.from_pretrained(
-            "black-forest-labs/FLUX.1-dev",
-            torch_dtype=torch.bfloat16,
-            use_auth_token=HF_TOKEN
-        )
-        pipe.load_lora_weights(
-            hf_hub_download(
-                "ByteDance/Hyper-SD",
-                "Hyper-FLUX.1-dev-8steps-lora.safetensors",
-                use_auth_token=HF_TOKEN
-            )
-        )
-        pipe.fuse_lora(lora_scale=0.125)
-        pipe.to(device="cuda", dtype=torch.bfloat16)
-        return pipe
-    except Exception as e:
-        raise ValueError(f"Failed to initialize pipeline: {str(e)}")
-# 파이프라인 초기화
-try:
-    pipe = initialize_pipeline()
-except Exception as e:
-    raise RuntimeError(f"Failed to setup the model: {str(e)}")
-BoundingBox = tuple[int, int, int, int]
-pillow_heif.register_heif_opener()
-pillow_heif.register_avif_opener()
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# weird dance because ZeroGPU
 segmenter = BoxSegmenter(device="cpu")
 segmenter.device = device
 segmenter.model = segmenter.model.to(device=segmenter.device)
@@ -80,66 +45,25 @@ segmenter.model = segmenter.model.to(device=segmenter.device)
 gd_model_path = "IDEA-Research/grounding-dino-base"
 gd_processor = GroundingDinoProcessor.from_pretrained(gd_model_path)
 gd_model = GroundingDinoForObjectDetection.from_pretrained(gd_model_path, torch_dtype=torch.float32)
-gd_model = gd_model.to(device=device)  # type: ignore
 assert isinstance(gd_model, GroundingDinoForObjectDetection)
-# FLUX 파이프라인 초기화 코드 추가
-pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
-pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"))
 pipe.fuse_lora(lora_scale=0.125)
 pipe.to(device="cuda", dtype=torch.bfloat16)
-def generate_background(prompt: str, width: int, height: int) -> Image.Image:
-    """배경 이미지 생성 함수"""
-    try:
-        with timer("Background generation"):
-            image = pipe(
-                prompt=prompt,
-                width=width,
-                height=height,
-                num_inference_steps=8,
-                guidance_scale=4.0,
-            ).images[0]
-        return image
-    except Exception as e:
-        raise gr.Error(f"Background generation failed: {str(e)}") # 괄호 닫기 수정
-def combine_with_background(foreground: Image.Image, background: Image.Image) -> Image.Image:
-    """전경과 배경 합성 함수"""
-    background = background.resize(foreground.size)
-    return Image.alpha_composite(background.convert('RGBA'), foreground)
-def _process(
-    img: Image.Image,
-    prompt: str | BoundingBox | None,
-    bg_prompt: str | None,
-) -> tuple[tuple[Image.Image, Image.Image, Image.Image], gr.DownloadButton]:
-    try:
-        # 기존 객체 추출 로직
-        mask, bbox, time_log = _gpu_process(img, prompt)
-        masked_alpha = apply_mask(img, mask, defringe=True)
-        # 배경 생성 및 합성
-        if bg_prompt:
-            background = generate_background(bg_prompt, img.width, img.height)
-            combined = combine_with_background(masked_alpha, background)
-        else:
-            combined = Image.alpha_composite(Image.new("RGBA", masked_alpha.size, "white"), masked_alpha)
-        # 저장 로직
-        thresholded = mask.point(lambda p: 255 if p > 10 else 0)
-        bbox = thresholded.getbbox()
-        to_dl = masked_alpha.crop(bbox)
-        temp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
-        to_dl.save(temp, format="PNG")
-        temp.close()
-        return (img, combined, masked_alpha), gr.DownloadButton(value=temp.name, interactive=True)
-    except Exception as e:
-        raise gr.Error(f"Processing failed: {str(e)}")
 def bbox_union(bboxes: Sequence[list[int]]) -> BoundingBox | None:
     if not bboxes:
         return None
@@ -153,18 +77,12 @@ def bbox_union(bboxes: Sequence[list[int]]) -> BoundingBox | None:
         max(bbox[3] for bbox in bboxes),
     )
 def corners_to_pixels_format(bboxes: torch.Tensor, width: int, height: int) -> torch.Tensor:
     x1, y1, x2, y2 = bboxes.round().to(torch.int32).unbind(-1)
     return torch.stack((x1.clamp_(0, width), y1.clamp_(0, height), x2.clamp_(0, width), y2.clamp_(0, height)), dim=-1)
 def gd_detect(img: Image.Image, prompt: str) -> BoundingBox | None:
-    assert isinstance(gd_processor, GroundingDinoProcessor)
-    # Grounding Dino expects a dot after each category.
     inputs = gd_processor(images=img, text=f"{prompt}.", return_tensors="pt").to(device=device)
     with no_grad():
         outputs = gd_model(**inputs)
     width, height = img.size
@@ -174,41 +92,44 @@ def gd_detect(img: Image.Image, prompt: str) -> BoundingBox | None:
         target_sizes=[(height, width)],
     )[0]
     assert "boxes" in results and isinstance(results["boxes"], torch.Tensor)
     bboxes = corners_to_pixels_format(results["boxes"].cpu(), width, height)
     return bbox_union(bboxes.numpy().tolist())
-def apply_mask(
-    img: Image.Image,
-    mask_img: Image.Image,
-    defringe: bool = True,
-) -> Image.Image:
     assert img.size == mask_img.size
     img = img.convert("RGB")
     mask_img = mask_img.convert("L")
     if defringe:
-        # Mitigate edge halo effects via color decontamination
         rgb, alpha = np.asarray(img) / 255.0, np.asarray(mask_img) / 255.0
         foreground = cast(np.ndarray[Any, np.dtype[np.uint8]], estimate_foreground_ml(rgb, alpha))
         img = Image.fromarray((foreground * 255).astype("uint8"))
     result = Image.new("RGBA", img.size)
     result.paste(img, (0, 0), mask_img)
     return result
-@spaces.GPU
-def _gpu_process(
-    img: Image.Image,
-    prompt: str | BoundingBox | None,
-) -> tuple[Image.Image, BoundingBox | None, list[str]]:
-    # Because of ZeroGPU shenanigans, we need a *single* function with the
-    # `spaces.GPU` decorator that *does not* contain postprocessing.
     time_log: list[str] = []
     if isinstance(prompt, str):
         t0 = time.time()
         bbox = gd_detect(img, prompt)
@@ -218,16 +139,40 @@ def _gpu_process(
             raise gr.Error("No object detected")
     else:
         bbox = prompt
     t0 = time.time()
     mask = segmenter(img, bbox)
     time_log.append(f"segment: {time.time() - t0}")
     return mask, bbox, time_log
 def process_bbox(prompts: dict[str, Any]) -> tuple[tuple[Image.Image, Image.Image], gr.DownloadButton]:
     assert isinstance(img := prompts["image"], Image.Image)
@@ -240,45 +185,42 @@ def process_bbox(prompts: dict[str, Any]) -> tuple[tuple[Image.Image, Image.Imag
         bbox = None
     return _process(img, bbox)
 def on_change_bbox(prompts: dict[str, Any] | None):
     return gr.update(interactive=prompts is not None)
-def process_prompt(img: Image.Image, prompt: str) -> tuple[tuple[Image.Image, Image.Image], gr.DownloadButton]:
-    return _process(img, prompt)
-def on_change_prompt(img: Image.Image | None, prompt: str | None):
     return gr.update(interactive=bool(img and prompt))
 css = """
-footer {
-    visibility: hidden;
 }
 """
-# 스타일 정의 추가
-css = """
-footer {visibility: hidden}
-.container {max-width: 1200px; margin: auto; padding: 20px;}
-.main-title {text-align: center; color: #2a2a2a; margin-bottom: 2em;}
-.tabs {background: #f7f7f7; border-radius: 15px; padding: 20px;}
-.input-column {background: white; padding: 20px; border-radius: 10px; box-shadow: 0 2px 6px rgba(0,0,0,0.1);}
-.output-column {background: white; padding: 20px; border-radius: 10px; box-shadow: 0 2px 6px rgba(0,0,0,0.1);}
-.custom-button {background: #2196F3; color: white; border: none; border-radius: 5px; padding: 10px 20px;}
-.custom-button:hover {background: #1976D2;}
-.example-region {margin-top: 2em; padding: 20px; background: #f0f0f0; border-radius: 10px;}
-"""
-def process_prompt(img: Image.Image, prompt: str, bg_prompt: str = None) -> tuple[tuple[Image.Image, Image.Image], gr.DownloadButton]:
-    return _process(img, prompt, bg_prompt)
-def on_change_prompt(img: Image.Image | None, prompt: str | None, bg_prompt: str | None = None):
-    return gr.update(interactive=bool(img and prompt))
 with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
     with gr.Tabs() as tabs:
         with gr.Tab("✨ Extract by Text", id="tab_prompt"):
             with gr.Row(equal_height=True):
@@ -316,26 +258,8 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
             with gr.Accordion("📚 Examples", open=False):
                 examples = [
-                    {
-                        "image": "examples/text.jpg",
-                        "prompt": "text",
-                        "bg_prompt": "white background"
-                    },
-                    {
-                        "image": "examples/potted-plant.jpg",
-                        "prompt": "potted plant",
-                        "bg_prompt": "natural garden background"
-                    },
-                    {
-                        "image": "examples/chair.jpg",
-                        "prompt": "chair",
-                        "bg_prompt": "modern living room"
-                    },
-                    {
-                        "image": "examples/black-lamp.jpg",
-                        "prompt": "black lamp",
-                        "bg_prompt": "minimalist interior"
-                    }
                 ]
                 ex = gr.Examples(
                     examples=examples,
@@ -345,7 +269,6 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
                     cache_examples=True
                 )
-        # Bounding Box 탭
         with gr.Tab("📏 Extract by Box", id="tab_bb"):
             with gr.Row(equal_height=True):
                 with gr.Column(scale=1, min_width=400):
@@ -377,22 +300,8 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
             with gr.Accordion("📚 Examples", open=False):
                 examples_bb = [
-                    {
-                        "image": "examples/text.jpg",
-                        "boxes": [{"xmin": 51, "ymin": 511, "xmax": 639, "ymax": 1255}]
-                    },
-                    {
-                        "image": "examples/potted-plant.jpg",
-                        "boxes": [{"xmin": 51, "ymin": 511, "xmax": 639, "ymax": 1255}]
-                    },
-                    {
-                        "image": "examples/chair.jpg",
-                        "boxes": [{"xmin": 98, "ymin": 330, "xmax": 973, "ymax": 1468}]
-                    },
-                    {
-                        "image": "examples/black-lamp.jpg",
-                        "boxes": [{"xmin": 88, "ymin": 148, "xmax": 700, "ymax": 1414}]
-                    }
                 ]
                 ex_bb = gr.Examples(
                     examples=examples_bb,

 import time
 from collections.abc import Sequence
 from typing import Any, cast
+import os
+from huggingface_hub import login
 import gradio as gr
 import numpy as np
 from refiners.fluxion.utils import no_grad
 from refiners.solutions import BoxSegmenter
 from transformers import GroundingDinoForObjectDetection, GroundingDinoProcessor
 from diffusers import FluxPipeline
+BoundingBox = tuple[int, int, int, int]
+# 초기화 및 설정
+pillow_heif.register_heif_opener()
+pillow_heif.register_avif_opener()
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# HF 토큰 설정
 HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN is None:
     raise ValueError("Please set the HF_TOKEN environment variable")
 except Exception as e:
     raise ValueError(f"Failed to login to Hugging Face: {str(e)}")
+# 모델 초기화
 segmenter = BoxSegmenter(device="cpu")
 segmenter.device = device
 segmenter.model = segmenter.model.to(device=segmenter.device)
 gd_model_path = "IDEA-Research/grounding-dino-base"
 gd_processor = GroundingDinoProcessor.from_pretrained(gd_model_path)
 gd_model = GroundingDinoForObjectDetection.from_pretrained(gd_model_path, torch_dtype=torch.float32)
+gd_model = gd_model.to(device=device)
 assert isinstance(gd_model, GroundingDinoForObjectDetection)
+# FLUX 파이프라인 초기화
+pipe = FluxPipeline.from_pretrained(
+    "black-forest-labs/FLUX.1-dev",
+    torch_dtype=torch.bfloat16,
+    use_auth_token=HF_TOKEN
+)
+pipe.load_lora_weights(
+    hf_hub_download(
+        "ByteDance/Hyper-SD",
+        "Hyper-FLUX.1-dev-8steps-lora.safetensors",
+        use_auth_token=HF_TOKEN
+    )
+)
 pipe.fuse_lora(lora_scale=0.125)
 pipe.to(device="cuda", dtype=torch.bfloat16)
 def bbox_union(bboxes: Sequence[list[int]]) -> BoundingBox | None:
     if not bboxes:
         return None
         max(bbox[3] for bbox in bboxes),
     )
 def corners_to_pixels_format(bboxes: torch.Tensor, width: int, height: int) -> torch.Tensor:
     x1, y1, x2, y2 = bboxes.round().to(torch.int32).unbind(-1)
     return torch.stack((x1.clamp_(0, width), y1.clamp_(0, height), x2.clamp_(0, width), y2.clamp_(0, height)), dim=-1)
 def gd_detect(img: Image.Image, prompt: str) -> BoundingBox | None:
     inputs = gd_processor(images=img, text=f"{prompt}.", return_tensors="pt").to(device=device)
     with no_grad():
         outputs = gd_model(**inputs)
     width, height = img.size
         target_sizes=[(height, width)],
     )[0]
     assert "boxes" in results and isinstance(results["boxes"], torch.Tensor)
     bboxes = corners_to_pixels_format(results["boxes"].cpu(), width, height)
     return bbox_union(bboxes.numpy().tolist())
+def apply_mask(img: Image.Image, mask_img: Image.Image, defringe: bool = True) -> Image.Image:
     assert img.size == mask_img.size
     img = img.convert("RGB")
     mask_img = mask_img.convert("L")
     if defringe:
         rgb, alpha = np.asarray(img) / 255.0, np.asarray(mask_img) / 255.0
         foreground = cast(np.ndarray[Any, np.dtype[np.uint8]], estimate_foreground_ml(rgb, alpha))
         img = Image.fromarray((foreground * 255).astype("uint8"))
     result = Image.new("RGBA", img.size)
     result.paste(img, (0, 0), mask_img)
     return result
+def generate_background(prompt: str, width: int, height: int) -> Image.Image:
+    """배경 이미지 생성 함수"""
+    try:
+        with timer("Background generation"):
+            image = pipe(
+                prompt=prompt,
+                width=width,
+                height=height,
+                num_inference_steps=8,
+                guidance_scale=4.0,
+            ).images[0]
+        return image
+    except Exception as e:
+        raise gr.Error(f"Background generation failed: {str(e)}")
+def combine_with_background(foreground: Image.Image, background: Image.Image) -> Image.Image:
+    """전경과 배경 합성 함수"""
+    background = background.resize(foreground.size)
+    return Image.alpha_composite(background.convert('RGBA'), foreground)
+@spaces.GPU
+def _gpu_process(img: Image.Image, prompt: str | BoundingBox | None) -> tuple[Image.Image, BoundingBox | None, list[str]]:
     time_log: list[str] = []
     if isinstance(prompt, str):
         t0 = time.time()
         bbox = gd_detect(img, prompt)
             raise gr.Error("No object detected")
     else:
         bbox = prompt
     t0 = time.time()
     mask = segmenter(img, bbox)
     time_log.append(f"segment: {time.time() - t0}")
     return mask, bbox, time_log
+def _process(img: Image.Image, prompt: str | BoundingBox | None, bg_prompt: str | None = None) -> tuple[tuple[Image.Image, Image.Image, Image.Image], gr.DownloadButton]:
+    if img.width > 2048 or img.height > 2048:
+        orig_res = max(img.width, img.height)
+        img.thumbnail((2048, 2048))
+        if isinstance(prompt, tuple):
+            x0, y0, x1, y1 = (int(x * 2048 / orig_res) for x in prompt)
+            prompt = (x0, y0, x1, y1)
+    mask, bbox, time_log = _gpu_process(img, prompt)
+    masked_alpha = apply_mask(img, mask, defringe=True)
+    if bg_prompt:
+        try:
+            background = generate_background(bg_prompt, img.width, img.height)
+            combined = combine_with_background(masked_alpha, background)
+        except Exception as e:
+            raise gr.Error(f"Background processing failed: {str(e)}")
+    else:
+        combined = Image.alpha_composite(Image.new("RGBA", masked_alpha.size, "white"), masked_alpha)
+    thresholded = mask.point(lambda p: 255 if p > 10 else 0)
+    bbox = thresholded.getbbox()
+    to_dl = masked_alpha.crop(bbox)
+    temp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
+    to_dl.save(temp, format="PNG")
+    temp.close()
+    return (img, combined, masked_alpha), gr.DownloadButton(value=temp.name, interactive=True)
 def process_bbox(prompts: dict[str, Any]) -> tuple[tuple[Image.Image, Image.Image], gr.DownloadButton]:
     assert isinstance(img := prompts["image"], Image.Image)
         bbox = None
     return _process(img, bbox)
 def on_change_bbox(prompts: dict[str, Any] | None):
     return gr.update(interactive=prompts is not None)
+def process_prompt(img: Image.Image, prompt: str, bg_prompt: str | None = None) -> tuple[tuple[Image.Image, Image.Image], gr.DownloadButton]:
+    return _process(img, prompt, bg_prompt)
+def on_change_prompt(img: Image.Image | None, prompt: str | None, bg_prompt: str | None = None):
     return gr.update(interactive=bool(img and prompt))
+# CSS 스타일 정의
 css = """
+footer {display: none}
+.main-title {
+    text-align: center;
+    margin: 2em 0;
+}
+.main-title h1 {
+    color: #2196F3;
+    font-size: 2.5em;
+}
+.container {
+    max-width: 1200px;
+    margin: auto;
+    padding: 20px;
 }
 """
+# Gradio UI
 with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
+    gr.HTML("""
+        <div class="main-title">
+            <h1>🎨 Advanced Image Object Extractor</h1>
+            <p>Extract objects from images using text prompts or bounding boxes</p>
+        </div>
+    """)
     with gr.Tabs() as tabs:
         with gr.Tab("✨ Extract by Text", id="tab_prompt"):
             with gr.Row(equal_height=True):
             with gr.Accordion("📚 Examples", open=False):
                 examples = [
+                    ["examples/text.jpg", "text", "white background"],
+                    ["examples/black-lamp.jpg", "black lamp", "minimalist interior"]
                 ]
                 ex = gr.Examples(
                     examples=examples,
                     cache_examples=True
                 )
         with gr.Tab("📏 Extract by Box", id="tab_bb"):
             with gr.Row(equal_height=True):
                 with gr.Column(scale=1, min_width=400):
             with gr.Accordion("📚 Examples", open=False):
                 examples_bb = [
+                    ["examples/text.jpg", [{"xmin": 51, "ymin": 511, "xmax": 639, "ymax": 1255}]],
+                    ["examples/black-lamp.jpg", [{"xmin": 88, "ymin": 148, "xmax": 700, "ymax": 1414}]]
                 ]
                 ex_bb = gr.Examples(
                     examples=examples_bb,