import spaces
import argparse
import os
import shutil
import cv2
import gradio as gr
import numpy as np
import torch
from facexlib.utils.face_restoration_helper import FaceRestoreHelper
import huggingface_hub
from huggingface_hub import hf_hub_download
from PIL import Image
from torchvision.transforms.functional import normalize

from dreamo.dreamo_pipeline import DreamOPipeline
from dreamo.utils import img2tensor, resize_numpy_image_area, tensor2img, resize_numpy_image_long
from tools import BEN2

parser = argparse.ArgumentParser()
parser.add_argument('--port', type=int, default=8080)
parser.add_argument('--no_turbo', action='store_true')
args = parser.parse_args()

huggingface_hub.login(os.getenv('HF_TOKEN'))

try:
    shutil.rmtree('gradio_cached_examples')
except FileNotFoundError:
    print("cache folder does not exist")


class Generator:
    def __init__(self):
        device = torch.device('cuda')
        # preprocessing models
        # background removal model: BEN2
        self.bg_rm_model = BEN2.BEN_Base().to(device).eval()
        hf_hub_download(repo_id='PramaLLC/BEN2', filename='BEN2_Base.pth', local_dir='models')
        self.bg_rm_model.loadcheckpoints('models/BEN2_Base.pth')
        # face crop and alignment tool: facexlib
        self.face_helper = FaceRestoreHelper(
            upscale_factor=1,
            face_size=512,
            crop_ratio=(1, 1),
            det_model='retinaface_resnet50',
            save_ext='png',
            device=device,
        )

        # load dreamo
        model_root = 'black-forest-labs/FLUX.1-dev'
        dreamo_pipeline = DreamOPipeline.from_pretrained(model_root, torch_dtype=torch.bfloat16)
        dreamo_pipeline.load_dreamo_model(device, use_turbo=not args.no_turbo)
        self.dreamo_pipeline = dreamo_pipeline.to(device)

    @torch.no_grad()
    def get_align_face(self, img):
        # the face preprocessing code is the same as PuLID
        self.face_helper.clean_all()
        image_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        self.face_helper.read_image(image_bgr)
        self.face_helper.get_face_landmarks_5(only_center_face=True)
        self.face_helper.align_warp_face()
        if len(self.face_helper.cropped_faces) == 0:
            return None
        align_face = self.face_helper.cropped_faces[0]

        input = img2tensor(align_face, bgr2rgb=True).unsqueeze(0) / 255.0
        input = input.to(torch.device("cuda"))
        parsing_out = self.face_helper.face_parse(
            normalize(input, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        )[0]
        parsing_out = parsing_out.argmax(dim=1, keepdim=True)
        bg_label = [0, 16, 18, 7, 8, 9, 14, 15]
        bg = sum(parsing_out == i for i in bg_label).bool()
        white_image = torch.ones_like(input)
        # only keep the face features
        face_features_image = torch.where(bg, white_image, input)
        face_features_image = tensor2img(face_features_image, rgb2bgr=False)

        return face_features_image


generator = Generator()


@spaces.GPU
@torch.inference_mode()
def generate_image(
    ref_image1,
    ref_image2,
    ref_task1,
    ref_task2,
    prompt,
    seed,
    width=1024,
    height=1024,
    ref_res=512,
    num_steps=12,
    guidance=3.5,
    true_cfg=1,
    cfg_start_step=0,
    cfg_end_step=0,
    neg_prompt='',
    neg_guidance=3.5,
    first_step_guidance=0,
):
    print(prompt)
    ref_conds = []
    debug_images = []

    ref_images = [ref_image1, ref_image2]
    ref_tasks = [ref_task1, ref_task2]

    for idx, (ref_image, ref_task) in enumerate(zip(ref_images, ref_tasks)):
        if ref_image is not None:
            if ref_task == "id":
                ref_image = resize_numpy_image_long(ref_image, 1024)
                ref_image = generator.get_align_face(ref_image)
            elif ref_task != "style":
                ref_image = generator.bg_rm_model.inference(Image.fromarray(ref_image))
            if ref_task != "id":
                ref_image = resize_numpy_image_area(np.array(ref_image), ref_res * ref_res)
            debug_images.append(ref_image)
            ref_image = img2tensor(ref_image, bgr2rgb=False).unsqueeze(0) / 255.0
            ref_image = 2 * ref_image - 1.0
            ref_conds.append(
                {
                    'img': ref_image,
                    'task': ref_task,
                    'idx': idx + 1,
                }
            )

    seed = int(seed)
    if seed == -1:
        seed = torch.Generator(device="cpu").seed()

    image = generator.dreamo_pipeline(
        prompt=prompt,
        width=width,
        height=height,
        num_inference_steps=num_steps,
        guidance_scale=guidance,
        ref_conds=ref_conds,
        generator=torch.Generator(device="cpu").manual_seed(seed),
        true_cfg_scale=true_cfg,
        true_cfg_start_step=cfg_start_step,
        true_cfg_end_step=cfg_end_step,
        negative_prompt=neg_prompt,
        neg_guidance_scale=neg_guidance,
        first_step_guidance_scale=first_step_guidance if first_step_guidance > 0 else guidance,
    ).images[0]

    return image, debug_images, seed


# -----------------------------
# (1) Additional code for calling the video-generation API
# -----------------------------
import requests
import random
import tempfile
import subprocess
from gradio_client import Client, handle_file

# Example: remote server endpoint (adjust if needed).
# Client() raises if H100_URL is unset, so the Space fails fast without it.
REMOTE_ENDPOINT = os.getenv("H100_URL")
client = Client(REMOTE_ENDPOINT)


def run_process_video_api(image_path: str, prompt: str, video_length: float = 2.0):
    """
    Generate a video by calling the remote /process endpoint.
    (prompt, negative_prompt, seed, etc. can be hardcoded or adjusted as desired.)
    """
    # random seed
    seed_val = random.randint(0, 9999999)
    # fixed settings: empty negative prompt, TeaCache enabled
    negative_prompt = ""
    use_teacache = True

    # call /process via gradio_client
    result = client.predict(
        input_image=handle_file(image_path),
        prompt=prompt,
        n_prompt=negative_prompt,
        seed=seed_val,
        use_teacache=use_teacache,
        video_length=video_length,
        api_name="/process",
    )
    # result is a (video_dict, preview_dict, md_text, html_text) tuple
    video_dict, preview_dict, md_text, html_text = result
    video_path = video_dict.get("video") if isinstance(video_dict, dict) else None
    return video_path


def add_watermark_to_video(input_video_path: str, watermark_text="Ginigen.com") -> str:
    """
    Use FFmpeg to add a watermark at the bottom right and return the new video's path.
    """
    if not os.path.exists(input_video_path):
        raise FileNotFoundError(f"Input video not found: {input_video_path}")

    # output path
    base, ext = os.path.splitext(input_video_path)
    watermarked_path = base + "_wm" + ext

    # build the ffmpeg command
    # -y: overwrite the output without asking
    # the drawtext filter places the text at the bottom right (x=w-tw-10, y=h-th-10)
    # over a semi-transparent black box (boxcolor=black@0.5)
    cmd = [
        "ffmpeg", "-y",
        "-i", input_video_path,
        "-vf", f"drawtext=fontsize=20:fontcolor=white:text='{watermark_text}':x=w-tw-10:y=h-th-10:box=1:boxcolor=black@0.5:boxborderw=5",
        "-codec:a", "copy",
        watermarked_path,
    ]
    try:
        subprocess.run(cmd, check=True)
    except Exception as e:
        print(f"[WARN] FFmpeg watermark failed: {e}")
        return input_video_path  # return the original video on failure
    return watermarked_path


def generate_video_from_image(image_array: np.ndarray):
    """
    1) Save the numpy image to a temporary file
    2) Generate a 2-second video via the remote API (fixed default prompt)
    3) Add a 'Ginigen.com' watermark with FFmpeg
    4) Return the path of the final mp4
    """
    if image_array is None:
        raise gr.Error("No image provided.")

    # (1) save to a temporary file
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as fp:
        temp_img_path = fp.name
        Image.fromarray(image_array).save(temp_img_path, format="PNG")

    # (2) call the remote API
    default_video_prompt = (
        "Generate a video with smooth and natural movement. "
        "Objects should have visible motion while maintaining fluid transitions."
    )
    result_video_path = run_process_video_api(
        image_path=temp_img_path,
        prompt=default_video_prompt,
        video_length=2.0,
    )
    if result_video_path is None:
        raise gr.Error("Video API call failed or returned no result.")

    # (3) add the FFmpeg watermark
    final_video = add_watermark_to_video(result_video_path, watermark_text="Ginigen.com")
    return final_video
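
# Hedged usage sketch: how generate_image() composes outside the Gradio UI.
# Everything here is illustrative; 'ref.png', the 'ip' task name, and the
# output filename are assumptions, not part of the original app. The function
# is defined but never called, so importing this module stays side-effect free.
def _demo_generate(ref_path='ref.png'):
    ref = np.array(Image.open(ref_path).convert('RGB'))  # hypothetical RGB reference
    image, debug_images, used_seed = generate_image(
        ref, None, 'ip', 'ip',  # one reference image, generic subject ('ip') task
        prompt='a person holding a sign that reads hello world',
        seed=-1,                # -1 -> a fresh random seed is drawn internally
    )
    image.save('dreamo_out.png')  # the pipeline returns a PIL image
    return used_seed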
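
# For reference, the command list built in add_watermark_to_video() is
# equivalent to running the following shell command (hypothetical filenames):
#
#   ffmpeg -y -i clip.mp4 \
#     -vf "drawtext=fontsize=20:fontcolor=white:text='Ginigen.com':x=w-tw-10:y=h-th-10:box=1:boxcolor=black@0.5:boxborderw=5" \
#     -codec:a copy clip_wm.mp4
#
# drawtext anchors the text 10 px from the right/bottom edges (w/h are the frame
# size, tw/th the rendered text size); the audio stream, if any, is copied as-is.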
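
# A second hedged sketch (also never called): driving the remote video chain
# directly from a file on disk. Assumes the H100_URL env var points at a live
# server exposing /process; 'dreamo_out.png' and the prompt are hypothetical.
def _demo_animate(image_path='dreamo_out.png'):
    raw = run_process_video_api(image_path, prompt='gentle, natural motion', video_length=2.0)
    if raw is None:
        raise RuntimeError('remote /process returned no video')
    return add_watermark_to_video(raw, watermark_text='Ginigen.com')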
# -----------------------------
# Custom CSS, Headers, etc.
# -----------------------------
_CUSTOM_CSS_ = """
:root {
    --primary-color: #f8c3cd;           /* Sakura pink - primary accent */
    --secondary-color: #b3e5fc;         /* Pastel blue - secondary accent */
    --background-color: #f5f5f7;        /* Very light gray background */
    --card-background: #ffffff;         /* White for cards */
    --text-color: #424242;              /* Dark gray for text */
    --accent-color: #ffb6c1;            /* Light pink for accents */
    --success-color: #c8e6c9;           /* Pastel green for success */
    --warning-color: #fff9c4;           /* Pastel yellow for warnings */
    --shadow-color: rgba(0, 0, 0, 0.1); /* Shadow color */
    --border-radius: 12px;              /* Rounded corners */
}

body {
    background-color: var(--background-color) !important;
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
}

.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto !important;
}

/* Header styling */
h1 {
    color: #9c27b0 !important;
    font-weight: 800 !important;
    text-shadow: 2px 2px 4px rgba(156, 39, 176, 0.2) !important;
    letter-spacing: -0.5px !important;
}

/* Card styling for panels */
.panel-box {
    border-radius: var(--border-radius) !important;
    box-shadow: 0 8px 16px var(--shadow-color) !important;
    background-color: var(--card-background) !important;
    border: none !important;
    overflow: hidden !important;
    padding: 20px !important;
    margin-bottom: 20px !important;
}

/* Button styling */
button.gr-button {
    background: linear-gradient(135deg, var(--primary-color), #e1bee7) !important;
    border-radius: var(--border-radius) !important;
    color: #4a148c !important;
    font-weight: 600 !important;
    border: none !important;
    padding: 10px 20px !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important;
}

button.gr-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 10px rgba(0, 0, 0, 0.15) !important;
    background: linear-gradient(135deg, #e1bee7, var(--primary-color)) !important;
}

/* Input fields styling */
input, select, textarea, .gr-input {
    border-radius: 8px !important;
    border: 2px solid #e0e0e0 !important;
    padding: 10px 15px !important;
    transition: all 0.3s ease !important;
    background-color: #fafafa !important;
}

input:focus, select:focus, textarea:focus, .gr-input:focus {
    border-color: var(--primary-color) !important;
    box-shadow: 0 0 0 3px rgba(248, 195, 205, 0.3) !important;
}

/* Slider styling */
.gr-form input[type=range] {
    appearance: none !important;
    width: 100% !important;
    height: 6px !important;
    background: #e0e0e0 !important;
    border-radius: 5px !important;
    outline: none !important;
}

.gr-form input[type=range]::-webkit-slider-thumb {
    appearance: none !important;
    width: 16px !important;
    height: 16px !important;
    background: var(--primary-color) !important;
    border-radius: 50% !important;
    cursor: pointer !important;
    border: 2px solid white !important;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
}

/* Dropdown styling */
.gr-form select {
    background-color: white !important;
    border: 2px solid #e0e0e0 !important;
    border-radius: 8px !important;
    padding: 10px 15px !important;
}

.gr-form select option {
    padding: 10px !important;
}

/* Image upload area */
.gr-image-input {
    border: 2px dashed #b39ddb !important;
    border-radius: var(--border-radius) !important;
    background-color: #f3e5f5 !important;
    padding: 20px !important;
    display: flex !important;
    flex-direction: column !important;
    align-items: center !important;
    justify-content: center !important;
    transition: all 0.3s ease !important;
}

.gr-image-input:hover {
    background-color: #ede7f6 !important;
    border-color: #9575cd !important;
}

/* Add a nice pattern to the background */
body::before {
    content: "" !important;
    position: fixed !important;
    top: 0 !important;
    left: 0 !important;
    width: 100% !important;
    height: 100% !important;
    background:
        radial-gradient(circle at 10% 20%, rgba(248, 195, 205, 0.1) 0%, rgba(245, 245, 247, 0) 20%),
        radial-gradient(circle at 80% 70%, rgba(179, 229, 252, 0.1) 0%, rgba(245, 245, 247, 0) 20%) !important;
    pointer-events: none !important;
    z-index: -1 !important;
}

/* Gallery styling */
.gr-gallery {
    grid-gap: 15px !important;
}

.gr-gallery-item {
    border-radius: var(--border-radius) !important;
    overflow: hidden !important;
    box-shadow: 0 4px 8px var(--shadow-color) !important;
    transition: transform 0.3s ease !important;
}

.gr-gallery-item:hover {
    transform: scale(1.02) !important;
}

/* Label styling */
.gr-form label {
    font-weight: 600 !important;
    color: #673ab7 !important;
    margin-bottom: 5px !important;
}

/* Improve spacing */
.gr-padded {
    padding: 20px !important;
}

.gr-compact {
    gap: 15px !important;
}

.gr-form > div {
    margin-bottom: 16px !important;
}

/* Headings */
.gr-form h3 {
    color: #7b1fa2 !important;
    margin-top: 5px !important;
    margin-bottom: 15px !important;
    border-bottom: 2px solid #e1bee7 !important;
    padding-bottom: 8px !important;
}

/* Examples section */
#examples-panel {
    background-color: #f3e5f5 !important;
    border-radius: var(--border-radius) !important;
    padding: 15px !important;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.05) !important;
}

#examples-panel h2 {
    color: #7b1fa2 !important;
    font-size: 1.5rem !important;
    margin-bottom: 15px !important;
}

/* Accordion styling */
.gr-accordion {
    border: 1px solid #e0e0e0 !important;
    border-radius: var(--border-radius) !important;
    overflow: hidden !important;
}

.gr-accordion summary {
    padding: 12px 16px !important;
    background-color: #f9f9f9 !important;
    cursor: pointer !important;
    font-weight: 600 !important;
    color: #673ab7 !important;
}

/* Generate button special styling */
#generate-btn {
    background: linear-gradient(135deg, #ff9a9e, #fad0c4) !important;
    font-size: 1.1rem !important;
    padding: 12px 24px !important;
    margin-top: 10px !important;
    margin-bottom: 15px !important;
    width: 100% !important;
}

#generate-btn:hover {
    background: linear-gradient(135deg, #fad0c4, #ff9a9e) !important;
}
"""

_HEADER_ = '''
Create customized images with advanced AI
In the current demo, video generation is limited to 2 seconds due to ZeroGPU constraints. (The full version supports videos up to 60 seconds.)
If you find DreamO helpful, please give it a ⭐ to support the community. Thanks!
If you have any questions or feedback, feel free to open a discussion or contact us at arxivgpt@gmail.com.