kofaceid

Running on Zero

App Files Files Community

aiqtech committed 8 days ago

Commit

2432eb0

verified ·

1 Parent(s): 5bcee37

Update app.py

Browse files

Files changed (1) hide show

app.py +134 -93

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import insightface
 import gradio as gr
 import numpy as np
 import os
 from huggingface_hub import snapshot_download, login
 from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
 from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256_ipadapter_FaceID import StableDiffusionXLPipeline
@@ -18,6 +19,22 @@ from PIL import Image
 from insightface.app import FaceAnalysis
 from insightface.data import get_image as ins_get_image
 # Log in with the Hugging Face token
 HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN:
@@ -30,87 +47,120 @@ else:
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.float16 if device == "cuda" else torch.float32
 # Download models (using the token)
 try:
     ckpt_dir = snapshot_download(
         repo_id="Kwai-Kolors/Kolors",
         token=HF_TOKEN,
-        local_dir_use_symlinks=False
     )
     ckpt_dir_faceid = snapshot_download(
         repo_id="Kwai-Kolors/Kolors-IP-Adapter-FaceID-Plus",
         token=HF_TOKEN,
-        local_dir_use_symlinks=False
     )
 except Exception as e:
     print(f"Error downloading models: {e}")
     raise
-# Model loading with error handling
 try:
-    text_encoder = ChatGLMModel.from_pretrained(
-        f'{ckpt_dir}/text_encoder',
-        torch_dtype=dtype,
-        token=HF_TOKEN,
-        trust_remote_code=True
-    )
-    if device == "cuda":
-        text_encoder = text_encoder.half().to(device)
-    tokenizer = ChatGLMTokenizer.from_pretrained(
-        f'{ckpt_dir}/text_encoder',
-        token=HF_TOKEN,
-        trust_remote_code=True
-    )
-    vae = AutoencoderKL.from_pretrained(
-        f"{ckpt_dir}/vae",
-        revision=None,
-        torch_dtype=dtype,
-        token=HF_TOKEN
-    )
-    if device == "cuda":
-        vae = vae.half().to(device)
-    scheduler = EulerDiscreteScheduler.from_pretrained(
-        f"{ckpt_dir}/scheduler",
-        token=HF_TOKEN
-    )
-    unet = UNet2DConditionModel.from_pretrained(
-        f"{ckpt_dir}/unet",
-        revision=None,
-        torch_dtype=dtype,
-        token=HF_TOKEN
-    )
-    if device == "cuda":
-        unet = unet.half().to(device)
-    # CLIP model loading with fallback
     try:
         clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(
-            f'{ckpt_dir_faceid}/clip-vit-large-patch14-336',
             torch_dtype=dtype,
             ignore_mismatched_sizes=True,
-            token=HF_TOKEN
         )
-    except Exception as e:
-        print(f"Loading CLIP from local failed: {e}, trying alternative source...")
         clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(
             'openai/clip-vit-large-patch14-336',
             torch_dtype=dtype,
             ignore_mismatched_sizes=True,
-            token=HF_TOKEN
         )
-    clip_image_encoder.to(device)
-    clip_image_processor = CLIPImageProcessor(size=336, crop_size=336)
-except Exception as e:
-    print(f"Error loading models: {e}")
-    raise
-# Create the pipeline
 pipe = StableDiffusionXLPipeline(
     vae=vae,
     text_encoder=text_encoder,
@@ -122,6 +172,8 @@ pipe = StableDiffusionXLPipeline(
     force_zeros_for_empty_prompt=False,
 )
 class FaceInfoGenerator():
     def __init__(self, root_dir="./.insightface/"):
         providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if device == "cuda" else ['CPUExecutionProvider']
@@ -160,7 +212,7 @@ MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 face_info_generator = FaceInfoGenerator()
-@spaces.GPU
 def infer(prompt,
           image=None,
           negative_prompt="low quality, blurry, distorted",
@@ -170,6 +222,7 @@ def infer(prompt,
           num_inference_steps=50
         ):
     if image is None:
         return None, 0
     if randomize_seed:
@@ -187,35 +240,40 @@ def infer(prompt,
         pipe.set_face_fidelity_scale(scale)
     except Exception as e:
         print(f"Error loading IP adapter: {e}")
-        raise
     # Extract face information
     face_info = face_info_generator.get_faceinfo_one_img(image)
     if face_info is None:
         raise gr.Error("No face detected in the image. Please provide an image with a clear face.")
-    face_bbox_square = face_bbox_to_square(face_info["bbox"])
-    crop_image = image.crop(face_bbox_square)
-    crop_image = crop_image.resize((336, 336))
-    crop_image = [crop_image]
-    face_embeds = torch.from_numpy(np.array([face_info["embedding"]]))
-    face_embeds = face_embeds.to(device, dtype=dtype)
     # Generate the image
     try:
-        image = pipe(
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            height=1024,
-            width=1024,
-            num_inference_steps=num_inference_steps,
-            guidance_scale=guidance_scale,
-            num_images_per_prompt=1,
-            generator=generator,
-            face_crop_image=crop_image,
-            face_insightface_embeds=face_embeds
-        ).images[0]
     except Exception as e:
         print(f"Error during inference: {e}")
         raise gr.Error(f"Failed to generate image: {str(e)}")
@@ -233,13 +291,6 @@ footer {
 }
 """
-def load_description(fp):  # return the UTF-8 decoded contents of file fp, or "" when the path does not exist
-    if os.path.exists(fp):
-        with open(fp, 'r', encoding='utf-8') as f:
-            content = f.read()
-        return content
-    return ""
 # Gradio Interface
 with gr.Blocks(theme="soft", css=css) as Kolors:
     gr.HTML(
@@ -309,16 +360,6 @@ with gr.Blocks(theme="soft", css=css) as Kolors:
         with gr.Column(elem_id="col-right"):
             result = gr.Image(label="Generated Portrait", show_label=True)
             seed_used = gr.Number(label="Seed Used", precision=0)
-    # Add examples
-    gr.Examples(
-        examples=[
-            ["A cinematic portrait, dramatic lighting, professional photography", None],
-            ["An oil painting portrait in Renaissance style, classical art", None],
-            ["A cyberpunk character portrait, neon lights, futuristic", None],
-        ],
-        inputs=[prompt, image],
-    )
     button.click(
         fn=infer,

 import gradio as gr
 import numpy as np
 import os
+import shutil
 from huggingface_hub import snapshot_download, login
 from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
 from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256_ipadapter_FaceID import StableDiffusionXLPipeline
 from insightface.app import FaceAnalysis
 from insightface.data import get_image as ins_get_image
+# Clear cache (optional)
+def clear_cache():  # remove the cached openai/clip-vit-large-patch14-336 directory if it exists; returns nothing
+    cache_dir = "/home/user/.cache/huggingface/hub"  # hard-coded hub cache location; nothing happens if it is absent
+    if os.path.exists(cache_dir):
+        try:
+            # Delete only the CLIP model cache
+            clip_cache = os.path.join(cache_dir, "models--openai--clip-vit-large-patch14-336")
+            if os.path.exists(clip_cache):
+                shutil.rmtree(clip_cache)
+                print("Cleared CLIP cache")
+        except Exception as e:  # failures are printed and swallowed, never raised to the caller
+            print(f"Could not clear cache: {e}")
+# Clear cache (when needed)
+# clear_cache()
 # Log in with the Hugging Face token
 HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN:
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.float16 if device == "cuda" else torch.float32  # fp16 on GPU, fp32 on CPU
+print(f"Using device: {device}")
+print(f"Using dtype: {dtype}")
 # Download models (using the token)
 try:
+    print("Downloading Kolors models...")
     ckpt_dir = snapshot_download(  # return value is later used as a directory path when loading the sub-models
         repo_id="Kwai-Kolors/Kolors",
         token=HF_TOKEN,
+        local_dir_use_symlinks=False,  # NOTE(review): may be deprecated in recent huggingface_hub releases - confirm
+        resume_download=True  # NOTE(review): may be deprecated in recent huggingface_hub releases - confirm
     )
+    print("Downloading FaceID models...")
     ckpt_dir_faceid = snapshot_download(  # return value is later used to build the local CLIP path
         repo_id="Kwai-Kolors/Kolors-IP-Adapter-FaceID-Plus",
         token=HF_TOKEN,
+        local_dir_use_symlinks=False,  # NOTE(review): same deprecation question as above - confirm
+        resume_download=True  # NOTE(review): same deprecation question as above - confirm
     )
 except Exception as e:
     print(f"Error downloading models: {e}")
     raise  # re-raise after logging; the download error is not suppressed
+# Load models: text encoder, tokenizer, VAE, scheduler and UNet from subfolders of ckpt_dir
+print("Loading text encoder...")
+text_encoder = ChatGLMModel.from_pretrained(
+    f'{ckpt_dir}/text_encoder',
+    torch_dtype=dtype,
+    token=HF_TOKEN,
+    trust_remote_code=True
+)
+if device == "cuda":  # on GPU: convert to half precision and move to the device
+    text_encoder = text_encoder.half().to(device)
+print("Loading tokenizer...")
+tokenizer = ChatGLMTokenizer.from_pretrained(  # read from the same text_encoder subfolder as the model
+    f'{ckpt_dir}/text_encoder',
+    token=HF_TOKEN,
+    trust_remote_code=True
+)
+print("Loading VAE...")
+vae = AutoencoderKL.from_pretrained(
+    f"{ckpt_dir}/vae",
+    revision=None,
+    torch_dtype=dtype,
+    token=HF_TOKEN
+)
+if device == "cuda":  # on GPU: convert to half precision and move to the device
+    vae = vae.half().to(device)
+print("Loading scheduler...")
+scheduler = EulerDiscreteScheduler.from_pretrained(  # the scheduler is not moved to a device in this block
+    f"{ckpt_dir}/scheduler",
+    token=HF_TOKEN
+)
+print("Loading UNet...")
+unet = UNet2DConditionModel.from_pretrained(
+    f"{ckpt_dir}/unet",
+    revision=None,
+    torch_dtype=dtype,
+    token=HF_TOKEN
+)
+if device == "cuda":  # on GPU: convert to half precision and move to the device
+    unet = unet.half().to(device)
+# Load the CLIP model - safetensors first
+print("Loading CLIP model...")
 try:
+    # First try the local FaceID directory
+    local_clip_path = f'{ckpt_dir_faceid}/clip-vit-large-patch14-336'
+    if os.path.exists(local_clip_path):
+        print(f"Trying to load CLIP from local: {local_clip_path}")
+        clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+            local_clip_path,
+            torch_dtype=dtype,
+            ignore_mismatched_sizes=True,
+            token=HF_TOKEN,
+            use_safetensors=True,  # load safetensors weights first
+            local_files_only=True
+        )
+    else:
+        raise FileNotFoundError("Local CLIP not found")  # routed into the fallback handler below
+except Exception as e:  # any local-load failure, including the FileNotFoundError above, falls through to the hub download
+    print(f"Local loading failed: {e}")
     try:
+        # Download directly from the openai/clip-vit-large-patch14-336 repo (safetensors version)
+        print("Downloading CLIP from OpenAI...")
         clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+            'openai/clip-vit-large-patch14-336',
             torch_dtype=dtype,
             ignore_mismatched_sizes=True,
+            token=HF_TOKEN,
+            use_safetensors=True,  # load safetensors weights first
+            revision="main"
         )
+    except Exception as e2:
+        print(f"SafeTensors loading failed: {e2}")
+        # Last resort: use pytorch_model.bin
+        print("Trying with pytorch format...")
         clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(  # an exception here is not caught and propagates
             'openai/clip-vit-large-patch14-336',
             torch_dtype=dtype,
             ignore_mismatched_sizes=True,
+            token=HF_TOKEN,
+            use_safetensors=False
         )
+clip_image_encoder.to(device)
+clip_image_processor = CLIPImageProcessor(size=336, crop_size=336)  # 336 matches the "336" in the CLIP checkpoint name
+print("Creating pipeline...")
 pipe = StableDiffusionXLPipeline(
     vae=vae,
     text_encoder=text_encoder,
     force_zeros_for_empty_prompt=False,
 )
+print("Models loaded successfully!")
 class FaceInfoGenerator():
     def __init__(self, root_dir="./.insightface/"):
         providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if device == "cuda" else ['CPUExecutionProvider']
 MAX_IMAGE_SIZE = 1024
 face_info_generator = FaceInfoGenerator()
+@spaces.GPU(duration=60)
 def infer(prompt,
           image=None,
           negative_prompt="low quality, blurry, distorted",
           num_inference_steps=50
         ):
     if image is None:
+        gr.Warning("Please upload an image with a face.")
         return None, 0
     if randomize_seed:
         pipe.set_face_fidelity_scale(scale)
     except Exception as e:
         print(f"Error loading IP adapter: {e}")
+        raise gr.Error(f"Failed to load face adapter: {str(e)}")
     # Extract face information
     face_info = face_info_generator.get_faceinfo_one_img(image)
     if face_info is None:
         raise gr.Error("No face detected in the image. Please provide an image with a clear face.")
+    try:
+        face_bbox_square = face_bbox_to_square(face_info["bbox"])
+        crop_image = image.crop(face_bbox_square)
+        crop_image = crop_image.resize((336, 336))
+        crop_image = [crop_image]
+        face_embeds = torch.from_numpy(np.array([face_info["embedding"]]))
+        face_embeds = face_embeds.to(device, dtype=dtype)
+    except Exception as e:
+        print(f"Error processing face: {e}")
+        raise gr.Error(f"Failed to process face: {str(e)}")
     # Generate the image
     try:
+        with torch.no_grad():
+            image = pipe(
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                height=1024,
+                width=1024,
+                num_inference_steps=num_inference_steps,
+                guidance_scale=guidance_scale,
+                num_images_per_prompt=1,
+                generator=generator,
+                face_crop_image=crop_image,
+                face_insightface_embeds=face_embeds
+            ).images[0]
     except Exception as e:
         print(f"Error during inference: {e}")
         raise gr.Error(f"Failed to generate image: {str(e)}")
 }
 """
 # Gradio Interface
 with gr.Blocks(theme="soft", css=css) as Kolors:
     gr.HTML(
         with gr.Column(elem_id="col-right"):
             result = gr.Image(label="Generated Portrait", show_label=True)
             seed_used = gr.Number(label="Seed Used", precision=0)
     button.click(
         fn=infer,