Spaces:

tori29umai
/

FramePack_mask_fadeout_frame1

Running on Zero

App Files Community

tori29umai committed on May 8

Commit

dfc7eb3

verified ·

1 Parent(s): 4b8e0c1

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -33

app.py CHANGED Viewed

@@ -90,7 +90,28 @@ from transformers import SiglipImageProcessor, SiglipVisionModel
 from diffusers_helper.clip_vision import hf_clip_vision_encode
 from diffusers_helper.bucket_tools import find_nearest_bucket
 # 追加: 指定された解像度リスト
 NEW_RESOLUTIONS = [
@@ -99,7 +120,6 @@ NEW_RESOLUTIONS = [
     (768, 512), (832, 480), (864, 448), (960, 416), (640, 640),
 ]
 # Spaces環境では、すべてのCUDA操作を遅延させる
 if not IN_HF_SPACE:
     # 非Spaces環境でのみCUDAメモリを取得
@@ -175,7 +195,7 @@ def process_image_mask(image_mask_dict):
     background = image_mask_dict.get("background")
     layers     = image_mask_dict.get("layers")
-    if background is None:
         return None
     # ---- 1) Drop alpha from background ----
@@ -186,36 +206,32 @@ def process_image_mask(image_mask_dict):
     if img_array.ndim == 3 and img_array.shape[2] == 4:
         img_array = img_array[..., :3]
-    # ---- 2) マスクがある場合のみマスク処理 ----
-    if layers and len(layers) > 0:
-        layer = layers[0]
-        if isinstance(layer, Image.Image) and layer.mode == "RGBA":
-            layer = layer.convert("RGB")
-        mask_array = np.array(layer)
-        if mask_array.ndim == 3 and mask_array.shape[2] == 4:
-            mask_array = mask_array[..., :3]
-        # convert to gray + binary
-        if mask_array.ndim == 3:
-            mask_gray = cv2.cvtColor(mask_array, cv2.COLOR_RGB2GRAY)
-        else:
-            mask_gray = mask_array
-        _, binary_mask = cv2.threshold(mask_gray, 1, 255, cv2.THRESH_BINARY)
-        # 市松模様合成ロジック
-        total_pixels = img_array.shape[0] * img_array.shape[1]
-        cell_size = max(int(np.sqrt(total_pixels) / 20), 10)
-        checkerboard = create_checkerboard(img_array.shape[1], img_array.shape[0], cell_size)
-        result = img_array.copy()
-        binary_mask_3ch = np.stack([binary_mask]*3, axis=2) // 255
-        for c in range(3):
-            result[..., c] = result[..., c] * (1 - binary_mask_3ch[..., c]) + checkerboard[..., c] * binary_mask_3ch[..., c]
-        return result.astype(np.uint8)
-    else:
-        # マスクがない場合は元の画像をそのまま返す
-        return img_array
 # 最も近い解像度を見つける関数
 def find_nearest_resolution(width, height):
@@ -337,7 +353,7 @@ def load_models():
         print("Transformerモデルを読み込み中...")
         transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained(
-            "tori29umai/FramePackI2V_HY_mask_fadeout", torch_dtype=torch.bfloat16
         ).cpu()
         transformer.eval()
@@ -432,6 +448,9 @@ def worker_with_temp_files(
     gpu_memory_preservation,
     use_teacache,
     mp4_crf,
 ):
     global last_update_time
     last_update_time = time.time()
@@ -480,6 +499,27 @@ def worker_with_temp_files(
         feature_extractor = models['feature_extractor']
         image_encoder = models['image_encoder']
         # Clean GPU
         if not high_vram:
@@ -764,7 +804,7 @@ def worker_with_temp_files(
 # 非GPU環境用の標準process関数を追加
 if not IN_HF_SPACE or 'spaces' not in globals():
-    def process_with_temp(image_mask_dict):
         """一時ファイルを使用する処理メインフロー（非GPU環境用）"""
         global stream
@@ -800,6 +840,13 @@ if not IN_HF_SPACE or 'spaces' not in globals():
         gpu_memory_preservation = 6.0
         use_teacache = False
         mp4_crf = 0
         # 非同期ワーカー起動
         stream = AsyncStream()
@@ -816,6 +863,9 @@ if not IN_HF_SPACE or 'spaces' not in globals():
             gpu_memory_preservation,
             use_teacache,
             mp4_crf,
         )
         temp_dir = None
@@ -909,7 +959,7 @@ if not IN_HF_SPACE or 'spaces' not in globals():
 # GPU環境用process_with_temp関数の内容を完成
 if IN_HF_SPACE and 'spaces' in globals():
     @spaces.GPU(duration=180)
-    def process_with_temp(image_mask_dict):
         """一時ファイルを使用する処理メインフロー（GPU対応）"""
         global stream
@@ -945,6 +995,13 @@ if IN_HF_SPACE and 'spaces' in globals():
         gpu_memory_preservation = 6.0
         use_teacache = False
         mp4_crf = 0
         # 非同期ワーカー起動
         stream = AsyncStream()
@@ -961,6 +1018,9 @@ if IN_HF_SPACE and 'spaces' in globals():
             gpu_memory_preservation,
             use_teacache,
             mp4_crf,
         )
         temp_dir = None
@@ -1077,7 +1137,7 @@ if IN_HF_SPACE and 'spaces' in globals():
 css = make_progress_bar_css()
 block = gr.Blocks(css=css).queue()
 with block:
-    gr.Markdown("# FramePackI2V_HY_mask_fadeout_frame1 - 画像のマスクした部分を除去")
     with gr.Row():
         with gr.Column():
             # 入力画像をImageMaskで設定
@@ -1098,6 +1158,9 @@ with block:
                 start_button = gr.Button(value="生成開始")
                 end_button = gr.Button(value="生成中止", interactive=False)
         with gr.Column():
             preview_image = gr.Image(label="生成プレビュー", visible=False)
             result_frame = gr.Image(label="生成結果", visible=False, height="60vh", type="filepath")
@@ -1107,6 +1170,7 @@ with block:
     ips = [
         image_mask,
     ]
     ops = [
         preview_image,

 from diffusers_helper.clip_vision import hf_clip_vision_encode
 from diffusers_helper.bucket_tools import find_nearest_bucket
+# GPU使用に必要なモジュールのインポートを試みる（可能な場合）
+try:
+    from utils.lora_utils import merge_lora_to_state_dict
+    from utils.fp8_optimization_utils import optimize_state_dict_with_fp8, apply_fp8_monkey_patch
+    print("LoRAとFP8最適化モジュールを正常にインポートしました")
+except ImportError as e:
+    print(f"一部のモジュールのインポートに失敗しました: {e}")
+    # ダミー関数を定義
+    def merge_lora_to_state_dict(state_dict, lora_file, lora_multiplier, device=None):
+        print("Warning: LoRA適用機能が利用できません")
+        return state_dict
+    def optimize_state_dict_with_fp8(state_dict, device, target_keys, exclude_keys, move_to_device=False):
+        print("Warning: FP8最適化機能が利用できません")
+        return state_dict
+    def apply_fp8_monkey_patch(model, state_dict, use_scaled_mm=False):
+        print("Warning: FP8 monkey patch機能が利用できません")
+        pass
+outputs_folder = './outputs/'
+os.makedirs(outputs_folder, exist_ok=True)
 # 追加: 指定された解像度リスト
 NEW_RESOLUTIONS = [
     (768, 512), (832, 480), (864, 448), (960, 416), (640, 640),
 ]
 # Spaces環境では、すべてのCUDA操作を遅延させる
 if not IN_HF_SPACE:
     # 非Spaces環境でのみCUDAメモリを取得
     background = image_mask_dict.get("background")
     layers     = image_mask_dict.get("layers")
+    if background is None or not layers:
         return None
     # ---- 1) Drop alpha from background ----
     if img_array.ndim == 3 and img_array.shape[2] == 4:
         img_array = img_array[..., :3]
+    # ---- 2) Load mask layer and binarize ----
+    layer = layers[0]
+    if isinstance(layer, Image.Image) and layer.mode == "RGBA":
+        layer = layer.convert("RGB")
+    mask_array = np.array(layer)
+    if mask_array.ndim == 3 and mask_array.shape[2] == 4:
+        mask_array = mask_array[..., :3]
+    # convert to gray + binary
+    if mask_array.ndim == 3:
+        mask_gray = cv2.cvtColor(mask_array, cv2.COLOR_RGB2GRAY)
+    else:
+        mask_gray = mask_array
+    _, binary_mask = cv2.threshold(mask_gray, 1, 255, cv2.THRESH_BINARY)
+    # 市松模様合成ロジック
+    total_pixels = img_array.shape[0] * img_array.shape[1]
+    cell_size = max(int(np.sqrt(total_pixels) / 20), 10)
+    checkerboard = create_checkerboard(img_array.shape[1], img_array.shape[0], cell_size)
+    result = img_array.copy()
+    binary_mask_3ch = np.stack([binary_mask]*3, axis=2) // 255
+    for c in range(3):
+        result[..., c] = result[..., c] * (1 - binary_mask_3ch[..., c]) + checkerboard[..., c] * binary_mask_3ch[..., c]
+    return result.astype(np.uint8)
 # 最も近い解像度を見つける関数
 def find_nearest_resolution(width, height):
         print("Transformerモデルを読み込み中...")
         transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained(
+            "lllyasviel/FramePackI2V_HY", torch_dtype=torch.bfloat16
         ).cpu()
         transformer.eval()
     gpu_memory_preservation,
     use_teacache,
     mp4_crf,
+    lora_file,
+    lora_multiplier,
+    fp8_optimization,
 ):
     global last_update_time
     last_update_time = time.time()
         feature_extractor = models['feature_extractor']
         image_encoder = models['image_encoder']
+        # LoRAファイルの適用
+        if lora_file is not None and os.path.exists(lora_file):
+            try:
+                print(f"LoRAファイル {os.path.basename(lora_file)} をマージします...")
+                state_dict = transformer.state_dict()
+                state_dict = merge_lora_to_state_dict(state_dict, lora_file, lora_multiplier, device=gpu)
+                if fp8_optimization:
+                    TARGET_KEYS = ["transformer_blocks", "single_transformer_blocks"]
+                    EXCLUDE_KEYS = ["norm"]  # Exclude norm layers from FP8
+                    print("FP8最適化を適用します")
+                    state_dict = optimize_state_dict_with_fp8(state_dict, gpu, TARGET_KEYS, EXCLUDE_KEYS, move_to_device=False)
+                    apply_fp8_monkey_patch(transformer, state_dict, use_scaled_mm=False)
+                    gc.collect()
+                info = transformer.load_state_dict(state_dict, strict=True, assign=True)
+                print(f"LoRAと/またはFP8最適化を適用しました: {info}")
+            except Exception as e:
+                print(f"LoRA適用中にエラーが発生しました: {e}")
+                # エラー発生時も処理を継続
         # Clean GPU
         if not high_vram:
 # 非GPU環境用の標準process関数を追加
 if not IN_HF_SPACE or 'spaces' not in globals():
+    def process_with_temp(image_mask_dict, lora_multiplier=1.0):
         """一時ファイルを使用する処理メインフロー（非GPU環境用）"""
         global stream
         gpu_memory_preservation = 6.0
         use_teacache = False
         mp4_crf = 0
+        lora_file = "./LoRA/mask_fadeout_V1.safetensors"
+        fp8_optimization = False
+        # LoRAファイルの存在確認
+        if not os.path.exists(lora_file):
+            print(f"警告: LoRAファイル {lora_file} が見つかりません。LoRAなしで処理を続行します。")
+            lora_file = None
         # 非同期ワーカー起動
         stream = AsyncStream()
             gpu_memory_preservation,
             use_teacache,
             mp4_crf,
+            lora_file,
+            lora_multiplier,
+            fp8_optimization,
         )
         temp_dir = None
 # GPU環境用process_with_temp関数の内容を完成
 if IN_HF_SPACE and 'spaces' in globals():
     @spaces.GPU(duration=180)
+    def process_with_temp(image_mask_dict, lora_multiplier=1.0):
         """一時ファイルを使用する処理メインフロー（GPU対応）"""
         global stream
         gpu_memory_preservation = 6.0
         use_teacache = False
         mp4_crf = 0
+        lora_file = "./LoRA/mask_fadeout_V1.safetensors"
+        fp8_optimization = False
+        # LoRAファイルの存在確認
+        if not os.path.exists(lora_file):
+            print(f"警告: LoRAファイル {lora_file} が見つかりません。LoRAなしで処理を続行します。")
+            lora_file = None
         # 非同期ワーカー起動
         stream = AsyncStream()
             gpu_memory_preservation,
             use_teacache,
             mp4_crf,
+            lora_file,
+            lora_multiplier,
+            fp8_optimization,
         )
         temp_dir = None
 css = make_progress_bar_css()
 block = gr.Blocks(css=css).queue()
 with block:
+    gr.Markdown("# FramePackI2V_HY_mask_fadeout - 画像のマスクした部分を除去")
     with gr.Row():
         with gr.Column():
             # 入力画像をImageMaskで設定
                 start_button = gr.Button(value="生成開始")
                 end_button = gr.Button(value="生成中止", interactive=False)
+            with gr.Group():
+                lora_multiplier = gr.Slider(label="LoRA倍率", minimum=0.0, maximum=2.0, value=1.0, step=0.1)
         with gr.Column():
             preview_image = gr.Image(label="生成プレビュー", visible=False)
             result_frame = gr.Image(label="生成結果", visible=False, height="60vh", type="filepath")
     ips = [
         image_mask,
+        lora_multiplier,
     ]
     ops = [
         preview_image,