FramePack_rotate_indoor

Running on Zero

App Files Files Community

tori29umai committed 23 days ago

Commit

f2548df

verified ·

1 Parent(s): 5ca42f5

Update app.py

Browse files

Files changed (1) hide show

app.py +117 -26

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ import requests
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
 import json
 os.environ['HF_HOME'] = os.path.abspath(os.path.realpath(os.path.join(os.path.dirname(__file__), './hf_download')))
@@ -54,6 +55,14 @@ if IN_HF_SPACE:
     except ImportError:
         print("spacesモジュールのインポートに失敗しました。Hugging Face Space環境外かもしれません")
         GPU_AVAILABLE = torch.cuda.is_available()
 from PIL import Image
 from diffusers import AutoencoderKLHunyuanVideo
@@ -61,8 +70,22 @@ from transformers import LlamaModel, CLIPTextModel, LlamaTokenizerFast, CLIPToke
 from diffusers_helper.hunyuan import encode_prompt_conds, vae_decode, vae_encode, vae_decode_fake
 from diffusers_helper.utils import save_bcthw_as_mp4, crop_or_pad_yield_mask, soft_append_bcthw, resize_and_center_crop, state_dict_weighted_merge, state_dict_offset_merge, generate_timestamp
 from diffusers_helper.models.hunyuan_video_packed import HunyuanVideoTransformer3DModelPacked
 from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan
-from diffusers_helper.memory import cpu, gpu, get_cuda_free_memory_gb, move_model_to_device_with_memory_preservation, offload_model_from_device_for_memory_preservation, fake_diffusers_current_device, DynamicSwapInstaller, unload_complete_models, load_model_as_complete, IN_HF_SPACE as MEMORY_IN_HF_SPACE
 from diffusers_helper.thread_utils import AsyncStream, async_run
 from diffusers_helper.gradio.progress_bar import make_progress_bar_css, make_progress_bar_html
 from transformers import SiglipImageProcessor, SiglipVisionModel
@@ -72,7 +95,7 @@ from diffusers_helper.bucket_tools import find_nearest_bucket
 outputs_folder = './outputs/'
 os.makedirs(outputs_folder, exist_ok=True)
-# Spaces環境では、すべてのCUDA操作を遅延させる
 if not IN_HF_SPACE:
     # 非Spaces環境でのみCUDAメモリを取得
     try:
@@ -109,6 +132,40 @@ else:
 models = {}
 cpu_fallback_mode = not GPU_AVAILABLE  # GPUが利用できない場合、CPU代替モードを使用
 # モデルロード関数を使用
 def load_models():
     global models, cpu_fallback_mode, GPU_INITIALIZED
@@ -141,7 +198,7 @@ def load_models():
             feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='feature_extractor')
             image_encoder = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='image_encoder', torch_dtype=dtype).to(model_device)
-            transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained('tori29umai/FramePackI2V_HY_rotate_landscape', torch_dtype=transformer_dtype).to(model_device)
             print("すべてのモデルの読み込みに成功しました")
         except Exception as e:
@@ -162,7 +219,7 @@ def load_models():
             feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='feature_extractor')
             image_encoder = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='image_encoder', torch_dtype=dtype).to('cpu')
-            transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained('tori29umai/FramePackI2V_HY_rotate_landscape', torch_dtype=transformer_dtype).to('cpu')
             print("CPUモードですべてのモデルの読み込みに成功しました")
@@ -250,7 +307,7 @@ def load_models():
         return {}
-# Hugging Face Spaces GPU装飾子を使用
 if IN_HF_SPACE and 'spaces' in globals() and GPU_AVAILABLE:
     try:
         @spaces.GPU
@@ -273,6 +330,10 @@ if IN_HF_SPACE and 'spaces' in globals() and GPU_AVAILABLE:
         # 装飾子がエラーの場合、非装飾子版を直接使用
         def initialize_models():
             return load_models()
 # 以下の関数内部でモデルの取得を遅延させる
@@ -369,10 +430,17 @@ stream = AsyncStream()
 @torch.no_grad()
-def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache):
     global last_update_time
     last_update_time = time.time()
     # Cap the video length at 3 seconds (matches the min(..., 3.0) clamp below)
     total_second_length = min(total_second_length, 3.0)
@@ -507,13 +575,14 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
                 load_model_as_complete(vae, target_device=device)
             start_latent = vae_encode(input_image_pt, vae)
         except Exception as e:
-            error_msg = f"VAEエンコーディング中にエラーが発生しました: {e}"
-            print(error_msg)
-            traceback.print_exc()
-            stream.output_queue.push(('error', error_msg))
-            stream.output_queue.push(('end', None))
-            return
         # CLIP Vision
         last_update_time = time.time()
@@ -588,6 +657,14 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
                     try:
                         output_filename = os.path.join(outputs_folder, f'{job_id}_final_{total_generated_latent_frames}.mp4')
                         save_bcthw_as_mp4(history_pixels, output_filename, fps=30, crf=18)
                         stream.output_queue.push(('file', output_filename))
                     except Exception as e:
                         print(f"最終動画保存中にエラーが発生しました: {e}")
@@ -816,6 +893,13 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
                 save_start_time = time.time()
                 save_bcthw_as_mp4(history_pixels, output_filename, fps=30, crf=18)
                 print(f"動画保存完了、所要時間: {time.time() - save_start_time:.2f}秒")
                 print(f'デコード完了。現在の潜在変数形状 {real_history_latents.shape}; ピクセル形状 {history_pixels.shape}')
@@ -876,10 +960,10 @@ def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_wind
     return
-# Hugging Face Spaces GPU装飾子を使用してプロセス関数を処理
 if IN_HF_SPACE and 'spaces' in globals():
     @spaces.GPU
-    def process_with_gpu(input_image, prompt, n_prompt, seed, total_second_length, use_teacache):
         global stream
         assert input_image is not None, '入力画像がありません！'
@@ -897,8 +981,8 @@ if IN_HF_SPACE and 'spaces' in globals():
         try:
             stream = AsyncStream()
-            # ワーカーを非同期で起動
-            async_run(worker, input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache)
             output_filename = None
             prev_output_filename = None
@@ -960,7 +1044,8 @@ if IN_HF_SPACE and 'spaces' in globals():
     process = process_with_gpu
 else:
-    def process(input_image, prompt, n_prompt, seed, total_second_length, use_teacache):
         global stream
         assert input_image is not None, '入力画像がありません！'
@@ -977,8 +1062,8 @@ else:
         try:
             stream = AsyncStream()
-            # ワーカーを非同期で起動
-            async_run(worker, input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache)
             output_filename = None
             prev_output_filename = None
@@ -1020,7 +1105,7 @@ else:
                 except Exception as e:
                     print(f"出力処理中にエラーが発生しました: {e}")
                     # 長時間更新がないか確認
-                    current_time = time.time()
                     if current_time - last_update_time > 60:  # 60秒間更新がない場合、処理がフリーズした可能性
                         print(f"処理がフリーズした可能性があります。{current_time - last_update_time:.1f}秒間更新がありません")
@@ -1071,7 +1156,7 @@ def end_process():
 quick_prompts = [
-    'The camera smoothly orbits around the center of the scene, keeping the center point fixed and always in view',
 ]
 quick_prompts = [[x] for x in quick_prompts]
@@ -1245,7 +1330,7 @@ with block:
             prompt = gr.Textbox(
                 label="プロンプト / Prompt",
-                value='The camera smoothly orbits around the center of the scene, keeping the center point fixed and always in view',
                 elem_id="prompt-input"
             )
@@ -1257,7 +1342,14 @@ with block:
             )
             example_quick_prompts.click(lambda x: x[0], inputs=[example_quick_prompts], outputs=prompt, show_progress=False, queue=False)
-            # スタイルと二言語ラベルを追加したボタン
             with gr.Row(elem_classes="button-container"):
                 start_button = gr.Button(
                     value="生成開始 / Generate",
@@ -1332,12 +1424,11 @@ with block:
             error_message = gr.HTML('', elem_id='error-message', visible=True)
     # 処理関数
-    ips = [input_image, prompt, n_prompt, seed, total_second_length, use_teacache]
     # 開始と終了ボタンのイベント
     start_button.click(fn=process, inputs=ips, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button, end_button])
     end_button.click(fn=end_process)
-block.launch()

 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
 import json
+import subprocess  # FFmpeg実行用に追加
 os.environ['HF_HOME'] = os.path.abspath(os.path.realpath(os.path.join(os.path.dirname(__file__), './hf_download')))
     except ImportError:
         print("spacesモジュールのインポートに失敗しました。Hugging Face Space環境外かもしれません")
         GPU_AVAILABLE = torch.cuda.is_available()
+else:
+    # ローカル環境ではCUDA利用可能性を直接チェック
+    GPU_AVAILABLE = torch.cuda.is_available()
+    if GPU_AVAILABLE:
+        print(f"GPU利用可能: デバイス名 {torch.cuda.get_device_name(0)}")
+        print(f"GPUメモリ: {torch.cuda.get_device_properties(0).total_memory / 1e9} GB")
+    else:
+        print("GPU利用不可: CPUモードで実行します")
 from PIL import Image
 from diffusers import AutoencoderKLHunyuanVideo
 from diffusers_helper.hunyuan import encode_prompt_conds, vae_decode, vae_encode, vae_decode_fake
 from diffusers_helper.utils import save_bcthw_as_mp4, crop_or_pad_yield_mask, soft_append_bcthw, resize_and_center_crop, state_dict_weighted_merge, state_dict_offset_merge, generate_timestamp
 from diffusers_helper.models.hunyuan_video_packed import HunyuanVideoTransformer3DModelPacked
+from utils.lora_utils import merge_lora_to_state_dict
 from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan
+# memory.pyからのインポートを修正
+from diffusers_helper.memory import cpu, gpu, get_cuda_free_memory_gb, move_model_to_device_with_memory_preservation, offload_model_from_device_for_memory_preservation, fake_diffusers_current_device, DynamicSwapInstaller, unload_complete_models, load_model_as_complete
+# ローカル環境用のMEMORY_IN_HF_SPACE変数を定義
+MEMORY_IN_HF_SPACE = False
+if IN_HF_SPACE:
+    try:
+        # memory.pyにIN_HF_SPACEが定義されている場合はそれを使用
+        from diffusers_helper.memory import IN_HF_SPACE as MEMORY_IN_HF_SPACE
+    except ImportError:
+        # 定義されていない場合はローカル変数を使用
+        MEMORY_IN_HF_SPACE = IN_HF_SPACE
 from diffusers_helper.thread_utils import AsyncStream, async_run
 from diffusers_helper.gradio.progress_bar import make_progress_bar_css, make_progress_bar_html
 from transformers import SiglipImageProcessor, SiglipVisionModel
 outputs_folder = './outputs/'
 os.makedirs(outputs_folder, exist_ok=True)
+# ローカル/Spaces環境に基づいてメモリ設定を行う
 if not IN_HF_SPACE:
     # 非Spaces環境でのみCUDAメモリを取得
     try:
 models = {}
 cpu_fallback_mode = not GPU_AVAILABLE  # GPUが利用できない場合、CPU代替モードを使用
+# FFmpegで動画を左右反転する関数を追加
+def flip_video_horizontally(input_file, output_file):
+    """Mirror a video left-to-right with FFmpeg.
+
+    The flipped video is first encoded into a temporary file next to
+    ``output_file`` (same name with ``_flipped`` inserted before the
+    extension) and only then moved over ``output_file``. Because of that,
+    ``input_file`` and ``output_file`` may be the same path.
+
+    Args:
+        input_file: Path of the video to read.
+        output_file: Path the flipped video is written to; an existing file
+            is replaced.
+
+    Returns:
+        True if the flipped video was written to ``output_file``, False if
+        FFmpeg failed or any other error occurred. Errors are printed, never
+        raised, so callers can fall back to the unflipped video.
+    """
+    # Encode into a sibling temp file so a failed run never corrupts the
+    # original, and so in-place flipping (input == output) works.
+    stem, ext = os.path.splitext(output_file)
+    temp_output = stem + "_flipped" + ext
+    cmd = [
+        'ffmpeg',
+        '-i', input_file,
+        '-vf', 'hflip',  # horizontal mirror filter
+        '-c:v', 'libx264',  # re-encode as H.264
+        '-crf', '18',  # same CRF the callers pass to save_bcthw_as_mp4
+        '-y',  # overwrite an existing temp file without prompting
+        temp_output
+    ]
+    try:
+        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        if not os.path.exists(temp_output):
+            print("動画の反転処理に失敗しました")
+            return False
+        # os.replace overwrites the destination atomically on the same filesystem.
+        os.replace(temp_output, output_file)
+        print(f"動画を水平方向に反転して保存しました: {output_file}")
+        return True
+    except Exception as e:
+        print(f"FFmpegによる動画反転中にエラーが発生しました: {e}")
+        # CalledProcessError carries FFmpeg's captured stderr; surface it,
+        # otherwise the real cause of the failure is invisible.
+        ffmpeg_stderr = getattr(e, "stderr", None)
+        if ffmpeg_stderr:
+            print(ffmpeg_stderr.decode("utf-8", errors="replace"))
+        traceback.print_exc()
+        return False
+    finally:
+        # After a successful replace the temp file is gone; this only fires
+        # when a failed or partial encode was left behind.
+        if os.path.exists(temp_output):
+            try:
+                os.remove(temp_output)
+            except OSError as cleanup_error:
+                print(f"一時ファイルの削除に失敗しました: {cleanup_error}")
 # モデルロード関数を使用
 def load_models():
     global models, cpu_fallback_mode, GPU_INITIALIZED
             feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='feature_extractor')
             image_encoder = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='image_encoder', torch_dtype=dtype).to(model_device)
+            transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained('tori29umai/FramePackI2V_HY_rotate_indoor', torch_dtype=transformer_dtype).to(model_device)
             print("すべてのモデルの読み込みに成功しました")
         except Exception as e:
             feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='feature_extractor')
             image_encoder = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='image_encoder', torch_dtype=dtype).to('cpu')
+            transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained('tori29umai/FramePackI2V_HY_rotate_indoor', torch_dtype=transformer_dtype).to('cpu')
             print("CPUモードですべてのモデルの読み込みに成功しました")
         return {}
+# ローカル環境とHugging Face Spaces環境で分岐
 if IN_HF_SPACE and 'spaces' in globals() and GPU_AVAILABLE:
     try:
         @spaces.GPU
         # 装飾子がエラーの場合、非装飾子版を直接使用
         def initialize_models():
             return load_models()
+else:
+    # ローカル環境用の関数定義
+    def initialize_models():
+        return load_models()
 # 以下の関数内部でモデルの取得を遅延させる
 @torch.no_grad()
+def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, rotation_direction):
     global last_update_time
     last_update_time = time.time()
+    # 回転方向の処理
+    is_counterclockwise = "counter" in rotation_direction.lower()
+    if is_counterclockwise:
+        print("反時計回りモードが選択されました。画像を左右反転して処理します。")
+        # 画像を水平方向に反転
+        input_image = np.fliplr(input_image).copy()
     # Cap the video length at 3 seconds (matches the min(..., 3.0) clamp below)
     total_second_length = min(total_second_length, 3.0)
                 load_model_as_complete(vae, target_device=device)
             start_latent = vae_encode(input_image_pt, vae)
         except Exception as e:
+                    error_msg = f"VAEエンコーディング中にエラーが発生しました: {e}"
+                    print(error_msg)
+                    traceback.print_exc()
+                    stream.output_queue.push(('error', error_msg))
+                    stream.output_queue.push(('end', None))
+                    return
         # CLIP Vision
         last_update_time = time.time()
                     try:
                         output_filename = os.path.join(outputs_folder, f'{job_id}_final_{total_generated_latent_frames}.mp4')
                         save_bcthw_as_mp4(history_pixels, output_filename, fps=30, crf=18)
+                        # 反時計回りモードの場合、最終出力も反転
+                        if is_counterclockwise:
+                            print("反時計回りモード: 最終出力動画を水平方向に反転します")
+                            flip_result = flip_video_horizontally(output_filename, output_filename)
+                            if not flip_result:
+                                print("警告: 最終動画の反転に失敗しました。元の動画を使用します。")
                         stream.output_queue.push(('file', output_filename))
                     except Exception as e:
                         print(f"最終動画保存中にエラーが発生しました: {e}")
                 save_start_time = time.time()
                 save_bcthw_as_mp4(history_pixels, output_filename, fps=30, crf=18)
                 print(f"動画保存完了、所要時間: {time.time() - save_start_time:.2f}秒")
+                # 反時計回りモードの場合、FFmpegで反転処理
+                if is_counterclockwise:
+                    print("反時計回りモード: 出力動画を水平方向に反転します")
+                    flip_result = flip_video_horizontally(output_filename, output_filename)
+                    if not flip_result:
+                        print("警告: 動画の反転に失敗しました。元の動画を使用します。")
                 print(f'デコード完了。現在の潜在変数形状 {real_history_latents.shape}; ピクセル形状 {history_pixels.shape}')
     return
+# ローカル環境とHugging Face Spaces環境で処理関数を分岐
 if IN_HF_SPACE and 'spaces' in globals():
     @spaces.GPU
+    def process_with_gpu(input_image, prompt, n_prompt, seed, total_second_length, use_teacache, rotation_direction):
         global stream
         assert input_image is not None, '入力画像がありません！'
         try:
             stream = AsyncStream()
+            # ワーカーを非同期で起動（rotation_directionパラメータを追加）
+            async_run(worker, input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, rotation_direction)
             output_filename = None
             prev_output_filename = None
     process = process_with_gpu
 else:
+    # ローカル環境用の処理関数
+    def process(input_image, prompt, n_prompt, seed, total_second_length, use_teacache, rotation_direction):
         global stream
         assert input_image is not None, '入力画像がありません！'
         try:
             stream = AsyncStream()
+            # ワーカーを非同期で起動（rotation_directionパラメータを追加）
+            async_run(worker, input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, rotation_direction)
             output_filename = None
             prev_output_filename = None
                 except Exception as e:
                     print(f"出力処理中にエラーが発生しました: {e}")
                     # 長時間更新がないか確認
+                    current_time = time.time()
                     if current_time - last_update_time > 60:  # 60秒間更新がない場合、処理がフリーズした可能性
                         print(f"処理がフリーズした可能性があります。{current_time - last_update_time:.1f}秒間更新がありません")
 quick_prompts = [
+    'The camera smoothly orbits around the center of the scene, gradually pulling back while keeping the center point fixed in view.',
 ]
 quick_prompts = [[x] for x in quick_prompts]
             prompt = gr.Textbox(
                 label="プロンプト / Prompt",
+                value= 'The camera smoothly orbits around the center of the scene, gradually pulling back while keeping the center point fixed in view.',
                 elem_id="prompt-input"
             )
             )
             example_quick_prompts.click(lambda x: x[0], inputs=[example_quick_prompts], outputs=prompt, show_progress=False, queue=False)
+            rotation_direction = gr.Dropdown(
+                ["時計回り / clockwise", "反時計回り / counterclockwise"],
+                label="回転の向き / Direction of rotation",
+                value="時計回り / clockwise"  # デフォルト値
+            )
             with gr.Row(elem_classes="button-container"):
                 start_button = gr.Button(
                     value="生成開始 / Generate",
             error_message = gr.HTML('', elem_id='error-message', visible=True)
     # 処理関数
+    ips = [input_image, prompt, n_prompt, seed, total_second_length, use_teacache, rotation_direction]
     # 開始と終了ボタンのイベント
     start_button.click(fn=process, inputs=ips, outputs=[result_video, preview_image, progress_desc, progress_bar, start_button, end_button])
     end_button.click(fn=end_process)
+block.launch()