fastvideogen

Running

App Files Community

ford442 committed on Nov 28, 2024

Commit

be81944

verified ·

1 Parent(s): 1504958

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -20

app.py CHANGED Viewed

@@ -21,15 +21,12 @@ from PIL import Image
 import tempfile
 import os
 import gc
-from openai import OpenAI
 import csv
 from datetime import datetime
 # Load Hugging Face token if needed
 hf_token = os.getenv("HF_TOKEN")
-openai_api_key = os.getenv("OPENAI_API_KEY")
-client = OpenAI(api_key=openai_api_key)
 system_prompt_t2v_path = "assets/system_prompt_t2v.txt"
 system_prompt_i2v_path = "assets/system_prompt_i2v.txt"
 with open(system_prompt_t2v_path, "r") as f:
@@ -48,7 +45,7 @@ vae_dir = Path(model_path) / "vae"
 unet_dir = Path(model_path) / "unet"
 scheduler_dir = Path(model_path) / "scheduler"
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 DATA_DIR = "/data"
 os.makedirs(DATA_DIR, exist_ok=True)
@@ -57,7 +54,6 @@ LOG_FILE_PATH = os.path.join("/data", "user_requests.csv")
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path)
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path)
 if not os.path.exists(LOG_FILE_PATH):
     with open(LOG_FILE_PATH, "w", newline="") as f:
         writer = csv.writer(f)
@@ -80,7 +76,6 @@ if not os.path.exists(LOG_FILE_PATH):
             ]
         )
 @lru_cache(maxsize=128)
 def log_request(
     request_type,
@@ -123,7 +118,6 @@ def log_request(
         except Exception as e:
             print(f"Error logging request: {e}")
 def compute_clip_embedding(text=None, image=None):
     """
     Compute CLIP embedding for a given text or image.
@@ -138,7 +132,6 @@ def compute_clip_embedding(text=None, image=None):
     embedding = outputs.detach().cpu().numpy().flatten().tolist()
     return embedding
 def load_vae(vae_dir):
     vae_ckpt_path = vae_dir / "vae_diffusion_pytorch_model.safetensors"
     vae_config_path = vae_dir / "config.json"
@@ -149,7 +142,6 @@ def load_vae(vae_dir):
     vae.load_state_dict(vae_state_dict)
     return vae.to(device=device, dtype=torch.bfloat16)
 def load_unet(unet_dir):
     unet_ckpt_path = unet_dir / "unet_diffusion_pytorch_model.safetensors"
     unet_config_path = unet_dir / "config.json"
@@ -159,13 +151,11 @@ def load_unet(unet_dir):
     transformer.load_state_dict(unet_state_dict, strict=True)
     return transformer.to(device=device, dtype=torch.bfloat16)
 def load_scheduler(scheduler_dir):
     scheduler_config_path = scheduler_dir / "scheduler_config.json"
     scheduler_config = RectifiedFlowScheduler.load_config(scheduler_config_path)
     return RectifiedFlowScheduler.from_config(scheduler_config)
 # Helper function for image processing
 def center_crop_and_resize(frame, target_height, target_width):
     h, w, _ = frame.shape
@@ -182,7 +172,6 @@ def center_crop_and_resize(frame, target_height, target_width):
     frame_resized = cv2.resize(frame_cropped, (target_width, target_height))
     return frame_resized
 def load_image_to_tensor_with_resize(image_path, target_height=512, target_width=768):
     image = Image.open(image_path).convert("RGB")
     image_np = np.array(image)
@@ -191,7 +180,6 @@ def load_image_to_tensor_with_resize(image_path, target_height=512, target_width
     frame_tensor = (frame_tensor / 127.5) - 1.0
     return frame_tensor.unsqueeze(0).unsqueeze(2)
 def enhance_prompt_if_enabled(prompt, enhance_toggle, type="t2v"):
     if not enhance_toggle:
         print("Enhance toggle is off, Prompt: ", prompt)
@@ -215,7 +203,6 @@ def enhance_prompt_if_enabled(prompt, enhance_toggle, type="t2v"):
         print(f"Error: {e}")
         return prompt
 # Preset options for resolution and frame configuration
 preset_options = [
     {"label": "1216x704, 41 frames", "width": 1216, "height": 704, "num_frames": 41},
@@ -247,7 +234,6 @@ preset_options = [
     {"label": "512x320, 257 frames", "width": 512, "height": 320, "num_frames": 257},
 ]
 # Function to toggle visibility of sliders based on preset selection
 def preset_changed(preset):
     if preset != "Custom":
@@ -270,7 +256,6 @@ def preset_changed(preset):
             gr.update(visible=True),
         )
 # Load models
 vae = load_vae(vae_dir)
 unet = load_unet(unet_dir)
@@ -288,7 +273,6 @@ pipeline = XoraVideoPipeline(
     vae=vae,
 ).to(device)
 def generate_video_from_text(
     prompt="",
     enhance_prompt_toggle=False,
@@ -490,7 +474,6 @@ def generate_video_from_image(
     return output_path
 def create_advanced_options():
     with gr.Accordion("Step 4: Advanced Options (Optional)", open=False):
         seed = gr.Slider(label="4.1 Seed", minimum=0, maximum=1000000, step=1, value=646373)
@@ -531,7 +514,6 @@ def create_advanced_options():
             num_frames_slider,
         ]
 # Define the Gradio interface with tabs
 with gr.Blocks(theme=gr.themes.Soft()) as iface:
     with gr.Row(elem_id="title-row"):

 import tempfile
 import os
 import gc
 import csv
 from datetime import datetime
 # Load Hugging Face token if needed
 hf_token = os.getenv("HF_TOKEN")
 system_prompt_t2v_path = "assets/system_prompt_t2v.txt"
 system_prompt_i2v_path = "assets/system_prompt_i2v.txt"
 with open(system_prompt_t2v_path, "r") as f:
 unet_dir = Path(model_path) / "unet"
 scheduler_dir = Path(model_path) / "scheduler"
+device = torch.device("cuda")
 DATA_DIR = "/data"
 os.makedirs(DATA_DIR, exist_ok=True)
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path)
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path)
 if not os.path.exists(LOG_FILE_PATH):
     with open(LOG_FILE_PATH, "w", newline="") as f:
         writer = csv.writer(f)
             ]
         )
 @lru_cache(maxsize=128)
 def log_request(
     request_type,
         except Exception as e:
             print(f"Error logging request: {e}")
 def compute_clip_embedding(text=None, image=None):
     """
     Compute CLIP embedding for a given text or image.
     embedding = outputs.detach().cpu().numpy().flatten().tolist()
     return embedding
 def load_vae(vae_dir):
     vae_ckpt_path = vae_dir / "vae_diffusion_pytorch_model.safetensors"
     vae_config_path = vae_dir / "config.json"
     vae.load_state_dict(vae_state_dict)
     return vae.to(device=device, dtype=torch.bfloat16)
 def load_unet(unet_dir):
     unet_ckpt_path = unet_dir / "unet_diffusion_pytorch_model.safetensors"
     unet_config_path = unet_dir / "config.json"
     transformer.load_state_dict(unet_state_dict, strict=True)
     return transformer.to(device=device, dtype=torch.bfloat16)
 def load_scheduler(scheduler_dir):
     scheduler_config_path = scheduler_dir / "scheduler_config.json"
     scheduler_config = RectifiedFlowScheduler.load_config(scheduler_config_path)
     return RectifiedFlowScheduler.from_config(scheduler_config)
 # Helper function for image processing
 def center_crop_and_resize(frame, target_height, target_width):
     h, w, _ = frame.shape
     frame_resized = cv2.resize(frame_cropped, (target_width, target_height))
     return frame_resized
 def load_image_to_tensor_with_resize(image_path, target_height=512, target_width=768):
     image = Image.open(image_path).convert("RGB")
     image_np = np.array(image)
     frame_tensor = (frame_tensor / 127.5) - 1.0
     return frame_tensor.unsqueeze(0).unsqueeze(2)
 def enhance_prompt_if_enabled(prompt, enhance_toggle, type="t2v"):
     if not enhance_toggle:
         print("Enhance toggle is off, Prompt: ", prompt)
         print(f"Error: {e}")
         return prompt
 # Preset options for resolution and frame configuration
 preset_options = [
     {"label": "1216x704, 41 frames", "width": 1216, "height": 704, "num_frames": 41},
     {"label": "512x320, 257 frames", "width": 512, "height": 320, "num_frames": 257},
 ]
 # Function to toggle visibility of sliders based on preset selection
 def preset_changed(preset):
     if preset != "Custom":
             gr.update(visible=True),
         )
 # Load models
 vae = load_vae(vae_dir)
 unet = load_unet(unet_dir)
     vae=vae,
 ).to(device)
 def generate_video_from_text(
     prompt="",
     enhance_prompt_toggle=False,
     return output_path
 def create_advanced_options():
     with gr.Accordion("Step 4: Advanced Options (Optional)", open=False):
         seed = gr.Slider(label="4.1 Seed", minimum=0, maximum=1000000, step=1, value=646373)
             num_frames_slider,
         ]
 # Define the Gradio interface with tabs
 with gr.Blocks(theme=gr.themes.Soft()) as iface:
     with gr.Row(elem_id="title-row"):