pixio-video-stream

Running on Zero

App Files Files Community

tsi-org committed on Jun 22

Commit

94ff503

verified ·

1 Parent(s): 607da1d

Update app.py

Browse files

Files changed (1) hide show

app.py +116 -135

app.py CHANGED Viewed

@@ -141,26 +141,12 @@ transformer.eval().to(dtype=torch.float16).requires_grad_(False)
 text_encoder.to(gpu)
 transformer.to(gpu)
-APP_STATE = {
-    "torch_compile_applied": False,
-    "fp8_applied": False,
-    "current_use_taehv": False,
-    "current_vae_decoder": None,
-    "last_generated_frames": [],  # Store frames for download
-    "last_generation_info": {}    # Store metadata
-}
 def frames_to_ts_file(frames, filepath, fps = 15):
     """
     Convert frames directly to .ts file using PyAV.
-    Args:
-        frames: List of numpy arrays (HWC, RGB, uint8)
-        filepath: Output file path
-        fps: Frames per second
-    Returns:
-        The filepath of the created file
     """
     if not frames:
         return filepath
@@ -200,83 +186,52 @@ def frames_to_ts_file(frames, filepath, fps = 15):
     return filepath
def frames_to_mp4_file(frames, filepath, fps=15):
    """
    Encode RGB frames into an H.264 MP4 file with PyAV, for download.

    Args:
        frames: List of numpy arrays (HWC, RGB, uint8). The stream
            dimensions are taken from the first frame.
        filepath: Output file path.
        fps: Frames per second of the output stream.

    Returns:
        ``filepath``. When ``frames`` is empty nothing is written and the
        path is returned unchanged.
    """
    if not frames:
        return filepath

    height, width = frames[0].shape[:2]

    # Open the container as a context manager so it is closed even when
    # stream setup fails, not only when encoding does.
    with av.open(filepath, mode='w', format='mp4') as container:
        stream = container.add_stream('h264', rate=fps)
        stream.width = width
        stream.height = height
        stream.pix_fmt = 'yuv420p'

        # High quality settings for download
        stream.options = {
            'preset': 'medium',
            'crf': '18',  # Higher quality
            'profile': 'high',
            'level': '4.0',
        }

        for frame_np in frames:
            frame = av.VideoFrame.from_ndarray(frame_np, format='rgb24')
            frame = frame.reformat(format=stream.pix_fmt)
            for packet in stream.encode(frame):
                container.mux(packet)

        # Calling encode() with no frame flushes packets still buffered
        # in the encoder.
        for packet in stream.encode():
            container.mux(packet)

    return filepath
def create_download_video():
    """
    Encode the most recently generated frames into an MP4 under downloads/.

    Frames and metadata (prompt, fps) are read from ``APP_STATE``.

    Returns:
        Path of the written MP4, or ``None`` when no frames are stored or
        when anything goes wrong while creating the file.
    """
    stored_frames = APP_STATE["last_generated_frames"]
    if not stored_frames:
        return None

    try:
        os.makedirs("downloads", exist_ok=True)

        # File name combines the creation time with a short prompt digest.
        timestamp = int(time.time())
        generation_info = APP_STATE["last_generation_info"]
        prompt_text = generation_info.get("prompt", "")
        prompt_hash = hashlib.md5(prompt_text.encode()).hexdigest()[:8]
        filepath = os.path.join(
            "downloads", f"pixio_video_{timestamp}_{prompt_hash}.mp4"
        )

        frames_to_mp4_file(stored_frames, filepath, generation_info.get("fps", 15))
        print(f"✅ Download video created: {filepath}")
        return filepath
    except Exception as e:
        # Best effort: report the failure and signal "no file" to the caller.
        print(f"❌ Error creating download video: {e}")
        import traceback
        traceback.print_exc()
        return None
 def initialize_vae_decoder(use_taehv=False, use_trt=False):
@@ -326,6 +281,13 @@ def initialize_vae_decoder(use_taehv=False, use_trt=False):
     APP_STATE["current_vae_decoder"] = vae_decoder
     print(f"✅ VAE decoder initialized: {'TAEHV' if use_taehv else 'Default VAE'}")
 # Initialize with default VAE
 initialize_vae_decoder(use_taehv=False, use_trt=args.trt)
@@ -340,17 +302,14 @@ pipeline.to(dtype=torch.float16).to(gpu)
 @spaces.GPU
 def video_generation_handler_streaming(prompt, seed=42, fps=15):
     """
-    Generator function that yields .ts video chunks using PyAV for streaming.
-    Now optimized for block-based processing and stores frames for download.
     """
     if seed == -1:
         seed = random.randint(0, 2**32 - 1)
-    print(f"🎬 Starting PyAV streaming: '{prompt}', seed: {seed}")
-    # Clear previous generation data
-    APP_STATE["last_generated_frames"] = []
-    APP_STATE["last_generation_info"] = {"prompt": prompt, "seed": seed, "fps": fps}
     # Setup
     conditional_dict = text_encoder(text_prompts=[prompt])
@@ -371,9 +330,14 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
     all_num_frames = [pipeline.num_frame_per_block] * num_blocks
     total_frames_yielded = 0
-    # Ensure temp directory exists
-    os.makedirs("gradio_tmp", exist_ok=True)
     # Generation loop
     for idx, current_num_frames in enumerate(all_num_frames):
@@ -424,10 +388,8 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
         elif APP_STATE["current_use_taehv"] and idx > 0:
             pixels = pixels[:, 12:]
-        print(f"🔍 DEBUG Block {idx}: Pixels shape after skipping: {pixels.shape}")
-        # Process all frames from this block at once
-        all_frames_from_block = []
         for frame_idx in range(pixels.shape[1]):
             frame_tensor = pixels[0, frame_idx]
@@ -436,17 +398,14 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
             frame_np = frame_np.to(torch.uint8).cpu().numpy()
             frame_np = np.transpose(frame_np, (1, 2, 0))  # CHW -> HWC
-            all_frames_from_block.append(frame_np)
-            # Store frame for download
-            APP_STATE["last_generated_frames"].append(frame_np)
             total_frames_yielded += 1
-            # Yield status update for each frame (cute tracking!)
             blocks_completed = idx
             current_block_progress = (frame_idx + 1) / pixels.shape[1]
             total_progress = (blocks_completed + current_block_progress) / num_blocks * 100
-            # Cap at 100% to avoid going over
             total_progress = min(total_progress, 100.0)
             frame_status_html = (
@@ -461,34 +420,51 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
                 f"</div>"
             )
-            # Yield None for video but update status (frame-by-frame tracking)
-            yield None, frame_status_html, gr.update(visible=False)
-        # Encode entire block as one chunk immediately
-        if all_frames_from_block:
-            print(f"📹 Encoding block {idx} with {len(all_frames_from_block)} frames")
             try:
-                chunk_uuid = str(uuid.uuid4())[:8]
-                ts_filename = f"block_{idx:04d}_{chunk_uuid}.ts"
-                ts_path = os.path.join("gradio_tmp", ts_filename)
-                frames_to_ts_file(all_frames_from_block, ts_path, fps)
-                # Calculate final progress for this block
-                total_progress = (idx + 1) / num_blocks * 100
-                # Yield the actual video chunk
-                yield ts_path, gr.update(), gr.update(visible=False)
             except Exception as e:
-                print(f"⚠️ Error encoding block {idx}: {e}")
-                import traceback
-                traceback.print_exc()
         current_start_frame += current_num_frames
-    # Final completion status
     final_status_html = (
         f"<div style='padding: 16px; border: 1px solid #198754; background: linear-gradient(135deg, #d1e7dd, #f8f9fa); border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>"
         f"  <div style='display: flex; align-items: center; margin-bottom: 8px;'>"
@@ -500,27 +476,33 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
         f"      📊 Generated {total_frames_yielded} frames across {num_blocks} blocks"
         f"    </p>"
         f"    <p style='margin: 4px 0 0 0; color: #0f5132; font-size: 14px;'>"
-        f"      🎬 Playback: {fps} FPS • 📁 Format: MPEG-TS/H.264"
         f"    </p>"
         f"  </div>"
         f"</div>"
     )
-    yield None, final_status_html, gr.update(visible=True)
-    print(f"✅ PyAV streaming complete! {total_frames_yielded} frames across {num_blocks} blocks")
 # --- Gradio UI Layout ---
 with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
     gr.Markdown("# 🚀 Pixio Streaming Video Generation")
-    gr.Markdown("Real-time video generation with Pixio), [[Project page]](https://pixio.myapps.ai) )")
     with gr.Row():
         with gr.Column(scale=2):
             with gr.Group():
                 prompt = gr.Textbox(
                     label="Prompt",
-                    placeholder="A stylish woman walks down a Tokyo street...",
                     lines=4,
-                    value=""
                 )
                 enhance_button = gr.Button("✨ Enhance Prompt", variant="secondary")
@@ -576,20 +558,20 @@ with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
                 label="Generation Status"
             )
-            # Download button (initially hidden)
-            download_btn = gr.DownloadButton(
-                label="📥 Download MP4",
-                value=create_download_video,
-                variant="secondary",
-                size="lg",
-                visible=False
-            )
-    # Connect the generator to the streaming video
     start_btn.click(
         fn=video_generation_handler_streaming,
         inputs=[prompt, seed, fps],
-        outputs=[streaming_video, status_display, download_btn]
     )
     enhance_button.click(
@@ -607,18 +589,17 @@ if __name__ == "__main__":
     os.makedirs("downloads", exist_ok=True)
     print("🚀 Starting Self-Forcing Streaming Demo")
-    print(f"📁 Temporary files will be stored in: gradio_tmp/")
-    print(f"📥 Download files will be stored in: downloads/")
-    print(f"🎯 Chunk encoding: PyAV (MPEG-TS/H.264)")
-    print(f"⚡ GPU acceleration: {gpu}")
     demo.queue().launch(
         server_name=args.host,
         server_port=args.port,
         share=args.share,
         show_error=True,
-        max_threads=40,
-        mcp_server=True
     )
 # import subprocess

 text_encoder.to(gpu)
 transformer.to(gpu)
+# Global state for download
+CURRENT_DOWNLOAD_PATH = None
 def frames_to_ts_file(frames, filepath, fps = 15):
     """
     Convert frames directly to .ts file using PyAV.
     """
     if not frames:
         return filepath
     return filepath
def create_hls_playlist(ts_files, playlist_path, fps=15, segment_duration=1.0):
    """
    Write an HLS media playlist (.m3u8) listing the given TS segments.

    Args:
        ts_files: Paths of the .ts segments, in playback order. Only the
            basename of each path is written to the playlist.
        playlist_path: Where to write the playlist file.
        fps: Unused; kept so existing callers keep working.
        segment_duration: Duration in seconds advertised for every segment.

    Returns:
        ``playlist_path``.
    """
    # NOTE(review): every segment is advertised with the same duration,
    # regardless of how many frames it holds or the fps it was encoded at.
    # Confirm this matches the real segment length.
    playlist_lines = [
        "#EXTM3U",
        "#EXT-X-VERSION:3",
        f"#EXT-X-TARGETDURATION:{int(segment_duration) + 1}",
        "#EXT-X-MEDIA-SEQUENCE:0",
        "#EXT-X-PLAYLIST-TYPE:VOD",
    ]

    for ts_file in ts_files:
        playlist_lines.append(f"#EXTINF:{segment_duration:.1f},")
        playlist_lines.append(os.path.basename(ts_file))

    playlist_lines.append("#EXT-X-ENDLIST")

    # HLS playlists must be UTF-8; do not depend on the platform default.
    # Every line, including the last, is newline-terminated.
    with open(playlist_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(playlist_lines) + '\n')

    return playlist_path
def frames_to_mp4_file(frames, filepath, fps=15):
    """
    Write the given frames to an MP4 file through an imageio writer.

    Args:
        frames: Frames to append to the writer, in order.
        filepath: Output file path.
        fps: Frames per second passed to the writer.

    Returns:
        ``filepath`` on success; ``None`` when ``frames`` is empty or when
        writing fails (the error is printed, not raised).
    """
    written_path = None

    if frames:
        try:
            # Use imageio for reliable MP4 creation
            with imageio.get_writer(
                filepath, fps=fps, codec='libx264', quality=8
            ) as mp4_writer:
                for image in frames:
                    mp4_writer.append_data(image)
            print(f"✅ MP4 created successfully: {filepath}")
            written_path = filepath
        except Exception as err:
            # Best effort: report and fall through to the None result.
            print(f"❌ Error creating MP4: {err}")

    return written_path
 def initialize_vae_decoder(use_taehv=False, use_trt=False):
     APP_STATE["current_vae_decoder"] = vae_decoder
     print(f"✅ VAE decoder initialized: {'TAEHV' if use_taehv else 'Default VAE'}")
+APP_STATE = {
+    "torch_compile_applied": False,
+    "fp8_applied": False,
+    "current_use_taehv": False,
+    "current_vae_decoder": None,
+}
 # Initialize with default VAE
 initialize_vae_decoder(use_taehv=False, use_trt=args.trt)
 @spaces.GPU
 def video_generation_handler_streaming(prompt, seed=42, fps=15):
     """
+    Generator function that creates HLS stream and final MP4.
     """
+    global CURRENT_DOWNLOAD_PATH
     if seed == -1:
         seed = random.randint(0, 2**32 - 1)
+    print(f"🎬 Starting HLS streaming: '{prompt}', seed: {seed}")
     # Setup
     conditional_dict = text_encoder(text_prompts=[prompt])
     all_num_frames = [pipeline.num_frame_per_block] * num_blocks
     total_frames_yielded = 0
+    all_frames_for_download = []  # Store frames for final MP4
+    ts_files = []  # Store TS files for HLS playlist
+    # Create unique session directory
+    session_id = str(uuid.uuid4())[:8]
+    session_dir = os.path.join("gradio_tmp", f"session_{session_id}")
+    os.makedirs(session_dir, exist_ok=True)
+    os.makedirs("downloads", exist_ok=True)
     # Generation loop
     for idx, current_num_frames in enumerate(all_num_frames):
         elif APP_STATE["current_use_taehv"] and idx > 0:
             pixels = pixels[:, 12:]
+        # Process frames from this block
+        block_frames = []
         for frame_idx in range(pixels.shape[1]):
             frame_tensor = pixels[0, frame_idx]
             frame_np = frame_np.to(torch.uint8).cpu().numpy()
             frame_np = np.transpose(frame_np, (1, 2, 0))  # CHW -> HWC
+            block_frames.append(frame_np)
+            all_frames_for_download.append(frame_np)  # Store for final MP4
             total_frames_yielded += 1
+            # Progress tracking
             blocks_completed = idx
             current_block_progress = (frame_idx + 1) / pixels.shape[1]
             total_progress = (blocks_completed + current_block_progress) / num_blocks * 100
             total_progress = min(total_progress, 100.0)
             frame_status_html = (
                 f"</div>"
             )
+            yield None, frame_status_html
+        # Create TS segment for this block
+        if block_frames:
             try:
+                ts_filename = f"segment_{idx:04d}.ts"
+                ts_path = os.path.join(session_dir, ts_filename)
+                frames_to_ts_file(block_frames, ts_path, fps)
+                ts_files.append(ts_path)
+                # Create/update HLS playlist
+                playlist_path = os.path.join(session_dir, "playlist.m3u8")
+                create_hls_playlist(ts_files, playlist_path, fps)
+                # Yield the HLS playlist for streaming
+                yield playlist_path, gr.update()
             except Exception as e:
+                print(f"⚠️ Error creating HLS segment {idx}: {e}")
         current_start_frame += current_num_frames
+    # Create final MP4 for download
+    print("🎬 Creating final MP4 for download...")
+    try:
+        timestamp = int(time.time())
+        prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:8]
+        mp4_filename = f"pixio_video_{timestamp}_{prompt_hash}.mp4"
+        mp4_path = os.path.join("downloads", mp4_filename)
+        final_mp4 = frames_to_mp4_file(all_frames_for_download, mp4_path, fps)
+        if final_mp4:
+            CURRENT_DOWNLOAD_PATH = final_mp4
+            print(f"✅ Final MP4 created: {final_mp4}")
+        else:
+            print("❌ Failed to create final MP4")
+            CURRENT_DOWNLOAD_PATH = None
+    except Exception as e:
+        print(f"❌ Error creating final MP4: {e}")
+        CURRENT_DOWNLOAD_PATH = None
+    # Final completion status with download info
+    download_info = "📥 Download ready!" if CURRENT_DOWNLOAD_PATH else "❌ Download failed"
     final_status_html = (
         f"<div style='padding: 16px; border: 1px solid #198754; background: linear-gradient(135deg, #d1e7dd, #f8f9fa); border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>"
         f"  <div style='display: flex; align-items: center; margin-bottom: 8px;'>"
         f"      📊 Generated {total_frames_yielded} frames across {num_blocks} blocks"
         f"    </p>"
         f"    <p style='margin: 4px 0 0 0; color: #0f5132; font-size: 14px;'>"
+        f"      🎬 Playback: {fps} FPS • 📁 Format: HLS/H.264 • {download_info}"
         f"    </p>"
         f"  </div>"
         f"</div>"
     )
+    yield None, final_status_html
+    print(f"✅ HLS streaming complete! {total_frames_yielded} frames")
def download_video():
    """Return the current download file path, or None if it is unavailable.

    The path comes from the module-level ``CURRENT_DOWNLOAD_PATH``. It is
    returned only when it is set and the file still exists on disk.
    """
    candidate = CURRENT_DOWNLOAD_PATH
    if not candidate:
        return None
    return candidate if os.path.exists(candidate) else None
 # --- Gradio UI Layout ---
 with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
     gr.Markdown("# 🚀 Pixio Streaming Video Generation")
+    gr.Markdown("Real-time video generation with distilled Wan2-1.3B [[Model]](https://huggingface.co/Wan-AI/Wan2.1-T2V-1.3B), [[Project page]](https://pixio.myapps.ai), [[Paper]](https://arxiv.org/abs/2412.09738)")
     with gr.Row():
         with gr.Column(scale=2):
             with gr.Group():
                 prompt = gr.Textbox(
                     label="Prompt",
+                    placeholder="A close-up shot of a ceramic teacup slowly pouring water into a glass mug.",
                     lines=4,
+                    value="A close-up shot of a ceramic teacup slowly pouring water into a glass mug."
                 )
                 enhance_button = gr.Button("✨ Enhance Prompt", variant="secondary")
                 label="Generation Status"
             )
+            # Download button that appears after completion
+            with gr.Row():
+                download_btn = gr.DownloadButton(
+                    label="📥 Download MP4 Video",
+                    value=download_video,
+                    variant="secondary",
+                    size="lg"
+                )
+    # Connect the streaming function
     start_btn.click(
         fn=video_generation_handler_streaming,
         inputs=[prompt, seed, fps],
+        outputs=[streaming_video, status_display]
     )
     enhance_button.click(
     os.makedirs("downloads", exist_ok=True)
     print("🚀 Starting Self-Forcing Streaming Demo")
+    print(f"📁 Temporary files: gradio_tmp/")
+    print(f"📥 Download files: downloads/")
+    print(f"🎯 Streaming: HLS (.m3u8 + .ts segments)")
+    print(f"📱 Download: MP4 (imageio)")
     demo.queue().launch(
         server_name=args.host,
         server_port=args.port,
         share=args.share,
         show_error=True,
+        max_threads=40
     )
 # import subprocess