talkingAvater_bgk

Runtime error

App Files Files Community

oKen38461 committed on Jul 19

Commit

2089ecf

1 Parent(s): d9a2a3d

ストリーミング関連のフレームレートを25fpsから20fpsに変更し、関連するテストケースを更新しました。これにより、全体のフレーム数計算が一貫性を持つようになりました。

Browse files

Files changed (6) hide show

api_server_streaming.py +402 -0
app_streaming.py +1 -1
core/atomic_components/writer.py +1 -1
core/models/modules/lmdm_modules/model.py +1 -1
streaming_client.py +332 -0
test_streaming.py +2 -2

api_server_streaming.py ADDED Viewed

	@@ -0,0 +1,402 @@

+"""
+DittoTalkingHead Streaming API Server
+WebSocket/SSEによるリアルタイムストリーミング実装
+"""
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect, File, UploadFile, HTTPException
+from fastapi.responses import StreamingResponse
+from fastapi.middleware.cors import CORSMiddleware
+import asyncio
+import tempfile
+import numpy as np
+import base64
+import json
+from typing import AsyncGenerator, Optional
+import cv2
+import time
+import logging
+from pathlib import Path
+import traceback
+from stream_pipeline_offline import StreamSDK
+# Logging configuration
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+app = FastAPI(title="DittoTalkingHead Streaming API")
+# CORS configuration
+# NOTE(review): wildcard origins/methods/headers are combined with
+# allow_credentials=True; FastAPI's CORS documentation says credentials need
+# explicit (non-wildcard) values - confirm this is intended before exposing
+# the server publicly.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# SDK configuration: paths handed to StreamSDK(CFG_PKL, DATA_ROOT)
+CFG_PKL = "checkpoints/ditto_cfg/v0.4_hubert_cfg_pytorch.pkl"
+DATA_ROOT = "checkpoints/ditto_pytorch"
+# Process-wide state
+class AppState:
+    """Mutable state shared by all request handlers in this process."""
+    def __init__(self) -> None:
+        # Counters used to cap the number of concurrent streaming sessions.
+        self.max_connections: int = 5
+        self.active_connections: int = 0
+        # Stays None until init_sdk() constructs the SDK.
+        self.sdk: Optional[StreamSDK] = None
+state = AppState()
+def init_sdk():
+    """Return the shared StreamSDK, constructing it on the first call."""
+    if state.sdk is not None:
+        return state.sdk
+    logger.info("Initializing StreamSDK...")
+    state.sdk = StreamSDK(CFG_PKL, DATA_ROOT)
+    logger.info("StreamSDK initialized successfully")
+    return state.sdk
+@app.on_event("startup")
+async def startup_event():
+    """Initialize the SDK when the application starts."""
+    init_sdk()
+@app.get("/")
+async def root():
+    """Health check: report the service name and the connection counters."""
+    return dict(
+        status="ok",
+        service="DittoTalkingHead Streaming API",
+        active_connections=state.active_connections,
+        max_connections=state.max_connections,
+    )
+@app.websocket("/ws/generate")
+async def websocket_endpoint(websocket: WebSocket):
+    """WebSocket endpoint for real-time streaming.
+
+    Protocol, as implemented below:
+      1. The client sends one JSON message with ``source_image`` (base64,
+         required), ``sample_rate`` (default 16000) and ``chunk_duration``
+         (default 0.2).
+      2. The server replies with a ``ready`` message carrying ``chunk_size``.
+      3. Binary messages are read as float32 samples and fed to the SDK in
+         ``chunk_size`` pieces; each message is answered with ``progress``
+         while a background task pushes ``frame`` messages (base64 JPEG).
+      4. The text message ``{"action": "stop"}`` finalizes the video, which is
+         returned base64-encoded in a ``final_video`` message.
+    Errors are reported to the client as ``{"type": "error", ...}``.
+    """
+    import shutil  # local import: only needed for per-session cleanup
+    # Refuse new sessions once the connection limit is reached.
+    if state.active_connections >= state.max_connections:
+        await websocket.close(code=1008, reason="Server busy")
+        return
+    await websocket.accept()
+    state.active_connections += 1
+    logger.info(f"New WebSocket connection. Active: {state.active_connections}")
+    sdk_instance = None
+    frame_task = None
+    work_dir = None
+    try:
+        # Receive the initial configuration.
+        config = await websocket.receive_json()
+        source_image_b64 = config.get("source_image")
+        sample_rate = config.get("sample_rate", 16000)
+        chunk_duration = config.get("chunk_duration", 0.2)
+        if not source_image_b64:
+            await websocket.send_json({"type": "error", "message": "source_image is required"})
+            return
+        chunk_size = int(sample_rate * chunk_duration)
+        if sample_rate <= 0 or chunk_size <= 0:
+            # A non-positive chunk size makes the padding/slicing below
+            # meaningless and sample_rate is later used as a divisor.
+            await websocket.send_json({
+                "type": "error",
+                "message": "sample_rate and chunk_duration must be positive"
+            })
+            return
+        # Keep the decoded source image and the output video in one private
+        # directory so a single rmtree removes everything for this session
+        # (this also avoids the deprecated, race-prone tempfile.mktemp).
+        image_data = base64.b64decode(source_image_b64)
+        work_dir = tempfile.mkdtemp(prefix="ditto_ws_")
+        source_path = str(Path(work_dir) / "source.png")
+        output_path = str(Path(work_dir) / "output.mp4")
+        with open(source_path, "wb") as img_file:
+            img_file.write(image_data)
+        # SDK setup.
+        # NOTE(review): init_sdk() returns one process-wide StreamSDK, so
+        # concurrent sessions call setup()/run_chunk() on the same object -
+        # confirm StreamSDK supports that, or serialize sessions.
+        sdk_instance = init_sdk()
+        sdk_instance.setup(source_path, output_path, online_mode=True, max_size=1024)
+        await websocket.send_json({
+            "type": "ready",
+            "message": "Ready to receive audio chunks",
+            "chunk_size": chunk_size
+        })
+        # Background task that forwards generated frames to the client.
+        async def send_frames():
+            """Poll the SDK writer queue and send each frame as a base64 JPEG."""
+            frame_count = 0
+            last_frame_time = time.time()
+            while True:
+                try:
+                    current_time = time.time()
+                    if sdk_instance.writer_queue.qsize() > 0:
+                        frame = sdk_instance.writer_queue.get_nowait()
+                        if frame is not None:
+                            # JPEG-encode the frame (quality is adjustable).
+                            encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 80]
+                            _, jpeg = cv2.imencode('.jpg',
+                                                   cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
+                                                   encode_param)
+                            frame_b64 = base64.b64encode(jpeg).decode('utf-8')
+                            # Instantaneous FPS from the gap to the previous frame.
+                            fps = 1.0 / (current_time - last_frame_time) if current_time > last_frame_time else 0
+                            last_frame_time = current_time
+                            await websocket.send_json({
+                                "type": "frame",
+                                "frame_id": frame_count,
+                                "timestamp": current_time,
+                                "fps": round(fps, 2),
+                                "data": frame_b64
+                            })
+                            frame_count += 1
+                except asyncio.CancelledError:
+                    break
+                except Exception as e:
+                    logger.error(f"Error sending frame: {e}")
+                await asyncio.sleep(0.01)  # poll every 10 ms
+        frame_task = asyncio.create_task(send_frames())
+        # Receive and process audio chunks.
+        total_samples = 0
+        processing_start = time.time()
+        while True:
+            message = await websocket.receive()
+            if message.get("type") == "websocket.disconnect":
+                # receive() reports a disconnect as a plain message; calling it
+                # again would raise RuntimeError, so leave the loop explicitly.
+                raise WebSocketDisconnect(message.get("code", 1000))
+            if message.get("bytes") is not None:
+                audio = np.frombuffer(message["bytes"], dtype=np.float32)
+                # Zero-pad to a whole number of chunks (at least one) so that
+                # oversized messages are processed completely instead of being
+                # truncated to the first chunk.
+                num_chunks = max(1, -(-len(audio) // chunk_size))
+                padded_len = num_chunks * chunk_size
+                if len(audio) < padded_len:
+                    audio = np.pad(audio, (0, padded_len - len(audio)))
+                for start in range(0, padded_len, chunk_size):
+                    # NOTE(review): run_chunk() is called synchronously; if it
+                    # blocks, frame delivery stalls for that time - confirm.
+                    sdk_instance.run_chunk(audio[start:start + chunk_size])
+                # Count exactly what was fed to the SDK so the frame estimate
+                # below matches the processed audio.
+                total_samples += padded_len
+                elapsed = time.time() - processing_start
+                await websocket.send_json({
+                    "type": "progress",
+                    "samples_processed": total_samples,
+                    "duration_seconds": total_samples / sample_rate,
+                    "elapsed_seconds": elapsed
+                })
+            elif message.get("text") is not None:
+                # Control command.
+                command = json.loads(message["text"])
+                if isinstance(command, dict) and command.get("action") == "stop":
+                    logger.info("Received stop command")
+                    break
+        # Streaming finished: stop the frame sender before finalizing.
+        frame_task.cancel()
+        try:
+            await frame_task
+        except asyncio.CancelledError:
+            pass
+        # Estimate the frame count at 20 fps and pass it to setup_Nd.
+        estimated_frames = int(total_samples / sample_rate * 20)
+        sdk_instance.setup_Nd(estimated_frames)
+        await websocket.send_json({"type": "processing", "message": "Finalizing video..."})
+        # Close the SDK; the final MP4 is expected at output_path afterwards.
+        sdk_instance.close()
+        if Path(output_path).exists():
+            mp4_data = Path(output_path).read_bytes()
+            mp4_b64 = base64.b64encode(mp4_data).decode('utf-8')
+            await websocket.send_json({
+                "type": "final_video",
+                "size_bytes": len(mp4_data),
+                "duration_seconds": total_samples / sample_rate,
+                "data": mp4_b64
+            })
+        else:
+            await websocket.send_json({
+                "type": "error",
+                "message": "Failed to generate final video"
+            })
+    except WebSocketDisconnect:
+        logger.info("Client disconnected")
+    except Exception as e:
+        logger.error(f"WebSocket error: {e}")
+        logger.error(traceback.format_exc())
+        try:
+            await websocket.send_json({
+                "type": "error",
+                "message": str(e)
+            })
+        except Exception:
+            # The socket may already be gone; nothing more can be reported.
+            pass
+    finally:
+        state.active_connections -= 1
+        logger.info(f"Connection closed. Active: {state.active_connections}")
+        # Make sure the frame sender never outlives the session (it would
+        # otherwise keep polling forever after a disconnect or an error).
+        if frame_task is not None:
+            frame_task.cancel()
+        # Remove the source image and the generated video.
+        if work_dir is not None:
+            shutil.rmtree(work_dir, ignore_errors=True)
+@app.post("/sse/generate")
+async def sse_generate(
+    source_image: UploadFile = File(...),
+    sample_rate: int = 16000,
+    max_duration: float = 10.0
+):
+    """SSE endpoint - stream generation progress as Server-Sent Events.
+
+    Demo path: random noise (not real audio) is fed to the SDK in 0.2 s chunks
+    for ``max_duration`` seconds. The response emits ``start``, ``thumbnail``
+    and ``complete`` events, or an ``error`` event on failure.
+
+    Raises:
+        HTTPException: 503 when the connection limit is already reached.
+    """
+    import shutil  # local import: only needed for per-request cleanup
+    # Read the upload before returning the response.
+    # NOTE(review): depending on the FastAPI version the UploadFile may already
+    # be closed when the streaming generator runs - reading here is safe
+    # either way.
+    image_bytes = await source_image.read()
+    if state.active_connections >= state.max_connections:
+        raise HTTPException(status_code=503, detail="Server busy")
+    state.active_connections += 1
+    def sse_event(payload) -> str:
+        """Format one payload as an SSE ``data:`` record."""
+        return f"data: {json.dumps(payload)}\n\n"
+    async def generate() -> AsyncGenerator[str, None]:
+        sdk_instance = None
+        work_dir = None
+        try:
+            # Store the source image and the output video in one private
+            # directory so both are removed together in ``finally``.
+            work_dir = tempfile.mkdtemp(prefix="ditto_sse_")
+            source_path = str(Path(work_dir) / "source.png")
+            output_path = str(Path(work_dir) / "output.mp4")
+            with open(source_path, "wb") as img_file:
+                img_file.write(image_bytes)
+            # SDK setup.
+            sdk_instance = init_sdk()
+            sdk_instance.setup(source_path, output_path, online_mode=True, max_size=1024)
+            yield sse_event({'type': 'start', 'message': 'Processing started'})
+            # Demo only: generate dummy audio and process it.
+            chunk_duration = 0.2
+            chunk_size = int(sample_rate * chunk_duration)
+            num_chunks = int(max_duration / chunk_duration)
+            for i in range(num_chunks):
+                # Dummy audio chunk (a real implementation would read an audio stream).
+                audio_chunk = np.random.randn(chunk_size).astype(np.float32) * 0.1
+                sdk_instance.run_chunk(audio_chunk)
+                if sdk_instance.writer_queue.qsize() > 0:
+                    try:
+                        frame = sdk_instance.writer_queue.get_nowait()
+                        if frame is not None:
+                            # Low-resolution thumbnail.
+                            thumbnail = cv2.resize(frame, (160, 160))
+                            _, jpeg = cv2.imencode('.jpg', cv2.cvtColor(thumbnail, cv2.COLOR_RGB2BGR))
+                            frame_b64 = base64.b64encode(jpeg).decode('utf-8')
+                            yield sse_event({'type': 'thumbnail', 'frame_id': i, 'data': frame_b64})
+                    except Exception as e:
+                        # Thumbnails are best-effort. Catch Exception only, so
+                        # GeneratorExit/CancelledError raised at the yield
+                        # still propagate when the client goes away.
+                        logger.warning(f"Skipping thumbnail {i}: {e}")
+                await asyncio.sleep(chunk_duration)
+            # Done: estimate the frame count at 20 fps, the rate the WebSocket
+            # endpoint and the video writer use (was 5 frames/chunk = 25 fps).
+            estimated_frames = round(num_chunks * chunk_duration * 20)
+            sdk_instance.setup_Nd(estimated_frames)
+            sdk_instance.close()
+            yield sse_event({'type': 'complete', 'frames': estimated_frames})
+        except Exception as e:
+            logger.error(f"SSE error: {e}")
+            yield sse_event({'type': 'error', 'message': str(e)})
+        finally:
+            state.active_connections -= 1
+            if work_dir is not None:
+                shutil.rmtree(work_dir, ignore_errors=True)
+    return StreamingResponse(
+        generate(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+        }
+    )
+@app.get("/test")
+async def test_page():
+    """Serve a static HTML page for manual testing.
+
+    The page only renders the controls and logs the WebSocket endpoint URL;
+    it contains no streaming logic of its own.
+    """
+    html_content = """
+    <!DOCTYPE html>
+    <html>
+    <head>
+        <title>DittoTalkingHead Streaming Test</title>
+        <style>
+            body { font-family: Arial, sans-serif; margin: 20px; }
+            .container { max-width: 800px; margin: 0 auto; }
+            #live-frame { max-width: 100%; border: 1px solid #ccc; }
+            #status { margin: 10px 0; padding: 10px; background: #f0f0f0; }
+            .controls { margin: 20px 0; }
+            button { padding: 10px 20px; margin: 5px; }
+        </style>
+    </head>
+    <body>
+        <div class="container">
+            <h1>DittoTalkingHead Streaming Test</h1>
+            <div class="controls">
+                <input type="file" id="source-image" accept="image/*">
+                <button id="start-btn">Start Streaming</button>
+                <button id="stop-btn" disabled>Stop</button>
+            </div>
+            <div id="status">Ready</div>
+            <img id="live-frame" style="display: none;">
+            <video id="final-video" controls style="display: none; width: 100%;"></video>
+        </div>
+        <script>
+            // WebSocket実装はstreaming_api_guide.mdを参照
+            console.log('WebSocket endpoint: ws://localhost:8000/ws/generate');
+        </script>
+    </body>
+    </html>
+    """
+    # Imported here rather than at the top of the module.
+    from fastapi.responses import HTMLResponse
+    return HTMLResponse(content=html_content)
+# Script entry point: start the API server with uvicorn on 0.0.0.0:8000.
+if __name__ == "__main__":
+    import uvicorn
+    import torch
+    # GPU setup: clear the CUDA cache and enable the cuDNN benchmark mode.
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.backends.cudnn.benchmark = True
+    logger.info("Starting DittoTalkingHead Streaming API Server...")
+    logger.info(f"GPU available: {torch.cuda.is_available()}")
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=8000,
+        log_level="info",
+        access_log=True
+    )

app_streaming.py CHANGED Viewed

@@ -46,7 +46,7 @@ def generator(mic, src_img):
         # setup: online_mode=True でストリーミング
         tmp_out = tempfile.mktemp(suffix=".mp4")
         sdk.setup(src_img, tmp_out, online_mode=True, max_size=1024)
-        N_total = int(np.ceil(len(wav_full) / sr * 25))  # 概算フレーム数
         sdk.setup_Nd(N_total)
         # 処理開始時刻

         # setup: online_mode=True でストリーミング
         tmp_out = tempfile.mktemp(suffix=".mp4")
         sdk.setup(src_img, tmp_out, online_mode=True, max_size=1024)
+        N_total = int(np.ceil(len(wav_full) / sr * 20))  # 概算フレーム数
         sdk.setup_Nd(N_total)
         # 処理開始時刻

core/atomic_components/writer.py CHANGED Viewed

@@ -3,7 +3,7 @@ import os
 class VideoWriterByImageIO:
-    def __init__(self, video_path, fps=25, **kwargs):
         video_format = kwargs.get("format", "mp4")  # default is mp4 format
         codec = kwargs.get("vcodec", "libx264")  # default is libx264 encoding
         quality = kwargs.get("quality")  # video quality

 class VideoWriterByImageIO:
+    def __init__(self, video_path, fps=20, **kwargs):
         video_format = kwargs.get("format", "mp4")  # default is mp4 format
         codec = kwargs.get("vcodec", "libx264")  # default is libx264 encoding
         quality = kwargs.get("quality")  # video quality

core/models/modules/lmdm_modules/model.py CHANGED Viewed

@@ -237,7 +237,7 @@ class MotionDecoder(nn.Module):
     def __init__(
         self,
         nfeats: int,
-        seq_len: int = 100,  # 4 seconds, 25 fps
         latent_dim: int = 256,
         ff_size: int = 1024,
         num_layers: int = 4,

     def __init__(
         self,
         nfeats: int,
+        seq_len: int = 80,  # 4 seconds, 20 fps
         latent_dim: int = 256,
         ff_size: int = 1024,
         num_layers: int = 4,

streaming_client.py ADDED Viewed

	@@ -0,0 +1,332 @@

+"""
+DittoTalkingHead Streaming Client
+WebSocketを使用したストリーミングクライアントの実装例
+"""
+import asyncio
+import websockets
+import numpy as np
+import soundfile as sf
+import base64
+import json
+import cv2
+from typing import Optional, Callable
+import pyaudio
+import threading
+import queue
+from pathlib import Path
+import logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class DittoStreamingClient:
+    """WebSocket client for the DittoTalkingHead streaming server.
+
+    Audio is sent as float32 chunks of ``chunk_size`` samples. Frames and the
+    final video received from the server are passed to optional callbacks set
+    with ``set_frame_callback`` / ``set_final_video_callback``.
+    """
+    def __init__(self, server_url="ws://localhost:8000/ws/generate"):
+        self.server_url = server_url
+        self.sample_rate = 16000
+        self.chunk_duration = 0.2  # 200ms
+        self.chunk_size = int(self.sample_rate * self.chunk_duration)
+        self.websocket = None
+        self.is_connected = False
+        self.frame_callback: Optional[Callable] = None
+        self.final_video_callback: Optional[Callable] = None
+    async def connect(self, source_image_path: str):
+        """Open the connection and start a session.
+
+        Sends the base64-encoded source image together with the audio
+        parameters, then waits for the server's first reply.
+
+        Returns:
+            True if the server answered with a ``ready`` message, else False.
+
+        Raises:
+            Exception: any file, connection or decoding error is logged and
+                re-raised.
+        """
+        try:
+            # Base64-encode the source image.
+            with open(source_image_path, "rb") as f:
+                image_b64 = base64.b64encode(f.read()).decode('utf-8')
+            self.websocket = await websockets.connect(self.server_url)
+            self.is_connected = True
+            # Send the initial configuration.
+            await self.websocket.send(json.dumps({
+                "source_image": image_b64,
+                "sample_rate": self.sample_rate,
+                "chunk_duration": self.chunk_duration
+            }))
+            # Wait for the server's reply.
+            response = await self.websocket.recv()
+            data = json.loads(response)
+            if data["type"] == "ready":
+                logger.info(f"Connected to server: {data['message']}")
+                return True
+            else:
+                logger.error(f"Connection failed: {data}")
+                return False
+        except Exception as e:
+            logger.error(f"Connection error: {e}")
+            self.is_connected = False
+            raise
+    async def disconnect(self):
+        """Close the connection if one is open."""
+        if self.websocket:
+            await self.websocket.close()
+            self.is_connected = False
+            logger.info("Disconnected from server")
+    async def stream_audio_file(self, audio_path: str, source_image_path: str):
+        """Stream an audio file to the server, paced at real-time speed.
+
+        The file is downmixed to mono and resampled to ``sample_rate`` when
+        necessary, sent chunk by chunk, followed by a ``stop`` command. The
+        method returns once the server's messages have been consumed.
+        """
+        try:
+            if not await self.connect(source_image_path):
+                # The server did not answer "ready"; connect() already logged
+                # its response, so there is nothing to stream to.
+                return
+            audio_data, sr = sf.read(audio_path)
+            if audio_data.ndim > 1:
+                # Multi-channel files are returned as (frames, channels); the
+                # chunking below works on a flat sample sequence, so downmix.
+                audio_data = audio_data.mean(axis=1)
+            if sr != self.sample_rate:
+                import librosa
+                audio_data = librosa.resample(
+                    audio_data,
+                    orig_sr=sr,
+                    target_sr=self.sample_rate
+                )
+            # Consume server messages concurrently with sending.
+            receive_task = asyncio.create_task(self._receive_frames())
+            total_chunks = 0
+            for i in range(0, len(audio_data), self.chunk_size):
+                chunk = audio_data[i:i+self.chunk_size]
+                if len(chunk) < self.chunk_size:
+                    chunk = np.pad(chunk, (0, self.chunk_size - len(chunk)))
+                # Send as raw float32 samples.
+                await self.websocket.send(chunk.astype(np.float32).tobytes())
+                total_chunks += 1
+                # Simulate real-time capture.
+                await asyncio.sleep(self.chunk_duration)
+                # The last chunk is padded, so clamp the percentage at 100.
+                progress = min(100.0, (i + self.chunk_size) / len(audio_data) * 100)
+                logger.info(f"Streaming progress: {progress:.1f}%")
+            await self.websocket.send(json.dumps({"action": "stop"}))
+            logger.info(f"Sent {total_chunks} audio chunks")
+            # Wait until the final video (or an error) has been received.
+            await receive_task
+        finally:
+            await self.disconnect()
+    async def stream_microphone(self, source_image_path: str, duration: Optional[float] = None):
+        """Stream microphone audio to the server in real time.
+
+        Args:
+            source_image_path: image sent when the session is opened.
+            duration: recording length in seconds; a falsy value records until
+                interrupted.
+        """
+        import time  # local import: this module does not import time globally
+        try:
+            if not await self.connect(source_image_path):
+                # The server did not answer "ready"; nothing to stream to.
+                return
+            receive_task = asyncio.create_task(self._receive_frames())
+            # Hand-off between the recording thread and this coroutine.
+            audio_queue = queue.Queue()
+            stop_event = threading.Event()
+            def record_audio():
+                """Blocking capture loop executed in a worker thread."""
+                p = pyaudio.PyAudio()
+                stream = None
+                try:
+                    stream = p.open(
+                        format=pyaudio.paFloat32,
+                        channels=1,
+                        rate=self.sample_rate,
+                        input=True,
+                        frames_per_buffer=self.chunk_size
+                    )
+                    logger.info("Recording started... Press Ctrl+C to stop")
+                    # A worker thread has no asyncio event loop, so
+                    # asyncio.get_event_loop() would raise here; use the
+                    # monotonic clock to time the recording instead.
+                    start_time = time.monotonic()
+                    while not stop_event.is_set():
+                        if duration and (time.monotonic() - start_time) > duration:
+                            break
+                        audio_chunk = stream.read(self.chunk_size, exception_on_overflow=False)
+                        audio_queue.put(audio_chunk)
+                except Exception as e:
+                    logger.error(f"Recording error: {e}")
+                finally:
+                    if stream is not None:
+                        stream.stop_stream()
+                        stream.close()
+                    p.terminate()
+                    logger.info("Recording stopped")
+            record_thread = threading.Thread(target=record_audio)
+            record_thread.start()
+            try:
+                # Forward recorded chunks until the recorder has stopped and
+                # the queue is drained.
+                while record_thread.is_alive() or not audio_queue.empty():
+                    try:
+                        audio_chunk = audio_queue.get_nowait()
+                    except queue.Empty:
+                        # Yield to the event loop instead of blocking in
+                        # Queue.get(), so the receive task keeps running.
+                        await asyncio.sleep(0.01)
+                        continue
+                    audio_array = np.frombuffer(audio_chunk, dtype=np.float32)
+                    await self.websocket.send(audio_array.tobytes())
+            except KeyboardInterrupt:
+                logger.info("Stopping recording...")
+            finally:
+                stop_event.set()
+                record_thread.join()
+                await self.websocket.send(json.dumps({"action": "stop"}))
+                # Wait until the final video (or an error) has been received.
+                await receive_task
+        finally:
+            await self.disconnect()
+    async def _receive_frames(self):
+        """Receive server messages until the final video, an error, or a close.
+
+        ``frame`` and ``final_video`` payloads are decoded and passed to the
+        registered callbacks; other message types are only logged.
+        """
+        frame_count = 0
+        try:
+            while True:
+                message = await self.websocket.recv()
+                data = json.loads(message)
+                if data["type"] == "frame":
+                    frame_count += 1
+                    logger.info(f"Received frame {data['frame_id']} (FPS: {data.get('fps', 0)})")
+                    if self.frame_callback:
+                        # Decode the base64 JPEG into an image array.
+                        frame_data = base64.b64decode(data["data"])
+                        nparr = np.frombuffer(frame_data, np.uint8)
+                        frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+                        self.frame_callback(frame, data)
+                elif data["type"] == "progress":
+                    logger.info(f"Progress: {data['duration_seconds']:.1f}s processed")
+                elif data["type"] == "processing":
+                    logger.info(f"Server: {data['message']}")
+                elif data["type"] == "final_video":
+                    logger.info(f"Received final video ({data['size_bytes']} bytes, {data['duration_seconds']:.1f}s)")
+                    if self.final_video_callback:
+                        video_data = base64.b64decode(data["data"])
+                        self.final_video_callback(video_data, data)
+                    break
+                elif data["type"] == "error":
+                    logger.error(f"Server error: {data['message']}")
+                    break
+        except websockets.exceptions.ConnectionClosed:
+            logger.info("Connection closed by server")
+        except Exception as e:
+            logger.error(f"Receive error: {e}")
+        logger.info(f"Total frames received: {frame_count}")
+    def set_frame_callback(self, callback: Callable):
+        """Register ``callback(frame, message)`` for every received frame."""
+        self.frame_callback = callback
+    def set_final_video_callback(self, callback: Callable):
+        """Register ``callback(video_bytes, message)`` for the final video."""
+        self.final_video_callback = callback
+# Usage example and smoke test
+async def main():
+    """Stream the example audio file, show live frames and save the final video."""
+    client = DittoStreamingClient()
+    def show_live_frame(frame, metadata):
+        """Display each received frame in an OpenCV window."""
+        cv2.imshow("Live Frame", frame)
+        cv2.waitKey(1)
+    def write_final_video(video_data, metadata):
+        """Write the final video bytes to disk."""
+        output_path = "output_streaming.mp4"
+        Path(output_path).write_bytes(video_data)
+        logger.info(f"Video saved to {output_path}")
+    client.set_frame_callback(show_live_frame)
+    client.set_final_video_callback(write_final_video)
+    # Paths of the test image and the sample audio.
+    source_image = "example/reference.png"
+    audio_file = "example/audio.wav"
+    # Nothing can be done without the source image.
+    if not Path(source_image).exists():
+        logger.error(f"Source image not found: {source_image}")
+        return
+    # Stream from the audio file when it is available.
+    if not Path(audio_file).exists():
+        logger.warning(f"Audio file not found: {audio_file}")
+    else:
+        logger.info("=== Testing audio file streaming ===")
+        await client.stream_audio_file(audio_file, source_image)
+    # Stream from the microphone (5 seconds)
+    # logger.info("\n=== Testing microphone streaming (5 seconds) ===")
+    # await client.stream_microphone(source_image, duration=5.0)
+    cv2.destroyAllWindows()
+# Batch-processing client
+class BatchStreamingClient:
+    """Run several streaming jobs concurrently, at most ``max_parallel`` at once."""
+    def __init__(self, server_url="ws://localhost:8000/ws/generate", max_parallel=3):
+        self.max_parallel = max_parallel
+        self.server_url = server_url
+    async def process_batch(self, tasks: list):
+        """Process every task and return the results in input order.
+
+        Each task is a mapping with ``id``, ``audio_path`` and ``image_path``.
+        The result list holds the task's ``id`` on success, or the exception
+        it raised.
+        """
+        limiter = asyncio.Semaphore(self.max_parallel)
+        async def run_one(job):
+            # The semaphore bounds how many sessions are open at the same time.
+            async with limiter:
+                streamer = DittoStreamingClient(self.server_url)
+                await streamer.stream_audio_file(job["audio_path"], job["image_path"])
+                return job["id"]
+        pending = [run_one(job) for job in tasks]
+        outcomes = await asyncio.gather(*pending, return_exceptions=True)
+        return outcomes
+if __name__ == "__main__":
+    # Single-client test
+    asyncio.run(main())
+    # Batch-processing example
+    # batch_client = BatchStreamingClient()
+    # tasks = [
+    #     {"id": 1, "audio_path": "audio1.wav", "image_path": "image1.png"},
+    #     {"id": 2, "audio_path": "audio2.wav", "image_path": "image2.png"},
+    # ]
+    # asyncio.run(batch_client.process_batch(tasks))

test_streaming.py CHANGED Viewed

@@ -34,7 +34,7 @@ def test_streaming():
     tmp_out = tempfile.mktemp(suffix=".mp4")
     sdk.setup(src_img, tmp_out, online_mode=True, max_size=1024)
-    N_total = int(np.ceil(duration * 25))  # 25fps
     sdk.setup_Nd(N_total)
     print("✅ セットアップ完了")
@@ -98,7 +98,7 @@ def test_streaming():
     print(f"✅ 出力ファイル: {tmp_out}")
     # 期待される結果の確認
-    expected_frames = int(duration * 25)  # 25fps
     if frames_received >= expected_frames * 0.8:  # 80%以上
         print("✅ テスト成功！")
     else:

     tmp_out = tempfile.mktemp(suffix=".mp4")
     sdk.setup(src_img, tmp_out, online_mode=True, max_size=1024)
+    N_total = int(np.ceil(duration * 20))  # 20fps
     sdk.setup_Nd(N_total)
     print("✅ セットアップ完了")
     print(f"✅ 出力ファイル: {tmp_out}")
     # 期待される結果の確認
+    expected_frames = int(duration * 20)  # 20fps
     if frames_received >= expected_frames * 0.8:  # 80%以上
         print("✅ テスト成功！")
     else: