jomasego committed on
Commit
ee0209c
·
1 Parent(s): b279e29

Fix: Correct IndentationError and concurrency pattern in modal_whisper_app.py

Browse files
Files changed (1):
  1. modal_whisper_app.py +51 -45
modal_whisper_app.py CHANGED
@@ -758,38 +758,53 @@ def extract_video_urls_from_search(search_results: List[Dict[str, str]], max_url
758
  gpu="any",
759
  timeout=3600
760
  )
761
- async def analyze_videos_by_topic(video_urls: list, topic: str) -> dict:
762
- # Analyze videos concurrently
763
- response.raise_for_status() # Raise HTTPError for bad responses (4XX or 5XX)
764
- video_bytes = await response.aread()
765
- print(f"[TopicAnalysisWorker] Downloaded {len(video_bytes)} bytes from {video_url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
766
 
767
- if not video_bytes:
768
- raise ValueError("Downloaded video content is empty.")
769
 
770
- # 2. Analyze video
771
- analysis_result = await analyze_video_comprehensive.coro(video_bytes)
772
-
773
- # Check if the analysis itself returned an error structure
774
- if isinstance(analysis_result, dict) and any(key + "_error" in analysis_result for key in ["transcription", "caption", "actions", "objects"]):
775
- print(f"[TopicAnalysisWorker] Comprehensive analysis for {video_url} reported errors: {analysis_result}")
776
- return {"url": video_url, "error_type": "analysis_error", "error_details": analysis_result}
777
- else:
778
- return {"url": video_url, "analysis": analysis_result}
779
 
780
  except httpx.HTTPStatusError as e:
781
- print(f"[TopicAnalysisWorker] HTTP error downloading {video_url}: {e}")
782
- return {"url": video_url, "error_type": "download_error", "error_details": f"HTTP {e.response.status_code}: {e.response.text[:200]}"}
783
  except httpx.RequestError as e:
784
- print(f"[TopicAnalysisWorker] Request error downloading {video_url}: {e}")
785
- return {"url": video_url, "error_type": "download_error", "error_details": f"Failed to download: {str(e)}"}
786
  except Exception as e:
787
- print(f"[TopicAnalysisWorker] Error processing video {video_url}: {e}")
788
  import traceback
789
- # Consider logging traceback.format_exc() instead of printing if running in a less verbose environment
790
- # traceback.print_exc() # This might be too verbose for regular Modal logs
791
- return {"url": video_url, "error_type": "processing_error", "error_details": str(e), "traceback": traceback.format_exc()[:1000]}
792
 
 
 
 
 
 
 
793
  async def analyze_videos_by_topic(video_urls: List[str], topic: str) -> Dict[str, Any]:
794
  """Analyzes a list of videos (by URL) concurrently and aggregates results for a topic."""
795
  print(f"[TopicAnalysis] Starting concurrent analysis for topic: '{topic}' with {len(video_urls)} video(s).")
@@ -804,29 +819,20 @@ async def analyze_videos_by_topic(video_urls: List[str], topic: str) -> Dict[str
804
  results_aggregator["errors"].append({"topic_error": "No video URLs provided or found for the topic."})
805
  return results_aggregator
806
 
807
- async with httpx.AsyncClient(timeout=300.0) as client: # 5 min timeout for individual downloads
808
- tasks = [_download_and_analyze_one_video(client, url, topic) for url in video_urls]
809
-
810
- # return_exceptions=True allows us to get results for successful tasks even if others fail
811
- individual_results = await asyncio.gather(*tasks, return_exceptions=True)
812
-
813
- for res_or_exc in individual_results:
814
- if isinstance(res_or_exc, Exception):
815
- # This handles exceptions not caught within _download_and_analyze_one_video itself (should be rare)
816
- # Or if return_exceptions=True was used and _download_and_analyze_one_video raised an unhandled one.
817
- print(f"[TopicAnalysis] An unexpected exception occurred during asyncio.gather: {res_or_exc}")
818
- results_aggregator["errors"].append({"url": "unknown_url_due_to_gather_exception", "processing_error": str(res_or_exc)})
819
- elif isinstance(res_or_exc, dict):
820
- if "error_type" in res_or_exc:
821
- results_aggregator["errors"].append(res_or_exc) # Append the error dict directly
822
- elif "analysis" in res_or_exc:
823
- results_aggregator["analyzed_videos"].append(res_or_exc)
824
  else:
825
- print(f"[TopicAnalysis] Received an unexpected dictionary structure: {res_or_exc}")
826
- results_aggregator["errors"].append({"url": res_or_exc.get("url", "unknown"), "processing_error": "Unknown result structure"})
827
  else:
828
- print(f"[TopicAnalysis] Received an unexpected result type from asyncio.gather: {type(res_or_exc)}")
829
- results_aggregator["errors"].append({"url": "unknown", "processing_error": f"Unexpected result type: {type(res_or_exc)}"})
 
830
 
831
  print(f"[TopicAnalysis] Finished concurrent analysis for topic '{topic}'.")
832
  return results_aggregator
 
758
  gpu="any",
759
  timeout=3600
760
  )
761
+ @app.function(
762
+ image=video_analysis_image_v2,
763
+ secrets=[HF_TOKEN_SECRET],
764
+ timeout=1800,
765
+ )
766
+ async def _analyze_video_worker(video_url: str) -> dict:
767
+ """
768
+ Worker function to download a video from a URL and run comprehensive analysis.
769
+ This is designed to be called concurrently.
770
+ """
771
+ print(f"[Worker] Starting analysis for {video_url}")
772
+ try:
773
+ async with httpx.AsyncClient() as client:
774
+ print(f"[Worker] Downloading video from {video_url}")
775
+ response = await client.get(video_url, follow_redirects=True, timeout=60.0)
776
+ response.raise_for_status()
777
+ video_bytes = await response.aread()
778
+ print(f"[Worker] Downloaded {len(video_bytes)} bytes from {video_url}")
779
 
780
+ if not video_bytes:
781
+ raise ValueError("Downloaded video content is empty.")
782
 
783
+ analysis_result = await analyze_video_comprehensive.coro(video_bytes)
784
+
785
+ if isinstance(analysis_result, dict) and any("error" in str(v).lower() for v in analysis_result.values()):
786
+ print(f"[Worker] Comprehensive analysis for {video_url} reported errors: {analysis_result}")
787
+ return {"url": video_url, "status": "error", "error_type": "analysis_error", "details": analysis_result}
788
+ else:
789
+ return {"url": video_url, "status": "success", "analysis": analysis_result}
 
 
790
 
791
  except httpx.HTTPStatusError as e:
792
+ print(f"[Worker] HTTP error downloading {video_url}: {e}")
793
+ return {"url": video_url, "status": "error", "error_type": "download_error", "details": f"HTTP {e.response.status_code}"}
794
  except httpx.RequestError as e:
795
+ print(f"[Worker] Request error downloading {video_url}: {e}")
796
+ return {"url": video_url, "status": "error", "error_type": "download_error", "details": f"Failed to download: {str(e)}"}
797
  except Exception as e:
798
+ print(f"[Worker] Error processing video {video_url}: {e}")
799
  import traceback
800
+ return {"url": video_url, "status": "error", "error_type": "processing_error", "details": str(e), "traceback": traceback.format_exc()[:1000]}
 
 
801
 
802
+ @app.function(
803
+ image=video_analysis_image_v2,
804
+ secrets=[HF_TOKEN_SECRET],
805
+ timeout=3600,
806
+ gpu="any",
807
+ )
808
  async def analyze_videos_by_topic(video_urls: List[str], topic: str) -> Dict[str, Any]:
809
  """Analyzes a list of videos (by URL) concurrently and aggregates results for a topic."""
810
  print(f"[TopicAnalysis] Starting concurrent analysis for topic: '{topic}' with {len(video_urls)} video(s).")
 
819
  results_aggregator["errors"].append({"topic_error": "No video URLs provided or found for the topic."})
820
  return results_aggregator
821
 
822
+ # Use .map to run the worker function concurrently on all video URLs
823
+ # The list() call forces the generator to execute and retrieve all results.
824
+ individual_results = list(_analyze_video_worker.map(video_urls))
825
+
826
+ for result in individual_results:
827
+ if isinstance(result, dict):
828
+ if result.get("status") == "error":
829
+ results_aggregator["errors"].append(result)
 
 
 
 
 
 
 
 
 
830
  else:
831
+ results_aggregator["analyzed_videos"].append(result)
 
832
  else:
833
+ # This case handles unexpected return types from the worker, like exceptions
834
+ print(f"[TopicAnalysis] Received an unexpected result type from worker: {type(result)}")
835
+ results_aggregator["errors"].append({"url": "unknown", "error_type": "unexpected_result", "details": str(result)})
836
 
837
  print(f"[TopicAnalysis] Finished concurrent analysis for topic '{topic}'.")
838
  return results_aggregator