Spaces:

jomasego
/

mcp-video-frontend

Sleeping

App Files Files Community

jomasego committed on Jun 10

Commit

c8a7e17

1 Parent(s): b4c1755

Add MCP Video Analysis application with Claude AI integration

Browse files

Files changed (3) hide show

README.md +30 -3
app.py +340 -0
requirements.txt +4 -0

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-title: Mcp Video Frontend
-emoji: 📈
 colorFrom: purple
 colorTo: blue
 sdk: gradio
@@ -8,7 +8,34 @@ sdk_version: 5.33.1
 app_file: app.py
 pinned: false
 license: mit
-short_description: This is a chat interface to demonstrate the video-MCP
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: MCP Video Analysis with Claude AI
+emoji: 🎥
 colorFrom: purple
 colorTo: blue
 sdk: gradio
 app_file: app.py
 pinned: false
 license: mit
+short_description: Intelligent video content analysis powered by Modal backend and Anthropic Claude
 ---
+# 🎥 MCP Video Analysis with Claude AI
+This application provides comprehensive video analysis using the Model Context Protocol (MCP) to integrate multiple AI technologies:
+## 🔧 Technology Stack
+- **Modal Backend**: Scalable cloud compute for video processing
+- **Whisper**: Speech-to-text transcription
+- **Computer Vision Models**: Object detection, action recognition, and captioning
+- **Anthropic Claude**: Advanced AI for intelligent content analysis
+- **MCP Protocol**: Model Context Protocol for seamless integration
+## 🎯 Features
+- **Transcription**: Extract spoken content from videos
+- **Visual Analysis**: Identify objects, actions, and scenes
+- **Content Understanding**: AI-powered insights and summaries
+- **Custom Queries**: Ask specific questions about video content
+## 🚀 Usage
+1. Enter a video URL (YouTube or direct link)
+2. Optionally ask a specific question
+3. Click "Analyze Video" to get comprehensive insights
+4. Review both Claude's intelligent analysis and raw data
+## 🔒 Environment Variables Required
+- `ANTHROPIC_API_KEY`: Your Anthropic API key for Claude integration
+- `MODAL_VIDEO_ANALYSIS_ENDPOINT_URL`: Modal backend endpoint (optional, has default)
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,340 @@

+#!/usr/bin/env python3
+"""
+MCP Video Analysis Client with Anthropic Integration
+This application serves as an MCP (Model Context Protocol) client that:
+1. Connects to video analysis tools via MCP
+2. Integrates with Anthropic's Claude for intelligent video understanding
+3. Provides a Gradio interface for user interaction
+"""
+import os
+import json
+import asyncio
+import logging
+from typing import Dict, Any, List, Optional
+import gradio as gr
+import httpx
+from anthropic import Anthropic
+# Configure the root logger at INFO level and create this module's logger,
+# which every function and method below uses for progress and error messages.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class MCPVideoAnalysisClient:
+    """MCP Client for video analysis with Anthropic integration."""
+    def __init__(self):
+        # Initialize Anthropic client
+        self.anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
+        if not self.anthropic_api_key:
+            raise ValueError("ANTHROPIC_API_KEY environment variable is required")
+        self.anthropic_client = Anthropic(api_key=self.anthropic_api_key)
+        # Modal backend endpoint
+        self.modal_endpoint = os.getenv(
+            "MODAL_VIDEO_ANALYSIS_ENDPOINT_URL",
+            "https://jomasego--video-analysis-gradio-pipeline-process-video-analysis.modal.run"
+        )
+        logger.info(f"Initialized MCP Video Analysis Client with Modal endpoint: {self.modal_endpoint}")
+    async def analyze_video_with_modal(self, video_url: str) -> Dict[str, Any]:
+        """Call the Modal backend for comprehensive video analysis."""
+        try:
+            async with httpx.AsyncClient(timeout=300.0) as client:
+                logger.info(f"Calling Modal backend for video analysis: {video_url}")
+                response = await client.post(
+                    self.modal_endpoint,
+                    json={"video_url": video_url},
+                    headers={"Content-Type": "application/json"}
+                )
+                response.raise_for_status()
+                return response.json()
+        except Exception as e:
+            logger.error(f"Error calling Modal backend: {e}")
+            return {"error": f"Modal backend error: {str(e)}"}
+    def enhance_analysis_with_claude(self, video_analysis: Dict[str, Any], user_query: str = None) -> str:
+        """Use Claude to provide intelligent insights about the video analysis."""
+        # Prepare the analysis data for Claude
+        analysis_summary = self._format_analysis_for_claude(video_analysis)
+        # Create the prompt for Claude
+        system_prompt = """You are an expert video analyst with deep knowledge of multimedia content, storytelling, and visual communication. You excel at interpreting video analysis data and providing meaningful insights.
+Your task is to analyze the provided video analysis data and give intelligent, actionable insights. Focus on:
+1. Content understanding and themes
+2. Visual storytelling elements
+3. Technical quality assessment
+4. Audience engagement potential
+5. Key moments and highlights
+6. Contextual relevance
+Be concise but thorough, and tailor your response to be useful for content creators, marketers, or researchers."""
+        if user_query:
+            user_prompt = f"""Here is the video analysis data:
+{analysis_summary}
+User's specific question: {user_query}
+Please provide a comprehensive analysis addressing the user's question while incorporating insights from all the available data."""
+        else:
+            user_prompt = f"""Here is the video analysis data:
+{analysis_summary}
+Please provide a comprehensive analysis of this video, highlighting the most important insights and potential applications."""
+        try:
+            response = self.anthropic_client.messages.create(
+                model="claude-3-5-sonnet-20241022",
+                max_tokens=2000,
+                temperature=0.3,
+                system=system_prompt,
+                messages=[{"role": "user", "content": user_prompt}]
+            )
+            return response.content[0].text
+        except Exception as e:
+            logger.error(f"Error calling Anthropic API: {e}")
+            return f"Error generating Claude analysis: {str(e)}"
+    def _format_analysis_for_claude(self, analysis: Dict[str, Any]) -> str:
+        """Format the video analysis data for Claude consumption."""
+        formatted = []
+        # Handle transcription
+        if "transcription" in analysis:
+            transcription = analysis["transcription"]
+            if isinstance(transcription, str) and not transcription.startswith("Error"):
+                formatted.append(f"**TRANSCRIPTION:**\n{transcription}\n")
+            else:
+                formatted.append(f"**TRANSCRIPTION:** {transcription}\n")
+        # Handle caption
+        if "caption" in analysis:
+            caption = analysis["caption"]
+            if isinstance(caption, str) and not caption.startswith("Error"):
+                formatted.append(f"**VIDEO CAPTION:**\n{caption}\n")
+            else:
+                formatted.append(f"**VIDEO CAPTION:** {caption}\n")
+        # Handle actions
+        if "actions" in analysis:
+            actions = analysis["actions"]
+            if isinstance(actions, list) and actions:
+                action_text = []
+                for action in actions:
+                    if isinstance(action, dict):
+                        if "error" in action:
+                            action_text.append(f"Error: {action['error']}")
+                        else:
+                            # Format action detection results
+                            action_text.append(str(action))
+                    else:
+                        action_text.append(str(action))
+                formatted.append(f"**ACTION RECOGNITION:**\n{'; '.join(action_text)}\n")
+            else:
+                formatted.append(f"**ACTION RECOGNITION:** {actions}\n")
+        # Handle objects
+        if "objects" in analysis:
+            objects = analysis["objects"]
+            if isinstance(objects, list) and objects:
+                object_text = []
+                for obj in objects:
+                    if isinstance(obj, dict):
+                        if "error" in obj:
+                            object_text.append(f"Error: {obj['error']}")
+                        else:
+                            # Format object detection results
+                            object_text.append(str(obj))
+                    else:
+                        object_text.append(str(obj))
+                formatted.append(f"**OBJECT DETECTION:**\n{'; '.join(object_text)}\n")
+            else:
+                formatted.append(f"**OBJECT DETECTION:** {objects}\n")
+        # Handle any errors
+        if "error" in analysis:
+            formatted.append(f"**ANALYSIS ERROR:**\n{analysis['error']}\n")
+        return "\n".join(formatted) if formatted else "No analysis data available."
+    async def process_video_request(self, video_url: str, user_query: str = None) -> tuple[str, str]:
+        """Process a complete video analysis request with Claude enhancement."""
+        if not video_url or not video_url.strip():
+            return "Please provide a valid video URL.", ""
+        try:
+            # Step 1: Get video analysis from Modal backend
+            logger.info(f"Starting video analysis for: {video_url}")
+            video_analysis = await self.analyze_video_with_modal(video_url.strip())
+            # Step 2: Format the raw analysis for display
+            raw_analysis = json.dumps(video_analysis, indent=2)
+            # Step 3: Enhance with Claude insights
+            logger.info("Generating Claude insights...")
+            claude_insights = self.enhance_analysis_with_claude(video_analysis, user_query)
+            return claude_insights, raw_analysis
+        except Exception as e:
+            error_msg = f"Error processing video request: {str(e)}"
+            logger.error(error_msg)
+            return error_msg, ""
+# Initialize the MCP client
+try:
+    mcp_client = MCPVideoAnalysisClient()
+    logger.info("MCP Video Analysis Client initialized successfully")
+except Exception as e:
+    logger.error(f"Failed to initialize MCP client: {e}")
+    mcp_client = None
+# Gradio Interface Functions
+async def analyze_video_interface(video_url: str, user_query: str = None) -> tuple[str, str]:
+    """Gradio interface function for video analysis."""
+    if not mcp_client:
+        return "MCP Client not initialized. Please check your environment variables.", ""
+    return await mcp_client.process_video_request(video_url, user_query)
+def create_gradio_interface():
+    """Create and configure the Gradio interface."""
+    with gr.Blocks(
+        title="MCP Video Analysis with Claude",
+        theme=gr.themes.Soft(),
+        css="""
+        .gradio-container {
+            max-width: 1200px !important;
+        }
+        .main-header {
+            text-align: center;
+            margin-bottom: 30px;
+        }
+        .analysis-output {
+            max-height: 600px;
+            overflow-y: auto;
+        }
+        """
+    ) as interface:
+        gr.HTML("""
+        <div class="main-header">
+            <h1>🎥 MCP Video Analysis with Claude AI</h1>
+            <p>Intelligent video content analysis powered by Modal backend and Anthropic Claude</p>
+        </div>
+        """)
+        with gr.Tab("🔍 Video Analysis"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    video_url_input = gr.Textbox(
+                        label="Video URL",
+                        placeholder="Enter YouTube URL or direct video link...",
+                        lines=2
+                    )
+                    user_query_input = gr.Textbox(
+                        label="Specific Question (Optional)",
+                        placeholder="Ask a specific question about the video...",
+                        lines=2
+                    )
+                    with gr.Row():
+                        analyze_btn = gr.Button("🚀 Analyze Video", variant="primary", size="lg")
+                        clear_btn = gr.Button("🗑️ Clear", variant="secondary")
+                with gr.Column(scale=2):
+                    claude_output = gr.Textbox(
+                        label="🤖 Claude AI Insights",
+                        lines=20,
+                        elem_classes=["analysis-output"],
+                        interactive=False
+                    )
+            with gr.Row():
+                raw_analysis_output = gr.JSON(
+                    label="📊 Raw Analysis Data",
+                    elem_classes=["analysis-output"]
+                )
+            # Example videos
+            gr.HTML("<h3>📝 Example Videos to Try:</h3>")
+            with gr.Row():
+                example_urls = [
+                    "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
+                    "https://www.youtube.com/watch?v=jNQXAC9IVRw",
+                    "https://www.youtube.com/watch?v=9bZkp7q19f0"
+                ]
+                for i, url in enumerate(example_urls, 1):
+                    gr.Button(f"Example {i}", size="sm").click(
+                        lambda url=url: url, outputs=video_url_input
+                    )
+        with gr.Tab("ℹ️ About"):
+            gr.Markdown("""
+            ## About MCP Video Analysis
+            This application combines multiple AI technologies to provide comprehensive video analysis:
+            ### 🔧 Technology Stack
+            - **Modal Backend**: Scalable cloud compute for video processing
+            - **Whisper**: Speech-to-text transcription
+            - **Computer Vision Models**: Object detection, action recognition, and captioning
+            - **Anthropic Claude**: Advanced AI for intelligent content analysis
+            - **MCP Protocol**: Model Context Protocol for seamless integration
+            ### 🎯 Features
+            - **Transcription**: Extract spoken content from videos
+            - **Visual Analysis**: Identify objects, actions, and scenes
+            - **Content Understanding**: AI-powered insights and summaries
+            - **Custom Queries**: Ask specific questions about video content
+            ### 🚀 Usage
+            1. Enter a video URL (YouTube or direct link)
+            2. Optionally ask a specific question
+            3. Click "Analyze Video" to get comprehensive insights
+            4. Review both Claude's intelligent analysis and raw data
+            ### 🔒 Privacy & Security
+            - Video processing is handled securely in the cloud
+            - No video data is stored permanently
+            - API keys are handled securely via environment variables
+            """)
+        # Event handlers
+        def clear_all():
+            return "", "", "", ""
+        analyze_btn.click(
+            fn=analyze_video_interface,
+            inputs=[video_url_input, user_query_input],
+            outputs=[claude_output, raw_analysis_output],
+            show_progress=True
+        )
+        clear_btn.click(
+            fn=clear_all,
+            outputs=[video_url_input, user_query_input, claude_output, raw_analysis_output]
+        )
+    return interface
+# Create and launch the interface
+if __name__ == "__main__":
+    interface = create_gradio_interface()
+    interface.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_error=True
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio>=4.0.0
+anthropic>=0.40.0
+httpx>=0.25.0
+asyncio-compat>=0.1.0