Duibonduil committed
Commit 7f4b706 · verified · 1 Parent(s): 595b4ab

Upload 15 files
AWorld-main/aworlddistributed/mcp_servers/aworldsearch_server.py ADDED
@@ -0,0 +1,238 @@
import asyncio
import json
import logging
import os
import sys
from typing import List, Dict, Any, Optional, Union

import aiohttp
from mcp.server import FastMCP
from mcp.types import TextContent
from pydantic import Field

mcp = FastMCP("aworldsearch-server")


async def search_single(query: str, num: int = 5) -> Optional[Dict[str, Any]]:
    """Execute a single search query; returns None on error."""
    try:
        url = os.getenv('AWORLD_SEARCH_URL')
        searchMode = os.getenv('AWORLD_SEARCH_SEARCHMODE')
        source = os.getenv('AWORLD_SEARCH_SOURCE')
        domain = os.getenv('AWORLD_SEARCH_DOMAIN')
        uid = os.getenv('AWORLD_SEARCH_UID')
        if not url or not searchMode or not source or not domain:
            logging.warning("Query failed: url, searchMode, source, domain parameters incomplete")
            return None

        headers = {
            'Content-Type': 'application/json'
        }
        data = {
            "domain": domain,
            "extParams": {},
            "page": 0,
            "pageSize": num,
            "query": query,
            "searchMode": searchMode,
            "source": source,
            "userId": uid
        }

        async with aiohttp.ClientSession() as session:
            try:
                async with session.post(url, headers=headers, json=data) as response:
                    if response.status != 200:
                        logging.warning(f"Query failed: {query}, status code: {response.status}")
                        return None

                    result = await response.json()
                    return result
            except aiohttp.ClientError:
                logging.warning(f"Request error: {query}")
                return None
    except Exception:
        logging.warning(f"Query exception: {query}")
        return None


def filter_valid_docs(result: Optional[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Filter valid document results; returns an empty list if the input is None."""
    if result is None:
        return []

    try:
        valid_docs = []

        # Check success field
        if not result.get("success"):
            return valid_docs

        # Check searchDocs field
        search_docs = result.get("searchDocs", [])
        if not search_docs:
            return valid_docs

        # Extract required fields
        required_fields = ["title", "docAbstract", "url", "doc"]

        for doc in search_docs:
            # Check that all required fields exist and are not empty
            is_valid = True
            for field in required_fields:
                if field not in doc or not doc[field]:
                    is_valid = False
                    break

            if is_valid:
                # Keep only the required fields
                filtered_doc = {field: doc[field] for field in required_fields}
                valid_docs.append(filtered_doc)

        return valid_docs
    except Exception:
        return []


@mcp.tool(description="Search based on the user's input query list")
async def search(
    query_list: List[str] = Field(
        description="List format, queries to search for"
    ),
    num: int = Field(
        5,
        description="Maximum number of results per query, default is 5; please keep the total results within 15"
    )
) -> Union[str, TextContent]:
    """Execute the main search function; supports a single query or a query list."""
    try:
        # Get configuration from environment variables
        env_total_num = os.getenv('AWORLD_SEARCH_TOTAL_NUM')
        if env_total_num and env_total_num.isdigit():
            # Force override of the input num parameter with the environment variable
            num = int(env_total_num)

        # If no queries were provided, return an empty result
        if not query_list:
            # Initialize TextContent with additional parameters
            return TextContent(
                type="text",
                text="",  # Empty string instead of None
                **{"metadata": {}}  # Pass as additional fields
            )

        # When the query count is >= 3 or slice_num is set, use the corresponding value
        slice_num = os.getenv('AWORLD_SEARCH_SLICE_NUM')
        if slice_num and slice_num.isdigit():
            actual_num = int(slice_num)
        else:
            actual_num = 2 if len(query_list) >= 3 else num

        # Execute all queries in parallel
        tasks = [search_single(q, actual_num) for q in query_list]
        raw_results = await asyncio.gather(*tasks)

        # Filter and merge results
        all_valid_docs = []
        for result in raw_results:
            valid_docs = filter_valid_docs(result)
            all_valid_docs.extend(valid_docs)

        # If no valid results were found, return an empty result
        if not all_valid_docs:
            # Initialize TextContent with additional parameters
            return TextContent(
                type="text",
                text="",  # Empty string instead of None
                **{"metadata": {}}  # Pass as additional fields
            )

        # Format results as JSON
        result_json = json.dumps(all_valid_docs, ensure_ascii=False)

        # Build the output structure directly
        combined_query = ",".join(query_list)

        # Use a dictionary to deduplicate by URL
        url_dict = {}
        for doc in all_valid_docs:
            url = doc.get("url", "")
            if url not in url_dict:
                url_dict[url] = {
                    "title": doc.get("title", ""),
                    "url": url,
                    "snippet": doc.get("doc", "")[:100] + "..." if len(doc.get("doc", "")) > 100 else doc.get("doc", ""),
                    "content": doc.get("doc", "")  # Map the doc field to content
                }

        # Convert dictionary values to a list
        search_items = list(url_dict.values())

        search_output_dict = {
            "artifact_type": "WEB_PAGES",
            "artifact_data": {
                "query": combined_query,
                "results": search_items
            }
        }

        # Log results
        logging.info(f"Completed {len(query_list)} queries, found {len(all_valid_docs)} valid documents")

        # Initialize TextContent with additional parameters
        return TextContent(
            type="text",
            text=result_json,
            **{"metadata": search_output_dict}  # Pass processed data as metadata
        )
    except Exception as e:
        # Handle errors
        logging.error(f"Search error: {e}")
        return TextContent(
            type="text",
            text="",  # Empty string instead of None
            **{"metadata": {}}  # Pass as additional fields
        )


def main():
    from dotenv import load_dotenv

    load_dotenv(override=True)

    print("Starting AWorld Search MCP Server...", file=sys.stderr)
    mcp.run(transport="stdio")


# Make the module callable
def __call__():
    """
    Make the module callable for uvx.
    This function is called when the module is executed directly.
    """
    main()


sys.modules[__name__].__call__ = __call__

if __name__ == "__main__":
    main()

# if __name__ == "__main__":
#     # Configure logging
#     logging.basicConfig(
#         level=logging.INFO,
#         format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
#     )
#
#     # Test single query
#     # asyncio.run(search("Alibaba financial report"))
#
#     # Test multiple queries
#     test_queries = ["Alibaba financial report", "Tencent financial report", "Baidu financial report"]
#     asyncio.run(search(query_list=test_queries))
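
The commented-out test at the bottom of the file doubles as a usage sketch. Since the @mcp.tool-decorated coroutine remains an ordinary Python function, it can be smoke-tested directly, assuming mcp_servers is importable and the AWORLD_SEARCH_* variables point at a reachable search backend (the values below are placeholder assumptions, not real endpoints):

import asyncio
import os

# Placeholder configuration -- substitute your actual backend values.
os.environ.setdefault("AWORLD_SEARCH_URL", "https://search.example.com/api")  # hypothetical endpoint
os.environ.setdefault("AWORLD_SEARCH_SEARCHMODE", "web")                      # hypothetical value
os.environ.setdefault("AWORLD_SEARCH_SOURCE", "aworld")                       # hypothetical value
os.environ.setdefault("AWORLD_SEARCH_DOMAIN", "general")                      # hypothetical value

from mcp_servers.aworldsearch_server import search

# Mirrors the commented-out test above; num is passed explicitly because the
# Field default only applies when the tool is invoked through MCP.
result = asyncio.run(search(query_list=["Alibaba financial report"], num=5))
print(result.text)
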
AWorld-main/aworlddistributed/mcp_servers/browser_server.py ADDED
@@ -0,0 +1,149 @@
"""
Browser MCP Server

This module provides MCP server functionality for browser automation and interaction.
It handles tasks such as web scraping, form submission, and automated browsing.

Main functions:
- browse_url: Opens a URL and performs specified actions
- submit_form: Fills and submits forms on web pages
"""

import json
import os
import sys
import traceback

from browser_use import Agent
from browser_use.agent.views import AgentHistoryList
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from mcp.server.fastmcp import FastMCP
from pydantic import Field

from aworld.logs.util import logger

mcp = FastMCP("browser-server")
browser_system_prompt = """
===== NAVIGATION STRATEGY =====
1. START: Navigate to the most authoritative source for this information
   - For general queries: Use Google with specific search terms
   - For known sources: Go directly to the relevant website

2. EVALUATE: Assess each page methodically
   - Scan headings and highlighted text first
   - Look for data tables, charts, or official statistics
   - Check publication dates for timeliness

3. EXTRACT: Capture exactly what's needed
   - Take screenshots of visual evidence (charts, tables, etc.)
   - Copy precise text that answers the query
   - Note source URLs for citation

4. DOWNLOAD: Save the most relevant file to a local path for further processing
   - Save the text if possible for further text reading and analysis
   - Save the image if possible for further image reasoning analysis
   - Save the pdf if possible for further pdf reading and analysis

5. ROBOT DETECTION:
   - If the page is a robot detection page, abort immediately
   - Navigate to the most authoritative source for similar information instead

===== EFFICIENCY GUIDELINES =====
- Use specific search queries with key terms from the task
- Avoid getting distracted by tangential information
- If blocked by paywalls, try archive.org or similar alternatives
- Document each significant finding clearly and concisely

Your goal is to extract precisely the information needed with minimal browsing steps.
"""


@mcp.tool(description="Perform browser actions using the browser-use package.")
async def browser_use(
    task: str = Field(description="The task to perform using the browser."),
) -> str:
    """
    Perform browser actions using the browser-use package.

    Args:
        task (str): The task to perform using the browser.

    Returns:
        str: The result of the browser actions.
    """
    browser = Browser(
        config=BrowserConfig(
            headless=False,
            new_context_config=BrowserContextConfig(
                disable_security=True,
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
                minimum_wait_page_load_time=10,
                maximum_wait_page_load_time=30,
            ),
        )
    )
    browser_context = BrowserContext(
        config=BrowserContextConfig(
            # Write the browser trace into the LOG_FILE_PATH directory
            trace_path=os.getenv("LOG_FILE_PATH", "/tmp") + "/browser_trace.log"
        ),
        browser=browser,
    )
    agent = Agent(
        task=task,
        llm=ChatOpenAI(
            model=os.getenv("LLM_MODEL_NAME"),
            api_key=os.getenv("LLM_API_KEY"),
            base_url=os.getenv("LLM_BASE_URL"),
            model_name=os.getenv("LLM_MODEL_NAME"),
            openai_api_base=os.getenv("LLM_BASE_URL"),
            openai_api_key=os.getenv("LLM_API_KEY"),
            temperature=1.0,
        ),
        browser_context=browser_context,
        extend_system_message=browser_system_prompt,
    )
    try:
        browser_execution: AgentHistoryList = await agent.run(max_steps=50)
        if (
            browser_execution is not None
            and browser_execution.is_done()
            and browser_execution.is_successful()
        ):
            exec_trace = browser_execution.extracted_content()
            logger.info(
                ">>> 🌏 Browse Execution Succeed!\n"
                f">>> 💡 Result: {json.dumps(exec_trace, ensure_ascii=False, indent=4)}\n"
                ">>> 🌏 Browse Execution Succeed!\n"
            )
            return browser_execution.final_result()
        else:
            return f"Browser execution failed for task: {task}"
    except Exception as e:
        logger.error(f"Browser execution failed: {traceback.format_exc()}")
        return f"Browser execution failed for task: {task} due to {str(e)}"
    finally:
        await browser.close()
        logger.info("Browser Closed!")


def main():
    load_dotenv()
    print("Starting Browser MCP Server...", file=sys.stderr)
    mcp.run(transport="stdio")


# Make the module callable
def __call__():
    """
    Make the module callable for uvx.
    This function is called when the module is executed directly.
    """
    main()


sys.modules[__name__].__call__ = __call__

# Run the server when the script is executed directly
if __name__ == "__main__":
    main()
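
For reference, a sketch of the environment browser_use() reads at startup; the variable names come from the code above, while every value here is a placeholder rather than a working credential:

import os

os.environ["LLM_MODEL_NAME"] = "gpt-4o"                   # placeholder model id
os.environ["LLM_API_KEY"] = "sk-placeholder"              # placeholder key
os.environ["LLM_BASE_URL"] = "https://api.openai.com/v1"  # or your proxy endpoint
os.environ["LOG_FILE_PATH"] = "/tmp/aworld_logs"          # directory for browser_trace.log
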
AWorld-main/aworlddistributed/mcp_servers/document_server.py ADDED
@@ -0,0 +1,998 @@
"""
Document MCP Server

This module provides MCP server functionality for document processing and analysis.
It handles various document formats including:
- Text files
- PDF documents
- Word documents (DOCX)
- Excel spreadsheets
- PowerPoint presentations
- JSON and XML files
- Source code files

Each document type has specialized processing functions that extract content,
structure, and metadata. The server focuses on local file processing with
appropriate validation and error handling.

Main functions:
- mcpreadtext: Reads plain text files
- mcpreadpdf: Reads PDF files with optional image extraction
- mcpreaddocx: Reads Word documents
- mcpreadexcel: Reads Excel spreadsheets
- mcpreadpptx: Reads PowerPoint presentations
- mcpreadjson: Reads and parses JSON/JSONL files
- mcpreadxml: Reads and parses XML files
- mcpreadhtmltext: Reads HTML files and extracts content
- mcpreadsourcecode: Reads and analyzes source code files
"""

import io
import json
import os
import sys
import tempfile
import traceback
from datetime import date, datetime
from typing import Any, Dict, List, Optional

import fitz
import html2text
import pandas as pd
import xmltodict
from bs4 import BeautifulSoup
from docx2markdown._docx_to_markdown import docx_to_markdown
from dotenv import load_dotenv
from mcp.server.fastmcp import FastMCP
from PIL import Image, ImageDraw, ImageFont
from pptx import Presentation
from pydantic import BaseModel, Field
from PyPDF2 import PdfReader
from tabulate import tabulate
from xls2xlsx import XLS2XLSX

from aworld.logs.util import logger
from aworld.utils import import_package
from mcp_servers.image_server import encode_images

mcp = FastMCP("document-server")


# Define model classes for different document types
class TextDocument(BaseModel):
    """Model representing a text document"""

    content: str
    file_path: str
    file_name: str
    file_size: int
    last_modified: str


class HtmlDocument(BaseModel):
    """Model representing an HTML document"""

    content: str  # Extracted text content
    html_content: str  # Original HTML content
    file_path: str
    file_name: str
    file_size: int
    last_modified: str
    title: Optional[str] = None
    links: Optional[List[Dict[str, str]]] = None
    images: Optional[List[Dict[str, str]]] = None
    tables: Optional[List[str]] = None
    markdown: Optional[str] = None  # HTML converted to Markdown format


class JsonDocument(BaseModel):
    """Model representing a JSON document"""

    format: str  # "json" or "jsonl"
    type: Optional[str] = None  # "array" or "object" for standard JSON
    count: Optional[int] = None
    keys: Optional[List[str]] = None
    data: Any
    file_path: str
    file_name: str


class XmlDocument(BaseModel):
    """Model representing an XML document"""

    content: Dict
    file_path: str
    file_name: str


class PdfImage(BaseModel):
    """Model representing an image extracted from a PDF"""

    page: int
    format: str
    width: int
    height: int
    path: str


class PdfDocument(BaseModel):
    """Model representing a PDF document"""

    content: str
    file_path: str
    file_name: str
    page_count: int
    images: Optional[List[PdfImage]] = None
    image_count: Optional[int] = None
    image_dir: Optional[str] = None
    error: Optional[str] = None


class PdfResult(BaseModel):
    """Model representing results from processing multiple PDF documents"""

    total_files: int
    success_count: int
    failed_count: int
    results: List[PdfDocument]


class DocxDocument(BaseModel):
    """Model representing a Word document"""

    content: str
    file_path: str
    file_name: str


class ExcelSheet(BaseModel):
    """Model representing a sheet in an Excel file"""

    name: str
    data: List[Dict[str, Any]]
    markdown_table: str
    row_count: int
    column_count: int


class ExcelDocument(BaseModel):
    """Model representing an Excel document"""

    file_name: str
    file_path: str
    processed_path: Optional[str] = None
    file_type: str
    sheet_count: int
    sheet_names: List[str]
    sheets: List[ExcelSheet]
    success: bool = True
    error: Optional[str] = None


class ExcelResult(BaseModel):
    """Model representing results from processing multiple Excel documents"""

    total_files: int
    success_count: int
    failed_count: int
    results: List[ExcelDocument]


class PowerPointSlide(BaseModel):
    """Model representing a slide in a PowerPoint presentation"""

    slide_number: int
    image: str  # Base64 encoded image


class PowerPointDocument(BaseModel):
    """Model representing a PowerPoint document"""

    file_path: str
    file_name: str
    slide_count: int
    slides: List[PowerPointSlide]


class SourceCodeDocument(BaseModel):
    """Model representing a source code document"""

    content: str
    file_type: str
    file_path: str
    file_name: str
    line_count: int
    size_bytes: int
    last_modified: str
    classes: Optional[List[str]] = None
    functions: Optional[List[str]] = None
    imports: Optional[List[str]] = None
    package: Optional[List[str]] = None
    methods: Optional[List[str]] = None
    includes: Optional[List[str]] = None


class DocumentError(BaseModel):
    """Model representing an error in document processing"""

    error: str
    file_path: Optional[str] = None
    file_name: Optional[str] = None


class ComplexEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, datetime):
            return o.strftime("%Y-%m-%d %H:%M:%S")
        elif isinstance(o, date):
            return o.strftime("%Y-%m-%d")
        else:
            return json.JSONEncoder.default(self, o)


def handle_error(e: Exception, error_type: str, file_path: Optional[str] = None) -> str:
    """Unified error handling; returns a standard-format error message"""
    error_msg = f"{error_type} error: {str(e)}"
    logger.error(traceback.format_exc())

    error = DocumentError(
        error=error_msg,
        file_path=file_path,
        file_name=os.path.basename(file_path) if file_path else None,
    )

    return error.model_dump_json()


def check_file_readable(document_path: str) -> Optional[str]:
    """Check that the file exists and is readable; return an error message or None"""
    if not os.path.exists(document_path):
        return f"File does not exist: {document_path}"
    if not os.access(document_path, os.R_OK):
        return f"File is not readable: {document_path}"
    return None


@mcp.tool(
    description="Read and return content from a local text file. Cannot process files from https:// URLs."
)
def mcpreadtext(
    document_path: str = Field(description="The input local text file path."),
) -> str:
    """Read and return content from a local text file. Cannot process files from https:// URLs."""
    error = check_file_readable(document_path)
    if error:
        return DocumentError(error=error, file_path=document_path).model_dump_json()

    try:
        with open(document_path, "r", encoding="utf-8") as f:
            content = f.read()

        result = TextDocument(
            content=content,
            file_path=document_path,
            file_name=os.path.basename(document_path),
            file_size=os.path.getsize(document_path),
            last_modified=datetime.fromtimestamp(
                os.path.getmtime(document_path)
            ).strftime("%Y-%m-%d %H:%M:%S"),
        )

        return result.model_dump_json()
    except Exception as e:
        return handle_error(e, "Text file reading", document_path)


@mcp.tool(
    description="Read and parse a JSON or JSONL file and return the parsed content. Cannot process files from https:// URLs."
)
def mcpreadjson(
    document_path: str = Field(description="Local path to a JSON or JSONL file"),
    is_jsonl: bool = Field(
        default=False,
        description="Whether the file is in JSONL format (one JSON object per line)",
    ),
) -> str:
    """Read and parse a JSON or JSONL file and return the parsed content. Cannot process files from https:// URLs."""
    error = check_file_readable(document_path)
    if error:
        return DocumentError(error=error, file_path=document_path).model_dump_json()

    try:
        # Choose the processing method based on the file type
        if is_jsonl:
            # Process a JSONL file (one JSON object per line)
            results = []
            with open(document_path, "r", encoding="utf-8") as f:
                for line_num, line in enumerate(f, 1):
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        json_obj = json.loads(line)
                        results.append(json_obj)
                    except json.JSONDecodeError as e:
                        logger.warning(
                            f"JSON parsing error at line {line_num}: {str(e)}"
                        )

            # Create the result model
            result = JsonDocument(
                format="jsonl",
                count=len(results),
                data=results,
                file_path=document_path,
                file_name=os.path.basename(document_path),
            )

        else:
            # Process a standard JSON file
            with open(document_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            # Create the result model based on the data type
            if isinstance(data, list):
                result = JsonDocument(
                    format="json",
                    type="array",
                    count=len(data),
                    data=data,
                    file_path=document_path,
                    file_name=os.path.basename(document_path),
                )
            else:
                result = JsonDocument(
                    format="json",
                    type="object",
                    keys=list(data.keys()) if isinstance(data, dict) else [],
                    data=data,
                    file_path=document_path,
                    file_name=os.path.basename(document_path),
                )

        return result.model_dump_json()

    except json.JSONDecodeError as e:
        return handle_error(e, "JSON parsing", document_path)
    except Exception as e:
        return handle_error(e, "JSON file reading", document_path)


@mcp.tool(
    description="Read and parse an XML file and return the parsed content. Cannot process files from https:// URLs."
)
def mcpreadxml(
    document_path: str = Field(description="The local input XML file path."),
) -> str:
    """Read and parse an XML file and return the parsed content. Cannot process files from https:// URLs."""
    error = check_file_readable(document_path)
    if error:
        return DocumentError(error=error, file_path=document_path).model_dump_json()

    try:
        with open(document_path, "r", encoding="utf-8") as f:
            data = f.read()

        result = XmlDocument(
            content=xmltodict.parse(data),
            file_path=document_path,
            file_name=os.path.basename(document_path),
        )

        return result.model_dump_json()
    except Exception as e:
        return handle_error(e, "XML file reading", document_path)


@mcp.tool(
    description="Read and return content from PDF files with optional image extraction. Cannot process files from https:// URLs."
)
def mcpreadpdf(
    document_paths: List[str] = Field(description="The local input PDF file paths."),
    extract_images: bool = Field(
        default=False, description="Whether to extract images from the PDF (default: False)"
    ),
) -> str:
    """Read and return content from PDF files with optional image extraction. Cannot process files from https:// URLs."""
    try:
        results = []
        success_count = 0
        failed_count = 0

        for document_path in document_paths:
            error = check_file_readable(document_path)
            if error:
                results.append(
                    PdfDocument(
                        content="",
                        file_path=document_path,
                        file_name=os.path.basename(document_path),
                        page_count=0,
                        error=error,
                    )
                )
                failed_count += 1
                continue

            try:
                with open(document_path, "rb") as f:
                    reader = PdfReader(f)
                    content = " ".join(page.extract_text() for page in reader.pages)
                    page_count = len(reader.pages)

                pdf_result = PdfDocument(
                    content=content,
                    file_path=document_path,
                    file_name=os.path.basename(document_path),
                    page_count=page_count,
                )

                # Extract images if requested
                if extract_images:
                    images_data = []
                    # Use the /tmp directory for storing images
                    output_dir = "/tmp/pdf_images"

                    # Create the output directory if it doesn't exist
                    os.makedirs(output_dir, exist_ok=True)

                    # Generate a unique subfolder based on the filename to avoid conflicts
                    pdf_name = os.path.splitext(os.path.basename(document_path))[0]
                    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
                    image_dir = os.path.join(output_dir, f"{pdf_name}_{timestamp}")
                    os.makedirs(image_dir, exist_ok=True)

                    try:
                        # Open the PDF with PyMuPDF
                        pdf_document = fitz.open(document_path)

                        # Iterate through each page
                        for page_index in range(len(pdf_document)):
                            page = pdf_document[page_index]

                            # Get the image list
                            image_list = page.get_images(full=True)

                            # Process each image
                            for img_index, img in enumerate(image_list):
                                # Extract image information
                                xref = img[0]
                                base_image = pdf_document.extract_image(xref)
                                image_bytes = base_image["image"]
                                image_ext = base_image["ext"]

                                # Save the image to a file in the /tmp directory
                                img_filename = f"pdf_image_p{page_index+1}_{img_index+1}.{image_ext}"
                                img_path = os.path.join(image_dir, img_filename)

                                with open(img_path, "wb") as img_file:
                                    img_file.write(image_bytes)
                                    logger.success(f"Image saved: {img_path}")

                                # Get the image dimensions
                                with Image.open(img_path) as img:
                                    width, height = img.size

                                # Add to results with the file path instead of base64
                                images_data.append(
                                    PdfImage(
                                        page=page_index + 1,
                                        format=image_ext,
                                        width=width,
                                        height=height,
                                        path=img_path,
                                    )
                                )

                        pdf_result.images = images_data
                        pdf_result.image_count = len(images_data)
                        pdf_result.image_dir = image_dir

                    except Exception as img_error:
                        logger.error(f"Error extracting images: {str(img_error)}")
                        # Don't clean up on error so we keep any successfully extracted images
                        pdf_result.error = str(img_error)

                results.append(pdf_result)
                success_count += 1

            except Exception as e:
                results.append(
                    PdfDocument(
                        content="",
                        file_path=document_path,
                        file_name=os.path.basename(document_path),
                        page_count=0,
                        error=str(e),
                    )
                )
                failed_count += 1

        # Create the final result
        pdf_result = PdfResult(
            total_files=len(document_paths),
            success_count=success_count,
            failed_count=failed_count,
            results=results,
        )

        return pdf_result.model_dump_json()

    except Exception as e:
        return handle_error(e, "PDF file reading")


@mcp.tool(
    description="Read and return content from a Word file as parsed content. Cannot process files from https:// URLs."
)
def mcpreaddocx(
    document_path: str = Field(description="The local input Word file path."),
) -> str:
    """Read and return content from a Word file. Cannot process files from https:// URLs."""
    error = check_file_readable(document_path)
    if error:
        return DocumentError(error=error, file_path=document_path).model_dump_json()

    try:
        file_name = os.path.basename(document_path)
        md_file_path = f"{file_name}.md"
        docx_to_markdown(document_path, md_file_path)

        with open(md_file_path, "r", encoding="utf-8") as f:
            content = f.read()

        os.remove(md_file_path)

        result = DocxDocument(
            content=content, file_path=document_path, file_name=file_name
        )

        return result.model_dump_json()
    except Exception as e:
        return handle_error(e, "Word file reading", document_path)


@mcp.tool(
    description="Read multiple Excel/CSV files and convert sheets to Markdown tables. Cannot process files from https:// URLs."
)
def mcpreadexcel(
    document_paths: List[str] = Field(
        description="List of local input Excel/CSV file paths."
    ),
    max_rows: int = Field(
        1000, description="Maximum number of rows to read per sheet (default: 1000)"
    ),
    convert_xls_to_xlsx: bool = Field(
        False,
        description="Whether to convert XLS files to XLSX format (default: False)",
    ),
) -> str:
    """Read multiple Excel/CSV files and convert sheets to Markdown tables. Cannot process files from https:// URLs."""
    try:
        # Import required packages
        import_package("tabulate")

        # Import the xls2xlsx package if conversion is requested
        if convert_xls_to_xlsx:
            import_package("xls2xlsx")

        all_results = []
        temp_files = []  # Track temporary files for cleanup
        success_count = 0
        failed_count = 0

        # Process each file
        for document_path in document_paths:
            # Check that the file exists and is readable
            error = check_file_readable(document_path)
            if error:
                all_results.append(
                    ExcelDocument(
                        file_name=os.path.basename(document_path),
                        file_path=document_path,
                        file_type="UNKNOWN",
                        sheet_count=0,
                        sheet_names=[],
                        sheets=[],
                        success=False,
                        error=error,
                    )
                )
                failed_count += 1
                continue

            try:
                # Check the file extension
                file_ext = os.path.splitext(document_path)[1].lower()

                # Validate the file type
                if file_ext not in [".csv", ".xls", ".xlsx", ".xlsm"]:
                    error_msg = f"Unsupported file format: {file_ext}. Only CSV, XLS, XLSX, and XLSM formats are supported."
                    all_results.append(
                        ExcelDocument(
                            file_name=os.path.basename(document_path),
                            file_path=document_path,
                            file_type=file_ext.replace(".", "").upper(),
                            sheet_count=0,
                            sheet_names=[],
                            sheets=[],
                            success=False,
                            error=error_msg,
                        )
                    )
                    failed_count += 1
                    continue

                # Convert XLS to XLSX if requested and the file is XLS
                processed_path = document_path
                if convert_xls_to_xlsx and file_ext == ".xls":
                    try:
                        logger.info(f"Converting XLS to XLSX: {document_path}")
                        converter = XLS2XLSX(document_path)
                        # Create a temp file with the xlsx extension
                        xlsx_path = (
                            os.path.splitext(document_path)[0] + "_converted.xlsx"
                        )
                        converter.to_xlsx(xlsx_path)
                        processed_path = xlsx_path
                        temp_files.append(xlsx_path)  # Track for cleanup
                        logger.success(f"Converted XLS to XLSX: {xlsx_path}")
                    except Exception as conv_error:
                        logger.error(f"XLS to XLSX conversion error: {str(conv_error)}")
                        # Continue with the original file if conversion fails

                excel_sheets = []
                sheet_names = []

                # Handle CSV files differently
                if file_ext == ".csv":
                    # For CSV files, create a single sheet named after the file
                    sheet_name = os.path.basename(document_path).replace(".csv", "")
                    df = pd.read_csv(processed_path, nrows=max_rows)

                    # Create a markdown table
                    markdown_table = "*Empty table*"
                    if not df.empty:
                        headers = df.columns.tolist()
                        table_data = df.values.tolist()
                        markdown_table = tabulate(
                            table_data, headers=headers, tablefmt="pipe"
                        )

                        if len(df) >= max_rows:
                            markdown_table += (
                                f"\n\n*Note: Table truncated to {max_rows} rows*"
                            )

                    # Create the sheet model
                    excel_sheets.append(
                        ExcelSheet(
                            name=sheet_name,
                            data=df.to_dict(orient="records"),
                            markdown_table=markdown_table,
                            row_count=len(df),
                            column_count=len(df.columns),
                        )
                    )

                    sheet_names = [sheet_name]

                else:
                    # For Excel files, process all sheets
                    with pd.ExcelFile(processed_path) as xls:
                        sheet_names = xls.sheet_names

                        for sheet_name in sheet_names:
                            # Read the Excel sheet into a DataFrame with a row limit
                            df = pd.read_excel(
                                xls, sheet_name=sheet_name, nrows=max_rows
                            )

                            # Create a markdown table
                            markdown_table = "*Empty table*"
                            if not df.empty:
                                headers = df.columns.tolist()
                                table_data = df.values.tolist()
                                markdown_table = tabulate(
                                    table_data, headers=headers, tablefmt="pipe"
                                )

                                if len(df) >= max_rows:
                                    markdown_table += f"\n\n*Note: Table truncated to {max_rows} rows*"

                            # Create the sheet model
                            excel_sheets.append(
                                ExcelSheet(
                                    name=sheet_name,
                                    data=df.to_dict(orient="records"),
                                    markdown_table=markdown_table,
                                    row_count=len(df),
                                    column_count=len(df.columns),
                                )
                            )

                # Create the result for this file
                file_result = ExcelDocument(
                    file_name=os.path.basename(document_path),
                    file_path=document_path,
                    processed_path=(
                        processed_path if processed_path != document_path else None
                    ),
                    file_type=file_ext.replace(".", "").upper(),
                    sheet_count=len(sheet_names),
                    sheet_names=sheet_names,
                    sheets=excel_sheets,
                    success=True,
                )

                all_results.append(file_result)
                success_count += 1

            except Exception as file_error:
                # Handle errors for individual files
                error_msg = str(file_error)
                logger.error(f"File reading error for {document_path}: {error_msg}")
                all_results.append(
                    ExcelDocument(
                        file_name=os.path.basename(document_path),
                        file_path=document_path,
                        file_type=os.path.splitext(document_path)[1]
                        .replace(".", "")
                        .upper(),
                        sheet_count=0,
                        sheet_names=[],
                        sheets=[],
                        success=False,
                        error=error_msg,
                    )
                )
                failed_count += 1

        # Clean up temporary files
        for temp_file in temp_files:
            try:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                    logger.info(f"Removed temporary file: {temp_file}")
            except Exception as cleanup_error:
                logger.warning(
                    f"Error cleaning up temporary file {temp_file}: {str(cleanup_error)}"
                )

        # Create the final result
        excel_result = ExcelResult(
            total_files=len(document_paths),
            success_count=success_count,
            failed_count=failed_count,
            results=all_results,
        )

        return excel_result.model_dump_json()

    except Exception as e:
        return handle_error(e, "Excel/CSV files processing")


@mcp.tool(
    description="Read and convert PowerPoint slides to base64-encoded images. Cannot process files from https:// URLs."
)
def mcpreadpptx(
    document_path: str = Field(description="The local input PowerPoint file path."),
) -> str:
    """Read and convert PowerPoint slides to base64-encoded images. Cannot process files from https:// URLs."""
    error = check_file_readable(document_path)
    if error:
        return DocumentError(error=error, file_path=document_path).model_dump_json()

    # Create a temporary directory
    temp_dir = tempfile.mkdtemp()
    slides_data = []

    try:
        presentation = Presentation(document_path)
        total_slides = len(presentation.slides)

        if total_slides == 0:
            raise ValueError("PPT file does not contain any slides")

        # Process each slide
        for i, slide in enumerate(presentation.slides):
            # Set slide dimensions
            slide_width_px = 1920  # 16:9 ratio
            slide_height_px = 1080

            # Create a blank image
            slide_img = Image.new("RGB", (slide_width_px, slide_height_px), "white")
            draw = ImageDraw.Draw(slide_img)
            font = ImageFont.load_default()

            # Draw the slide number
            draw.text((20, 20), f"Slide {i+1}/{total_slides}", fill="black", font=font)

            # Process shapes in the slide
            for shape in slide.shapes:
                try:
                    # Process images
                    if hasattr(shape, "image") and shape.image:
                        image_stream = io.BytesIO(shape.image.blob)
                        img = Image.open(image_stream)
                        left = int(
                            shape.left * slide_width_px / presentation.slide_width
                        )
                        top = int(
                            shape.top * slide_height_px / presentation.slide_height
                        )
                        slide_img.paste(img, (left, top))

                    # Process text
                    elif hasattr(shape, "text") and shape.text:
                        text_left = int(
                            shape.left * slide_width_px / presentation.slide_width
                        )
                        text_top = int(
                            shape.top * slide_height_px / presentation.slide_height
                        )
                        draw.text(
                            (text_left, text_top),
                            shape.text,
                            fill="black",
                            font=font,
                        )

                except Exception as shape_error:
                    logger.warning(
                        f"Error processing shape in slide {i+1}: {str(shape_error)}"
                    )

            # Save the slide image
            img_path = os.path.join(temp_dir, f"slide_{i+1}.jpg")
            slide_img.save(img_path, "JPEG")

            # Convert to base64
            base64_image = encode_images(img_path)
            slides_data.append(
                PowerPointSlide(
                    slide_number=i + 1, image=f"data:image/jpeg;base64,{base64_image}"
                )
            )

        # Create the result
        result = PowerPointDocument(
            file_path=document_path,
            file_name=os.path.basename(document_path),
            slide_count=total_slides,
            slides=slides_data,
        )

        return result.model_dump_json()

    except Exception as e:
        return handle_error(e, "PowerPoint processing", document_path)
    finally:
        # Clean up temporary files
        try:
            for file in os.listdir(temp_dir):
                os.remove(os.path.join(temp_dir, file))
            os.rmdir(temp_dir)
        except Exception as cleanup_error:
            logger.warning(f"Error cleaning up temporary files: {str(cleanup_error)}")


@mcp.tool(
    description="Read an HTML file and extract the text content; optionally extract links, images, and table information, and convert to Markdown format."
)
def mcpreadhtmltext(
    document_path: str = Field(description="The local HTML file path."),
    extract_links: bool = Field(
        default=True, description="Whether to extract link information"
    ),
    extract_images: bool = Field(
        default=True, description="Whether to extract image information"
    ),
    extract_tables: bool = Field(
        default=True, description="Whether to extract table information"
    ),
    convert_to_markdown: bool = Field(
        default=True, description="Whether to convert HTML to Markdown format"
    ),
) -> str:
    """Read an HTML file and extract the text content; optionally extract links, images, and table information, and convert to Markdown format."""
    error = check_file_readable(document_path)
    if error:
        return DocumentError(error=error, file_path=document_path).model_dump_json()

    try:
        # Read the HTML file
        with open(document_path, "r", encoding="utf-8") as f:
            html_content = f.read()

        # Parse the HTML using BeautifulSoup
        soup = BeautifulSoup(html_content, "html.parser")

        # Extract text content (remove script and style content)
        for script in soup(["script", "style"]):
            script.extract()
        text_content = soup.get_text(separator="\n", strip=True)

        # Extract the title
        title = soup.title.string if soup.title else None

        # Initialize the result object
        result = HtmlDocument(
            content=text_content,
            html_content=html_content,
            file_path=document_path,
            file_name=os.path.basename(document_path),
            file_size=os.path.getsize(document_path),
            last_modified=datetime.fromtimestamp(
                os.path.getmtime(document_path)
            ).strftime("%Y-%m-%d %H:%M:%S"),
            title=title,
        )

        # Extract links
        if extract_links:
            links = []
            for link in soup.find_all("a"):
                href = link.get("href")
                text = link.get_text(strip=True)
                if href:
                    links.append({"url": href, "text": text})
            result.links = links

        # Extract images
        if extract_images:
            images = []
            for img in soup.find_all("img"):
                src = img.get("src")
                alt = img.get("alt", "")
                if src:
                    images.append({"src": src, "alt": alt})
            result.images = images

        # Extract tables
        if extract_tables:
            tables = []
            for table in soup.find_all("table"):
                tables.append(str(table))
            result.tables = tables

        # Convert to Markdown
        if convert_to_markdown:
            h = html2text.HTML2Text()
            h.ignore_links = False
            h.ignore_images = False
            h.ignore_tables = False
            markdown_content = h.handle(html_content)
            result.markdown = markdown_content

        return result.model_dump_json()

    except Exception as e:
        return handle_error(e, "HTML file reading", document_path)


def main():
    load_dotenv()

    print("Starting Document MCP Server...", file=sys.stderr)
    mcp.run(transport="stdio")


# Make the module callable
def __call__():
    """
    Make the module callable for uvx.
    This function is called when the module is executed directly.
    """
    main()


sys.modules[__name__].__call__ = __call__

# Run the server when the script is executed directly
if __name__ == "__main__":
    main()
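
As with the other servers, the decorated tools remain plain functions, so a quick local check is possible without an MCP client. A sketch using mcpreadjson (the /tmp path is illustrative):

import json

sample_path = "/tmp/sample.json"  # illustrative scratch file
with open(sample_path, "w", encoding="utf-8") as f:
    json.dump({"name": "AWorld", "tools": 8}, f)

from mcp_servers.document_server import mcpreadjson

# Returns a JsonDocument serialized to JSON, e.g.
# {"format": "json", "type": "object", "keys": ["name", "tools"], ...}
print(mcpreadjson(document_path=sample_path, is_jsonl=False))
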
AWorld-main/aworlddistributed/mcp_servers/download_server.py ADDED
@@ -0,0 +1,199 @@
"""
Download MCP Server

This module provides MCP server functionality for downloading files from URLs.
It handles various download scenarios with proper validation, error handling,
and progress tracking.

Key features:
- File downloading from HTTP/HTTPS URLs
- Download progress tracking
- File validation
- Safe file saving

Main functions:
- mcpdownloadfiles: Downloads files from URLs to the local filesystem
"""

import os
import sys
import traceback
import urllib.parse
from pathlib import Path
from typing import List, Optional

import requests
from dotenv import load_dotenv
from mcp.server.fastmcp import FastMCP
from pydantic import BaseModel, Field

from aworld.logs.util import logger

mcp = FastMCP("download-server")


class DownloadResult(BaseModel):
    """Download result model with file information"""

    file_path: str
    file_name: str
    file_size: int
    content_type: Optional[str] = None
    success: bool
    error: Optional[str] = None


class DownloadResults(BaseModel):
    """Download results model for multiple files"""

    results: List[DownloadResult]
    success_count: int
    failed_count: int


@mcp.tool(description="Download files from URLs and save to the local filesystem.")
def mcpdownloadfiles(
    urls: List[str] = Field(
        ..., description="The URLs of the files to download. Must be a list of URLs."
    ),
    output_dir: str = Field(
        "/tmp/mcp_downloads",
        description="Directory to save the downloaded files (default: /tmp/mcp_downloads).",
    ),
    timeout: int = Field(60, description="Download timeout in seconds (default: 60)."),
) -> str:
    """Download files from URLs and save to the local filesystem.

    Args:
        urls: The URLs of the files to download; must be a list of URLs
        output_dir: Directory to save the downloaded files
        timeout: Download timeout in seconds

    Returns:
        JSON string with download results information
    """
    results = []
    success_count = 0
    failed_count = 0

    for single_url in urls:
        result_json = _download_single_file(single_url, output_dir, "", timeout)
        result = DownloadResult.model_validate_json(result_json)
        results.append(result)

        if result.success:
            success_count += 1
        else:
            failed_count += 1

    batch_results = DownloadResults(
        results=results, success_count=success_count, failed_count=failed_count
    )

    return batch_results.model_dump_json()


def _download_single_file(
    url: str, output_dir: str, filename: str, timeout: int
) -> str:
    """Download a single file from a URL and save it to the local filesystem."""
    try:
        # Validate URL
        if not url.startswith(("http://", "https://")):
            raise ValueError(
                "Invalid URL format. URL must start with http:// or https://"
            )

        # Create the output directory if it doesn't exist
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        # Determine the filename if not provided
        if not filename:
            filename = os.path.basename(urllib.parse.urlparse(url).path)
            if not filename:
                filename = "downloaded_file"

        # Full path to save the file
        file_path = os.path.join(output_path, filename)

        logger.info(f"Downloading file from {url} to {file_path}")
        # Download the file in streaming mode
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AWorld/1.0 (https://github.com/inclusionAI/AWorld; [email protected]) "
                "Python/requests "
            ),
            "Accept": "text/html,application/xhtml+xml,application/xml,application/pdf;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        response = requests.get(url, headers=headers, stream=True, timeout=timeout)
        response.raise_for_status()

        # Get the content type
        content_type = response.headers.get("Content-Type")

        # Save the file
        with open(file_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)

        # Get the actual file size
        actual_size = os.path.getsize(file_path)

        logger.info(f"File downloaded successfully to {file_path}")

        # Create the result
        result = DownloadResult(
            file_path=file_path,
            file_name=filename,
            file_size=actual_size,
            content_type=content_type,
            success=True,
            error=None,
        )

        return result.model_dump_json()

    except Exception as e:
        error_msg = str(e)
        logger.error(f"Download error: {traceback.format_exc()}")

        result = DownloadResult(
            file_path="",
            file_name="",
            file_size=0,
            content_type=None,
            success=False,
            error=error_msg,
        )

        return result.model_dump_json()


def main():
    load_dotenv()

    print("Starting Download MCP Server...", file=sys.stderr)
    mcp.run(transport="stdio")


# Make the module callable
def __call__():
    """
    Make the module callable for uvx.
    This function is called when the module is executed directly.
    """
    main()


sys.modules[__name__].__call__ = __call__

# Run the server when the script is executed directly
if __name__ == "__main__":
    main()
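
A minimal invocation sketch (the URL below is a placeholder; any reachable file URL works):

from mcp_servers.download_server import mcpdownloadfiles

result_json = mcpdownloadfiles(
    urls=["https://example.com/report.pdf"],  # placeholder URL
    output_dir="/tmp/mcp_downloads",
    timeout=60,
)
print(result_json)  # DownloadResults with per-file success/error details
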
AWorld-main/aworlddistributed/mcp_servers/e2b_code_server.py ADDED
@@ -0,0 +1,96 @@
import os
import sys

from e2b_code_interpreter import Sandbox
from mcp.server.fastmcp import FastMCP
from pydantic import Field

# Initialize MCP server
mcp = FastMCP("e2b-code-server")


@mcp.tool(description="Upload a local file to the e2b sandbox.")
async def e2b_upload_file(
    path: str = Field(
        description="The local file path to upload."
    )
) -> str:
    """
    Upload a local file to the e2b sandbox.

    Args:
        path (str): The local file path to upload.

    Returns:
        str: The e2b file path and sandbox_id.
    """
    try:
        # Sandbox() reads E2B_API_KEY from the environment
        if not os.getenv("E2B_API_KEY"):
            return "Upload failed. Error: E2B_API_KEY is not set"
        sbx = Sandbox()
        local_file_name = os.path.basename(path)
        e2b_file_path = f"/home/user/{local_file_name}"
        # Read the local file relative to the current working directory
        with open(path, "rb") as file:
            # Upload the file to an absolute path in the sandbox
            sbx.files.write(e2b_file_path, file)
        return f"{e2b_file_path}, {sbx.sandbox_id}"
    except Exception as e:
        return f"Upload failed. Error: {str(e)}"


@mcp.tool(description="Run code in a specified e2b sandbox.")
async def e2b_run_code(
    sandbox_id: str = Field(
        default=None,
        description="The sandbox id to run code in; if you have uploaded a file, use the sandbox_id returned by the e2b_upload_file function."
    ),
    code_block: str = Field(
        default=None,
        description="The code block to run in the e2b sandbox."
    ),
) -> str:
    """
    Run code in a specified e2b sandbox.

    Args:
        sandbox_id (str): The sandbox id to run code in.
        code_block (str): The code block to run in the e2b sandbox.

    Returns:
        str: The result of running the code block.
    """
    try:
        if not os.getenv("E2B_API_KEY"):
            return "Run code failed. Error: E2B_API_KEY is not set"
        sbx = Sandbox(
            sandbox_id=sandbox_id,
        )
        execution = sbx.run_code(code_block)
        return str(execution.logs)
    except Exception as e:
        return f"Run code failed. Error: {str(e)}"


def main():
    from dotenv import load_dotenv
    load_dotenv()

    print("Starting E2b Code MCP Server...", file=sys.stderr)
    mcp.run(transport='stdio')


# Make the module callable
def __call__():
    """
    Make the module callable for uvx.
    This function is called when the module is executed directly.
    """
    main()


# Add this for compatibility with uvx
sys.modules[__name__].__call__ = __call__

# Run the server when the script is executed directly
if __name__ == "__main__":
    main()
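
An end-to-end sketch of the intended upload-then-run flow, assuming E2B_API_KEY is set and parsing the "path, sandbox_id" string the upload tool returns on success; the local file name is hypothetical:

import asyncio

from mcp_servers.e2b_code_server import e2b_upload_file, e2b_run_code

async def demo():
    # e2b_upload_file returns "<e2b_file_path>, <sandbox_id>" on success
    upload_result = await e2b_upload_file(path="data.csv")  # hypothetical local file
    e2b_path, sandbox_id = [part.strip() for part in upload_result.split(",")]
    logs = await e2b_run_code(
        sandbox_id=sandbox_id,
        code_block=f"print(open('{e2b_path}').read()[:100])",
    )
    print(logs)

asyncio.run(demo())
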
AWorld-main/aworlddistributed/mcp_servers/gen_audio_server.py ADDED
@@ -0,0 +1,197 @@
1
+ import os
2
+ import time
3
+ import json
4
+ import requests
5
+ import sys
6
+ import hashlib
7
+
8
+ from dotenv import load_dotenv
9
+ from mcp.server import FastMCP
10
+ from pydantic import Field
11
+ from typing_extensions import Any
12
+
13
+ from aworld.logs.util import logger
14
+
15
+ mcp = FastMCP("gen-audio-server")
16
+
17
+
18
+ def calculate_sha256(plain_text):
19
+ """
20
+ Calculate SHA-256 digest of a string.
21
+
22
+ Args:
23
+ plain_text (str): The text to digest
24
+
25
+ Returns:
26
+ str: Hexadecimal representation of the digest
27
+ """
28
+ try:
29
+ # Create SHA-256 hash object
30
+ sha256 = hashlib.sha256()
31
+
32
+ # Update with the bytes of the plain text (UTF-8 encoded)
33
+ sha256.update(plain_text.encode('utf-8'))
34
+
35
+ # Get the digest in bytes
36
+ digest_bytes = sha256.digest()
37
+
38
+ # Convert each byte to hexadecimal and join
39
+ hex_digest = ''.join([f'{b:02x}' for b in digest_bytes])
40
+
41
+ return hex_digest
42
+ except Exception as e:
43
+ logger.warning(f"Error calculating SHA-256 digest: {e}")
44
+ return ""
45
+
46
+
47
+ def generate_headers(app_key, secret):
48
+ """Generate headers with fresh timestamp and digest"""
49
+ timestamp = str(int(time.time() * 1000))
50
+ plain_text = f"{app_key}_{secret}_{timestamp}"
51
+ digest = calculate_sha256(plain_text)
52
+
53
+ return {
54
+ 'Content-Type': 'application/json',
55
+ 'Alipay-Mf-Appkey': app_key,
56
+ 'Alipay-Mf-Digest': digest,
57
+ 'Alipay-Mf-Timestamp': timestamp
58
+ }
59
+
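The digest is plain SHA-256 over the string appkey_secret_timestamp, and the manual byte formatting in calculate_sha256 is equivalent to hashlib's hexdigest, as this quick sketch with placeholder credentials shows:

import hashlib, time

app_key, secret = "demo_key", "demo_secret"  # placeholders, not real credentials
timestamp = str(int(time.time() * 1000))
plain_text = f"{app_key}_{secret}_{timestamp}"
assert hashlib.sha256(plain_text.encode("utf-8")).hexdigest() == calculate_sha256(plain_text)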
60
+
61
+ @mcp.tool(description="Generate audio from text content")
62
+ def gen_audio(content: str = Field(description="The text content to convert to audio")) -> Any:
63
+ """Generate audio from text content using TTS service"""
64
+ task_url = os.getenv('AUDIO_TASK_URL')
65
+ query_url = os.getenv('AUDIO_QUERY_URL')
66
+ app_key = os.getenv('AUDIO_APP_KEY')
67
+ secret = os.getenv('AUDIO_SECRET')
68
+ if not (task_url and query_url and app_key and secret):
69
+ logger.warning(f"Query failed: task_url, query_url, app_key, secret parameters incomplete")
70
+ return None
71
+
72
+ # Generate initial headers
73
+ headers = generate_headers(app_key, secret)
74
+
75
+ sample_rate = os.getenv('AUDIO_SAMPLE_RATE', '16000')
76
+ audio_format = os.getenv('AUDIO_AUDIO_FORMAT', 'wav')
77
+ tts_voice = os.getenv('AUDIO_TTS_VOICE', 'DBCNF245')
78
+ tts_speech_rate = os.getenv('AUDIO_TTS_SPEECH_RATE', '0')
79
+ tts_volume = os.getenv('AUDIO_TTS_VOLUME', '50')
80
+ tts_pitch = os.getenv('AUDIO_TTS_PITCH', '0')
81
+ voice_type = os.getenv('AUDIO_VOICE_TYPE', 'VOICE_CLONE_LAM')
82
+
83
+ # task_data
84
+ task_data = {
85
+ "sample_rate": sample_rate,
86
+ "audio_format": audio_format,
87
+ "tts_voice": tts_voice,
88
+ "tts_speech_rate": tts_speech_rate,
89
+ "tts_volume": tts_volume,
90
+ "tts_pitch": tts_pitch,
91
+ "tts_text": content,
92
+ "voice_type": voice_type,
93
+ }
94
+
95
+ try:
96
+ # Step 1: Submit task to generate audio
97
+
98
+ response = requests.post(task_url, headers=headers, json=task_data)
99
+
100
+ if response.status_code != 200:
101
+ return None
102
+
103
+ result = response.json()
104
+
105
+ # Check if task was successfully submitted
106
+ if not result.get("success"):
107
+ return None
108
+
109
+ # Extract task ID
110
+ task_id = result.get("data")
111
+ if not task_id:
112
+ return None
113
+
114
+ logger.info(f"Task submitted successfully. Task ID: {task_id}")
115
+
116
+ # Step 2: Poll for results
117
+ max_attempts = int(os.getenv('AUDIO_RETRY_TIMES', 10))
118
+ wait_time = int(os.getenv('AUDIO_SLEEP_TIME', 5))
119
+ query_url = query_url + f"?async_task_id={task_id}"
120
+
121
+ for attempt in range(max_attempts):
122
+ # Wait before polling
123
+ time.sleep(wait_time)
124
+ logger.info(f"Polling attempt {attempt + 1}/{max_attempts}...")
125
+
126
+ # Generate fresh headers for each poll request
127
+ query_headers = generate_headers(app_key, secret)
128
+
129
+ # Poll for results
130
+ query_response = requests.post(query_url, headers=query_headers)
131
+
132
+ if query_response.status_code != 200:
133
+ logger.info(f"Poll request failed with status code {query_response.status_code}")
134
+ continue
135
+
136
+ try:
137
+ query_result = query_response.json()
138
+ except json.JSONDecodeError as e:
139
+ logger.warning(f"Failed to parse response as JSON: {e}")
140
+ continue
141
+
142
+ # Check if processing is complete
143
+ if query_result.get("success") and query_result.get("data", {}).get("status") == "ST_SUCCESS":
144
+ # Extract audio URL based on the correct JSON structure
145
+ # Navigate through the nested structure: data -> result -> result -> audioUrl
146
+ audio_url = query_result.get("data", {}).get("result", {}).get("result", {}).get("audioUrl")
147
+
148
+ if audio_url:
149
+ return json.dumps({"audio_data": audio_url})
150
+ else:
151
+ logger.info("Audio URL not found in the response")
152
+ return None
153
+ elif query_result.get("success") and query_result.get("data", {}).get("status") == "ST_RUNNING":
154
+ # If still running, continue to next polling attempt
155
+ logger.info("Task still running, continuing to next poll...")
156
+ continue
157
+ else:
158
+ # Any other status, return None
159
+ logger.warning(f"Unexpected status: {query_result.get('data', {}).get('status')}")
160
+ return None
161
+
162
+ # If we get here, polling timed out
163
+ logger.warning("Polling timed out after maximum attempts")
164
+ return None
165
+
166
+ except Exception as e:
167
+ import traceback
+ logger.warning(f"Exception occurred: {e}\n{traceback.format_exc()}")
169
+ return None
170
+
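For reference, the nested response shape the polling loop walks (data -> result -> result -> audioUrl) looks roughly like the dict below; this is inferred from the parsing code above, not from API documentation:

expected_poll_response = {
    "success": True,
    "data": {
        "status": "ST_SUCCESS",  # "ST_RUNNING" while the task is still processing
        "result": {"result": {"audioUrl": "https://example.com/tts-output.wav"}},  # illustrative URL
    },
}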
171
+
172
+ def main():
173
+ from dotenv import load_dotenv
174
+
175
+ load_dotenv()
176
+
177
+ print("Starting Audio MCP gen-audio-server...", file=sys.stderr)
178
+ mcp.run(transport="stdio")
179
+
180
+
181
+ # Make the module callable
182
+ def __call__():
183
+ """
184
+ Make the module callable for uvx.
185
+ This function is called when the module is executed directly.
186
+ """
187
+ main()
188
+
189
+
190
+ sys.modules[__name__].__call__ = __call__
191
+
192
+ if __name__ == "__main__":
193
+ main()
194
+ # For testing without MCP
195
+ # result = gen_audio("hello ,this is test")
196
+ # print("\nFinal Result:")
197
+ # print(result)
AWorld-main/aworlddistributed/mcp_servers/gen_pic_server.py ADDED
@@ -0,0 +1,166 @@
1
+ import os
2
+ import time
3
+ import json
4
+ import requests
5
+ import sys
6
+
7
+ from dotenv import load_dotenv
8
+ from mcp.server import FastMCP
9
+ from pydantic import Field
10
+ from typing_extensions import Any
11
+
12
+ from aworld.logs.util import logger
13
+
14
+ mcp = FastMCP("gen-pic-server")
15
+
16
+
17
+ @mcp.tool(description="Generate picture from text content")
18
+ def gen_picture(prompt: str = Field(description="The text prompt to generate an image"),
19
+ num: int = Field(0,
20
+ description="Number of images to generate, 0 means use environment variable")) -> Any:
21
+ """Generate picture from text prompt"""
22
+ api_key = os.getenv('DASHSCOPE_API_KEY')
23
+ submit_url = os.getenv('DASHSCOPE_SUBMIT_URL', '')
24
+ query_base_url = os.getenv('DASHSCOPE_QUERY_BASE_URL', '')
25
+
26
+ if not api_key or not submit_url or not query_base_url:
27
+ logger.warning(
28
+ "Query failed: DASHSCOPE_API_KEY,DASHSCOPE_SUBMIT_URL,DASHSCOPE_QUERY_BASE_URL environment variable is not set")
29
+ return None
30
+
31
+ headers = {
32
+ 'X-DashScope-Async': 'enable',
33
+ 'Authorization': f'Bearer {api_key}',
34
+ 'Content-Type': 'application/json'
35
+ }
36
+
37
+ # Get parameters from environment variables or use defaults
38
+ model = os.getenv('DASHSCOPE_MODEL', 'wanx2.1-t2i-turbo')
39
+ size = os.getenv('DASHSCOPE_SIZE', '1024*1024')
40
+
41
+ # Use num parameter if provided (>0), otherwise use environment variable
42
+ n = num if num > 0 else int(os.getenv('DASHSCOPE_N', '1'))
43
+
44
+ task_data = {
45
+ "model": model,
46
+ "input": {
47
+ "prompt": prompt
48
+ },
49
+ "parameters": {
50
+ "size": size,
51
+ "n": n
52
+ }
53
+ }
54
+
55
+ try:
56
+ # Step 1: Submit task to generate image
57
+ logger.info("Submitting task to generate image...")
58
+
59
+ response = requests.post(submit_url, headers=headers, json=task_data)
60
+
61
+ if response.status_code != 200:
62
+ logger.warning(f"Task submission failed with status code {response.status_code}")
63
+ return None
64
+
65
+ result = response.json()
66
+
67
+ # Check if task was successfully submitted
68
+ if not result.get("output") or not result.get("output").get("task_id"):
69
+ logger.warning("Failed to get task_id from response")
70
+ return None
71
+
72
+ # Extract task ID
73
+ task_id = result.get("output").get("task_id")
74
+ logger.info(f"Task submitted successfully. Task ID: {task_id}")
75
+
76
+ # Step 2: Poll for results
77
+ max_attempts = int(os.getenv('DASHSCOPE_RETRY_TIMES', 10))
78
+ wait_time = int(os.getenv('DASHSCOPE_SLEEP_TIME', 5))
79
+ query_url = f"{query_base_url}{task_id}"
80
+
81
+ for attempt in range(max_attempts):
82
+ # Wait before polling
83
+ time.sleep(wait_time)
84
+ logger.info(f"Polling attempt {attempt + 1}/{max_attempts}...")
85
+
86
+ # Poll for results
87
+ query_response = requests.get(query_url, headers={'Authorization': f'Bearer {api_key}'})
88
+
89
+ if query_response.status_code != 200:
90
+ logger.info(f"Poll request failed with status code {query_response.status_code}")
91
+ continue
92
+
93
+ try:
94
+ query_result = query_response.json()
95
+ except json.JSONDecodeError as e:
96
+ logger.warning(f"Failed to parse response as JSON: {e}")
97
+ continue
98
+
99
+ # Check task status
100
+ task_status = query_result.get("output", {}).get("task_status")
101
+
102
+ if task_status == "SUCCEEDED":
103
+ # Extract image URLs
104
+ results = query_result.get("output", {}).get("results", [])
105
+ if results:
106
+ # Create a simple array of objects with image_url
107
+ image_urls = []
108
+ for result in results:
109
+ if "url" in result:
110
+ image_urls.append({"image_url": result["url"]})
111
+
112
+ if image_urls:
113
+ return json.dumps(image_urls)
114
+ else:
115
+ logger.info("No valid image URLs found in the response")
116
+ return None
117
+ else:
118
+ logger.info("No results found in the response")
119
+ return None
120
+ elif task_status in ["PENDING", "RUNNING"]:
121
+ # If still running, continue to next polling attempt
122
+ logger.info(f"Task status: {task_status}, continuing to next poll...")
123
+ continue
124
+ elif task_status == "FAILED":
125
+ logger.warning("Task failed")
126
+ return None
127
+ else:
128
+ # Any other status, return None
129
+ logger.warning(f"Unexpected status: {task_status}")
130
+ return None
131
+
132
+ # If we get here, polling timed out
133
+ logger.warning("Polling timed out after maximum attempts")
134
+ return None
135
+
136
+ except Exception as e:
137
+ logger.warning(f"Exception occurred: {e}")
138
+ return None
139
+
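Every knob of gen_picture comes from the environment. A sketch of the variables it reads (URLs and key are illustrative placeholders; the first three are required, the rest fall back to the defaults shown in the code):

os.environ.setdefault("DASHSCOPE_API_KEY", "sk-placeholder")                            # required
os.environ.setdefault("DASHSCOPE_SUBMIT_URL", "https://dashscope.example/submit")       # required
os.environ.setdefault("DASHSCOPE_QUERY_BASE_URL", "https://dashscope.example/tasks/")   # required
os.environ.setdefault("DASHSCOPE_MODEL", "wanx2.1-t2i-turbo")
os.environ.setdefault("DASHSCOPE_SIZE", "1024*1024")
os.environ.setdefault("DASHSCOPE_N", "1")
os.environ.setdefault("DASHSCOPE_RETRY_TIMES", "10")
os.environ.setdefault("DASHSCOPE_SLEEP_TIME", "5")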
140
+
141
+ def main():
142
+ from dotenv import load_dotenv
143
+
144
+ load_dotenv()
145
+
146
+ print("Starting MCP gen-pic-server...", file=sys.stderr)
147
+ mcp.run(transport="stdio")
148
+
149
+
150
+ # Make the module callable
151
+ def __call__():
152
+ """
153
+ Make the module callable for uvx.
154
+ This function is called when the module is executed directly.
155
+ """
156
+ main()
157
+
158
+
159
+ sys.modules[__name__].__call__ = __call__
160
+
161
+ if __name__ == "__main__":
162
+ main()
163
+ # For testing without MCP
164
+ # result = gen_picture("sunflower", 2)
165
+ # print("\nFinal Result:")
166
+ # print(result)
AWorld-main/aworlddistributed/mcp_servers/gen_video_server.py ADDED
@@ -0,0 +1,153 @@
1
+ import os
2
+ import time
3
+ import json
4
+ import requests
5
+ import sys
6
+
7
+ from dotenv import load_dotenv
8
+ from mcp.server import FastMCP
9
+ from pydantic import Field
10
+ from typing_extensions import Any
11
+
12
+ from aworld.logs.util import logger
13
+
14
+ mcp = FastMCP("gen-video-server")
15
+
16
+ @mcp.tool(description="Generate video from text content")
17
+ def gen_video(prompt: str = Field(description="The text prompt to generate a video")) -> Any:
18
+ """Generate video from text prompt"""
19
+ api_key = os.getenv('DASHSCOPE_API_KEY')
20
+ submit_url = os.getenv('DASHSCOPE_VIDEO_SUBMIT_URL', '')
21
+ query_base_url = os.getenv('DASHSCOPE_QUERY_BASE_URL', '')
22
+
23
+ if not api_key or not submit_url or not query_base_url:
24
+ logger.warning("Query failed: DASHSCOPE_API_KEY, DASHSCOPE_VIDEO_SUBMIT_URL, DASHSCOPE_QUERY_BASE_URL environment variables are not set")
25
+ return None
26
+
27
+ headers = {
28
+ 'X-DashScope-Async': 'enable',
29
+ 'Authorization': f'Bearer {api_key}',
30
+ 'Content-Type': 'application/json'
31
+ }
32
+
33
+ # Get parameters from environment variables or use defaults
34
+ model = os.getenv('DASHSCOPE_VIDEO_MODEL', 'wanx2.1-t2v-turbo')
35
+ size = os.getenv('DASHSCOPE_VIDEO_SIZE', '1280*720')
36
+
37
+ # Note: the API currently generates one video per request, so unlike
+ # gen_picture this tool does not expose a num parameter
39
+
40
+ task_data = {
41
+ "model": model,
42
+ "input": {
43
+ "prompt": prompt
44
+ },
45
+ "parameters": {
46
+ "size": size
47
+ }
48
+ }
49
+
50
+ try:
51
+ # Step 1: Submit task to generate video
52
+ logger.info("Submitting task to generate video...")
53
+
54
+ response = requests.post(submit_url, headers=headers, json=task_data)
55
+
56
+ if response.status_code != 200:
57
+ logger.warning(f"Task submission failed with status code {response.status_code}")
58
+ return None
59
+
60
+ result = response.json()
61
+
62
+ # Check if task was successfully submitted
63
+ if not result.get("output") or not result.get("output").get("task_id"):
64
+ logger.warning("Failed to get task_id from response")
65
+ return None
66
+
67
+ # Extract task ID
68
+ task_id = result.get("output").get("task_id")
69
+ logger.info(f"Task submitted successfully. Task ID: {task_id}")
70
+
71
+ # Step 2: Poll for results
72
+ max_attempts = int(os.getenv('DASHSCOPE_VIDEO_RETRY_TIMES', 10)) # Increased default retries for video
73
+ wait_time = int(os.getenv('DASHSCOPE_VIDEO_SLEEP_TIME', 5)) # Increased default wait time for video
74
+ query_url = f"{query_base_url}{task_id}"
75
+
76
+ for attempt in range(max_attempts):
77
+ # Wait before polling
78
+ time.sleep(wait_time)
79
+ logger.info(f"Polling attempt {attempt + 1}/{max_attempts}...")
80
+
81
+ # Poll for results
82
+ query_response = requests.get(query_url, headers={'Authorization': f'Bearer {api_key}'})
83
+
84
+ if query_response.status_code != 200:
85
+ logger.info(f"Poll request failed with status code {query_response.status_code}")
86
+ continue
87
+
88
+ try:
89
+ query_result = query_response.json()
90
+ except json.JSONDecodeError as e:
91
+ logger.warning(f"Failed to parse response as JSON: {e}")
92
+ continue
93
+
94
+ # Check task status
95
+ task_status = query_result.get("output", {}).get("task_status")
96
+
97
+ if task_status == "SUCCEEDED":
98
+ # Extract video URL
99
+ video_url = query_result.get("output", {}).get("video_url")
100
+
101
+ if video_url:
102
+ # Return a single object with the video_url (the image API returns an array of objects)
103
+ return json.dumps({"video_url": video_url})
104
+ else:
105
+ logger.info("Video URL not found in the response")
106
+ return None
107
+ elif task_status in ["PENDING", "RUNNING"]:
108
+ # If still running, continue to next polling attempt
109
+ logger.info(f"Task status: {task_status}, continuing to next poll...")
110
+ continue
111
+ elif task_status == "FAILED":
112
+ logger.warning("Task failed")
113
+ return None
114
+ else:
115
+ # Any other status, return None
116
+ logger.warning(f"Unexpected status: {task_status}")
117
+ return None
118
+
119
+ # If we get here, polling timed out
120
+ logger.warning("Polling timed out after maximum attempts")
121
+ return None
122
+
123
+ except Exception as e:
124
+ logger.warning(f"Exception occurred: {e}")
125
+ return None
126
+
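Unlike the image task, a successful video task carries a single video_url directly under output. The shape the parsing code expects is roughly (inferred from the code; illustrative URL):

expected_video_response = {
    "output": {
        "task_status": "SUCCEEDED",  # or PENDING / RUNNING / FAILED
        "video_url": "https://example.com/generated.mp4",
    }
}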
127
+
128
+ def main():
129
+ from dotenv import load_dotenv
130
+
131
+ load_dotenv()
132
+
133
+ print("Starting MCP gen-video-server...", file=sys.stderr)
134
+ mcp.run(transport="stdio")
135
+
136
+
137
+ # Make the module callable
138
+ def __call__():
139
+ """
140
+ Make the module callable for uvx.
141
+ This function is called when the module is executed directly.
142
+ """
143
+ main()
144
+
145
+
146
+ sys.modules[__name__].__call__ = __call__
147
+
148
+ if __name__ == "__main__":
149
+ main()
150
+ # For testing without MCP
151
+ # result = gen_video("A cat running under moonlight")
152
+ # print("\nFinal Result:")
153
+ # print(result)
AWorld-main/aworlddistributed/mcp_servers/image_server.py ADDED
@@ -0,0 +1,240 @@
1
+ """
2
+ Image MCP Server
3
+
4
+ This module provides MCP server functionality for image processing and analysis.
5
+ It handles image encoding, optimization, and various image analysis tasks such as
6
+ OCR (Optical Character Recognition) and visual reasoning.
7
+
8
+ The server supports both local image files and remote image URLs with proper validation
9
+ and handles various image formats including JPEG, PNG, GIF, and others.
10
+
11
+ Main functions:
12
+ - encode_images: Encodes images to base64 format with optimization
13
+ - optimize_image: Resizes and optimizes images for better performance
14
+ - Various MCP tools for image analysis and processing
15
+ """
16
+
17
+ # import asyncio
18
+ import base64
19
+ import os
20
+ from io import BytesIO
21
+ from typing import Any, Dict, List
22
+
23
+ from PIL import Image
24
+ from pydantic import Field
25
+ from aworld.logs.util import logger
26
+ from mcp_servers.utils import get_file_from_source
27
+ from mcp.server.fastmcp import FastMCP
28
+ from openai import OpenAI
29
+
30
+ # Initialize MCP server
31
+ mcp = FastMCP("image-server")
32
+
33
+
34
+ IMAGE_OCR = (
35
+ "Input is a base64 encoded image. Read text from image if present. "
36
+ "Return a json string with the following format: "
37
+ '{"image_text": "text from image"}'
38
+ )
39
+
40
+ IMAGE_REASONING = (
41
+ "Input is a base64 encoded image. Given user's task: {task}, "
42
+ "solve it following the guide line:\n"
43
+ "1. Careful visual inspection\n"
44
+ "2. Contextual reasoning\n"
45
+ "3. Text transcription where relevant\n"
46
+ "4. Logical deduction from visual evidence\n"
47
+ "Return a json string with the following format: "
48
+ '{"image_reasoning_result": "reasoning result given task and image"}'
49
+ )
50
+
51
+
52
+ def optimize_image(image_data: bytes, max_size: int = 1024) -> bytes:
53
+ """
54
+ Optimize image by resizing if needed
55
+
56
+ Args:
57
+ image_data: Raw image data
58
+ max_size: Maximum dimension size in pixels
59
+
60
+ Returns:
61
+ bytes: Optimized image data
62
+
63
+ Raises:
64
+ ValueError: When image cannot be processed
65
+ """
66
+ try:
67
+ image = Image.open(BytesIO(image_data))
68
+
69
+ # Resize if image is too large
70
+ if max(image.size) > max_size:
71
+ ratio = max_size / max(image.size)
72
+ new_size = (int(image.size[0] * ratio), int(image.size[1] * ratio))
73
+ image = image.resize(new_size, Image.Resampling.LANCZOS)
74
+
75
+ # Save to buffer
76
+ buffered = BytesIO()
77
+ image_format = image.format if image.format else "JPEG"
78
+ image.save(buffered, format=image_format)
79
+ return buffered.getvalue()
80
+
81
+ except Exception as e:
82
+ logger.warning(f"Failed to optimize image: {str(e)}")
83
+ return image_data # Return original data if optimization fails
84
+
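A quick way to exercise optimize_image in isolation (sample.jpg is a hypothetical local file):

with open("sample.jpg", "rb") as f:  # hypothetical input image
    raw = f.read()
small = optimize_image(raw, max_size=512)
# re-encoded, and downscaled if the longest side exceeded 512 pixels
print(f"{len(raw)} -> {len(small)} bytes")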
85
+
86
+ def encode_images(image_sources: List[str], with_header: bool = True) -> List[str]:
87
+ """
88
+ Encode images to base64 format with robust file handling
89
+
90
+ Args:
91
+ image_sources: List of URLs or local file paths of images
92
+ with_header: Whether to include MIME type header
93
+
94
+ Returns:
95
+ List[str]: Base64 encoded image strings, with MIME type prefix if with_header is True
96
+
97
+ Raises:
98
+ ValueError: When image source is invalid or image format is not supported
99
+ """
100
+ if not image_sources:
101
+ raise ValueError("Image sources cannot be empty")
102
+
103
+ images = []
104
+ for image_source in image_sources:
105
+ try:
106
+ # Get file with validation (only image files allowed)
107
+ file_path, mime_type, content = get_file_from_source(
108
+ image_source,
109
+ allowed_mime_prefixes=["image/"],
110
+ max_size_mb=10.0, # 10MB limit for images
111
+ type="image",
112
+ )
113
+
114
+ # Optimize image
115
+ optimized_content = optimize_image(content)
116
+
117
+ # Encode to base64
118
+ image_base64 = base64.b64encode(optimized_content).decode()
119
+
120
+ # Format with header if requested
121
+ final_image = (
122
+ f"data:{mime_type};base64,{image_base64}"
123
+ if with_header
124
+ else image_base64
125
+ )
126
+
127
+ images.append(final_image)
128
+
129
+ # Clean up temporary file if it was created for a URL
130
+ if file_path != os.path.abspath(image_source) and os.path.exists(file_path):
131
+ os.unlink(file_path)
132
+
133
+ except Exception as e:
134
+ logger.error(f"Error encoding image from {image_source}: {str(e)}")
135
+ raise
136
+
137
+ return images
138
+
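Each entry returned by encode_images is a data URL when with_header is True, so it can be dropped straight into an image_url message part. Roughly (illustrative path and payload):

urls = encode_images(["/tmp/photo.png"])  # hypothetical local file
# urls[0] == "data:image/jpeg;base64,/9j/4AAQ..." (utils currently reports image/jpeg for all images)
mime_header, b64_payload = urls[0].split(",", 1)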
139
+ def image_to_base64(image_path):
140
+ try:
141
+ # TODO: extract images from PDFs and other file types
142
+ with Image.open(image_path) as image:
143
+ buffered = BytesIO()
144
+ image_format = image.format if image.format else "JPEG"
145
+ image.save(buffered, format=image_format)
146
+ image_bytes = buffered.getvalue()
147
+ base64_encoded = base64.b64encode(image_bytes).decode('utf-8')
148
+ return base64_encoded
149
+ except Exception as e:
150
+ print(f"Base64 error: {e}")
151
+ return None
152
+
153
+
154
+ def create_image_contents(prompt: str, image_base64: List[str]) -> List[Dict[str, Any]]:
155
+ """Create uniform image format for querying llm."""
156
+ content = [
157
+ {"type": "text", "text": prompt},
158
+ ]
159
+ content.extend(
160
+ [{"type": "image_url", "image_url": {"url": url}} for url in image_base64]
161
+ )
162
+ return content
163
+
164
+
165
+ @mcp.tool(description="solve the question by careful reasoning given the image(s) in given local filepath or url, including reasoning, ocr, etc.")
166
+ def mcp_image_recognition(
167
+ image_urls: List[str] = Field(
168
+ description="The input image(s) in given a list of local filepaths or urls."
169
+ ),
170
+ question: str = Field(description="The question to ask."),
171
+ ) -> str:
172
+ """solve the question by careful reasoning given the image(s) in given filepath or url."""
173
+
174
+ try:
175
+ # Note: only the first image in the list is processed
+ image_base64 = image_to_base64(image_urls[0])
176
+ logger.info(f"image_url: {image_urls[0]}")
177
+ reasoning_prompt = question
178
+ messages=[
179
+ {"role": "system", "content": "You are a helpful assistant."},
180
+ {"role": "user", "content":
181
+ [
182
+ {"type": "text", "text": reasoning_prompt},
183
+ {
184
+ "type": "image_url",
185
+ "image_url": {
186
+ "url": f"data:image/jpeg;base64,{image_base64}"
187
+ }
188
+ },
189
+ ],
190
+ },
191
+ ]
192
+
193
+ client = OpenAI(
194
+ api_key=os.getenv("LLM_API_KEY"),
195
+ base_url=os.getenv("LLM_BASE_URL")
196
+ )
197
+ response = client.chat.completions.create(
198
+ model=os.getenv("LLM_MODEL_NAME"),
199
+ messages=messages,
200
+ )
201
+
202
+ logger.info(f"response: {response}")
203
+ image_reasoning_result = response.choices[0].message.content
204
+
205
+ except Exception as e:
206
+ image_reasoning_result = ""
207
+ import traceback
208
+ traceback.print_exc()
209
+ logger.error(f"image_reasoning_result-Execute error: {e}")
210
+
211
+ logger.info(
212
+ f"---get_reasoning_by_image-image_reasoning_result:{image_reasoning_result}"
213
+ )
214
+
215
+ return image_reasoning_result
216
+
217
+
218
+ def main():
219
+ from dotenv import load_dotenv
220
+ load_dotenv()
221
+
222
+ print("Starting Image MCP Server...", file=sys.stderr)
223
+ mcp.run(transport='stdio')
224
+
225
+ # Make the module callable
226
+ def __call__():
227
+ """
228
+ Make the module callable for uvx.
229
+ This function is called when the module is executed directly.
230
+ """
231
+ main()
232
+
233
+
234
+ # Add this for compatibility with uvx
235
+ import sys
236
+ sys.modules[__name__].__call__ = __call__
237
+
238
+ # Run the server when the script is executed directly
239
+ if __name__ == "__main__":
240
+ main()
AWorld-main/aworlddistributed/mcp_servers/picsearch_server.py ADDED
@@ -0,0 +1,180 @@
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import os
5
+ import sys
6
+
7
+ import aiohttp
8
+ from typing import List, Dict, Any, Optional
9
+ from dotenv import load_dotenv
10
+ from mcp.server import FastMCP
11
+ from pydantic import Field
12
+
13
+ from aworld.logs.util import logger
14
+
15
+
16
+ mcp = FastMCP("picsearch-server")
17
+
18
+ async def search_single(query: str, num: int = 5) -> Optional[Dict[str, Any]]:
19
+ """Execute a single search query, returns None on error"""
20
+ try:
21
+ url = os.getenv('PIC_SEARCH_URL')
22
+ searchMode = os.getenv('PIC_SEARCH_SEARCHMODE')
23
+ source = os.getenv('PIC_SEARCH_SOURCE')
24
+ domain = os.getenv('PIC_SEARCH_DOMAIN')
25
+ uid = os.getenv('PIC_SEARCH_UID')
26
+ if not url or not searchMode or not source or not domain:
27
+ logger.warning(f"Query failed: url, searchMode, source, domain parameters incomplete")
28
+ return None
29
+
30
+ headers = {
31
+ 'Content-Type': 'application/json'
32
+ }
33
+ data = {
34
+ "domain": domain,
35
+ "extParams": {
36
+ "contentType": "llmWholeImage"
37
+ },
38
+ "page": 0,
39
+ "pageSize": num,
40
+ "query": query,
41
+ "searchMode": searchMode,
42
+ "source": source,
43
+ "userId": uid
44
+ }
45
+
46
+ async with aiohttp.ClientSession() as session:
47
+ try:
48
+ async with session.post(url, headers=headers, json=data) as response:
49
+ if response.status != 200:
50
+ logger.warning(f"Query failed: {query}, status code: {response.status}")
51
+ return None
52
+
53
+ result = await response.json()
54
+ return result
55
+ except aiohttp.ClientError:
56
+ logger.warning(f"Request error: {query}")
57
+ return None
58
+ except Exception:
59
+ logger.warning(f"Query exception: {query}")
60
+ return None
61
+
62
+
63
+ def filter_valid_docs(result: Optional[Dict[str, Any]]) -> List[Dict[str, Any]]:
64
+ """Filter valid document results, returns empty list if input is None"""
65
+ if result is None:
66
+ return []
67
+
68
+ try:
69
+ valid_docs = []
70
+
71
+ # Check success field
72
+ if not result.get("success"):
73
+ return valid_docs
74
+
75
+ # Check searchImages field
76
+ search_docs = result.get("searchImages", [])
77
+ if not search_docs:
78
+ return valid_docs
79
+
80
+ # Extract required fields
81
+ required_fields = ["title", "picUrl"]
82
+
83
+ for doc in search_docs:
84
+ # Check if all required fields exist and are not empty
85
+ is_valid = True
86
+ for field in required_fields:
87
+ if field not in doc or not doc[field]:
88
+ is_valid = False
89
+ break
90
+
91
+ if is_valid:
92
+ # Keep only required fields
93
+ filtered_doc = {field: doc[field] for field in required_fields}
94
+ valid_docs.append(filtered_doc)
95
+
96
+ return valid_docs
97
+ except Exception:
98
+ return []
99
+
100
+ @mcp.tool(description="Search Picture based on the user's input query")
101
+ async def search(
102
+ query: str = Field(
103
+ description="The query to search for picture"
104
+ ),
105
+ num: int = Field(
106
+ 5,
107
+ description="Maximum number of results to return, default is 5"
108
+ )
109
+ ) -> Any:
110
+ """Execute search function for a single query"""
111
+ try:
112
+ # Get configuration from environment variables
113
+ env_total_num = os.getenv('PIC_SEARCH_TOTAL_NUM')
114
+ if env_total_num and env_total_num.isdigit():
115
+ # Force override input num parameter with environment variable
116
+ num = int(env_total_num)
117
+
118
+ # If no query provided, return empty list
119
+ if not query:
120
+ return json.dumps([])
121
+
122
+ # Get actual number of results to return
123
+ slice_num = os.getenv('PIC_SEARCH_SLICE_NUM')
124
+ if slice_num and slice_num.isdigit():
125
+ actual_num = int(slice_num)
126
+ else:
127
+ actual_num = num
128
+
129
+ # Execute the query
130
+ result = await search_single(query, actual_num)
131
+
132
+ # Filter results
133
+ valid_docs = filter_valid_docs(result)
134
+
135
+ # Return results
136
+ result_json = json.dumps(valid_docs, ensure_ascii=False)
137
+ logger.info(f"Completed query: '{query}', found {len(valid_docs)} valid documents")
138
+ logger.info(result_json)
139
+
140
+ return result_json
141
+ except Exception as e:
142
+ # Return empty list on exception
143
+ logger.error(f"Error processing query: {str(e)}")
144
+ return json.dumps([])
145
+
146
+
147
+ def main():
148
+ from dotenv import load_dotenv
149
+
150
+ load_dotenv()
151
+
152
+ print("Starting Audio MCP picsearch-server...", file=sys.stderr)
153
+ mcp.run(transport="stdio")
154
+
155
+
156
+ # Make the module callable
157
+ def __call__():
158
+ """
159
+ Make the module callable for uvx.
160
+ This function is called when the module is executed directly.
161
+ """
162
+ main()
163
+
164
+ sys.modules[__name__].__call__ = __call__
165
+
166
+ if __name__ == "__main__":
167
+ main()
168
+
169
+ # if __name__ == "__main__":
170
+ # # Configure logging
171
+ # logging.basicConfig(
172
+ # level=logging.INFO,
173
+ # format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
174
+ # )
175
+ #
176
+ #
177
+ # # Test single query
178
+ # asyncio.run(search(query="Image search test"))
179
+ #
180
+ # # Testing multiple queries is no longer applicable
AWorld-main/aworlddistributed/mcp_servers/reasoning_server.py ADDED
@@ -0,0 +1,102 @@
1
+ import os
2
+ import sys
3
+ import traceback
4
+
5
+ from dotenv import load_dotenv
6
+ from mcp.server.fastmcp import FastMCP
7
+ from pydantic import Field
8
+
9
+ from aworld.config.conf import AgentConfig
10
+ from aworld.logs.util import logger
11
+ from aworld.models.llm import call_llm_model, get_llm_model
12
+
13
+ # Initialize MCP server
14
+ mcp = FastMCP("reasoning-server")
15
+
16
+
17
+ @mcp.tool(
18
+ description="Perform complex problem reasoning using powerful reasoning model."
19
+ )
20
+ def complex_problem_reasoning(
21
+ question: str = Field(
22
+ description="The input question for complex problem reasoning,"
23
+ + " such as math and code contest problem",
24
+ ),
25
+ original_task: str = Field(
26
+ default="",
27
+ description="The original task description."
28
+ + " This argument could be fetched from the <task>TASK</task> tag",
29
+ ),
30
+ ) -> str:
31
+ """
32
+ Perform complex problem reasoning using Powerful Reasoning model,
33
+ such as riddle, game or competition-level STEM(including code) problems.
34
+
35
+ Args:
36
+ question: The input question for complex problem reasoning
37
+ original_task: The original task description (optional)
38
+
39
+ Returns:
40
+ str: The reasoning result from the model
41
+ """
42
+ try:
43
+ # Prepare the prompt with both the question and original task if provided
44
+ prompt = question
45
+ if original_task:
46
+ prompt = f"Original Task: {original_task}\n\nQuestion: {question}"
47
+
48
+ # Call the LLM model for reasoning
49
+ response = call_llm_model(
50
+ llm_model=get_llm_model(
51
+ conf=AgentConfig(
52
+ llm_provider="openai",
53
+ llm_model_name=os.getenv("LLM_MODEL_NAME", "your_llm_model_name"),
54
+ llm_api_key=os.getenv("LLM_API_KEY", "your_openai_api_key"),
55
+ llm_base_url=os.getenv("LLM_BASE_URL", "your_openai_base_url"),
56
+ )
57
+ ),
58
+ messages=[
59
+ {
60
+ "role": "system",
61
+ "content": (
62
+ "You are an expert at solving complex problems including math,"
63
+ " code contests, riddles, and puzzles."
64
+ " Provide detailed step-by-step reasoning and a clear final answer."
65
+ ),
66
+ },
67
+ {"role": "user", "content": prompt},
68
+ ],
69
+ temperature=float(os.getenv("LLM_TEMPERATURE", "0.3")),
70
+ )
71
+
72
+ # Extract the reasoning result
73
+ reasoning_result = response.content
74
+
75
+ logger.info("Complex reasoning completed successfully")
76
+ return reasoning_result
77
+
78
+ except Exception as e:
79
+ logger.error(f"Error in complex problem reasoning: {traceback.format_exc()}")
80
+ return f"Error performing reasoning: {str(e)}"
81
+
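The tool is a thin wrapper: it folds the optional original task into the prompt and sends a two-message chat. A sketch of a direct call for local testing (hypothetical inputs; env vars read as in the code):

answer = complex_problem_reasoning(
    question="What is the last digit of 7**2024?",
    original_task="Solve the number-theory riddle stated in the task tag.",
)
print(answer)  # step-by-step reasoning plus a final answer, per the system prompt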
82
+
83
+ def main():
84
+ load_dotenv()
85
+ print("Starting Reasoning MCP Server...", file=sys.stderr)
86
+ mcp.run(transport="stdio")
87
+
88
+
89
+ # Make the module callable
90
+ def __call__():
91
+ """
92
+ Make the module callable for uvx.
93
+ This function is called when the module is executed directly.
94
+ """
95
+ main()
96
+
97
+
98
+ sys.modules[__name__].__call__ = __call__
99
+
100
+ # Run the server when the script is executed directly
101
+ if __name__ == "__main__":
102
+ main()
AWorld-main/aworlddistributed/mcp_servers/search_server.py ADDED
@@ -0,0 +1,165 @@
1
+ """
2
+ Search MCP Server
3
+
4
+ This module provides MCP server functionality for performing web searches using various search engines.
5
+ It supports structured queries and returns formatted search results.
6
+
7
+ Key features:
8
+ - Perform web searches using the Google Custom Search API
9
+ - Filter and format search results
10
+ - Validate and process search queries
11
+
12
+ Main functions:
13
+ - mcpsearchgoogle: Searches the web using Google Custom Search (the only engine implemented in this file)
16
+ """
17
+
18
+ import os
19
+ import sys
20
+ import traceback
21
+ from typing import List, Optional
22
+
23
+ import requests
24
+ from dotenv import load_dotenv
25
+ from mcp.server.fastmcp import FastMCP
26
+ from pydantic import BaseModel, Field
27
+
28
+ from aworld.logs.util import logger
29
+
30
+ # Initialize MCP server
31
+ mcp = FastMCP("search-server")
32
+
33
+
34
+ # Base search result model that all providers will use
35
+ class SearchResult(BaseModel):
36
+ """Base search result model with common fields"""
37
+
38
+ id: str
39
+ title: str
40
+ url: str
41
+ snippet: str
42
+ source: str # Which search engine provided this result
43
+
44
+
45
+ class GoogleSearchResult(SearchResult):
46
+ """Google-specific search result model"""
47
+
48
+ displayLink: str = ""
49
+ formattedUrl: str = ""
50
+ htmlSnippet: str = ""
51
+ htmlTitle: str = ""
52
+ kind: str = ""
53
+ link: str = ""
54
+
55
+
56
+ class SearchResponse(BaseModel):
57
+ """Unified search response model"""
58
+
59
+ query: str
60
+ results: List[SearchResult]
61
+ count: int
62
+ source: str
63
+ error: Optional[str] = None
64
+
65
+
66
+ @mcp.tool(description="Search the web using Google Custom Search API.")
67
+ def mcpsearchgoogle(
68
+ query: str = Field(..., description="The search query string."),
69
+ num_results: int = Field(
70
+ 10, description="Number of search results to return (default 10)."
71
+ ),
72
+ safe_search: bool = Field(
73
+ True, description="Whether to enable safe search filtering."
74
+ ),
75
+ language: str = Field("en", description="Language code for search results."),
76
+ country: str = Field("us", description="Country code for search results."),
77
+ ) -> str:
78
+ """
79
+ Search the web using Google Custom Search API.
80
+
81
+ Requires GOOGLE_API_KEY and GOOGLE_CSE_ID environment variables to be set.
82
+ """
83
+ try:
84
+ api_key = os.environ.get("GOOGLE_API_KEY")
85
+ cse_id = os.environ.get("GOOGLE_CSE_ID")
86
+
87
+ if not api_key:
88
+ raise ValueError("GOOGLE_API_KEY environment variable not set")
89
+ if not cse_id:
90
+ raise ValueError("GOOGLE_CSE_ID environment variable not set")
91
+
92
+ # Clamp num_results to the valid range (Google CSE returns at most 10 results per request)
+ num_results = max(1, min(10, num_results))
94
+
95
+ # Build the Google Custom Search API URL
96
+ url = "https://www.googleapis.com/customsearch/v1"
97
+ params = {
98
+ "key": api_key,
99
+ "cx": cse_id,
100
+ "q": query,
101
+ "num": num_results,
102
+ "safe": "active" if safe_search else "off",
103
+ "hl": language,
104
+ "gl": country,
105
+ }
106
+
107
+ logger.info(f"Google search starts for query: {query}")
108
+ response = requests.get(url, params=params, timeout=10)
109
+ response.raise_for_status()
110
+
111
+ data = response.json()
112
+ search_results = []
113
+
114
+ if "items" in data:
115
+ for i, item in enumerate(data["items"]):
116
+ result = GoogleSearchResult(
117
+ id=f"google-{i}",
118
+ title=item.get("title", ""),
119
+ url=item.get("link", ""),
120
+ snippet=item.get("snippet", ""),
121
+ source="google",
122
+ displayLink=item.get("displayLink", ""),
123
+ formattedUrl=item.get("formattedUrl", ""),
124
+ htmlSnippet=item.get("htmlSnippet", ""),
125
+ htmlTitle=item.get("htmlTitle", ""),
126
+ kind=item.get("kind", ""),
127
+ link=item.get("link", ""),
128
+ )
129
+ search_results.append(result)
130
+
131
+ return SearchResponse(
132
+ query=query,
133
+ results=search_results,
134
+ count=len(search_results),
135
+ source="google",
136
+ ).model_dump_json()
137
+
138
+ except Exception as e:
139
+ logger.error(f"Google search error: {traceback.format_exc()}")
140
+ return SearchResponse(
141
+ query=query, results=[], count=0, source="google", error=str(e)
142
+ ).model_dump_json()
143
+
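For testing without MCP, the tool can be invoked directly; all parameters are passed explicitly here, since the Field defaults only take effect when invoked through FastMCP. GOOGLE_API_KEY and GOOGLE_CSE_ID must be set:

import json

raw = mcpsearchgoogle(
    query="model context protocol",  # illustrative query
    num_results=5,
    safe_search=True,
    language="en",
    country="us",
)
for item in json.loads(raw)["results"]:
    print(item["title"], item["url"])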
144
+
145
+ def main():
146
+ load_dotenv()
147
+
148
+ print("Starting Search MCP Server...", file=sys.stderr)
149
+ mcp.run(transport="stdio")
150
+
151
+
152
+ # Make the module callable
153
+ def __call__():
154
+ """
155
+ Make the module callable for uvx.
156
+ This function is called when the module is executed directly.
157
+ """
158
+ main()
159
+
160
+
161
+ sys.modules[__name__].__call__ = __call__
162
+
163
+ # Run the server when the script is executed directly
164
+ if __name__ == "__main__":
165
+ main()
AWorld-main/aworlddistributed/mcp_servers/utils.py ADDED
@@ -0,0 +1,193 @@
1
+ import asyncio
2
+ import json
3
+ import os
4
+ import tempfile
5
+ from typing import List, Optional, Tuple
6
+ from urllib.parse import urlparse
7
+
8
+ import requests
9
+ from mcp.server import FastMCP
10
+
11
+ from aworld.logs.util import logger
12
+
13
+
14
+ def get_mime_type(file_path: str, default_mime: Optional[str] = None) -> str:
15
+ """
16
+ Detect MIME type of a file using python-magic if available,
17
+ otherwise fallback to extension-based detection.
18
+
19
+ Args:
20
+ file_path: Path to the file
21
+ default_mime: Default MIME type to return if detection fails
22
+
23
+ Returns:
24
+ str: Detected MIME type
25
+ """
26
+ # Try using python-magic for accurate MIME type detection
27
+ try:
28
+ import magic  # optional dependency (python-magic)
+ mime = magic.Magic(mime=True)
+ return mime.from_file(file_path)
+ except (ImportError, AttributeError, IOError):
32
+ # Fallback to extension-based detection
33
+ extension_mime_map = {
34
+ # Audio formats
35
+ ".mp3": "audio/mpeg",
36
+ ".wav": "audio/wav",
37
+ ".ogg": "audio/ogg",
38
+ ".m4a": "audio/mp4",
39
+ ".flac": "audio/flac",
40
+ # Image formats
41
+ ".jpg": "image/jpeg",
42
+ ".jpeg": "image/jpeg",
43
+ ".png": "image/png",
44
+ ".gif": "image/gif",
45
+ ".webp": "image/webp",
46
+ ".bmp": "image/bmp",
47
+ ".tiff": "image/tiff",
48
+ # Video formats
49
+ ".mp4": "video/mp4",
50
+ ".avi": "video/x-msvideo",
51
+ ".mov": "video/quicktime",
52
+ ".mkv": "video/x-matroska",
53
+ ".webm": "video/webm",
54
+ }
55
+
56
+ ext = os.path.splitext(file_path)[1].lower()
57
+ return extension_mime_map.get(ext, default_mime or "application/octet-stream")
58
+
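With the fallback path, detection reduces to the extension map; for example:

print(get_mime_type("clip.mp4"))   # "video/mp4" from the extension map
print(get_mime_type("notes.xyz"))  # "application/octet-stream" default
print(get_mime_type("notes.xyz", default_mime="text/plain"))  # caller-supplied default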
59
+
60
+ def is_url(path_or_url: str) -> bool:
61
+ """
62
+ Check if the given string is a URL.
63
+
64
+ Args:
65
+ path_or_url: String to check
66
+
67
+ Returns:
68
+ bool: True if the string is a URL, False otherwise
69
+ """
70
+ parsed = urlparse(path_or_url)
71
+ return bool(parsed.scheme and parsed.netloc)
72
+
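The scheme-and-netloc check treats bare paths as local files:

assert is_url("https://example.com/a.png")  # scheme + netloc -> URL
assert not is_url("/tmp/a.png")             # bare path -> local file
assert not is_url("example.com/a.png")      # no scheme -> treated as local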
73
+
74
+ def get_file_from_source(
75
+ source: str,
76
+ allowed_mime_prefixes: List[str] = None,
77
+ max_size_mb: float = 100.0,
78
+ timeout: int = 60,
79
+ type: str = "image",
80
+ ) -> Tuple[str, str, bytes]:
81
+ """
82
+ Unified function to get file content from a URL or local path with validation.
83
+
84
+ Args:
85
+ source: URL or local file path
86
+ allowed_mime_prefixes: List of allowed MIME type prefixes (e.g., ['audio/', 'video/'])
87
+ max_size_mb: Maximum allowed file size in MB
88
+ timeout: Timeout for URL requests in seconds
89
+
90
+ Returns:
91
+ Tuple[str, str, bytes]: (file_path, mime_type, file_content)
92
+ - For URLs, file_path will be a temporary file path
93
+ - For local files, file_path will be the original path
94
+
95
+ Raises:
96
+ ValueError: When file doesn't exist, exceeds size limit, or has invalid MIME type
97
+ IOError: When file cannot be read
98
+ requests.RequestException: When URL request fails
99
+ """
100
+ max_size_bytes = max_size_mb * 1024 * 1024
101
+ temp_file = None
102
+
103
+ try:
104
+ if is_url(source):
105
+ # Handle URL
106
+ logger.info(f"Downloading file from URL: {source}")
107
+ response = requests.get(source, stream=True, timeout=timeout)
108
+ response.raise_for_status()
109
+
110
+ # Check Content-Length if available
111
+ content_length = response.headers.get("Content-Length")
112
+ if content_length and int(content_length) > max_size_bytes:
113
+ raise ValueError(f"File size exceeds limit of {max_size_mb}MB")
114
+
115
+ # Create a temporary file
116
+ temp_file = tempfile.NamedTemporaryFile(delete=False)
117
+ file_path = temp_file.name
118
+
119
+ # Download content in chunks to avoid memory issues
120
+ content = bytearray()
121
+ downloaded_size = 0
122
+ for chunk in response.iter_content(chunk_size=8192):
123
+ downloaded_size += len(chunk)
124
+ if downloaded_size > max_size_bytes:
125
+ raise ValueError(f"File size exceeds limit of {max_size_mb}MB")
126
+ temp_file.write(chunk)
127
+ content.extend(chunk)
128
+
129
+ temp_file.close()
130
+
131
+ # Get MIME type
132
+ if type == "audio":
133
+ mime_type = "audio/mpeg"
134
+ elif type == "image":
135
+ mime_type = "image/jpeg"
136
+ elif type == "video":
137
+ mime_type = "video/mp4"
138
+
139
+ # mime_type = get_mime_type(file_path)
140
+
141
+ # For URLs where magic fails, try to use Content-Type header
142
+ if mime_type == "application/octet-stream":
143
+ content_type = response.headers.get("Content-Type", "").split(";")[0]
144
+ if content_type:
145
+ mime_type = content_type
146
+ else:
147
+ # Handle local file
148
+ file_path = os.path.abspath(source)
149
+
150
+ # Check if file exists
151
+ if not os.path.exists(file_path):
152
+ raise ValueError(f"File not found: {file_path}")
153
+
154
+ # Check file size
155
+ file_size = os.path.getsize(file_path)
156
+ if file_size > max_size_bytes:
157
+ raise ValueError(f"File size exceeds limit of {max_size_mb}MB")
158
+
159
+ # Get MIME type
160
+ if type == "audio":
161
+ mime_type = "audio/mpeg"
162
+ elif type == "image":
163
+ mime_type = "image/jpeg"
164
+ elif type == "video":
165
+ mime_type = "video/mp4"
166
+ # mime_type = get_mime_type(file_path)
167
+
168
+ # Read file content
169
+ with open(file_path, "rb") as f:
170
+ content = f.read()
171
+
172
+ # Validate MIME type if allowed_mime_prefixes is provided
173
+ if allowed_mime_prefixes:
174
+ if not any(
175
+ mime_type.startswith(prefix) for prefix in allowed_mime_prefixes
176
+ ):
177
+ allowed_types = ", ".join(allowed_mime_prefixes)
178
+ raise ValueError(
179
+ f"Invalid file type: {mime_type}. Allowed types: {allowed_types}"
180
+ )
181
+
182
+ return file_path, mime_type, content
183
+
184
+ except Exception as e:
185
+ # Clean up temporary file if an error occurs
186
+ if temp_file and os.path.exists(temp_file.name):
187
+ os.unlink(temp_file.name)
188
+ raise e
189
+
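A sketch of typical usage (illustrative URL). Note that for remote sources the returned path is a temporary file the caller should eventually unlink, as image_server does:

path, mime, data = get_file_from_source(
    "https://example.com/cat.jpg",  # illustrative remote source
    allowed_mime_prefixes=["image/"],
    max_size_mb=10.0,
    type="image",
)
print(path, mime, len(data))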
190
+
191
+ if __name__ == "__main__":
192
+ mcp_tools = []
193
+ logger.success(f"{json.dumps(mcp_tools, indent=4, ensure_ascii=False)}")
AWorld-main/aworlddistributed/mcp_servers/video_server.py ADDED
@@ -0,0 +1,484 @@
1
+ # pylint: disable=E1101
2
+
3
+ import base64
4
+ import os
5
+ import sys
6
+ import traceback
7
+ from dataclasses import dataclass
8
+ from typing import Any, Dict, List, Optional, Tuple
9
+
10
+ import cv2
11
+ import numpy as np
12
+ from dotenv import load_dotenv
13
+ from mcp.server.fastmcp import FastMCP
14
+ from openai import OpenAI
15
+ from pydantic import Field
16
+
17
+ from aworld.logs.util import logger
18
+ from mcp_servers.utils import get_file_from_source
19
+
20
+ client = OpenAI(api_key=os.getenv("LLM_API_KEY"), base_url=os.getenv("LLM_BASE_URL"))
21
+
22
+ # Initialize MCP server
23
+ mcp = FastMCP("Video Server")
24
+
25
+
26
+ @dataclass
27
+ class KeyframeResult:
28
+ """Result of keyframe extraction from a video.
29
+
30
+ Attributes:
31
+ frame_paths: List of file paths to the saved keyframes
32
+ frame_timestamps: List of timestamps (in seconds) corresponding to each frame
33
+ output_directory: Directory where frames were saved
34
+ frame_count: Number of frames extracted
35
+ success: Whether the extraction was successful
36
+ error_message: Error message if extraction failed, None otherwise
37
+ """
38
+
39
+ frame_paths: List[str]
40
+ frame_timestamps: List[float]
41
+ output_directory: str
42
+ frame_count: int
43
+ success: bool
44
+ error_message: Optional[str] = None
45
+
46
+
47
+ VIDEO_ANALYZE = (
48
+ "Input is a sequence of video frames. Given user's task: {task}. "
49
+ "analyze the video content following these steps:\n"
50
+ "1. Temporal sequence understanding\n"
51
+ "2. Motion and action analysis\n"
52
+ "3. Scene context interpretation\n"
53
+ "4. Object and person tracking\n"
54
+ "Return a json string with the following format: "
55
+ '{{"video_analysis_result": "analysis result given task and video frames"}}'
56
+ )
57
+
58
+
59
+ VIDEO_EXTRACT_SUBTITLES = (
60
+ "Input is a sequence of video frames. "
61
+ "Extract all subtitles (if present) in the video. "
62
+ "Return a json string with the following format: "
63
+ '{"video_subtitles": "extracted subtitles from video"}'
64
+ )
65
+
66
+ VIDEO_SUMMARIZE = (
67
+ "Input is a sequence of video frames. "
68
+ "Summarize the main content of the video. "
69
+ "Include key points, main topics, and important visual elements. "
70
+ "Return a json string with the following format: "
71
+ '{"video_summary": "concise summary of the video content"}'
72
+ )
73
+
74
+
75
+ def get_video_frames(
76
+ video_source: str,
77
+ sample_rate: int = 2,
78
+ start_time: float = 0,
79
+ end_time: float = None,
80
+ ) -> List[Dict[str, Any]]:
81
+ """
82
+ Get frames from video with given sample rate using robust file handling
83
+
84
+ Args:
85
+ video_source: Path or URL to the video file
86
+ sample_rate: Number of frames to sample per second
87
+ start_time: Start time of the video segment in seconds (default: 0)
88
+ end_time: End time of the video segment in seconds (default: None, meaning the end of the video)
89
+
90
+ Returns:
91
+ List[Dict[str, Any]]: List of dictionaries containing frame data and timestamp
92
+
93
+ Raises:
94
+ ValueError: When video file cannot be opened or is not a valid video
95
+ """
96
+ try:
97
+ # Get file with validation (only video files allowed)
98
+ file_path, _, _ = get_file_from_source(
99
+ video_source,
100
+ allowed_mime_prefixes=["video/"],
101
+ max_size_mb=2500.0, # 2500MB limit for videos
102
+ type="video", # Specify type as video to handle video files
103
+ )
104
+
105
+ # Open video file
106
+ video = cv2.VideoCapture(file_path)
107
+ if not video.isOpened():
108
+ raise ValueError(f"Could not open video file: {file_path}")
109
+
110
+ fps = video.get(cv2.CAP_PROP_FPS)
111
+ frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
112
+ video_duration = frame_count / fps # total duration in seconds
113
+
114
+ if end_time is None:
115
+ end_time = video_duration
116
+
117
+ if start_time > end_time:
118
+ raise ValueError("Start time cannot be greater than end time.")
119
+
120
+ if start_time < 0:
121
+ start_time = 0
122
+
123
+ if end_time > video_duration:
124
+ end_time = video_duration
125
+
126
+ start_frame = int(start_time * fps)
127
+ end_frame = int(end_time * fps)
128
+
129
+ all_frames = []
130
+ frames = []
131
+
132
+ # Calculate frame interval based on sample rate
133
+ frame_interval = max(1, int(fps / sample_rate))
134
+
135
+ # Set the video capture to the start frame
136
+ video.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
137
+
138
+ for i in range(start_frame, end_frame):
139
+ ret, frame = video.read()
140
+ if not ret:
141
+ break
142
+
143
+ # Convert frame to JPEG format
144
+ _, buffer = cv2.imencode(".jpg", frame)
145
+ frame_data = base64.b64encode(buffer).decode("utf-8")
146
+
147
+ # Add data URL prefix for JPEG image
148
+ frame_data = f"data:image/jpeg;base64,{frame_data}"
149
+
150
+ all_frames.append({"data": frame_data, "time": i / fps})
151
+
152
+ for i in range(0, len(all_frames), frame_interval):
153
+ frames.append(all_frames[i])
154
+
155
+ video.release()
156
+
157
+ # Clean up temporary file if it was created for a URL
158
+ if file_path != os.path.abspath(video_source) and os.path.exists(file_path):
159
+ os.unlink(file_path)
160
+
161
+ if not frames:
162
+ raise ValueError(f"Could not extract any frames from video: {video_source}")
163
+
164
+ return frames
165
+
166
+ except Exception as e:
167
+ logger.error(f"Error extracting frames from {video_source}: {str(e)}")
168
+ raise
169
+
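The sampling math above keeps roughly sample_rate frames per second: at 30 fps with sample_rate=2, frame_interval is max(1, int(30 / 2)) = 15, so a 10-second segment yields about 20 sampled frames:

fps, sample_rate = 30, 2
frame_interval = max(1, int(fps / sample_rate))  # 15, as in get_video_frames
decoded = 10 * fps                               # 300 frames decoded in the window
approx_kept = decoded // frame_interval          # 20 frames kept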
170
+
171
+ def create_video_content(
172
+ prompt: str, video_frames: List[Dict[str, Any]]
173
+ ) -> List[Dict[str, Any]]:
174
+ """Create uniform video format for querying llm."""
175
+ content = [{"type": "text", "text": prompt}]
176
+ content.extend(
177
+ [
178
+ {"type": "image_url", "image_url": {"url": frame["data"]}}
179
+ for frame in video_frames
180
+ ]
181
+ )
182
+ return content
183
+
184
+
185
+ @mcp.tool(description="Analyze the video content by the given question.")
186
+ def mcp_analyze_video(
187
+ video_url: str = Field(description="The input video in given filepath or url."),
188
+ question: str = Field(description="The question to analyze."),
189
+ sample_rate: int = Field(default=2, description="Sample n frames per second."),
190
+ start_time: float = Field(
191
+ default=0, description="Start time of the video segment in seconds."
192
+ ),
193
+ end_time: float = Field(
194
+ default=None, description="End time of the video segment in seconds."
195
+ ),
196
+ ) -> str:
197
+ """analyze the video content by the given question."""
198
+
199
+ try:
200
+ video_frames = get_video_frames(video_url, sample_rate, start_time, end_time)
201
+ logger.info(f"---len video_frames:{len(video_frames)}")
202
+ interval = 20
203
+ frame_nums = 30
204
+ all_res = []
205
+ for i in range(0, len(video_frames), interval):
206
+ inputs = []
207
+ cur_frames = video_frames[i : i + frame_nums]
208
+ content = create_video_content(
209
+ VIDEO_ANALYZE.format(task=question), cur_frames
210
+ )
211
+ inputs.append({"role": "user", "content": content})
212
+ try:
213
+ response = client.chat.completions.create(
214
+ model=os.getenv("LLM_MODEL_NAME"),
215
+ messages=inputs,
216
+ temperature=0,
217
+ )
218
+ cur_video_analysis_result = response.choices[0].message.content
219
+ except Exception:
220
+ cur_video_analysis_result = ""
221
+ all_res.append(
222
+ f"result of video part {int(i / interval + 1)}: {cur_video_analysis_result}"
223
+ )
224
+ if i + frame_nums >= len(video_frames):
225
+ break
226
+ video_analysis_result = "\n".join(all_res)
227
+
228
+ except (ValueError, IOError, RuntimeError):
229
+ video_analysis_result = ""
230
+ logger.error(f"video_analysis-Execute error: {traceback.format_exc()}")
231
+
232
+ logger.info(
233
+ f"---get_analysis_by_video-video_analysis_result:{video_analysis_result}"
234
+ )
235
+ return video_analysis_result
236
+
237
+
238
+ @mcp.tool(description="Extract subtitles from the video.")
239
+ def mcp_extract_video_subtitles(
240
+ video_url: str = Field(description="The input video in given filepath or url."),
241
+ sample_rate: int = Field(default=2, description="Sample n frames per second."),
242
+ start_time: float = Field(
243
+ default=0, description="Start time of the video segment in seconds."
244
+ ),
245
+ end_time: float = Field(
246
+ default=None, description="End time of the video segment in seconds."
247
+ ),
248
+ ) -> str:
249
+ """extract subtitles from the video."""
250
+ inputs = []
251
+ try:
252
+ video_frames = get_video_frames(video_url, sample_rate, start_time, end_time)
253
+ content = create_video_content(VIDEO_EXTRACT_SUBTITLES, video_frames)
254
+ inputs.append({"role": "user", "content": content})
255
+
256
+ response = client.chat.completions.create(
257
+ model=os.getenv("LLM_MODEL_NAME"),
258
+ messages=inputs,
259
+ temperature=0,
260
+ )
261
+ video_subtitles = response.choices[0].message.content
262
+ except (ValueError, IOError, RuntimeError):
263
+ video_subtitles = ""
264
+ logger.error(f"video_subtitles-Execute error: {traceback.format_exc()}")
265
+
266
+ logger.info(f"---get_subtitles_from_video-video_subtitles:{video_subtitles}")
267
+ return video_subtitles
268
+
269
+
270
+ @mcp.tool(description="Summarize the main content of the video.")
+ def mcp_summarize_video(
+     video_url: str = Field(description="The input video in given filepath or url."),
+     sample_rate: int = Field(default=2, description="Sample n frames per second."),
+     start_time: float = Field(
+         default=0, description="Start time of the video segment in seconds."
+     ),
+     end_time: float = Field(
+         default=None, description="End time of the video segment in seconds."
+     ),
+ ) -> str:
+     """Summarize the main content of the video."""
+     try:
+         video_frames = get_video_frames(video_url, sample_rate, start_time, end_time)
+         logger.info(f"---len video_frames:{len(video_frames)}")
+         # Summarize in chunks of up to 500 frames, stepping 490 frames at a
+         # time so consecutive chunks overlap by 10 frames for context.
+         interval = 490
+         frame_nums = 500
+         all_res = []
+         for i in range(0, len(video_frames), interval):
+             inputs = []
+             cur_frames = video_frames[i : i + frame_nums]
+             content = create_video_content(VIDEO_SUMMARIZE, cur_frames)
+             inputs.append({"role": "user", "content": content})
+             try:
+                 response = client.chat.completions.create(
+                     model=os.getenv("LLM_MODEL_NAME"),
+                     messages=inputs,
+                     temperature=0,
+                 )
+                 logger.info(f"---response:{response}")
+                 cur_video_summary = response.choices[0].message.content
+             except Exception:
+                 cur_video_summary = ""
+             all_res.append(
+                 f"summary of video part {int(i / interval + 1)}: {cur_video_summary}"
+             )
+             logger.info(
+                 f"summary of video part {int(i / interval + 1)}: {cur_video_summary}"
+             )
+         video_summary = "\n".join(all_res)
+
+     except (ValueError, IOError, RuntimeError):
+         video_summary = ""
+         logger.error(f"video_summary-Execute error: {traceback.format_exc()}")
+
+     logger.info(f"---get_summary_from_video-video_summary:{video_summary}")
+     return video_summary
+
+
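+ # Worked example of the chunking above (numbers assumed for illustration):
+ # a 10-minute video sampled at 2 fps yields 1200 frames, so the loop starts
+ # chunks at frames 0, 490, and 980 -- i.e. frames [0, 500), [490, 990), and
+ # [980, 1200) -- each summarized as "video part 1..3".
+
+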
+
319
+ @mcp.tool(description="Extract key frames around the target time with scene detection")
320
+ def get_video_keyframes(
321
+ video_path: str = Field(description="The input video in given filepath or url."),
322
+ target_time: int = Field(
323
+ description=(
324
+ "The specific time point for extraction,"
325
+ " centered within the window_size argument,"
326
+ " the unit is of second."
327
+ )
328
+ ),
329
+ window_size: int = Field(
330
+ default=5,
331
+ description="The window size for extraction, the unit is of second.",
332
+ ),
333
+ cleanup: bool = Field(
334
+ default=False,
335
+ description="Whether to delete the original video file after processing.",
336
+ ),
337
+ output_dir: str = Field(
338
+ default=os.getenv("FILESYSTEM_SERVER_WORKDIR", "./keyframes"),
339
+ description="Directory where extracted frames will be saved.",
340
+ ),
341
+ ) -> KeyframeResult:
342
+ """Extract key frames around the target time with scene detection.
343
+
344
+ This function extracts frames from a video file around a specific time point,
345
+ using scene detection to identify significant changes between frames. Only frames
346
+ with substantial visual differences are saved, reducing redundancy.
347
+
348
+ Args:
349
+ video_path: Path or URL to the video file
350
+ target_time: Specific time point (in seconds) to extract frames around
351
+ window_size: Time window (in seconds) centered on target_time
352
+ cleanup: Whether to delete the original video file after processing
353
+ output_dir: Directory where extracted frames will be saved
354
+
355
+ Returns:
356
+ KeyframeResult: A dataclass containing paths to saved frames, timestamps,
357
+ and metadata about the extraction process
358
+
359
+ Raises:
360
+ Exception: Exceptions are caught internally and reported in the result
361
+ """
362
+
363
+     def save_frames(frames, frame_times, output_dir) -> Tuple[List[str], List[float]]:
+         """Save extracted frames to disk, one JPEG per frame, named by timestamp."""
+         os.makedirs(output_dir, exist_ok=True)
+         saved_paths = []
+         saved_timestamps = []
+
+         for frame, timestamp in zip(frames, frame_times):
+             filename = f"{output_dir}/frame_{timestamp:.2f}s.jpg"
+             cv2.imwrite(filename, frame)
+             saved_paths.append(filename)
+             saved_timestamps.append(timestamp)
+
+         return saved_paths, saved_timestamps
+
+     def extract_keyframes(
+         video_path, target_time, window_size
+     ) -> Tuple[List[Any], List[float]]:
+         """Extract key frames around the target time with scene detection"""
+         cap = cv2.VideoCapture(video_path)
+         fps = cap.get(cv2.CAP_PROP_FPS)
+
+         # Calculate frame numbers for the time window
+         start_frame = int((target_time - window_size / 2) * fps)
+         end_frame = int((target_time + window_size / 2) * fps)
+
+         frames = []
+         frame_times = []
+
+         # Set video position to start_frame
+         cap.set(cv2.CAP_PROP_POS_FRAMES, max(0, start_frame))
+
+         prev_frame = None
+         while cap.isOpened():
+             frame_pos = cap.get(cv2.CAP_PROP_POS_FRAMES)
+             if frame_pos >= end_frame:
+                 break
+
+             ret, frame = cap.read()
+             if not ret:
+                 break
+
+             # Convert frame to grayscale for scene detection
+             gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+
+             # If this is the first frame, save it
+             if prev_frame is None:
+                 frames.append(frame)
+                 frame_times.append(frame_pos / fps)
+             else:
+                 # Calculate difference between current and previous frame
+                 diff = cv2.absdiff(gray, prev_frame)
+                 mean_diff = np.mean(diff)
+
+                 # If significant change detected, save frame
+                 if mean_diff > 20:  # Threshold for scene change
+                     frames.append(frame)
+                     frame_times.append(frame_pos / fps)
+
+             prev_frame = gray
+
+         cap.release()
+         return frames, frame_times
+
+     try:
+         # Extract keyframes
+         frames, frame_times = extract_keyframes(video_path, target_time, window_size)
+
+         # Save frames
+         frame_paths, frame_timestamps = save_frames(frames, frame_times, output_dir)
+
+         # Cleanup
+         if cleanup and os.path.exists(video_path):
+             os.remove(video_path)
+
+         return KeyframeResult(
+             frame_paths=frame_paths,
+             frame_timestamps=frame_timestamps,
+             output_directory=output_dir,
+             frame_count=len(frame_paths),
+             success=True,
+         )
+
+     except Exception as e:
+         error_message = f"Error processing video: {str(e)}"
+         logger.error(error_message)
+         return KeyframeResult(
+             frame_paths=[],
+             frame_timestamps=[],
+             output_directory=output_dir,
+             frame_count=0,
+             success=False,
+             error_message=error_message,
+         )
+
+
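+ # A minimal usage sketch (illustrative only; the path and times are
+ # hypothetical): pull scene-change frames from a 5-second window centered
+ # at the 2-minute mark.
+ #
+ #     result = get_video_keyframes(
+ #         video_path="/tmp/example.mp4",
+ #         target_time=120,
+ #         window_size=5,
+ #     )
+ #     if result.success:
+ #         print(result.frame_paths)
+
+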
+ def main():
+     load_dotenv()
+     print("Starting Video MCP Server...", file=sys.stderr)
+     mcp.run(transport="stdio")
+
+
+ # Make the module callable
+ def __call__():
+     """
+     Make the module callable for uvx.
+     This function is called when the module is executed directly.
+     """
+     main()
+
+
+ # Add this for compatibility with uvx
+ sys.modules[__name__].__call__ = __call__
+
+
+ # Run the server when the script is executed directly
+ if __name__ == "__main__":
+     main()
AWorld-main/aworlddistributed/mcp_servers/youtube_server.py ADDED
@@ -0,0 +1,279 @@
+ """
+ Youtube Download MCP Server
+
+ This module provides MCP server functionality for downloading files from Youtube URLs.
+ It handles various download scenarios with proper validation, error handling,
+ and progress tracking.
+
+ Key features:
+ - File downloading from Youtube HTTP/HTTPS URLs
+ - Download progress tracking
+ - File validation
+ - Safe file saving
+
+ Main functions:
+ - download_youtube_files: Downloads files from Youtube URLs to the local filesystem
+ """
+
+ import os
+ import sys
+ import time
+ import traceback
+ import urllib.parse
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Optional
+
+ from dotenv import load_dotenv
+ from mcp.server.fastmcp import FastMCP
+ from pydantic import BaseModel, Field
+ from selenium import webdriver
+ from selenium.webdriver.chrome.service import Service
+ from selenium.webdriver.common.by import By
+
+ from aworld.logs.util import logger
+
+ mcp = FastMCP("youtube-server")
+ _default_driver_path = os.environ.get(
+     "CHROME_DRIVER_PATH",
+     os.path.expanduser("~/Downloads/chromedriver-mac-arm64/chromedriver"),
+ )
+
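+ # The chromedriver location can be overridden via the environment; the path
+ # below is hypothetical:
+ #
+ #     CHROME_DRIVER_PATH=/usr/local/bin/chromedriver
+
+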
+ class YoutubeDownloadResults(BaseModel):
+     """Download result model with file information"""
+
+     file_path: str
+     file_name: str
+     file_size: int
+     content_type: Optional[str] = None
+     success: bool
+     error: Optional[str] = None
+
+
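+ # For illustration, a successful result serializes (via model_dump_json) to
+ # JSON shaped like the following; all values here are hypothetical:
+ #
+ #     {"file_path": "/tmp/mcp_downloads/abc123/video.mp4",
+ #      "file_name": "video.mp4", "file_size": 10485760,
+ #      "content_type": "mp4", "success": true, "error": null}
+
+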
+ @mcp.tool(
+     description="Download the youtube file from the URL and save to the local filesystem."
+ )
+ def download_youtube_files(
+     url: str = Field(
+         description="The URL of the youtube file to download. Must be a string."
+     ),
+     output_dir: str = Field(
+         "/tmp/mcp_downloads",
+         description="Directory to save the downloaded files (default: /tmp/mcp_downloads).",
+     ),
+     timeout: int = Field(
+         180, description="Download timeout in seconds (default: 180)."
+     ),
+ ) -> str:
+     """Download the youtube file from the URL and save to the local filesystem.
+
+     Args:
+         url: The URL of the youtube file to download, must be a string
+         output_dir: Directory to save the downloaded files
+         timeout: Download timeout in seconds
+
+     Returns:
+         JSON string with download results information
+     """
+     # Unwrap pydantic Field objects when the function is called directly
+     # (rather than through the MCP runtime, which resolves the defaults).
+     if hasattr(url, "default") and not isinstance(url, str):
+         url = url.default
+
+     if hasattr(output_dir, "default") and not isinstance(output_dir, str):
+         output_dir = output_dir.default
+
+     if hasattr(timeout, "default") and not isinstance(timeout, int):
+         timeout = timeout.default
+
+     def _get_youtube_content(url: str, output_dir: str, timeout: int) -> None:
+         """Use Selenium to download YouTube content via cobalt.tools"""
+         try:
+             options = webdriver.ChromeOptions()
+             options.add_argument("--disable-blink-features=AutomationControlled")
+             # Set the default download path
+             prefs = {
+                 "download.default_directory": output_dir,
+                 "download.prompt_for_download": False,
+                 "download.directory_upgrade": True,
+                 "safebrowsing.enabled": True,
+             }
+             options.add_experimental_option("prefs", prefs)
+             # Create the WebDriver and launch the Chrome browser
+             service = Service(executable_path=_default_driver_path)
+             driver = webdriver.Chrome(service=service, options=options)
+
+             logger.info(f"Opening cobalt.tools to download from {url}")
+             # Open the target webpage
+             driver.get("https://cobalt.tools/")
+             # Wait for the page to load
+             time.sleep(5)
+             # Find the input field and enter the YouTube link
+             input_field = driver.find_element(By.ID, "link-area")
+             input_field.send_keys(url)
+             time.sleep(5)
+             # Find the download button and click it
+             download_button = driver.find_element(By.ID, "download-button")
+             download_button.click()
+             time.sleep(5)
+
+             try:
+                 # Handle the bot detection popup if it appears
+                 driver.find_element(
+                     By.CLASS_NAME,
+                     "button.elevated.popup-button.undefined.svelte-nnawom.active",
+                 ).click()
+             except Exception as e:
+                 logger.warning(f"Bot detection handling: {str(e)}")
+
+             # Poll every 3 seconds until the download finishes (Chrome keeps a
+             # ".crdownload" suffix on in-progress files) or the timeout elapses.
+             cnt = 0
+             while (
+                 len(os.listdir(output_dir)) == 0
+                 or os.listdir(output_dir)[0].split(".")[-1] == "crdownload"
+             ):
+                 time.sleep(3)
+                 cnt += 3
+                 if cnt >= timeout:
+                     logger.warning(f"Download timeout after {timeout} seconds")
+                     break
+
+             logger.info("Download process completed")
+
+         except Exception as e:
+             logger.error(f"Error during YouTube content download: {str(e)}")
+             raise
+         finally:
+             # Close the browser
+             if "driver" in locals():
+                 driver.quit()
+
+     def _download_single_file(
+         url: str, output_dir: str, filename: str, timeout: int
+     ) -> str:
+         """Download a single file from the URL and save it to the local filesystem."""
+         try:
+             # Validate URL
+             if not url.startswith(("http://", "https://")):
+                 raise ValueError(
+                     "Invalid URL format. URL must start with http:// or https://"
+                 )
+
+             # Create the output directory if it doesn't exist
+             output_path = Path(output_dir)
+             output_path.mkdir(parents=True, exist_ok=True)
+
+             # Determine the filename if not provided
+             if not filename:
+                 filename = os.path.basename(urllib.parse.urlparse(url).path)
+                 if not filename:
+                     filename = "downloaded_file"
+                 filename += "_" + datetime.now().strftime("%Y%m%d_%H%M%S")
+
+             # Each download gets its own directory, named after the file
+             file_path = Path(os.path.join(output_path, filename))
+             file_path.mkdir(parents=True, exist_ok=True)
+             logger.info(f"Output path: {file_path}")
+
+             # Check whether the video already exists under the filesystem
+             # workdir, keyed by the "?v=" video id in the URL
+             video_id = url.split("?v=")[-1].split("&")[0] if "?v=" in url else ""
+             base_path = os.getenv("FILESYSTEM_SERVER_WORKDIR")
+
+             def find_existing_video(search_dir, video_id):
+                 """Recursively search search_dir for a file containing video_id."""
+                 if not video_id or not search_dir:
+                     return None
+
+                 for item in os.listdir(search_dir):
+                     item_path = os.path.join(search_dir, item)
+
+                     if os.path.isfile(item_path) and video_id in item:
+                         return item_path
+
+                     elif os.path.isdir(item_path):
+                         found = find_existing_video(item_path, video_id)
+                         if found:
+                             return found
+
+                 return None
+
+             existing_file = find_existing_video(base_path, video_id)
+             if existing_file:
+                 result = YoutubeDownloadResults(
+                     file_path=existing_file,
+                     file_name=os.path.basename(existing_file),
+                     file_size=os.path.getsize(existing_file),
+                     content_type="mp4",
+                     success=True,
+                     error=None,
+                 )
+                 logger.info(
+                     f"Found {video_id} is already downloaded in: {existing_file}"
+                 )
+                 return result.model_dump_json()
+
+             logger.info(f"Downloading file from {url} to {file_path}")
+
+             _get_youtube_content(url, str(file_path), timeout)
+
+             # Check whether the download produced a file
+             if len(os.listdir(file_path)) == 0:
+                 raise FileNotFoundError("No files were downloaded")
+
+             download_file = os.path.join(file_path, os.listdir(file_path)[0])
+
+             # Get the actual file size
+             actual_size = os.path.getsize(download_file)
+             logger.success(f"File downloaded successfully to {download_file}")
+
+             # Create the result
+             result = YoutubeDownloadResults(
+                 file_path=download_file,
+                 file_name=os.listdir(file_path)[0],
+                 file_size=actual_size,
+                 content_type="mp4",
+                 success=True,
+                 error=None,
+             )
+
+             return result.model_dump_json()
+
+         except Exception as e:
+             error_msg = str(e)
+             logger.error(f"Download error: {traceback.format_exc()}")
+
+             result = YoutubeDownloadResults(
+                 file_path="",
+                 file_name="",
+                 file_size=0,
+                 content_type=None,
+                 success=False,
+                 error=error_msg,
+             )
+
+             return result.model_dump_json()
+
+     # _download_single_file already returns a serialized YoutubeDownloadResults,
+     # so its JSON payload can be passed through directly.
+     return _download_single_file(url, output_dir, "", timeout)
+
+
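+ # A minimal usage sketch (illustrative only; the URL is a placeholder):
+ #
+ #     result_json = download_youtube_files(
+ #         url="https://www.youtube.com/watch?v=<VIDEO_ID>",
+ #         output_dir="/tmp/mcp_downloads",
+ #         timeout=180,
+ #     )
+ #     # result_json is a YoutubeDownloadResults payload serialized as JSON.
+
+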
+ def main():
+     load_dotenv()
+     print("Starting YoutubeDownload MCP Server...", file=sys.stderr)
+     mcp.run(transport="stdio")
+
+
+ # Make the module callable
+ def __call__():
+     """
+     Make the module callable for uvx.
+     This function is called when the module is executed directly.
+     """
+     main()
+
+
+ sys.modules[__name__].__call__ = __call__
+
+ # Run the server when the script is executed directly
+ if __name__ == "__main__":
+     main()