Duibonduil committed
Commit 7f4b706 · verified · 1 Parent(s): 595b4ab

Upload 15 files
AWorld-main/aworlddistributed/mcp_servers/aworldsearch_server.py ADDED
@@ -0,0 +1,238 @@
import asyncio
import json
import logging
import os
import sys
from typing import List, Dict, Any, Optional, Union

import aiohttp
from mcp.server import FastMCP
from mcp.types import TextContent
from pydantic import Field

mcp = FastMCP("aworldsearch-server")


async def search_single(query: str, num: int = 5) -> Optional[Dict[str, Any]]:
    """Execute a single search query; returns None on error."""
    try:
        url = os.getenv('AWORLD_SEARCH_URL')
        searchMode = os.getenv('AWORLD_SEARCH_SEARCHMODE')
        source = os.getenv('AWORLD_SEARCH_SOURCE')
        domain = os.getenv('AWORLD_SEARCH_DOMAIN')
        uid = os.getenv('AWORLD_SEARCH_UID')
        if not url or not searchMode or not source or not domain:
            logging.warning("Query failed: url, searchMode, source, domain parameters incomplete")
            return None

        headers = {
            'Content-Type': 'application/json'
        }
        data = {
            "domain": domain,
            "extParams": {},
            "page": 0,
            "pageSize": num,
            "query": query,
            "searchMode": searchMode,
            "source": source,
            "userId": uid
        }

        async with aiohttp.ClientSession() as session:
            try:
                async with session.post(url, headers=headers, json=data) as response:
                    if response.status != 200:
                        logging.warning(f"Query failed: {query}, status code: {response.status}")
                        return None

                    result = await response.json()
                    return result
            except aiohttp.ClientError:
                logging.warning(f"Request error: {query}")
                return None
    except Exception:
        logging.warning(f"Query exception: {query}")
        return None


def filter_valid_docs(result: Optional[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Filter valid document results; returns an empty list if the input is None."""
    if result is None:
        return []

    try:
        valid_docs = []

        # Check success field
        if not result.get("success"):
            return valid_docs

        # Check searchDocs field
        search_docs = result.get("searchDocs", [])
        if not search_docs:
            return valid_docs

        # Extract required fields
        required_fields = ["title", "docAbstract", "url", "doc"]

        for doc in search_docs:
            # Check that all required fields exist and are not empty
            is_valid = True
            for field in required_fields:
                if field not in doc or not doc[field]:
                    is_valid = False
                    break

            if is_valid:
                # Keep only the required fields
                filtered_doc = {field: doc[field] for field in required_fields}
                valid_docs.append(filtered_doc)

        return valid_docs
    except Exception:
        return []


@mcp.tool(description="Search based on the user's input query list")
async def search(
    query_list: List[str] = Field(
        description="List format, queries to search for"
    ),
    num: int = Field(
        5,
        description="Maximum number of results per query, default is 5; please keep the total results within 15"
    )
) -> Union[str, TextContent]:
    """Execute the main search function; supports a single query or a query list."""
    try:
        # Get configuration from environment variables
        env_total_num = os.getenv('AWORLD_SEARCH_TOTAL_NUM')
        if env_total_num and env_total_num.isdigit():
            # Force override of the input num parameter with the environment variable
            num = int(env_total_num)

        # If no queries were provided, return an empty result
        if not query_list:
            # Initialize TextContent with additional parameters
            return TextContent(
                type="text",
                text="",  # Empty string instead of None
                **{"metadata": {}}  # Pass as additional fields
            )

        # When the query count is >= 3 or slice_num is set, use the corresponding value
        slice_num = os.getenv('AWORLD_SEARCH_SLICE_NUM')
        if slice_num and slice_num.isdigit():
            actual_num = int(slice_num)
        else:
            actual_num = 2 if len(query_list) >= 3 else num

        # Execute all queries in parallel
        tasks = [search_single(q, actual_num) for q in query_list]
        raw_results = await asyncio.gather(*tasks)

        # Filter and merge results
        all_valid_docs = []
        for result in raw_results:
            valid_docs = filter_valid_docs(result)
            all_valid_docs.extend(valid_docs)

        # If no valid results were found, return an empty result
        if not all_valid_docs:
            # Initialize TextContent with additional parameters
            return TextContent(
                type="text",
                text="",  # Empty string instead of None
                **{"metadata": {}}  # Pass as additional fields
            )

        # Format results as JSON
        result_json = json.dumps(all_valid_docs, ensure_ascii=False)

        # Build the output structure directly
        combined_query = ",".join(query_list)

        # Use a dictionary to deduplicate by URL
        url_dict = {}
        for doc in all_valid_docs:
            url = doc.get("url", "")
            if url not in url_dict:
                url_dict[url] = {
                    "title": doc.get("title", ""),
                    "url": url,
                    "snippet": doc.get("doc", "")[:100] + "..." if len(doc.get("doc", "")) > 100 else doc.get("doc", ""),
                    "content": doc.get("doc", "")  # Map the doc field to content
                }

        # Convert dictionary values to a list
        search_items = list(url_dict.values())

        search_output_dict = {
            "artifact_type": "WEB_PAGES",
            "artifact_data": {
                "query": combined_query,
                "results": search_items
            }
        }

        # Log results
        logging.info(f"Completed {len(query_list)} queries, found {len(all_valid_docs)} valid documents")

        # Initialize TextContent with additional parameters
        return TextContent(
            type="text",
            text=result_json,
            **{"metadata": search_output_dict}  # Pass processed data as metadata
        )
    except Exception as e:
        # Handle errors
        logging.error(f"Search error: {e}")
        return TextContent(
            type="text",
            text="",  # Empty string instead of None
            **{"metadata": {}}  # Pass as additional fields
        )


def main():
    from dotenv import load_dotenv

    load_dotenv(override=True)

    print("Starting AWorld Search MCP Server...", file=sys.stderr)
    mcp.run(transport="stdio")


# Make the module callable
def __call__():
    """
    Make the module callable for uvx.
    This function is called when the module is executed directly.
    """
    main()


sys.modules[__name__].__call__ = __call__

if __name__ == "__main__":
    main()

# if __name__ == "__main__":
#     # Configure logging
#     logging.basicConfig(
#         level=logging.INFO,
#         format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
#     )
#
#     # Test single query
#     # asyncio.run(search("Alibaba financial report"))
#
#     # Test multiple queries
#     test_queries = ["Alibaba financial report", "Tencent financial report", "Baidu financial report"]
#     asyncio.run(search(query_list=test_queries))
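
The commented-out test at the bottom of the file doubles as a usage sketch. Since the @mcp.tool-decorated coroutine remains an ordinary Python function, it can be smoke-tested directly, assuming mcp_servers is importable and the AWORLD_SEARCH_* variables point at a reachable search backend (the values below are placeholder assumptions, not real endpoints):

import asyncio
import os

# Placeholder configuration -- substitute your actual backend values.
os.environ.setdefault("AWORLD_SEARCH_URL", "https://search.example.com/api")  # hypothetical endpoint
os.environ.setdefault("AWORLD_SEARCH_SEARCHMODE", "web")                      # hypothetical value
os.environ.setdefault("AWORLD_SEARCH_SOURCE", "aworld")                       # hypothetical value
os.environ.setdefault("AWORLD_SEARCH_DOMAIN", "general")                      # hypothetical value

from mcp_servers.aworldsearch_server import search

# Mirrors the commented-out test above; num is passed explicitly because the
# Field default only applies when the tool is invoked through MCP.
result = asyncio.run(search(query_list=["Alibaba financial report"], num=5))
print(result.text)
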
AWorld-main/aworlddistributed/mcp_servers/browser_server.py ADDED
@@ -0,0 +1,149 @@
"""
Browser MCP Server

This module provides MCP server functionality for browser automation and interaction.
It handles tasks such as web scraping, form submission, and automated browsing.

Main functions:
- browse_url: Opens a URL and performs specified actions
- submit_form: Fills and submits forms on web pages
"""

import json
import os
import sys
import traceback

from browser_use import Agent
from browser_use.agent.views import AgentHistoryList
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from mcp.server.fastmcp import FastMCP
from pydantic import Field

from aworld.logs.util import logger

mcp = FastMCP("browser-server")
browser_system_prompt = """
===== NAVIGATION STRATEGY =====
1. START: Navigate to the most authoritative source for this information
   - For general queries: Use Google with specific search terms
   - For known sources: Go directly to the relevant website

2. EVALUATE: Assess each page methodically
   - Scan headings and highlighted text first
   - Look for data tables, charts, or official statistics
   - Check publication dates for timeliness

3. EXTRACT: Capture exactly what's needed
   - Take screenshots of visual evidence (charts, tables, etc.)
   - Copy precise text that answers the query
   - Note source URLs for citation

4. DOWNLOAD: Save the most relevant file to a local path for further processing
   - Save the text if possible for further text reading and analysis
   - Save the image if possible for further image reasoning analysis
   - Save the pdf if possible for further pdf reading and analysis

5. ROBOT DETECTION:
   - If the page is a robot detection page, abort immediately
   - Navigate to the most authoritative source for similar information instead

===== EFFICIENCY GUIDELINES =====
- Use specific search queries with key terms from the task
- Avoid getting distracted by tangential information
- If blocked by paywalls, try archive.org or similar alternatives
- Document each significant finding clearly and concisely

Your goal is to extract precisely the information needed with minimal browsing steps.
"""


@mcp.tool(description="Perform browser actions using the browser-use package.")
async def browser_use(
    task: str = Field(description="The task to perform using the browser."),
) -> str:
    """
    Perform browser actions using the browser-use package.

    Args:
        task (str): The task to perform using the browser.

    Returns:
        str: The result of the browser actions.
    """
    browser = Browser(
        config=BrowserConfig(
            headless=False,
            new_context_config=BrowserContextConfig(
                disable_security=True,
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
                minimum_wait_page_load_time=10,
                maximum_wait_page_load_time=30,
            ),
        )
    )
    browser_context = BrowserContext(
        config=BrowserContextConfig(
            # Write the browser trace into the LOG_FILE_PATH directory
            trace_path=os.getenv("LOG_FILE_PATH", "/tmp") + "/browser_trace.log"
        ),
        browser=browser,
    )
    agent = Agent(
        task=task,
        llm=ChatOpenAI(
            model=os.getenv("LLM_MODEL_NAME"),
            api_key=os.getenv("LLM_API_KEY"),
            base_url=os.getenv("LLM_BASE_URL"),
            model_name=os.getenv("LLM_MODEL_NAME"),
            openai_api_base=os.getenv("LLM_BASE_URL"),
            openai_api_key=os.getenv("LLM_API_KEY"),
            temperature=1.0,
        ),
        browser_context=browser_context,
        extend_system_message=browser_system_prompt,
    )
    try:
        browser_execution: AgentHistoryList = await agent.run(max_steps=50)
        if (
            browser_execution is not None
            and browser_execution.is_done()
            and browser_execution.is_successful()
        ):
            exec_trace = browser_execution.extracted_content()
            logger.info(
                ">>> 🌏 Browse Execution Succeed!\n"
                f">>> 💡 Result: {json.dumps(exec_trace, ensure_ascii=False, indent=4)}\n"
                ">>> 🌏 Browse Execution Succeed!\n"
            )
            return browser_execution.final_result()
        else:
            return f"Browser execution failed for task: {task}"
    except Exception as e:
        logger.error(f"Browser execution failed: {traceback.format_exc()}")
        return f"Browser execution failed for task: {task} due to {str(e)}"
    finally:
        await browser.close()
        logger.info("Browser Closed!")


def main():
    load_dotenv()
    print("Starting Browser MCP Server...", file=sys.stderr)
    mcp.run(transport="stdio")


# Make the module callable
def __call__():
    """
    Make the module callable for uvx.
    This function is called when the module is executed directly.
    """
    main()


sys.modules[__name__].__call__ = __call__

# Run the server when the script is executed directly
if __name__ == "__main__":
    main()
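
For reference, a sketch of the environment browser_use() reads at startup; the variable names come from the code above, while every value here is a placeholder rather than a working credential:

import os

os.environ["LLM_MODEL_NAME"] = "gpt-4o"                   # placeholder model id
os.environ["LLM_API_KEY"] = "sk-placeholder"              # placeholder key
os.environ["LLM_BASE_URL"] = "https://api.openai.com/v1"  # or your proxy endpoint
os.environ["LOG_FILE_PATH"] = "/tmp/aworld_logs"          # directory for browser_trace.log
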
AWorld-main/aworlddistributed/mcp_servers/document_server.py ADDED
@@ -0,0 +1,998 @@
"""
Document MCP Server

This module provides MCP server functionality for document processing and analysis.
It handles various document formats including:
- Text files
- PDF documents
- Word documents (DOCX)
- Excel spreadsheets
- PowerPoint presentations
- JSON and XML files
- Source code files

Each document type has specialized processing functions that extract content,
structure, and metadata. The server focuses on local file processing with
appropriate validation and error handling.

Main functions:
- mcpreadtext: Reads plain text files
- mcpreadpdf: Reads PDF files with optional image extraction
- mcpreaddocx: Reads Word documents
- mcpreadexcel: Reads Excel spreadsheets
- mcpreadpptx: Reads PowerPoint presentations
- mcpreadjson: Reads and parses JSON/JSONL files
- mcpreadxml: Reads and parses XML files
- mcpreadhtmltext: Reads HTML files and extracts content
- mcpreadsourcecode: Reads and analyzes source code files
"""

import io
import json
import os
import sys
import tempfile
import traceback
from datetime import date, datetime
from typing import Any, Dict, List, Optional

import fitz
import html2text
import pandas as pd
import xmltodict
from bs4 import BeautifulSoup
from docx2markdown._docx_to_markdown import docx_to_markdown
from dotenv import load_dotenv
from mcp.server.fastmcp import FastMCP
from PIL import Image, ImageDraw, ImageFont
from pptx import Presentation
from pydantic import BaseModel, Field
from PyPDF2 import PdfReader
from tabulate import tabulate
from xls2xlsx import XLS2XLSX

from aworld.logs.util import logger
from aworld.utils import import_package
from mcp_servers.image_server import encode_images

mcp = FastMCP("document-server")


# Define model classes for different document types
class TextDocument(BaseModel):
    """Model representing a text document"""

    content: str
    file_path: str
    file_name: str
    file_size: int
    last_modified: str


class HtmlDocument(BaseModel):
    """Model representing an HTML document"""

    content: str  # Extracted text content
    html_content: str  # Original HTML content
    file_path: str
    file_name: str
    file_size: int
    last_modified: str
    title: Optional[str] = None
    links: Optional[List[Dict[str, str]]] = None
    images: Optional[List[Dict[str, str]]] = None
    tables: Optional[List[str]] = None
    markdown: Optional[str] = None  # HTML converted to Markdown format


class JsonDocument(BaseModel):
    """Model representing a JSON document"""

    format: str  # "json" or "jsonl"
    type: Optional[str] = None  # "array" or "object" for standard JSON
    count: Optional[int] = None
    keys: Optional[List[str]] = None
    data: Any
    file_path: str
    file_name: str


class XmlDocument(BaseModel):
    """Model representing an XML document"""

    content: Dict
    file_path: str
    file_name: str


class PdfImage(BaseModel):
    """Model representing an image extracted from a PDF"""

    page: int
    format: str
    width: int
    height: int
    path: str


class PdfDocument(BaseModel):
    """Model representing a PDF document"""

    content: str
    file_path: str
    file_name: str
    page_count: int
    images: Optional[List[PdfImage]] = None
    image_count: Optional[int] = None
    image_dir: Optional[str] = None
    error: Optional[str] = None


class PdfResult(BaseModel):
    """Model representing results from processing multiple PDF documents"""

    total_files: int
    success_count: int
    failed_count: int
    results: List[PdfDocument]


class DocxDocument(BaseModel):
    """Model representing a Word document"""

    content: str
    file_path: str
    file_name: str


class ExcelSheet(BaseModel):
    """Model representing a sheet in an Excel file"""

    name: str
    data: List[Dict[str, Any]]
    markdown_table: str
    row_count: int
    column_count: int


class ExcelDocument(BaseModel):
    """Model representing an Excel document"""

    file_name: str
    file_path: str
    processed_path: Optional[str] = None
    file_type: str
    sheet_count: int
    sheet_names: List[str]
    sheets: List[ExcelSheet]
    success: bool = True
    error: Optional[str] = None


class ExcelResult(BaseModel):
    """Model representing results from processing multiple Excel documents"""

    total_files: int
    success_count: int
    failed_count: int
    results: List[ExcelDocument]


class PowerPointSlide(BaseModel):
    """Model representing a slide in a PowerPoint presentation"""

    slide_number: int
    image: str  # Base64 encoded image


class PowerPointDocument(BaseModel):
    """Model representing a PowerPoint document"""

    file_path: str
    file_name: str
    slide_count: int
    slides: List[PowerPointSlide]


class SourceCodeDocument(BaseModel):
    """Model representing a source code document"""

    content: str
    file_type: str
    file_path: str
    file_name: str
    line_count: int
    size_bytes: int
    last_modified: str
    classes: Optional[List[str]] = None
    functions: Optional[List[str]] = None
    imports: Optional[List[str]] = None
    package: Optional[List[str]] = None
    methods: Optional[List[str]] = None
    includes: Optional[List[str]] = None


class DocumentError(BaseModel):
    """Model representing an error in document processing"""

    error: str
    file_path: Optional[str] = None
    file_name: Optional[str] = None


class ComplexEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, datetime):
            return o.strftime("%Y-%m-%d %H:%M:%S")
        elif isinstance(o, date):
            return o.strftime("%Y-%m-%d")
        else:
            return json.JSONEncoder.default(self, o)


def handle_error(e: Exception, error_type: str, file_path: Optional[str] = None) -> str:
    """Unified error handling; returns a standard-format error message"""
    error_msg = f"{error_type} error: {str(e)}"
    logger.error(traceback.format_exc())

    error = DocumentError(
        error=error_msg,
        file_path=file_path,
        file_name=os.path.basename(file_path) if file_path else None,
    )

    return error.model_dump_json()


def check_file_readable(document_path: str) -> Optional[str]:
    """Check that the file exists and is readable; return an error message or None"""
    if not os.path.exists(document_path):
        return f"File does not exist: {document_path}"
    if not os.access(document_path, os.R_OK):
        return f"File is not readable: {document_path}"
    return None


@mcp.tool(
    description="Read and return content from a local text file. Cannot process files from https:// URLs."
)
def mcpreadtext(
    document_path: str = Field(description="The input local text file path."),
) -> str:
    """Read and return content from a local text file. Cannot process files from https:// URLs."""
    error = check_file_readable(document_path)
    if error:
        return DocumentError(error=error, file_path=document_path).model_dump_json()

    try:
        with open(document_path, "r", encoding="utf-8") as f:
            content = f.read()

        result = TextDocument(
            content=content,
            file_path=document_path,
            file_name=os.path.basename(document_path),
            file_size=os.path.getsize(document_path),
            last_modified=datetime.fromtimestamp(
                os.path.getmtime(document_path)
            ).strftime("%Y-%m-%d %H:%M:%S"),
        )

        return result.model_dump_json()
    except Exception as e:
        return handle_error(e, "Text file reading", document_path)


@mcp.tool(
    description="Read and parse a JSON or JSONL file and return the parsed content. Cannot process files from https:// URLs."
)
def mcpreadjson(
    document_path: str = Field(description="Local path to a JSON or JSONL file"),
    is_jsonl: bool = Field(
        default=False,
        description="Whether the file is in JSONL format (one JSON object per line)",
    ),
) -> str:
    """Read and parse a JSON or JSONL file and return the parsed content. Cannot process files from https:// URLs."""
    error = check_file_readable(document_path)
    if error:
        return DocumentError(error=error, file_path=document_path).model_dump_json()

    try:
        # Choose the processing method based on the file type
        if is_jsonl:
            # Process a JSONL file (one JSON object per line)
            results = []
            with open(document_path, "r", encoding="utf-8") as f:
                for line_num, line in enumerate(f, 1):
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        json_obj = json.loads(line)
                        results.append(json_obj)
                    except json.JSONDecodeError as e:
                        logger.warning(
                            f"JSON parsing error at line {line_num}: {str(e)}"
                        )

            # Create the result model
            result = JsonDocument(
                format="jsonl",
                count=len(results),
                data=results,
                file_path=document_path,
                file_name=os.path.basename(document_path),
            )

        else:
            # Process a standard JSON file
            with open(document_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            # Create the result model based on the data type
            if isinstance(data, list):
                result = JsonDocument(
                    format="json",
                    type="array",
                    count=len(data),
                    data=data,
                    file_path=document_path,
                    file_name=os.path.basename(document_path),
                )
            else:
                result = JsonDocument(
                    format="json",
                    type="object",
                    keys=list(data.keys()) if isinstance(data, dict) else [],
                    data=data,
                    file_path=document_path,
                    file_name=os.path.basename(document_path),
                )

        return result.model_dump_json()

    except json.JSONDecodeError as e:
        return handle_error(e, "JSON parsing", document_path)
    except Exception as e:
        return handle_error(e, "JSON file reading", document_path)


@mcp.tool(
    description="Read and parse an XML file and return the parsed content. Cannot process files from https:// URLs."
)
def mcpreadxml(
    document_path: str = Field(description="The local input XML file path."),
) -> str:
    """Read and parse an XML file and return the parsed content. Cannot process files from https:// URLs."""
    error = check_file_readable(document_path)
    if error:
        return DocumentError(error=error, file_path=document_path).model_dump_json()

    try:
        with open(document_path, "r", encoding="utf-8") as f:
            data = f.read()

        result = XmlDocument(
            content=xmltodict.parse(data),
            file_path=document_path,
            file_name=os.path.basename(document_path),
        )

        return result.model_dump_json()
    except Exception as e:
        return handle_error(e, "XML file reading", document_path)


@mcp.tool(
    description="Read and return content from PDF files with optional image extraction. Cannot process files from https:// URLs."
)
def mcpreadpdf(
    document_paths: List[str] = Field(description="The local input PDF file paths."),
    extract_images: bool = Field(
        default=False, description="Whether to extract images from the PDF (default: False)"
    ),
) -> str:
    """Read and return content from PDF files with optional image extraction. Cannot process files from https:// URLs."""
    try:
        results = []
        success_count = 0
        failed_count = 0

        for document_path in document_paths:
            error = check_file_readable(document_path)
            if error:
                results.append(
                    PdfDocument(
                        content="",
                        file_path=document_path,
                        file_name=os.path.basename(document_path),
                        page_count=0,
                        error=error,
                    )
                )
                failed_count += 1
                continue

            try:
                with open(document_path, "rb") as f:
                    reader = PdfReader(f)
                    content = " ".join(page.extract_text() for page in reader.pages)
                    page_count = len(reader.pages)

                pdf_result = PdfDocument(
                    content=content,
                    file_path=document_path,
                    file_name=os.path.basename(document_path),
                    page_count=page_count,
                )

                # Extract images if requested
                if extract_images:
                    images_data = []
                    # Use the /tmp directory for storing images
                    output_dir = "/tmp/pdf_images"

                    # Create the output directory if it doesn't exist
                    os.makedirs(output_dir, exist_ok=True)

                    # Generate a unique subfolder based on the filename to avoid conflicts
                    pdf_name = os.path.splitext(os.path.basename(document_path))[0]
                    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
                    image_dir = os.path.join(output_dir, f"{pdf_name}_{timestamp}")
                    os.makedirs(image_dir, exist_ok=True)

                    try:
                        # Open the PDF with PyMuPDF
                        pdf_document = fitz.open(document_path)

                        # Iterate through each page
                        for page_index in range(len(pdf_document)):
                            page = pdf_document[page_index]

                            # Get the image list
                            image_list = page.get_images(full=True)

                            # Process each image
                            for img_index, img in enumerate(image_list):
                                # Extract image information
                                xref = img[0]
                                base_image = pdf_document.extract_image(xref)
                                image_bytes = base_image["image"]
                                image_ext = base_image["ext"]

                                # Save the image to a file in the /tmp directory
                                img_filename = f"pdf_image_p{page_index+1}_{img_index+1}.{image_ext}"
                                img_path = os.path.join(image_dir, img_filename)

                                with open(img_path, "wb") as img_file:
                                    img_file.write(image_bytes)
                                    logger.success(f"Image saved: {img_path}")

                                # Get the image dimensions
                                with Image.open(img_path) as img:
                                    width, height = img.size

                                # Add to results with the file path instead of base64
                                images_data.append(
                                    PdfImage(
                                        page=page_index + 1,
                                        format=image_ext,
                                        width=width,
                                        height=height,
                                        path=img_path,
                                    )
                                )

                        pdf_result.images = images_data
                        pdf_result.image_count = len(images_data)
                        pdf_result.image_dir = image_dir

                    except Exception as img_error:
                        logger.error(f"Error extracting images: {str(img_error)}")
                        # Don't clean up on error so we keep any successfully extracted images
                        pdf_result.error = str(img_error)

                results.append(pdf_result)
                success_count += 1

            except Exception as e:
                results.append(
                    PdfDocument(
                        content="",
                        file_path=document_path,
                        file_name=os.path.basename(document_path),
                        page_count=0,
                        error=str(e),
                    )
                )
                failed_count += 1

        # Create the final result
        pdf_result = PdfResult(
            total_files=len(document_paths),
            success_count=success_count,
            failed_count=failed_count,
            results=results,
        )

        return pdf_result.model_dump_json()

    except Exception as e:
        return handle_error(e, "PDF file reading")


@mcp.tool(
    description="Read and return content from a Word file as parsed content. Cannot process files from https:// URLs."
)
def mcpreaddocx(
    document_path: str = Field(description="The local input Word file path."),
) -> str:
    """Read and return content from a Word file. Cannot process files from https:// URLs."""
    error = check_file_readable(document_path)
    if error:
        return DocumentError(error=error, file_path=document_path).model_dump_json()

    try:
        file_name = os.path.basename(document_path)
        md_file_path = f"{file_name}.md"
        docx_to_markdown(document_path, md_file_path)

        with open(md_file_path, "r", encoding="utf-8") as f:
            content = f.read()

        os.remove(md_file_path)

        result = DocxDocument(
            content=content, file_path=document_path, file_name=file_name
        )

        return result.model_dump_json()
    except Exception as e:
        return handle_error(e, "Word file reading", document_path)


@mcp.tool(
    description="Read multiple Excel/CSV files and convert sheets to Markdown tables. Cannot process files from https:// URLs."
)
def mcpreadexcel(
    document_paths: List[str] = Field(
        description="List of local input Excel/CSV file paths."
    ),
    max_rows: int = Field(
        1000, description="Maximum number of rows to read per sheet (default: 1000)"
    ),
    convert_xls_to_xlsx: bool = Field(
        False,
        description="Whether to convert XLS files to XLSX format (default: False)",
    ),
) -> str:
    """Read multiple Excel/CSV files and convert sheets to Markdown tables. Cannot process files from https:// URLs."""
    try:
        # Import required packages
        import_package("tabulate")

        # Import the xls2xlsx package if conversion is requested
        if convert_xls_to_xlsx:
            import_package("xls2xlsx")

        all_results = []
        temp_files = []  # Track temporary files for cleanup
        success_count = 0
        failed_count = 0

        # Process each file
        for document_path in document_paths:
            # Check that the file exists and is readable
            error = check_file_readable(document_path)
            if error:
                all_results.append(
                    ExcelDocument(
                        file_name=os.path.basename(document_path),
                        file_path=document_path,
                        file_type="UNKNOWN",
                        sheet_count=0,
                        sheet_names=[],
                        sheets=[],
                        success=False,
                        error=error,
                    )
                )
                failed_count += 1
                continue

            try:
                # Check the file extension
                file_ext = os.path.splitext(document_path)[1].lower()

                # Validate the file type
                if file_ext not in [".csv", ".xls", ".xlsx", ".xlsm"]:
                    error_msg = f"Unsupported file format: {file_ext}. Only CSV, XLS, XLSX, and XLSM formats are supported."
                    all_results.append(
                        ExcelDocument(
                            file_name=os.path.basename(document_path),
                            file_path=document_path,
                            file_type=file_ext.replace(".", "").upper(),
                            sheet_count=0,
                            sheet_names=[],
                            sheets=[],
                            success=False,
                            error=error_msg,
                        )
                    )
                    failed_count += 1
                    continue

                # Convert XLS to XLSX if requested and the file is XLS
                processed_path = document_path
                if convert_xls_to_xlsx and file_ext == ".xls":
                    try:
                        logger.info(f"Converting XLS to XLSX: {document_path}")
                        converter = XLS2XLSX(document_path)
                        # Create a temp file with the xlsx extension
                        xlsx_path = (
                            os.path.splitext(document_path)[0] + "_converted.xlsx"
                        )
                        converter.to_xlsx(xlsx_path)
                        processed_path = xlsx_path
                        temp_files.append(xlsx_path)  # Track for cleanup
                        logger.success(f"Converted XLS to XLSX: {xlsx_path}")
                    except Exception as conv_error:
                        logger.error(f"XLS to XLSX conversion error: {str(conv_error)}")
                        # Continue with the original file if conversion fails

                excel_sheets = []
                sheet_names = []

                # Handle CSV files differently
                if file_ext == ".csv":
                    # For CSV files, create a single sheet named after the file
                    sheet_name = os.path.basename(document_path).replace(".csv", "")
                    df = pd.read_csv(processed_path, nrows=max_rows)

                    # Create a markdown table
                    markdown_table = "*Empty table*"
                    if not df.empty:
                        headers = df.columns.tolist()
                        table_data = df.values.tolist()
                        markdown_table = tabulate(
                            table_data, headers=headers, tablefmt="pipe"
                        )

                        if len(df) >= max_rows:
                            markdown_table += (
                                f"\n\n*Note: Table truncated to {max_rows} rows*"
                            )

                    # Create the sheet model
                    excel_sheets.append(
                        ExcelSheet(
                            name=sheet_name,
                            data=df.to_dict(orient="records"),
                            markdown_table=markdown_table,
                            row_count=len(df),
                            column_count=len(df.columns),
                        )
                    )

                    sheet_names = [sheet_name]

                else:
                    # For Excel files, process all sheets
                    with pd.ExcelFile(processed_path) as xls:
                        sheet_names = xls.sheet_names

                        for sheet_name in sheet_names:
                            # Read the Excel sheet into a DataFrame with a row limit
                            df = pd.read_excel(
                                xls, sheet_name=sheet_name, nrows=max_rows
                            )

                            # Create a markdown table
                            markdown_table = "*Empty table*"
                            if not df.empty:
                                headers = df.columns.tolist()
                                table_data = df.values.tolist()
                                markdown_table = tabulate(
                                    table_data, headers=headers, tablefmt="pipe"
                                )

                                if len(df) >= max_rows:
                                    markdown_table += f"\n\n*Note: Table truncated to {max_rows} rows*"

                            # Create the sheet model
                            excel_sheets.append(
                                ExcelSheet(
                                    name=sheet_name,
                                    data=df.to_dict(orient="records"),
                                    markdown_table=markdown_table,
                                    row_count=len(df),
                                    column_count=len(df.columns),
                                )
                            )

                # Create the result for this file
                file_result = ExcelDocument(
                    file_name=os.path.basename(document_path),
                    file_path=document_path,
                    processed_path=(
                        processed_path if processed_path != document_path else None
                    ),
                    file_type=file_ext.replace(".", "").upper(),
                    sheet_count=len(sheet_names),
                    sheet_names=sheet_names,
                    sheets=excel_sheets,
                    success=True,
                )

                all_results.append(file_result)
                success_count += 1

            except Exception as file_error:
                # Handle errors for individual files
                error_msg = str(file_error)
                logger.error(f"File reading error for {document_path}: {error_msg}")
                all_results.append(
                    ExcelDocument(
                        file_name=os.path.basename(document_path),
                        file_path=document_path,
                        file_type=os.path.splitext(document_path)[1]
                        .replace(".", "")
                        .upper(),
                        sheet_count=0,
                        sheet_names=[],
                        sheets=[],
                        success=False,
                        error=error_msg,
                    )
                )
                failed_count += 1

        # Clean up temporary files
        for temp_file in temp_files:
            try:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                    logger.info(f"Removed temporary file: {temp_file}")
            except Exception as cleanup_error:
                logger.warning(
                    f"Error cleaning up temporary file {temp_file}: {str(cleanup_error)}"
                )

        # Create the final result
        excel_result = ExcelResult(
            total_files=len(document_paths),
            success_count=success_count,
            failed_count=failed_count,
            results=all_results,
        )

        return excel_result.model_dump_json()

    except Exception as e:
        return handle_error(e, "Excel/CSV files processing")


@mcp.tool(
    description="Read and convert PowerPoint slides to base64-encoded images. Cannot process files from https:// URLs."
)
def mcpreadpptx(
    document_path: str = Field(description="The local input PowerPoint file path."),
) -> str:
    """Read and convert PowerPoint slides to base64-encoded images. Cannot process files from https:// URLs."""
    error = check_file_readable(document_path)
    if error:
        return DocumentError(error=error, file_path=document_path).model_dump_json()

    # Create a temporary directory
    temp_dir = tempfile.mkdtemp()
    slides_data = []

    try:
        presentation = Presentation(document_path)
        total_slides = len(presentation.slides)

        if total_slides == 0:
            raise ValueError("PPT file does not contain any slides")

        # Process each slide
        for i, slide in enumerate(presentation.slides):
            # Set slide dimensions
            slide_width_px = 1920  # 16:9 ratio
            slide_height_px = 1080

            # Create a blank image
            slide_img = Image.new("RGB", (slide_width_px, slide_height_px), "white")
            draw = ImageDraw.Draw(slide_img)
            font = ImageFont.load_default()

            # Draw the slide number
            draw.text((20, 20), f"Slide {i+1}/{total_slides}", fill="black", font=font)

            # Process shapes in the slide
            for shape in slide.shapes:
                try:
                    # Process images
                    if hasattr(shape, "image") and shape.image:
                        image_stream = io.BytesIO(shape.image.blob)
                        img = Image.open(image_stream)
                        left = int(
                            shape.left * slide_width_px / presentation.slide_width
                        )
                        top = int(
                            shape.top * slide_height_px / presentation.slide_height
                        )
                        slide_img.paste(img, (left, top))

                    # Process text
                    elif hasattr(shape, "text") and shape.text:
                        text_left = int(
                            shape.left * slide_width_px / presentation.slide_width
                        )
                        text_top = int(
                            shape.top * slide_height_px / presentation.slide_height
                        )
                        draw.text(
                            (text_left, text_top),
                            shape.text,
                            fill="black",
                            font=font,
                        )

                except Exception as shape_error:
                    logger.warning(
                        f"Error processing shape in slide {i+1}: {str(shape_error)}"
                    )

            # Save the slide image
            img_path = os.path.join(temp_dir, f"slide_{i+1}.jpg")
            slide_img.save(img_path, "JPEG")

            # Convert to base64
            base64_image = encode_images(img_path)
            slides_data.append(
                PowerPointSlide(
                    slide_number=i + 1, image=f"data:image/jpeg;base64,{base64_image}"
                )
            )

        # Create the result
        result = PowerPointDocument(
            file_path=document_path,
            file_name=os.path.basename(document_path),
            slide_count=total_slides,
            slides=slides_data,
        )

        return result.model_dump_json()

    except Exception as e:
        return handle_error(e, "PowerPoint processing", document_path)
    finally:
        # Clean up temporary files
        try:
            for file in os.listdir(temp_dir):
                os.remove(os.path.join(temp_dir, file))
            os.rmdir(temp_dir)
        except Exception as cleanup_error:
            logger.warning(f"Error cleaning up temporary files: {str(cleanup_error)}")


@mcp.tool(
    description="Read an HTML file and extract the text content; optionally extract links, images, and table information, and convert to Markdown format."
)
def mcpreadhtmltext(
    document_path: str = Field(description="The local HTML file path."),
    extract_links: bool = Field(
        default=True, description="Whether to extract link information"
    ),
    extract_images: bool = Field(
        default=True, description="Whether to extract image information"
    ),
    extract_tables: bool = Field(
        default=True, description="Whether to extract table information"
    ),
    convert_to_markdown: bool = Field(
        default=True, description="Whether to convert HTML to Markdown format"
    ),
) -> str:
    """Read an HTML file and extract the text content; optionally extract links, images, and table information, and convert to Markdown format."""
    error = check_file_readable(document_path)
    if error:
        return DocumentError(error=error, file_path=document_path).model_dump_json()

    try:
        # Read the HTML file
        with open(document_path, "r", encoding="utf-8") as f:
            html_content = f.read()

        # Parse the HTML using BeautifulSoup
        soup = BeautifulSoup(html_content, "html.parser")

        # Extract text content (remove script and style content)
        for script in soup(["script", "style"]):
            script.extract()
        text_content = soup.get_text(separator="\n", strip=True)

        # Extract the title
        title = soup.title.string if soup.title else None

        # Initialize the result object
        result = HtmlDocument(
            content=text_content,
            html_content=html_content,
            file_path=document_path,
            file_name=os.path.basename(document_path),
            file_size=os.path.getsize(document_path),
            last_modified=datetime.fromtimestamp(
                os.path.getmtime(document_path)
            ).strftime("%Y-%m-%d %H:%M:%S"),
            title=title,
        )

        # Extract links
        if extract_links:
            links = []
            for link in soup.find_all("a"):
                href = link.get("href")
                text = link.get_text(strip=True)
                if href:
                    links.append({"url": href, "text": text})
            result.links = links

        # Extract images
        if extract_images:
            images = []
            for img in soup.find_all("img"):
                src = img.get("src")
                alt = img.get("alt", "")
                if src:
                    images.append({"src": src, "alt": alt})
            result.images = images

        # Extract tables
        if extract_tables:
            tables = []
            for table in soup.find_all("table"):
                tables.append(str(table))
            result.tables = tables

        # Convert to Markdown
        if convert_to_markdown:
            h = html2text.HTML2Text()
            h.ignore_links = False
            h.ignore_images = False
            h.ignore_tables = False
            markdown_content = h.handle(html_content)
            result.markdown = markdown_content

        return result.model_dump_json()

    except Exception as e:
        return handle_error(e, "HTML file reading", document_path)


def main():
    load_dotenv()

    print("Starting Document MCP Server...", file=sys.stderr)
    mcp.run(transport="stdio")


# Make the module callable
def __call__():
    """
    Make the module callable for uvx.
    This function is called when the module is executed directly.
    """
    main()


sys.modules[__name__].__call__ = __call__

# Run the server when the script is executed directly
if __name__ == "__main__":
    main()
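
As with the other servers, the decorated tools remain plain functions, so a quick local check is possible without an MCP client. A sketch using mcpreadjson (the /tmp path is illustrative):

import json

sample_path = "/tmp/sample.json"  # illustrative scratch file
with open(sample_path, "w", encoding="utf-8") as f:
    json.dump({"name": "AWorld", "tools": 8}, f)

from mcp_servers.document_server import mcpreadjson

# Returns a JsonDocument serialized to JSON, e.g.
# {"format": "json", "type": "object", "keys": ["name", "tools"], ...}
print(mcpreadjson(document_path=sample_path, is_jsonl=False))
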
AWorld-main/aworlddistributed/mcp_servers/download_server.py ADDED
@@ -0,0 +1,199 @@
"""
Download MCP Server

This module provides MCP server functionality for downloading files from URLs.
It handles various download scenarios with proper validation, error handling,
and progress tracking.

Key features:
- File downloading from HTTP/HTTPS URLs
- Download progress tracking
- File validation
- Safe file saving

Main functions:
- mcpdownloadfiles: Downloads files from URLs to the local filesystem
"""

import os
import sys
import traceback
import urllib.parse
from pathlib import Path
from typing import List, Optional

import requests
from dotenv import load_dotenv
from mcp.server.fastmcp import FastMCP
from pydantic import BaseModel, Field

from aworld.logs.util import logger

mcp = FastMCP("download-server")


class DownloadResult(BaseModel):
    """Download result model with file information"""

    file_path: str
    file_name: str
    file_size: int
    content_type: Optional[str] = None
    success: bool
    error: Optional[str] = None


class DownloadResults(BaseModel):
    """Download results model for multiple files"""

    results: List[DownloadResult]
    success_count: int
    failed_count: int


@mcp.tool(description="Download files from URLs and save to the local filesystem.")
def mcpdownloadfiles(
    urls: List[str] = Field(
        ..., description="The URLs of the files to download. Must be a list of URLs."
    ),
    output_dir: str = Field(
        "/tmp/mcp_downloads",
        description="Directory to save the downloaded files (default: /tmp/mcp_downloads).",
    ),
    timeout: int = Field(60, description="Download timeout in seconds (default: 60)."),
) -> str:
    """Download files from URLs and save to the local filesystem.

    Args:
        urls: The URLs of the files to download; must be a list of URLs
        output_dir: Directory to save the downloaded files
        timeout: Download timeout in seconds

    Returns:
        JSON string with download results information
    """
    results = []
    success_count = 0
    failed_count = 0

    for single_url in urls:
        result_json = _download_single_file(single_url, output_dir, "", timeout)
        result = DownloadResult.model_validate_json(result_json)
        results.append(result)

        if result.success:
            success_count += 1
        else:
            failed_count += 1

    batch_results = DownloadResults(
        results=results, success_count=success_count, failed_count=failed_count
    )

    return batch_results.model_dump_json()


def _download_single_file(
    url: str, output_dir: str, filename: str, timeout: int
) -> str:
    """Download a single file from a URL and save it to the local filesystem."""
    try:
        # Validate URL
        if not url.startswith(("http://", "https://")):
            raise ValueError(
                "Invalid URL format. URL must start with http:// or https://"
            )

        # Create the output directory if it doesn't exist
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        # Determine the filename if not provided
        if not filename:
            filename = os.path.basename(urllib.parse.urlparse(url).path)
            if not filename:
                filename = "downloaded_file"

        # Full path to save the file
        file_path = os.path.join(output_path, filename)

        logger.info(f"Downloading file from {url} to {file_path}")
        # Download the file in streaming mode
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AWorld/1.0 (https://github.com/inclusionAI/AWorld; [email protected]) "
                "Python/requests "
            ),
            "Accept": "text/html,application/xhtml+xml,application/xml,application/pdf;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        response = requests.get(url, headers=headers, stream=True, timeout=timeout)
        response.raise_for_status()

        # Get the content type
        content_type = response.headers.get("Content-Type")

        # Save the file
        with open(file_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)

        # Get the actual file size
        actual_size = os.path.getsize(file_path)

        logger.info(f"File downloaded successfully to {file_path}")

        # Create the result
        result = DownloadResult(
            file_path=file_path,
            file_name=filename,
            file_size=actual_size,
            content_type=content_type,
            success=True,
            error=None,
        )

        return result.model_dump_json()

    except Exception as e:
        error_msg = str(e)
        logger.error(f"Download error: {traceback.format_exc()}")

        result = DownloadResult(
            file_path="",
            file_name="",
            file_size=0,
            content_type=None,
            success=False,
            error=error_msg,
        )

        return result.model_dump_json()


def main():
    load_dotenv()

    print("Starting Download MCP Server...", file=sys.stderr)
    mcp.run(transport="stdio")


# Make the module callable
def __call__():
    """
    Make the module callable for uvx.
    This function is called when the module is executed directly.
    """
    main()


sys.modules[__name__].__call__ = __call__

# Run the server when the script is executed directly
if __name__ == "__main__":
    main()
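
A minimal invocation sketch (the URL below is a placeholder; any reachable file URL works):

from mcp_servers.download_server import mcpdownloadfiles

result_json = mcpdownloadfiles(
    urls=["https://example.com/report.pdf"],  # placeholder URL
    output_dir="/tmp/mcp_downloads",
    timeout=60,
)
print(result_json)  # DownloadResults with per-file success/error details
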
AWorld-main/aworlddistributed/mcp_servers/e2b_code_server.py ADDED
@@ -0,0 +1,96 @@
import os
import sys

from e2b_code_interpreter import Sandbox
from mcp.server.fastmcp import FastMCP
from pydantic import Field

# Initialize MCP server
mcp = FastMCP("e2b-code-server")


@mcp.tool(description="Upload a local file to the e2b sandbox.")
async def e2b_upload_file(
    path: str = Field(
        description="The local file path to upload."
    )
) -> str:
    """
    Upload a local file to the e2b sandbox.

    Args:
        path (str): The local file path to upload.

    Returns:
        str: The e2b file path and sandbox_id.
    """
    try:
        # Sandbox() reads E2B_API_KEY from the environment
        if not os.getenv("E2B_API_KEY"):
            return "Upload failed. Error: E2B_API_KEY is not set"
        sbx = Sandbox()
        local_file_name = os.path.basename(path)
        e2b_file_path = f"/home/user/{local_file_name}"
        # Read the local file relative to the current working directory
        with open(path, "rb") as file:
            # Upload the file to an absolute path in the sandbox
            sbx.files.write(e2b_file_path, file)
        return f"{e2b_file_path}, {sbx.sandbox_id}"
    except Exception as e:
        return f"Upload failed. Error: {str(e)}"


@mcp.tool(description="Run code in a specified e2b sandbox.")
async def e2b_run_code(
    sandbox_id: str = Field(
        default=None,
        description="The sandbox id to run code in; if you have uploaded a file, use the sandbox_id returned by the e2b_upload_file function."
    ),
    code_block: str = Field(
        default=None,
        description="The code block to run in the e2b sandbox."
    ),
) -> str:
    """
    Run code in a specified e2b sandbox.

    Args:
        sandbox_id (str): The sandbox id to run code in.
        code_block (str): The code block to run in the e2b sandbox.

    Returns:
        str: The result of running the code block.
    """
    try:
        if not os.getenv("E2B_API_KEY"):
            return "Run code failed. Error: E2B_API_KEY is not set"
        sbx = Sandbox(
            sandbox_id=sandbox_id,
        )
        execution = sbx.run_code(code_block)
        return str(execution.logs)
    except Exception as e:
        return f"Run code failed. Error: {str(e)}"


def main():
    from dotenv import load_dotenv
    load_dotenv()

    print("Starting E2b Code MCP Server...", file=sys.stderr)
    mcp.run(transport='stdio')


# Make the module callable
def __call__():
    """
    Make the module callable for uvx.
    This function is called when the module is executed directly.
    """
    main()


# Add this for compatibility with uvx
sys.modules[__name__].__call__ = __call__

# Run the server when the script is executed directly
if __name__ == "__main__":
    main()
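
An end-to-end sketch of the intended upload-then-run flow, assuming E2B_API_KEY is set and parsing the "path, sandbox_id" string the upload tool returns on success; the local file name is hypothetical:

import asyncio

from mcp_servers.e2b_code_server import e2b_upload_file, e2b_run_code

async def demo():
    # e2b_upload_file returns "<e2b_file_path>, <sandbox_id>" on success
    upload_result = await e2b_upload_file(path="data.csv")  # hypothetical local file
    e2b_path, sandbox_id = [part.strip() for part in upload_result.split(",")]
    logs = await e2b_run_code(
        sandbox_id=sandbox_id,
        code_block=f"print(open('{e2b_path}').read()[:100])",
    )
    print(logs)

asyncio.run(demo())
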
AWorld-main/aworlddistributed/mcp_servers/gen_audio_server.py ADDED
@@ -0,0 +1,197 @@
1
+ import os
2
+ import time
3
+ import json
4
+ import requests
5
+ import sys
6
+ import hashlib
7
+
8
+ from dotenv import load_dotenv
9
+ from mcp.server import FastMCP
10
+ from pydantic import Field
11
+ from typing_extensions import Any
12
+
13
+ from aworld.logs.util import logger
14
+
15
+ mcp = FastMCP("gen-audio-server")
16
+
17
+
18
+ def calculate_sha256(plain_text):
19
+ """
20
+ Calculate SHA-256 digest of a string.
21
+
22
+ Args:
23
+ plain_text (str): The text to digest
24
+
25
+ Returns:
26
+ str: Hexadecimal representation of the digest
27
+ """
28
+ try:
29
+ # Create SHA-256 hash object
30
+ sha256 = hashlib.sha256()
31
+
32
+ # Update with the bytes of the plain text (UTF-8 encoded)
33
+ sha256.update(plain_text.encode('utf-8'))
34
+
35
+ # Get the digest in bytes
36
+ digest_bytes = sha256.digest()
37
+
38
+ # Convert each byte to hexadecimal and join
39
+ hex_digest = ''.join([f'{b:02x}' for b in digest_bytes])
40
+
41
+ return hex_digest
42
+ except Exception as e:
43
+ logger.warning(f"Error calculating SHA-256 digest: {e}")
44
+ return ""
45
+
46
+
47
+ def generate_headers(app_key, secret):
48
+ """Generate headers with fresh timestamp and digest"""
49
+ timestamp = str(int(time.time() * 1000))
50
+ plain_text = f"{app_key}_{secret}_{timestamp}"
51
+ digest = calculate_sha256(plain_text)
52
+
53
+ return {
54
+ 'Content-Type': 'application/json',
55
+ 'Alipay-Mf-Appkey': app_key,
56
+ 'Alipay-Mf-Digest': digest,
57
+ 'Alipay-Mf-Timestamp': timestamp
58
+ }
59
+
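The digest is plain SHA-256 over the string appkey_secret_timestamp, and the manual byte formatting in calculate_sha256 is equivalent to hashlib's hexdigest, as this quick sketch with placeholder credentials shows:

import hashlib, time

app_key, secret = "demo_key", "demo_secret"  # placeholders, not real credentials
timestamp = str(int(time.time() * 1000))
plain_text = f"{app_key}_{secret}_{timestamp}"
assert hashlib.sha256(plain_text.encode("utf-8")).hexdigest() == calculate_sha256(plain_text)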
60
+
61
+ @mcp.tool(description="Generate audio from text content")
62
+ def gen_audio(content: str = Field(description="The text content to convert to audio")) -> Any:
63
+ """Generate audio from text content using TTS service"""
64
+ task_url = os.getenv('AUDIO_TASK_URL')
65
+ query_url = os.getenv('AUDIO_QUERY_URL')
66
+ app_key = os.getenv('AUDIO_APP_KEY')
67
+ secret = os.getenv('AUDIO_SECRET')
68
+ if not (task_url and query_url and app_key and secret):
69
+ logger.warning(f"Query failed: task_url, query_url, app_key, secret parameters incomplete")
70
+ return None
71
+
72
+ # Generate initial headers
73
+ headers = generate_headers(app_key, secret)
74
+
75
+ sample_rate = os.getenv('AUDIO_SAMPLE_RATE', '16000')
76
+ audio_format = os.getenv('AUDIO_AUDIO_FORMAT', 'wav')
77
+ tts_voice = os.getenv('AUDIO_TTS_VOICE', 'DBCNF245')
78
+ tts_speech_rate = os.getenv('AUDIO_TTS_SPEECH_RATE', '0')
79
+ tts_volume = os.getenv('AUDIO_TTS_VOLUME', '50')
80
+ tts_pitch = os.getenv('AUDIO_TTS_PITCH', '0')
81
+ voice_type = os.getenv('AUDIO_VOICE_TYPE', 'VOICE_CLONE_LAM')
82
+
83
+ # task_data
84
+ task_data = {
85
+ "sample_rate": sample_rate,
86
+ "audio_format": audio_format,
87
+ "tts_voice": tts_voice,
88
+ "tts_speech_rate": tts_speech_rate,
89
+ "tts_volume": tts_volume,
90
+ "tts_pitch": tts_pitch,
91
+ "tts_text": content,
92
+ "voice_type": voice_type,
93
+ }
94
+
95
+ try:
96
+ # Step 1: Submit task to generate audio
97
+
98
+ response = requests.post(task_url, headers=headers, json=task_data)
99
+
100
+ if response.status_code != 200:
101
+ return None
102
+
103
+ result = response.json()
104
+
105
+ # Check if task was successfully submitted
106
+ if not result.get("success"):
107
+ return None
108
+
109
+ # Extract task ID
110
+ task_id = result.get("data")
111
+ if not task_id:
112
+ return None
113
+
114
+ logger.info(f"Task submitted successfully. Task ID: {task_id}")
115
+
116
+ # Step 2: Poll for results
117
+ max_attempts = int(os.getenv('AUDIO_RETRY_TIMES', 10))
118
+ wait_time = int(os.getenv('AUDIO_SLEEP_TIME', 5))
119
+ query_url = query_url + f"?async_task_id={task_id}"
120
+
121
+ for attempt in range(max_attempts):
122
+ # Wait before polling
123
+ time.sleep(wait_time)
124
+ logger.info(f"Polling attempt {attempt + 1}/{max_attempts}...")
125
+
126
+ # Generate fresh headers for each poll request
127
+ query_headers = generate_headers(app_key, secret)
128
+
129
+ # Poll for results
130
+ query_response = requests.post(query_url, headers=query_headers)
131
+
132
+ if query_response.status_code != 200:
133
+ logger.info(f"Poll request failed with status code {query_response.status_code}")
134
+ continue
135
+
136
+ try:
137
+ query_result = query_response.json()
138
+ except json.JSONDecodeError as e:
139
+ logger.warning(f"Failed to parse response as JSON: {e}")
140
+ continue
141
+
142
+ # Check if processing is complete
143
+ if query_result.get("success") and query_result.get("data", {}).get("status") == "ST_SUCCESS":
144
+ # Extract audio URL based on the correct JSON structure
145
+ # Navigate through the nested structure: data -> result -> result -> audioUrl
146
+ audio_url = query_result.get("data", {}).get("result", {}).get("result", {}).get("audioUrl")
147
+
148
+ if audio_url:
149
+ return json.dumps({"audio_data": audio_url})
150
+ else:
151
+ logger.info("Audio URL not found in the response")
152
+ return None
153
+ elif query_result.get("success") and query_result.get("data", {}).get("status") == "ST_RUNNING":
154
+ # If still running, continue to next polling attempt
155
+ logger.info("Task still running, continuing to next poll...")
156
+ continue
157
+ else:
158
+ # Any other status, return None
159
+ logger.warning(f"Unexpected status: {query_result.get('data', {}).get('status')}")
160
+ return None
161
+
162
+ # If we get here, polling timed out
163
+ logger.warning("Polling timed out after maximum attempts")
164
+ return None
165
+
166
+ except Exception as e:
167
+ import traceback
+ logger.warning(f"Exception occurred: {e}\n{traceback.format_exc()}")
169
+ return None
170
+
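For reference, the nested response shape the polling loop walks (data -> result -> result -> audioUrl) looks roughly like the dict below; this is inferred from the parsing code above, not from API documentation:

expected_poll_response = {
    "success": True,
    "data": {
        "status": "ST_SUCCESS",  # "ST_RUNNING" while the task is still processing
        "result": {"result": {"audioUrl": "https://example.com/tts-output.wav"}},  # illustrative URL
    },
}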
171
+
172
+ def main():
173
+ from dotenv import load_dotenv
174
+
175
+ load_dotenv()
176
+
177
+ print("Starting Audio MCP gen-audio-server...", file=sys.stderr)
178
+ mcp.run(transport="stdio")
179
+
180
+
181
+ # Make the module callable
182
+ def __call__():
183
+ """
184
+ Make the module callable for uvx.
185
+ This function is called when the module is executed directly.
186
+ """
187
+ main()
188
+
189
+
190
+ sys.modules[__name__].__call__ = __call__
191
+
192
+ if __name__ == "__main__":
193
+ main()
194
+ # For testing without MCP
195
+ # result = gen_audio("hello ,this is test")
196
+ # print("\nFinal Result:")
197
+ # print(result)
AWorld-main/aworlddistributed/mcp_servers/gen_pic_server.py ADDED
@@ -0,0 +1,166 @@
1
+ import os
2
+ import time
3
+ import json
4
+ import requests
5
+ import sys
6
+
7
+ from dotenv import load_dotenv
8
+ from mcp.server import FastMCP
9
+ from pydantic import Field
10
+ from typing_extensions import Any
11
+
12
+ from aworld.logs.util import logger
13
+
14
+ mcp = FastMCP("gen-pic-server")
15
+
16
+
17
+ @mcp.tool(description="Generate picture from text content")
18
+ def gen_picture(prompt: str = Field(description="The text prompt to generate an image"),
19
+ num: int = Field(0,
20
+ description="Number of images to generate, 0 means use environment variable")) -> Any:
21
+ """Generate picture from text prompt"""
22
+ api_key = os.getenv('DASHSCOPE_API_KEY')
23
+ submit_url = os.getenv('DASHSCOPE_SUBMIT_URL', '')
24
+ query_base_url = os.getenv('DASHSCOPE_QUERY_BASE_URL', '')
25
+
26
+ if not api_key or not submit_url or not query_base_url:
27
+ logger.warning(
28
+ "Query failed: DASHSCOPE_API_KEY,DASHSCOPE_SUBMIT_URL,DASHSCOPE_QUERY_BASE_URL environment variable is not set")
29
+ return None
30
+
31
+ headers = {
32
+ 'X-DashScope-Async': 'enable',
33
+ 'Authorization': f'Bearer {api_key}',
34
+ 'Content-Type': 'application/json'
35
+ }
36
+
37
+ # Get parameters from environment variables or use defaults
38
+ model = os.getenv('DASHSCOPE_MODEL', 'wanx2.1-t2i-turbo')
39
+ size = os.getenv('DASHSCOPE_SIZE', '1024*1024')
40
+
41
+ # Use num parameter if provided (>0), otherwise use environment variable
42
+ n = num if num > 0 else int(os.getenv('DASHSCOPE_N', '1'))
43
+
44
+ task_data = {
45
+ "model": model,
46
+ "input": {
47
+ "prompt": prompt
48
+ },
49
+ "parameters": {
50
+ "size": size,
51
+ "n": n
52
+ }
53
+ }
54
+
55
+ try:
56
+ # Step 1: Submit task to generate image
57
+ logger.info("Submitting task to generate image...")
58
+
59
+ response = requests.post(submit_url, headers=headers, json=task_data)
60
+
61
+ if response.status_code != 200:
62
+ logger.warning(f"Task submission failed with status code {response.status_code}")
63
+ return None
64
+
65
+ result = response.json()
66
+
67
+ # Check if task was successfully submitted
68
+ if not result.get("output") or not result.get("output").get("task_id"):
69
+ logger.warning("Failed to get task_id from response")
70
+ return None
71
+
72
+ # Extract task ID
73
+ task_id = result.get("output").get("task_id")
74
+ logger.info(f"Task submitted successfully. Task ID: {task_id}")
75
+
76
+ # Step 2: Poll for results
77
+ max_attempts = int(os.getenv('DASHSCOPE_RETRY_TIMES', 10))
78
+ wait_time = int(os.getenv('DASHSCOPE_SLEEP_TIME', 5))
79
+ query_url = f"{query_base_url}{task_id}"
80
+
81
+ for attempt in range(max_attempts):
82
+ # Wait before polling
83
+ time.sleep(wait_time)
84
+ logger.info(f"Polling attempt {attempt + 1}/{max_attempts}...")
85
+
86
+ # Poll for results
87
+ query_response = requests.get(query_url, headers={'Authorization': f'Bearer {api_key}'})
88
+
89
+ if query_response.status_code != 200:
90
+ logger.info(f"Poll request failed with status code {query_response.status_code}")
91
+ continue
92
+
93
+ try:
94
+ query_result = query_response.json()
95
+ except json.JSONDecodeError as e:
96
+ logger.warning(f"Failed to parse response as JSON: {e}")
97
+ continue
98
+
99
+ # Check task status
100
+ task_status = query_result.get("output", {}).get("task_status")
101
+
102
+ if task_status == "SUCCEEDED":
103
+ # Extract image URLs
104
+ results = query_result.get("output", {}).get("results", [])
105
+ if results:
106
+ # Create a simple array of objects with image_url
107
+ image_urls = []
108
+ for result in results:
109
+ if "url" in result:
110
+ image_urls.append({"image_url": result["url"]})
111
+
112
+ if image_urls:
113
+ return json.dumps(image_urls)
114
+ else:
115
+ logger.info("No valid image URLs found in the response")
116
+ return None
117
+ else:
118
+ logger.info("No results found in the response")
119
+ return None
120
+ elif task_status in ["PENDING", "RUNNING"]:
121
+ # If still running, continue to next polling attempt
122
+ logger.info(f"Task status: {task_status}, continuing to next poll...")
123
+ continue
124
+ elif task_status == "FAILED":
125
+ logger.warning("Task failed")
126
+ return None
127
+ else:
128
+ # Any other status, return None
129
+ logger.warning(f"Unexpected status: {task_status}")
130
+ return None
131
+
132
+ # If we get here, polling timed out
133
+ logger.warning("Polling timed out after maximum attempts")
134
+ return None
135
+
136
+ except Exception as e:
137
+ logger.warning(f"Exception occurred: {e}")
138
+ return None
139
+
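Every knob of gen_picture comes from the environment. A sketch of the variables it reads (URLs and key are illustrative placeholders; the first three are required, the rest fall back to the defaults shown in the code):

os.environ.setdefault("DASHSCOPE_API_KEY", "sk-placeholder")                            # required
os.environ.setdefault("DASHSCOPE_SUBMIT_URL", "https://dashscope.example/submit")       # required
os.environ.setdefault("DASHSCOPE_QUERY_BASE_URL", "https://dashscope.example/tasks/")   # required
os.environ.setdefault("DASHSCOPE_MODEL", "wanx2.1-t2i-turbo")
os.environ.setdefault("DASHSCOPE_SIZE", "1024*1024")
os.environ.setdefault("DASHSCOPE_N", "1")
os.environ.setdefault("DASHSCOPE_RETRY_TIMES", "10")
os.environ.setdefault("DASHSCOPE_SLEEP_TIME", "5")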
140
+
141
+ def main():
142
+ from dotenv import load_dotenv
143
+
144
+ load_dotenv()
145
+
146
+ print("Starting MCP gen-pic-server...", file=sys.stderr)
147
+ mcp.run(transport="stdio")
148
+
149
+
150
+ # Make the module callable
151
+ def __call__():
152
+ """
153
+ Make the module callable for uvx.
154
+ This function is called when the module is executed directly.
155
+ """
156
+ main()
157
+
158
+
159
+ sys.modules[__name__].__call__ = __call__
160
+
161
+ if __name__ == "__main__":
162
+ main()
163
+ # For testing without MCP
164
+ # result = gen_picture("sunflower", 2)
165
+ # print("\nFinal Result:")
166
+ # print(result)
AWorld-main/aworlddistributed/mcp_servers/gen_video_server.py ADDED
@@ -0,0 +1,153 @@
1
+ import os
2
+ import time
3
+ import json
4
+ import requests
5
+ import sys
6
+
7
+ from dotenv import load_dotenv
8
+ from mcp.server import FastMCP
9
+ from pydantic import Field
10
+ from typing_extensions import Any
11
+
12
+ from aworld.logs.util import logger
13
+
14
+ mcp = FastMCP("gen-video-server")
15
+
16
+ @mcp.tool(description="Generate video from text content")
17
+ def gen_video(prompt: str = Field(description="The text prompt to generate a video")) -> Any:
18
+ """Generate video from text prompt"""
19
+ api_key = os.getenv('DASHSCOPE_API_KEY')
20
+ submit_url = os.getenv('DASHSCOPE_VIDEO_SUBMIT_URL', '')
21
+ query_base_url = os.getenv('DASHSCOPE_QUERY_BASE_URL', '')
22
+
23
+ if not api_key or not submit_url or not query_base_url:
24
+ logger.warning("Query failed: DASHSCOPE_API_KEY, DASHSCOPE_VIDEO_SUBMIT_URL, DASHSCOPE_QUERY_BASE_URL environment variables are not set")
25
+ return None
26
+
27
+ headers = {
28
+ 'X-DashScope-Async': 'enable',
29
+ 'Authorization': f'Bearer {api_key}',
30
+ 'Content-Type': 'application/json'
31
+ }
32
+
33
+ # Get parameters from environment variables or use defaults
34
+ model = os.getenv('DASHSCOPE_VIDEO_MODEL', 'wanx2.1-t2v-turbo')
35
+ size = os.getenv('DASHSCOPE_VIDEO_SIZE', '1280*720')
36
+
37
+ # Note: the API currently generates one video per request, so unlike
+ # gen_picture this tool does not expose a num parameter
39
+
40
+ task_data = {
41
+ "model": model,
42
+ "input": {
43
+ "prompt": prompt
44
+ },
45
+ "parameters": {
46
+ "size": size
47
+ }
48
+ }
49
+
50
+ try:
51
+ # Step 1: Submit task to generate video
52
+ logger.info("Submitting task to generate video...")
53
+
54
+ response = requests.post(submit_url, headers=headers, json=task_data)
55
+
56
+ if response.status_code != 200:
57
+ logger.warning(f"Task submission failed with status code {response.status_code}")
58
+ return None
59
+
60
+ result = response.json()
61
+
62
+ # Check if task was successfully submitted
63
+ if not result.get("output") or not result.get("output").get("task_id"):
64
+ logger.warning("Failed to get task_id from response")
65
+ return None
66
+
67
+ # Extract task ID
68
+ task_id = result.get("output").get("task_id")
69
+ logger.info(f"Task submitted successfully. Task ID: {task_id}")
70
+
71
+ # Step 2: Poll for results
72
+ max_attempts = int(os.getenv('DASHSCOPE_VIDEO_RETRY_TIMES', 10)) # Increased default retries for video
73
+ wait_time = int(os.getenv('DASHSCOPE_VIDEO_SLEEP_TIME', 5)) # Increased default wait time for video
74
+ query_url = f"{query_base_url}{task_id}"
75
+
76
+ for attempt in range(max_attempts):
77
+ # Wait before polling
78
+ time.sleep(wait_time)
79
+ logger.info(f"Polling attempt {attempt + 1}/{max_attempts}...")
80
+
81
+ # Poll for results
82
+ query_response = requests.get(query_url, headers={'Authorization': f'Bearer {api_key}'})
83
+
84
+ if query_response.status_code != 200:
85
+ logger.info(f"Poll request failed with status code {query_response.status_code}")
86
+ continue
87
+
88
+ try:
89
+ query_result = query_response.json()
90
+ except json.JSONDecodeError as e:
91
+ logger.warning(f"Failed to parse response as JSON: {e}")
92
+ continue
93
+
94
+ # Check task status
95
+ task_status = query_result.get("output", {}).get("task_status")
96
+
97
+ if task_status == "SUCCEEDED":
98
+ # Extract video URL
99
+ video_url = query_result.get("output", {}).get("video_url")
100
+
101
+ if video_url:
102
+ # Return a single object with the video_url (the image API returns an array of objects)
103
+ return json.dumps({"video_url": video_url})
104
+ else:
105
+ logger.info("Video URL not found in the response")
106
+ return None
107
+ elif task_status in ["PENDING", "RUNNING"]:
108
+ # If still running, continue to next polling attempt
109
+ logger.info(f"Task status: {task_status}, continuing to next poll...")
110
+ continue
111
+ elif task_status == "FAILED":
112
+ logger.warning("Task failed")
113
+ return None
114
+ else:
115
+ # Any other status, return None
116
+ logger.warning(f"Unexpected status: {task_status}")
117
+ return None
118
+
119
+ # If we get here, polling timed out
120
+ logger.warning("Polling timed out after maximum attempts")
121
+ return None
122
+
123
+ except Exception as e:
124
+ logger.warning(f"Exception occurred: {e}")
125
+ return None
126
+
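Unlike the image task, a successful video task carries a single video_url directly under output. The shape the parsing code expects is roughly (inferred from the code; illustrative URL):

expected_video_response = {
    "output": {
        "task_status": "SUCCEEDED",  # or PENDING / RUNNING / FAILED
        "video_url": "https://example.com/generated.mp4",
    }
}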
127
+
128
+ def main():
129
+ from dotenv import load_dotenv
130
+
131
+ load_dotenv()
132
+
133
+ print("Starting MCP gen-video-server...", file=sys.stderr)
134
+ mcp.run(transport="stdio")
135
+
136
+
137
+ # Make the module callable
138
+ def __call__():
139
+ """
140
+ Make the module callable for uvx.
141
+ This function is called when the module is executed directly.
142
+ """
143
+ main()
144
+
145
+
146
+ sys.modules[__name__].__call__ = __call__
147
+
148
+ if __name__ == "__main__":
149
+ main()
150
+ # For testing without MCP
151
+ # result = gen_video("A cat running under moonlight")
152
+ # print("\nFinal Result:")
153
+ # print(result)
AWorld-main/aworlddistributed/mcp_servers/image_server.py ADDED
@@ -0,0 +1,240 @@
1
+ """
2
+ Image MCP Server
3
+
4
+ This module provides MCP server functionality for image processing and analysis.
5
+ It handles image encoding, optimization, and various image analysis tasks such as
6
+ OCR (Optical Character Recognition) and visual reasoning.
7
+
8
+ The server supports both local image files and remote image URLs with proper validation
9
+ and handles various image formats including JPEG, PNG, GIF, and others.
10
+
11
+ Main functions:
12
+ - encode_images: Encodes images to base64 format with optimization
13
+ - optimize_image: Resizes and optimizes images for better performance
14
+ - Various MCP tools for image analysis and processing
15
+ """
16
+
17
+ # import asyncio
18
+ import base64
19
+ import os
20
+ from io import BytesIO
21
+ from typing import Any, Dict, List
22
+
23
+ from PIL import Image
24
+ from pydantic import Field
25
+ from aworld.logs.util import logger
26
+ from mcp_servers.utils import get_file_from_source
27
+ from mcp.server.fastmcp import FastMCP
28
+ from openai import OpenAI
29
+
30
+ # Initialize MCP server
31
+ mcp = FastMCP("image-server")
32
+
33
+
34
+ IMAGE_OCR = (
35
+ "Input is a base64 encoded image. Read text from image if present. "
36
+ "Return a json string with the following format: "
37
+ '{"image_text": "text from image"}'
38
+ )
39
+
40
+ IMAGE_REASONING = (
41
+ "Input is a base64 encoded image. Given user's task: {task}, "
42
+ "solve it following the guide line:\n"
43
+ "1. Careful visual inspection\n"
44
+ "2. Contextual reasoning\n"
45
+ "3. Text transcription where relevant\n"
46
+ "4. Logical deduction from visual evidence\n"
47
+ "Return a json string with the following format: "
48
+ '{"image_reasoning_result": "reasoning result given task and image"}'
49
+ )
50
+
51
+
52
+ def optimize_image(image_data: bytes, max_size: int = 1024) -> bytes:
53
+ """
54
+ Optimize image by resizing if needed
55
+
56
+ Args:
57
+ image_data: Raw image data
58
+ max_size: Maximum dimension size in pixels
59
+
60
+ Returns:
61
+ bytes: Optimized image data
62
+
63
+ Raises:
64
+ ValueError: When image cannot be processed
65
+ """
66
+ try:
67
+ image = Image.open(BytesIO(image_data))
68
+
69
+ # Resize if image is too large
70
+ if max(image.size) > max_size:
71
+ ratio = max_size / max(image.size)
72
+ new_size = (int(image.size[0] * ratio), int(image.size[1] * ratio))
73
+ image = image.resize(new_size, Image.Resampling.LANCZOS)
74
+
75
+ # Save to buffer
76
+ buffered = BytesIO()
77
+ image_format = image.format if image.format else "JPEG"
78
+ image.save(buffered, format=image_format)
79
+ return buffered.getvalue()
80
+
81
+ except Exception as e:
82
+ logger.warning(f"Failed to optimize image: {str(e)}")
83
+ return image_data # Return original data if optimization fails
84
+
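A quick way to exercise optimize_image in isolation (sample.jpg is a hypothetical local file):

with open("sample.jpg", "rb") as f:  # hypothetical input image
    raw = f.read()
small = optimize_image(raw, max_size=512)
# re-encoded, and downscaled if the longest side exceeded 512 pixels
print(f"{len(raw)} -> {len(small)} bytes")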
85
+
86
+ def encode_images(image_sources: List[str], with_header: bool = True) -> List[str]:
87
+ """
88
+ Encode images to base64 format with robust file handling
89
+
90
+ Args:
91
+ image_sources: List of URLs or local file paths of images
92
+ with_header: Whether to include MIME type header
93
+
94
+ Returns:
95
+ List[str]: Base64 encoded image strings, with MIME type prefix if with_header is True
96
+
97
+ Raises:
98
+ ValueError: When image source is invalid or image format is not supported
99
+ """
100
+ if not image_sources:
101
+ raise ValueError("Image sources cannot be empty")
102
+
103
+ images = []
104
+ for image_source in image_sources:
105
+ try:
106
+ # Get file with validation (only image files allowed)
107
+ file_path, mime_type, content = get_file_from_source(
108
+ image_source,
109
+ allowed_mime_prefixes=["image/"],
110
+ max_size_mb=10.0, # 10MB limit for images
111
+ type="image",
112
+ )
113
+
114
+ # Optimize image
115
+ optimized_content = optimize_image(content)
116
+
117
+ # Encode to base64
118
+ image_base64 = base64.b64encode(optimized_content).decode()
119
+
120
+ # Format with header if requested
121
+ final_image = (
122
+ f"data:{mime_type};base64,{image_base64}"
123
+ if with_header
124
+ else image_base64
125
+ )
126
+
127
+ images.append(final_image)
128
+
129
+ # Clean up temporary file if it was created for a URL
130
+ if file_path != os.path.abspath(image_source) and os.path.exists(file_path):
131
+ os.unlink(file_path)
132
+
133
+ except Exception as e:
134
+ logger.error(f"Error encoding image from {image_source}: {str(e)}")
135
+ raise
136
+
137
+ return images
138
+
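Each entry returned by encode_images is a data URL when with_header is True, so it can be dropped straight into an image_url message part. Roughly (illustrative path and payload):

urls = encode_images(["/tmp/photo.png"])  # hypothetical local file
# urls[0] == "data:image/jpeg;base64,/9j/4AAQ..." (utils currently reports image/jpeg for all images)
mime_header, b64_payload = urls[0].split(",", 1)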
139
+ def image_to_base64(image_path):
140
+ try:
141
+ # TODO: extract images from PDFs and other file types
142
+ with Image.open(image_path) as image:
143
+ buffered = BytesIO()
144
+ image_format = image.format if image.format else "JPEG"
145
+ image.save(buffered, format=image_format)
146
+ image_bytes = buffered.getvalue()
147
+ base64_encoded = base64.b64encode(image_bytes).decode('utf-8')
148
+ return base64_encoded
149
+ except Exception as e:
150
+ print(f"Base64 error: {e}")
151
+ return None
152
+
153
+
154
+ def create_image_contents(prompt: str, image_base64: List[str]) -> List[Dict[str, Any]]:
155
+ """Create uniform image format for querying llm."""
156
+ content = [
157
+ {"type": "text", "text": prompt},
158
+ ]
159
+ content.extend(
160
+ [{"type": "image_url", "image_url": {"url": url}} for url in image_base64]
161
+ )
162
+ return content
163
+
164
+
165
+ @mcp.tool(description="solve the question by careful reasoning given the image(s) in given local filepath or url, including reasoning, ocr, etc.")
166
+ def mcp_image_recognition(
167
+ image_urls: List[str] = Field(
168
+ description="The input image(s) in given a list of local filepaths or urls."
169
+ ),
170
+ question: str = Field(description="The question to ask."),
171
+ ) -> str:
172
+ """solve the question by careful reasoning given the image(s) in given filepath or url."""
173
+
174
+ try:
175
+ # Note: only the first image in the list is processed
+ image_base64 = image_to_base64(image_urls[0])
176
+ logger.info(f"image_url: {image_urls[0]}")
177
+ reasoning_prompt = question
178
+ messages=[
179
+ {"role": "system", "content": "You are a helpful assistant."},
180
+ {"role": "user", "content":
181
+ [
182
+ {"type": "text", "text": reasoning_prompt},
183
+ {
184
+ "type": "image_url",
185
+ "image_url": {
186
+ "url": f"data:image/jpeg;base64,{image_base64}"
187
+ }
188
+ },
189
+ ],
190
+ },
191
+ ]
192
+
193
+ client = OpenAI(
194
+ api_key=os.getenv("LLM_API_KEY"),
195
+ base_url=os.getenv("LLM_BASE_URL")
196
+ )
197
+ response = client.chat.completions.create(
198
+ model=os.getenv("LLM_MODEL_NAME"),
199
+ messages=messages,
200
+ )
201
+
202
+ logger.info(f"response: {response}")
203
+ image_reasoning_result = response.choices[0].message.content
204
+
205
+ except Exception as e:
206
+ image_reasoning_result = ""
207
+ import traceback
208
+ traceback.print_exc()
209
+ logger.error(f"image_reasoning_result-Execute error: {e}")
210
+
211
+ logger.info(
212
+ f"---get_reasoning_by_image-image_reasoning_result:{image_reasoning_result}"
213
+ )
214
+
215
+ return image_reasoning_result
216
+
217
+
218
+ def main():
219
+ from dotenv import load_dotenv
220
+ load_dotenv()
221
+
222
+ print("Starting Image MCP Server...", file=sys.stderr)
223
+ mcp.run(transport='stdio')
224
+
225
+ # Make the module callable
226
+ def __call__():
227
+ """
228
+ Make the module callable for uvx.
229
+ This function is called when the module is executed directly.
230
+ """
231
+ main()
232
+
233
+
234
+ # Add this for compatibility with uvx
235
+ import sys
236
+ sys.modules[__name__].__call__ = __call__
237
+
238
+ # Run the server when the script is executed directly
239
+ if __name__ == "__main__":
240
+ main()
AWorld-main/aworlddistributed/mcp_servers/picsearch_server.py ADDED
@@ -0,0 +1,180 @@
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import os
5
+ import sys
6
+
7
+ import aiohttp
8
+ from typing import List, Dict, Any, Optional
9
+ from dotenv import load_dotenv
10
+ from mcp.server import FastMCP
11
+ from pydantic import Field
12
+
13
+ from aworld.logs.util import logger
14
+
15
+
16
+ mcp = FastMCP("picsearch-server")
17
+
18
+ async def search_single(query: str, num: int = 5) -> Optional[Dict[str, Any]]:
19
+ """Execute a single search query, returns None on error"""
20
+ try:
21
+ url = os.getenv('PIC_SEARCH_URL')
22
+ searchMode = os.getenv('PIC_SEARCH_SEARCHMODE')
23
+ source = os.getenv('PIC_SEARCH_SOURCE')
24
+ domain = os.getenv('PIC_SEARCH_DOMAIN')
25
+ uid = os.getenv('PIC_SEARCH_UID')
26
+ if not url or not searchMode or not source or not domain:
27
+ logger.warning(f"Query failed: url, searchMode, source, domain parameters incomplete")
28
+ return None
29
+
30
+ headers = {
31
+ 'Content-Type': 'application/json'
32
+ }
33
+ data = {
34
+ "domain": domain,
35
+ "extParams": {
36
+ "contentType": "llmWholeImage"
37
+ },
38
+ "page": 0,
39
+ "pageSize": num,
40
+ "query": query,
41
+ "searchMode": searchMode,
42
+ "source": source,
43
+ "userId": uid
44
+ }
45
+
46
+ async with aiohttp.ClientSession() as session:
47
+ try:
48
+ async with session.post(url, headers=headers, json=data) as response:
49
+ if response.status != 200:
50
+ logger.warning(f"Query failed: {query}, status code: {response.status}")
51
+ return None
52
+
53
+ result = await response.json()
54
+ return result
55
+ except aiohttp.ClientError:
56
+ logger.warning(f"Request error: {query}")
57
+ return None
58
+ except Exception:
59
+ logger.warning(f"Query exception: {query}")
60
+ return None
61
+
62
+
63
+ def filter_valid_docs(result: Optional[Dict[str, Any]]) -> List[Dict[str, Any]]:
64
+ """Filter valid document results, returns empty list if input is None"""
65
+ if result is None:
66
+ return []
67
+
68
+ try:
69
+ valid_docs = []
70
+
71
+ # Check success field
72
+ if not result.get("success"):
73
+ return valid_docs
74
+
75
+ # Check searchImages field
76
+ search_docs = result.get("searchImages", [])
77
+ if not search_docs:
78
+ return valid_docs
79
+
80
+ # Extract required fields
81
+ required_fields = ["title", "picUrl"]
82
+
83
+ for doc in search_docs:
84
+ # Check if all required fields exist and are not empty
85
+ is_valid = True
86
+ for field in required_fields:
87
+ if field not in doc or not doc[field]:
88
+ is_valid = False
89
+ break
90
+
91
+ if is_valid:
92
+ # Keep only required fields
93
+ filtered_doc = {field: doc[field] for field in required_fields}
94
+ valid_docs.append(filtered_doc)
95
+
96
+ return valid_docs
97
+ except Exception:
98
+ return []
99
+
100
+ @mcp.tool(description="Search Picture based on the user's input query")
101
+ async def search(
102
+ query: str = Field(
103
+ description="The query to search for picture"
104
+ ),
105
+ num: int = Field(
106
+ 5,
107
+ description="Maximum number of results to return, default is 5"
108
+ )
109
+ ) -> Any:
110
+ """Execute search function for a single query"""
111
+ try:
112
+ # Get configuration from environment variables
113
+ env_total_num = os.getenv('PIC_SEARCH_TOTAL_NUM')
114
+ if env_total_num and env_total_num.isdigit():
115
+ # Force override input num parameter with environment variable
116
+ num = int(env_total_num)
117
+
118
+ # If no query provided, return empty list
119
+ if not query:
120
+ return json.dumps([])
121
+
122
+ # Get actual number of results to return
123
+ slice_num = os.getenv('PIC_SEARCH_SLICE_NUM')
124
+ if slice_num and slice_num.isdigit():
125
+ actual_num = int(slice_num)
126
+ else:
127
+ actual_num = num
128
+
129
+ # Execute the query
130
+ result = await search_single(query, actual_num)
131
+
132
+ # Filter results
133
+ valid_docs = filter_valid_docs(result)
134
+
135
+ # Return results
136
+ result_json = json.dumps(valid_docs, ensure_ascii=False)
137
+ logger.info(f"Completed query: '{query}', found {len(valid_docs)} valid documents")
138
+ logger.info(result_json)
139
+
140
+ return result_json
141
+ except Exception as e:
142
+ # Return empty list on exception
143
+ logger.error(f"Error processing query: {str(e)}")
144
+ return json.dumps([])
145
+
146
+
147
+ def main():
148
+ from dotenv import load_dotenv
149
+
150
+ load_dotenv()
151
+
152
+ print("Starting Audio MCP picsearch-server...", file=sys.stderr)
153
+ mcp.run(transport="stdio")
154
+
155
+
156
+ # Make the module callable
157
+ def __call__():
158
+ """
159
+ Make the module callable for uvx.
160
+ This function is called when the module is executed directly.
161
+ """
162
+ main()
163
+
164
+ sys.modules[__name__].__call__ = __call__
165
+
166
+ if __name__ == "__main__":
167
+ main()
168
+
169
+ # if __name__ == "__main__":
170
+ # # Configure logging
171
+ # logging.basicConfig(
172
+ # level=logging.INFO,
173
+ # format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
174
+ # )
175
+ #
176
+ #
177
+ # # Test single query
178
+ # asyncio.run(search(query="Image search test"))
179
+ #
180
+ # # Testing multiple queries is no longer applicable
AWorld-main/aworlddistributed/mcp_servers/reasoning_server.py ADDED
@@ -0,0 +1,102 @@
1
+ import os
2
+ import sys
3
+ import traceback
4
+
5
+ from dotenv import load_dotenv
6
+ from mcp.server.fastmcp import FastMCP
7
+ from pydantic import Field
8
+
9
+ from aworld.config.conf import AgentConfig
10
+ from aworld.logs.util import logger
11
+ from aworld.models.llm import call_llm_model, get_llm_model
12
+
13
+ # Initialize MCP server
14
+ mcp = FastMCP("reasoning-server")
15
+
16
+
17
+ @mcp.tool(
18
+ description="Perform complex problem reasoning using powerful reasoning model."
19
+ )
20
+ def complex_problem_reasoning(
21
+ question: str = Field(
22
+ description="The input question for complex problem reasoning,"
23
+ + " such as math and code contest problem",
24
+ ),
25
+ original_task: str = Field(
26
+ default="",
27
+ description="The original task description."
28
+ + " This argument could be fetched from the <task>TASK</task> tag",
29
+ ),
30
+ ) -> str:
31
+ """
32
+ Perform complex problem reasoning using Powerful Reasoning model,
33
+ such as riddle, game or competition-level STEM(including code) problems.
34
+
35
+ Args:
36
+ question: The input question for complex problem reasoning
37
+ original_task: The original task description (optional)
38
+
39
+ Returns:
40
+ str: The reasoning result from the model
41
+ """
42
+ try:
43
+ # Prepare the prompt with both the question and original task if provided
44
+ prompt = question
45
+ if original_task:
46
+ prompt = f"Original Task: {original_task}\n\nQuestion: {question}"
47
+
48
+ # Call the LLM model for reasoning
49
+ response = call_llm_model(
50
+ llm_model=get_llm_model(
51
+ conf=AgentConfig(
52
+ llm_provider="openai",
53
+ llm_model_name=os.getenv("LLM_MODEL_NAME", "your_llm_model_name"),
54
+ llm_api_key=os.getenv("LLM_API_KEY", "your_openai_api_key"),
55
+ llm_base_url=os.getenv("LLM_BASE_URL", "your_openai_base_url"),
56
+ )
57
+ ),
58
+ messages=[
59
+ {
60
+ "role": "system",
61
+ "content": (
62
+ "You are an expert at solving complex problems including math,"
63
+ " code contests, riddles, and puzzles."
64
+ " Provide detailed step-by-step reasoning and a clear final answer."
65
+ ),
66
+ },
67
+ {"role": "user", "content": prompt},
68
+ ],
69
+ temperature=float(os.getenv("LLM_TEMPERATURE", "0.3")),
70
+ )
71
+
72
+ # Extract the reasoning result
73
+ reasoning_result = response.content
74
+
75
+ logger.info("Complex reasoning completed successfully")
76
+ return reasoning_result
77
+
78
+ except Exception as e:
79
+ logger.error(f"Error in complex problem reasoning: {traceback.format_exc()}")
80
+ return f"Error performing reasoning: {str(e)}"
81
+
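The tool is a thin wrapper: it folds the optional original task into the prompt and sends a two-message chat. A sketch of a direct call for local testing (hypothetical inputs; env vars read as in the code):

answer = complex_problem_reasoning(
    question="What is the last digit of 7**2024?",
    original_task="Solve the number-theory riddle stated in the task tag.",
)
print(answer)  # step-by-step reasoning plus a final answer, per the system prompt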
82
+
83
+ def main():
84
+ load_dotenv()
85
+ print("Starting Reasoning MCP Server...", file=sys.stderr)
86
+ mcp.run(transport="stdio")
87
+
88
+
89
+ # Make the module callable
90
+ def __call__():
91
+ """
92
+ Make the module callable for uvx.
93
+ This function is called when the module is executed directly.
94
+ """
95
+ main()
96
+
97
+
98
+ sys.modules[__name__].__call__ = __call__
99
+
100
+ # Run the server when the script is executed directly
101
+ if __name__ == "__main__":
102
+ main()
AWorld-main/aworlddistributed/mcp_servers/search_server.py ADDED
@@ -0,0 +1,165 @@
1
+ """
2
+ Search MCP Server
3
+
4
+ This module provides MCP server functionality for performing web searches using various search engines.
5
+ It supports structured queries and returns formatted search results.
6
+
7
+ Key features:
8
+ - Perform web searches using the Google Custom Search API
9
+ - Filter and format search results
10
+ - Validate and process search queries
11
+
12
+ Main functions:
13
+ - mcpsearchgoogle: Searches the web using Google Custom Search (the only engine implemented in this file)
16
+ """
17
+
18
+ import os
19
+ import sys
20
+ import traceback
21
+ from typing import List, Optional
22
+
23
+ import requests
24
+ from dotenv import load_dotenv
25
+ from mcp.server.fastmcp import FastMCP
26
+ from pydantic import BaseModel, Field
27
+
28
+ from aworld.logs.util import logger
29
+
30
+ # Initialize MCP server
31
+ mcp = FastMCP("search-server")
32
+
33
+
34
+ # Base search result model that all providers will use
35
+ class SearchResult(BaseModel):
36
+ """Base search result model with common fields"""
37
+
38
+ id: str
39
+ title: str
40
+ url: str
41
+ snippet: str
42
+ source: str # Which search engine provided this result
43
+
44
+
45
+ class GoogleSearchResult(SearchResult):
46
+ """Google-specific search result model"""
47
+
48
+ displayLink: str = ""
49
+ formattedUrl: str = ""
50
+ htmlSnippet: str = ""
51
+ htmlTitle: str = ""
52
+ kind: str = ""
53
+ link: str = ""
54
+
55
+
56
+ class SearchResponse(BaseModel):
57
+ """Unified search response model"""
58
+
59
+ query: str
60
+ results: List[SearchResult]
61
+ count: int
62
+ source: str
63
+ error: Optional[str] = None
64
+
65
+
66
+ @mcp.tool(description="Search the web using Google Custom Search API.")
67
+ def mcpsearchgoogle(
68
+ query: str = Field(..., description="The search query string."),
69
+ num_results: int = Field(
70
+ 10, description="Number of search results to return (default 10)."
71
+ ),
72
+ safe_search: bool = Field(
73
+ True, description="Whether to enable safe search filtering."
74
+ ),
75
+ language: str = Field("en", description="Language code for search results."),
76
+ country: str = Field("us", description="Country code for search results."),
77
+ ) -> str:
78
+ """
79
+ Search the web using Google Custom Search API.
80
+
81
+ Requires GOOGLE_API_KEY and GOOGLE_CSE_ID environment variables to be set.
82
+ """
83
+ try:
84
+ api_key = os.environ.get("GOOGLE_API_KEY")
85
+ cse_id = os.environ.get("GOOGLE_CSE_ID")
86
+
87
+ if not api_key:
88
+ raise ValueError("GOOGLE_API_KEY environment variable not set")
89
+ if not cse_id:
90
+ raise ValueError("GOOGLE_CSE_ID environment variable not set")
91
+
92
+ # Clamp num_results to the valid range (Google CSE returns at most 10 results per request)
+ num_results = max(1, min(10, num_results))
94
+
95
+ # Build the Google Custom Search API URL
96
+ url = "https://www.googleapis.com/customsearch/v1"
97
+ params = {
98
+ "key": api_key,
99
+ "cx": cse_id,
100
+ "q": query,
101
+ "num": num_results,
102
+ "safe": "active" if safe_search else "off",
103
+ "hl": language,
104
+ "gl": country,
105
+ }
106
+
107
+ logger.info(f"Google search starts for query: {query}")
108
+ response = requests.get(url, params=params, timeout=10)
109
+ response.raise_for_status()
110
+
111
+ data = response.json()
112
+ search_results = []
113
+
114
+ if "items" in data:
115
+ for i, item in enumerate(data["items"]):
116
+ result = GoogleSearchResult(
117
+ id=f"google-{i}",
118
+ title=item.get("title", ""),
119
+ url=item.get("link", ""),
120
+ snippet=item.get("snippet", ""),
121
+ source="google",
122
+ displayLink=item.get("displayLink", ""),
123
+ formattedUrl=item.get("formattedUrl", ""),
124
+ htmlSnippet=item.get("htmlSnippet", ""),
125
+ htmlTitle=item.get("htmlTitle", ""),
126
+ kind=item.get("kind", ""),
127
+ link=item.get("link", ""),
128
+ )
129
+ search_results.append(result)
130
+
131
+ return SearchResponse(
132
+ query=query,
133
+ results=search_results,
134
+ count=len(search_results),
135
+ source="google",
136
+ ).model_dump_json()
137
+
138
+ except Exception as e:
139
+ logger.error(f"Google search error: {traceback.format_exc()}")
140
+ return SearchResponse(
141
+ query=query, results=[], count=0, source="google", error=str(e)
142
+ ).model_dump_json()
143
+
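For testing without MCP, the tool can be invoked directly; all parameters are passed explicitly here, since the Field defaults only take effect when invoked through FastMCP. GOOGLE_API_KEY and GOOGLE_CSE_ID must be set:

import json

raw = mcpsearchgoogle(
    query="model context protocol",  # illustrative query
    num_results=5,
    safe_search=True,
    language="en",
    country="us",
)
for item in json.loads(raw)["results"]:
    print(item["title"], item["url"])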
144
+
145
+ def main():
146
+ load_dotenv()
147
+
148
+ print("Starting Search MCP Server...", file=sys.stderr)
149
+ mcp.run(transport="stdio")
150
+
151
+
152
+ # Make the module callable
153
+ def __call__():
154
+ """
155
+ Make the module callable for uvx.
156
+ This function is called when the module is executed directly.
157
+ """
158
+ main()
159
+
160
+
161
+ sys.modules[__name__].__call__ = __call__
162
+
163
+ # Run the server when the script is executed directly
164
+ if __name__ == "__main__":
165
+ main()
AWorld-main/aworlddistributed/mcp_servers/utils.py ADDED
@@ -0,0 +1,193 @@
1
+ import asyncio
2
+ import json
3
+ import os
4
+ import tempfile
5
+ from typing import List, Optional, Tuple
6
+ from urllib.parse import urlparse
7
+
8
+ import requests
9
+ from mcp.server import FastMCP
10
+
11
+ from aworld.logs.util import logger
12
+
13
+
14
+ def get_mime_type(file_path: str, default_mime: Optional[str] = None) -> str:
15
+ """
16
+ Detect MIME type of a file using python-magic if available,
17
+ otherwise fallback to extension-based detection.
18
+
19
+ Args:
20
+ file_path: Path to the file
21
+ default_mime: Default MIME type to return if detection fails
22
+
23
+ Returns:
24
+ str: Detected MIME type
25
+ """
26
+ # Try using python-magic for accurate MIME type detection
27
+ try:
28
+ import magic  # optional dependency (python-magic)
+ mime = magic.Magic(mime=True)
+ return mime.from_file(file_path)
+ except (ImportError, AttributeError, IOError):
32
+ # Fallback to extension-based detection
33
+ extension_mime_map = {
34
+ # Audio formats
35
+ ".mp3": "audio/mpeg",
36
+ ".wav": "audio/wav",
37
+ ".ogg": "audio/ogg",
38
+ ".m4a": "audio/mp4",
39
+ ".flac": "audio/flac",
40
+ # Image formats
41
+ ".jpg": "image/jpeg",
42
+ ".jpeg": "image/jpeg",
43
+ ".png": "image/png",
44
+ ".gif": "image/gif",
45
+ ".webp": "image/webp",
46
+ ".bmp": "image/bmp",
47
+ ".tiff": "image/tiff",
48
+ # Video formats
49
+ ".mp4": "video/mp4",
50
+ ".avi": "video/x-msvideo",
51
+ ".mov": "video/quicktime",
52
+ ".mkv": "video/x-matroska",
53
+ ".webm": "video/webm",
54
+ }
55
+
56
+ ext = os.path.splitext(file_path)[1].lower()
57
+ return extension_mime_map.get(ext, default_mime or "application/octet-stream")
58
+
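With the fallback path, detection reduces to the extension map; for example:

print(get_mime_type("clip.mp4"))   # "video/mp4" from the extension map
print(get_mime_type("notes.xyz"))  # "application/octet-stream" default
print(get_mime_type("notes.xyz", default_mime="text/plain"))  # caller-supplied default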
59
+
60
+ def is_url(path_or_url: str) -> bool:
61
+ """
62
+ Check if the given string is a URL.
63
+
64
+ Args:
65
+ path_or_url: String to check
66
+
67
+ Returns:
68
+ bool: True if the string is a URL, False otherwise
69
+ """
70
+ parsed = urlparse(path_or_url)
71
+ return bool(parsed.scheme and parsed.netloc)
72
+
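The scheme-and-netloc check treats bare paths as local files:

assert is_url("https://example.com/a.png")  # scheme + netloc -> URL
assert not is_url("/tmp/a.png")             # bare path -> local file
assert not is_url("example.com/a.png")      # no scheme -> treated as local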
73
+
74
+ def get_file_from_source(
75
+ source: str,
76
+ allowed_mime_prefixes: List[str] = None,
77
+ max_size_mb: float = 100.0,
78
+ timeout: int = 60,
79
+ type: str = "image",
80
+ ) -> Tuple[str, str, bytes]:
81
+ """
82
+ Unified function to get file content from a URL or local path with validation.
83
+
84
+ Args:
85
+ source: URL or local file path
86
+ allowed_mime_prefixes: List of allowed MIME type prefixes (e.g., ['audio/', 'video/'])
87
+ max_size_mb: Maximum allowed file size in MB
88
+ timeout: Timeout for URL requests in seconds
89
+
90
+ Returns:
91
+ Tuple[str, str, bytes]: (file_path, mime_type, file_content)
92
+ - For URLs, file_path will be a temporary file path
93
+ - For local files, file_path will be the original path
94
+
95
+ Raises:
96
+ ValueError: When file doesn't exist, exceeds size limit, or has invalid MIME type
97
+ IOError: When file cannot be read
98
+ requests.RequestException: When URL request fails
99
+ """
100
+ max_size_bytes = max_size_mb * 1024 * 1024
101
+ temp_file = None
102
+
103
+ try:
104
+ if is_url(source):
105
+ # Handle URL
106
+ logger.info(f"Downloading file from URL: {source}")
107
+ response = requests.get(source, stream=True, timeout=timeout)
108
+ response.raise_for_status()
109
+
110
+ # Check Content-Length if available
111
+ content_length = response.headers.get("Content-Length")
112
+ if content_length and int(content_length) > max_size_bytes:
113
+ raise ValueError(f"File size exceeds limit of {max_size_mb}MB")
114
+
115
+ # Create a temporary file
116
+ temp_file = tempfile.NamedTemporaryFile(delete=False)
117
+ file_path = temp_file.name
118
+
119
+ # Download content in chunks to avoid memory issues
120
+ content = bytearray()
121
+ downloaded_size = 0
122
+ for chunk in response.iter_content(chunk_size=8192):
123
+ downloaded_size += len(chunk)
124
+ if downloaded_size > max_size_bytes:
125
+ raise ValueError(f"File size exceeds limit of {max_size_mb}MB")
126
+ temp_file.write(chunk)
127
+ content.extend(chunk)
128
+
129
+ temp_file.close()
130
+
131
+ # Get MIME type
132
+ if type == "audio":
133
+ mime_type = "audio/mpeg"
134
+ elif type == "image":
135
+ mime_type = "image/jpeg"
136
+ elif type == "video":
137
+ mime_type = "video/mp4"
138
+
139
+ # mime_type = get_mime_type(file_path)
140
+
141
+ # For URLs where magic fails, try to use Content-Type header
142
+ if mime_type == "application/octet-stream":
143
+ content_type = response.headers.get("Content-Type", "").split(";")[0]
144
+ if content_type:
145
+ mime_type = content_type
146
+ else:
147
+ # Handle local file
148
+ file_path = os.path.abspath(source)
149
+
150
+ # Check if file exists
151
+ if not os.path.exists(file_path):
152
+ raise ValueError(f"File not found: {file_path}")
153
+
154
+ # Check file size
155
+ file_size = os.path.getsize(file_path)
156
+ if file_size > max_size_bytes:
157
+ raise ValueError(f"File size exceeds limit of {max_size_mb}MB")
158
+
159
+ # Get MIME type
160
+ if type == "audio":
161
+ mime_type = "audio/mpeg"
162
+ elif type == "image":
163
+ mime_type = "image/jpeg"
164
+ elif type == "video":
165
+ mime_type = "video/mp4"
166
+ # mime_type = get_mime_type(file_path)
167
+
168
+ # Read file content
169
+ with open(file_path, "rb") as f:
170
+ content = f.read()
171
+
172
+ # Validate MIME type if allowed_mime_prefixes is provided
173
+ if allowed_mime_prefixes:
174
+ if not any(
175
+ mime_type.startswith(prefix) for prefix in allowed_mime_prefixes
176
+ ):
177
+ allowed_types = ", ".join(allowed_mime_prefixes)
178
+ raise ValueError(
179
+ f"Invalid file type: {mime_type}. Allowed types: {allowed_types}"
180
+ )
181
+
182
+ return file_path, mime_type, content
183
+
184
+ except Exception as e:
185
+ # Clean up temporary file if an error occurs
186
+ if temp_file and os.path.exists(temp_file.name):
187
+ os.unlink(temp_file.name)
188
+ raise e
189
+
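A sketch of typical usage (illustrative URL). Note that for remote sources the returned path is a temporary file the caller should eventually unlink, as image_server does:

path, mime, data = get_file_from_source(
    "https://example.com/cat.jpg",  # illustrative remote source
    allowed_mime_prefixes=["image/"],
    max_size_mb=10.0,
    type="image",
)
print(path, mime, len(data))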
190
+
191
+ if __name__ == "__main__":
192
+ mcp_tools = []
193
+ logger.success(f"{json.dumps(mcp_tools, indent=4, ensure_ascii=False)}")
AWorld-main/aworlddistributed/mcp_servers/video_server.py ADDED
@@ -0,0 +1,484 @@
1
+ # pylint: disable=E1101
2
+
3
+ import base64
4
+ import os
5
+ import sys
6
+ import traceback
7
+ from dataclasses import dataclass
8
+ from typing import Any, Dict, List, Optional, Tuple
9
+
10
+ import cv2
11
+ import numpy as np
12
+ from dotenv import load_dotenv
13
+ from mcp.server.fastmcp import FastMCP
14
+ from openai import OpenAI
15
+ from pydantic import Field
16
+
17
+ from aworld.logs.util import logger
18
+ from mcp_servers.utils import get_file_from_source
19
+
20
+ client = OpenAI(api_key=os.getenv("LLM_API_KEY"), base_url=os.getenv("LLM_BASE_URL"))
21
+
22
+ # Initialize MCP server
23
+ mcp = FastMCP("Video Server")
24
+
25
+
26
+ @dataclass
27
+ class KeyframeResult:
28
+ """Result of keyframe extraction from a video.
29
+
30
+ Attributes:
31
+ frame_paths: List of file paths to the saved keyframes
32
+ frame_timestamps: List of timestamps (in seconds) corresponding to each frame
33
+ output_directory: Directory where frames were saved
34
+ frame_count: Number of frames extracted
35
+ success: Whether the extraction was successful
36
+ error_message: Error message if extraction failed, None otherwise
37
+ """
38
+
39
+ frame_paths: List[str]
40
+ frame_timestamps: List[float]
41
+ output_directory: str
42
+ frame_count: int
43
+ success: bool
44
+ error_message: Optional[str] = None
45
+
46
+
47
+ VIDEO_ANALYZE = (
48
+ "Input is a sequence of video frames. Given user's task: {task}. "
49
+ "analyze the video content following these steps:\n"
50
+ "1. Temporal sequence understanding\n"
51
+ "2. Motion and action analysis\n"
52
+ "3. Scene context interpretation\n"
53
+ "4. Object and person tracking\n"
54
+ "Return a json string with the following format: "
55
+ '{{"video_analysis_result": "analysis result given task and video frames"}}'
56
+ )
57
+
58
+
59
+ VIDEO_EXTRACT_SUBTITLES = (
60
+ "Input is a sequence of video frames. "
61
+ "Extract all subtitles (if present) in the video. "
62
+ "Return a json string with the following format: "
63
+ '{"video_subtitles": "extracted subtitles from video"}'
64
+ )
65
+
66
+ VIDEO_SUMMARIZE = (
67
+ "Input is a sequence of video frames. "
68
+ "Summarize the main content of the video. "
69
+ "Include key points, main topics, and important visual elements. "
70
+ "Return a json string with the following format: "
71
+ '{"video_summary": "concise summary of the video content"}'
72
+ )
73
+
74
+
75
+ def get_video_frames(
76
+ video_source: str,
77
+ sample_rate: int = 2,
78
+ start_time: float = 0,
79
+ end_time: float = None,
80
+ ) -> List[Dict[str, Any]]:
81
+ """
82
+ Get frames from video with given sample rate using robust file handling
83
+
84
+ Args:
85
+ video_source: Path or URL to the video file
86
+ sample_rate: Number of frames to sample per second
87
+ start_time: Start time of the video segment in seconds (default: 0)
88
+ end_time: End time of the video segment in seconds (default: None, meaning the end of the video)
89
+
90
+ Returns:
91
+ List[Dict[str, Any]]: List of dictionaries containing frame data and timestamp
92
+
93
+ Raises:
94
+ ValueError: When video file cannot be opened or is not a valid video
95
+ """
96
+ try:
97
+ # Get file with validation (only video files allowed)
98
+ file_path, _, _ = get_file_from_source(
99
+ video_source,
100
+ allowed_mime_prefixes=["video/"],
101
+ max_size_mb=2500.0, # 2500MB limit for videos
102
+ type="video", # Specify type as video to handle video files
103
+ )
104
+
105
+ # Open video file
106
+ video = cv2.VideoCapture(file_path)
107
+ if not video.isOpened():
108
+ raise ValueError(f"Could not open video file: {file_path}")
109
+
110
+ fps = video.get(cv2.CAP_PROP_FPS)
111
+ frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
112
+ video_duration = frame_count / fps # total duration in seconds
113
+
114
+ if end_time is None:
115
+ end_time = video_duration
116
+
117
+ if start_time > end_time:
118
+ raise ValueError("Start time cannot be greater than end time.")
119
+
120
+ if start_time < 0:
121
+ start_time = 0
122
+
123
+ if end_time > video_duration:
124
+ end_time = video_duration
125
+
126
+ start_frame = int(start_time * fps)
127
+ end_frame = int(end_time * fps)
128
+
129
+ all_frames = []
130
+ frames = []
131
+
132
+ # Calculate frame interval based on sample rate
133
+ frame_interval = max(1, int(fps / sample_rate))
134
+
135
+ # Set the video capture to the start frame
136
+ video.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
137
+
138
+ for i in range(start_frame, end_frame):
139
+ ret, frame = video.read()
140
+ if not ret:
141
+ break
142
+
143
+ # Convert frame to JPEG format
144
+ _, buffer = cv2.imencode(".jpg", frame)
145
+ frame_data = base64.b64encode(buffer).decode("utf-8")
146
+
147
+ # Add data URL prefix for JPEG image
148
+ frame_data = f"data:image/jpeg;base64,{frame_data}"
149
+
150
+ all_frames.append({"data": frame_data, "time": i / fps})
151
+
152
+ for i in range(0, len(all_frames), frame_interval):
153
+ frames.append(all_frames[i])
154
+
155
+ video.release()
156
+
157
+ # Clean up temporary file if it was created for a URL
158
+ if file_path != os.path.abspath(video_source) and os.path.exists(file_path):
159
+ os.unlink(file_path)
160
+
161
+ if not frames:
162
+ raise ValueError(f"Could not extract any frames from video: {video_source}")
163
+
164
+ return frames
165
+
166
+ except Exception as e:
167
+ logger.error(f"Error extracting frames from {video_source}: {str(e)}")
168
+ raise
169
+
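The sampling math above keeps roughly sample_rate frames per second: at 30 fps with sample_rate=2, frame_interval is max(1, int(30 / 2)) = 15, so a 10-second segment yields about 20 sampled frames:

fps, sample_rate = 30, 2
frame_interval = max(1, int(fps / sample_rate))  # 15, as in get_video_frames
decoded = 10 * fps                               # 300 frames decoded in the window
approx_kept = decoded // frame_interval          # 20 frames kept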
170
+
171
+ def create_video_content(
172
+ prompt: str, video_frames: List[Dict[str, Any]]
173
+ ) -> List[Dict[str, Any]]:
174
+ """Create uniform video format for querying llm."""
175
+ content = [{"type": "text", "text": prompt}]
176
+ content.extend(
177
+ [
178
+ {"type": "image_url", "image_url": {"url": frame["data"]}}
179
+ for frame in video_frames
180
+ ]
181
+ )
182
+ return content
183
+
184
+
185
+ @mcp.tool(description="Analyze the video content by the given question.")
186
+ def mcp_analyze_video(
187
+ video_url: str = Field(description="The input video in given filepath or url."),
188
+ question: str = Field(description="The question to analyze."),
189
+ sample_rate: int = Field(default=2, description="Sample n frames per second."),
190
+ start_time: float = Field(
191
+ default=0, description="Start time of the video segment in seconds."
192
+ ),
193
+ end_time: float = Field(
194
+ default=None, description="End time of the video segment in seconds."
195
+ ),
196
+ ) -> str:
197
+ """analyze the video content by the given question."""
198
+
199
+ try:
200
+ video_frames = get_video_frames(video_url, sample_rate, start_time, end_time)
201
+ logger.info(f"---len video_frames:{len(video_frames)}")
202
+ interval = 20
203
+ frame_nums = 30
204
+ all_res = []
205
+ for i in range(0, len(video_frames), interval):
206
+ inputs = []
207
+ cur_frames = video_frames[i : i + frame_nums]
208
+ content = create_video_content(
209
+ VIDEO_ANALYZE.format(task=question), cur_frames
210
+ )
211
+ inputs.append({"role": "user", "content": content})
212
+ try:
213
+ response = client.chat.completions.create(
214
+ model=os.getenv("LLM_MODEL_NAME"),
215
+ messages=inputs,
216
+ temperature=0,
217
+ )
218
+ cur_video_analysis_result = response.choices[0].message.content
219
+ except Exception:
220
+ cur_video_analysis_result = ""
221
+ all_res.append(
222
+ f"result of video part {int(i / interval + 1)}: {cur_video_analysis_result}"
223
+ )
224
+ if i + frame_nums >= len(video_frames):
225
+ break
226
+ video_analysis_result = "\n".join(all_res)
227
+
228
+ except (ValueError, IOError, RuntimeError):
229
+ video_analysis_result = ""
230
+ logger.error(f"video_analysis-Execute error: {traceback.format_exc()}")
231
+
232
+ logger.info(
233
+ f"---get_analysis_by_video-video_analysis_result:{video_analysis_result}"
234
+ )
235
+ return video_analysis_result
236
+
237
+
238
+ @mcp.tool(description="Extract subtitles from the video.")
239
+ def mcp_extract_video_subtitles(
240
+ video_url: str = Field(description="The input video in given filepath or url."),
241
+ sample_rate: int = Field(default=2, description="Sample n frames per second."),
242
+ start_time: float = Field(
243
+ default=0, description="Start time of the video segment in seconds."
244
+ ),
245
+ end_time: float = Field(
246
+ default=None, description="End time of the video segment in seconds."
247
+ ),
248
+ ) -> str:
249
+ """extract subtitles from the video."""
250
+ inputs = []
251
+ try:
252
+ video_frames = get_video_frames(video_url, sample_rate, start_time, end_time)
253
+ content = create_video_content(VIDEO_EXTRACT_SUBTITLES, video_frames)
254
+ inputs.append({"role": "user", "content": content})
255
+
256
+ response = client.chat.completions.create(
257
+ model=os.getenv("LLM_MODEL_NAME"),
258
+ messages=inputs,
259
+ temperature=0,
260
+ )
261
+ video_subtitles = response.choices[0].message.content
262
+ except (ValueError, IOError, RuntimeError):
263
+ video_subtitles = ""
264
+ logger.error(f"video_subtitles-Execute error: {traceback.format_exc()}")
265
+
266
+ logger.info(f"---get_subtitles_from_video-video_subtitles:{video_subtitles}")
267
+ return video_subtitles
268
+
269
+
270
+ @mcp.tool(description="Summarize the main content of the video.")
+ def mcp_summarize_video(
+     video_url: str = Field(description="The input video in given filepath or url."),
+     sample_rate: int = Field(default=2, description="Sample n frames per second."),
+     start_time: float = Field(
+         default=0, description="Start time of the video segment in seconds."
+     ),
+     end_time: float = Field(
+         default=None, description="End time of the video segment in seconds."
+     ),
+ ) -> str:
+     """Summarize the main content of the video."""
+     try:
+         video_frames = get_video_frames(video_url, sample_rate, start_time, end_time)
+         logger.info(f"---len video_frames:{len(video_frames)}")
+         # Summarize in chunks of up to 500 frames, stepping 490 frames at a
+         # time so consecutive chunks overlap by 10 frames for context.
+         interval = 490
+         frame_nums = 500
+         all_res = []
+         for i in range(0, len(video_frames), interval):
+             inputs = []
+             cur_frames = video_frames[i : i + frame_nums]
+             content = create_video_content(VIDEO_SUMMARIZE, cur_frames)
+             inputs.append({"role": "user", "content": content})
+             try:
+                 response = client.chat.completions.create(
+                     model=os.getenv("LLM_MODEL_NAME"),
+                     messages=inputs,
+                     temperature=0,
+                 )
+                 logger.info(f"---response:{response}")
+                 cur_video_summary = response.choices[0].message.content
+             except Exception:
+                 cur_video_summary = ""
+             all_res.append(
+                 f"summary of video part {int(i / interval + 1)}: {cur_video_summary}"
+             )
+             logger.info(
+                 f"summary of video part {int(i / interval + 1)}: {cur_video_summary}"
+             )
+         video_summary = "\n".join(all_res)
+
+     except (ValueError, IOError, RuntimeError):
+         video_summary = ""
+         logger.error(f"video_summary-Execute error: {traceback.format_exc()}")
+
+     logger.info(f"---get_summary_from_video-video_summary:{video_summary}")
+     return video_summary
+
+
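+ # Worked example of the chunking above (numbers assumed for illustration):
+ # a 10-minute video sampled at 2 fps yields 1200 frames, so the loop starts
+ # chunks at frames 0, 490, and 980 -- i.e. frames [0, 500), [490, 990), and
+ # [980, 1200) -- each summarized as "video part 1..3".
+
+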
+
319
+ @mcp.tool(description="Extract key frames around the target time with scene detection")
320
+ def get_video_keyframes(
321
+ video_path: str = Field(description="The input video in given filepath or url."),
322
+ target_time: int = Field(
323
+ description=(
324
+ "The specific time point for extraction,"
325
+ " centered within the window_size argument,"
326
+ " the unit is of second."
327
+ )
328
+ ),
329
+ window_size: int = Field(
330
+ default=5,
331
+ description="The window size for extraction, the unit is of second.",
332
+ ),
333
+ cleanup: bool = Field(
334
+ default=False,
335
+ description="Whether to delete the original video file after processing.",
336
+ ),
337
+ output_dir: str = Field(
338
+ default=os.getenv("FILESYSTEM_SERVER_WORKDIR", "./keyframes"),
339
+ description="Directory where extracted frames will be saved.",
340
+ ),
341
+ ) -> KeyframeResult:
342
+ """Extract key frames around the target time with scene detection.
343
+
344
+ This function extracts frames from a video file around a specific time point,
345
+ using scene detection to identify significant changes between frames. Only frames
346
+ with substantial visual differences are saved, reducing redundancy.
347
+
348
+ Args:
349
+ video_path: Path or URL to the video file
350
+ target_time: Specific time point (in seconds) to extract frames around
351
+ window_size: Time window (in seconds) centered on target_time
352
+ cleanup: Whether to delete the original video file after processing
353
+ output_dir: Directory where extracted frames will be saved
354
+
355
+ Returns:
356
+ KeyframeResult: A dataclass containing paths to saved frames, timestamps,
357
+ and metadata about the extraction process
358
+
359
+ Raises:
360
+ Exception: Exceptions are caught internally and reported in the result
361
+ """
362
+
363
+     def save_frames(frames, frame_times, output_dir) -> Tuple[List[str], List[float]]:
+         """Save extracted frames to disk, one JPEG per frame, named by timestamp."""
+         os.makedirs(output_dir, exist_ok=True)
+         saved_paths = []
+         saved_timestamps = []
+
+         for frame, timestamp in zip(frames, frame_times):
+             filename = f"{output_dir}/frame_{timestamp:.2f}s.jpg"
+             cv2.imwrite(filename, frame)
+             saved_paths.append(filename)
+             saved_timestamps.append(timestamp)
+
+         return saved_paths, saved_timestamps
+
+     def extract_keyframes(
+         video_path, target_time, window_size
+     ) -> Tuple[List[Any], List[float]]:
+         """Extract key frames around the target time with scene detection"""
+         cap = cv2.VideoCapture(video_path)
+         fps = cap.get(cv2.CAP_PROP_FPS)
+
+         # Calculate frame numbers for the time window
+         start_frame = int((target_time - window_size / 2) * fps)
+         end_frame = int((target_time + window_size / 2) * fps)
+
+         frames = []
+         frame_times = []
+
+         # Set video position to start_frame
+         cap.set(cv2.CAP_PROP_POS_FRAMES, max(0, start_frame))
+
+         prev_frame = None
+         while cap.isOpened():
+             frame_pos = cap.get(cv2.CAP_PROP_POS_FRAMES)
+             if frame_pos >= end_frame:
+                 break
+
+             ret, frame = cap.read()
+             if not ret:
+                 break
+
+             # Convert frame to grayscale for scene detection
+             gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+
+             # If this is the first frame, save it
+             if prev_frame is None:
+                 frames.append(frame)
+                 frame_times.append(frame_pos / fps)
+             else:
+                 # Calculate difference between current and previous frame
+                 diff = cv2.absdiff(gray, prev_frame)
+                 mean_diff = np.mean(diff)
+
+                 # If significant change detected, save frame
+                 if mean_diff > 20:  # Threshold for scene change
+                     frames.append(frame)
+                     frame_times.append(frame_pos / fps)
+
+             prev_frame = gray
+
+         cap.release()
+         return frames, frame_times
+
+     try:
+         # Extract keyframes
+         frames, frame_times = extract_keyframes(video_path, target_time, window_size)
+
+         # Save frames
+         frame_paths, frame_timestamps = save_frames(frames, frame_times, output_dir)
+
+         # Cleanup
+         if cleanup and os.path.exists(video_path):
+             os.remove(video_path)
+
+         return KeyframeResult(
+             frame_paths=frame_paths,
+             frame_timestamps=frame_timestamps,
+             output_directory=output_dir,
+             frame_count=len(frame_paths),
+             success=True,
+         )
+
+     except Exception as e:
+         error_message = f"Error processing video: {str(e)}"
+         logger.error(error_message)
+         return KeyframeResult(
+             frame_paths=[],
+             frame_timestamps=[],
+             output_directory=output_dir,
+             frame_count=0,
+             success=False,
+             error_message=error_message,
+         )
+
+
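+ # A minimal usage sketch (illustrative only; the path and times are
+ # hypothetical): pull scene-change frames from a 5-second window centered
+ # at the 2-minute mark.
+ #
+ #     result = get_video_keyframes(
+ #         video_path="/tmp/example.mp4",
+ #         target_time=120,
+ #         window_size=5,
+ #     )
+ #     if result.success:
+ #         print(result.frame_paths)
+
+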
+ def main():
+     load_dotenv()
+     print("Starting Video MCP Server...", file=sys.stderr)
+     mcp.run(transport="stdio")
+
+
+ # Make the module callable
+ def __call__():
+     """
+     Make the module callable for uvx.
+     This function is called when the module is executed directly.
+     """
+     main()
+
+
+ # Add this for compatibility with uvx
+ sys.modules[__name__].__call__ = __call__
+
+
+ # Run the server when the script is executed directly
+ if __name__ == "__main__":
+     main()
AWorld-main/aworlddistributed/mcp_servers/youtube_server.py ADDED
@@ -0,0 +1,279 @@
+ """
+ Youtube Download MCP Server
+
+ This module provides MCP server functionality for downloading files from Youtube URLs.
+ It handles various download scenarios with proper validation, error handling,
+ and progress tracking.
+
+ Key features:
+ - File downloading from Youtube HTTP/HTTPS URLs
+ - Download progress tracking
+ - File validation
+ - Safe file saving
+
+ Main functions:
+ - download_youtube_files: Downloads files from Youtube URLs to the local filesystem
+ """
+
+ import os
+ import sys
+ import time
+ import traceback
+ import urllib.parse
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Optional
+
+ from dotenv import load_dotenv
+ from mcp.server.fastmcp import FastMCP
+ from pydantic import BaseModel, Field
+ from selenium import webdriver
+ from selenium.webdriver.chrome.service import Service
+ from selenium.webdriver.common.by import By
+
+ from aworld.logs.util import logger
+
+ mcp = FastMCP("youtube-server")
+ _default_driver_path = os.environ.get(
+     "CHROME_DRIVER_PATH",
+     os.path.expanduser("~/Downloads/chromedriver-mac-arm64/chromedriver"),
+ )
+
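+ # The chromedriver location can be overridden via the environment; the path
+ # below is hypothetical:
+ #
+ #     CHROME_DRIVER_PATH=/usr/local/bin/chromedriver
+
+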
+ class YoutubeDownloadResults(BaseModel):
+     """Download result model with file information"""
+
+     file_path: str
+     file_name: str
+     file_size: int
+     content_type: Optional[str] = None
+     success: bool
+     error: Optional[str] = None
+
+
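+ # For illustration, a successful result serializes (via model_dump_json) to
+ # JSON shaped like the following; all values here are hypothetical:
+ #
+ #     {"file_path": "/tmp/mcp_downloads/abc123/video.mp4",
+ #      "file_name": "video.mp4", "file_size": 10485760,
+ #      "content_type": "mp4", "success": true, "error": null}
+
+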
+ @mcp.tool(
+     description="Download the youtube file from the URL and save to the local filesystem."
+ )
+ def download_youtube_files(
+     url: str = Field(
+         description="The URL of the youtube file to download. Must be a string."
+     ),
+     output_dir: str = Field(
+         "/tmp/mcp_downloads",
+         description="Directory to save the downloaded files (default: /tmp/mcp_downloads).",
+     ),
+     timeout: int = Field(
+         180, description="Download timeout in seconds (default: 180)."
+     ),
+ ) -> str:
+     """Download the youtube file from the URL and save to the local filesystem.
+
+     Args:
+         url: The URL of the youtube file to download, must be a string
+         output_dir: Directory to save the downloaded files
+         timeout: Download timeout in seconds
+
+     Returns:
+         JSON string with download results information
+     """
+     # Unwrap pydantic Field objects when the function is called directly
+     # (rather than through the MCP runtime, which resolves the defaults).
+     if hasattr(url, "default") and not isinstance(url, str):
+         url = url.default
+
+     if hasattr(output_dir, "default") and not isinstance(output_dir, str):
+         output_dir = output_dir.default
+
+     if hasattr(timeout, "default") and not isinstance(timeout, int):
+         timeout = timeout.default
+
+     def _get_youtube_content(url: str, output_dir: str, timeout: int) -> None:
+         """Use Selenium to download YouTube content via cobalt.tools"""
+         try:
+             options = webdriver.ChromeOptions()
+             options.add_argument("--disable-blink-features=AutomationControlled")
+             # Set the default download path
+             prefs = {
+                 "download.default_directory": output_dir,
+                 "download.prompt_for_download": False,
+                 "download.directory_upgrade": True,
+                 "safebrowsing.enabled": True,
+             }
+             options.add_experimental_option("prefs", prefs)
+             # Create the WebDriver and launch the Chrome browser
+             service = Service(executable_path=_default_driver_path)
+             driver = webdriver.Chrome(service=service, options=options)
+
+             logger.info(f"Opening cobalt.tools to download from {url}")
+             # Open the target webpage
+             driver.get("https://cobalt.tools/")
+             # Wait for the page to load
+             time.sleep(5)
+             # Find the input field and enter the YouTube link
+             input_field = driver.find_element(By.ID, "link-area")
+             input_field.send_keys(url)
+             time.sleep(5)
+             # Find the download button and click it
+             download_button = driver.find_element(By.ID, "download-button")
+             download_button.click()
+             time.sleep(5)
+
+             try:
+                 # Handle the bot detection popup if it appears
+                 driver.find_element(
+                     By.CLASS_NAME,
+                     "button.elevated.popup-button.undefined.svelte-nnawom.active",
+                 ).click()
+             except Exception as e:
+                 logger.warning(f"Bot detection handling: {str(e)}")
+
+             # Poll every 3 seconds until the download finishes (Chrome keeps a
+             # ".crdownload" suffix on in-progress files) or the timeout elapses.
+             cnt = 0
+             while (
+                 len(os.listdir(output_dir)) == 0
+                 or os.listdir(output_dir)[0].split(".")[-1] == "crdownload"
+             ):
+                 time.sleep(3)
+                 cnt += 3
+                 if cnt >= timeout:
+                     logger.warning(f"Download timeout after {timeout} seconds")
+                     break
+
+             logger.info("Download process completed")
+
+         except Exception as e:
+             logger.error(f"Error during YouTube content download: {str(e)}")
+             raise
+         finally:
+             # Close the browser
+             if "driver" in locals():
+                 driver.quit()
+
+     def _download_single_file(
+         url: str, output_dir: str, filename: str, timeout: int
+     ) -> str:
+         """Download a single file from the URL and save it to the local filesystem."""
+         try:
+             # Validate URL
+             if not url.startswith(("http://", "https://")):
+                 raise ValueError(
+                     "Invalid URL format. URL must start with http:// or https://"
+                 )
+
+             # Create the output directory if it doesn't exist
+             output_path = Path(output_dir)
+             output_path.mkdir(parents=True, exist_ok=True)
+
+             # Determine the filename if not provided
+             if not filename:
+                 filename = os.path.basename(urllib.parse.urlparse(url).path)
+                 if not filename:
+                     filename = "downloaded_file"
+                 filename += "_" + datetime.now().strftime("%Y%m%d_%H%M%S")
+
+             # Each download gets its own directory, named after the file
+             file_path = Path(os.path.join(output_path, filename))
+             file_path.mkdir(parents=True, exist_ok=True)
+             logger.info(f"Output path: {file_path}")
+
+             # Check whether the video already exists under the filesystem
+             # workdir, keyed by the "?v=" video id in the URL
+             video_id = url.split("?v=")[-1].split("&")[0] if "?v=" in url else ""
+             base_path = os.getenv("FILESYSTEM_SERVER_WORKDIR")
+
+             def find_existing_video(search_dir, video_id):
+                 """Recursively search search_dir for a file containing video_id."""
+                 if not video_id or not search_dir:
+                     return None
+
+                 for item in os.listdir(search_dir):
+                     item_path = os.path.join(search_dir, item)
+
+                     if os.path.isfile(item_path) and video_id in item:
+                         return item_path
+
+                     elif os.path.isdir(item_path):
+                         found = find_existing_video(item_path, video_id)
+                         if found:
+                             return found
+
+                 return None
+
+             existing_file = find_existing_video(base_path, video_id)
+             if existing_file:
+                 result = YoutubeDownloadResults(
+                     file_path=existing_file,
+                     file_name=os.path.basename(existing_file),
+                     file_size=os.path.getsize(existing_file),
+                     content_type="mp4",
+                     success=True,
+                     error=None,
+                 )
+                 logger.info(
+                     f"Found {video_id} is already downloaded in: {existing_file}"
+                 )
+                 return result.model_dump_json()
+
+             logger.info(f"Downloading file from {url} to {file_path}")
+
+             _get_youtube_content(url, str(file_path), timeout)
+
+             # Check whether the download produced a file
+             if len(os.listdir(file_path)) == 0:
+                 raise FileNotFoundError("No files were downloaded")
+
+             download_file = os.path.join(file_path, os.listdir(file_path)[0])
+
+             # Get the actual file size
+             actual_size = os.path.getsize(download_file)
+             logger.success(f"File downloaded successfully to {download_file}")
+
+             # Create the result
+             result = YoutubeDownloadResults(
+                 file_path=download_file,
+                 file_name=os.listdir(file_path)[0],
+                 file_size=actual_size,
+                 content_type="mp4",
+                 success=True,
+                 error=None,
+             )
+
+             return result.model_dump_json()
+
+         except Exception as e:
+             error_msg = str(e)
+             logger.error(f"Download error: {traceback.format_exc()}")
+
+             result = YoutubeDownloadResults(
+                 file_path="",
+                 file_name="",
+                 file_size=0,
+                 content_type=None,
+                 success=False,
+                 error=error_msg,
+             )
+
+             return result.model_dump_json()
+
+     # _download_single_file already returns a serialized YoutubeDownloadResults,
+     # so its JSON payload can be passed through directly.
+     return _download_single_file(url, output_dir, "", timeout)
+
+
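+ # A minimal usage sketch (illustrative only; the URL is a placeholder):
+ #
+ #     result_json = download_youtube_files(
+ #         url="https://www.youtube.com/watch?v=<VIDEO_ID>",
+ #         output_dir="/tmp/mcp_downloads",
+ #         timeout=180,
+ #     )
+ #     # result_json is a YoutubeDownloadResults payload serialized as JSON.
+
+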
+ def main():
+     load_dotenv()
+     print("Starting YoutubeDownload MCP Server...", file=sys.stderr)
+     mcp.run(transport="stdio")
+
+
+ # Make the module callable
+ def __call__():
+     """
+     Make the module callable for uvx.
+     This function is called when the module is executed directly.
+     """
+     main()
+
+
+ sys.modules[__name__].__call__ = __call__
+
+ # Run the server when the script is executed directly
+ if __name__ == "__main__":
+     main()