Duibonduil committed on
Commit 3e11f9b · verified · 1 parent: cd8fc65

Upload 17 files

mcp_servers/README.md ADDED
File without changes
mcp_servers/audio_server.py ADDED
@@ -0,0 +1,148 @@
+ import base64
+ import json
+ import os
+ import sys
+ import traceback
+ from typing import List
+
+ from mcp.server.fastmcp import FastMCP
+ from openai import OpenAI
+ from pydantic import Field
+
+ from aworld.logs.util import logger
+ from mcp_servers.utils import get_file_from_source
+
+ # Initialize MCP server
+ mcp = FastMCP("audio-server")
+
+
+ client = OpenAI(
+     api_key=os.getenv("AUDIO_LLM_API_KEY"), base_url=os.getenv("AUDIO_LLM_BASE_URL")
+ )
+
+ AUDIO_TRANSCRIBE = (
+     "Input is a base64 encoded audio. Transcribe the audio content. "
+     "Return a json string with the following format: "
+     '{"audio_text": "transcribed text from audio"}'
+ )
+
+
+ def encode_audio(audio_source: str, with_header: bool = True) -> str:
+     """
+     Encode audio to base64 format with robust file handling
+
+     Args:
+         audio_source: URL or local file path of the audio
+         with_header: Whether to include MIME type header
+
+     Returns:
+         str: Base64 encoded audio string, with MIME type prefix if with_header is True
+
+     Raises:
+         ValueError: When audio source is invalid or audio format is not supported
+         IOError: When audio file cannot be read
+     """
+     if not audio_source:
+         raise ValueError("Audio source cannot be empty")
+
+     try:
+         # Get file with validation (only audio files allowed)
+         file_path, mime_type, content = get_file_from_source(
+             audio_source,
+             allowed_mime_prefixes=["audio/"],
+             max_size_mb=50.0,  # 50MB limit for audio files
+             type="audio",  # Specify type as audio to handle audio files
+         )
+
+         # Encode to base64
+         audio_base64 = base64.b64encode(content).decode()
+
+         # Format with header if requested
+         final_audio = (
+             f"data:{mime_type};base64,{audio_base64}" if with_header else audio_base64
+         )
+
+         # Clean up temporary file if it was created for a URL
+         if file_path != os.path.abspath(audio_source) and os.path.exists(file_path):
+             os.unlink(file_path)
+
+         return final_audio
+
+     except Exception:
+         logger.error(
+             f"Error encoding audio from {audio_source}: {traceback.format_exc()}"
+         )
+         raise
+
+
+ @mcp.tool(description="Transcribe the given audio in a list of filepaths or urls.")
+ async def mcp_transcribe_audio(
+     audio_urls: List[str] = Field(
+         description="The input audio as a list of filepaths or urls."
+     ),
+ ) -> str:
+     """
+     Transcribe the given audio in a list of filepaths or urls.
+
+     Args:
+         audio_urls: List of audio file paths or URLs
+
+     Returns:
+         str: JSON string containing transcriptions
+     """
+     transcriptions = []
+     for audio_url in audio_urls:
+         try:
+             # Get file with validation (only audio files allowed)
+             file_path, _, _ = get_file_from_source(
+                 audio_url,
+                 allowed_mime_prefixes=["audio/"],
+                 max_size_mb=50.0,  # 50MB limit for audio files
+                 type="audio",  # Specify type as audio to handle audio files
+             )
+
+             # Use the file for transcription
+             with open(file_path, "rb") as audio_file:
+                 transcription = client.audio.transcriptions.create(
+                     file=audio_file,
+                     model=os.getenv("AUDIO_LLM_MODEL_NAME"),
+                     response_format="text",
+                 )
+             transcriptions.append(transcription)
+
+             # Clean up temporary file if it was created for a URL
+             if file_path != os.path.abspath(audio_url) and os.path.exists(file_path):
+                 os.unlink(file_path)
+
+         except Exception as e:
+             logger.error(f"Error transcribing {audio_url}: {traceback.format_exc()}")
+             transcriptions.append(f"Error: {str(e)}")
+
+     logger.info(f"---get_text_by_transcribe-transcription:{transcriptions}")
+     return json.dumps(transcriptions, ensure_ascii=False)
+
+
+ def main():
+     from dotenv import load_dotenv
+
+     load_dotenv()
+
+     print("Starting Audio MCP Server...", file=sys.stderr)
+     mcp.run(transport="stdio")
+
+
+ # Make the module callable
+ def __call__():
+     """
+     Make the module callable for uvx.
+     This function is called when the module is executed directly.
+     """
+     main()
+
+
+ # `sys` is imported at the top of the file for uvx compatibility
+ sys.modules[__name__].__call__ = __call__
+
+ # Run the server when the script is executed directly
+ if __name__ == "__main__":
+     main()
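
For reference, the stdio server above can be exercised from a minimal MCP client. The sketch below assumes the official `mcp` Python SDK, that the AUDIO_LLM_* environment variables are already set, and a hypothetical local file /tmp/sample.wav:

import asyncio
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

async def demo():
    # Spawn the audio server as a stdio subprocess and call its transcription tool.
    params = StdioServerParameters(command="python", args=["mcp_servers/audio_server.py"])
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool(
                "mcp_transcribe_audio", {"audio_urls": ["/tmp/sample.wav"]}  # hypothetical path
            )
            print(result.content)

asyncio.run(demo())

The same pattern applies to every stdio server in this commit; only the script path, tool name, and arguments change.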
mcp_servers/aworldsearch_server.py ADDED
@@ -0,0 +1,228 @@
+ import asyncio
+ import json
+ import logging
+ import os
+ import sys
+
+ import aiohttp
+ from typing import List, Dict, Any, Optional, Union
+ from dotenv import load_dotenv
+ from mcp.server import FastMCP
+ from pydantic import Field
+ from mcp.types import TextContent
+
+ from aworld.logs.util import logger
+
+ mcp = FastMCP("aworldsearch-server")
+
+
+ async def search_single(query: str, num: int = 5) -> Optional[Dict[str, Any]]:
+     """Execute a single search query; returns None on error"""
+     try:
+         url = os.getenv('AWORLD_SEARCH_URL')
+         searchMode = os.getenv('AWORLD_SEARCH_SEARCHMODE')
+         source = os.getenv('AWORLD_SEARCH_SOURCE')
+         domain = os.getenv('AWORLD_SEARCH_DOMAIN')
+         uid = os.getenv('AWORLD_SEARCH_UID')
+         if not url or not searchMode or not source or not domain:
+             logger.warning("Query failed: url, searchMode, source, domain parameters incomplete")
+             return None
+
+         headers = {
+             'Content-Type': 'application/json'
+         }
+         data = {
+             "domain": domain,
+             "extParams": {},
+             "page": 0,
+             "pageSize": num,
+             "query": query,
+             "searchMode": searchMode,
+             "source": source,
+             "userId": uid
+         }
+
+         async with aiohttp.ClientSession() as session:
+             try:
+                 async with session.post(url, headers=headers, json=data) as response:
+                     if response.status != 200:
+                         logger.warning(f"Query failed: {query}, status code: {response.status}")
+                         return None
+
+                     result = await response.json()
+                     return result
+             except aiohttp.ClientError:
+                 logger.warning(f"Request error: {query}")
+                 return None
+     except Exception:
+         logger.warning(f"Query exception: {query}")
+         return None
+
+
+ def filter_valid_docs(result: Optional[Dict[str, Any]]) -> List[Dict[str, Any]]:
+     """Filter valid document results; returns an empty list if input is None"""
+     if result is None:
+         return []
+
+     try:
+         valid_docs = []
+
+         # Check success field
+         if not result.get("success"):
+             return valid_docs
+
+         # Check searchDocs field
+         search_docs = result.get("searchDocs", [])
+         if not search_docs:
+             return valid_docs
+
+         # Extract required fields
+         required_fields = ["title", "docAbstract", "url", "doc"]
+
+         for doc in search_docs:
+             # Check if all required fields exist and are not empty
+             is_valid = True
+             for field in required_fields:
+                 if field not in doc or not doc[field]:
+                     is_valid = False
+                     break
+
+             if is_valid:
+                 # Keep only required fields
+                 filtered_doc = {field: doc[field] for field in required_fields}
+                 valid_docs.append(filtered_doc)
+
+         return valid_docs
+     except Exception:
+         return []
+
+
+ @mcp.tool(description="Search based on the user's input query list")
+ async def search(
+     query_list: List[str] = Field(
+         description="List format, queries to search for"
+     ),
+     num: int = Field(
+         5,
+         description="Maximum number of results per query, default is 5; please keep the total results within 15"
+     )
+ ) -> Union[str, TextContent]:
+     """Execute the main search flow; supports a single query or a query list"""
+     try:
+         # Get configuration from environment variables
+         env_total_num = os.getenv('AWORLD_SEARCH_TOTAL_NUM')
+         if env_total_num and env_total_num.isdigit():
+             # Force override input num parameter with environment variable
+             num = int(env_total_num)
+
+         # If no queries provided, return an empty result
+         if not query_list:
+             return TextContent(
+                 type="text",
+                 text="",  # Empty string instead of None
+                 **{"metadata": {}}  # Pass as additional fields
+             )
+
+         # When query count is >= 3, use a smaller per-query limit unless slice_num is set
+         slice_num = os.getenv('AWORLD_SEARCH_SLICE_NUM')
+         if slice_num and slice_num.isdigit():
+             actual_num = int(slice_num)
+         else:
+             actual_num = 2 if len(query_list) >= 3 else num
+
+         # Execute all queries in parallel
+         tasks = [search_single(q, actual_num) for q in query_list]
+         raw_results = await asyncio.gather(*tasks)
+
+         # Filter and merge results
+         all_valid_docs = []
+         for result in raw_results:
+             valid_docs = filter_valid_docs(result)
+             all_valid_docs.extend(valid_docs)
+
+         # If no valid results found, return an empty result
+         if not all_valid_docs:
+             return TextContent(
+                 type="text",
+                 text="",  # Empty string instead of None
+                 **{"metadata": {}}  # Pass as additional fields
+             )
+
+         # Format results as JSON
+         result_json = json.dumps(all_valid_docs, ensure_ascii=False)
+
+         # Create dictionary structure directly
+         combined_query = ",".join(query_list)
+
+         search_items = []
+         for doc in all_valid_docs:
+             search_items.append({
+                 "title": doc.get("title", ""),
+                 "url": doc.get("url", ""),
+                 "content": doc.get("doc", "")  # Map doc field to content
+             })
+
+         search_output_dict = {
+             "query": combined_query,
+             "results": search_items
+         }
+
+         # Log results
+         logger.info(f"Completed {len(query_list)} queries, found {len(all_valid_docs)} valid documents")
+
+         return TextContent(
+             type="text",
+             text=result_json,
+             **{"metadata": search_output_dict}  # Pass processed data as metadata
+         )
+     except Exception as e:
+         # Handle errors
+         logger.error(f"Search error: {e}")
+         return TextContent(
+             type="text",
+             text="",  # Empty string instead of None
+             **{"metadata": {}}  # Pass as additional fields
+         )
+
+
+ def main():
+     load_dotenv(override=True)
+
+     print("Starting AWorld Search MCP Server...", file=sys.stderr)
+     mcp.run(transport="stdio")
+
+
+ # Make the module callable
+ def __call__():
+     """
+     Make the module callable for uvx.
+     This function is called when the module is executed directly.
+     """
+     main()
+
+
+ sys.modules[__name__].__call__ = __call__
+
+ if __name__ == "__main__":
+     main()
+
+ # if __name__ == "__main__":
+ #     # Configure logging
+ #     logging.basicConfig(
+ #         level=logging.INFO,
+ #         format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ #     )
+ #
+ #
+ #     # Test single query
+ #     # asyncio.run(search("Alibaba financial report"))
+ #
+ #     # Test multiple queries
+ #     test_queries = ["Alibaba financial report", "Tencent financial report", "Baidu financial report"]
+ #     asyncio.run(search(query_list=test_queries))
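
On success, the `search` tool returns a TextContent whose text field is the raw JSON list of documents and whose metadata carries the combined view built above. A sketch of the metadata shape, with placeholder values:

# Shape of the metadata attached to the returned TextContent (values are placeholders):
example_metadata = {
    "query": "Alibaba financial report,Tencent financial report",
    "results": [
        {
            "title": "Example result title",
            "url": "https://example.com/doc",
            "content": "Full document text, mapped from the 'doc' field",
        }
    ],
}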
mcp_servers/browser_server.py ADDED
@@ -0,0 +1,149 @@
+ """
+ Browser MCP Server
+
+ This module provides MCP server functionality for browser automation and interaction.
+ It handles tasks such as web scraping, form submission, and automated browsing.
+
+ Main functions:
+ - browser_use: Performs a browsing task using the browser-use agent
+ """
+
+ import json
+ import os
+ import sys
+ import traceback
+
+ from browser_use import Agent
+ from browser_use.agent.views import AgentHistoryList
+ from browser_use.browser.browser import Browser, BrowserConfig
+ from browser_use.browser.context import BrowserContext, BrowserContextConfig
+ from dotenv import load_dotenv
+ from langchain_openai import ChatOpenAI
+ from mcp.server.fastmcp import FastMCP
+ from pydantic import Field
+
+ from aworld.logs.util import logger
+
+ mcp = FastMCP("browser-server")
+ browser_system_prompt = """
+ ===== NAVIGATION STRATEGY =====
+ 1. START: Navigate to the most authoritative source for this information
+    - For general queries: Use Google with specific search terms
+    - For known sources: Go directly to the relevant website
+
+ 2. EVALUATE: Assess each page methodically
+    - Scan headings and highlighted text first
+    - Look for data tables, charts, or official statistics
+    - Check publication dates for timeliness
+
+ 3. EXTRACT: Capture exactly what's needed
+    - Take screenshots of visual evidence (charts, tables, etc.)
+    - Copy precise text that answers the query
+    - Note source URLs for citation
+
+ 4. DOWNLOAD: Save the most relevant file to a local path for further processing
+    - Save the text if possible for further text reading and analysis
+    - Save the image if possible for further image reasoning analysis
+    - Save the pdf if possible for further pdf reading and analysis
+
+ 5. ROBOT DETECTION:
+    - If the page is a robot detection page, abort immediately
+    - Navigate to the most authoritative source for similar information instead
+
+ ===== EFFICIENCY GUIDELINES =====
+ - Use specific search queries with key terms from the task
+ - Avoid getting distracted by tangential information
+ - If blocked by paywalls, try archive.org or similar alternatives
+ - Document each significant finding clearly and concisely
+
+ Your goal is to extract precisely the information needed with minimal browsing steps.
+ """
+
+
+ @mcp.tool(description="Perform browser actions using the browser-use package.")
+ async def browser_use(
+     task: str = Field(description="The task to perform using the browser."),
+ ) -> str:
+     """
+     Perform browser actions using the browser-use package.
+
+     Args:
+         task (str): The task to perform using the browser.
+
+     Returns:
+         str: The result of the browser actions.
+     """
+     browser = Browser(
+         config=BrowserConfig(
+             headless=False,
+             new_context_config=BrowserContextConfig(
+                 disable_security=True,
+                 user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
+                 minimum_wait_page_load_time=10,
+                 maximum_wait_page_load_time=30,
+             ),
+         )
+     )
+     browser_context = BrowserContext(
+         config=BrowserContextConfig(
+             # Write the browser trace under LOG_FILE_PATH (defaults to /tmp)
+             trace_path=os.getenv("LOG_FILE_PATH", "/tmp") + "/browser_trace.log"
+         ),
+         browser=browser,
+     )
+     agent = Agent(
+         task=task,
+         llm=ChatOpenAI(
+             model=os.getenv("LLM_MODEL_NAME"),
+             api_key=os.getenv("LLM_API_KEY"),
+             base_url=os.getenv("LLM_BASE_URL"),
+             temperature=1.0,
+         ),
+         browser_context=browser_context,
+         extend_system_message=browser_system_prompt,
+     )
+     try:
+         browser_execution: AgentHistoryList = await agent.run(max_steps=50)
+         if (
+             browser_execution is not None
+             and browser_execution.is_done()
+             and browser_execution.is_successful()
+         ):
+             exec_trace = browser_execution.extracted_content()
+             logger.info(
+                 ">>> 🌏 Browse Execution Succeeded!\n"
+                 f">>> 💡 Result: {json.dumps(exec_trace, ensure_ascii=False, indent=4)}\n"
+                 ">>> 🌏 Browse Execution Succeeded!\n"
+             )
+             return browser_execution.final_result()
+         else:
+             return f"Browser execution failed for task: {task}"
+     except Exception as e:
+         logger.error(f"Browser execution failed: {traceback.format_exc()}")
+         return f"Browser execution failed for task: {task} due to {str(e)}"
+     finally:
+         await browser.close()
+         logger.info("Browser Closed!")
+
+
+ def main():
+     load_dotenv()
+     print("Starting Browser MCP Server...", file=sys.stderr)
+     mcp.run(transport="stdio")
+
+
+ # Make the module callable
+ def __call__():
+     """
+     Make the module callable for uvx.
+     This function is called when the module is executed directly.
+     """
+     main()
+
+
+ sys.modules[__name__].__call__ = __call__
+
+ # Run the server when the script is executed directly
+ if __name__ == "__main__":
+     main()
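
The browser server reads its LLM configuration from the environment. A minimal setup sketch before launching it; the variable names come from the code above, and every value is a placeholder:

import os

os.environ.setdefault("LLM_MODEL_NAME", "gpt-4o")  # placeholder model name
os.environ.setdefault("LLM_API_KEY", "sk-placeholder")
os.environ.setdefault("LLM_BASE_URL", "https://api.openai.com/v1")
os.environ.setdefault("LOG_FILE_PATH", "/tmp/aworld_logs")  # browser trace destination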
mcp_servers/chunk_server.py ADDED
File without changes
mcp_servers/chunk_utils.py ADDED
File without changes
mcp_servers/document_server.py ADDED
@@ -0,0 +1,998 @@
+ """
+ Document MCP Server
+
+ This module provides MCP server functionality for document processing and analysis.
+ It handles various document formats including:
+ - Text files
+ - PDF documents
+ - Word documents (DOCX)
+ - Excel spreadsheets
+ - PowerPoint presentations
+ - JSON and XML files
+ - HTML files
+
+ Each document type has specialized processing functions that extract content,
+ structure, and metadata. The server focuses on local file processing with
+ appropriate validation and error handling.
+
+ Main functions:
+ - mcpreadtext: Reads plain text files
+ - mcpreadpdf: Reads PDF files with optional image extraction
+ - mcpreaddocx: Reads Word documents
+ - mcpreadexcel: Reads Excel spreadsheets and CSV files
+ - mcpreadpptx: Reads PowerPoint presentations
+ - mcpreadjson: Reads and parses JSON/JSONL files
+ - mcpreadxml: Reads and parses XML files
+ - mcpreadhtmltext: Reads HTML files and extracts text, links, images, and tables
+ """
+
+ import io
+ import json
+ import os
+ import sys
+ import tempfile
+ import traceback
+ from datetime import date, datetime
+ from typing import Any, Dict, List, Optional
+
+ import fitz
+ import html2text
+ import pandas as pd
+ import xmltodict
+ from bs4 import BeautifulSoup
+ from docx2markdown._docx_to_markdown import docx_to_markdown
+ from dotenv import load_dotenv
+ from mcp.server.fastmcp import FastMCP
+ from PIL import Image, ImageDraw, ImageFont
+ from pptx import Presentation
+ from pydantic import BaseModel, Field
+ from PyPDF2 import PdfReader
+ from tabulate import tabulate
+ from xls2xlsx import XLS2XLSX
+
+ from aworld.logs.util import logger
+ from aworld.utils import import_package
+ from mcp_servers.image_server import encode_images
+
+ mcp = FastMCP("document-server")
+
+
+ # Define model classes for different document types
+ class TextDocument(BaseModel):
+     """Model representing a text document"""
+
+     content: str
+     file_path: str
+     file_name: str
+     file_size: int
+     last_modified: str
+
+
+ class HtmlDocument(BaseModel):
+     """Model representing an HTML document"""
+
+     content: str  # Extracted text content
+     html_content: str  # Original HTML content
+     file_path: str
+     file_name: str
+     file_size: int
+     last_modified: str
+     title: Optional[str] = None
+     links: Optional[List[Dict[str, str]]] = None
+     images: Optional[List[Dict[str, str]]] = None
+     tables: Optional[List[str]] = None
+     markdown: Optional[str] = None  # HTML converted to Markdown format
+
+
+ class JsonDocument(BaseModel):
+     """Model representing a JSON document"""
+
+     format: str  # "json" or "jsonl"
+     type: Optional[str] = None  # "array" or "object" for standard JSON
+     count: Optional[int] = None
+     keys: Optional[List[str]] = None
+     data: Any
+     file_path: str
+     file_name: str
+
+
+ class XmlDocument(BaseModel):
+     """Model representing an XML document"""
+
+     content: Dict
+     file_path: str
+     file_name: str
+
+
+ class PdfImage(BaseModel):
+     """Model representing an image extracted from a PDF"""
+
+     page: int
+     format: str
+     width: int
+     height: int
+     path: str
+
+
+ class PdfDocument(BaseModel):
+     """Model representing a PDF document"""
+
+     content: str
+     file_path: str
+     file_name: str
+     page_count: int
+     images: Optional[List[PdfImage]] = None
+     image_count: Optional[int] = None
+     image_dir: Optional[str] = None
+     error: Optional[str] = None
+
+
+ class PdfResult(BaseModel):
+     """Model representing results from processing multiple PDF documents"""
+
+     total_files: int
+     success_count: int
+     failed_count: int
+     results: List[PdfDocument]
+
+
+ class DocxDocument(BaseModel):
+     """Model representing a Word document"""
+
+     content: str
+     file_path: str
+     file_name: str
+
+
+ class ExcelSheet(BaseModel):
+     """Model representing a sheet in an Excel file"""
+
+     name: str
+     data: List[Dict[str, Any]]
+     markdown_table: str
+     row_count: int
+     column_count: int
+
+
+ class ExcelDocument(BaseModel):
+     """Model representing an Excel document"""
+
+     file_name: str
+     file_path: str
+     processed_path: Optional[str] = None
+     file_type: str
+     sheet_count: int
+     sheet_names: List[str]
+     sheets: List[ExcelSheet]
+     success: bool = True
+     error: Optional[str] = None
+
+
+ class ExcelResult(BaseModel):
+     """Model representing results from processing multiple Excel documents"""
+
+     total_files: int
+     success_count: int
+     failed_count: int
+     results: List[ExcelDocument]
+
+
+ class PowerPointSlide(BaseModel):
+     """Model representing a slide in a PowerPoint presentation"""
+
+     slide_number: int
+     image: str  # Base64 encoded image
+
+
+ class PowerPointDocument(BaseModel):
+     """Model representing a PowerPoint document"""
+
+     file_path: str
+     file_name: str
+     slide_count: int
+     slides: List[PowerPointSlide]
+
+
+ class SourceCodeDocument(BaseModel):
+     """Model representing a source code document"""
+
+     content: str
+     file_type: str
+     file_path: str
+     file_name: str
+     line_count: int
+     size_bytes: int
+     last_modified: str
+     classes: Optional[List[str]] = None
+     functions: Optional[List[str]] = None
+     imports: Optional[List[str]] = None
+     package: Optional[List[str]] = None
+     methods: Optional[List[str]] = None
+     includes: Optional[List[str]] = None
+
+
+ class DocumentError(BaseModel):
+     """Model representing an error in document processing"""
+
+     error: str
+     file_path: Optional[str] = None
+     file_name: Optional[str] = None
+
+
+ class ComplexEncoder(json.JSONEncoder):
+     def default(self, o):
+         if isinstance(o, datetime):
+             return o.strftime("%Y-%m-%d %H:%M:%S")
+         elif isinstance(o, date):
+             return o.strftime("%Y-%m-%d")
+         else:
+             return json.JSONEncoder.default(self, o)
+
+
+ def handle_error(e: Exception, error_type: str, file_path: Optional[str] = None) -> str:
+     """Unified error handling; returns a standard-format error message"""
+     error_msg = f"{error_type} error: {str(e)}"
+     logger.error(traceback.format_exc())
+
+     error = DocumentError(
+         error=error_msg,
+         file_path=file_path,
+         file_name=os.path.basename(file_path) if file_path else None,
+     )
+
+     return error.model_dump_json()
+
+
+ def check_file_readable(document_path: str) -> Optional[str]:
+     """Check if the file exists and is readable; return an error message or None"""
+     if not os.path.exists(document_path):
+         return f"File does not exist: {document_path}"
+     if not os.access(document_path, os.R_OK):
+         return f"File is not readable: {document_path}"
+     return None
+
+
+ @mcp.tool(
+     description="Read and return content from a local text file. Cannot process files from https:// URLs."
+ )
+ def mcpreadtext(
+     document_path: str = Field(description="The input local text file path."),
+ ) -> str:
+     """Read and return content from a local text file. Cannot process files from https:// URLs."""
+     error = check_file_readable(document_path)
+     if error:
+         return DocumentError(error=error, file_path=document_path).model_dump_json()
+
+     try:
+         with open(document_path, "r", encoding="utf-8") as f:
+             content = f.read()
+
+         result = TextDocument(
+             content=content,
+             file_path=document_path,
+             file_name=os.path.basename(document_path),
+             file_size=os.path.getsize(document_path),
+             last_modified=datetime.fromtimestamp(
+                 os.path.getmtime(document_path)
+             ).strftime("%Y-%m-%d %H:%M:%S"),
+         )
+
+         return result.model_dump_json()
+     except Exception as e:
+         return handle_error(e, "Text file reading", document_path)
+
+
+ @mcp.tool(
+     description="Read and parse a JSON or JSONL file and return the parsed content. Cannot process files from https:// URLs."
+ )
+ def mcpreadjson(
+     document_path: str = Field(description="Local path to JSON or JSONL file"),
+     is_jsonl: bool = Field(
+         default=False,
+         description="Whether the file is in JSONL format (one JSON object per line)",
+     ),
+ ) -> str:
+     """Read and parse a JSON or JSONL file and return the parsed content. Cannot process files from https:// URLs."""
+     error = check_file_readable(document_path)
+     if error:
+         return DocumentError(error=error, file_path=document_path).model_dump_json()
+
+     try:
+         # Choose processing method based on file type
+         if is_jsonl:
+             # Process JSONL file (one JSON object per line)
+             results = []
+             with open(document_path, "r", encoding="utf-8") as f:
+                 for line_num, line in enumerate(f, 1):
+                     line = line.strip()
+                     if not line:
+                         continue
+                     try:
+                         json_obj = json.loads(line)
+                         results.append(json_obj)
+                     except json.JSONDecodeError as e:
+                         logger.warning(
+                             f"JSON parsing error at line {line_num}: {str(e)}"
+                         )
+
+             # Create result model
+             result = JsonDocument(
+                 format="jsonl",
+                 count=len(results),
+                 data=results,
+                 file_path=document_path,
+                 file_name=os.path.basename(document_path),
+             )
+
+         else:
+             # Process standard JSON file
+             with open(document_path, "r", encoding="utf-8") as f:
+                 data = json.load(f)
+
+             # Create result model based on data type
+             if isinstance(data, list):
+                 result = JsonDocument(
+                     format="json",
+                     type="array",
+                     count=len(data),
+                     data=data,
+                     file_path=document_path,
+                     file_name=os.path.basename(document_path),
+                 )
+             else:
+                 result = JsonDocument(
+                     format="json",
+                     type="object",
+                     keys=list(data.keys()) if isinstance(data, dict) else [],
+                     data=data,
+                     file_path=document_path,
+                     file_name=os.path.basename(document_path),
+                 )
+
+         return result.model_dump_json()
+
+     except json.JSONDecodeError as e:
+         return handle_error(e, "JSON parsing", document_path)
+     except Exception as e:
+         return handle_error(e, "JSON file reading", document_path)
+
+
+ @mcp.tool(
+     description="Read and parse an XML file and return the parsed content. Cannot process files from https:// URLs."
+ )
+ def mcpreadxml(
+     document_path: str = Field(description="The local input XML file path."),
+ ) -> str:
+     """Read and parse an XML file and return the parsed content. Cannot process files from https:// URLs."""
+     error = check_file_readable(document_path)
+     if error:
+         return DocumentError(error=error, file_path=document_path).model_dump_json()
+
+     try:
+         with open(document_path, "r", encoding="utf-8") as f:
+             data = f.read()
+
+         result = XmlDocument(
+             content=xmltodict.parse(data),
+             file_path=document_path,
+             file_name=os.path.basename(document_path),
+         )
+
+         return result.model_dump_json()
+     except Exception as e:
+         return handle_error(e, "XML file reading", document_path)
+
+
+ @mcp.tool(
+     description="Read and return content from PDF files with optional image extraction. Cannot process files from https:// URLs."
+ )
+ def mcpreadpdf(
+     document_paths: List[str] = Field(description="The local input PDF file paths."),
+     extract_images: bool = Field(
+         default=False, description="Whether to extract images from PDF (default: False)"
+     ),
+ ) -> str:
+     """Read and return content from PDF files with optional image extraction. Cannot process files from https:// URLs."""
+     try:
+         results = []
+         success_count = 0
+         failed_count = 0
+
+         for document_path in document_paths:
+             error = check_file_readable(document_path)
+             if error:
+                 results.append(
+                     PdfDocument(
+                         content="",
+                         file_path=document_path,
+                         file_name=os.path.basename(document_path),
+                         page_count=0,
+                         error=error,
+                     )
+                 )
+                 failed_count += 1
+                 continue
+
+             try:
+                 with open(document_path, "rb") as f:
+                     reader = PdfReader(f)
+                     # extract_text() can return None for image-only pages
+                     content = " ".join(
+                         (page.extract_text() or "") for page in reader.pages
+                     )
+                     page_count = len(reader.pages)
+
+                 pdf_result = PdfDocument(
+                     content=content,
+                     file_path=document_path,
+                     file_name=os.path.basename(document_path),
+                     page_count=page_count,
+                 )
+
+                 # Extract images if requested
+                 if extract_images:
+                     images_data = []
+                     # Use /tmp directory for storing images
+                     output_dir = "/tmp/pdf_images"
+
+                     # Create output directory if it doesn't exist
+                     os.makedirs(output_dir, exist_ok=True)
+
+                     # Generate a unique subfolder based on filename to avoid conflicts
+                     pdf_name = os.path.splitext(os.path.basename(document_path))[0]
+                     timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+                     image_dir = os.path.join(output_dir, f"{pdf_name}_{timestamp}")
+                     os.makedirs(image_dir, exist_ok=True)
+
+                     try:
+                         # Open PDF with PyMuPDF
+                         pdf_document = fitz.open(document_path)
+
+                         # Iterate through each page
+                         for page_index in range(len(pdf_document)):
+                             page = pdf_document[page_index]
+
+                             # Get image list
+                             image_list = page.get_images(full=True)
+
+                             # Process each image
+                             for img_index, img in enumerate(image_list):
+                                 # Extract image information
+                                 xref = img[0]
+                                 base_image = pdf_document.extract_image(xref)
+                                 image_bytes = base_image["image"]
+                                 image_ext = base_image["ext"]
+
+                                 # Save image to file in /tmp directory
+                                 img_filename = f"pdf_image_p{page_index+1}_{img_index+1}.{image_ext}"
+                                 img_path = os.path.join(image_dir, img_filename)
+
+                                 with open(img_path, "wb") as img_file:
+                                     img_file.write(image_bytes)
+                                     logger.success(f"Image saved: {img_path}")
+
+                                 # Get image dimensions
+                                 with Image.open(img_path) as img:
+                                     width, height = img.size
+
+                                 # Add to results with file path instead of base64
+                                 images_data.append(
+                                     PdfImage(
+                                         page=page_index + 1,
+                                         format=image_ext,
+                                         width=width,
+                                         height=height,
+                                         path=img_path,
+                                     )
+                                 )
+
+                         pdf_result.images = images_data
+                         pdf_result.image_count = len(images_data)
+                         pdf_result.image_dir = image_dir
+
+                     except Exception as img_error:
+                         logger.error(f"Error extracting images: {str(img_error)}")
+                         # Don't clean up on error so we can keep any successfully extracted images
+                         pdf_result.error = str(img_error)
+
+                 results.append(pdf_result)
+                 success_count += 1
+
+             except Exception as e:
+                 results.append(
+                     PdfDocument(
+                         content="",
+                         file_path=document_path,
+                         file_name=os.path.basename(document_path),
+                         page_count=0,
+                         error=str(e),
+                     )
+                 )
+                 failed_count += 1
+
+         # Create final result
+         pdf_result = PdfResult(
+             total_files=len(document_paths),
+             success_count=success_count,
+             failed_count=failed_count,
+             results=results,
+         )
+
+         return pdf_result.model_dump_json()
+
+     except Exception as e:
+         return handle_error(e, "PDF file reading")
+
+
+ @mcp.tool(
+     description="Read and return content from a Word file. Cannot process files from https:// URLs."
+ )
+ def mcpreaddocx(
+     document_path: str = Field(description="The local input Word file path."),
+ ) -> str:
+     """Read and return content from a Word file. Cannot process files from https:// URLs."""
+     error = check_file_readable(document_path)
+     if error:
+         return DocumentError(error=error, file_path=document_path).model_dump_json()
+
+     try:
+         file_name = os.path.basename(document_path)
+         md_file_path = f"{file_name}.md"
+         docx_to_markdown(document_path, md_file_path)
+
+         with open(md_file_path, "r", encoding="utf-8") as f:
+             content = f.read()
+
+         os.remove(md_file_path)
+
+         result = DocxDocument(
+             content=content, file_path=document_path, file_name=file_name
+         )
+
+         return result.model_dump_json()
+     except Exception as e:
+         return handle_error(e, "Word file reading", document_path)
+
+
+ @mcp.tool(
+     description="Read multiple Excel/CSV files and convert sheets to Markdown tables. Cannot process files from https:// URLs."
+ )
+ def mcpreadexcel(
+     document_paths: List[str] = Field(
+         description="List of local input Excel/CSV file paths."
+     ),
+     max_rows: int = Field(
+         1000, description="Maximum number of rows to read per sheet (default: 1000)"
+     ),
+     convert_xls_to_xlsx: bool = Field(
+         False,
+         description="Whether to convert XLS files to XLSX format (default: False)",
+     ),
+ ) -> str:
+     """Read multiple Excel/CSV files and convert sheets to Markdown tables. Cannot process files from https:// URLs."""
+     try:
+         # Import required packages
+         import_package("tabulate")
+
+         # Import xls2xlsx package if conversion is requested
+         if convert_xls_to_xlsx:
+             import_package("xls2xlsx")
+
+         all_results = []
+         temp_files = []  # Track temporary files for cleanup
+         success_count = 0
+         failed_count = 0
+
+         # Process each file
+         for document_path in document_paths:
+             # Check if file exists and is readable
+             error = check_file_readable(document_path)
+             if error:
+                 all_results.append(
+                     ExcelDocument(
+                         file_name=os.path.basename(document_path),
+                         file_path=document_path,
+                         file_type="UNKNOWN",
+                         sheet_count=0,
+                         sheet_names=[],
+                         sheets=[],
+                         success=False,
+                         error=error,
+                     )
+                 )
+                 failed_count += 1
+                 continue
+
+             try:
+                 # Check file extension
+                 file_ext = os.path.splitext(document_path)[1].lower()
+
+                 # Validate file type
+                 if file_ext not in [".csv", ".xls", ".xlsx", ".xlsm"]:
+                     error_msg = f"Unsupported file format: {file_ext}. Only CSV, XLS, XLSX, and XLSM formats are supported."
+                     all_results.append(
+                         ExcelDocument(
+                             file_name=os.path.basename(document_path),
+                             file_path=document_path,
+                             file_type=file_ext.replace(".", "").upper(),
+                             sheet_count=0,
+                             sheet_names=[],
+                             sheets=[],
+                             success=False,
+                             error=error_msg,
+                         )
+                     )
+                     failed_count += 1
+                     continue
+
+                 # Convert XLS to XLSX if requested and file is XLS
+                 processed_path = document_path
+                 if convert_xls_to_xlsx and file_ext == ".xls":
+                     try:
+                         logger.info(f"Converting XLS to XLSX: {document_path}")
+                         converter = XLS2XLSX(document_path)
+                         # Create temp file with xlsx extension
+                         xlsx_path = (
+                             os.path.splitext(document_path)[0] + "_converted.xlsx"
+                         )
+                         converter.to_xlsx(xlsx_path)
+                         processed_path = xlsx_path
+                         temp_files.append(xlsx_path)  # Track for cleanup
+                         logger.success(f"Converted XLS to XLSX: {xlsx_path}")
+                     except Exception as conv_error:
+                         logger.error(f"XLS to XLSX conversion error: {str(conv_error)}")
+                         # Continue with original file if conversion fails
+
+                 excel_sheets = []
+                 sheet_names = []
+
+                 # Handle CSV files differently
+                 if file_ext == ".csv":
+                     # For CSV files, create a single sheet with the file name
+                     sheet_name = os.path.basename(document_path).replace(".csv", "")
+                     df = pd.read_csv(processed_path, nrows=max_rows)
+
+                     # Create markdown table
+                     markdown_table = "*Empty table*"
+                     if not df.empty:
+                         headers = df.columns.tolist()
+                         table_data = df.values.tolist()
+                         markdown_table = tabulate(
+                             table_data, headers=headers, tablefmt="pipe"
+                         )
+
+                         if len(df) >= max_rows:
+                             markdown_table += (
+                                 f"\n\n*Note: Table truncated to {max_rows} rows*"
+                             )
+
+                     # Create sheet model
+                     excel_sheets.append(
+                         ExcelSheet(
+                             name=sheet_name,
+                             data=df.to_dict(orient="records"),
+                             markdown_table=markdown_table,
+                             row_count=len(df),
+                             column_count=len(df.columns),
+                         )
+                     )
+
+                     sheet_names = [sheet_name]
+
+                 else:
+                     # For Excel files, process all sheets
+                     with pd.ExcelFile(processed_path) as xls:
+                         sheet_names = xls.sheet_names
+
+                         for sheet_name in sheet_names:
+                             # Read Excel sheet into DataFrame with row limit
+                             df = pd.read_excel(
+                                 xls, sheet_name=sheet_name, nrows=max_rows
+                             )
+
+                             # Create markdown table
+                             markdown_table = "*Empty table*"
+                             if not df.empty:
+                                 headers = df.columns.tolist()
+                                 table_data = df.values.tolist()
+                                 markdown_table = tabulate(
+                                     table_data, headers=headers, tablefmt="pipe"
+                                 )
+
+                                 if len(df) >= max_rows:
+                                     markdown_table += f"\n\n*Note: Table truncated to {max_rows} rows*"
+
+                             # Create sheet model
+                             excel_sheets.append(
+                                 ExcelSheet(
+                                     name=sheet_name,
+                                     data=df.to_dict(orient="records"),
+                                     markdown_table=markdown_table,
+                                     row_count=len(df),
+                                     column_count=len(df.columns),
+                                 )
+                             )
+
+                 # Create result for this file
+                 file_result = ExcelDocument(
+                     file_name=os.path.basename(document_path),
+                     file_path=document_path,
+                     processed_path=(
+                         processed_path if processed_path != document_path else None
+                     ),
+                     file_type=file_ext.replace(".", "").upper(),
+                     sheet_count=len(sheet_names),
+                     sheet_names=sheet_names,
+                     sheets=excel_sheets,
+                     success=True,
+                 )
+
+                 all_results.append(file_result)
+                 success_count += 1
+
+             except Exception as file_error:
+                 # Handle errors for individual files
+                 error_msg = str(file_error)
+                 logger.error(f"File reading error for {document_path}: {error_msg}")
+                 all_results.append(
+                     ExcelDocument(
+                         file_name=os.path.basename(document_path),
+                         file_path=document_path,
+                         file_type=os.path.splitext(document_path)[1]
+                         .replace(".", "")
+                         .upper(),
+                         sheet_count=0,
+                         sheet_names=[],
+                         sheets=[],
+                         success=False,
+                         error=error_msg,
+                     )
+                 )
+                 failed_count += 1
+
+         # Clean up temporary files
+         for temp_file in temp_files:
+             try:
+                 if os.path.exists(temp_file):
+                     os.remove(temp_file)
+                     logger.info(f"Removed temporary file: {temp_file}")
+             except Exception as cleanup_error:
+                 logger.warning(
+                     f"Error cleaning up temporary file {temp_file}: {str(cleanup_error)}"
+                 )
+
+         # Create final result
+         excel_result = ExcelResult(
+             total_files=len(document_paths),
+             success_count=success_count,
+             failed_count=failed_count,
+             results=all_results,
+         )
+
+         return excel_result.model_dump_json()
+
+     except Exception as e:
+         return handle_error(e, "Excel/CSV files processing")
+
+
+ @mcp.tool(
+     description="Read and convert PowerPoint slides to base64 encoded images. Cannot process files from https:// URLs."
+ )
+ def mcpreadpptx(
+     document_path: str = Field(description="The local input PowerPoint file path."),
+ ) -> str:
+     """Read and convert PowerPoint slides to base64 encoded images. Cannot process files from https:// URLs."""
+     error = check_file_readable(document_path)
+     if error:
+         return DocumentError(error=error, file_path=document_path).model_dump_json()
+
+     # Create temporary directory
+     temp_dir = tempfile.mkdtemp()
+     slides_data = []
+
+     try:
+         presentation = Presentation(document_path)
+         total_slides = len(presentation.slides)
+
+         if total_slides == 0:
+             raise ValueError("PPT file does not contain any slides")
+
+         # Process each slide
+         for i, slide in enumerate(presentation.slides):
+             # Set slide dimensions
+             slide_width_px = 1920  # 16:9 ratio
+             slide_height_px = 1080
+
+             # Create blank image
+             slide_img = Image.new("RGB", (slide_width_px, slide_height_px), "white")
+             draw = ImageDraw.Draw(slide_img)
+             font = ImageFont.load_default()
+
+             # Draw slide number
+             draw.text((20, 20), f"Slide {i+1}/{total_slides}", fill="black", font=font)
+
+             # Process shapes in the slide
+             for shape in slide.shapes:
+                 try:
+                     # Process images
+                     if hasattr(shape, "image") and shape.image:
+                         image_stream = io.BytesIO(shape.image.blob)
+                         img = Image.open(image_stream)
+                         left = int(
+                             shape.left * slide_width_px / presentation.slide_width
+                         )
+                         top = int(
+                             shape.top * slide_height_px / presentation.slide_height
+                         )
+                         slide_img.paste(img, (left, top))
+
+                     # Process text
+                     elif hasattr(shape, "text") and shape.text:
+                         text_left = int(
+                             shape.left * slide_width_px / presentation.slide_width
+                         )
+                         text_top = int(
+                             shape.top * slide_height_px / presentation.slide_height
+                         )
+                         draw.text(
+                             (text_left, text_top),
+                             shape.text,
+                             fill="black",
+                             font=font,
+                         )
+
+                 except Exception as shape_error:
+                     logger.warning(
+                         f"Error processing shape in slide {i+1}: {str(shape_error)}"
+                     )
+
+             # Save slide image
+             img_path = os.path.join(temp_dir, f"slide_{i+1}.jpg")
+             slide_img.save(img_path, "JPEG")
+
+             # Convert to base64
+             base64_image = encode_images(img_path)
+             slides_data.append(
+                 PowerPointSlide(
+                     slide_number=i + 1, image=f"data:image/jpeg;base64,{base64_image}"
+                 )
+             )
+
+         # Create result
+         result = PowerPointDocument(
+             file_path=document_path,
+             file_name=os.path.basename(document_path),
+             slide_count=total_slides,
+             slides=slides_data,
+         )
+
+         return result.model_dump_json()
+
+     except Exception as e:
+         return handle_error(e, "PowerPoint processing", document_path)
+     finally:
+         # Clean up temporary files
+         try:
+             for file in os.listdir(temp_dir):
+                 os.remove(os.path.join(temp_dir, file))
+             os.rmdir(temp_dir)
+         except Exception as cleanup_error:
+             logger.warning(f"Error cleaning up temporary files: {str(cleanup_error)}")
+
+
+ @mcp.tool(
+     description="Read an HTML file and extract text content; optionally extract links, images, and table information, and convert to Markdown format."
+ )
+ def mcpreadhtmltext(
+     document_path: str = Field(description="The local input HTML file path."),
+     extract_links: bool = Field(
+         default=True, description="Whether to extract link information"
+     ),
+     extract_images: bool = Field(
+         default=True, description="Whether to extract image information"
+     ),
+     extract_tables: bool = Field(
+         default=True, description="Whether to extract table information"
+     ),
+     convert_to_markdown: bool = Field(
+         default=True, description="Whether to convert HTML to Markdown format"
+     ),
+ ) -> str:
+     """Read an HTML file and extract text content; optionally extract links, images, and table information, and convert to Markdown format."""
+     error = check_file_readable(document_path)
+     if error:
+         return DocumentError(error=error, file_path=document_path).model_dump_json()
+
+     try:
+         # Read HTML file
+         with open(document_path, "r", encoding="utf-8") as f:
+             html_content = f.read()
+
+         # Parse HTML using BeautifulSoup
+         soup = BeautifulSoup(html_content, "html.parser")
+
+         # Extract text content (remove script and style content)
+         for script in soup(["script", "style"]):
+             script.extract()
+         text_content = soup.get_text(separator="\n", strip=True)
+
+         # Extract title
+         title = soup.title.string if soup.title else None
+
+         # Initialize result object
+         result = HtmlDocument(
+             content=text_content,
+             html_content=html_content,
+             file_path=document_path,
+             file_name=os.path.basename(document_path),
+             file_size=os.path.getsize(document_path),
+             last_modified=datetime.fromtimestamp(
+                 os.path.getmtime(document_path)
+             ).strftime("%Y-%m-%d %H:%M:%S"),
+             title=title,
+         )
+
+         # Extract links
+         if extract_links:
+             links = []
+             for link in soup.find_all("a"):
+                 href = link.get("href")
+                 text = link.get_text(strip=True)
+                 if href:
+                     links.append({"url": href, "text": text})
+             result.links = links
+
+         # Extract images
+         if extract_images:
+             images = []
+             for img in soup.find_all("img"):
+                 src = img.get("src")
+                 alt = img.get("alt", "")
+                 if src:
+                     images.append({"src": src, "alt": alt})
+             result.images = images
+
+         # Extract tables
+         if extract_tables:
+             tables = []
+             for table in soup.find_all("table"):
+                 tables.append(str(table))
+             result.tables = tables
+
+         # Convert to Markdown
+         if convert_to_markdown:
+             h = html2text.HTML2Text()
+             h.ignore_links = False
+             h.ignore_images = False
+             h.ignore_tables = False
+             markdown_content = h.handle(html_content)
+             result.markdown = markdown_content
+
+         return result.model_dump_json()
+
+     except Exception as e:
+         return handle_error(e, "HTML file reading", document_path)
+
+
+ def main():
+     load_dotenv()
+
+     print("Starting Document MCP Server...", file=sys.stderr)
+     mcp.run(transport="stdio")
+
+
+ # Make the module callable
+ def __call__():
+     """
+     Make the module callable for uvx.
+     This function is called when the module is executed directly.
+     """
+     main()
+
+
+ sys.modules[__name__].__call__ = __call__
+
+ # Run the server when the script is executed directly
+ if __name__ == "__main__":
+     main()
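
Because the document tools are plain synchronous functions, they can be smoke-tested without an MCP client, assuming the FastMCP decorator leaves the wrapped function directly callable; /tmp/notes.txt is a hypothetical input:

import json

# Direct smoke test of the text reader; prints the extracted content or the error.
raw = mcpreadtext(document_path="/tmp/notes.txt")
doc = json.loads(raw)
print(doc.get("content") or doc.get("error"))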
mcp_servers/download_server.py ADDED
@@ -0,0 +1,199 @@
+ """
+ Download MCP Server
+
+ This module provides MCP server functionality for downloading files from URLs.
+ It handles various download scenarios with proper validation and error handling.
+
+ Key features:
+ - Streamed file downloading from HTTP/HTTPS URLs
+ - File validation
+ - Safe file saving
+
+ Main functions:
+ - mcpdownloadfiles: Downloads files from URLs to the local filesystem
+ """
+
+ import os
+ import sys
+ import traceback
+ import urllib.parse
+ from pathlib import Path
+ from typing import List, Optional
+
+ import requests
+ from dotenv import load_dotenv
+ from mcp.server.fastmcp import FastMCP
+ from pydantic import BaseModel, Field
+
+ from aworld.logs.util import logger
+
+ mcp = FastMCP("download-server")
+
+
+ class DownloadResult(BaseModel):
+     """Download result model with file information"""
+
+     file_path: str
+     file_name: str
+     file_size: int
+     content_type: Optional[str] = None
+     success: bool
+     error: Optional[str] = None
+
+
+ class DownloadResults(BaseModel):
+     """Download results model for multiple files"""
+
+     results: List[DownloadResult]
+     success_count: int
+     failed_count: int
+
+
+ @mcp.tool(description="Download files from URLs and save to the local filesystem.")
+ def mcpdownloadfiles(
+     urls: List[str] = Field(
+         ..., description="The URLs of the files to download. Must be a list of URLs."
+     ),
+     output_dir: str = Field(
+         "/tmp/mcp_downloads",
+         description="Directory to save the downloaded files (default: /tmp/mcp_downloads).",
+     ),
+     timeout: int = Field(60, description="Download timeout in seconds (default: 60)."),
+ ) -> str:
+     """Download files from URLs and save to the local filesystem.
+
+     Args:
+         urls: The URLs of the files to download; must be a list of URLs
+         output_dir: Directory to save the downloaded files
+         timeout: Download timeout in seconds
+
+     Returns:
+         JSON string with download results information
+     """
+     results = []
+     success_count = 0
+     failed_count = 0
+
+     for single_url in urls:
+         result_json = _download_single_file(single_url, output_dir, "", timeout)
+         result = DownloadResult.model_validate_json(result_json)
+         results.append(result)
+
+         if result.success:
+             success_count += 1
+         else:
+             failed_count += 1
+
+     batch_results = DownloadResults(
+         results=results, success_count=success_count, failed_count=failed_count
+     )
+
+     return batch_results.model_dump_json()
+
+
+ def _download_single_file(
+     url: str, output_dir: str, filename: str, timeout: int
+ ) -> str:
+     """Download a single file from a URL and save it to the local filesystem."""
+     try:
+         # Validate URL
+         if not url.startswith(("http://", "https://")):
+             raise ValueError(
+                 "Invalid URL format. URL must start with http:// or https://"
+             )
+
+         # Create output directory if it doesn't exist
+         output_path = Path(output_dir)
+         output_path.mkdir(parents=True, exist_ok=True)
+
+         # Determine filename if not provided
+         if not filename:
+             filename = os.path.basename(urllib.parse.urlparse(url).path)
+             if not filename:
+                 filename = "downloaded_file"
+
+         # Full path to save the file
+         file_path = os.path.join(output_path, filename)
+
+         logger.info(f"Downloading file from {url} to {file_path}")
+         # Download the file in streamed chunks
+         headers = {
+             "User-Agent": (
+                 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+                 "AWorld/1.0 (https://github.com/inclusionAI/AWorld; [email protected]) "
+                 "Python/requests "
+             ),
+             "Accept": "text/html,application/xhtml+xml,application/xml,application/pdf;q=0.9,image/webp,*/*;q=0.8",
+             "Accept-Language": "en-US,en;q=0.5",
+             "Accept-Encoding": "gzip, deflate, br",
+             "Connection": "keep-alive",
+         }
+
+         response = requests.get(url, headers=headers, stream=True, timeout=timeout)
+         response.raise_for_status()
+
+         # Get content type
+         content_type = response.headers.get("Content-Type")
+
+         # Save the file
+         with open(file_path, "wb") as f:
+             for chunk in response.iter_content(chunk_size=8192):
+                 if chunk:
+                     f.write(chunk)
+
+         # Get actual file size
+         actual_size = os.path.getsize(file_path)
+
+         logger.info(f"File downloaded successfully to {file_path}")
+
+         # Create result
+         result = DownloadResult(
+             file_path=file_path,
+             file_name=filename,
+             file_size=actual_size,
+             content_type=content_type,
+             success=True,
+             error=None,
+         )
+
+         return result.model_dump_json()
+
+     except Exception as e:
+         error_msg = str(e)
+         logger.error(f"Download error: {traceback.format_exc()}")
+
+         result = DownloadResult(
+             file_path="",
+             file_name="",
+             file_size=0,
+             content_type=None,
+             success=False,
+             error=error_msg,
+         )
+
+         return result.model_dump_json()
+
+
+ def main():
+     load_dotenv()
+
+     print("Starting Download MCP Server...", file=sys.stderr)
+     mcp.run(transport="stdio")
+
+
+ # Make the module callable
+ def __call__():
+     """
+     Make the module callable for uvx.
+     This function is called when the module is executed directly.
+     """
+     main()
+
+
+ sys.modules[__name__].__call__ = __call__
+
+ # Run the server when the script is executed directly
+ if __name__ == "__main__":
+     main()
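
For reference, a successful single-file download serializes to JSON of roughly this shape (field names from the DownloadResult/DownloadResults models above; values are illustrative):

example_result = {
    "results": [
        {
            "file_path": "/tmp/mcp_downloads/report.pdf",
            "file_name": "report.pdf",
            "file_size": 102400,
            "content_type": "application/pdf",
            "success": True,
            "error": None,
        }
    ],
    "success_count": 1,
    "failed_count": 0,
}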
mcp_servers/gen_video_server.py ADDED
@@ -0,0 +1,172 @@
1
+ import os
2
+ import time
3
+ import json
4
+ import requests
5
+ import sys
6
+
7
+ from dotenv import load_dotenv
8
+ from mcp.server import FastMCP
9
+ from pydantic import Field
10
+ from typing_extensions import Any
11
+
12
+ from aworld.logs.util import logger
13
+
14
+ mcp = FastMCP("gen-video-server")
15
+
16
+ @mcp.tool(description="Submit video generation task based on text content")
17
+ def video_tasks(prompt: str = Field(description="The text prompt to generate a video")) -> Any:
18
+ """Generate video from text prompt"""
19
+ api_key = os.getenv('DASHSCOPE_API_KEY')
20
+ submit_url = os.getenv('DASHSCOPE_VIDEO_SUBMIT_URL', '')
21
+ query_base_url = os.getenv('DASHSCOPE_QUERY_BASE_URL', '')
22
+
23
+ if not api_key or not submit_url or not query_base_url:
24
+ logger.warning("Query failed: DASHSCOPE_API_KEY, DASHSCOPE_VIDEO_SUBMIT_URL, DASHSCOPE_QUERY_BASE_URL environment variables are not set")
25
+ return None
26
+
27
+ headers = {
28
+ 'X-DashScope-Async': 'enable',
29
+ 'Authorization': f'Bearer {api_key}',
30
+ 'Content-Type': 'application/json'
31
+ }
32
+
33
+ # Get parameters from environment variables or use defaults
34
+ model = os.getenv('DASHSCOPE_VIDEO_MODEL', 'wanx2.1-t2v-turbo')
35
+ size = os.getenv('DASHSCOPE_VIDEO_SIZE', '1280*720')
36
+
37
+ # Note: Currently the API only supports generating one video at a time
38
+ # and this tool submits a single generation task per call
39
+
40
+ task_data = {
41
+ "model": model,
42
+ "input": {
43
+ "prompt": prompt
44
+ },
45
+ "parameters": {
46
+ "size": size
47
+ }
48
+ }
49
+
50
+ try:
51
+ # Step 1: Submit task to generate video
52
+ logger.info("Submitting task to generate video...")
53
+
54
+ response = requests.post(submit_url, headers=headers, json=task_data)
55
+
56
+ if response.status_code != 200:
57
+ logger.warning(f"Task submission failed with status code {response.status_code}")
58
+ return None
59
+
60
+ result = response.json()
61
+
62
+ # Check if task was successfully submitted
63
+ if not result.get("output") or not result.get("output").get("task_id"):
64
+ logger.warning("Failed to get task_id from response")
65
+ return None
66
+
67
+ # Extract task ID
68
+ task_id = result.get("output").get("task_id")
69
+ logger.info(f"Task submitted successfully. Task ID: {task_id}")
70
+ return json.dumps({"task_id": task_id})
71
+ except Exception as e:
72
+ logger.warning(f"Exception occurred: {e}")
73
+ return None
74
+
75
+
76
+ @mcp.tool(description="Query video by task ID")
77
+ def get_video_by_taskid(task_id: str = Field(description="Task ID needed to query the video")) -> Any:
78
+ """Generate video from text prompt"""
79
+ api_key = os.getenv('DASHSCOPE_API_KEY')
80
+ query_base_url = os.getenv('DASHSCOPE_QUERY_BASE_URL', '')
81
+
82
+
83
+ try:
84
+ # Step 2: Poll for results
85
+ max_attempts = int(os.getenv('DASHSCOPE_VIDEO_RETRY_TIMES', 10)) # Increased default retries for video
86
+ wait_time = int(os.getenv('DASHSCOPE_VIDEO_SLEEP_TIME', 5)) # Increased default wait time for video
87
+ query_url = f"{query_base_url}{task_id}"
88
+
89
+ for attempt in range(max_attempts):
90
+ logger.info(f"Polling attempt {attempt + 1}/{max_attempts}...")
91
+
92
+ # Poll for results
93
+ query_response = requests.get(query_url, headers={'Authorization': f'Bearer {api_key}'})
94
+
95
+ if query_response.status_code != 200:
96
+ logger.info(f"Poll request failed with status code {query_response.status_code}")
97
+ time.sleep(wait_time)
98
+ continue
99
+
100
+ try:
101
+ query_result = query_response.json()
102
+ except json.JSONDecodeError as e:
103
+ logger.warning(f"Failed to parse response as JSON: {e}")
104
+ time.sleep(wait_time)
105
+ continue
106
+
107
+ # Check task status
108
+ task_status = query_result.get("output", {}).get("task_status")
109
+
110
+ if task_status == "SUCCEEDED":
111
+ # Extract video URL
112
+ video_url = query_result.get("output", {}).get("video_url")
113
+
114
+ if video_url:
115
+ # Return as array of objects with video_url for consistency with image API
116
+ return json.dumps({"video_url": video_url})
117
+ else:
118
+ logger.info("Video URL not found in the response")
119
+ return None
120
+ elif task_status in ["PENDING", "RUNNING"]:
121
+ # If still running, continue to next polling attempt
122
+ logger.info(f"query_video Task status: {task_status}, continuing to next poll...")
123
+ time.sleep(wait_time)
124
+ continue
125
+ elif task_status == "FAILED":
126
+ logger.warning("Task failed")
127
+ return None
128
+ else:
129
+ # Any other status, return None
130
+ logger.warning(f"Unexpected status: {task_status}")
131
+ return None
132
+
133
+ # If we get here, polling timed out
134
+ logger.warning("Polling timed out after maximum attempts")
135
+ return None
136
+
137
+ except Exception as e:
138
+ logger.warning(f"Exception occurred: {e}")
139
+ return None
140
+
141
+
142
+ def main():
143
+ from dotenv import load_dotenv
144
+
145
+ load_dotenv(override=True)
146
+
147
+ print("Starting Audio MCP gen-video-server...", file=sys.stderr)
148
+ mcp.run(transport="stdio")
149
+
150
+
151
+ # Make the module callable
152
+ def __call__():
153
+ """
154
+ Make the module callable for uvx.
155
+ This function is called when the module is executed directly.
156
+ """
157
+ main()
158
+
159
+
160
+ sys.modules[__name__].__call__ = __call__
161
+
162
+ if __name__ == "__main__":
163
+ main()
164
+
165
+
166
+ # For testing without MCP
167
+ # result = video_tasks("A cat running under moonlight")
168
+ # print("\nFinal Result:")
169
+ # print(result)
170
+ # result = get_video_by_taskid("ccd25d03-76cc-49d1-a991-ad073b8c6ada")
171
+ # print("\nFinal Result:")
172
+ # print(result)
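
A hedged sketch of the full submit-then-poll flow built from the two tools above; it assumes the DASHSCOPE_* environment variables are set and that the functions are invoked in-process, as in the commented test.

    import json

    def text_to_video(prompt: str):
        submitted = video_tasks(prompt)            # '{"task_id": "..."}' or None
        if not submitted:
            return None
        task_id = json.loads(submitted)["task_id"]
        finished = get_video_by_taskid(task_id)    # polls until SUCCEEDED/FAILED/timeout
        return json.loads(finished)["video_url"] if finished else None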
mcp_servers/image_server.py ADDED
@@ -0,0 +1,234 @@
1
+ """
2
+ Image MCP Server
3
+
4
+ This module provides MCP server functionality for image processing and analysis.
5
+ It handles image encoding, optimization, and various image analysis tasks such as
6
+ OCR (Optical Character Recognition) and visual reasoning.
7
+
8
+ The server supports both local image files and remote image URLs with proper validation
9
+ and handles various image formats including JPEG, PNG, GIF, and others.
10
+
11
+ Main functions:
12
+ - encode_images: Encodes images to base64 format with optimization
13
+ - optimize_image: Resizes and optimizes images for better performance
14
+ - Various MCP tools for image analysis and processing
15
+ """
16
+
17
+ # import asyncio
18
+ import base64
19
+ import os
20
+ from io import BytesIO
21
+ from typing import Any, Dict, List
22
+
23
+ from mcp.server.fastmcp import FastMCP
24
+ from openai import OpenAI
25
+ from PIL import Image
26
+ from pydantic import Field
27
+
28
+ from aworld.logs.util import logger
29
+ from mcp_servers.utils import get_file_from_source
30
+
31
+ # Initialize MCP server
32
+ mcp = FastMCP("image-server")
33
+
34
+
35
+ IMAGE_OCR = (
36
+ "Input is a base64 encoded image. Read text from image if present. "
37
+ "Return a json string with the following format: "
38
+ '{"image_text": "text from image"}'
39
+ )
40
+
41
+ IMAGE_REASONING = (
42
+ "Input is a base64 encoded image. Given user's task: {task}, "
43
+ "solve it following the guide line:\n"
44
+ "1. Careful visual inspection\n"
45
+ "2. Contextual reasoning\n"
46
+ "3. Text transcription where relevant\n"
47
+ "4. Logical deduction from visual evidence\n"
48
+ "Return a json string with the following format: "
49
+ '{"image_reasoning_result": "reasoning result given task and image"}'
50
+ )
51
+
52
+
53
+ def optimize_image(image_data: bytes, max_size: int = 1024) -> bytes:
54
+ """
55
+ Optimize image by resizing if needed
56
+
57
+ Args:
58
+ image_data: Raw image data
59
+ max_size: Maximum dimension size in pixels
60
+
61
+ Returns:
62
+ bytes: Optimized image data
63
+
64
+ Raises:
65
+ ValueError: When image cannot be processed
66
+ """
67
+ try:
68
+ image = Image.open(BytesIO(image_data))
69
+
70
+ # Resize if image is too large
71
+ if max(image.size) > max_size:
72
+ ratio = max_size / max(image.size)
73
+ new_size = (int(image.size[0] * ratio), int(image.size[1] * ratio))
74
+ image = image.resize(new_size, Image.Resampling.LANCZOS)
75
+
76
+ # Save to buffer
77
+ buffered = BytesIO()
78
+ image_format = image.format if image.format else "JPEG"
79
+ image.save(buffered, format=image_format)
80
+ return buffered.getvalue()
81
+
82
+ except Exception as e:
83
+ logger.warning(f"Failed to optimize image: {str(e)}")
84
+ return image_data # Return original data if optimization fails
85
+
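+ # Worked example of the resize math above: a 4000x3000 source with
+ # max_size=1024 gives ratio = 1024 / 4000 = 0.256, so the image is resized
+ # to (1024, 768) before being re-encoded in its original format.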
86
+
87
+ def encode_images(image_sources: List[str], with_header: bool = True) -> List[str]:
88
+ """
89
+ Encode images to base64 format with robust file handling
90
+
91
+ Args:
92
+ image_sources: List of URLs or local file paths of images
93
+ with_header: Whether to include MIME type header
94
+
95
+ Returns:
96
+ List[str]: Base64 encoded image strings, with MIME type prefix if with_header is True
97
+
98
+ Raises:
99
+ ValueError: When image source is invalid or image format is not supported
100
+ """
101
+ if not image_sources:
102
+ raise ValueError("Image sources cannot be empty")
103
+
104
+ images = []
105
+ for image_source in image_sources:
106
+ try:
107
+ # Get file with validation (only image files allowed)
108
+ file_path, mime_type, content = get_file_from_source(
109
+ image_source,
110
+ allowed_mime_prefixes=["image/"],
111
+ max_size_mb=10.0, # 10MB limit for images
112
+ type="image",
113
+ )
114
+
115
+ # Optimize image
116
+ optimized_content = optimize_image(content)
117
+
118
+ # Encode to base64
119
+ image_base64 = base64.b64encode(optimized_content).decode()
120
+
121
+ # Format with header if requested
122
+ final_image = f"data:{mime_type};base64,{image_base64}" if with_header else image_base64
123
+
124
+ images.append(final_image)
125
+
126
+ # Clean up temporary file if it was created for a URL
127
+ if file_path != os.path.abspath(image_source) and os.path.exists(file_path):
128
+ os.unlink(file_path)
129
+
130
+ except Exception as e:
131
+ logger.error(f"Error encoding image from {image_source}: {str(e)}")
132
+ raise
133
+
134
+ return images
135
+
136
+
137
+ def image_to_base64(image_path):
138
+ try:
139
+ # Open the image
140
+ with Image.open(image_path) as image:
141
+ buffered = BytesIO()
142
+ image_format = image.format if image.format else "JPEG"
143
+ image.save(buffered, format=image_format)
144
+ image_bytes = buffered.getvalue()
145
+ base64_encoded = base64.b64encode(image_bytes).decode("utf-8")
146
+ return base64_encoded
147
+ except Exception as e:
148
+ print(f"Base64 error: {e}")
149
+ return None
150
+
151
+
152
+ def create_image_contents(prompt: str, image_base64: List[str]) -> List[Dict[str, Any]]:
153
+ """Create uniform image format for querying llm."""
154
+ content = [
155
+ {"type": "text", "text": prompt},
156
+ ]
157
+ content.extend([{"type": "image_url", "image_url": {"url": url}} for url in image_base64])
158
+ return content
159
+
160
+
161
+ @mcp.tool(
162
+ description="solve the question by careful reasoning given the image(s) in given filepath or url, including reasoning, ocr, etc."
163
+ )
164
+ def mcp_image_recognition(
165
+ image_urls: List[str] = Field(description="The input image(s) in given a list of filepaths or urls."),
166
+ question: str = Field(description="The question to ask."),
167
+ ) -> str:
168
+ """solve the question by careful reasoning given the image(s) in given filepath or url."""
169
+
170
+ try:
171
+ image_base64 = image_to_base64(image_urls[0])
172
+ logger.info(f"image_base64:{image_urls[0]}")
173
+ reasoning_prompt = question
174
+ messages = [
175
+ {"role": "system", "content": "You are a helpful assistant."},
176
+ {
177
+ "role": "user",
178
+ "content": [
179
+ {"type": "text", "text": reasoning_prompt},
180
+ {
181
+ "type": "image_url",
182
+ "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
183
+ },
184
+ ],
185
+ },
186
+ ]
187
+
188
+ client = OpenAI(api_key=os.getenv("IMAGE_LLM_API_KEY"), base_url=os.getenv("IMAGE_LLM_BASE_URL"))
189
+ response = client.chat.completions.create(
190
+ model=os.getenv("IMAGE_LLM_MODEL_NAME"),
191
+ messages=messages,
192
+ )
193
+
194
+ logger.info(f"response:{response.choices[0]}")
195
+ image_reasoning_result = response.choices[0].message.content
196
+
197
+ except Exception as e:
198
+ image_reasoning_result = ""
199
+ import traceback
200
+
201
+ traceback.print_exc()
202
+ logger.error(f"image_reasoning_result-Execute error: {e}")
203
+
204
+ logger.info(f"---get_reasoning_by_image-image_reasoning_result:{image_reasoning_result}")
205
+
206
+ return image_reasoning_result
207
+
208
+
209
+ def main():
210
+ from dotenv import load_dotenv
211
+
212
+ load_dotenv()
213
+
214
+ print("Starting Image MCP Server...", file=sys.stderr)
215
+ mcp.run(transport="stdio")
216
+
217
+
218
+ # Make the module callable
219
+ def __call__():
220
+ """
221
+ Make the module callable for uvx.
222
+ This function is called when the module is executed directly.
223
+ """
224
+ main()
225
+
226
+
227
+ # Add this for compatibility with uvx
228
+ import sys
229
+
230
+ sys.modules[__name__].__call__ = __call__
231
+
232
+ # Run the server when the script is executed directly
233
+ if __name__ == "__main__":
234
+ main()
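
For illustration, a sketch of how the helpers above compose an OCR request; the local path is hypothetical and the IMAGE_LLM_* variables must be set before the resulting messages are sent.

    # encode_images yields data URLs; create_image_contents pairs them with a prompt.
    data_urls = encode_images(["/path/to/receipt.png"])
    content = create_image_contents(IMAGE_OCR, data_urls)
    messages = [{"role": "user", "content": content}]
    # messages can then be passed to client.chat.completions.create(...) as in the tool above.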
mcp_servers/picsearch_server.py ADDED
@@ -0,0 +1,180 @@
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import os
5
+ import sys
6
+
7
+ import aiohttp
8
+ from typing import List, Dict, Any, Optional
9
+ from dotenv import load_dotenv
10
+ from mcp.server import FastMCP
11
+ from pydantic import Field
12
+
13
+ from aworld.logs.util import logger
14
+
15
+
16
+ mcp = FastMCP("picsearch-server")
17
+
18
+ async def search_single(query: str, num: int = 5) -> Optional[Dict[str, Any]]:
19
+ """Execute a single search query, returns None on error"""
20
+ try:
21
+ url = os.getenv('PIC_SEARCH_URL')
22
+ searchMode = os.getenv('PIC_SEARCH_SEARCHMODE')
23
+ source = os.getenv('PIC_SEARCH_SOURCE')
24
+ domain = os.getenv('PIC_SEARCH_DOMAIN')
25
+ uid = os.getenv('PIC_SEARCH_UID')
26
+ if not url or not searchMode or not source or not domain:
27
+ logger.warning(f"Query failed: url, searchMode, source, domain parameters incomplete")
28
+ return None
29
+
30
+ headers = {
31
+ 'Content-Type': 'application/json'
32
+ }
33
+ data = {
34
+ "domain": domain,
35
+ "extParams": {
36
+ "contentType": "llmWholeImage"
37
+ },
38
+ "page": 0,
39
+ "pageSize": num,
40
+ "query": query,
41
+ "searchMode": searchMode,
42
+ "source": source,
43
+ "userId": uid
44
+ }
45
+
46
+ async with aiohttp.ClientSession() as session:
47
+ try:
48
+ async with session.post(url, headers=headers, json=data) as response:
49
+ if response.status != 200:
50
+ logger.warning(f"Query failed: {query}, status code: {response.status}")
51
+ return None
52
+
53
+ result = await response.json()
54
+ return result
55
+ except aiohttp.ClientError:
56
+ logger.warning(f"Request error: {query}")
57
+ return None
58
+ except Exception:
59
+ logger.warning(f"Query exception: {query}")
60
+ return None
61
+
62
+
63
+ def filter_valid_docs(result: Optional[Dict[str, Any]]) -> List[Dict[str, Any]]:
64
+ """Filter valid document results, returns empty list if input is None"""
65
+ if result is None:
66
+ return []
67
+
68
+ try:
69
+ valid_docs = []
70
+
71
+ # Check success field
72
+ if not result.get("success"):
73
+ return valid_docs
74
+
75
+ # Check searchDocs field
76
+ search_docs = result.get("searchImages", [])
77
+ if not search_docs:
78
+ return valid_docs
79
+
80
+ # Extract required fields
81
+ required_fields = ["title", "picUrl"]
82
+
83
+ for doc in search_docs:
84
+ # Check if all required fields exist and are not empty
85
+ is_valid = True
86
+ for field in required_fields:
87
+ if field not in doc or not doc[field]:
88
+ is_valid = False
89
+ break
90
+
91
+ if is_valid:
92
+ # Keep only required fields
93
+ filtered_doc = {field: doc[field] for field in required_fields}
94
+ valid_docs.append(filtered_doc)
95
+
96
+ return valid_docs
97
+ except Exception:
98
+ return []
99
+
100
+ @mcp.tool(description="Search Picture based on the user's input query")
101
+ async def search(
102
+ query: str = Field(
103
+ description="The query to search for picture"
104
+ ),
105
+ num: int = Field(
106
+ 5,
107
+ description="Maximum number of results to return, default is 5"
108
+ )
109
+ ) -> Any:
110
+ """Execute search function for a single query"""
111
+ try:
112
+ # Get configuration from environment variables
113
+ env_total_num = os.getenv('PIC_SEARCH_TOTAL_NUM')
114
+ if env_total_num and env_total_num.isdigit():
115
+ # Force override input num parameter with environment variable
116
+ num = int(env_total_num)
117
+
118
+ # If no query provided, return empty list
119
+ if not query:
120
+ return json.dumps([])
121
+
122
+ # Get actual number of results to return
123
+ slice_num = os.getenv('PIC_SEARCH_SLICE_NUM')
124
+ if slice_num and slice_num.isdigit():
125
+ actual_num = int(slice_num)
126
+ else:
127
+ actual_num = num
128
+
129
+ # Execute the query
130
+ result = await search_single(query, actual_num)
131
+
132
+ # Filter results
133
+ valid_docs = filter_valid_docs(result)
134
+
135
+ # Return results
136
+ result_json = json.dumps(valid_docs, ensure_ascii=False)
137
+ logger.info(f"Completed query: '{query}', found {len(valid_docs)} valid documents")
138
+ logger.info(result_json)
139
+
140
+ return result_json
141
+ except Exception as e:
142
+ # Return empty list on exception
143
+ logger.error(f"Error processing query: {str(e)}")
144
+ return json.dumps([])
145
+
146
+
147
+ def main():
148
+ from dotenv import load_dotenv
149
+
150
+ load_dotenv()
151
+
152
+ print("Starting Audio MCP picsearch-server...", file=sys.stderr)
153
+ mcp.run(transport="stdio")
154
+
155
+
156
+ # Make the module callable
157
+ def __call__():
158
+ """
159
+ Make the module callable for uvx.
160
+ This function is called when the module is executed directly.
161
+ """
162
+ main()
163
+
164
+ sys.modules[__name__].__call__ = __call__
165
+
166
+ if __name__ == "__main__":
167
+ main()
168
+
169
+ # if __name__ == "__main__":
170
+ # # Configure logging
171
+ # logging.basicConfig(
172
+ # level=logging.INFO,
173
+ # format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
174
+ # )
175
+ #
176
+ #
177
+ # # Test single query
178
+ # asyncio.run(search(query="Image search test"))
179
+ #
180
+ # # Test multiple queries no longer applies
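
In the same spirit as the commented-out test above, a runnable sketch, assuming the PIC_SEARCH_* environment variables are configured:

    import asyncio
    import json

    async def demo() -> None:
        raw = await search(query="golden gate bridge", num=3)
        for doc in json.loads(raw):
            print(doc["title"], doc["picUrl"])

    # asyncio.run(demo())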
mcp_servers/reasoning_server.py ADDED
@@ -0,0 +1,102 @@
1
+ import os
2
+ import sys
3
+ import traceback
4
+
5
+ from dotenv import load_dotenv
6
+ from mcp.server.fastmcp import FastMCP
7
+ from pydantic import Field
8
+
9
+ from aworld.config.conf import AgentConfig
10
+ from aworld.logs.util import logger
11
+ from aworld.models.llm import call_llm_model, get_llm_model
12
+
13
+ # Initialize MCP server
14
+ mcp = FastMCP("reasoning-server")
15
+
16
+
17
+ @mcp.tool(
18
+ description="Perform complex problem reasoning using powerful reasoning model."
19
+ )
20
+ def complex_problem_reasoning(
21
+ question: str = Field(
22
+ description="The input question for complex problem reasoning,"
23
+ + " such as math and code contest problem",
24
+ ),
25
+ original_task: str = Field(
26
+ default="",
27
+ description="The original task description."
28
+ + " This argument could be fetched from the <task>TASK</task> tag",
29
+ ),
30
+ ) -> str:
31
+ """
32
+ Perform complex problem reasoning using Powerful Reasoning model,
33
+ such as riddle, game or competition-level STEM(including code) problems.
34
+
35
+ Args:
36
+ question: The input question for complex problem reasoning
37
+ original_task: The original task description (optional)
38
+
39
+ Returns:
40
+ str: The reasoning result from the model
41
+ """
42
+ try:
43
+ # Prepare the prompt with both the question and original task if provided
44
+ prompt = question
45
+ if original_task:
46
+ prompt = f"Original Task: {original_task}\n\nQuestion: {question}"
47
+
48
+ # Call the LLM model for reasoning
49
+ response = call_llm_model(
50
+ llm_model=get_llm_model(
51
+ conf=AgentConfig(
52
+ llm_provider="openai",
53
+ llm_model_name="anthropic/claude-3.7-sonnet:thinking",
54
+ llm_api_key=os.getenv("LLM_API_KEY", "your_openai_api_key"),
55
+ llm_base_url=os.getenv("LLM_BASE_URL", "your_openai_base_url"),
56
+ )
57
+ ),
58
+ messages=[
59
+ {
60
+ "role": "system",
61
+ "content": (
62
+ "You are an expert at solving complex problems including math,"
63
+ " code contests, riddles, and puzzles."
64
+ " Provide detailed step-by-step reasoning and a clear final answer."
65
+ ),
66
+ },
67
+ {"role": "user", "content": prompt},
68
+ ],
69
+ temperature=float(os.getenv("LLM_TEMPERATURE", "0.3")),
70
+ )
71
+
72
+ # Extract the reasoning result
73
+ reasoning_result = response.content
74
+
75
+ logger.info("Complex reasoning completed successfully")
76
+ return reasoning_result
77
+
78
+ except Exception as e:
79
+ logger.error(f"Error in complex problem reasoning: {traceback.format_exc()}")
80
+ return f"Error performing reasoning: {str(e)}"
81
+
82
+
83
+ def main():
84
+ load_dotenv()
85
+ print("Starting Reasoning MCP Server...", file=sys.stderr)
86
+ mcp.run(transport="stdio")
87
+
88
+
89
+ # Make the module callable
90
+ def __call__():
91
+ """
92
+ Make the module callable for uvx.
93
+ This function is called when the module is executed directly.
94
+ """
95
+ main()
96
+
97
+
98
+ sys.modules[__name__].__call__ = __call__
99
+
100
+ # Run the server when the script is executed directly
101
+ if __name__ == "__main__":
102
+ main()
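
The prompt assembly inside complex_problem_reasoning reduces to a small template; this standalone sketch mirrors only that logic:

    def build_prompt(question: str, original_task: str = "") -> str:
        # Mirrors the tool above: prepend the original task when it is provided.
        if original_task:
            return f"Original Task: {original_task}\n\nQuestion: {question}"
        return question

    assert build_prompt("What is 2 + 2?") == "What is 2 + 2?"
    assert build_prompt("q", "t").startswith("Original Task: t")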
mcp_servers/requirements.txt ADDED
@@ -0,0 +1,39 @@
1
+ opencv-python~=4.11.0.86
2
+ pyshine~=0.0.9
3
+ playwright~=1.50.0
4
+ gymnasium~=1.1.0
5
+ pygame~=2.6.1
6
+ openpyxl~=3.1.5
7
+ xls2xlsx~=0.2.0
8
+ wikipedia~=1.4.0
9
+ duckduckgo_search~=7.5.2
10
+ baidusearch~=1.0.3
11
+ langchain-experimental~=0.3.4
12
+ prometheus_client~=0.21.1
13
+ opentelemetry-sdk~=1.32.1
14
+ opentelemetry-api~=1.32.1
15
+ opentelemetry-exporter-otlp~=1.32.1
16
+ opentelemetry-instrumentation-system-metrics~=0.53b1
17
+ aiohttp~=3.11.16
18
+ requests~=2.32.3
19
+ loguru~=0.7.3
20
+ oss2
21
+
22
+ fastapi
23
+ pydantic
24
+ selenium
25
+ frontend
26
+ tools
27
+ PyPDF2
28
+ html2text
29
+ xmltodict
30
+ docx2markdown
31
+ python-pptx
32
+ browser_use
33
+ faiss-cpu==1.9.0
34
+ pycryptodome
35
+ tabulate
36
+ flask
37
+ pymupdf  # provides the 'fitz' import; the PyPI package named 'fitz' is an unrelated project
38
+ terminal_controller # mcp server: https://github.com/GongRzhe/terminal-controller-mcp
39
+ mcp_server_calculator # mcp server: https://github.com/githejie/mcp-server-calculator
mcp_servers/search_server.py ADDED
@@ -0,0 +1,165 @@
1
+ """
2
+ Search MCP Server
3
+
4
+ This module provides MCP server functionality for performing web searches using various search engines.
5
+ It supports structured queries and returns formatted search results.
6
+
7
+ Key features:
8
+ - Perform web searches using the Google Custom Search API
9
+ - Filter and format search results
10
+ - Validate and process search queries
11
+
12
+ Main functions:
13
+ - mcpsearchgoogle: Searches the web using the Google Custom Search API
16
+ """
17
+
18
+ import os
19
+ import sys
20
+ import traceback
21
+ from typing import List, Optional
22
+
23
+ import requests
24
+ from dotenv import load_dotenv
25
+ from mcp.server.fastmcp import FastMCP
26
+ from pydantic import BaseModel, Field
27
+
28
+ from aworld.logs.util import logger
29
+
30
+ # Initialize MCP server
31
+ mcp = FastMCP("search-server")
32
+
33
+
34
+ # Base search result model that all providers will use
35
+ class SearchResult(BaseModel):
36
+ """Base search result model with common fields"""
37
+
38
+ id: str
39
+ title: str
40
+ url: str
41
+ snippet: str
42
+ source: str # Which search engine provided this result
43
+
44
+
45
+ class GoogleSearchResult(SearchResult):
46
+ """Google-specific search result model"""
47
+
48
+ displayLink: str = ""
49
+ formattedUrl: str = ""
50
+ htmlSnippet: str = ""
51
+ htmlTitle: str = ""
52
+ kind: str = ""
53
+ link: str = ""
54
+
55
+
56
+ class SearchResponse(BaseModel):
57
+ """Unified search response model"""
58
+
59
+ query: str
60
+ results: List[SearchResult]
61
+ count: int
62
+ source: str
63
+ error: Optional[str] = None
64
+
65
+
66
+ @mcp.tool(description="Search the web using Google Custom Search API.")
67
+ def mcpsearchgoogle(
68
+ query: str = Field(..., description="The search query string."),
69
+ num_results: int = Field(
70
+ 10, description="Number of search results to return (default 10)."
71
+ ),
72
+ safe_search: bool = Field(
73
+ True, description="Whether to enable safe search filtering."
74
+ ),
75
+ language: str = Field("en", description="Language code for search results."),
76
+ country: str = Field("us", description="Country code for search results."),
77
+ ) -> str:
78
+ """
79
+ Search the web using Google Custom Search API.
80
+
81
+ Requires GOOGLE_API_KEY and GOOGLE_CSE_ID environment variables to be set.
82
+ """
83
+ try:
84
+ api_key = os.environ.get("GOOGLE_API_KEY")
85
+ cse_id = os.environ.get("GOOGLE_CSE_ID")
86
+
87
+ if not api_key:
88
+ raise ValueError("GOOGLE_API_KEY environment variable not set")
89
+ if not cse_id:
90
+ raise ValueError("GOOGLE_CSE_ID environment variable not set")
91
+
92
+ # Ensure num_results is within valid range
93
+ num_results = max(1, min(num_results, 10))  # Google CSE allows at most 10 results per request
94
+
95
+ # Build the Google Custom Search API URL
96
+ url = "https://www.googleapis.com/customsearch/v1"
97
+ params = {
98
+ "key": api_key,
99
+ "cx": cse_id,
100
+ "q": query,
101
+ "num": num_results,
102
+ "safe": "active" if safe_search else "off",
103
+ "hl": language,
104
+ "gl": country,
105
+ }
106
+
107
+ logger.info(f"Google search starts for query: {query}")
108
+ response = requests.get(url, params=params, timeout=10)
109
+ response.raise_for_status()
110
+
111
+ data = response.json()
112
+ search_results = []
113
+
114
+ if "items" in data:
115
+ for i, item in enumerate(data["items"]):
116
+ result = GoogleSearchResult(
117
+ id=f"google-{i}",
118
+ title=item.get("title", ""),
119
+ url=item.get("link", ""),
120
+ snippet=item.get("snippet", ""),
121
+ source="google",
122
+ displayLink=item.get("displayLink", ""),
123
+ formattedUrl=item.get("formattedUrl", ""),
124
+ htmlSnippet=item.get("htmlSnippet", ""),
125
+ htmlTitle=item.get("htmlTitle", ""),
126
+ kind=item.get("kind", ""),
127
+ link=item.get("link", ""),
128
+ )
129
+ search_results.append(result)
130
+
131
+ return SearchResponse(
132
+ query=query,
133
+ results=search_results,
134
+ count=len(search_results),
135
+ source="google",
136
+ ).model_dump_json()
137
+
138
+ except Exception as e:
139
+ logger.error(f"Google search error: {traceback.format_exc()}")
140
+ return SearchResponse(
141
+ query=query, results=[], count=0, source="google", error=str(e)
142
+ ).model_dump_json()
143
+
144
+
145
+ def main():
146
+ load_dotenv()
147
+
148
+ print("Starting Search MCP Server...", file=sys.stderr)
149
+ mcp.run(transport="stdio")
150
+
151
+
152
+ # Make the module callable
153
+ def __call__():
154
+ """
155
+ Make the module callable for uvx.
156
+ This function is called when the module is executed directly.
157
+ """
158
+ main()
159
+
160
+
161
+ sys.modules[__name__].__call__ = __call__
162
+
163
+ # Run the server when the script is executed directly
164
+ if __name__ == "__main__":
165
+ main()
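
A client-side sketch of consuming the tool's JSON output; it assumes GOOGLE_API_KEY and GOOGLE_CSE_ID are set and that the tool is called in-process.

    import json

    raw = mcpsearchgoogle(query="model context protocol", num_results=5)
    data = json.loads(raw)  # fields follow the SearchResponse model above
    if data["error"]:
        print("search failed:", data["error"])
    else:
        for hit in data["results"]:
            print(f'[{hit["source"]}] {hit["title"]} -> {hit["url"]}')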
mcp_servers/utils.py ADDED
@@ -0,0 +1,193 @@
1
+ import asyncio
2
+ import json
3
+ import os
4
+ import tempfile
5
+ from typing import List, Optional, Tuple
6
+ from urllib.parse import urlparse
7
+
8
+ import requests
9
+ from mcp.server import FastMCP
10
+
11
+ from aworld.logs.util import logger
12
+
13
+
14
+ def get_mime_type(file_path: str, default_mime: Optional[str] = None) -> str:
15
+ """
16
+ Detect MIME type of a file using python-magic if available,
17
+ otherwise fallback to extension-based detection.
18
+
19
+ Args:
20
+ file_path: Path to the file
21
+ default_mime: Default MIME type to return if detection fails
22
+
23
+ Returns:
24
+ str: Detected MIME type
25
+ """
26
+ # Try using python-magic for accurate MIME type detection when it is installed
27
+ try:
28
+ import magic  # optional dependency
29
+ mime = magic.Magic(mime=True)
30
+ return mime.from_file(file_path)
31
+ except (ImportError, AttributeError, IOError):
32
+ # Fallback to extension-based detection
33
+ extension_mime_map = {
34
+ # Audio formats
35
+ ".mp3": "audio/mpeg",
36
+ ".wav": "audio/wav",
37
+ ".ogg": "audio/ogg",
38
+ ".m4a": "audio/mp4",
39
+ ".flac": "audio/flac",
40
+ # Image formats
41
+ ".jpg": "image/jpeg",
42
+ ".jpeg": "image/jpeg",
43
+ ".png": "image/png",
44
+ ".gif": "image/gif",
45
+ ".webp": "image/webp",
46
+ ".bmp": "image/bmp",
47
+ ".tiff": "image/tiff",
48
+ # Video formats
49
+ ".mp4": "video/mp4",
50
+ ".avi": "video/x-msvideo",
51
+ ".mov": "video/quicktime",
52
+ ".mkv": "video/x-matroska",
53
+ ".webm": "video/webm",
54
+ }
55
+
56
+ ext = os.path.splitext(file_path)[1].lower()
57
+ return extension_mime_map.get(ext, default_mime or "application/octet-stream")
58
+
59
+
60
+ def is_url(path_or_url: str) -> bool:
61
+ """
62
+ Check if the given string is a URL.
63
+
64
+ Args:
65
+ path_or_url: String to check
66
+
67
+ Returns:
68
+ bool: True if the string is a URL, False otherwise
69
+ """
70
+ parsed = urlparse(path_or_url)
71
+ return bool(parsed.scheme and parsed.netloc)
72
+
73
+
74
+ def get_file_from_source(
75
+ source: str,
76
+ allowed_mime_prefixes: List[str] = None,
77
+ max_size_mb: float = 100.0,
78
+ timeout: int = 60,
79
+ type: str = "image",
80
+ ) -> Tuple[str, str, bytes]:
81
+ """
82
+ Unified function to get file content from a URL or local path with validation.
83
+
84
+ Args:
85
+ source: URL or local file path
86
+ allowed_mime_prefixes: List of allowed MIME type prefixes (e.g., ['audio/', 'video/'])
87
+ max_size_mb: Maximum allowed file size in MB
88
+ timeout: Timeout for URL requests in seconds
89
+
90
+ Returns:
91
+ Tuple[str, str, bytes]: (file_path, mime_type, file_content)
92
+ - For URLs, file_path will be a temporary file path
93
+ - For local files, file_path will be the original path
94
+
95
+ Raises:
96
+ ValueError: When file doesn't exist, exceeds size limit, or has invalid MIME type
97
+ IOError: When file cannot be read
98
+ requests.RequestException: When URL request fails
99
+ """
100
+ max_size_bytes = max_size_mb * 1024 * 1024
101
+ temp_file = None
102
+
103
+ try:
104
+ if is_url(source):
105
+ # Handle URL
106
+ logger.info(f"Downloading file from URL: {source}")
107
+ response = requests.get(source, stream=True, timeout=timeout)
108
+ response.raise_for_status()
109
+
110
+ # Check Content-Length if available
111
+ content_length = response.headers.get("Content-Length")
112
+ if content_length and int(content_length) > max_size_bytes:
113
+ raise ValueError(f"File size exceeds limit of {max_size_mb}MB")
114
+
115
+ # Create a temporary file
116
+ temp_file = tempfile.NamedTemporaryFile(delete=False)
117
+ file_path = temp_file.name
118
+
119
+ # Download content in chunks to avoid memory issues
120
+ content = bytearray()
121
+ downloaded_size = 0
122
+ for chunk in response.iter_content(chunk_size=8192):
123
+ downloaded_size += len(chunk)
124
+ if downloaded_size > max_size_bytes:
125
+ raise ValueError(f"File size exceeds limit of {max_size_mb}MB")
126
+ temp_file.write(chunk)
127
+ content.extend(chunk)
128
+
129
+ temp_file.close()
130
+
131
+ # Get MIME type
132
+ if type == "audio":
133
+ mime_type = "audio/mpeg"
134
+ elif type == "image":
135
+ mime_type = "image/jpeg"
136
+ elif type == "video":
137
+ mime_type = "video/mp4"
138
+
139
+ # mime_type = get_mime_type(file_path)
140
+
141
+ # For URLs where magic fails, try to use Content-Type header
142
+ if mime_type == "application/octet-stream":
143
+ content_type = response.headers.get("Content-Type", "").split(";")[0]
144
+ if content_type:
145
+ mime_type = content_type
146
+ else:
147
+ # Handle local file
148
+ file_path = os.path.abspath(source)
149
+
150
+ # Check if file exists
151
+ if not os.path.exists(file_path):
152
+ raise ValueError(f"File not found: {file_path}")
153
+
154
+ # Check file size
155
+ file_size = os.path.getsize(file_path)
156
+ if file_size > max_size_bytes:
157
+ raise ValueError(f"File size exceeds limit of {max_size_mb}MB")
158
+
159
+ # Get MIME type
160
+ if type == "audio":
161
+ mime_type = "audio/mpeg"
162
+ elif type == "image":
163
+ mime_type = "image/jpeg"
164
+ elif type == "video":
165
+ mime_type = "video/mp4"
166
+ # mime_type = get_mime_type(file_path)
167
+
168
+ # Read file content
169
+ with open(file_path, "rb") as f:
170
+ content = f.read()
171
+
172
+ # Validate MIME type if allowed_mime_prefixes is provided
173
+ if allowed_mime_prefixes:
174
+ if not any(
175
+ mime_type.startswith(prefix) for prefix in allowed_mime_prefixes
176
+ ):
177
+ allowed_types = ", ".join(allowed_mime_prefixes)
178
+ raise ValueError(
179
+ f"Invalid file type: {mime_type}. Allowed types: {allowed_types}"
180
+ )
181
+
182
+ return file_path, mime_type, content
183
+
184
+ except Exception as e:
185
+ # Clean up temporary file if an error occurs
186
+ if temp_file and os.path.exists(temp_file.name):
187
+ os.unlink(temp_file.name)
188
+ raise e
189
+
190
+
191
+ if __name__ == "__main__":
192
+ mcp_tools = []
193
+ logger.info(f"{json.dumps(mcp_tools, indent=4, ensure_ascii=False)}")
mcp_servers/video_server.py ADDED
@@ -0,0 +1,451 @@
1
+ # pylint: disable=E1101
2
+
3
+ import base64
4
+ import os
5
+ import sys
6
+ import traceback
7
+ from dataclasses import dataclass
8
+ from typing import Any, Dict, List, Optional, Tuple
9
+
10
+ import cv2
11
+ import numpy as np
12
+ from dotenv import load_dotenv
13
+ from mcp.server.fastmcp import FastMCP
14
+ from openai import OpenAI
15
+ from pydantic import Field
16
+
17
+ from aworld.logs.util import logger
18
+ from mcp_servers.utils import get_file_from_source
19
+
20
+ client = OpenAI(api_key=os.getenv("VIDEO_LLM_API_KEY"), base_url=os.getenv("VIDEO_LLM_BASE_URL"))
21
+
22
+ # Initialize MCP server
23
+ mcp = FastMCP("Video Server")
24
+
25
+
26
+ @dataclass
27
+ class KeyframeResult:
28
+ """Result of keyframe extraction from a video.
29
+
30
+ Attributes:
31
+ frame_paths: List of file paths to the saved keyframes
32
+ frame_timestamps: List of timestamps (in seconds) corresponding to each frame
33
+ output_directory: Directory where frames were saved
34
+ frame_count: Number of frames extracted
35
+ success: Whether the extraction was successful
36
+ error_message: Error message if extraction failed, None otherwise
37
+ """
38
+
39
+ frame_paths: List[str]
40
+ frame_timestamps: List[float]
41
+ output_directory: str
42
+ frame_count: int
43
+ success: bool
44
+ error_message: Optional[str] = None
45
+
46
+
47
+ VIDEO_ANALYZE = (
48
+ "Input is a sequence of video frames. Given user's task: {task}. "
49
+ "analyze the video content following these steps:\n"
50
+ "1. Temporal sequence understanding\n"
51
+ "2. Motion and action analysis\n"
52
+ "3. Scene context interpretation\n"
53
+ "4. Object and person tracking\n"
54
+ "Return a json string with the following format: "
55
+ '{{"video_analysis_result": "analysis result given task and video frames"}}'
56
+ )
57
+
58
+
59
+ VIDEO_EXTRACT_SUBTITLES = (
60
+ "Input is a sequence of video frames. "
61
+ "Extract all subtitles (if present) in the video. "
62
+ "Return a json string with the following format: "
63
+ '{"video_subtitles": "extracted subtitles from video"}'
64
+ )
65
+
66
+ VIDEO_SUMMARIZE = (
67
+ "Input is a sequence of video frames. "
68
+ "Summarize the main content of the video. "
69
+ "Include key points, main topics, and important visual elements. "
70
+ "Return a json string with the following format: "
71
+ '{"video_summary": "concise summary of the video content"}'
72
+ )
73
+
74
+
75
+ def get_video_frames(
76
+ video_source: str,
77
+ sample_rate: int = 2,
78
+ start_time: float = 0,
79
+ end_time: float = None,
80
+ ) -> List[Dict[str, Any]]:
81
+ """
82
+ Get frames from video with given sample rate using robust file handling
83
+
84
+ Args:
85
+ video_source: Path or URL to the video file
86
+ sample_rate: Number of frames to sample per second
87
+ start_time: Start time of the video segment in seconds (default: 0)
88
+ end_time: End time of the video segment in seconds (default: None, meaning the end of the video)
89
+
90
+ Returns:
91
+ List[Dict[str, Any]]: List of dictionaries containing frame data and timestamp
92
+
93
+ Raises:
94
+ ValueError: When video file cannot be opened or is not a valid video
95
+ """
96
+ try:
97
+ # Get file with validation (only video files allowed)
98
+ file_path, _, _ = get_file_from_source(
99
+ video_source,
100
+ allowed_mime_prefixes=["video/"],
101
+ max_size_mb=2500.0, # 2500MB limit for videos
102
+ type="video", # Specify type as video to handle video files
103
+ )
104
+
105
+ # Open video file
106
+ video = cv2.VideoCapture(file_path)
107
+ if not video.isOpened():
108
+ raise ValueError(f"Could not open video file: {file_path}")
109
+
110
+ fps = video.get(cv2.CAP_PROP_FPS)
111
+ frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
112
+ video_duration = frame_count / fps
113
+
114
+ if end_time is None:
115
+ end_time = video_duration
116
+
117
+ if start_time > end_time:
118
+ raise ValueError("Start time cannot be greater than end time.")
119
+
120
+ if start_time < 0:
121
+ start_time = 0
122
+
123
+ if end_time > video_duration:
124
+ end_time = video_duration
125
+
126
+ start_frame = int(start_time * fps)
127
+ end_frame = int(end_time * fps)
128
+
129
+ all_frames = []
130
+ frames = []
131
+
132
+ # Calculate frame interval based on sample rate
133
+ frame_interval = max(1, int(fps / sample_rate))
134
+
135
+ # Set the video capture to the start frame
136
+ video.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
137
+
138
+ for i in range(start_frame, end_frame):
139
+ ret, frame = video.read()
140
+ if not ret:
141
+ break
142
+
143
+ # Convert frame to JPEG format
144
+ _, buffer = cv2.imencode(".jpg", frame)
145
+ frame_data = base64.b64encode(buffer).decode("utf-8")
146
+
147
+ # Add data URL prefix for JPEG image
148
+ frame_data = f"data:image/jpeg;base64,{frame_data}"
149
+
150
+ all_frames.append({"data": frame_data, "time": i / fps})
151
+
152
+ for i in range(0, len(all_frames), frame_interval):
153
+ frames.append(all_frames[i])
154
+
155
+ video.release()
156
+
157
+ # Clean up temporary file if it was created for a URL
158
+ if file_path != os.path.abspath(video_source) and os.path.exists(file_path):
159
+ os.unlink(file_path)
160
+
161
+ if not frames:
162
+ raise ValueError(f"Could not extract any frames from video: {video_source}")
163
+
164
+ return frames
165
+
166
+ except Exception as e:
167
+ logger.error(f"Error extracting frames from {video_source}: {str(e)}")
168
+ raise
169
+
170
+
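+ # Worked example of the sampling in get_video_frames: with fps = 30 and
+ # sample_rate = 2, frame_interval = max(1, int(30 / 2)) = 15, so roughly two
+ # frames are kept per second; a 30 s clip therefore yields about 60 frames.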
171
+ def create_video_content(prompt: str, video_frames: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
172
+ """Create uniform video format for querying llm."""
173
+ content = [{"type": "text", "text": prompt}]
174
+ content.extend([{"type": "image_url", "image_url": {"url": frame["data"]}} for frame in video_frames])
175
+ return content
176
+
177
+
178
+ @mcp.tool(description="Analyze the video content by the given question.")
179
+ def mcp_analyze_video(
180
+ video_url: str = Field(description="The input video in given filepath or url."),
181
+ question: str = Field(description="The question to analyze."),
182
+ sample_rate: int = Field(default=2, description="Sample n frames per second."),
183
+ start_time: float = Field(default=0, description="Start time of the video segment in seconds."),
184
+ end_time: float = Field(default=None, description="End time of the video segment in seconds."),
185
+ ) -> str:
186
+ """analyze the video content by the given question."""
187
+
188
+ try:
189
+ video_frames = get_video_frames(video_url, sample_rate, start_time, end_time)
190
+ logger.info(f"---len video_frames:{len(video_frames)}")
191
+ interval = 20
192
+ frame_nums = 30
193
+ all_res = []
194
+ for i in range(0, len(video_frames), interval):
195
+ inputs = []
196
+ cur_frames = video_frames[i : i + frame_nums]
197
+ content = create_video_content(VIDEO_ANALYZE.format(task=question), cur_frames)
198
+ inputs.append({"role": "user", "content": content})
199
+ try:
200
+ response = client.chat.completions.create(
201
+ model=os.getenv("VIDEO_LLM_MODEL_NAME"),
202
+ messages=inputs,
203
+ temperature=0,
204
+ )
205
+ cur_video_analysis_result = response.choices[0].message.content
206
+ except Exception:
207
+ cur_video_analysis_result = ""
208
+ all_res.append(f"result of video part {int(i / interval + 1)}: {cur_video_analysis_result}")
209
+ if i + frame_nums >= len(video_frames):
210
+ break
211
+ video_analysis_result = "\n".join(all_res)
212
+
213
+ except (ValueError, IOError, RuntimeError):
214
+ video_analysis_result = ""
215
+ logger.error(f"video_analysis-Execute error: {traceback.format_exc()}")
216
+
217
+ logger.info(f"---get_analysis_by_video-video_analysis_result:{video_analysis_result}")
218
+ return video_analysis_result
219
+
220
+
221
+ @mcp.tool(description="Extract subtitles from the video.")
222
+ def mcp_extract_video_subtitles(
223
+ video_url: str = Field(description="The input video in given filepath or url."),
224
+ sample_rate: int = Field(default=2, description="Sample n frames per second."),
225
+ start_time: float = Field(default=0, description="Start time of the video segment in seconds."),
226
+ end_time: float = Field(default=None, description="End time of the video segment in seconds."),
227
+ ) -> str:
228
+ """extract subtitles from the video."""
229
+ inputs = []
230
+ try:
231
+ video_frames = get_video_frames(video_url, sample_rate, start_time, end_time)
232
+ content = create_video_content(VIDEO_EXTRACT_SUBTITLES, video_frames)
233
+ inputs.append({"role": "user", "content": content})
234
+
235
+ response = client.chat.completions.create(
236
+ model=os.getenv("VIDEO_LLM_MODEL_NAME"),
237
+ messages=inputs,
238
+ temperature=0,
239
+ )
240
+ video_subtitles = response.choices[0].message.content
241
+ except (ValueError, IOError, RuntimeError):
242
+ video_subtitles = ""
243
+ logger.error(f"video_subtitles-Execute error: {traceback.format_exc()}")
244
+
245
+ logger.info(f"---get_subtitles_from_video-video_subtitles:{video_subtitles}")
246
+ return video_subtitles
247
+
248
+
249
+ @mcp.tool(description="Summarize the main content of the video.")
250
+ def mcp_summarize_video(
251
+ video_url: str = Field(description="The input video in given filepath or url."),
252
+ sample_rate: int = Field(default=2, description="Sample n frames per second."),
253
+ start_time: float = Field(default=0, description="Start time of the video segment in seconds."),
254
+ end_time: float = Field(default=None, description="End time of the video segment in seconds."),
255
+ ) -> str:
256
+ """summarize the main content of the video."""
257
+ try:
258
+ video_frames = get_video_frames(video_url, sample_rate, start_time, end_time)
259
+ logger.info(f"---len video_frames:{len(video_frames)}")
260
+ interval = 490
261
+ frame_nums = 500
262
+ all_res = []
263
+ for i in range(0, len(video_frames), interval):
264
+ inputs = []
265
+ cur_frames = video_frames[i : i + frame_nums]
266
+ content = create_video_content(VIDEO_SUMMARIZE, cur_frames)
267
+ inputs.append({"role": "user", "content": content})
268
+ try:
269
+ response = client.chat.completions.create(
270
+ model=os.getenv("VIDEO_LLM_MODEL_NAME"),
271
+ messages=inputs,
272
+ temperature=0,
273
+ )
274
+ logger.info(f"---response:{response}")
275
+ cur_video_summary = response.choices[0].message.content
276
+ except Exception:
277
+ cur_video_summary = ""
278
+ all_res.append(f"summary of video part {int(i / interval + 1)}: {cur_video_summary}")
279
+ logger.info(f"summary of video part {int(i / interval + 1)}: {cur_video_summary}")
280
+ video_summary = "\n".join(all_res)
281
+
282
+ except (ValueError, IOError, RuntimeError):
283
+ video_summary = ""
284
+ logger.error(f"video_summary-Execute error: {traceback.format_exc()}")
285
+
286
+ logger.info(f"---get_summary_from_video-video_summary:{video_summary}")
287
+ return video_summary
288
+
289
+
290
+ @mcp.tool(description="Extract key frames around the target time with scene detection")
291
+ def get_video_keyframes(
292
+ video_path: str = Field(description="The input video in given filepath or url."),
293
+ target_time: int = Field(
294
+ description=(
295
+ "The specific time point for extraction, centered within the window_size argument, the unit is of second."
296
+ )
297
+ ),
298
+ window_size: int = Field(
299
+ default=5,
300
+ description="The window size for extraction, the unit is of second.",
301
+ ),
302
+ cleanup: bool = Field(
303
+ default=False,
304
+ description="Whether to delete the original video file after processing.",
305
+ ),
306
+ output_dir: str = Field(
307
+ default=os.getenv("FILESYSTEM_SERVER_WORKDIR", "./keyframes"),
308
+ description="Directory where extracted frames will be saved.",
309
+ ),
310
+ ) -> KeyframeResult:
311
+ """Extract key frames around the target time with scene detection.
312
+
313
+ This function extracts frames from a video file around a specific time point,
314
+ using scene detection to identify significant changes between frames. Only frames
315
+ with substantial visual differences are saved, reducing redundancy.
316
+
317
+ Args:
318
+ video_path: Path or URL to the video file
319
+ target_time: Specific time point (in seconds) to extract frames around
320
+ window_size: Time window (in seconds) centered on target_time
321
+ cleanup: Whether to delete the original video file after processing
322
+ output_dir: Directory where extracted frames will be saved
323
+
324
+ Returns:
325
+ KeyframeResult: A dataclass containing paths to saved frames, timestamps,
326
+ and metadata about the extraction process
327
+
328
+ Raises:
329
+ Exception: Exceptions are caught internally and reported in the result
330
+ """
331
+
332
+ def save_frames(frames, frame_times, output_dir) -> Tuple[List[str], List[float]]:
333
+ """Save extracted frames to disk"""
334
+ os.makedirs(output_dir, exist_ok=True)
335
+ saved_paths = []
336
+ saved_timestamps = []
342
+
343
+ for _, (frame, timestamp) in enumerate(zip(frames, frame_times)):
344
+ filename = f"{output_dir}/frame_{timestamp:.2f}s.jpg"
345
+ cv2.imwrite(filename, frame)
346
+ saved_paths.append(filename)
347
+ saved_timestamps.append(timestamp)
348
+
349
+ return saved_paths, saved_timestamps
350
+
351
+ def extract_keyframes(video_path, target_time, window_size) -> Tuple[List[Any], List[float]]:
352
+ """Extract key frames around the target time with scene detection"""
353
+ cap = cv2.VideoCapture(video_path)
354
+ fps = cap.get(cv2.CAP_PROP_FPS)
355
+
356
+ # Calculate frame numbers for the time window
357
+ start_frame = int((target_time - window_size / 2) * fps)
358
+ end_frame = int((target_time + window_size / 2) * fps)
359
+
360
+ frames = []
361
+ frame_times = []
362
+
363
+ # Set video position to start_frame
364
+ cap.set(cv2.CAP_PROP_POS_FRAMES, max(0, start_frame))
365
+
366
+ prev_frame = None
367
+ while cap.isOpened():
368
+ frame_pos = cap.get(cv2.CAP_PROP_POS_FRAMES)
369
+ if frame_pos >= end_frame:
370
+ break
371
+
372
+ ret, frame = cap.read()
373
+ if not ret:
374
+ break
375
+
376
+ # Convert frame to grayscale for scene detection
377
+ gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
378
+
379
+ # If this is the first frame, save it
380
+ if prev_frame is None:
381
+ frames.append(frame)
382
+ frame_times.append(frame_pos / fps)
383
+ else:
384
+ # Calculate difference between current and previous frame
385
+ diff = cv2.absdiff(gray, prev_frame)
386
+ mean_diff = np.mean(diff)
387
+
388
+ # If significant change detected, save frame
389
+ if mean_diff > 20: # Threshold for scene change
390
+ frames.append(frame)
391
+ frame_times.append(frame_pos / fps)
392
+
393
+ prev_frame = gray
394
+
395
+ cap.release()
396
+ return frames, frame_times
397
+
398
+ try:
399
+ # Extract keyframes
400
+ frames, frame_times = extract_keyframes(video_path, target_time, window_size)
401
+
402
+ # Save frames
403
+ frame_paths, frame_timestamps = save_frames(frames, frame_times, output_dir)
404
+
405
+ # Cleanup
406
+ if cleanup and os.path.exists(video_path):
407
+ os.remove(video_path)
408
+
409
+ return KeyframeResult(
410
+ frame_paths=frame_paths,
411
+ frame_timestamps=frame_timestamps,
412
+ output_directory=output_dir,
413
+ frame_count=len(frame_paths),
414
+ success=True,
415
+ )
416
+
417
+ except Exception as e:
418
+ error_message = f"Error processing video: {str(e)}"
419
+ print(error_message)
420
+ return KeyframeResult(
421
+ frame_paths=[],
422
+ frame_timestamps=[],
423
+ output_directory=output_dir,
424
+ frame_count=0,
425
+ success=False,
426
+ error_message=error_message,
427
+ )
428
+
429
+
430
+ def main():
431
+ load_dotenv()
432
+ print("Starting Video MCP Server...", file=sys.stderr)
433
+ mcp.run(transport="stdio")
434
+
435
+
436
+ # Make the module callable
437
+ def __call__():
438
+ """
439
+ Make the module callable for uvx.
440
+ This function is called when the module is executed directly.
441
+ """
442
+ main()
443
+
444
+
445
+ # Add this for compatibility with uvx
446
+ sys.modules[__name__].__call__ = __call__
447
+
448
+
449
+ # Run the server when the script is executed directly
450
+ if __name__ == "__main__":
451
+ main()
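
A standalone sketch of the overlapping windowing used by mcp_analyze_video above (steps of interval frames, windows of frame_nums frames):

    def frame_batches(n_frames: int, interval: int = 20, frame_nums: int = 30):
        """Yield (start, end) windows exactly as the analysis loop slices frames."""
        for i in range(0, n_frames, interval):
            yield i, min(i + frame_nums, n_frames)
            if i + frame_nums >= n_frames:
                break

    # e.g. 75 frames -> [(0, 30), (20, 50), (40, 70), (60, 75)]
    print(list(frame_batches(75)))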
mcp_servers/youtube_server.py ADDED
@@ -0,0 +1,279 @@
1
+ """
2
+ Youtube Download MCP Server
3
+
4
+ This module provides MCP server functionality for downloading files from Youtube URLs.
5
+ It handles various download scenarios with proper validation, error handling,
6
+ and progress tracking.
7
+
8
+ Key features:
9
+ - File downloading from Youtube HTTP/HTTPS URLs
10
+ - Download progress tracking
11
+ - File validation
12
+ - Safe file saving
13
+
14
+ Main functions:
15
+ - download_youtube_files: Downloads files from YouTube URLs to the local filesystem
16
+ """
17
+
18
+ import os
19
+ import sys
20
+ import time
21
+ import traceback
22
+ import urllib.parse
23
+ from datetime import datetime
24
+ from pathlib import Path
25
+ from typing import Optional
26
+
27
+ from dotenv import load_dotenv
28
+ from mcp.server.fastmcp import FastMCP
29
+ from pydantic import BaseModel, Field
30
+ from selenium import webdriver
31
+ from selenium.webdriver.chrome.service import Service
32
+ from selenium.webdriver.common.by import By
33
+
34
+ from aworld.logs.util import logger
35
+
36
+ mcp = FastMCP("youtube-server")
37
+ _default_driver_path = os.environ.get(
38
+ "CHROME_DRIVER_PATH",
39
+ os.path.expanduser("~/Downloads/chromedriver-mac-arm64/chromedriver"),
40
+ )
41
+
42
+
43
+ class YoutubeDownloadResults(BaseModel):
44
+ """Download result model with file information"""
45
+
46
+ file_path: str
47
+ file_name: str
48
+ file_size: int
49
+ content_type: Optional[str] = None
50
+ success: bool
51
+ error: Optional[str] = None
52
+
53
+
54
+ @mcp.tool(
55
+ description="Download the youtube file from the URL and save to the local filesystem."
56
+ )
57
+ def download_youtube_files(
58
+ url: str = Field(
59
+ description="The URL of youtube file to download. Must be a String."
60
+ ),
61
+ output_dir: str = Field(
62
+ "/tmp/mcp_downloads",
63
+ description="Directory to save the downloaded files (default: /tmp/mcp_downloads).",
64
+ ),
65
+ timeout: int = Field(
66
+ 180, description="Download timeout in seconds (default: 180)."
67
+ ),
68
+ ) -> str:
69
+ """Download the youtube file from the URL and save to the local filesystem.
70
+
71
+ Args:
72
+ url: The URL of youtube file to download, must be a String
73
+ output_dir: Directory to save the downloaded files
74
+ timeout: Download timeout in seconds
75
+
76
+ Returns:
77
+ JSON string with download results information
78
+ """
79
+ # Handle Field objects if they're passed directly
80
+ if hasattr(url, "default") and not isinstance(url, str):
81
+ url = url.default
82
+
83
+ if hasattr(output_dir, "default") and not isinstance(output_dir, str):
84
+ output_dir = output_dir.default
85
+
86
+ if hasattr(timeout, "default") and not isinstance(timeout, int):
87
+ timeout = timeout.default
88
+
89
+ def _get_youtube_content(url: str, output_dir: str, timeout: int) -> None:
90
+ """Use Selenium to download YouTube content via cobalt.tools"""
91
+ try:
92
+ options = webdriver.ChromeOptions()
93
+ options.add_argument("--disable-blink-features=AutomationControlled")
94
+ # Set download file default path
95
+ prefs = {
96
+ "download.default_directory": output_dir,
97
+ "download.prompt_for_download": False,
98
+ "download.directory_upgrade": True,
99
+ "safebrowsing.enabled": True,
100
+ }
101
+ options.add_experimental_option("prefs", prefs)
102
+ # Create WebDriver object and launch Chrome browser
103
+ service = Service(executable_path=_default_driver_path)
104
+ driver = webdriver.Chrome(service=service, options=options)
105
+
106
+ logger.info(f"Opening cobalt.tools to download from {url}")
107
+ # Open target webpage
108
+ driver.get("https://cobalt.tools/")
109
+ # Wait for page to load
110
+ time.sleep(5)
111
+ # Find input field and enter YouTube link
112
+ input_field = driver.find_element(By.ID, "link-area")
113
+ input_field.send_keys(url)
114
+ time.sleep(5)
115
+ # Find download button and click
116
+ download_button = driver.find_element(By.ID, "download-button")
117
+ download_button.click()
118
+ time.sleep(5)
119
+
120
+ try:
121
+ # Handle bot detection popup
122
+ driver.find_element(
123
+ By.CLASS_NAME,
124
+ "button.elevated.popup-button.undefined.svelte-nnawom.active",
125
+ ).click()
126
+ except Exception as e:
127
+ logger.warning(f"Bot detection handling: {str(e)}")
128
+
129
+ # Wait for download to complete (Chrome keeps a *.crdownload file while a download is in progress)
130
+ cnt = 0
131
+ while (
132
+ len(os.listdir(output_dir)) == 0
133
+ or any(name.endswith(".crdownload") for name in os.listdir(output_dir))
134
+ ):
135
+ time.sleep(3)
136
+ cnt += 3
137
+ if cnt >= timeout:
138
+ logger.warning(f"Download timeout after {timeout} seconds")
139
+ break
140
+
141
+ logger.info("Download process completed")
142
+
143
+ except Exception as e:
144
+ logger.error(f"Error during YouTube content download: {str(e)}")
145
+ raise
146
+ finally:
147
+ # Close browser
148
+ if "driver" in locals():
149
+ driver.quit()
150
+
151
+ def _download_single_file(
152
+ url: str, output_dir: str, filename: str, timeout: int
153
+ ) -> str:
154
+ """Download a single file from URL and save it to the local filesystem."""
155
+ try:
156
+ # Validate URL
157
+ if not url.startswith(("http://", "https://")):
158
+ raise ValueError(
159
+ "Invalid URL format. URL must start with http:// or https://"
160
+ )
161
+
162
+ # Create output directory if it doesn't exist
163
+ output_path = Path(output_dir)
164
+ output_path.mkdir(parents=True, exist_ok=True)
165
+
166
+ # Determine filename if not provided
167
+ if not filename:
168
+ filename = os.path.basename(urllib.parse.urlparse(url).path)
169
+ if not filename:
170
+ filename = "downloaded_file"
171
+ filename += "_" + datetime.now().strftime("%Y%m%d_%H%M%S")
172
+
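+ # Note: file_path names a per-download directory; Chrome saves the downloaded video inside it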
173
+ file_path = Path(os.path.join(output_path, filename))
174
+ file_path.mkdir(parents=True, exist_ok=True)
175
+ logger.info(f"Output path: {file_path}")
176
+
177
+ # Check whether this video was already downloaded somewhere under FILESYSTEM_SERVER_WORKDIR
178
+ video_id = url.split("?v=")[-1].split("&")[0] if "?v=" in url else ""
179
+ base_path = os.getenv("FILESYSTEM_SERVER_WORKDIR")
180
+
181
+ # checker function
182
+ def find_existing_video(search_dir, video_id):
183
+ if not video_id or not search_dir or not os.path.isdir(search_dir):
184
+ return None
185
+
186
+ for item in os.listdir(search_dir):
187
+ item_path = os.path.join(search_dir, item)
188
+
189
+ if os.path.isfile(item_path) and video_id in item:
190
+ return item_path
191
+
192
+ elif os.path.isdir(item_path):
193
+ found = find_existing_video(item_path, video_id)
194
+ if found:
195
+ return found
196
+
197
+ return None
198
+
199
+ existing_file = find_existing_video(base_path, video_id)
200
+ if existing_file:
201
+ result = YoutubeDownloadResults(
202
+ file_path=existing_file,
203
+ file_name=os.path.basename(existing_file),
204
+ file_size=os.path.getsize(existing_file),
205
+ content_type="mp4",
206
+ success=True,
207
+ error=None,
208
+ )
209
+ logger.info(
210
+ f"Found {video_id} is already downloaded in: {existing_file}"
211
+ )
212
+ return result.model_dump_json()
213
+
214
+ logger.info(f"Downloading file from {url} to {file_path}")
215
+
216
+ _get_youtube_content(url, str(file_path), timeout)
217
+
218
+ # Check if download was successful
219
+ if len(os.listdir(file_path)) == 0:
220
+ raise FileNotFoundError("No files were downloaded")
221
+
222
+ download_file = os.path.join(file_path, os.listdir(file_path)[0])
223
+
224
+ # Get actual file size
225
+ actual_size = os.path.getsize(download_file)
226
+ logger.success(f"File downloaded successfully to {download_file}")
227
+
228
+ # Create result
229
+ result = YoutubeDownloadResults(
230
+ file_path=download_file,
231
+ file_name=os.listdir(file_path)[0],
232
+ file_size=actual_size,
233
+ content_type="mp4",
234
+ success=True,
235
+ error=None,
236
+ )
237
+
238
+ return result.model_dump_json()
239
+
240
+ except Exception as e:
241
+ error_msg = str(e)
242
+ logger.error(f"Download error: {traceback.format_exc()}")
243
+
244
+ result = YoutubeDownloadResults(
245
+ file_path="",
246
+ file_name="",
247
+ file_size=0,
248
+ content_type=None,
249
+ success=False,
250
+ error=error_msg,
251
+ )
252
+
253
+ return result.model_dump_json()
254
+
255
+ result_json = _download_single_file(url, output_dir, "", timeout)
256
+ result = YoutubeDownloadResults.model_validate_json(result_json)
257
+ return result.model_dump_json()
258
+
259
+
260
+ def main():
261
+ load_dotenv()
262
+ print("Starting YoutubeDownload MCP Server...", file=sys.stderr)
263
+ mcp.run(transport="stdio")
264
+
265
+
266
+ # Make the module callable
267
+ def __call__():
268
+ """
269
+ Make the module callable for uvx.
270
+ This function is called when the module is executed directly.
271
+ """
272
+ main()
273
+
274
+
275
+ sys.modules[__name__].__call__ = __call__
276
+
277
+ # Run the server when the script is executed directly
278
+ if __name__ == "__main__":
279
+ main()
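
For reference, a minimal sketch of how this tool could be exercised from an MCP stdio client using the `mcp` Python SDK. The server script path and the example URL are assumptions for illustration, not part of this commit:

import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


async def demo():
    # Spawn the server as a stdio subprocess (script path is an assumption).
    params = StdioServerParameters(
        command="python", args=["mcp_servers/youtube_server.py"]
    )
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Call the tool registered above; arguments mirror its Field defaults.
            result = await session.call_tool(
                "download_youtube_files",
                {
                    "url": "https://www.youtube.com/watch?v=VIDEO_ID",  # placeholder URL
                    "output_dir": "/tmp/mcp_downloads",
                    "timeout": 180,
                },
            )
            # The tool returns a JSON string matching YoutubeDownloadResults.
            print(result.content)


asyncio.run(demo())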