Spaces: Update app.py
app.py
CHANGED
@@ -1,152 +1,202 @@
(This capture is a split diff; the left column, the previous version of app.py, is truncated mid-line throughout and is not reproduced here. The old file shared the same skeleton — imports, Hugging Face API configuration, the MCPClientWrapper class, and the Gradio UI — but lacked the module docstring, the Optional-typed state fields with explicit resets on reconnect, the tool-calling system prompt, the two-call tool loop, and the shutdown handling added below. The diff's second hunk header read "@@ -156,403 +206,492 @@ class MCPClientWrapper:". The reconstructed new version of app.py follows.)
# -*- coding: utf-8 -*-
"""
Gradio Chat Interface for MCP (Model Context Protocol) Client
using Hugging Face Inference API for the Language Model.
"""

import asyncio
import os
import json
from typing import List, Dict, Any, Union, Optional, Tuple
from contextlib import AsyncExitStack
import logging
import traceback

# Third-party libraries
import httpx  # For async HTTP requests
import gradio as gr
from gradio.components.chatbot import ChatMessage  # Although type="messages" uses dicts primarily
from dotenv import load_dotenv

# MCP specific imports
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

# --- Configuration ---
load_dotenv()  # Load environment variables from .env file

# Hugging Face API Configuration
HF_TOKEN = os.getenv("HF_TOKEN")
# Specify the desired Hugging Face model endpoint
HF_API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/Qwen3-235B-A22B/v1/chat/completions"
MODEL_NAME = "Qwen/Qwen3-235B-A22B"  # Model name for payload and display
MAX_TOKENS = 1500  # Max tokens for the LLM response
HTTP_TIMEOUT = 120  # Increased timeout for potentially slow model responses

# Default MCP Server Script Path
DEFAULT_SERVER_SCRIPT = "gradio_mcp_server.py"
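
# The configuration above expects an HF token in the environment; a minimal
# .env file read by load_dotenv() might look like this (illustrative value,
# not a real token):
#
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxx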

# --- Logging Setup ---
logging.basicConfig(
    level=logging.INFO,  # Set to DEBUG for more verbose output
    format='%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s'
)
logger = logging.getLogger(__name__)

# --- Async Event Loop ---
# Get the current event loop or create a new one if none exists
try:
    loop = asyncio.get_running_loop()
except RuntimeError:
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
logger.info("Asyncio event loop initialized.")

# --- MCP Client Wrapper Class ---
class MCPClientWrapper:
    """
    Manages the connection to the MCP server, interaction with Hugging Face API,
    and Gradio message processing logic.
    """
    def __init__(self):
        """Initializes the wrapper, loading configuration."""
        self.session: Optional[ClientSession] = None
        self.exit_stack: Optional[AsyncExitStack] = None
        self.tools: List[Dict[str, Any]] = []
        self.http_client: Optional[httpx.AsyncClient] = None
        self.hf_token: Optional[str] = os.getenv("HF_TOKEN")

        if not self.hf_token:
            logger.warning("HF_TOKEN environment variable not found. Hugging Face API calls will be disabled.")
        else:
            # Log only a part of the token for verification, NEVER the full token.
            logger.info(f"HF_TOKEN loaded successfully (starts with: {self.hf_token[:4]}...).")
async def _connect(self, server_path: str) -> str:
|
76 |
+
"""Establishes connection to the MCP server and initializes HTTP client."""
|
77 |
+
# Gracefully close existing resources if reconnecting
|
78 |
if self.exit_stack:
|
79 |
+
logger.info("Closing existing connection and resources before reconnecting.")
|
80 |
await self.exit_stack.aclose()
|
81 |
+
# Explicitly reset state variables
|
82 |
+
self.exit_stack = None
|
83 |
+
self.session = None
|
84 |
+
self.http_client = None
|
85 |
+
self.tools = []
|
86 |
|
87 |
+
logger.info(f"Attempting to connect to MCP server script: {server_path}")
|
88 |
+
self.exit_stack = AsyncExitStack()
|
89 |
|
90 |
try:
|
91 |
+
# Determine server command (python or node)
|
92 |
is_python = server_path.lower().endswith('.py')
|
93 |
command = "python" if is_python else "node"
|
94 |
logger.info(f"Using command '{command}' for server.")
|
95 |
|
96 |
+
# Configure MCP server parameters
|
97 |
server_params = StdioServerParameters(
|
98 |
command=command,
|
99 |
args=[server_path],
|
100 |
env={"PYTHONIOENCODING": "utf-8", "PYTHONUNBUFFERED": "1"}
|
101 |
)
|
102 |
|
103 |
+
# --- Establish MCP Connection ---
|
104 |
+
logger.info("Initializing stdio transport...")
|
105 |
stdio_transport = await self.exit_stack.enter_async_context(stdio_client(server_params))
|
106 |
self.stdio, self.write = stdio_transport
|
107 |
logger.info("Stdio transport established.")
|
108 |
|
109 |
+
logger.info("Initializing MCP client session...")
|
110 |
self.session = await self.exit_stack.enter_async_context(ClientSession(self.stdio, self.write))
|
111 |
await self.session.initialize()
|
112 |
+
logger.info("MCP session initialized successfully.")
|
113 |
+
|
114 |
+
# --- Initialize HTTP Client for Hugging Face ---
|
115 |
+
if self.hf_token:
|
116 |
+
logger.info("Initializing HTTP client for Hugging Face API...")
|
117 |
+
self.http_client = await self.exit_stack.enter_async_context(
|
118 |
+
httpx.AsyncClient(timeout=HTTP_TIMEOUT)
|
119 |
+
)
|
120 |
+
logger.info("HTTP client initialized successfully.")
|
121 |
+
else:
|
122 |
+
logger.warning("HTTP client NOT initialized because HF_TOKEN is missing.")
|
123 |
+
self.http_client = None # Ensure it's None
|
124 |
|
125 |
+
# --- List Available MCP Tools ---
|
126 |
+
logger.info("Listing available tools from MCP server...")
|
127 |
response = await self.session.list_tools()
|
128 |
self.tools = [{
|
129 |
"name": tool.name,
|
130 |
"description": tool.description,
|
131 |
+
"input_schema": tool.inputSchema # Keep schema for potential richer prompts
|
132 |
} for tool in response.tools]
|
|
|
|
|
133 |
tool_names = [tool["name"] for tool in self.tools]
|
134 |
+
logger.info(f"Available tools retrieved: {tool_names if tool_names else 'None'}")
|
135 |
+
|
136 |
+
# --- Prepare Connection Status Message ---
|
137 |
+
connection_status = f"Connected to MCP server. Available tools: {', '.join(tool_names) if tool_names else 'None'}."
|
138 |
+
if not self.http_client:
|
139 |
+
connection_status += " Warning: Hugging Face client is INACTIVE (missing token)."
|
140 |
+
return connection_status
|
141 |
|
142 |
except Exception as e:
|
143 |
logger.error(f"Connection failed: {e}", exc_info=True)
|
144 |
+
# Ensure cleanup if connection fails at any point
|
145 |
if self.exit_stack:
|
146 |
await self.exit_stack.aclose()
|
147 |
self.exit_stack = None
|
148 |
self.session = None
|
149 |
self.http_client = None
|
150 |
+
return f"Connection Failed: {e}"
|
151 |
+
|
152 |
+
def connect(self, server_path: str) -> str:
|
153 |
+
"""Synchronous wrapper for the async connect method."""
|
154 |
+
return loop.run_until_complete(self._connect(server_path))
|
155 |
|
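
    # For reference, a sketch of a minimal MCP server script this client could
    # launch over stdio (assumes the FastMCP helper shipped with the `mcp`
    # Python SDK; the tool itself is a made-up example, not from this repo):
    #
    #   from mcp.server.fastmcp import FastMCP
    #
    #   mcp = FastMCP("demo-server")
    #
    #   @mcp.tool()
    #   def count_letters(word: str, letter: str) -> int:
    #       """Count how many times `letter` occurs in `word`."""
    #       return word.count(letter)
    #
    #   if __name__ == "__main__":
    #       mcp.run()  # stdio transport by default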

    def _format_tools_for_prompt(self) -> str:
        """Formats the available tool descriptions for the LLM prompt."""
        if not self.tools:
            return "No tools are available for use."

        tool_descriptions = []
        for tool in self.tools:
            desc = f"- Tool Name: `{tool['name']}`\n"
            desc += f"  Description: {tool['description']}\n"
            # Optionally include schema for complex tools, keep it concise if possible
            desc += f"  Input Format (JSON Schema): {json.dumps(tool['input_schema'])}"
            tool_descriptions.append(desc)

        # Specific instructions for the LLM on how to invoke a tool
        instruction = (
            "You have access to the following tools:\n"
            f"{chr(10).join(tool_descriptions)}\n\n"  # Use newline character explicitly
            "To use a tool, you MUST respond ONLY with a single JSON object "
            "containing 'tool_name' and 'tool_input' keys, like this:\n"
            "```json\n"
            "{\n"
            '  "tool_name": "<name_of_tool>",\n'
            '  "tool_input": { <arguments_object> }\n'
            "}\n"
            "```\n"
            "Do not include any other text, markdown formatting, or explanations "
            "before or after the JSON object when calling a tool."
        )
        return instruction
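
    # Given the instruction above, a conforming tool call arrives as the
    # assistant's *entire* message body, e.g. (hypothetical tool and args):
    #
    #   {"tool_name": "count_letters", "tool_input": {"word": "strawberry", "letter": "r"}}
    #
    # Anything else (plain prose, or JSON lacking these two keys) is treated
    # as a normal text reply by _process_query below.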

    def _build_system_prompt(self) -> str:
        """Constructs the system prompt, including tool usage instructions."""
        base_prompt = "You are a helpful assistant. Respond concisely and accurately."
        tool_info = self._format_tools_for_prompt()
        # Only add tool info if tools are actually available
        if self.tools:
            return f"{base_prompt}\n\n{tool_info}"
        else:
            return base_prompt

    async def _call_huggingface_api(self, messages: List[Dict[str, str]]) -> Dict[str, Any]:
        """Makes the API call to the Hugging Face Inference endpoint."""
        # This function assumes self.hf_token and self.http_client are valid,
        # checked by the calling function (_process_query).

        headers = {
            "Authorization": f"Bearer {self.hf_token}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": MODEL_NAME,
            "messages": messages,
            "max_tokens": MAX_TOKENS,
            "stream": False,  # Use non-streaming for simplicity
            # Optional parameters:
            # "temperature": 0.7,
            # "top_p": 0.9,
        }

        logger.info(f"Sending request to HF API ({MODEL_NAME}). Message count: {len(messages)}.")
        # Avoid logging full payload in production if it contains sensitive data
        # logger.debug(f"Payload (first message role): {messages[0]['role'] if messages else 'N/A'}")

        try:
            # Ensure http_client exists (redundant check for safety)
            if not self.http_client:
                logger.error("FATAL: _call_huggingface_api called but self.http_client is None!")
                return {"error": "Internal state error: HTTP client is missing."}

            response = await self.http_client.post(HF_API_URL, headers=headers, json=payload)
            response.raise_for_status()  # Raises HTTPStatusError for 4xx/5xx responses
            logger.info(f"Received successful response from HF API (Status: {response.status_code}).")
            return response.json()

        except httpx.HTTPStatusError as e:
            logger.error(f"HF API HTTP error: {e.response.status_code} - Response: {e.response.text}", exc_info=True)
            return {"error": f"API request failed ({e.response.status_code})", "details": e.response.text}
        except httpx.TimeoutException as e:
            logger.error(f"HF API request timed out after {HTTP_TIMEOUT}s: {e}", exc_info=True)
            return {"error": "API request timed out."}
        except httpx.RequestError as e:
            logger.error(f"HF API request error: {e}", exc_info=True)
            return {"error": f"API request failed: {e}"}
        except json.JSONDecodeError as e:
            # Handle cases where the response is not valid JSON
            response_text = await response.aread() if 'response' in locals() else b'Unknown response'
            logger.error(f"Failed to decode JSON response from HF API: {e}. Raw text: {response_text.decode(errors='ignore')}", exc_info=True)
            return {"error": "Invalid JSON response from API.", "raw_response": response_text.decode(errors='ignore')}
        except Exception as e:
            # Catch any other unexpected errors during the API call
            logger.error(f"An unexpected error occurred during HF API call: {e}", exc_info=True)
            return {"error": f"An unexpected error occurred: {e}"}
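
    # The endpoint is OpenAI-compatible (note the /v1/chat/completions path),
    # so a successful response is expected to look roughly like this
    # (abridged sketch, not captured output):
    #
    #   {"choices": [{"message": {"role": "assistant", "content": "..."}}], ...}
    #
    # which is why the parsing in _process_query walks
    # choices[0]["message"]["content"] first.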

    def process_message(self, message: str, history: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], Dict]:
        """
        Handles incoming user messages, processes them using the LLM and tools,
        and returns the updated conversation history for Gradio.

        Args:
            message: The new message text from the user.
            history: The current conversation history (List of {'role':..., 'content':...} dicts).

        Returns:
            A tuple containing:
            - The complete updated conversation history (List of dicts).
            - A Gradio update dictionary to clear the input textbox.
        """
        logger.info(f"Processing message: '{message[:50]}...'")
        logger.debug(f"Received history (type: {type(history)}, len: {len(history)}).")
        if history:
            logger.debug(f"First history item type: {type(history[0])}, Keys: {history[0].keys() if isinstance(history[0], dict) else 'N/A'}")

        # --- Create a working copy of the history ---
        # Avoids modifying the state Gradio passed in directly.
        current_conversation_history = list(history)

        # --- Validate Connection State ---
        if not self.session:
            logger.warning("MCP session not available in process_message. Aborting.")
            current_conversation_history.append({"role": "user", "content": message})
            current_conversation_history.append({"role": "assistant", "content": "Error: Not connected to MCP server. Please connect first."})
            return current_conversation_history, gr.update(value="")  # Clear input

        if not self.http_client or not self.hf_token:
            logger.warning("Hugging Face client/token not ready in process_message. Aborting.")
            current_conversation_history.append({"role": "user", "content": message})
            current_conversation_history.append({"role": "assistant", "content": "Error: Hugging Face client is not configured (missing token or connection issue?). Cannot process request."})
            return current_conversation_history, gr.update(value="")  # Clear input

        # --- Append User Message to Working History ---
        current_conversation_history.append({"role": "user", "content": message})

        # --- Process Query Asynchronously ---
        # Pass the full history (including new user message) to the async worker.
        # Expect a list of *new* assistant messages generated in this turn.
        try:
            new_assistant_messages: List[Dict[str, Any]] = loop.run_until_complete(
                self._process_query(current_conversation_history)
            )
        except Exception as e:
            # Catch unexpected errors during the async processing itself
            logger.error(f"Error during loop.run_until_complete(_process_query): {e}", exc_info=True)
            # Add an error message to the output
            new_assistant_messages = [{
                "role": "assistant",
                "content": f"An internal error occurred while processing your request: {e}"
            }]

        # --- Combine History for Return ---
        # final_history includes the original history, the user message, and the new assistant messages.
        final_history = current_conversation_history + new_assistant_messages

        logger.debug(f"Returning updated history (len: {len(final_history)}).")

        # --- Return Updated State to Gradio ---
        return final_history, gr.update(value="")  # Return new history and clear input
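
    # NOTE: process_message is deliberately synchronous and drives the shared
    # module-level loop via run_until_complete(). That bridges Gradio's sync
    # callbacks to the async MCP/httpx stack, but it assumes the callback is
    # not itself executing inside a running event loop (run_until_complete
    # raises RuntimeError if the loop is already running).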

    async def _process_query(self, conversation_history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Async function to handle the core logic: call LLM, handle potential tool calls.

        Args:
            conversation_history: The full conversation history up to and including
                                  the latest user message.

        Returns:
            A list containing the new assistant message(s) generated in this turn
            (text response, tool interactions, errors, etc.).
        """
        # List to hold the new message(s) generated by the assistant in this turn.
        new_turn_messages = []

        # --- Prepare Messages for LLM API ---
        hf_messages = []
        # Add system prompt if not already present or if history is empty
        if not conversation_history or conversation_history[0].get("role") != "system":
            logger.debug("Adding system prompt.")
            hf_messages.append({"role": "system", "content": self._build_system_prompt()})

        # Process conversation history for the API call
        for msg in conversation_history:
            role = msg.get("role")
            content = msg.get("content")
            if not role or content is None:
                logger.warning(f"Skipping message with missing role/content: {msg}")
                continue
            content_str = content if isinstance(content, str) else json.dumps(content)

            # Add valid roles, prevent duplicate system prompts if handled above
            if role in ["user", "assistant"]:
                hf_messages.append({"role": role, "content": content_str})
            elif role == "system" and not hf_messages:  # Only add if system prompt wasn't added at start
                hf_messages.append({"role": role, "content": content_str})

        # --- Pre-API Call State Check ---
        token_ok = bool(self.hf_token)
        # Ensure http_client is not None and is the correct type
        client_ok = isinstance(self.http_client, httpx.AsyncClient)
        logger.info(f"State before API call: Token OK? {token_ok}, HTTP Client OK? {client_ok}")

        if not (token_ok and client_ok):
            logger.error("Pre-API call check FAILED: Token or Client not ready.")
            new_turn_messages.append({
                "role": "assistant",
                "content": "Internal Error: API client configuration problem detected before making the call."
            })
            return new_turn_messages  # Return error message

        # --- Make the First API Call ---
        logger.info("Making initial call to Hugging Face API...")
        response_data = await self._call_huggingface_api(hf_messages)

        # --- Handle Initial API Response ---
        if not response_data or "error" in response_data:
            error_msg = response_data.get("error", "Unknown API error") if response_data else "No response received"
            details = response_data.get("details", "") if response_data else ""
            logger.error(f"Initial API call failed: {error_msg}")
            new_turn_messages.append({
                "role": "assistant",
                "content": f"Sorry, there was an error calling the language model: {error_msg}" + (f"\nDetails: ```\n{details}\n```" if details else "")
            })
            return new_turn_messages  # Return list with error message

        # --- Extract Assistant Content ---
        try:
            assistant_content = response_data.get("choices", [{}])[0].get("message", {}).get("content", "")
            # Fallback for models that might use 'generated_text'
            if not assistant_content and "generated_text" in response_data:
                assistant_content = response_data["generated_text"]
            if not assistant_content:
                logger.error(f"Could not extract assistant content. Response keys: {response_data.keys()}")
                raise ValueError("Empty or missing assistant content in API response.")
            logger.info("Successfully extracted assistant content from initial response.")
            # logger.debug(f"Assistant raw content: {assistant_content}")  # Be cautious logging full content

        except (KeyError, IndexError, ValueError, TypeError) as e:
            logger.error(f"Error parsing initial API response structure: {e}. Response: {response_data}", exc_info=True)
            new_turn_messages.append({
                "role": "assistant",
                "content": f"Sorry, I received an unexpected response format from the language model. Parsing Error: {e}"
            })
            return new_turn_messages  # Return list with error message

        # --- Check for Tool Use Request ---
        tool_call_data = None
        try:
            # The LLM was instructed to respond *only* with JSON for tool calls
            potential_tool_call = json.loads(assistant_content)
            # Validate if it looks like our expected tool call structure
            if isinstance(potential_tool_call, dict) and "tool_name" in potential_tool_call and "tool_input" in potential_tool_call:
                tool_call_data = potential_tool_call
                logger.info(f"Detected tool call request for: {tool_call_data['tool_name']}")
            else:
                # Valid JSON, but not the specific format we requested for tools
                logger.info("Assistant response is valid JSON, but not a recognized tool call format. Treating as text.")
                # Keep assistant_content as is, tool_call_data remains None
        except json.JSONDecodeError:
            # Not JSON, so definitely treat as a regular text response
            logger.info("Assistant response is not JSON, treating as standard text response.")
            # Keep assistant_content as is, tool_call_data remains None

        # --- Process Based on Tool Call or Text ---
        if tool_call_data:
            # --- Handle Tool Call ---
            tool_name = tool_call_data.get("tool_name")
            tool_args = tool_call_data.get("tool_input", {})  # Default to empty dict if missing
            available_tool_names = [t["name"] for t in self.tools]

            if not tool_name or tool_name not in available_tool_names:
                logger.warning(f"LLM requested invalid or unavailable tool: '{tool_name}'")
                new_turn_messages.append({
                    "role": "assistant",
                    "content": f"I tried to use a tool named '{tool_name}', but it seems it's not available or the request was malformed. I will proceed without it."
                })
                # NOTE: Consider calling the LLM again here to inform it the tool failed.
                # For simplicity, we just return the warning message for now.
                return new_turn_messages

            # --- Tool is valid, proceed ---
            logger.info(f"Executing valid tool call: {tool_name}")
            # Add messages to Gradio indicating tool use initiation
            new_turn_messages.append({
                "role": "assistant",
                "content": f"Okay, I need to use the **{tool_name}** tool.",
                "metadata": {"title": f"⏳ Using tool: {tool_name}", "status": "pending", "id": f"tool_call_{tool_name}"}
            })
            # Display parameters used (use ensure_ascii=False for better readability if needed)
            new_turn_messages.append({
                "role": "assistant",
                "content": f"Parameters:\n```json\n{json.dumps(tool_args, indent=2, ensure_ascii=False)}\n```",
                "metadata": {"parent_id": f"tool_call_{tool_name}", "id": f"params_{tool_name}", "title": "Tool Parameters"}
            })

            # --- Call the Actual MCP Tool ---
            try:
                mcp_result = await self.session.call_tool(tool_name, tool_args)
                tool_result_content = mcp_result.content
                logger.info(f"Successfully received result from MCP tool: {tool_name}")
                # Update Gradio message status to 'done'
                if new_turn_messages and "metadata" in new_turn_messages[-2]:
                    new_turn_messages[-2]["metadata"]["status"] = "done"
                    new_turn_messages[-2]["metadata"]["title"] = f"✅ Used tool: {tool_name}"

                # --- Display Tool Result in Gradio ---
                new_turn_messages.append({
                    "role": "assistant",
                    "content": f"Result from **{tool_name}**:",
                    "metadata": {"title": f"Tool Result: {tool_name}", "status": "done", "id": f"result_{tool_name}"}
                })
                # Format result for display (handle JSON, images, etc.)
                display_content = tool_result_content
                try:
                    result_json = json.loads(tool_result_content)
                    if isinstance(result_json, dict) and result_json.get("type") == "image" and "url" in result_json:
                        # Handle image result - Gradio chatbot can display images via dict path
                        display_content = {"path": result_json["url"], "alt_text": result_json.get("message", "Generated image")}
                        new_turn_messages.append({
                            "role": "assistant", "content": display_content,  # Send the dict
                            "metadata": {"parent_id": f"result_{tool_name}", "id": f"image_{tool_name}", "title": "Image Result"}
                        })
                        display_content = None  # Mark as handled so raw isn't added below
                    else:
                        # Nicely format other JSON
                        display_content = f"```json\n{json.dumps(result_json, indent=2, ensure_ascii=False)}\n```"
                except (json.JSONDecodeError, TypeError):
                    # Not JSON or image, display as plain code block if not empty
                    display_content = f"```\n{tool_result_content}\n```" if tool_result_content else "_Tool returned empty content._"

                # Add the formatted/raw result if not handled above (e.g., image)
                if display_content:
                    new_turn_messages.append({
                        "role": "assistant", "content": display_content,
                        "metadata": {"parent_id": f"result_{tool_name}", "id": f"raw_result_{tool_name}", "title": "Formatted Output"}
                    })

                # --- Send Tool Result Back to LLM for Final Response ---
                # Prepare message history for the second LLM call
                hf_messages_for_final_call = list(hf_messages)  # Start with messages from first call
                # Add the assistant's message that *was* the tool call JSON
                hf_messages_for_final_call.append({"role": "assistant", "content": assistant_content})
                # Add a user message containing the tool's result
                hf_messages_for_final_call.append({
                    "role": "user",
                    "content": f"The '{tool_name}' tool execution resulted in:\n```\n{tool_result_content}\n```\nPlease summarize this result or continue based on it."
                })

                logger.info("Sending tool result back to HF API for final interpretation.")
                # --- Pre-API Call State Check (Again) ---
                token_ok_final = bool(self.hf_token)
                client_ok_final = isinstance(self.http_client, httpx.AsyncClient)
                logger.info(f"State before final API call: Token OK? {token_ok_final}, HTTP Client OK? {client_ok_final}")

                if not (token_ok_final and client_ok_final):
                    logger.error("Pre-API call check FAILED before final call.")
                    new_turn_messages.append({"role": "assistant", "content": "Internal Error: Client state issue before getting final response after tool use."})
                    # Return messages generated so far (tool use + error)
                    return new_turn_messages

                # --- Make the Second API Call ---
                final_response_data = await self._call_huggingface_api(hf_messages_for_final_call)

                # --- Process Final LLM Response ---
                if final_response_data and "error" not in final_response_data:
                    try:
                        final_assistant_content = final_response_data.get("choices", [{}])[0].get("message", {}).get("content", "")
                        # ... (fallback for generated_text) ...
                        if final_assistant_content:
                            logger.info("Successfully extracted final assistant response after tool use.")
                            new_turn_messages.append({"role": "assistant", "content": final_assistant_content})
                        else:
                            raise ValueError("Empty final assistant content after tool use.")
                    except Exception as e:
                        logger.error(f"Error parsing final API response after tool use: {e}", exc_info=True)
                        new_turn_messages.append({"role": "assistant", "content": f"Sorry, error processing the final response after tool use: {e}"})
                else:  # Handle error in the second API call itself
                    error_msg = final_response_data.get("error", "API Error") if final_response_data else "API Error"
                    details = final_response_data.get("details", "") if final_response_data else ""
                    logger.error(f"Final API call (after tool use) failed: {error_msg}")
                    new_turn_messages.append({"role": "assistant", "content": f"Sorry, error processing tool result with LLM: {error_msg}" + (f"\nDetails: ```\n{details}\n```" if details else "")})

            except Exception as e:  # Handle error during the MCP tool call (`session.call_tool`)
                logger.error(f"Error calling MCP tool '{tool_name}': {e}", exc_info=True)
                # Update Gradio status to 'error'
                if new_turn_messages and "metadata" in new_turn_messages[-2]:
                    new_turn_messages[-2]["metadata"]["status"] = "error"
                    new_turn_messages[-2]["metadata"]["title"] = f"❌ Error using tool: {tool_name}"
                # Add error message for the user
                new_turn_messages.append({"role": "assistant", "content": f"Sorry, I encountered an error when trying to use the tool '{tool_name}': {e}"})

        else:
            # --- Handle Regular Text Response ---
            logger.info("Adding standard text response to Gradio output.")
            new_turn_messages.append({
                "role": "assistant",
                "content": assistant_content
            })

        # Return the list of *new* assistant messages generated in this turn
        return new_turn_messages
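
    # The turn sequence implemented above, in short:
    #   user msg -> LLM call 1 -> tool-call JSON? -> MCP tool -> LLM call 2 -> final text
    # A plain-text reply from call 1 short-circuits straight to the chat output.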

    async def close_connection(self):
        """Closes the MCP connection and HTTP client gracefully."""
        if self.exit_stack:
            logger.info("Closing MCP connection and HTTP client resources.")
            try:
                await self.exit_stack.aclose()
            except Exception as e:
                logger.error(f"Error during resource cleanup: {e}", exc_info=True)
            finally:
                # Reset state variables regardless of cleanup success
                self.exit_stack = None
                self.session = None
                self.http_client = None
                self.tools = []
                logger.info("Resources cleanup attempted.")
        else:
            logger.info("Close connection called but no active connection found.")

# --- Gradio Interface Definition ---
client = MCPClientWrapper()  # Instantiate the client wrapper globally

def create_gradio_interface() -> gr.Blocks:
    """Creates and configures the Gradio interface."""
    logger.info("Creating Gradio interface.")
    with gr.Blocks(
        title="MCP Client + HF Inference",
        theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky),
        css="#chatbot { font-size: 1.1em; } .message { padding: 10px !important; }"  # Example CSS
    ) as demo:
        gr.Markdown(f"# 🤖 MCP Assistant ({MODEL_NAME})")
        gr.Markdown("Connect to an MCP server and chat with a Hugging Face LLM.")

        # Connection Row
        with gr.Row():
            server_path = gr.Textbox(
                label="MCP Server Script Path",
                placeholder="Enter path to server script",
                value=DEFAULT_SERVER_SCRIPT,  # Use default value
                scale=3
            )
            connect_btn = gr.Button("🔌 Connect to MCP Server", variant="primary", scale=1)

        status = gr.Textbox(label="Connection Status", interactive=False, placeholder="Not connected.")

        # Chatbot Display
        chatbot = gr.Chatbot(
            label="Conversation",
            elem_id="chatbot",
            height=650,
            show_copy_button=True,
            bubble_full_width=False,  # Chat bubbles don't span full width
            avatar_images=("👤", "🤖"),  # User and Hugging Face avatars
            type="messages",  # IMPORTANT: Use the dictionary format
            show_label=False  # Hide the "Conversation" label above chat
        )

        # Input Row
        with gr.Row():
            msg_textbox = gr.Textbox(
                label="Your Message",
                placeholder="Type your message here and press Enter...",
                scale=4,
                autofocus=True,
                show_label=False,  # Hide the "Your Message" label
                container=False  # Remove container padding/border for tighter look
            )
            clear_btn = gr.Button("🗑️ Clear Chat", scale=1)

        # --- Event Handlers ---
        # Connect Button Action
        connect_btn.click(
            fn=client.connect,  # Call the connect method
            inputs=[server_path],  # Pass the server path textbox
            outputs=[status]  # Update the status textbox
        )

        # Message Submission Action (Enter key in textbox)
        msg_textbox.submit(
            fn=client.process_message,  # Call the main message processing function
            inputs=[msg_textbox, chatbot],  # Pass current message and chat history
            outputs=[chatbot, msg_textbox]  # Update chat history and clear message box
        )

        # Clear Button Action
        def clear_chat_and_input():
            logger.info("Clear chat button clicked.")
            return [], ""  # Return empty list for chatbot, empty string for textbox
        clear_btn.click(
            fn=clear_chat_and_input,
            inputs=[],
            outputs=[chatbot, msg_textbox],
            queue=False  # Don't queue this action
        )

        # Handle application shutdown (optional, but good practice)
        # demo.unload(client.close_connection)  # Requires newer Gradio, async handling can be complex

        logger.info("Gradio interface created successfully.")
    return demo
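
# Because process_message returns plain {"role", "content"} dicts (plus
# optional "metadata"), the chatbot above must be created with
# type="messages"; the older pair-based default format would not round-trip
# this history shape.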

# --- Main Execution Block ---
if __name__ == "__main__":
    print("\n" + "="*60)
    print("   MCP Client with Hugging Face Inference API   ")
    print(f"   Model: {MODEL_NAME}")
    print("="*60 + "\n")

    # Check for Hugging Face token on startup
    if not HF_TOKEN:
        print("\n" + "*"*60)
        print("  WARNING: HF_TOKEN environment variable not found!  ")
        print("  Please set it in your .env file or environment variables.")
        print("  The application will run, but language model features")
        print("  requiring the Hugging Face API will be disabled.")
        print("*"*60 + "\n")
    else:
        print("✓ HF_TOKEN found.\n")

    # Create and launch the Gradio interface
    interface = create_gradio_interface()
    print("Launching Gradio interface... access it at the URL Gradio prints below.")
    print("Press Ctrl+C to stop the server.")

    # launch() blocks until the server stops, so it is wrapped here to let
    # Ctrl+C trigger the explicit cleanup below (demo.unload is not used above).
    try:
        # Use server_name="0.0.0.0" to make accessible on local network
        # Use share=True for a temporary public link (requires Gradio account sometimes)
        interface.launch(debug=True, server_name="0.0.0.0")
    except KeyboardInterrupt:
        logger.info("KeyboardInterrupt received, attempting shutdown.")
        if client:
            print("Closing connections...")
            loop.run_until_complete(client.close_connection())
        print("Cleanup complete.")
    finally:
        logger.info("Application shutting down.")