Nymbo committed
Commit 0366bb5 · verified · 1 Parent(s): 956ea73

just kidding I meant 235B-A22B

Files changed (1):
  1. app.py +504 -151

app.py CHANGED
@@ -1,205 +1,558 @@
- """
- app.py – Hugging Face Space
- Swaps Anthropic for HF Serverless Inference (Qwen3-235B-A22B)
- """
-
  import asyncio
  import os
  import json
  from typing import List, Dict, Any, Union
  from contextlib import AsyncExitStack

  import gradio as gr
  from gradio.components.chatbot import ChatMessage
  from mcp import ClientSession, StdioServerParameters
  from mcp.client.stdio import stdio_client
  from dotenv import load_dotenv
- from huggingface_hub import InferenceClient  # NEW ✨

  load_dotenv()
-
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
-

  class MCPClientWrapper:
-     """
-     Wraps an MCP stdio client + a chat LLM (Qwen3-235B-A22B via HF Serverless).
-     """
-
      def __init__(self):
-         self.session = None
-         self.exit_stack = None
          self.tools: List[Dict[str, Any]] = []

-         # --- NEW: Hugging Face client ---------------------------------------
-         self.hf_client = InferenceClient(
-             model="Qwen/Qwen3-235B-A22B",
-             token=os.getenv("HUGGINGFACE_API_TOKEN")
-         )
-         # --------------------------------------------------------------------
-
-     # ─────────────────────────── MCP CONNECTION ────────────────────────────
      def connect(self, server_path: str) -> str:
          return loop.run_until_complete(self._connect(server_path))

      async def _connect(self, server_path: str) -> str:
          if self.exit_stack:
              await self.exit_stack.aclose()

-         self.exit_stack = AsyncExitStack()
-
-         is_python = server_path.endswith(".py")
-         command = "python" if is_python else "node"
-
-         server_params = StdioServerParameters(
-             command=command,
-             args=[server_path],
-             env={"PYTHONIOENCODING": "utf-8", "PYTHONUNBUFFERED": "1"},
-         )

-         stdio_transport = await self.exit_stack.enter_async_context(
-             stdio_client(server_params)
-         )
-         self.stdio, self.write = stdio_transport

-         self.session = await self.exit_stack.enter_async_context(
-             ClientSession(self.stdio, self.write)
-         )
-         await self.session.initialize()

-         response = await self.session.list_tools()
-         self.tools = [
-             {
                  "name": tool.name,
                  "description": tool.description,
-                 "input_schema": tool.inputSchema,
-             }
-             for tool in response.tools
-         ]
-
-         tool_names = [tool["name"] for tool in self.tools]
-         return f"Connected to MCP server. Available tools: {', '.join(tool_names)}"
-
-     # ──────────────────────────── CHAT HANDLER ─────────────────────────────
-     def process_message(
-         self, message: str, history: List[Union[Dict[str, Any], ChatMessage]]
-     ) -> tuple:
-         if not self.session:
-             return (
-                 history
-                 + [
-                     {"role": "user", "content": message},
-                     {
-                         "role": "assistant",
-                         "content": "Please connect to an MCP server first.",
-                     },
-                 ],
-                 gr.Textbox(value=""),
-             )
-
          new_messages = loop.run_until_complete(self._process_query(message, history))
-         return (
-             history + [{"role": "user", "content": message}] + new_messages,
-             gr.Textbox(value=""),
-         )

-     # ────────────────────────── INTERNAL LLM CALL ─────────────────────────
-     async def _process_query(
-         self, message: str, history: List[Union[Dict[str, Any], ChatMessage]]
-     ):
-         """
-         Pushes the whole chat history to Qwen3-235B-A22B and returns its reply.
-         Tool calls are *not* forwarded – the HF endpoint only returns text.
-         """
-         # 1️⃣ Build message list in OpenAI-style dicts
-         messages: List[Dict[str, str]] = []
-         for item in history:
-             if isinstance(item, ChatMessage):
-                 role, content = item.role, item.content
-             else:
-                 role, content = item.get("role"), item.get("content")
-
-             if role in {"user", "assistant", "system"}:
-                 messages.append({"role": role, "content": content})
-         messages.append({"role": "user", "content": message})
-
-         # 2️⃣ Serialise to Qwen chat-markup
-         prompt_parts = []
-         for m in messages:
-             role = m["role"]
-             prompt_parts.append(f"<|im_start|>{role}\n{m['content']}<|im_end|>")
-         prompt_parts.append("<|im_start|>assistant")  # model will complete here
-         prompt = "\n".join(prompt_parts)
-
-         # 3️⃣ Call HF Serverless in a threadpool (non-blocking)
-         async def _generate():
-             return self.hf_client.text_generation(
-                 prompt,
-                 max_new_tokens=1024,
-                 temperature=0.7,
-                 stop_sequences=["<|im_end|>", "<|im_start|>"],
-             )

-         assistant_text: str = await asyncio.get_running_loop().run_in_executor(
-             None, _generate
-         )

-         # 4️⃣ Return in Gradio-friendly format
-         return [{"role": "assistant", "content": assistant_text.strip()}]


- # ──────────────────────────── GRADIO UI ───────────────────────────────────
- client = MCPClientWrapper()


  def gradio_interface():
-     with gr.Blocks(title="MCP Weather Client") as demo:
-         gr.Markdown("# MCP Weather Assistant")
-         gr.Markdown("Connect to your MCP weather server and chat with the assistant")
-
-         with gr.Row(equal_height=True):
-             with gr.Column(scale=4):
-                 server_path = gr.Textbox(
-                     label="Server Script Path",
-                     placeholder="Enter path to server script (e.g., weather.py)",
-                     value="gradio_mcp_server.py",
-                 )
-             with gr.Column(scale=1):
-                 connect_btn = gr.Button("Connect")
-
-         status = gr.Textbox(label="Connection Status", interactive=False)

          chatbot = gr.Chatbot(
              value=[],
-             height=500,
-             type="messages",
              show_copy_button=True,
-             avatar_images=("👤", "🤖"),
          )

-         with gr.Row(equal_height=True):
-             msg = gr.Textbox(
-                 label="Your Question",
-                 placeholder="Ask about weather or alerts (e.g., What's the weather in New York?)",
                  scale=4,
              )
-             clear_btn = gr.Button("Clear Chat", scale=1)

-         connect_btn.click(client.connect, inputs=server_path, outputs=status)
-         msg.submit(client.process_message, [msg, chatbot], [chatbot, msg])
-         clear_btn.click(lambda: [], None, chatbot)

      return demo

-
- # ──────────────────────────── ENTRY POINT ────────────────────────────────
  if __name__ == "__main__":
-     if not os.getenv("HUGGINGFACE_API_TOKEN"):
-         print(
-             "Warning: HUGGINGFACE_API_TOKEN not found in environment. "
-             "Set it in your .env file or Space secrets."
-         )

      interface = gradio_interface()
-     interface.launch(debug=True)  # ← typo fixed

  import asyncio
  import os
  import json
  from typing import List, Dict, Any, Union
  from contextlib import AsyncExitStack
+ import logging # Added for better debugging

+ import httpx # Added for making HTTP requests
  import gradio as gr
  from gradio.components.chatbot import ChatMessage
  from mcp import ClientSession, StdioServerParameters
  from mcp.client.stdio import stdio_client
+ # Removed Anthropic import
  from dotenv import load_dotenv

+ # --- Configuration ---
  load_dotenv()
+ HF_TOKEN = os.getenv("HF_TOKEN") # Changed from ANTHROPIC_API_KEY
+ HF_API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/Qwen3-235B-A22B/v1/chat/completions"
+ MODEL_NAME = "Qwen/Qwen3-235B-A22B" # Define model name
+ MAX_TOKENS = 1500 # Increased token limit slightly for potentially more verbose model
+ HTTP_TIMEOUT = 60 # Timeout for API requests in seconds
+
+ # --- Logging Setup ---
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
+ # --- Async Event Loop ---
+ # Use asyncio.get_event_loop() which handles loop creation/getting existing loop
+ # This avoids potential issues in some environments (like notebooks)
+ try:
+     loop = asyncio.get_running_loop()
+ except RuntimeError:
+     loop = asyncio.new_event_loop()
+     asyncio.set_event_loop(loop)

  class MCPClientWrapper:
      def __init__(self):
+         # Initialize session, stack, and tools list
+         self.session: ClientSession | None = None
+         self.exit_stack: AsyncExitStack | None = None
          self.tools: List[Dict[str, Any]] = []
+         # Removed Anthropic client initialization
+         # Add Hugging Face token check
+         if not HF_TOKEN:
+             logger.warning("HF_TOKEN environment variable not found. Hugging Face API calls will fail.")
+             # Optionally raise an error or handle this more gracefully
+             # raise ValueError("HF_TOKEN environment variable is required.")
+         self.hf_token = HF_TOKEN
+         # Initialize HTTP client (will be managed by AsyncExitStack)
+         self.http_client: httpx.AsyncClient | None = None

      def connect(self, server_path: str) -> str:
+         # Run the async connection logic in the event loop
          return loop.run_until_complete(self._connect(server_path))

      async def _connect(self, server_path: str) -> str:
+         # Gracefully close existing connections and resources if reconnecting
          if self.exit_stack:
+             logger.info("Closing existing connection and resources.")
              await self.exit_stack.aclose()
+             self.exit_stack = None # Reset stack
+             self.session = None # Reset session
+             self.http_client = None # Reset client
+
+         logger.info(f"Attempting to connect to MCP server: {server_path}")
+         self.exit_stack = AsyncExitStack() # Create a new exit stack for managing resources
+
+         try:
+             # Determine command based on file extension
+             is_python = server_path.lower().endswith('.py')
+             command = "python" if is_python else "node"
+             logger.info(f"Using command '{command}' for server.")
+
+             # Configure server parameters
+             server_params = StdioServerParameters(
+                 command=command,
+                 args=[server_path],
+                 env={"PYTHONIOENCODING": "utf-8", "PYTHONUNBUFFERED": "1"}
+             )

+             # Establish stdio transport with the MCP server
+             stdio_transport = await self.exit_stack.enter_async_context(stdio_client(server_params))
+             self.stdio, self.write = stdio_transport
+             logger.info("Stdio transport established.")

+             # Initialize the MCP client session
+             self.session = await self.exit_stack.enter_async_context(ClientSession(self.stdio, self.write))
+             await self.session.initialize()
+             logger.info("MCP session initialized.")

+             # Initialize the HTTP client for Hugging Face API calls
+             self.http_client = await self.exit_stack.enter_async_context(httpx.AsyncClient(timeout=HTTP_TIMEOUT))
+             logger.info("HTTP client initialized.")

+             # List available tools from the MCP server
+             response = await self.session.list_tools()
+             self.tools = [{
                  "name": tool.name,
                  "description": tool.description,
+                 "input_schema": tool.inputSchema # Keep schema for potential future use or richer prompts
+             } for tool in response.tools]
+             logger.info(f"Available tools: {[tool['name'] for tool in self.tools]}")
+
+             tool_names = [tool["name"] for tool in self.tools]
+             return f"Connected to MCP server. Available tools: {', '.join(tool_names) if tool_names else 'None'}"
+
+         except Exception as e:
+             logger.error(f"Connection failed: {e}", exc_info=True)
+             # Clean up resources if connection failed midway
+             if self.exit_stack:
+                 await self.exit_stack.aclose()
+                 self.exit_stack = None
+             self.session = None
+             self.http_client = None
+             return f"Connection failed: {e}"
+
+     def _format_tools_for_prompt(self) -> str:
+         # Create a description of tools for the LLM prompt
+         if not self.tools:
+             return "No tools available."
+
+         tool_descriptions = []
+         for tool in self.tools:
+             # Describe the tool and its expected input format (JSON schema)
+             desc = f"- Name: {tool['name']}\n"
+             desc += f"  Description: {tool['description']}\n"
+             desc += f"  Input JSON Schema: {json.dumps(tool['input_schema'], indent=2)}"
+             tool_descriptions.append(desc)
+
+         return "You have access to the following tools:\n" + "\n".join(tool_descriptions) + \
+                "\n\nTo use a tool, respond ONLY with a single JSON object matching this structure: " + \
+                "{\"tool_name\": \"<name_of_tool>\", \"tool_input\": {<arguments_as_object>}}. " + \
+                "Do not add any other text, explanation, or markdown formatting around the JSON object."
+
+
+     def _build_system_prompt(self) -> str:
+         # Construct the system prompt including tool instructions
+         system_prompt = "You are a helpful assistant."
+         tool_info = self._format_tools_for_prompt()
+         if tool_info != "No tools available.":
+             system_prompt += "\n\n" + tool_info
+         return system_prompt
+
+     async def _call_huggingface_api(self, messages: List[Dict[str, str]]) -> Dict[str, Any] | None:
+         # Helper function to call the Hugging Face Inference API
+         if not self.hf_token or not self.http_client:
+             logger.error("Hugging Face token or HTTP client not available.")
+             return {"error": "API client not configured."}
+
+         headers = {
+             "Authorization": f"Bearer {self.hf_token}",
+             "Content-Type": "application/json",
+         }
+         payload = {
+             "model": MODEL_NAME,
+             "messages": messages,
+             "max_tokens": MAX_TOKENS,
+             "stream": False, # Keeping it simple, not streaming for now
+             # Add other parameters like temperature if needed
+             # "temperature": 0.7,
+         }
+
+         logger.info(f"Sending request to HF API. Message count: {len(messages)}")
+         # Log message content carefully, maybe just roles or lengths in production
+         # logger.debug(f"Payload: {json.dumps(payload, indent=2)}")
+
+         try:
+             response = await self.http_client.post(HF_API_URL, headers=headers, json=payload)
+             response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
+             logger.info(f"Received response from HF API. Status: {response.status_code}")
+             return response.json()
+
+         except httpx.HTTPStatusError as e:
+             logger.error(f"HTTP error occurred: {e.response.status_code} - {e.response.text}")
+             return {"error": f"API request failed: {e.response.status_code}", "details": e.response.text}
+         except httpx.RequestError as e:
+             logger.error(f"Request error occurred: {e}")
+             return {"error": f"API request failed: {e}"}
+         except json.JSONDecodeError as e:
+             logger.error(f"Failed to decode JSON response: {e}")
+             # Attempt to get raw text if JSON decoding fails
+             raw_text = await response.aread() if 'response' in locals() else b""
+             logger.error(f"Raw Response: {raw_text.decode(errors='ignore')}")
+             return {"error": "Failed to decode API JSON response.", "raw_response": raw_text.decode(errors='ignore')}
+         except Exception as e:
+             logger.error(f"An unexpected error occurred during API call: {e}", exc_info=True)
+             return {"error": f"An unexpected error occurred: {e}"}
+
+     def process_message(self, message: str, history: List[Union[Dict[str, Any], ChatMessage]]) -> tuple:
+         # Check if connected to MCP server
+         if not self.session or not self.http_client:
+             # Append user message and error message to history
+             history.append({"role": "user", "content": message})
+             history.append({"role": "assistant", "content": "Error: Please connect to the MCP server and ensure HF_TOKEN is set."})
+             # Return updated history and clear input textbox
+             return history, gr.Textbox(value="")
+
+         # Run the async query processing logic
          new_messages = loop.run_until_complete(self._process_query(message, history))

+         # Append the original user message and the new assistant messages to history
+         history.append({"role": "user", "content": message})
+         history.extend(new_messages)

+         # Return updated history and clear input textbox
+         return history, gr.Textbox(value="")

+     async def _process_query(self, message: str, history: List[Union[Dict[str, Any], ChatMessage]]) -> List[Dict[str, Any]]:
+         # Build the list of messages in the format Hugging Face expects
+         hf_messages = [{"role": "system", "content": self._build_system_prompt()}]
+         for msg in history:
+             # Convert Gradio ChatMessage or dict to the required format
+             if isinstance(msg, ChatMessage):
+                 role, content = msg.role, msg.content
+             else:
+                 role, content = msg.get("role"), msg.get("content")

+             # Ensure content is a string (handle potential image dicts if added later)
+             if isinstance(content, dict):
+                 # Handle potential dict content (like images) - skip or represent as text for now
+                 content_str = json.dumps(content) # Or some other representation
+                 logger.warning(f"Found non-string content in history for role {role}, converting to JSON string.")
+             else:
+                 content_str = str(content) # Ensure it's a string
+
+             # Map roles if needed (e.g., 'bot' -> 'assistant') - current roles seem fine
+             if role in ["user", "assistant"]:
+                 hf_messages.append({"role": role, "content": content_str})
+             elif role == "system" and len(hf_messages) > 1: # Avoid duplicate system prompts if history already has one
+                 logger.warning("Skipping additional system message found in history.")
+             # Handle tool results if they were stored in history differently (not standard here)
+
+         # Add the current user message
+         hf_messages.append({"role": "user", "content": message})
+
+         # --- Make the API Call ---
+         response_data = await self._call_huggingface_api(hf_messages)
+
+         # Prepare list to hold messages for Gradio display
+         result_messages_for_gradio = []
+
+         # --- Handle API Response ---
+         if not response_data or "error" in response_data:
+             error_msg = response_data.get("error", "Unknown API error") if response_data else "No response from API"
+             details = response_data.get("details", "") if response_data else ""
+             logger.error(f"API call failed: {error_msg} {details}")
+             result_messages_for_gradio.append({
+                 "role": "assistant",
+                 "content": f"Sorry, I encountered an error calling the language model: {error_msg}" + (f"\nDetails: ```\n{details}\n```" if details else "")
+             })
+             return result_messages_for_gradio # Return error message to Gradio
+
+         # Extract the assistant's reply content
+         try:
+             # Adjust parsing based on actual HF API response structure for non-streaming chat completions
+             # Common structures: response_data['choices'][0]['message']['content']
+             # Or sometimes: response_data['generated_text']
+             assistant_content = response_data.get("choices", [{}])[0].get("message", {}).get("content", "")
+             if not assistant_content and "generated_text" in response_data: # Fallback for some models/endpoints
+                 assistant_content = response_data["generated_text"]
+
+             if not assistant_content:
+                 logger.error(f"Could not extract assistant content from response: {response_data}")
+                 raise ValueError("Empty or missing assistant content in API response.")
+
+             logger.info("Received assistant content.")
+             # logger.debug(f"Assistant content raw: {assistant_content}")
+
+         except (KeyError, IndexError, ValueError, TypeError) as e:
+             logger.error(f"Error parsing API response structure: {e}. Response: {response_data}", exc_info=True)
+             result_messages_for_gradio.append({
+                 "role": "assistant",
+                 "content": f"Sorry, I received an unexpected response format from the language model. Error: {e}"
+             })
+             return result_messages_for_gradio
+
+         # --- Check for Tool Use ---
+         # Try to parse the entire response as JSON (as instructed in the prompt)
+         tool_call_data = None
+         try:
+             potential_tool_call = json.loads(assistant_content)
+             # Check if it matches the expected tool call structure
+             if isinstance(potential_tool_call, dict) and "tool_name" in potential_tool_call and "tool_input" in potential_tool_call:
+                 tool_call_data = potential_tool_call
+                 logger.info(f"Detected tool call: {tool_call_data['tool_name']}")
+             else:
+                 # It's valid JSON, but not the tool format we asked for. Treat as text.
+                 logger.info("Response is JSON, but not a recognized tool call format.")
+                 pass # Keep assistant_content as is
+         except json.JSONDecodeError:
+             # Not JSON, assume it's a regular text response
+             logger.info("Response is not JSON, treating as text.")
+             pass # Keep assistant_content as is
+
+         # --- Process Tool Call or Text Response ---
+         if tool_call_data:
+             # It's a tool call!
+             tool_name = tool_call_data["tool_name"]
+             tool_args = tool_call_data["tool_input"]
+
+             # Check if the requested tool is valid/available
+             available_tool_names = [t["name"] for t in self.tools]
+             if tool_name not in available_tool_names:
+                 logger.warning(f"LLM requested unavailable tool: {tool_name}")
+                 # Inform the user and potentially ask the LLM again without the tool result
+                 result_messages_for_gradio.append({
+                     "role": "assistant",
+                     "content": f"I wanted to use the '{tool_name}' tool, but it seems it's not available right now. I'll try to answer without it."
+                 })
+                 # Optionally, make *another* call to the LLM telling it the tool failed.
+                 # For simplicity here, we'll just stop.
+
+                 # Or, make another call telling the LLM the tool is unavailable:
+                 # hf_messages.append({"role": "assistant", "content": assistant_content}) # Add the LLM's attempt
+                 # hf_messages.append({"role": "user", "content": f"The tool '{tool_name}' is not available. Please answer without using tools."})
+                 # follow_up_response_data = await self._call_huggingface_api(hf_messages)
+                 # ... process follow_up_response_data ... (similar to text response handling)

+             else:
+                 # Add messages to Gradio indicating tool use (similar to original)
+                 result_messages_for_gradio.append({
+                     "role": "assistant",
+                     "content": f"I need to use the **{tool_name}** tool to answer that.",
+                     "metadata": { # Keep metadata for potential UI enhancements
+                         "title": f"⏳ Using tool: {tool_name}",
+                         "log": f"Parameters: {json.dumps(tool_args, ensure_ascii=False)}", # Use ensure_ascii=False for readability
+                         "status": "pending",
+                         "id": f"tool_call_{tool_name}"
+                     }
+                 })
+                 result_messages_for_gradio.append({
+                     "role": "assistant",
+                     "content": f"```json\n{json.dumps(tool_args, indent=2, ensure_ascii=False)}\n```",
+                     "metadata": {
+                         "parent_id": f"tool_call_{tool_name}",
+                         "id": f"params_{tool_name}",
+                         "title": "Tool Parameters"
+                     }
+                 })
+
+                 # --- Call the actual MCP tool ---
+                 try:
+                     logger.info(f"Calling MCP tool: {tool_name} with args: {tool_args}")
+                     mcp_result = await self.session.call_tool(tool_name, tool_args)
+                     logger.info(f"Received result from tool: {tool_name}")
+                     tool_result_content = mcp_result.content
+                     # Mark Gradio message as done
+                     if result_messages_for_gradio and "metadata" in result_messages_for_gradio[-2]:
+                         result_messages_for_gradio[-2]["metadata"]["status"] = "done"
+                         result_messages_for_gradio[-2]["metadata"]["title"] = f"✅ Used tool: {tool_name}"
+
+                     # Prepare tool result for Gradio display
+                     result_messages_for_gradio.append({
+                         "role": "assistant",
+                         "content": f"Result from **{tool_name}**:",
+                         "metadata": {
+                             "title": f"Tool Result: {tool_name}",
+                             "status": "done",
+                             "id": f"result_{tool_name}"
+                         }
+                     })
+
+                     # Attempt to format tool result nicely for Gradio (handle JSON, images, etc.)
+                     display_content = tool_result_content # Default to raw content
+                     try:
+                         # Try parsing as JSON
+                         result_json = json.loads(tool_result_content)
+                         if isinstance(result_json, dict) and result_json.get("type") == "image" and "url" in result_json:
+                             # Handle image result
+                             display_content = {"path": result_json["url"], "alt_text": result_json.get("message", "Generated image")}
+                             result_messages_for_gradio.append({
+                                 "role": "assistant",
+                                 "content": display_content,
+                                 "metadata": {"parent_id": f"result_{tool_name}", "id": f"image_{tool_name}", "title": "Image Result"}
+                             })
+                             display_content = None # Mark as handled
+                         else:
+                             # Display other JSON nicely formatted
+                             display_content = f"```json\n{json.dumps(result_json, indent=2, ensure_ascii=False)}\n```"
+                     except json.JSONDecodeError:
+                         # Not JSON, display as plain code block if it's not empty
+                         if tool_result_content:
+                             display_content = f"```\n{tool_result_content}\n```"
+                         else:
+                             display_content = "_Tool returned empty content_"
+
+                     if display_content: # Add the formatted/raw result if not handled (like image)
+                         result_messages_for_gradio.append({
+                             "role": "assistant",
+                             "content": display_content,
+                             "metadata": {"parent_id": f"result_{tool_name}", "id": f"raw_result_{tool_name}", "title": "Formatted Output"}
+                         })
+
+
+                     # --- Send tool result back to LLM ---
+                     # Append the *original* assistant message (the tool call JSON) and the user message with the result
+                     hf_messages.append({"role": "assistant", "content": assistant_content})
+                     # Use a clear format for the tool result for the LLM
+                     user_tool_result_message = f"Tool result for {tool_name}:\n```\n{tool_result_content}\n```"
+                     hf_messages.append({"role": "user", "content": user_tool_result_message})
+
+                     logger.info("Sending tool result back to HF API for final response.")
+                     final_response_data = await self._call_huggingface_api(hf_messages)
+
+                     # Process the final response from the LLM
+                     if final_response_data and "error" not in final_response_data:
+                         try:
+                             final_assistant_content = final_response_data.get("choices", [{}])[0].get("message", {}).get("content", "")
+                             if not final_assistant_content and "generated_text" in final_response_data:
+                                 final_assistant_content = final_response_data["generated_text"]
+
+                             if final_assistant_content:
+                                 result_messages_for_gradio.append({
+                                     "role": "assistant",
+                                     "content": final_assistant_content
+                                 })
+                             else:
+                                 raise ValueError("Empty or missing final assistant content.")
+                         except (KeyError, IndexError, ValueError, TypeError) as e:
+                             logger.error(f"Error parsing final API response: {e}. Response: {final_response_data}", exc_info=True)
+                             result_messages_for_gradio.append({
+                                 "role": "assistant",
+                                 "content": f"Sorry, I couldn't process the tool result properly. Error: {e}"
+                             })
+                     else:
+                         # Handle error in the *second* API call
+                         error_msg = final_response_data.get("error", "Unknown API error") if final_response_data else "No final response"
+                         details = final_response_data.get("details", "") if final_response_data else ""
+                         logger.error(f"Final API call failed: {error_msg} {details}")
+                         result_messages_for_gradio.append({
+                             "role": "assistant",
+                             "content": f"Sorry, I encountered an error after using the tool: {error_msg}" + (f"\nDetails: ```\n{details}\n```" if details else "")
+                         })
+
+                 except Exception as e:
+                     logger.error(f"Error calling MCP tool {tool_name}: {e}", exc_info=True)
+                     # Mark Gradio message as failed
+                     if result_messages_for_gradio and "metadata" in result_messages_for_gradio[-2]:
+                         result_messages_for_gradio[-2]["metadata"]["status"] = "error"
+                         result_messages_for_gradio[-2]["metadata"]["title"] = f"❌ Error using tool: {tool_name}"
+                     # Inform user about the tool call failure
+                     result_messages_for_gradio.append({
+                         "role": "assistant",
+                         "content": f"Sorry, I encountered an error when trying to use the tool '{tool_name}': {e}"
+                     })
+                     # Don't proceed to call LLM again if tool failed
+
+         else:
+             # It's a regular text response, just add it
+             logger.info("Adding regular text response to Gradio output.")
+             result_messages_for_gradio.append({
+                 "role": "assistant",
+                 "content": assistant_content
+             })
+
+         # Return the list of messages to be added to the Gradio chatbot
+         return result_messages_for_gradio
+
+     async def close_connection(self):
+         # Method to explicitly close connections if needed (e.g., on app shutdown)
+         if self.exit_stack:
+             logger.info("Closing MCP connection and HTTP client.")
+             await self.exit_stack.aclose()
+             self.exit_stack = None
+             self.session = None
+             self.http_client = None

+ # --- Gradio Interface Setup ---
+ client = MCPClientWrapper() # Instantiate the wrapper

  def gradio_interface():
+     # Create the Gradio Blocks UI
+     with gr.Blocks(title="MCP Client + HF Inference", theme=gr.themes.Soft()) as demo:
+         gr.Markdown("# MCP Assistant (Hugging Face Backend)")
+         gr.Markdown(f"Connect to your MCP server and chat with an assistant powered by `{MODEL_NAME}`.")
+
+         # Connection Row
+         with gr.Row():
+             server_path = gr.Textbox(
+                 label="MCP Server Script Path",
+                 placeholder="Enter path to server script (e.g., weather.py)",
+                 # Default to a common name, update if yours is different
+                 value="gradio_mcp_server.py",
+                 scale=3
+             )
+             connect_btn = gr.Button("Connect to MCP Server", scale=1)
+
+         status = gr.Textbox(label="Status", interactive=False, placeholder="Not connected")

+         # Chatbot display
          chatbot = gr.Chatbot(
+             label="Conversation",
              value=[],
+             elem_id="chatbot", # Add elem_id for potential CSS styling
+             height=600,
              show_copy_button=True,
+             bubble_full_width=False, # Improves readability
+             avatar_images=("👤", "🤗") # User and HF avatar
          )

+         # Input Row
+         with gr.Row():
+             msg_textbox = gr.Textbox(
+                 label="Your Message",
+                 placeholder=f"Ask a question...",
                  scale=4,
+                 autofocus=True # Focus input on load
              )
+             # Submit button (alternative to pressing Enter)
+             # submit_btn = gr.Button("Send", scale=1, variant="primary")
+             # Clear button
+             clear_btn = gr.Button("🗑️ Clear Chat", scale=1)
+
+         # --- Event Handlers ---
+         # Connect button action
+         connect_btn.click(
+             client.connect, # Function to call
+             inputs=[server_path], # Input component(s)
+             outputs=[status] # Output component(s)
+         )
+
+         # Function to handle message submission (Enter key or Send button)
+         submit_action = msg_textbox.submit(
+             client.process_message, # Function to call
+             inputs=[msg_textbox, chatbot], # Input components: message text, current chat history
+             outputs=[chatbot, msg_textbox] # Output components: updated chat history, cleared message box
+         )
+         # If using a Send button:
+         # submit_btn.click(client.process_message, [msg_textbox, chatbot], [chatbot, msg_textbox])
+
+         # Clear button action
+         clear_btn.click(
+             lambda: ([], None), # Function to return empty list for chatbot and None for status (optional)
+             [], # No inputs
+             [chatbot, status], # Components to clear/reset
+             queue=False # Run immediately
+         )

+         # Define app shutdown behavior (optional but good practice)
+         # This attempts to close connections when Gradio shuts down
+         # Note: Graceful shutdown in Gradio can be tricky.
+         # demo.unload(client.close_connection) # Requires Gradio 4+ and might need async handling adjustments

      return demo

+ # --- Main Execution ---
  if __name__ == "__main__":
+     # Check for Hugging Face token on startup
+     if not HF_TOKEN:
+         print("\n" + "="*50)
+         print(" WARNING: HF_TOKEN environment variable not found! ")
+         print(" Please set it in your .env file or environment.")
+         print(" The application will run, but API calls will fail. ")
+         print("="*50 + "\n")

      interface = gradio_interface()
+     interface.launch(debug=True)
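
For reference, a minimal standalone sketch of the request/response shape the new _call_huggingface_api helper relies on. The endpoint URL, headers, payload fields, the choices[0].message.content parsing path, and the {"tool_name": ..., "tool_input": ...} tool-call convention are taken from the diff above; the example question and the "get_weather" tool name are illustrative assumptions, not part of the Space.

import os
import json
import httpx

HF_API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/Qwen3-235B-A22B/v1/chat/completions"

# Payload mirrors the fields app.py sends (model, messages, max_tokens, stream).
payload = {
    "model": "Qwen/Qwen3-235B-A22B",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one short sentence."},  # illustrative prompt
    ],
    "max_tokens": 100,
    "stream": False,
}

resp = httpx.post(
    HF_API_URL,
    headers={
        "Authorization": f"Bearer {os.environ['HF_TOKEN']}",  # assumes HF_TOKEN is set in the environment
        "Content-Type": "application/json",
    },
    json=payload,
    timeout=60,
)
resp.raise_for_status()
data = resp.json()

# app.py reads the reply from choices[0].message.content, with generated_text as a fallback.
reply = data.get("choices", [{}])[0].get("message", {}).get("content", "")
print(reply)

# When tools are listed, the system prompt asks the model to answer with a bare JSON object
# of this shape, which _process_query then routes to session.call_tool(tool_name, tool_input).
# "get_weather" and its arguments are hypothetical, for illustration only.
example_tool_call = {"tool_name": "get_weather", "tool_input": {"city": "New York"}}
print(json.dumps(example_tool_call))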