just kidding I meant 235B-A22B
app.py
CHANGED
@@ -1,205 +1,558 @@
-"""
-app.py – Hugging Face Space
-Swaps Anthropic for HF Serverless Inference (Qwen3-235B-A22B)
-"""
-
import asyncio
import os
import json
from typing import List, Dict, Any, Union
from contextlib import AsyncExitStack

import gradio as gr
from gradio.components.chatbot import ChatMessage
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
from dotenv import load_dotenv
-from huggingface_hub import InferenceClient  # NEW ✨

load_dotenv()
-
-
-
-

class MCPClientWrapper:
-    """
-    Wraps an MCP stdio client + a chat LLM (Qwen3-235B-A22B via HF Serverless).
-    """
-
    def __init__(self):
-
-        self.
        self.tools: List[Dict[str, Any]] = []

-        # --- NEW: Hugging Face client ---------------------------------------
-        self.hf_client = InferenceClient(
-            model="Qwen/Qwen3-235B-A22B",
-            token=os.getenv("HUGGINGFACE_API_TOKEN")
-        )
-        # --------------------------------------------------------------------
-
-    # ─────────────────────────── MCP CONNECTION ────────────────────────────
    def connect(self, server_path: str) -> str:
        return loop.run_until_complete(self._connect(server_path))

    async def _connect(self, server_path: str) -> str:
        if self.exit_stack:
            await self.exit_stack.aclose()

-
-
-
-
-
-        server_params = StdioServerParameters(
-            command=command,
-            args=[server_path],
-            env={"PYTHONIOENCODING": "utf-8", "PYTHONUNBUFFERED": "1"},
-        )

-
-
-
-

-
-
-
-        await self.session.initialize()

-
-
-            {
                "name": tool.name,
                "description": tool.description,
-                "input_schema": tool.inputSchema
-            }
-            for tool in
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
        new_messages = loop.run_until_complete(self._process_query(message, history))
-        return (
-            history + [{"role": "user", "content": message}] + new_messages,
-            gr.Textbox(value=""),
-        )

-
-
-
-    ):
-        """
-        Pushes the whole chat history to Qwen3-235B-A22B and returns its reply.
-        Tool calls are *not* forwarded – the HF endpoint only returns text.
-        """
-        # 1️⃣ Build message list in OpenAI-style dicts
-        messages: List[Dict[str, str]] = []
-        for item in history:
-            if isinstance(item, ChatMessage):
-                role, content = item.role, item.content
-            else:
-                role, content = item.get("role"), item.get("content")
-
-            if role in {"user", "assistant", "system"}:
-                messages.append({"role": role, "content": content})
-        messages.append({"role": "user", "content": message})
-
-        # 2️⃣ Serialise to Qwen chat-markup
-        prompt_parts = []
-        for m in messages:
-            role = m["role"]
-            prompt_parts.append(f"<|im_start|>{role}\n{m['content']}<|im_end|>")
-        prompt_parts.append("<|im_start|>assistant")  # model will complete here
-        prompt = "\n".join(prompt_parts)
-
-        # 3️⃣ Call HF Serverless in a threadpool (non-blocking)
-        async def _generate():
-            return self.hf_client.text_generation(
-                prompt,
-                max_new_tokens=1024,
-                temperature=0.7,
-                stop_sequences=["<|im_end|>", "<|im_start|>"],
-            )

-
-
-        )

-
-

-
-

def gradio_interface():
-
-
-        gr.Markdown("
-
-
-
-
-
-
-
-
-
-
-
-

        chatbot = gr.Chatbot(
            value=[],
-
-
            show_copy_button=True,
-
        )

-
-
-
-
                scale=4,
            )
-

-
-
-

    return demo

-
-# ──────────────────────────── ENTRY POINT ────────────────────────────────
if __name__ == "__main__":
-
-
-
-
-    )

    interface = gradio_interface()
-    interface.launch(debug=True)

import asyncio
import os
import json
from typing import List, Dict, Any, Union
from contextlib import AsyncExitStack
+import logging  # Added for better debugging

+import httpx  # Added for making HTTP requests
import gradio as gr
from gradio.components.chatbot import ChatMessage
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
+# Removed Anthropic import
from dotenv import load_dotenv

+# --- Configuration ---
load_dotenv()
+HF_TOKEN = os.getenv("HF_TOKEN")  # Changed from ANTHROPIC_API_KEY
+HF_API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/Qwen3-235B-A22B/v1/chat/completions"
+MODEL_NAME = "Qwen/Qwen3-235B-A22B"  # Define model name
+MAX_TOKENS = 1500  # Increased token limit slightly for potentially more verbose model
+HTTP_TIMEOUT = 60  # Timeout for API requests in seconds
+
+# --- Logging Setup ---
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+# --- Async Event Loop ---
+# Reuse the running event loop if one exists; otherwise create and set a new one.
+# This avoids potential issues in some environments (like notebooks).
+try:
+    loop = asyncio.get_running_loop()
+except RuntimeError:
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)

class MCPClientWrapper:
    def __init__(self):
+        # Initialize session, stack, and tools list
+        self.session: ClientSession | None = None
+        self.exit_stack: AsyncExitStack | None = None
        self.tools: List[Dict[str, Any]] = []
+        # Removed Anthropic client initialization
+        # Add Hugging Face token check
+        if not HF_TOKEN:
+            logger.warning("HF_TOKEN environment variable not found. Hugging Face API calls will fail.")
+            # Optionally raise an error or handle this more gracefully
+            # raise ValueError("HF_TOKEN environment variable is required.")
+        self.hf_token = HF_TOKEN
+        # Initialize HTTP client (will be managed by AsyncExitStack)
+        self.http_client: httpx.AsyncClient | None = None

    def connect(self, server_path: str) -> str:
+        # Run the async connection logic in the event loop
        return loop.run_until_complete(self._connect(server_path))

    async def _connect(self, server_path: str) -> str:
+        # Gracefully close existing connections and resources if reconnecting
        if self.exit_stack:
+            logger.info("Closing existing connection and resources.")
            await self.exit_stack.aclose()
+            self.exit_stack = None  # Reset stack
+            self.session = None  # Reset session
+            self.http_client = None  # Reset client
+
+        logger.info(f"Attempting to connect to MCP server: {server_path}")
+        self.exit_stack = AsyncExitStack()  # Create a new exit stack for managing resources
+
+        try:
+            # Determine command based on file extension
+            is_python = server_path.lower().endswith('.py')
+            command = "python" if is_python else "node"
+            logger.info(f"Using command '{command}' for server.")
+
+            # Configure server parameters
+            server_params = StdioServerParameters(
+                command=command,
+                args=[server_path],
+                env={"PYTHONIOENCODING": "utf-8", "PYTHONUNBUFFERED": "1"}
+            )

+            # Establish stdio transport with the MCP server
+            stdio_transport = await self.exit_stack.enter_async_context(stdio_client(server_params))
+            self.stdio, self.write = stdio_transport
+            logger.info("Stdio transport established.")

+            # Initialize the MCP client session
+            self.session = await self.exit_stack.enter_async_context(ClientSession(self.stdio, self.write))
+            await self.session.initialize()
+            logger.info("MCP session initialized.")

+            # Initialize the HTTP client for Hugging Face API calls
+            self.http_client = await self.exit_stack.enter_async_context(httpx.AsyncClient(timeout=HTTP_TIMEOUT))
+            logger.info("HTTP client initialized.")

+            # List available tools from the MCP server
+            response = await self.session.list_tools()
+            self.tools = [{
                "name": tool.name,
                "description": tool.description,
+                "input_schema": tool.inputSchema  # Keep schema for potential future use or richer prompts
+            } for tool in response.tools]
+            logger.info(f"Available tools: {[tool['name'] for tool in self.tools]}")
+
+            tool_names = [tool["name"] for tool in self.tools]
+            return f"Connected to MCP server. Available tools: {', '.join(tool_names) if tool_names else 'None'}"
+
+        except Exception as e:
+            logger.error(f"Connection failed: {e}", exc_info=True)
+            # Clean up resources if connection failed midway
+            if self.exit_stack:
+                await self.exit_stack.aclose()
+            self.exit_stack = None
+            self.session = None
+            self.http_client = None
+            return f"Connection failed: {e}"
+
+    def _format_tools_for_prompt(self) -> str:
+        # Create a description of tools for the LLM prompt
+        if not self.tools:
+            return "No tools available."
+
+        tool_descriptions = []
+        for tool in self.tools:
+            # Describe the tool and its expected input format (JSON schema)
+            desc = f"- Name: {tool['name']}\n"
+            desc += f" Description: {tool['description']}\n"
+            desc += f" Input JSON Schema: {json.dumps(tool['input_schema'], indent=2)}"
+            tool_descriptions.append(desc)
+
+        return "You have access to the following tools:\n" + "\n".join(tool_descriptions) + \
+               "\n\nTo use a tool, respond ONLY with a single JSON object matching this structure: " + \
+               "{\"tool_name\": \"<name_of_tool>\", \"tool_input\": {<arguments_as_object>}}. " + \
+               "Do not add any other text, explanation, or markdown formatting around the JSON object."
+
+
+    def _build_system_prompt(self) -> str:
+        # Construct the system prompt including tool instructions
+        system_prompt = "You are a helpful assistant."
+        tool_info = self._format_tools_for_prompt()
+        if tool_info != "No tools available.":
+            system_prompt += "\n\n" + tool_info
+        return system_prompt
+
+    async def _call_huggingface_api(self, messages: List[Dict[str, str]]) -> Dict[str, Any] | None:
+        # Helper function to call the Hugging Face Inference API
+        if not self.hf_token or not self.http_client:
+            logger.error("Hugging Face token or HTTP client not available.")
+            return {"error": "API client not configured."}
+
+        headers = {
+            "Authorization": f"Bearer {self.hf_token}",
+            "Content-Type": "application/json",
+        }
+        payload = {
+            "model": MODEL_NAME,
+            "messages": messages,
+            "max_tokens": MAX_TOKENS,
+            "stream": False,  # Keeping it simple, not streaming for now
+            # Add other parameters like temperature if needed
+            # "temperature": 0.7,
+        }
+
+        logger.info(f"Sending request to HF API. Message count: {len(messages)}")
+        # Log message content carefully, maybe just roles or lengths in production
+        # logger.debug(f"Payload: {json.dumps(payload, indent=2)}")
+
+        try:
+            response = await self.http_client.post(HF_API_URL, headers=headers, json=payload)
+            response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
+            logger.info(f"Received response from HF API. Status: {response.status_code}")
+            return response.json()
+
+        except httpx.HTTPStatusError as e:
+            logger.error(f"HTTP error occurred: {e.response.status_code} - {e.response.text}")
+            return {"error": f"API request failed: {e.response.status_code}", "details": e.response.text}
+        except httpx.RequestError as e:
+            logger.error(f"Request error occurred: {e}")
+            return {"error": f"API request failed: {e}"}
+        except json.JSONDecodeError as e:
+            logger.error(f"Failed to decode JSON response: {e}")
+            # Attempt to get raw text if JSON decoding fails
+            raw_text = await response.aread() if 'response' in locals() else b""
+            logger.error(f"Raw Response: {raw_text.decode(errors='ignore')}")
+            return {"error": "Failed to decode API JSON response.", "raw_response": raw_text.decode(errors='ignore')}
+        except Exception as e:
+            logger.error(f"An unexpected error occurred during API call: {e}", exc_info=True)
+            return {"error": f"An unexpected error occurred: {e}"}
+
+    def process_message(self, message: str, history: List[Union[Dict[str, Any], ChatMessage]]) -> tuple:
+        # Check if connected to MCP server
+        if not self.session or not self.http_client:
+            # Append user message and error message to history
+            history.append({"role": "user", "content": message})
+            history.append({"role": "assistant", "content": "Error: Please connect to the MCP server and ensure HF_TOKEN is set."})
+            # Return updated history and clear input textbox
+            return history, gr.Textbox(value="")
+
+        # Run the async query processing logic
        new_messages = loop.run_until_complete(self._process_query(message, history))

+        # Append the original user message and the new assistant messages to history
+        history.append({"role": "user", "content": message})
+        history.extend(new_messages)

+        # Return updated history and clear input textbox
+        return history, gr.Textbox(value="")

+    async def _process_query(self, message: str, history: List[Union[Dict[str, Any], ChatMessage]]) -> List[Dict[str, Any]]:
+        # Build the list of messages in the format Hugging Face expects
+        hf_messages = [{"role": "system", "content": self._build_system_prompt()}]
+        for msg in history:
+            # Convert Gradio ChatMessage or dict to the required format
+            if isinstance(msg, ChatMessage):
+                role, content = msg.role, msg.content
+            else:
+                role, content = msg.get("role"), msg.get("content")

+            # Ensure content is a string (handle potential image dicts if added later)
+            if isinstance(content, dict):
+                # Handle potential dict content (like images) - skip or represent as text for now
+                content_str = json.dumps(content)  # Or some other representation
+                logger.warning(f"Found non-string content in history for role {role}, converting to JSON string.")
+            else:
+                content_str = str(content)  # Ensure it's a string
+
+            # Map roles if needed (e.g., 'bot' -> 'assistant') - current roles seem fine
+            if role in ["user", "assistant"]:
+                hf_messages.append({"role": role, "content": content_str})
+            elif role == "system" and len(hf_messages) > 1:  # Avoid duplicate system prompts if history already has one
+                logger.warning("Skipping additional system message found in history.")
+            # Handle tool results if they were stored in history differently (not standard here)
+
+        # Add the current user message
+        hf_messages.append({"role": "user", "content": message})
+
+        # --- Make the API Call ---
+        response_data = await self._call_huggingface_api(hf_messages)
+
+        # Prepare list to hold messages for Gradio display
+        result_messages_for_gradio = []
+
+        # --- Handle API Response ---
+        if not response_data or "error" in response_data:
+            error_msg = response_data.get("error", "Unknown API error") if response_data else "No response from API"
+            details = response_data.get("details", "") if response_data else ""
+            logger.error(f"API call failed: {error_msg} {details}")
+            result_messages_for_gradio.append({
+                "role": "assistant",
+                "content": f"Sorry, I encountered an error calling the language model: {error_msg}" + (f"\nDetails: ```\n{details}\n```" if details else "")
+            })
+            return result_messages_for_gradio  # Return error message to Gradio
+
+        # Extract the assistant's reply content
+        try:
+            # Adjust parsing based on actual HF API response structure for non-streaming chat completions
+            # Common structures: response_data['choices'][0]['message']['content']
+            # Or sometimes: response_data['generated_text']
+            assistant_content = response_data.get("choices", [{}])[0].get("message", {}).get("content", "")
+            if not assistant_content and "generated_text" in response_data:  # Fallback for some models/endpoints
+                assistant_content = response_data["generated_text"]
+
+            if not assistant_content:
+                logger.error(f"Could not extract assistant content from response: {response_data}")
+                raise ValueError("Empty or missing assistant content in API response.")
+
+            logger.info("Received assistant content.")
+            # logger.debug(f"Assistant content raw: {assistant_content}")
+
+        except (KeyError, IndexError, ValueError, TypeError) as e:
+            logger.error(f"Error parsing API response structure: {e}. Response: {response_data}", exc_info=True)
+            result_messages_for_gradio.append({
+                "role": "assistant",
+                "content": f"Sorry, I received an unexpected response format from the language model. Error: {e}"
+            })
+            return result_messages_for_gradio
+
+        # --- Check for Tool Use ---
+        # Try to parse the entire response as JSON (as instructed in the prompt)
+        tool_call_data = None
+        try:
+            potential_tool_call = json.loads(assistant_content)
+            # Check if it matches the expected tool call structure
+            if isinstance(potential_tool_call, dict) and "tool_name" in potential_tool_call and "tool_input" in potential_tool_call:
+                tool_call_data = potential_tool_call
+                logger.info(f"Detected tool call: {tool_call_data['tool_name']}")
+            else:
+                # It's valid JSON, but not the tool format we asked for. Treat as text.
+                logger.info("Response is JSON, but not a recognized tool call format.")
+                pass  # Keep assistant_content as is
+        except json.JSONDecodeError:
+            # Not JSON, assume it's a regular text response
+            logger.info("Response is not JSON, treating as text.")
+            pass  # Keep assistant_content as is
+
+        # --- Process Tool Call or Text Response ---
+        if tool_call_data:
+            # It's a tool call!
+            tool_name = tool_call_data["tool_name"]
+            tool_args = tool_call_data["tool_input"]
+
+            # Check if the requested tool is valid/available
+            available_tool_names = [t["name"] for t in self.tools]
+            if tool_name not in available_tool_names:
+                logger.warning(f"LLM requested unavailable tool: {tool_name}")
+                # Inform the user and potentially ask the LLM again without the tool result
+                result_messages_for_gradio.append({
+                    "role": "assistant",
+                    "content": f"I wanted to use the '{tool_name}' tool, but it seems it's not available right now. I'll try to answer without it."
+                })
+                # Optionally, make *another* call to the LLM telling it the tool failed.
+                # For simplicity here, we'll just stop.
+
+                # Or, make another call telling the LLM the tool is unavailable:
+                # hf_messages.append({"role": "assistant", "content": assistant_content})  # Add the LLM's attempt
+                # hf_messages.append({"role": "user", "content": f"The tool '{tool_name}' is not available. Please answer without using tools."})
+                # follow_up_response_data = await self._call_huggingface_api(hf_messages)
+                # ... process follow_up_response_data ... (similar to text response handling)

+            else:
+                # Add messages to Gradio indicating tool use (similar to original)
+                result_messages_for_gradio.append({
+                    "role": "assistant",
+                    "content": f"I need to use the **{tool_name}** tool to answer that.",
+                    "metadata": {  # Keep metadata for potential UI enhancements
+                        "title": f"⏳ Using tool: {tool_name}",
+                        "log": f"Parameters: {json.dumps(tool_args, ensure_ascii=False)}",  # Use ensure_ascii=False for readability
+                        "status": "pending",
+                        "id": f"tool_call_{tool_name}"
+                    }
+                })
+                result_messages_for_gradio.append({
+                    "role": "assistant",
+                    "content": f"```json\n{json.dumps(tool_args, indent=2, ensure_ascii=False)}\n```",
+                    "metadata": {
+                        "parent_id": f"tool_call_{tool_name}",
+                        "id": f"params_{tool_name}",
+                        "title": "Tool Parameters"
+                    }
+                })
+
+                # --- Call the actual MCP tool ---
+                try:
+                    logger.info(f"Calling MCP tool: {tool_name} with args: {tool_args}")
+                    mcp_result = await self.session.call_tool(tool_name, tool_args)
+                    logger.info(f"Received result from tool: {tool_name}")
+                    tool_result_content = mcp_result.content
+                    # The MCP SDK may return a list of content parts; join them into one string for parsing/display
+                    if isinstance(tool_result_content, list):
+                        tool_result_content = "\n".join(
+                            getattr(part, "text", str(part)) for part in tool_result_content
+                        )
+                    # Mark Gradio message as done
+                    if result_messages_for_gradio and "metadata" in result_messages_for_gradio[-2]:
+                        result_messages_for_gradio[-2]["metadata"]["status"] = "done"
+                        result_messages_for_gradio[-2]["metadata"]["title"] = f"✅ Used tool: {tool_name}"
+
+                    # Prepare tool result for Gradio display
+                    result_messages_for_gradio.append({
+                        "role": "assistant",
+                        "content": f"Result from **{tool_name}**:",
+                        "metadata": {
+                            "title": f"Tool Result: {tool_name}",
+                            "status": "done",
+                            "id": f"result_{tool_name}"
+                        }
+                    })
+
+                    # Attempt to format tool result nicely for Gradio (handle JSON, images, etc.)
+                    display_content = tool_result_content  # Default to raw content
+                    try:
+                        # Try parsing as JSON
+                        result_json = json.loads(tool_result_content)
+                        if isinstance(result_json, dict) and result_json.get("type") == "image" and "url" in result_json:
+                            # Handle image result
+                            display_content = {"path": result_json["url"], "alt_text": result_json.get("message", "Generated image")}
+                            result_messages_for_gradio.append({
+                                "role": "assistant",
+                                "content": display_content,
+                                "metadata": {"parent_id": f"result_{tool_name}", "id": f"image_{tool_name}", "title": "Image Result"}
+                            })
+                            display_content = None  # Mark as handled
+                        else:
+                            # Display other JSON nicely formatted
+                            display_content = f"```json\n{json.dumps(result_json, indent=2, ensure_ascii=False)}\n```"
+                    except json.JSONDecodeError:
+                        # Not JSON, display as plain code block if it's not empty
+                        if tool_result_content:
+                            display_content = f"```\n{tool_result_content}\n```"
+                        else:
+                            display_content = "_Tool returned empty content_"
+
+                    if display_content:  # Add the formatted/raw result if not handled (like image)
+                        result_messages_for_gradio.append({
+                            "role": "assistant",
+                            "content": display_content,
+                            "metadata": {"parent_id": f"result_{tool_name}", "id": f"raw_result_{tool_name}", "title": "Formatted Output"}
+                        })
+
+
+                    # --- Send tool result back to LLM ---
+                    # Append the *original* assistant message (the tool call JSON) and the user message with the result
+                    hf_messages.append({"role": "assistant", "content": assistant_content})
+                    # Use a clear format for the tool result for the LLM
+                    user_tool_result_message = f"Tool result for {tool_name}:\n```\n{tool_result_content}\n```"
+                    hf_messages.append({"role": "user", "content": user_tool_result_message})
+
+                    logger.info("Sending tool result back to HF API for final response.")
+                    final_response_data = await self._call_huggingface_api(hf_messages)
+
+                    # Process the final response from the LLM
+                    if final_response_data and "error" not in final_response_data:
+                        try:
+                            final_assistant_content = final_response_data.get("choices", [{}])[0].get("message", {}).get("content", "")
+                            if not final_assistant_content and "generated_text" in final_response_data:
+                                final_assistant_content = final_response_data["generated_text"]
+
+                            if final_assistant_content:
+                                result_messages_for_gradio.append({
+                                    "role": "assistant",
+                                    "content": final_assistant_content
+                                })
+                            else:
+                                raise ValueError("Empty or missing final assistant content.")
+                        except (KeyError, IndexError, ValueError, TypeError) as e:
+                            logger.error(f"Error parsing final API response: {e}. Response: {final_response_data}", exc_info=True)
+                            result_messages_for_gradio.append({
+                                "role": "assistant",
+                                "content": f"Sorry, I couldn't process the tool result properly. Error: {e}"
+                            })
+                    else:
+                        # Handle error in the *second* API call
+                        error_msg = final_response_data.get("error", "Unknown API error") if final_response_data else "No final response"
+                        details = final_response_data.get("details", "") if final_response_data else ""
+                        logger.error(f"Final API call failed: {error_msg} {details}")
+                        result_messages_for_gradio.append({
+                            "role": "assistant",
+                            "content": f"Sorry, I encountered an error after using the tool: {error_msg}" + (f"\nDetails: ```\n{details}\n```" if details else "")
+                        })
+
+                except Exception as e:
+                    logger.error(f"Error calling MCP tool {tool_name}: {e}", exc_info=True)
+                    # Mark Gradio message as failed
+                    if result_messages_for_gradio and "metadata" in result_messages_for_gradio[-2]:
+                        result_messages_for_gradio[-2]["metadata"]["status"] = "error"
+                        result_messages_for_gradio[-2]["metadata"]["title"] = f"❌ Error using tool: {tool_name}"
+                    # Inform user about the tool call failure
+                    result_messages_for_gradio.append({
+                        "role": "assistant",
+                        "content": f"Sorry, I encountered an error when trying to use the tool '{tool_name}': {e}"
+                    })
+                    # Don't proceed to call LLM again if tool failed
+
+        else:
+            # It's a regular text response, just add it
+            logger.info("Adding regular text response to Gradio output.")
+            result_messages_for_gradio.append({
+                "role": "assistant",
+                "content": assistant_content
+            })
+
+        # Return the list of messages to be added to the Gradio chatbot
+        return result_messages_for_gradio
+
+    async def close_connection(self):
+        # Method to explicitly close connections if needed (e.g., on app shutdown)
+        if self.exit_stack:
+            logger.info("Closing MCP connection and HTTP client.")
+            await self.exit_stack.aclose()
+            self.exit_stack = None
+            self.session = None
+            self.http_client = None

+# --- Gradio Interface Setup ---
+client = MCPClientWrapper()  # Instantiate the wrapper

def gradio_interface():
+    # Create the Gradio Blocks UI
+    with gr.Blocks(title="MCP Client + HF Inference", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("# MCP Assistant (Hugging Face Backend)")
+        gr.Markdown(f"Connect to your MCP server and chat with an assistant powered by `{MODEL_NAME}`.")
+
+        # Connection Row
+        with gr.Row():
+            server_path = gr.Textbox(
+                label="MCP Server Script Path",
+                placeholder="Enter path to server script (e.g., weather.py)",
+                # Default to a common name, update if yours is different
+                value="gradio_mcp_server.py",
+                scale=3
+            )
+            connect_btn = gr.Button("Connect to MCP Server", scale=1)
+
+        status = gr.Textbox(label="Status", interactive=False, placeholder="Not connected")

+        # Chatbot display
        chatbot = gr.Chatbot(
+            label="Conversation",
            value=[],
+            elem_id="chatbot",  # Add elem_id for potential CSS styling
+            height=600,
            show_copy_button=True,
+            bubble_full_width=False,  # Improves readability
+            avatar_images=("👤", "🤗")  # User and HF avatar
        )

+        # Input Row
+        with gr.Row():
+            msg_textbox = gr.Textbox(
+                label="Your Message",
+                placeholder="Ask a question...",
                scale=4,
+                autofocus=True  # Focus input on load
            )
+            # Submit button (alternative to pressing Enter)
+            # submit_btn = gr.Button("Send", scale=1, variant="primary")
+            # Clear button
+            clear_btn = gr.Button("🗑️ Clear Chat", scale=1)
+
+        # --- Event Handlers ---
+        # Connect button action
+        connect_btn.click(
+            client.connect,        # Function to call
+            inputs=[server_path],  # Input component(s)
+            outputs=[status]       # Output component(s)
+        )
+
+        # Function to handle message submission (Enter key or Send button)
+        submit_action = msg_textbox.submit(
+            client.process_message,         # Function to call
+            inputs=[msg_textbox, chatbot],  # Input components: message text, current chat history
+            outputs=[chatbot, msg_textbox]  # Output components: updated chat history, cleared message box
+        )
+        # If using a Send button:
+        # submit_btn.click(client.process_message, [msg_textbox, chatbot], [chatbot, msg_textbox])
+
+        # Clear button action
+        clear_btn.click(
+            lambda: ([], None),  # Function to return empty list for chatbot and None for status (optional)
+            [],                  # No inputs
+            [chatbot, status],   # Components to clear/reset
+            queue=False          # Run immediately
+        )

+        # Define app shutdown behavior (optional but good practice)
+        # This attempts to close connections when Gradio shuts down
+        # Note: Graceful shutdown in Gradio can be tricky.
+        # demo.unload(client.close_connection)  # Requires Gradio 4+ and might need async handling adjustments

    return demo

+# --- Main Execution ---
if __name__ == "__main__":
+    # Check for Hugging Face token on startup
+    if not HF_TOKEN:
+        print("\n" + "="*50)
+        print(" WARNING: HF_TOKEN environment variable not found! ")
+        print(" Please set it in your .env file or environment.")
+        print(" The application will run, but API calls will fail. ")
+        print("="*50 + "\n")

    interface = gradio_interface()
+    interface.launch(debug=True)
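
For reference, a minimal illustration (not part of the file above) of the tool-call contract the system prompt sets up: the model is asked to reply with a single JSON object, which _process_query then parses and, in the app, dispatches to the MCP server. The tool name and arguments below are hypothetical.

import json

# Hypothetical model reply following the prompted contract:
# {"tool_name": "<name_of_tool>", "tool_input": {<arguments_as_object>}}
assistant_content = '{"tool_name": "get_weather", "tool_input": {"city": "Paris"}}'

try:
    call = json.loads(assistant_content)
    if isinstance(call, dict) and "tool_name" in call and "tool_input" in call:
        # In app.py this pair is passed to session.call_tool(tool_name, tool_args)
        print("tool call:", call["tool_name"], call["tool_input"])
    else:
        print("plain JSON answer:", call)
except json.JSONDecodeError:
    print("plain text answer:", assistant_content)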