just kidding I meant 235B-A22B
app.py
CHANGED
@@ -1,205 +1,558 @@
-"""
-app.py – Hugging Face Space
-Swaps Anthropic for HF Serverless Inference (Qwen3-235B-A22B)
-"""
-
import asyncio
import os
import json
from typing import List, Dict, Any, Union
from contextlib import AsyncExitStack

import gradio as gr
from gradio.components.chatbot import ChatMessage
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
from dotenv import load_dotenv
-from huggingface_hub import InferenceClient  # NEW ✨

load_dotenv()
-
-
-
-

class MCPClientWrapper:
-    """
-    Wraps an MCP stdio client + a chat LLM (Qwen3-235B-A22B via HF Serverless).
-    """
-
    def __init__(self):
-
-        self.
        self.tools: List[Dict[str, Any]] = []

-        # --- NEW: Hugging Face client ---------------------------------------
-        self.hf_client = InferenceClient(
-            model="Qwen/Qwen3-235B-A22B",
-            token=os.getenv("HUGGINGFACE_API_TOKEN")
-        )
-        # --------------------------------------------------------------------
-
-    # ─────────────────────────── MCP CONNECTION ────────────────────────────
    def connect(self, server_path: str) -> str:
        return loop.run_until_complete(self._connect(server_path))

    async def _connect(self, server_path: str) -> str:
        if self.exit_stack:
            await self.exit_stack.aclose()

-
-
-
-
-
-        server_params = StdioServerParameters(
-            command=command,
-            args=[server_path],
-            env={"PYTHONIOENCODING": "utf-8", "PYTHONUNBUFFERED": "1"},
-        )

-
-
-
-

-
-
-
-        await self.session.initialize()

-
-
-            {
                "name": tool.name,
                "description": tool.description,
-                "input_schema": tool.inputSchema
-            }
-            for tool in
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
        new_messages = loop.run_until_complete(self._process_query(message, history))
-        return (
-            history + [{"role": "user", "content": message}] + new_messages,
-            gr.Textbox(value=""),
-        )

-
-
-
-    ):
-        """
-        Pushes the whole chat history to Qwen3-235B-A22B and returns its reply.
-        Tool calls are *not* forwarded – the HF endpoint only returns text.
-        """
-        # 1️⃣ Build message list in OpenAI-style dicts
-        messages: List[Dict[str, str]] = []
-        for item in history:
-            if isinstance(item, ChatMessage):
-                role, content = item.role, item.content
-            else:
-                role, content = item.get("role"), item.get("content")
-
-            if role in {"user", "assistant", "system"}:
-                messages.append({"role": role, "content": content})
-        messages.append({"role": "user", "content": message})
-
-        # 2️⃣ Serialise to Qwen chat-markup
-        prompt_parts = []
-        for m in messages:
-            role = m["role"]
-            prompt_parts.append(f"<|im_start|>{role}\n{m['content']}<|im_end|>")
-        prompt_parts.append("<|im_start|>assistant")  # model will complete here
-        prompt = "\n".join(prompt_parts)
-
-        # 3️⃣ Call HF Serverless in a threadpool (non-blocking)
-        async def _generate():
-            return self.hf_client.text_generation(
-                prompt,
-                max_new_tokens=1024,
-                temperature=0.7,
-                stop_sequences=["<|im_end|>", "<|im_start|>"],
-            )

-
-
-        )

-
-

-
-

def gradio_interface():
-
-
-        gr.Markdown("
-
-
-
-
-
-
-
-
-
-
-
-

        chatbot = gr.Chatbot(
            value=[],
-
-
            show_copy_button=True,
-
        )

-
-
-
-
                scale=4,
            )
-

-
-
-

    return demo

-
-# ──────────────────────────── ENTRY POINT ────────────────────────────────
if __name__ == "__main__":
-
-
-
-
-    )

    interface = gradio_interface()
-    interface.launch(debug=True)

import asyncio
import os
import json
from typing import List, Dict, Any, Union
from contextlib import AsyncExitStack
+import logging  # Added for better debugging

+import httpx  # Added for making HTTP requests
import gradio as gr
from gradio.components.chatbot import ChatMessage
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
+# Removed Anthropic import
from dotenv import load_dotenv

+# --- Configuration ---
load_dotenv()
+HF_TOKEN = os.getenv("HF_TOKEN")  # Changed from ANTHROPIC_API_KEY
+HF_API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/Qwen3-235B-A22B/v1/chat/completions"
+MODEL_NAME = "Qwen/Qwen3-235B-A22B"  # Define model name
+MAX_TOKENS = 1500  # Increased token limit slightly for potentially more verbose model
+HTTP_TIMEOUT = 60  # Timeout for API requests in seconds
+
+# --- Logging Setup ---
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+# --- Async Event Loop ---
+# Reuse the running event loop if one exists; otherwise create and set a new one.
+# This avoids potential issues in some environments (like notebooks).
+try:
+    loop = asyncio.get_running_loop()
+except RuntimeError:
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)

class MCPClientWrapper:
    def __init__(self):
+        # Initialize session, stack, and tools list
+        self.session: ClientSession | None = None
+        self.exit_stack: AsyncExitStack | None = None
        self.tools: List[Dict[str, Any]] = []
+        # Removed Anthropic client initialization
+        # Add Hugging Face token check
+        if not HF_TOKEN:
+            logger.warning("HF_TOKEN environment variable not found. Hugging Face API calls will fail.")
+            # Optionally raise an error or handle this more gracefully
+            # raise ValueError("HF_TOKEN environment variable is required.")
+        self.hf_token = HF_TOKEN
+        # Initialize HTTP client (will be managed by AsyncExitStack)
+        self.http_client: httpx.AsyncClient | None = None

    def connect(self, server_path: str) -> str:
+        # Run the async connection logic in the event loop
        return loop.run_until_complete(self._connect(server_path))

    async def _connect(self, server_path: str) -> str:
+        # Gracefully close existing connections and resources if reconnecting
        if self.exit_stack:
+            logger.info("Closing existing connection and resources.")
            await self.exit_stack.aclose()
+            self.exit_stack = None  # Reset stack
+            self.session = None  # Reset session
+            self.http_client = None  # Reset client
+
+        logger.info(f"Attempting to connect to MCP server: {server_path}")
+        self.exit_stack = AsyncExitStack()  # Create a new exit stack for managing resources
+
+        try:
+            # Determine command based on file extension
+            is_python = server_path.lower().endswith('.py')
+            command = "python" if is_python else "node"
+            logger.info(f"Using command '{command}' for server.")
+
+            # Configure server parameters
+            server_params = StdioServerParameters(
+                command=command,
+                args=[server_path],
+                env={"PYTHONIOENCODING": "utf-8", "PYTHONUNBUFFERED": "1"}
+            )

+            # Establish stdio transport with the MCP server
+            stdio_transport = await self.exit_stack.enter_async_context(stdio_client(server_params))
+            self.stdio, self.write = stdio_transport
+            logger.info("Stdio transport established.")

+            # Initialize the MCP client session
+            self.session = await self.exit_stack.enter_async_context(ClientSession(self.stdio, self.write))
+            await self.session.initialize()
+            logger.info("MCP session initialized.")

+            # Initialize the HTTP client for Hugging Face API calls
+            self.http_client = await self.exit_stack.enter_async_context(httpx.AsyncClient(timeout=HTTP_TIMEOUT))
+            logger.info("HTTP client initialized.")

+            # List available tools from the MCP server
+            response = await self.session.list_tools()
+            self.tools = [{
                "name": tool.name,
                "description": tool.description,
+                "input_schema": tool.inputSchema  # Keep schema for potential future use or richer prompts
+            } for tool in response.tools]
+            logger.info(f"Available tools: {[tool['name'] for tool in self.tools]}")
+
+            tool_names = [tool["name"] for tool in self.tools]
+            return f"Connected to MCP server. Available tools: {', '.join(tool_names) if tool_names else 'None'}"
+
+        except Exception as e:
+            logger.error(f"Connection failed: {e}", exc_info=True)
+            # Clean up resources if connection failed midway
+            if self.exit_stack:
+                await self.exit_stack.aclose()
+            self.exit_stack = None
+            self.session = None
+            self.http_client = None
+            return f"Connection failed: {e}"
+
+    def _format_tools_for_prompt(self) -> str:
+        # Create a description of tools for the LLM prompt
+        if not self.tools:
+            return "No tools available."
+
+        tool_descriptions = []
+        for tool in self.tools:
+            # Describe the tool and its expected input format (JSON schema)
+            desc = f"- Name: {tool['name']}\n"
+            desc += f" Description: {tool['description']}\n"
+            desc += f" Input JSON Schema: {json.dumps(tool['input_schema'], indent=2)}"
+            tool_descriptions.append(desc)
+
+        return "You have access to the following tools:\n" + "\n".join(tool_descriptions) + \
+               "\n\nTo use a tool, respond ONLY with a single JSON object matching this structure: " + \
+               "{\"tool_name\": \"<name_of_tool>\", \"tool_input\": {<arguments_as_object>}}. " + \
+               "Do not add any other text, explanation, or markdown formatting around the JSON object."
+
+
+    def _build_system_prompt(self) -> str:
+        # Construct the system prompt including tool instructions
+        system_prompt = "You are a helpful assistant."
+        tool_info = self._format_tools_for_prompt()
+        if tool_info != "No tools available.":
+            system_prompt += "\n\n" + tool_info
+        return system_prompt
+
+    async def _call_huggingface_api(self, messages: List[Dict[str, str]]) -> Dict[str, Any] | None:
+        # Helper function to call the Hugging Face Inference API
+        if not self.hf_token or not self.http_client:
+            logger.error("Hugging Face token or HTTP client not available.")
+            return {"error": "API client not configured."}
+
+        headers = {
+            "Authorization": f"Bearer {self.hf_token}",
+            "Content-Type": "application/json",
+        }
+        payload = {
+            "model": MODEL_NAME,
+            "messages": messages,
+            "max_tokens": MAX_TOKENS,
+            "stream": False,  # Keeping it simple, not streaming for now
+            # Add other parameters like temperature if needed
+            # "temperature": 0.7,
+        }
+
+        logger.info(f"Sending request to HF API. Message count: {len(messages)}")
+        # Log message content carefully, maybe just roles or lengths in production
+        # logger.debug(f"Payload: {json.dumps(payload, indent=2)}")
+
+        try:
+            response = await self.http_client.post(HF_API_URL, headers=headers, json=payload)
+            response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
+            logger.info(f"Received response from HF API. Status: {response.status_code}")
+            return response.json()
+
+        except httpx.HTTPStatusError as e:
+            logger.error(f"HTTP error occurred: {e.response.status_code} - {e.response.text}")
+            return {"error": f"API request failed: {e.response.status_code}", "details": e.response.text}
+        except httpx.RequestError as e:
+            logger.error(f"Request error occurred: {e}")
+            return {"error": f"API request failed: {e}"}
+        except json.JSONDecodeError as e:
+            logger.error(f"Failed to decode JSON response: {e}")
+            # Attempt to get raw text if JSON decoding fails
+            raw_text = await response.aread() if 'response' in locals() else b""
+            logger.error(f"Raw Response: {raw_text.decode(errors='ignore')}")
+            return {"error": "Failed to decode API JSON response.", "raw_response": raw_text.decode(errors='ignore')}
+        except Exception as e:
+            logger.error(f"An unexpected error occurred during API call: {e}", exc_info=True)
+            return {"error": f"An unexpected error occurred: {e}"}
+
+    def process_message(self, message: str, history: List[Union[Dict[str, Any], ChatMessage]]) -> tuple:
+        # Check if connected to MCP server
+        if not self.session or not self.http_client:
+            # Append user message and error message to history
+            history.append({"role": "user", "content": message})
+            history.append({"role": "assistant", "content": "Error: Please connect to the MCP server and ensure HF_TOKEN is set."})
+            # Return updated history and clear input textbox
+            return history, gr.Textbox(value="")
+
+        # Run the async query processing logic
        new_messages = loop.run_until_complete(self._process_query(message, history))

+        # Append the original user message and the new assistant messages to history
+        history.append({"role": "user", "content": message})
+        history.extend(new_messages)

+        # Return updated history and clear input textbox
+        return history, gr.Textbox(value="")

+    async def _process_query(self, message: str, history: List[Union[Dict[str, Any], ChatMessage]]) -> List[Dict[str, Any]]:
+        # Build the list of messages in the format Hugging Face expects
+        hf_messages = [{"role": "system", "content": self._build_system_prompt()}]
+        for msg in history:
+            # Convert Gradio ChatMessage or dict to the required format
+            if isinstance(msg, ChatMessage):
+                role, content = msg.role, msg.content
+            else:
+                role, content = msg.get("role"), msg.get("content")

+            # Ensure content is a string (handle potential image dicts if added later)
+            if isinstance(content, dict):
+                # Handle potential dict content (like images) - skip or represent as text for now
+                content_str = json.dumps(content)  # Or some other representation
+                logger.warning(f"Found non-string content in history for role {role}, converting to JSON string.")
+            else:
+                content_str = str(content)  # Ensure it's a string
+
+            # Map roles if needed (e.g., 'bot' -> 'assistant') - current roles seem fine
+            if role in ["user", "assistant"]:
+                hf_messages.append({"role": role, "content": content_str})
+            elif role == "system" and len(hf_messages) > 1:  # Avoid duplicate system prompts if history already has one
+                logger.warning("Skipping additional system message found in history.")
+            # Handle tool results if they were stored in history differently (not standard here)
+
+        # Add the current user message
+        hf_messages.append({"role": "user", "content": message})
+
+        # --- Make the API Call ---
+        response_data = await self._call_huggingface_api(hf_messages)
+
+        # Prepare list to hold messages for Gradio display
+        result_messages_for_gradio = []
+
+        # --- Handle API Response ---
+        if not response_data or "error" in response_data:
+            error_msg = response_data.get("error", "Unknown API error") if response_data else "No response from API"
+            details = response_data.get("details", "") if response_data else ""
+            logger.error(f"API call failed: {error_msg} {details}")
+            result_messages_for_gradio.append({
+                "role": "assistant",
+                "content": f"Sorry, I encountered an error calling the language model: {error_msg}" + (f"\nDetails: ```\n{details}\n```" if details else "")
+            })
+            return result_messages_for_gradio  # Return error message to Gradio
+
+        # Extract the assistant's reply content
+        try:
+            # Adjust parsing based on actual HF API response structure for non-streaming chat completions
+            # Common structures: response_data['choices'][0]['message']['content']
+            # Or sometimes: response_data['generated_text']
+            assistant_content = response_data.get("choices", [{}])[0].get("message", {}).get("content", "")
+            if not assistant_content and "generated_text" in response_data:  # Fallback for some models/endpoints
+                assistant_content = response_data["generated_text"]
+
+            if not assistant_content:
+                logger.error(f"Could not extract assistant content from response: {response_data}")
+                raise ValueError("Empty or missing assistant content in API response.")
+
+            logger.info("Received assistant content.")
+            # logger.debug(f"Assistant content raw: {assistant_content}")
+
+        except (KeyError, IndexError, ValueError, TypeError) as e:
+            logger.error(f"Error parsing API response structure: {e}. Response: {response_data}", exc_info=True)
+            result_messages_for_gradio.append({
+                "role": "assistant",
+                "content": f"Sorry, I received an unexpected response format from the language model. Error: {e}"
+            })
+            return result_messages_for_gradio
+
+        # --- Check for Tool Use ---
+        # Try to parse the entire response as JSON (as instructed in the prompt)
+        tool_call_data = None
+        try:
+            potential_tool_call = json.loads(assistant_content)
+            # Check if it matches the expected tool call structure
+            if isinstance(potential_tool_call, dict) and "tool_name" in potential_tool_call and "tool_input" in potential_tool_call:
+                tool_call_data = potential_tool_call
+                logger.info(f"Detected tool call: {tool_call_data['tool_name']}")
+            else:
+                # It's valid JSON, but not the tool format we asked for. Treat as text.
+                logger.info("Response is JSON, but not a recognized tool call format.")
+                pass  # Keep assistant_content as is
+        except json.JSONDecodeError:
+            # Not JSON, assume it's a regular text response
+            logger.info("Response is not JSON, treating as text.")
+            pass  # Keep assistant_content as is
+
+        # --- Process Tool Call or Text Response ---
+        if tool_call_data:
+            # It's a tool call!
+            tool_name = tool_call_data["tool_name"]
+            tool_args = tool_call_data["tool_input"]
+
+            # Check if the requested tool is valid/available
+            available_tool_names = [t["name"] for t in self.tools]
+            if tool_name not in available_tool_names:
+                logger.warning(f"LLM requested unavailable tool: {tool_name}")
+                # Inform the user and potentially ask the LLM again without the tool result
+                result_messages_for_gradio.append({
+                    "role": "assistant",
+                    "content": f"I wanted to use the '{tool_name}' tool, but it seems it's not available right now. I'll try to answer without it."
+                })
+                # Optionally, make *another* call to the LLM telling it the tool failed.
+                # For simplicity here, we'll just stop.
+
+                # Or, make another call telling the LLM the tool is unavailable:
+                # hf_messages.append({"role": "assistant", "content": assistant_content})  # Add the LLM's attempt
+                # hf_messages.append({"role": "user", "content": f"The tool '{tool_name}' is not available. Please answer without using tools."})
+                # follow_up_response_data = await self._call_huggingface_api(hf_messages)
+                # ... process follow_up_response_data ... (similar to text response handling)

+            else:
+                # Add messages to Gradio indicating tool use (similar to original)
+                result_messages_for_gradio.append({
+                    "role": "assistant",
+                    "content": f"I need to use the **{tool_name}** tool to answer that.",
+                    "metadata": {  # Keep metadata for potential UI enhancements
+                        "title": f"⏳ Using tool: {tool_name}",
+                        "log": f"Parameters: {json.dumps(tool_args, ensure_ascii=False)}",  # Use ensure_ascii=False for readability
+                        "status": "pending",
+                        "id": f"tool_call_{tool_name}"
+                    }
+                })
+                result_messages_for_gradio.append({
+                    "role": "assistant",
+                    "content": f"```json\n{json.dumps(tool_args, indent=2, ensure_ascii=False)}\n```",
+                    "metadata": {
+                        "parent_id": f"tool_call_{tool_name}",
+                        "id": f"params_{tool_name}",
+                        "title": "Tool Parameters"
+                    }
+                })
+
+                # --- Call the actual MCP tool ---
+                try:
+                    logger.info(f"Calling MCP tool: {tool_name} with args: {tool_args}")
+                    mcp_result = await self.session.call_tool(tool_name, tool_args)
+                    logger.info(f"Received result from tool: {tool_name}")
+                    tool_result_content = mcp_result.content
+                    # The MCP SDK may return a list of content parts; join them into one string for parsing/display
+                    if isinstance(tool_result_content, list):
+                        tool_result_content = "\n".join(
+                            getattr(part, "text", str(part)) for part in tool_result_content
+                        )
+                    # Mark Gradio message as done
+                    if result_messages_for_gradio and "metadata" in result_messages_for_gradio[-2]:
+                        result_messages_for_gradio[-2]["metadata"]["status"] = "done"
+                        result_messages_for_gradio[-2]["metadata"]["title"] = f"✅ Used tool: {tool_name}"
+
+                    # Prepare tool result for Gradio display
+                    result_messages_for_gradio.append({
+                        "role": "assistant",
+                        "content": f"Result from **{tool_name}**:",
+                        "metadata": {
+                            "title": f"Tool Result: {tool_name}",
+                            "status": "done",
+                            "id": f"result_{tool_name}"
+                        }
+                    })
+
+                    # Attempt to format tool result nicely for Gradio (handle JSON, images, etc.)
+                    display_content = tool_result_content  # Default to raw content
+                    try:
+                        # Try parsing as JSON
+                        result_json = json.loads(tool_result_content)
+                        if isinstance(result_json, dict) and result_json.get("type") == "image" and "url" in result_json:
+                            # Handle image result
+                            display_content = {"path": result_json["url"], "alt_text": result_json.get("message", "Generated image")}
+                            result_messages_for_gradio.append({
+                                "role": "assistant",
+                                "content": display_content,
+                                "metadata": {"parent_id": f"result_{tool_name}", "id": f"image_{tool_name}", "title": "Image Result"}
+                            })
+                            display_content = None  # Mark as handled
+                        else:
+                            # Display other JSON nicely formatted
+                            display_content = f"```json\n{json.dumps(result_json, indent=2, ensure_ascii=False)}\n```"
+                    except json.JSONDecodeError:
+                        # Not JSON, display as plain code block if it's not empty
+                        if tool_result_content:
+                            display_content = f"```\n{tool_result_content}\n```"
+                        else:
+                            display_content = "_Tool returned empty content_"
+
+                    if display_content:  # Add the formatted/raw result if not handled (like image)
+                        result_messages_for_gradio.append({
+                            "role": "assistant",
+                            "content": display_content,
+                            "metadata": {"parent_id": f"result_{tool_name}", "id": f"raw_result_{tool_name}", "title": "Formatted Output"}
+                        })
+
+
+                    # --- Send tool result back to LLM ---
+                    # Append the *original* assistant message (the tool call JSON) and the user message with the result
+                    hf_messages.append({"role": "assistant", "content": assistant_content})
+                    # Use a clear format for the tool result for the LLM
+                    user_tool_result_message = f"Tool result for {tool_name}:\n```\n{tool_result_content}\n```"
+                    hf_messages.append({"role": "user", "content": user_tool_result_message})
+
+                    logger.info("Sending tool result back to HF API for final response.")
+                    final_response_data = await self._call_huggingface_api(hf_messages)
+
+                    # Process the final response from the LLM
+                    if final_response_data and "error" not in final_response_data:
+                        try:
+                            final_assistant_content = final_response_data.get("choices", [{}])[0].get("message", {}).get("content", "")
+                            if not final_assistant_content and "generated_text" in final_response_data:
+                                final_assistant_content = final_response_data["generated_text"]
+
+                            if final_assistant_content:
+                                result_messages_for_gradio.append({
+                                    "role": "assistant",
+                                    "content": final_assistant_content
+                                })
+                            else:
+                                raise ValueError("Empty or missing final assistant content.")
+                        except (KeyError, IndexError, ValueError, TypeError) as e:
+                            logger.error(f"Error parsing final API response: {e}. Response: {final_response_data}", exc_info=True)
+                            result_messages_for_gradio.append({
+                                "role": "assistant",
+                                "content": f"Sorry, I couldn't process the tool result properly. Error: {e}"
+                            })
+                    else:
+                        # Handle error in the *second* API call
+                        error_msg = final_response_data.get("error", "Unknown API error") if final_response_data else "No final response"
+                        details = final_response_data.get("details", "") if final_response_data else ""
+                        logger.error(f"Final API call failed: {error_msg} {details}")
+                        result_messages_for_gradio.append({
+                            "role": "assistant",
+                            "content": f"Sorry, I encountered an error after using the tool: {error_msg}" + (f"\nDetails: ```\n{details}\n```" if details else "")
+                        })
+
+                except Exception as e:
+                    logger.error(f"Error calling MCP tool {tool_name}: {e}", exc_info=True)
+                    # Mark Gradio message as failed
+                    if result_messages_for_gradio and "metadata" in result_messages_for_gradio[-2]:
+                        result_messages_for_gradio[-2]["metadata"]["status"] = "error"
+                        result_messages_for_gradio[-2]["metadata"]["title"] = f"❌ Error using tool: {tool_name}"
+                    # Inform user about the tool call failure
+                    result_messages_for_gradio.append({
+                        "role": "assistant",
+                        "content": f"Sorry, I encountered an error when trying to use the tool '{tool_name}': {e}"
+                    })
+                    # Don't proceed to call LLM again if tool failed
+
+        else:
+            # It's a regular text response, just add it
+            logger.info("Adding regular text response to Gradio output.")
+            result_messages_for_gradio.append({
+                "role": "assistant",
+                "content": assistant_content
+            })
+
+        # Return the list of messages to be added to the Gradio chatbot
+        return result_messages_for_gradio
+
+    async def close_connection(self):
+        # Method to explicitly close connections if needed (e.g., on app shutdown)
+        if self.exit_stack:
+            logger.info("Closing MCP connection and HTTP client.")
+            await self.exit_stack.aclose()
+            self.exit_stack = None
+            self.session = None
+            self.http_client = None

+# --- Gradio Interface Setup ---
+client = MCPClientWrapper()  # Instantiate the wrapper

def gradio_interface():
+    # Create the Gradio Blocks UI
+    with gr.Blocks(title="MCP Client + HF Inference", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("# MCP Assistant (Hugging Face Backend)")
+        gr.Markdown(f"Connect to your MCP server and chat with an assistant powered by `{MODEL_NAME}`.")
+
+        # Connection Row
+        with gr.Row():
+            server_path = gr.Textbox(
+                label="MCP Server Script Path",
+                placeholder="Enter path to server script (e.g., weather.py)",
+                # Default to a common name, update if yours is different
+                value="gradio_mcp_server.py",
+                scale=3
+            )
+            connect_btn = gr.Button("Connect to MCP Server", scale=1)
+
+        status = gr.Textbox(label="Status", interactive=False, placeholder="Not connected")

+        # Chatbot display
        chatbot = gr.Chatbot(
+            label="Conversation",
            value=[],
+            elem_id="chatbot",  # Add elem_id for potential CSS styling
+            height=600,
            show_copy_button=True,
+            bubble_full_width=False,  # Improves readability
+            avatar_images=("👤", "🤗")  # User and HF avatar
        )

+        # Input Row
+        with gr.Row():
+            msg_textbox = gr.Textbox(
+                label="Your Message",
+                placeholder="Ask a question...",
                scale=4,
+                autofocus=True  # Focus input on load
            )
+            # Submit button (alternative to pressing Enter)
+            # submit_btn = gr.Button("Send", scale=1, variant="primary")
+            # Clear button
+            clear_btn = gr.Button("🗑️ Clear Chat", scale=1)
+
+        # --- Event Handlers ---
+        # Connect button action
+        connect_btn.click(
+            client.connect,        # Function to call
+            inputs=[server_path],  # Input component(s)
+            outputs=[status]       # Output component(s)
+        )
+
+        # Function to handle message submission (Enter key or Send button)
+        submit_action = msg_textbox.submit(
+            client.process_message,         # Function to call
+            inputs=[msg_textbox, chatbot],  # Input components: message text, current chat history
+            outputs=[chatbot, msg_textbox]  # Output components: updated chat history, cleared message box
+        )
+        # If using a Send button:
+        # submit_btn.click(client.process_message, [msg_textbox, chatbot], [chatbot, msg_textbox])
+
+        # Clear button action
+        clear_btn.click(
+            lambda: ([], None),  # Function to return empty list for chatbot and None for status (optional)
+            [],                  # No inputs
+            [chatbot, status],   # Components to clear/reset
+            queue=False          # Run immediately
+        )

+        # Define app shutdown behavior (optional but good practice)
+        # This attempts to close connections when Gradio shuts down
+        # Note: Graceful shutdown in Gradio can be tricky.
+        # demo.unload(client.close_connection)  # Requires Gradio 4+ and might need async handling adjustments

    return demo

+# --- Main Execution ---
if __name__ == "__main__":
+    # Check for Hugging Face token on startup
+    if not HF_TOKEN:
+        print("\n" + "="*50)
+        print(" WARNING: HF_TOKEN environment variable not found! ")
+        print(" Please set it in your .env file or environment.")
+        print(" The application will run, but API calls will fail. ")
+        print("="*50 + "\n")

    interface = gradio_interface()
+    interface.launch(debug=True)
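
For reference, a minimal illustration (not part of the file above) of the tool-call contract the system prompt sets up: the model is asked to reply with a single JSON object, which _process_query then parses and, in the app, dispatches to the MCP server. The tool name and arguments below are hypothetical.

import json

# Hypothetical model reply following the prompted contract:
# {"tool_name": "<name_of_tool>", "tool_input": {<arguments_as_object>}}
assistant_content = '{"tool_name": "get_weather", "tool_input": {"city": "Paris"}}'

try:
    call = json.loads(assistant_content)
    if isinstance(call, dict) and "tool_name" in call and "tool_input" in call:
        # In app.py this pair is passed to session.call_tool(tool_name, tool_args)
        print("tool call:", call["tool_name"], call["tool_input"])
    else:
        print("plain JSON answer:", call)
except json.JSONDecodeError:
    print("plain text answer:", assistant_content)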