LamiaYT committed
Commit 8f6825e · 1 Parent(s): 26e4907

Initial commit with LlamaIndex-based agent

Files changed (1):
  1. app.py +298 -325
app.py CHANGED
@@ -1,7 +1,8 @@
 from llama_index.llms.huggingface import HuggingFaceLLM
 from llama_index.core.agent import ReActAgent
 from llama_index.core.tools import FunctionTool
-from transformers import AutoTokenizer, pipeline
 import os
 import gradio as gr
 import requests
@@ -9,9 +10,6 @@ import pandas as pd
 import traceback
 import torch
 import re
-import gc
-from typing import List, Dict
-from datetime import datetime

 # Import real tool dependencies
 try:
@@ -21,7 +19,7 @@ except ImportError:
 DDGS = None

 try:
-    from sympy import sympify
     from sympy.core.sympify import SympifyError
 except ImportError:
     print("Warning: sympy not installed. Math calculator will be limited.")
@@ -30,460 +28,435 @@ except ImportError:

 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-MEMORY_LIMIT_GB = 16  # Your system's memory limit

 # --- Advanced Agent Definition ---
 class SmartAgent:
     def __init__(self):
-        print(f"Initializing Local LLM Agent (Memory Limit: {MEMORY_LIMIT_GB}GB)...")
-        self.model_loaded = False

-        # Model options sorted by capability (name, approx size in GB, quantization)
         model_options = [
-            ("google/flan-t5-large", 3, "8-bit"),  # Best balance for 16GB
-            ("google/flan-t5-base", 1, "8-bit"),   # Smaller fallback
-            ("facebook/opt-1.3b", 2.5, "8-bit")    # Alternative option
         ]

-        # Try loading models until success
-        for model_name, size_gb, quantization in model_options:
-            if size_gb <= MEMORY_LIMIT_GB and self._try_load_model(model_name, quantization):
-                self.model_loaded = True
-                break

-        if not self.model_loaded:
-            raise RuntimeError("Failed to load any suitable model within memory constraints")

-        # Initialize tools with enhanced implementations
         self.tools = [
             FunctionTool.from_defaults(
-                fn=self.smart_web_search,
                 name="web_search",
-                description="Searches the web for current information. Use for questions about recent events, people, or facts not in the model's training data."
             ),
             FunctionTool.from_defaults(
-                fn=self.robust_math_calculator,
                 name="math_calculator",
-                description="Solves mathematical expressions and equations. Use for calculations, arithmetic, algebra, or numerical problems."
             )
         ]

-        # Initialize ReAct agent with memory optimization
         try:
             self.agent = ReActAgent.from_tools(
                 tools=self.tools,
                 llm=self.llm,
                 verbose=True,
-                max_iterations=4,
-                react_context="""Think step by step. Use tools when needed:
-                - For current/recent information: web_search
-                - For calculations: math_calculator
-                - Be concise but accurate"""
             )
-            print("ReAct Agent initialized successfully")
         except Exception as e:
-            print(f"ReAct Agent init failed: {e}")
             self.agent = None

-    def _try_load_model(self, model_name: str, quantization: str) -> bool:
-        """Attempt to load model with memory constraints"""
-        try:
-            print(f"Loading {model_name} with {quantization} quantization...")
-
-            model_kwargs = {
-                "torch_dtype": torch.float16,
-                "low_cpu_mem_usage": True,
-            }
-
-            if quantization == "8-bit":
-                model_kwargs["load_in_8bit"] = True
-            elif quantization == "4-bit":
-                model_kwargs["load_in_4bit"] = True
-
-            self.llm = HuggingFaceLLM(
-                model_name=model_name,
-                tokenizer_name=model_name,
-                context_window=2048,
-                max_new_tokens=256,
-                generate_kwargs={
-                    "temperature": 0.4,
-                    "do_sample": True,
-                    "top_p": 0.9,
-                    "repetition_penalty": 1.1
-                },
-                device_map="auto" if torch.cuda.is_available() else "cpu",
-                model_kwargs=model_kwargs
-            )
-
-            # Test the model
-            test_response = self.llm.complete("Test response:")
-            if not test_response:
-                raise ValueError("Model failed test response")
-
-            print(f"Successfully loaded {model_name}")
-            return True
-
-        except Exception as e:
-            print(f"Failed to load {model_name}: {str(e)}")
-            self.cleanup_memory()
-            return False
-
-    def smart_web_search(self, query: str) -> str:
-        """Enhanced web search with focused results"""
-        print(f"Searching: {query[:60]}...")

         if not DDGS:
-            return "Web search unavailable (duckduckgo_search not installed)"

         try:
             with DDGS() as ddgs:
-                # Get focused results with longer snippets
-                results = list(ddgs.text(query, max_results=3))
-
-                if not results:
-                    return "No results found"

-                # Process results for key information
-                processed = []
-                for i, res in enumerate(results, 1):
-                    title = res.get('title', 'No title')
-                    body = res.get('body', 'No description')
-                    url = res.get('href', '')

-                    # Extract most relevant part for the query
-                    key_info = self._extract_relevant_info(query, body)

-                    processed.append(
-                        f"🔍 Result {i}:\n"
-                        f"Title: {title}\n"
-                        f"Info: {key_info[:250]}\n"
-                        f"Source: {url}\n"
-                    )
-
-                return "\n".join(processed)
         except Exception as e:
-            return f"Search error: {str(e)}"

-    def _extract_relevant_info(self, query: str, text: str) -> str:
-        """Extract the most relevant portion of text for the query"""
-        query_lower = query.lower()
-        text_lower = text.lower()
-
-        # Handle different question types
-        if any(w in query_lower for w in ['who is', 'biography', 'born']):
-            # Look for birth/death info
-            match = re.search(r"(born [^.]+? in [^.]+?\.)", text, re.I)
-            return match.group(1) if match else text[:250]
-
-        elif any(w in query_lower for w in ['died', 'death']):
-            match = re.search(r"(died [^.]+?\.)", text, re.I)
-            return match.group(1) if match else text[:250]

-        elif any(w in query_lower for w in ['award', 'prize', 'won']):
-            match = re.search(r"(awarded [^.]+? in [^.]+?\.)", text, re.I)
-            return match.group(1) if match else text[:250]

-        # Default: return first 250 chars with important sentences
-        sentences = re.split(r'(?<=[.!?]) +', text)
-        important = [s for s in sentences if any(w in s.lower() for w in query.lower().split())]
-        return " ".join(important[:3]) if important else text[:250]

-    def robust_math_calculator(self, expression: str) -> str:
-        """Improved math calculator with better parsing"""
-        print(f"Calculating: {expression}")

-        # Clean and preprocess the expression
-        expr = expression.strip("'\"")

-        # Replace words with operators
-        replacements = {
-            'plus': '+', 'minus': '-', 'times': '*', 'divided by': '/',
-            '^': '**', 'percent': '/100', 'modulo': '%'
-        }
-        for word, op in replacements.items():
-            expr = expr.replace(word, op)

-        # Extract math expression from text
-        math_match = re.search(r"([-+]?\d*\.?\d+[+\-*/%^()\s]+\d+\.?\d*)", expr)
-        if math_match:
-            expr = math_match.group(1)

-        # Safety check
-        allowed_chars = set("0123456789+-*/().%^ ")
-        if not all(c in allowed_chars for c in expr.replace(" ", "")):
-            return "Error: Invalid characters in expression"

-        try:
-            # Try direct evaluation first
-            result = eval(expr)
-            return f"Result: {result}"
-        except:
-            # Fallback to sympy if available
-            if sympify:
-                try:
-                    result = sympify(expr).evalf()
-                    return f"Result: {result}"
-                except SympifyError as e:
-                    return f"Math error: {str(e)}"
-            return "Error: Could not evaluate the expression"
-
-    def __call__(self, question: str) -> str:
-        """Main interface for answering questions"""
-        print(f"\nQuestion: {question[:100]}...")

         try:
-            # Step 1: Classify question type
-            q_type = self._classify_question(question)
-
-            # Step 2: Use appropriate strategy
-            if q_type == "fact":
-                return self._answer_fact_question(question)
-            elif q_type == "math":
-                return self._answer_math_question(question)
             else:
-                return self._answer_general_question(question)

         except Exception as e:
-            print(f"Error processing question: {str(e)}")
-            return self._fallback_response(question)
-
-    def _classify_question(self, question: str) -> str:
-        """Determine the type of question"""
-        q_lower = question.lower()
-
-        # Math questions
-        math_keywords = ['calculate', 'compute', 'sum', 'total', 'average',
-                         'percentage', 'equation', 'solve', 'math', 'number',
-                         '+', '-', '*', '/', '=']
-        if any(kw in q_lower for kw in math_keywords):
-            return "math"
-
-        # Fact-based questions
-        fact_keywords = ['current', 'latest', 'recent', 'today', 'news',
-                         'who is', 'what is', 'when did', 'where is',
-                         'competition', 'winner', 'recipient', 'nationality',
-                         'country', 'malko', 'century', 'award', 'born', 'died']
-        if any(kw in q_lower for kw in fact_keywords):
-            return "fact"
-
-        return "general"
-
-    def _answer_fact_question(self, question: str) -> str:
-        """Handle fact-based questions with web search"""
-        # Extract key entities for focused search
-        entities = re.findall(r"([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)", question)
-        search_query = " ".join(entities[:3]) or question[:50]
-
-        # Get search results
-        search_results = self.smart_web_search(search_query)

-        # Process with LLM if available
-        if self.model_loaded:
-            prompt = f"""Question: {question}
-Search Results:
-{search_results}

-Based ONLY on these results, provide a concise answer.
-If the answer isn't there, say so."""

-            try:
-                response = self.llm.complete(prompt)
-                return str(response).strip()
-            except:
-                return f"Search results for '{search_query}':\n{search_results}"
-
-        return f"Search results for '{search_query}':\n{search_results}"
-
-    def _answer_math_question(self, question: str) -> str:
-        """Handle math questions with calculator"""
-        # Try to extract math expression
-        math_expr = re.search(r"([\d\s+\-*/().^]+)", question)
-        if math_expr:
-            return self.robust_math_calculator(math_expr.group(1))
-
-        # If no clear expression, use agent reasoning
-        if self.agent:
-            try:
-                response = self.agent.query(question)
-                return str(response)
-            except:
-                return self._fallback_response(question)

-        return self._fallback_response(question)
-
-    def _answer_general_question(self, question: str) -> str:
-        """Handle general knowledge questions"""
-        if self.agent:
-            try:
-                response = self.agent.query(question)
-                return str(response)
-            except:
-                return self._fallback_response(question)

-        # Fallback to simple LLM response
-        try:
-            response = self.llm.complete(question)
-            return str(response)
-        except:
-            return self._fallback_response(question)

-    def _fallback_response(self, question: str) -> str:
-        """Final fallback when all else fails"""
-        return f"I couldn't generate a complete answer for: {question[:150]}... Please try rephrasing or ask about something more specific."

-    def cleanup_memory(self):
-        """Clean up memory resources"""
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        gc.collect()


-# --- Submission Logic ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """Handle the full evaluation process"""
     space_id = os.getenv("SPACE_ID")

-    if profile:
-        username = f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
-        print("User not logged in.")
-        return "Please Login to Hugging Face with the button.", None

     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"

-    # Initialize agent with memory management
     try:
         agent = SmartAgent()
     except Exception as e:
-        print(f"Agent initialization failed: {e}")
-        return f"Error initializing agent: {e}", None

     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(f"Agent code URL: {agent_code}")

-    # Fetch Questions
-    print(f"Fetching questions from: {questions_url}")
     try:
-        response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
-        if not questions_data:
-            return "No questions received from server.", None
-        print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
-        return f"Error fetching questions: {e}", None

-    # Process Questions
     results_log = []
     answers_payload = []

     for i, item in enumerate(questions_data, 1):
         task_id = item.get("task_id")
-        question = item.get("question")

-        if not task_id or not question:
             continue

-        print(f"Processing question {i}/{len(questions_data)} (ID: {task_id})")

         try:
-            answer = agent(question)
             answers_payload.append({
-                "task_id": task_id,
-                "submitted_answer": answer[:2000]  # Limit answer length
             })
             results_log.append({
                 "Task ID": task_id,
-                "Question": question[:100] + "..." if len(question) > 100 else question,
-                "Answer": answer[:200] + "..." if len(answer) > 200 else answer
             })

-            # Clean memory every 5 questions
-            if i % 5 == 0:
-                agent.cleanup_memory()

         except Exception as e:
-            print(f"Error on question {task_id}: {e}")
             answers_payload.append({
-                "task_id": task_id,
-                "submitted_answer": f"Error processing question: {str(e)}"
             })
             results_log.append({
                 "Task ID": task_id,
-                "Question": question[:100] + "..." if len(question) > 100 else question,
-                "Answer": f"Error: {str(e)}"
             })

-    # Submit Answers
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload
     }

-    print(f"Submitting {len(answers_payload)} answers...")
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
-        result = response.json()

-        status = (
-            f"✅ Submission Successful!\n\n"
-            f"User: {result.get('username')}\n"
-            f"Score: {result.get('score', 'N/A')}% "
-            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})\n"
-            f"Message: {result.get('message', '')}"
-        )
-        return status, pd.DataFrame(results_log)

     except Exception as e:
-        error_msg = f"❌ Submission Failed: {str(e)}"
         print(error_msg)
         return error_msg, pd.DataFrame(results_log)


 # --- Gradio UI ---
-with gr.Blocks(title="Local LLM Agent Evaluation") as demo:
     gr.Markdown("""
-    # 🚀 Local LLM Agent Evaluation
-    **Run your local agent against the course evaluation questions**
-    """)

     with gr.Row():
-        gr.LoginButton()

-    run_btn = gr.Button(
-        "🚀 Run Evaluation & Submit Answers",
-        variant="primary"
-    )

-    status_out = gr.Textbox(
-        label="📋 Status",
-        interactive=False
     )

     results_table = gr.DataFrame(
-        label="📊 Results",
-        interactive=False,
         wrap=True
     )

-    run_btn.click(
         fn=run_and_submit_all,
-        outputs=[status_out, results_table]
     )

-
 if __name__ == "__main__":
-    print("\n" + "="*60)
-    print(f"🚀 Starting Agent Evaluation - {datetime.now().strftime('%Y-%m-%d %H:%M')}")
-    print(f"Memory Limit: {MEMORY_LIMIT_GB}GB")
-    print("="*60)
-
     demo.launch(
         server_name="0.0.0.0",
-        server_port=7860
     )
+# app.py - Optimized for 16GB Memory
 from llama_index.llms.huggingface import HuggingFaceLLM
 from llama_index.core.agent import ReActAgent
 from llama_index.core.tools import FunctionTool
+from transformers import AutoTokenizer
 import os
 import gradio as gr
 import requests

 import traceback
 import torch
 import re

 # Import real tool dependencies
 try:

 DDGS = None

 try:
+    from sympy import sympify, solve, simplify, N
     from sympy.core.sympify import SympifyError
 except ImportError:
     print("Warning: sympy not installed. Math calculator will be limited.")

 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

 # --- Advanced Agent Definition ---
 class SmartAgent:
     def __init__(self):
+        print("Initializing Optimized LLM Agent for 16GB Memory...")

+        # Check available memory and CUDA
+        if torch.cuda.is_available():
+            print(f"CUDA available. GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
+            device_map = "auto"
+        else:
+            print("CUDA not available, using CPU")
+            device_map = "cpu"
+
+        # Use a better model for 16GB - these are proven to work well
         model_options = [
+            "microsoft/DialoGPT-medium",
+            "google/flan-t5-large",      # Better reasoning capability
+            "microsoft/DialoGPT-large",  # Good for conversation
         ]

+        model_name = model_options[1]  # flan-t5-large for better reasoning
+        print(f"Loading model: {model_name}")

+        try:
+            self.llm = HuggingFaceLLM(
+                model_name=model_name,
+                tokenizer_name=model_name,
+                context_window=2048,  # Larger context for better understanding
+                max_new_tokens=512,   # More tokens for detailed answers
+                generate_kwargs={
+                    "temperature": 0.1,  # Very low temperature for accuracy
+                    "do_sample": True,
+                    "top_p": 0.95,
+                    "repetition_penalty": 1.2,
+                    "pad_token_id": 0,  # Add explicit pad token
+                },
+                device_map=device_map,
+                model_kwargs={
+                    "torch_dtype": torch.float16,
+                    "low_cpu_mem_usage": True,
+                    "trust_remote_code": True,
+                },
+                # Better system message for instruction following
+                system_message="""You are a precise AI assistant. When asked a question:
+1. If it needs current information, use web_search tool
+2. If it involves calculations, use math_calculator tool
+3. Provide direct, accurate answers
+4. Always be specific and factual"""
+            )
+            print(f"Successfully loaded model: {model_name}")
+
+        except Exception as e:
+            print(f"Failed to load {model_name}: {e}")
+            # Try smaller fallback
+            fallback_model = "microsoft/DialoGPT-medium"
+            print(f"Falling back to: {fallback_model}")
+            self.llm = HuggingFaceLLM(
+                model_name=fallback_model,
+                tokenizer_name=fallback_model,
+                context_window=1024,
+                max_new_tokens=256,
+                generate_kwargs={
+                    "temperature": 0.1,
+                    "do_sample": True,
+                    "top_p": 0.9,
+                    "repetition_penalty": 1.1,
+                },
+                device_map=device_map,
+                model_kwargs={
+                    "torch_dtype": torch.float16,
+                    "low_cpu_mem_usage": True,
+                }
+            )
+            print(f"Successfully loaded fallback model: {fallback_model}")

+        # Define tools with improved implementations
         self.tools = [
             FunctionTool.from_defaults(
+                fn=self.web_search,
                 name="web_search",
+                description="Search the web for current information, facts, or recent events. Use when you need up-to-date information."
             ),
             FunctionTool.from_defaults(
+                fn=self.math_calculator,
                 name="math_calculator",
+                description="Perform mathematical calculations, solve equations, or evaluate mathematical expressions."
             )
         ]

+        # Create ReAct agent with better settings
         try:
             self.agent = ReActAgent.from_tools(
                 tools=self.tools,
                 llm=self.llm,
                 verbose=True,
+                max_iterations=5,       # Allow more iterations for complex problems
+                max_function_calls=10,  # Allow more tool calls
             )
+            print("ReAct Agent initialized successfully.")
         except Exception as e:
+            print(f"Error creating ReAct agent: {e}")
             self.agent = None

+    def web_search(self, query: str) -> str:
+        """Enhanced web search with better result formatting"""
+        print(f"🔍 Web search: {query}")

         if not DDGS:
+            return "Web search unavailable - duckduckgo_search not installed"

         try:
             with DDGS() as ddgs:
+                results = list(ddgs.text(query, max_results=8, region='wt-wt'))

+                if results:
+                    # Format results more concisely for the LLM
+                    formatted_results = []
+                    for i, r in enumerate(results[:5], 1):  # Top 5 results
+                        title = r.get('title', 'No title')
+                        body = r.get('body', 'No description')
+                        # Clean and truncate body
+                        body = body.replace('\n', ' ').strip()[:200]
+                        formatted_results.append(f"{i}. {title}: {body}")

+                    search_summary = f"Search results for '{query}':\n" + "\n".join(formatted_results)
+                    print(f"✅ Found {len(results)} results")
+                    return search_summary
+                else:
+                    return f"No results found for '{query}'. Try different keywords."

         except Exception as e:
+            print(f"❌ Web search error: {e}")
+            return f"Search error for '{query}': {str(e)}"

+    def math_calculator(self, expression: str) -> str:
+        """Enhanced math calculator with better parsing"""
+        print(f"🧮 Math calculation: {expression}")

+        if not sympify:
+            # Basic fallback
+            try:
+                # Clean expression
+                clean_expr = expression.replace('^', '**').replace('×', '*').replace('÷', '/')
+                result = eval(clean_expr)
+                return f"Result: {result}"
+            except Exception as e:
+                return f"Math error: {str(e)}"

+        try:
+            # Clean and prepare expression
+            clean_expr = expression.replace('^', '**').replace('×', '*').replace('÷', '/')
+
+            # Try to evaluate
+            result = sympify(clean_expr)
+
+            # If it's an equation, try to solve it
+            if '=' in expression:
+                # Extract variable and solve
+                parts = expression.split('=')
+                if len(parts) == 2:
+                    eq = sympify(f"Eq({parts[0]}, {parts[1]})")
+                    solution = solve(eq)
+                    return f"Solution: {solution}"
+
+            # Evaluate numerically
+            numerical_result = N(result, 10)  # 10 decimal places
+            return f"Result: {numerical_result}"
+
+        except Exception as e:
+            print(f"❌ Math error: {e}")
+            return f"Could not calculate '{expression}': {str(e)}"

+    def __call__(self, question: str) -> str:
+        print(f"🤔 Processing: {question[:100]}...")

+        # Enhanced question analysis
+        question_lower = question.lower()

+        # Better detection of search needs
+        search_indicators = [
+            'who is', 'what is', 'when did', 'where is', 'current', 'latest', 'recent',
+            'today', 'news', 'winner', 'recipient', 'nationality', 'born in', 'died',
+            'malko', 'competition', 'award', 'century', 'president', 'capital of',
+            'population of', 'founded', 'established', 'discovery', 'invented'
+        ]

+        # Math detection
+        math_indicators = [
+            'calculate', 'compute', 'solve', 'equation', 'sum', 'total', 'average',
+            'percentage', 'multiply', 'divide', 'add', 'subtract', '+', '-', '*', '/',
+            '=', 'x=', 'y=', 'find x', 'find y'
+        ]

+        needs_search = any(indicator in question_lower for indicator in search_indicators)
+        needs_math = any(indicator in question_lower for indicator in math_indicators)

+        # Has numbers in question
+        has_numbers = bool(re.search(r'\d', question))
+        if has_numbers and any(op in question for op in ['+', '-', '*', '/', '=', '^']):
+            needs_math = True

         try:
+            if self.agent:
+                # Use ReAct agent
+                response = self.agent.query(question)
+                response_str = str(response)
+
+                # Check response quality
+                if len(response_str.strip()) < 10 or any(bad in response_str.lower() for bad in ['error', 'sorry', 'cannot', "don't know"]):
+                    print("⚠️ Agent response seems poor, trying direct approach...")
+                    return self._direct_approach(question, needs_search, needs_math)
+
+                return response_str
             else:
+                return self._direct_approach(question, needs_search, needs_math)

         except Exception as e:
+            print(f"❌ Agent error: {str(e)}")
+            return self._direct_approach(question, needs_search, needs_math)
+
+    def _direct_approach(self, question: str, needs_search: bool, needs_math: bool) -> str:
+        """Direct tool usage when agent fails"""

+        if needs_search:
+            # Extract better search terms
+            important_words = []
+            words = question.replace('?', '').split()

+            skip_words = {'what', 'when', 'where', 'who', 'how', 'is', 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}

+            for word in words:
+                clean_word = word.lower().strip('.,!?;:')
+                if len(clean_word) > 2 and clean_word not in skip_words:
+                    important_words.append(clean_word)
+
+            # Take up to 4 most important terms
+            search_query = ' '.join(important_words[:4])
+
+            if search_query:
+                result = self.web_search(search_query)
+                return f"Based on web search:\n\n{result}"

+        if needs_math:
+            # Extract mathematical expressions
+            math_expressions = re.findall(r'[\d+\-*/().\s=x]+', question)
+            for expr in math_expressions:
+                if any(op in expr for op in ['+', '-', '*', '/', '=']):
+                    result = self.math_calculator(expr.strip())
+                    return f"Mathematical calculation:\n{result}"

+        # Fallback: try to give a reasonable response
+        return f"I need more specific information to answer: {question[:100]}... Please provide additional context or rephrase your question."


+def cleanup_memory():
+    """Clean up GPU memory"""
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        print("🧹 GPU memory cleared")


 def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """Enhanced submission with better error handling"""
     space_id = os.getenv("SPACE_ID")

+    if not profile:
+        return "❌ Please Login to Hugging Face first.", None
+
+    username = f"{profile.username}"
+    print(f"👤 User: {username}")

     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"

+    cleanup_memory()
+
+    # Initialize agent
     try:
         agent = SmartAgent()
     except Exception as e:
+        print(f"❌ Agent initialization failed: {e}")
+        return f"Failed to initialize agent: {e}", None

     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

+    # Fetch questions
     try:
+        response = requests.get(questions_url, timeout=30)
         response.raise_for_status()
         questions_data = response.json()
+        print(f"📋 Fetched {len(questions_data)} questions")
     except Exception as e:
+        return f"❌ Error fetching questions: {e}", None

+    # Process questions with better tracking
     results_log = []
     answers_payload = []

     for i, item in enumerate(questions_data, 1):
         task_id = item.get("task_id")
+        question_text = item.get("question")

+        if not task_id or not question_text:
             continue

+        print(f"\n🔄 Question {i}/{len(questions_data)}: {task_id}")
+        print(f"Q: {question_text[:150]}...")

         try:
+            answer = agent(question_text)
+
+            # Ensure answer is not empty or generic
+            if not answer or len(answer.strip()) < 3:
+                answer = f"Unable to process question: {question_text[:50]}..."
+
             answers_payload.append({
+                "task_id": task_id,
+                "submitted_answer": answer
             })
+
             results_log.append({
                 "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Answer": answer[:150] + "..." if len(answer) > 150 else answer
             })

+            print(f"✅ A: {answer[:100]}...")
+
+            # Memory cleanup every 3 questions
+            if i % 3 == 0:
+                cleanup_memory()

         except Exception as e:
+            print(f"❌ Error on {task_id}: {e}")
+            error_answer = f"Processing error: {str(e)[:100]}"
             answers_payload.append({
+                "task_id": task_id,
+                "submitted_answer": error_answer
             })
             results_log.append({
                 "Task ID": task_id,
+                "Question": question_text[:100] + "...",
+                "Answer": error_answer
             })

+    # Submit answers
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload
     }

+    print(f"\n📤 Submitting {len(answers_payload)} answers...")
+
     try:
+        response = requests.post(submit_url, json=submission_data, timeout=120)
         response.raise_for_status()
+        result_data = response.json()

+        score = result_data.get('score', 0)
+        correct = result_data.get('correct_count', 0)
+        total = result_data.get('total_attempted', len(answers_payload))
+
+        final_status = f"""🎉 Submission Complete!
+
+👤 User: {result_data.get('username')}
+📊 Score: {score}% ({correct}/{total} correct)
+💬 {result_data.get('message', 'No message')}
+
+Target: 30%+ ✓ {'ACHIEVED!' if score >= 30 else 'Need improvement'}"""
+
+        print(f"✅ Final Score: {score}%")
+        return final_status, pd.DataFrame(results_log)

     except Exception as e:
+        error_msg = f"❌ Submission failed: {str(e)}"
         print(error_msg)
         return error_msg, pd.DataFrame(results_log)


 # --- Gradio UI ---
+with gr.Blocks(title="Optimized Agent Evaluation", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🚀 Optimized Agent for 16GB Memory")
     gr.Markdown("""
+    **Target: 30%+ Score**

+    **Optimizations:**
+    - 🧠 Better model selection (flan-t5-large)
+    - 🔍 Enhanced web search with DuckDuckGo
+    - 🧮 Advanced math calculator with SymPy
+    - 🎯 Improved question analysis and routing
+    - 💾 Memory management for 16GB systems
+    - 🔧 Robust error handling and fallbacks
+    """)
+
     with gr.Row():
+        gr.LoginButton(scale=1)

+    with gr.Row():
+        run_button = gr.Button(
+            "🚀 Run Optimized Evaluation",
+            variant="primary",
+            size="lg",
+            scale=2
+        )

+    status_output = gr.Textbox(
+        label="📊 Status & Results",
+        lines=10,
+        interactive=False,
+        placeholder="Ready to run evaluation..."
     )

     results_table = gr.DataFrame(
+        label="📝 Detailed Results",
         wrap=True
     )

+    run_button.click(
         fn=run_and_submit_all,
+        outputs=[status_output, results_table]
     )

 if __name__ == "__main__":
+    print("🚀 Starting Optimized Agent for 16GB Memory...")
     demo.launch(
         server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True
     )
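
Note on the new math_calculator: it leans on standard SymPy entry points (sympify, solve, N), building an Eq(...) via a string when the input contains "=". A minimal, self-contained sketch of that equation-handling technique, constructing sympy.Eq directly instead of via sympify; the calc() helper and the sample inputs are illustrative, not part of the commit:

# Sketch of the math_calculator flow, assuming sympy is installed.
from sympy import Eq, N, solve, sympify

def calc(expression: str) -> str:
    # Normalize common notation to SymPy-friendly operators
    expr = expression.replace('^', '**').replace('×', '*').replace('÷', '/')
    if '=' in expr:
        # "lhs = rhs" becomes Eq(lhs, rhs) and is solved symbolically
        lhs, rhs = expr.split('=', 1)
        return f"Solution: {solve(Eq(sympify(lhs), sympify(rhs)))}"
    # Plain expression: evaluate numerically to 10 significant digits
    return f"Result: {N(sympify(expr), 10)}"

print(calc("3 + 4 × 2"))    # Result: 11.00000000
print(calc("x^2 - 4 = 0"))  # Solution: [-2, 2]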