LamiaYT committed
Commit 8f6825e · 1 Parent(s): 26e4907

Initial commit with LlamaIndex-based agent

Files changed (1):
  1. app.py +298 -325
app.py CHANGED
@@ -1,7 +1,8 @@
 from llama_index.llms.huggingface import HuggingFaceLLM
 from llama_index.core.agent import ReActAgent
 from llama_index.core.tools import FunctionTool
-from transformers import AutoTokenizer, pipeline
 import os
 import gradio as gr
 import requests
@@ -9,9 +10,6 @@ import pandas as pd
 import traceback
 import torch
 import re
-import gc
-from typing import List, Dict
-from datetime import datetime

 # Import real tool dependencies
 try:
@@ -21,7 +19,7 @@ except ImportError:
 DDGS = None

 try:
-    from sympy import sympify
     from sympy.core.sympify import SympifyError
 except ImportError:
     print("Warning: sympy not installed. Math calculator will be limited.")
@@ -30,460 +28,435 @@ except ImportError:

 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-MEMORY_LIMIT_GB = 16  # Your system's memory limit

 # --- Advanced Agent Definition ---
 class SmartAgent:
     def __init__(self):
-        print(f"Initializing Local LLM Agent (Memory Limit: {MEMORY_LIMIT_GB}GB)...")
-        self.model_loaded = False

-        # Model options sorted by capability (name, approx size in GB, quantization)
         model_options = [
-            ("google/flan-t5-large", 3, "8-bit"),  # Best balance for 16GB
-            ("google/flan-t5-base", 1, "8-bit"),   # Smaller fallback
-            ("facebook/opt-1.3b", 2.5, "8-bit")    # Alternative option
         ]

-        # Try loading models until success
-        for model_name, size_gb, quantization in model_options:
-            if size_gb <= MEMORY_LIMIT_GB and self._try_load_model(model_name, quantization):
-                self.model_loaded = True
-                break

-        if not self.model_loaded:
-            raise RuntimeError("Failed to load any suitable model within memory constraints")

-        # Initialize tools with enhanced implementations
         self.tools = [
             FunctionTool.from_defaults(
-                fn=self.smart_web_search,
                 name="web_search",
-                description="Searches the web for current information. Use for questions about recent events, people, or facts not in the model's training data."
             ),
             FunctionTool.from_defaults(
-                fn=self.robust_math_calculator,
                 name="math_calculator",
-                description="Solves mathematical expressions and equations. Use for calculations, arithmetic, algebra, or numerical problems."
             )
         ]

-        # Initialize ReAct agent with memory optimization
         try:
             self.agent = ReActAgent.from_tools(
                 tools=self.tools,
                 llm=self.llm,
                 verbose=True,
-                max_iterations=4,
-                react_context="""Think step by step. Use tools when needed:
-                - For current/recent information: web_search
-                - For calculations: math_calculator
-                - Be concise but accurate"""
             )
-            print("ReAct Agent initialized successfully")
         except Exception as e:
-            print(f"ReAct Agent init failed: {e}")
             self.agent = None

-    def _try_load_model(self, model_name: str, quantization: str) -> bool:
-        """Attempt to load model with memory constraints"""
-        try:
-            print(f"Loading {model_name} with {quantization} quantization...")
-
-            model_kwargs = {
-                "torch_dtype": torch.float16,
-                "low_cpu_mem_usage": True,
-            }
-
-            if quantization == "8-bit":
-                model_kwargs["load_in_8bit"] = True
-            elif quantization == "4-bit":
-                model_kwargs["load_in_4bit"] = True
-
-            self.llm = HuggingFaceLLM(
-                model_name=model_name,
-                tokenizer_name=model_name,
-                context_window=2048,
-                max_new_tokens=256,
-                generate_kwargs={
-                    "temperature": 0.4,
-                    "do_sample": True,
-                    "top_p": 0.9,
-                    "repetition_penalty": 1.1
-                },
-                device_map="auto" if torch.cuda.is_available() else "cpu",
-                model_kwargs=model_kwargs
-            )
-
-            # Test the model
-            test_response = self.llm.complete("Test response:")
-            if not test_response:
-                raise ValueError("Model failed test response")
-
-            print(f"Successfully loaded {model_name}")
-            return True
-
-        except Exception as e:
-            print(f"Failed to load {model_name}: {str(e)}")
-            self.cleanup_memory()
-            return False
-
-    def smart_web_search(self, query: str) -> str:
-        """Enhanced web search with focused results"""
-        print(f"Searching: {query[:60]}...")

         if not DDGS:
-            return "Web search unavailable (duckduckgo_search not installed)"

         try:
             with DDGS() as ddgs:
-                # Get focused results with longer snippets
-                results = list(ddgs.text(query, max_results=3))
-
-                if not results:
-                    return "No results found"

-                # Process results for key information
-                processed = []
-                for i, res in enumerate(results, 1):
-                    title = res.get('title', 'No title')
-                    body = res.get('body', 'No description')
-                    url = res.get('href', '')

-                    # Extract most relevant part for the query
-                    key_info = self._extract_relevant_info(query, body)

-                    processed.append(
-                        f"🔍 Result {i}:\n"
-                        f"Title: {title}\n"
-                        f"Info: {key_info[:250]}\n"
-                        f"Source: {url}\n"
-                    )
-
-                return "\n".join(processed)
         except Exception as e:
-            return f"Search error: {str(e)}"

-    def _extract_relevant_info(self, query: str, text: str) -> str:
-        """Extract the most relevant portion of text for the query"""
-        query_lower = query.lower()
-        text_lower = text.lower()
-
-        # Handle different question types
-        if any(w in query_lower for w in ['who is', 'biography', 'born']):
-            # Look for birth/death info
-            match = re.search(r"(born [^.]+? in [^.]+?\.)", text, re.I)
-            return match.group(1) if match else text[:250]
-
-        elif any(w in query_lower for w in ['died', 'death']):
-            match = re.search(r"(died [^.]+?\.)", text, re.I)
-            return match.group(1) if match else text[:250]

-        elif any(w in query_lower for w in ['award', 'prize', 'won']):
-            match = re.search(r"(awarded [^.]+? in [^.]+?\.)", text, re.I)
-            return match.group(1) if match else text[:250]

-        # Default: return first 250 chars with important sentences
-        sentences = re.split(r'(?<=[.!?]) +', text)
-        important = [s for s in sentences if any(w in s.lower() for w in query.lower().split())]
-        return " ".join(important[:3]) if important else text[:250]

-    def robust_math_calculator(self, expression: str) -> str:
-        """Improved math calculator with better parsing"""
-        print(f"Calculating: {expression}")

-        # Clean and preprocess the expression
-        expr = expression.strip("'\"")

-        # Replace words with operators
-        replacements = {
-            'plus': '+', 'minus': '-', 'times': '*', 'divided by': '/',
-            '^': '**', 'percent': '/100', 'modulo': '%'
-        }
-        for word, op in replacements.items():
-            expr = expr.replace(word, op)

-        # Extract math expression from text
-        math_match = re.search(r"([-+]?\d*\.?\d+[+\-*/%^()\s]+\d+\.?\d*)", expr)
-        if math_match:
-            expr = math_match.group(1)

-        # Safety check
-        allowed_chars = set("0123456789+-*/().%^ ")
-        if not all(c in allowed_chars for c in expr.replace(" ", "")):
-            return "Error: Invalid characters in expression"

-        try:
-            # Try direct evaluation first
-            result = eval(expr)
-            return f"Result: {result}"
-        except:
-            # Fallback to sympy if available
-            if sympify:
-                try:
-                    result = sympify(expr).evalf()
-                    return f"Result: {result}"
-                except SympifyError as e:
-                    return f"Math error: {str(e)}"
-            return "Error: Could not evaluate the expression"
-
-    def __call__(self, question: str) -> str:
-        """Main interface for answering questions"""
-        print(f"\nQuestion: {question[:100]}...")

         try:
-            # Step 1: Classify question type
-            q_type = self._classify_question(question)
-
-            # Step 2: Use appropriate strategy
-            if q_type == "fact":
-                return self._answer_fact_question(question)
-            elif q_type == "math":
-                return self._answer_math_question(question)
             else:
-                return self._answer_general_question(question)

         except Exception as e:
-            print(f"Error processing question: {str(e)}")
-            return self._fallback_response(question)
-
-    def _classify_question(self, question: str) -> str:
-        """Determine the type of question"""
-        q_lower = question.lower()
-
-        # Math questions
-        math_keywords = ['calculate', 'compute', 'sum', 'total', 'average',
-                         'percentage', 'equation', 'solve', 'math', 'number',
-                         '+', '-', '*', '/', '=']
-        if any(kw in q_lower for kw in math_keywords):
-            return "math"
-
-        # Fact-based questions
-        fact_keywords = ['current', 'latest', 'recent', 'today', 'news',
-                         'who is', 'what is', 'when did', 'where is',
-                         'competition', 'winner', 'recipient', 'nationality',
-                         'country', 'malko', 'century', 'award', 'born', 'died']
-        if any(kw in q_lower for kw in fact_keywords):
-            return "fact"
-
-        return "general"
-
-    def _answer_fact_question(self, question: str) -> str:
-        """Handle fact-based questions with web search"""
-        # Extract key entities for focused search
-        entities = re.findall(r"([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)", question)
-        search_query = " ".join(entities[:3]) or question[:50]
-
-        # Get search results
-        search_results = self.smart_web_search(search_query)

-        # Process with LLM if available
-        if self.model_loaded:
-            prompt = f"""Question: {question}
-Search Results:
-{search_results}

-Based ONLY on these results, provide a concise answer.
-If the answer isn't there, say so."""

-            try:
-                response = self.llm.complete(prompt)
-                return str(response).strip()
-            except:
-                return f"Search results for '{search_query}':\n{search_results}"
-
-        return f"Search results for '{search_query}':\n{search_results}"
-
-    def _answer_math_question(self, question: str) -> str:
-        """Handle math questions with calculator"""
-        # Try to extract math expression
-        math_expr = re.search(r"([\d\s+\-*/().^]+)", question)
-        if math_expr:
-            return self.robust_math_calculator(math_expr.group(1))
-
-        # If no clear expression, use agent reasoning
-        if self.agent:
-            try:
-                response = self.agent.query(question)
-                return str(response)
-            except:
-                return self._fallback_response(question)

-        return self._fallback_response(question)
-
-    def _answer_general_question(self, question: str) -> str:
-        """Handle general knowledge questions"""
-        if self.agent:
-            try:
-                response = self.agent.query(question)
-                return str(response)
-            except:
-                return self._fallback_response(question)

-        # Fallback to simple LLM response
-        try:
-            response = self.llm.complete(question)
-            return str(response)
-        except:
-            return self._fallback_response(question)

-    def _fallback_response(self, question: str) -> str:
-        """Final fallback when all else fails"""
-        return f"I couldn't generate a complete answer for: {question[:150]}... Please try rephrasing or ask about something more specific."

-    def cleanup_memory(self):
-        """Clean up memory resources"""
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        gc.collect()


-# --- Submission Logic ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """Handle the full evaluation process"""
     space_id = os.getenv("SPACE_ID")

-    if profile:
-        username = f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
-        print("User not logged in.")
-        return "Please Login to Hugging Face with the button.", None

     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"

-    # Initialize agent with memory management
     try:
         agent = SmartAgent()
     except Exception as e:
-        print(f"Agent initialization failed: {e}")
-        return f"Error initializing agent: {e}", None

     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(f"Agent code URL: {agent_code}")

-    # Fetch Questions
-    print(f"Fetching questions from: {questions_url}")
     try:
-        response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
-        if not questions_data:
-            return "No questions received from server.", None
-        print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
-        return f"Error fetching questions: {e}", None

-    # Process Questions
     results_log = []
     answers_payload = []

     for i, item in enumerate(questions_data, 1):
         task_id = item.get("task_id")
-        question = item.get("question")

-        if not task_id or not question:
             continue

-        print(f"Processing question {i}/{len(questions_data)} (ID: {task_id})")

         try:
-            answer = agent(question)
             answers_payload.append({
-                "task_id": task_id,
-                "submitted_answer": answer[:2000]  # Limit answer length
             })
             results_log.append({
                 "Task ID": task_id,
-                "Question": question[:100] + "..." if len(question) > 100 else question,
-                "Answer": answer[:200] + "..." if len(answer) > 200 else answer
             })

-            # Clean memory every 5 questions
-            if i % 5 == 0:
-                agent.cleanup_memory()

         except Exception as e:
-            print(f"Error on question {task_id}: {e}")
             answers_payload.append({
-                "task_id": task_id,
-                "submitted_answer": f"Error processing question: {str(e)}"
             })
             results_log.append({
                 "Task ID": task_id,
-                "Question": question[:100] + "..." if len(question) > 100 else question,
-                "Answer": f"Error: {str(e)}"
             })

-    # Submit Answers
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload
     }

-    print(f"Submitting {len(answers_payload)} answers...")
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
-        result = response.json()

-        status = (
-            f"✅ Submission Successful!\n\n"
-            f"User: {result.get('username')}\n"
-            f"Score: {result.get('score', 'N/A')}% "
-            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})\n"
-            f"Message: {result.get('message', '')}"
-        )
-        return status, pd.DataFrame(results_log)

     except Exception as e:
-        error_msg = f"❌ Submission Failed: {str(e)}"
         print(error_msg)
         return error_msg, pd.DataFrame(results_log)


 # --- Gradio UI ---
-with gr.Blocks(title="Local LLM Agent Evaluation") as demo:
     gr.Markdown("""
-    # 🚀 Local LLM Agent Evaluation
-    **Run your local agent against the course evaluation questions**
-    """)

     with gr.Row():
-        gr.LoginButton()

-    run_btn = gr.Button(
-        "🚀 Run Evaluation & Submit Answers",
-        variant="primary"
-    )

-    status_out = gr.Textbox(
-        label="📋 Status",
-        interactive=False
     )

     results_table = gr.DataFrame(
-        label="📊 Results",
-        interactive=False,
         wrap=True
     )

-    run_btn.click(
         fn=run_and_submit_all,
-        outputs=[status_out, results_table]
     )

-
 if __name__ == "__main__":
-    print("\n" + "="*60)
-    print(f"🚀 Starting Agent Evaluation - {datetime.now().strftime('%Y-%m-%d %H:%M')}")
-    print(f"Memory Limit: {MEMORY_LIMIT_GB}GB")
-    print("="*60)
-
     demo.launch(
         server_name="0.0.0.0",
-        server_port=7860
     )
+# app.py - Optimized for 16GB Memory
 from llama_index.llms.huggingface import HuggingFaceLLM
 from llama_index.core.agent import ReActAgent
 from llama_index.core.tools import FunctionTool
+from transformers import AutoTokenizer
 import os
 import gradio as gr
 import requests

 import traceback
 import torch
 import re

 # Import real tool dependencies
 try:

 DDGS = None

 try:
+    from sympy import sympify, solve, simplify, N
     from sympy.core.sympify import SympifyError
 except ImportError:
     print("Warning: sympy not installed. Math calculator will be limited.")

 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

 # --- Advanced Agent Definition ---
 class SmartAgent:
     def __init__(self):
+        print("Initializing Optimized LLM Agent for 16GB Memory...")

+        # Check available memory and CUDA
+        if torch.cuda.is_available():
+            print(f"CUDA available. GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
+            device_map = "auto"
+        else:
+            print("CUDA not available, using CPU")
+            device_map = "cpu"
+
+        # Use a better model for 16GB - these are proven to work well
         model_options = [
+            "microsoft/DialoGPT-medium",
+            "google/flan-t5-large",      # Better reasoning capability
+            "microsoft/DialoGPT-large",  # Good for conversation
         ]

+        model_name = model_options[1]  # flan-t5-large for better reasoning
+        print(f"Loading model: {model_name}")

+        try:
+            self.llm = HuggingFaceLLM(
+                model_name=model_name,
+                tokenizer_name=model_name,
+                context_window=2048,  # Larger context for better understanding
+                max_new_tokens=512,   # More tokens for detailed answers
+                generate_kwargs={
+                    "temperature": 0.1,  # Very low temperature for accuracy
+                    "do_sample": True,
+                    "top_p": 0.95,
+                    "repetition_penalty": 1.2,
+                    "pad_token_id": 0,  # Add explicit pad token
+                },
+                device_map=device_map,
+                model_kwargs={
+                    "torch_dtype": torch.float16,
+                    "low_cpu_mem_usage": True,
+                    "trust_remote_code": True,
+                },
+                # Better system message for instruction following
+                system_message="""You are a precise AI assistant. When asked a question:
+1. If it needs current information, use web_search tool
+2. If it involves calculations, use math_calculator tool
+3. Provide direct, accurate answers
+4. Always be specific and factual"""
+            )
+            print(f"Successfully loaded model: {model_name}")
+
+        except Exception as e:
+            print(f"Failed to load {model_name}: {e}")
+            # Try smaller fallback
+            fallback_model = "microsoft/DialoGPT-medium"
+            print(f"Falling back to: {fallback_model}")
+            self.llm = HuggingFaceLLM(
+                model_name=fallback_model,
+                tokenizer_name=fallback_model,
+                context_window=1024,
+                max_new_tokens=256,
+                generate_kwargs={
+                    "temperature": 0.1,
+                    "do_sample": True,
+                    "top_p": 0.9,
+                    "repetition_penalty": 1.1,
+                },
+                device_map=device_map,
+                model_kwargs={
+                    "torch_dtype": torch.float16,
+                    "low_cpu_mem_usage": True,
+                }
+            )
+            print(f"Successfully loaded fallback model: {fallback_model}")

+        # Define tools with improved implementations
         self.tools = [
             FunctionTool.from_defaults(
+                fn=self.web_search,
                 name="web_search",
+                description="Search the web for current information, facts, or recent events. Use when you need up-to-date information."
             ),
             FunctionTool.from_defaults(
+                fn=self.math_calculator,
                 name="math_calculator",
+                description="Perform mathematical calculations, solve equations, or evaluate mathematical expressions."
             )
         ]

+        # Create ReAct agent with better settings
         try:
             self.agent = ReActAgent.from_tools(
                 tools=self.tools,
                 llm=self.llm,
                 verbose=True,
+                max_iterations=5,       # Allow more iterations for complex problems
+                max_function_calls=10,  # Allow more tool calls
             )
+            print("ReAct Agent initialized successfully.")
         except Exception as e:
+            print(f"Error creating ReAct agent: {e}")
             self.agent = None

+    def web_search(self, query: str) -> str:
+        """Enhanced web search with better result formatting"""
+        print(f"🔍 Web search: {query}")

         if not DDGS:
+            return "Web search unavailable - duckduckgo_search not installed"

         try:
             with DDGS() as ddgs:
+                results = list(ddgs.text(query, max_results=8, region='wt-wt'))

+                if results:
+                    # Format results more concisely for the LLM
+                    formatted_results = []
+                    for i, r in enumerate(results[:5], 1):  # Top 5 results
+                        title = r.get('title', 'No title')
+                        body = r.get('body', 'No description')
+                        # Clean and truncate body
+                        body = body.replace('\n', ' ').strip()[:200]
+                        formatted_results.append(f"{i}. {title}: {body}")

+                    search_summary = f"Search results for '{query}':\n" + "\n".join(formatted_results)
+                    print(f"✅ Found {len(results)} results")
+                    return search_summary
+                else:
+                    return f"No results found for '{query}'. Try different keywords."

         except Exception as e:
+            print(f"❌ Web search error: {e}")
+            return f"Search error for '{query}': {str(e)}"

+    def math_calculator(self, expression: str) -> str:
+        """Enhanced math calculator with better parsing"""
+        print(f"🧮 Math calculation: {expression}")

+        if not sympify:
+            # Basic fallback
+            try:
+                # Clean expression
+                clean_expr = expression.replace('^', '**').replace('×', '*').replace('÷', '/')
+                result = eval(clean_expr)
+                return f"Result: {result}"
+            except Exception as e:
+                return f"Math error: {str(e)}"

+        try:
+            # Clean and prepare expression
+            clean_expr = expression.replace('^', '**').replace('×', '*').replace('÷', '/')
+
+            # Try to evaluate
+            result = sympify(clean_expr)
+
+            # If it's an equation, try to solve it
+            if '=' in expression:
+                # Extract variable and solve
+                parts = expression.split('=')
+                if len(parts) == 2:
+                    eq = sympify(f"Eq({parts[0]}, {parts[1]})")
+                    solution = solve(eq)
+                    return f"Solution: {solution}"
+
+            # Evaluate numerically
+            numerical_result = N(result, 10)  # 10 decimal places
+            return f"Result: {numerical_result}"
+
+        except Exception as e:
+            print(f"❌ Math error: {e}")
+            return f"Could not calculate '{expression}': {str(e)}"

+    def __call__(self, question: str) -> str:
+        print(f"🤔 Processing: {question[:100]}...")

+        # Enhanced question analysis
+        question_lower = question.lower()

+        # Better detection of search needs
+        search_indicators = [
+            'who is', 'what is', 'when did', 'where is', 'current', 'latest', 'recent',
+            'today', 'news', 'winner', 'recipient', 'nationality', 'born in', 'died',
+            'malko', 'competition', 'award', 'century', 'president', 'capital of',
+            'population of', 'founded', 'established', 'discovery', 'invented'
+        ]

+        # Math detection
+        math_indicators = [
+            'calculate', 'compute', 'solve', 'equation', 'sum', 'total', 'average',
+            'percentage', 'multiply', 'divide', 'add', 'subtract', '+', '-', '*', '/',
+            '=', 'x=', 'y=', 'find x', 'find y'
+        ]

+        needs_search = any(indicator in question_lower for indicator in search_indicators)
+        needs_math = any(indicator in question_lower for indicator in math_indicators)

+        # Has numbers in question
+        has_numbers = bool(re.search(r'\d', question))
+        if has_numbers and any(op in question for op in ['+', '-', '*', '/', '=', '^']):
+            needs_math = True

         try:
+            if self.agent:
+                # Use ReAct agent
+                response = self.agent.query(question)
+                response_str = str(response)
+
+                # Check response quality
+                if len(response_str.strip()) < 10 or any(bad in response_str.lower() for bad in ['error', 'sorry', 'cannot', "don't know"]):
+                    print("⚠️ Agent response seems poor, trying direct approach...")
+                    return self._direct_approach(question, needs_search, needs_math)
+
+                return response_str
             else:
+                return self._direct_approach(question, needs_search, needs_math)

         except Exception as e:
+            print(f"❌ Agent error: {str(e)}")
+            return self._direct_approach(question, needs_search, needs_math)
+
+    def _direct_approach(self, question: str, needs_search: bool, needs_math: bool) -> str:
+        """Direct tool usage when agent fails"""

+        if needs_search:
+            # Extract better search terms
+            important_words = []
+            words = question.replace('?', '').split()

+            skip_words = {'what', 'when', 'where', 'who', 'how', 'is', 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}

+            for word in words:
+                clean_word = word.lower().strip('.,!?;:')
+                if len(clean_word) > 2 and clean_word not in skip_words:
+                    important_words.append(clean_word)
+
+            # Take up to 4 most important terms
+            search_query = ' '.join(important_words[:4])
+
+            if search_query:
+                result = self.web_search(search_query)
+                return f"Based on web search:\n\n{result}"

+        if needs_math:
+            # Extract mathematical expressions
+            math_expressions = re.findall(r'[\d+\-*/().\s=x]+', question)
+            for expr in math_expressions:
+                if any(op in expr for op in ['+', '-', '*', '/', '=']):
+                    result = self.math_calculator(expr.strip())
+                    return f"Mathematical calculation:\n{result}"

+        # Fallback: try to give a reasonable response
+        return f"I need more specific information to answer: {question[:100]}... Please provide additional context or rephrase your question."


+def cleanup_memory():
+    """Clean up GPU memory"""
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        print("🧹 GPU memory cleared")


 def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """Enhanced submission with better error handling"""
     space_id = os.getenv("SPACE_ID")

+    if not profile:
+        return "❌ Please Login to Hugging Face first.", None
+
+    username = f"{profile.username}"
+    print(f"👤 User: {username}")

     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"

+    cleanup_memory()
+
+    # Initialize agent
     try:
         agent = SmartAgent()
     except Exception as e:
+        print(f"❌ Agent initialization failed: {e}")
+        return f"Failed to initialize agent: {e}", None

     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

+    # Fetch questions
     try:
+        response = requests.get(questions_url, timeout=30)
         response.raise_for_status()
         questions_data = response.json()
+        print(f"📋 Fetched {len(questions_data)} questions")
     except Exception as e:
+        return f"❌ Error fetching questions: {e}", None

+    # Process questions with better tracking
     results_log = []
     answers_payload = []

     for i, item in enumerate(questions_data, 1):
         task_id = item.get("task_id")
+        question_text = item.get("question")

+        if not task_id or not question_text:
             continue

+        print(f"\n🔄 Question {i}/{len(questions_data)}: {task_id}")
+        print(f"Q: {question_text[:150]}...")

         try:
+            answer = agent(question_text)
+
+            # Ensure answer is not empty or generic
+            if not answer or len(answer.strip()) < 3:
+                answer = f"Unable to process question: {question_text[:50]}..."
+
             answers_payload.append({
+                "task_id": task_id,
+                "submitted_answer": answer
             })
+
             results_log.append({
                 "Task ID": task_id,
+                "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                "Answer": answer[:150] + "..." if len(answer) > 150 else answer
             })

+            print(f"✅ A: {answer[:100]}...")
+
+            # Memory cleanup every 3 questions
+            if i % 3 == 0:
+                cleanup_memory()

         except Exception as e:
+            print(f"❌ Error on {task_id}: {e}")
+            error_answer = f"Processing error: {str(e)[:100]}"
             answers_payload.append({
+                "task_id": task_id,
+                "submitted_answer": error_answer
             })
             results_log.append({
                 "Task ID": task_id,
+                "Question": question_text[:100] + "...",
+                "Answer": error_answer
             })

+    # Submit answers
     submission_data = {
         "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload
     }

+    print(f"\n📤 Submitting {len(answers_payload)} answers...")
+
     try:
+        response = requests.post(submit_url, json=submission_data, timeout=120)
         response.raise_for_status()
+        result_data = response.json()

+        score = result_data.get('score', 0)
+        correct = result_data.get('correct_count', 0)
+        total = result_data.get('total_attempted', len(answers_payload))
+
+        final_status = f"""🎉 Submission Complete!
+
+👤 User: {result_data.get('username')}
+📊 Score: {score}% ({correct}/{total} correct)
+💬 {result_data.get('message', 'No message')}
+
+Target: 30%+ ✓ {'ACHIEVED!' if score >= 30 else 'Need improvement'}"""
+
+        print(f"✅ Final Score: {score}%")
+        return final_status, pd.DataFrame(results_log)

     except Exception as e:
+        error_msg = f"❌ Submission failed: {str(e)}"
         print(error_msg)
         return error_msg, pd.DataFrame(results_log)


 # --- Gradio UI ---
+with gr.Blocks(title="Optimized Agent Evaluation", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🚀 Optimized Agent for 16GB Memory")
     gr.Markdown("""
+    **Target: 30%+ Score**

+    **Optimizations:**
+    - 🧠 Better model selection (flan-t5-large)
+    - 🔍 Enhanced web search with DuckDuckGo
+    - 🧮 Advanced math calculator with SymPy
+    - 🎯 Improved question analysis and routing
+    - 💾 Memory management for 16GB systems
+    - 🔧 Robust error handling and fallbacks
+    """)
+
     with gr.Row():
+        gr.LoginButton(scale=1)

+    with gr.Row():
+        run_button = gr.Button(
+            "🚀 Run Optimized Evaluation",
+            variant="primary",
+            size="lg",
+            scale=2
+        )

+    status_output = gr.Textbox(
+        label="📊 Status & Results",
+        lines=10,
+        interactive=False,
+        placeholder="Ready to run evaluation..."
     )

     results_table = gr.DataFrame(
+        label="📝 Detailed Results",
         wrap=True
     )

+    run_button.click(
         fn=run_and_submit_all,
+        outputs=[status_output, results_table]
     )

 if __name__ == "__main__":
+    print("🚀 Starting Optimized Agent for 16GB Memory...")
     demo.launch(
         server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True
     )
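
Note on the new math_calculator: it leans on standard SymPy entry points (sympify, solve, N), building an Eq(...) via a string when the input contains "=". A minimal, self-contained sketch of that equation-handling technique, constructing sympy.Eq directly instead of via sympify; the calc() helper and the sample inputs are illustrative, not part of the commit:

# Sketch of the math_calculator flow, assuming sympy is installed.
from sympy import Eq, N, solve, sympify

def calc(expression: str) -> str:
    # Normalize common notation to SymPy-friendly operators
    expr = expression.replace('^', '**').replace('×', '*').replace('÷', '/')
    if '=' in expr:
        # "lhs = rhs" becomes Eq(lhs, rhs) and is solved symbolically
        lhs, rhs = expr.split('=', 1)
        return f"Solution: {solve(Eq(sympify(lhs), sympify(rhs)))}"
    # Plain expression: evaluate numerically to 10 significant digits
    return f"Result: {N(sympify(expr), 10)}"

print(calc("3 + 4 × 2"))    # Result: 11.00000000
print(calc("x^2 - 4 = 0"))  # Solution: [-2, 2]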