LamiaYT committed on
Commit
bf833c0
·
1 Parent(s): d07ba5a

Deploy GAIA agent

Browse files
Files changed (2) hide show
  1. app.py +197 -18
  2. requirements.txt +10 -6
app.py CHANGED
@@ -4,9 +4,10 @@ import os
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
 
 
7
 
8
  from smolagents import CodeAgent, tool
9
- from smolagents.models import LiteLLMModel # ✅ correct import
10
 
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -37,25 +38,196 @@ def simple_search(query: str) -> str:
37
  except Exception as e:
38
  return f"Search error: {e}"
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- # --- Enhanced Agent using Light Model ---
42
  class BasicAgent:
43
  def __init__(self):
44
- print("BasicAgent initialized with LiteLLMModel (falcon-7b-instruct).")
45
- self.model = LiteLLMModel(
46
- model_id="tiiuae/falcon-7b-instruct",
47
- max_tokens=512,
48
- temperature=0.1
49
- )
50
- self.agent = CodeAgent(
51
- model=self.model,
52
- tools=[simple_search]
53
- )
54
 
55
  def __call__(self, question: str) -> str:
56
  print(f"Question: {question[:60]}...")
 
57
  try:
58
- return self.agent.run(question)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  except Exception as e:
60
  return f"Agent error: {e}"
61
 
@@ -83,14 +255,16 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
83
  return f"Error fetching questions: {e}", None
84
 
85
  logs, answers = [], []
86
- for item in questions:
87
  task_id = item.get("task_id")
88
  question = item.get("question")
89
  if not task_id or question is None:
90
  continue
 
 
91
  ans = agent(question)
92
  answers.append({"task_id": task_id, "submitted_answer": ans})
93
- logs.append({"Task ID": task_id, "Question": question, "Submitted Answer": ans})
94
 
95
  if not answers:
96
  return "Agent produced no answers.", pd.DataFrame(logs)
@@ -113,13 +287,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
113
  # --- Gradio Interface ---
114
  with gr.Blocks() as demo:
115
  gr.Markdown("# GAIA Agent Evaluation Runner")
 
 
116
  gr.LoginButton()
117
- run_button = gr.Button("Run Evaluation & Submit All Answers")
118
- status_box = gr.Textbox(label="Status / Submission Result", lines=5, interactive=False)
 
 
 
119
  result_table = gr.DataFrame(label="Questions & Agent Answers", wrap=True)
120
 
121
  run_button.click(run_and_submit_all, outputs=[status_box, result_table])
122
 
123
  if __name__ == "__main__":
124
  print("Launching Gradio app...")
125
- demo.launch(debug=True, share=False)
 
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
8
+ import torch
9
 
10
  from smolagents import CodeAgent, tool
 
11
 
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
38
  except Exception as e:
39
  return f"Search error: {e}"
40
 
41
+ # --- Wikipedia Search Tool ---
42
@tool
def wikipedia_search(query: str) -> str:
    """
    Searches English Wikipedia and returns short summaries of the best matches.

    Up to two articles are summarised (three sentences each) together with
    their URLs. Failures are reported as text rather than raised.

    Args:
        query (str): The search query text.

    Returns:
        str: Wikipedia search results, or a message describing why none are available.
    """
    try:
        # Imported inside the try so a missing package is reported as a
        # "Wikipedia search error" string instead of failing at module import.
        import wikipedia

        wikipedia.set_lang("en")
        titles = wikipedia.search(query, results=3)
        if not titles:
            return "No Wikipedia results found."

        summaries = []
        for title in titles:
            if len(summaries) == 2:  # Two summaries are enough
                break
            try:
                # Titles come straight from the search results, so disable
                # auto_suggest to keep the library from rewriting them.
                page = wikipedia.page(title, auto_suggest=False)
                summary = wikipedia.summary(title, sentences=3, auto_suggest=False)
            except Exception:
                # Best effort: skip pages that cannot be resolved (e.g.
                # disambiguation or missing pages) and try the next title.
                continue
            summaries.append(f"**{title}**\n{summary}\nURL: {page.url}")

        return "\n\n".join(summaries) if summaries else "No detailed results found."
    except Exception as e:
        return f"Wikipedia search error: {e}"
72
+
73
+ # --- Calculator Tool ---
74
@tool
def calculator(expression: str) -> str:
    """
    Evaluates mathematical expressions safely.

    Supports numbers, parentheses, unary +/- and the binary operators
    +, -, *, /, // and **. Failures are reported as text rather than raised.

    Args:
        expression (str): Mathematical expression to evaluate.

    Returns:
        str: Result of the calculation.
    """
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    # Upper bound on the size of an integer power result, in bits.
    max_power_bits = 100_000

    def evaluate(node):
        """Recursively evaluate a whitelisted arithmetic AST node."""
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            if (
                isinstance(node.op, ast.Pow)
                and isinstance(left, int)
                and isinstance(right, int)
                and right > 0
                and left.bit_length() * right > max_power_bits
            ):
                raise ValueError("result of ** is too large")
            return binary_ops[type(node.op)](left, right)
        raise ValueError("unsupported expression")

    try:
        # Basic safety check
        allowed_chars = set('0123456789+-*/.() ')
        if not all(c in allowed_chars for c in expression):
            return "Error: Invalid characters in expression"

        # SECURITY: this used to call eval(). The character whitelist alone
        # still allowed inputs such as "9**9**9**9" to exhaust CPU and memory,
        # so the expression is parsed and evaluated node by node instead.
        tree = ast.parse(expression.strip(), mode="eval")
        return str(evaluate(tree.body))
    except Exception as e:
        return f"Calculation error: {e}"
95
+
96
+ # --- Custom HuggingFace Model Wrapper ---
97
class HuggingFaceModel:
    """Wrapper that loads a causal language model locally and generates text.

    Attributes set by ``__init__``:
        device: "cuda" when ``torch.cuda.is_available()``, otherwise "cpu".
        tokenizer: tokenizer loaded for the model (or for the fallback model).
        model: the loaded ``AutoModelForCausalLM`` instance.
    """

    def __init__(self, model_name="microsoft/DialoGPT-small"):
        """
        Load ``model_name``; if loading fails for any reason, fall back to distilgpt2.

        Args:
            model_name: Hugging Face model identifier to load.
        """
        print(f"Loading model: {model_name}")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
            if self.tokenizer.pad_token is None:
                # No pad token defined: reuse the end-of-sequence token.
                self.tokenizer.pad_token = self.tokenizer.eos_token

            # NOTE(review): trust_remote_code=True allows the model repository
            # to run its own Python code at load time; confirm this is
            # acceptable for every model name passed in.
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto" if self.device == "cuda" else None,
                trust_remote_code=True,
            )

            if self.device == "cpu":
                self.model = self.model.to(self.device)

            print(f"Model loaded successfully on {self.device}")

        except Exception as e:
            print(f"Error loading model: {e}")
            # Fallback to an even smaller model
            print("Falling back to distilgpt2...")
            self.tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
            self.tokenizer.pad_token = self.tokenizer.eos_token
            self.model = AutoModelForCausalLM.from_pretrained("distilgpt2")
            if self.device == "cuda":
                self.model = self.model.to(self.device)

    def generate(self, prompt: str, max_length: int = 512) -> str:
        """
        Generate a text continuation for ``prompt``.

        Args:
            prompt: Text to continue; it is truncated to 400 tokens.
            max_length: Upper bound on prompt plus generated tokens. At most
                200 new tokens are produced, and at least one.

        Returns:
            The newly generated text (without the prompt), a fixed fallback
            sentence when nothing was generated, or a "Generation error: ..."
            message if anything raised.
        """
        try:
            # `truncation=True` is the tokenizer argument that enforces
            # max_length; the previous `truncate=True` was not.
            inputs = self.tokenizer.encode(
                prompt, return_tensors="pt", truncation=True, max_length=400
            )
            if self.device == "cuda":
                inputs = inputs.to(self.device)

            prompt_len = inputs.size(1)
            # Keep the original budget but always leave room for at least one
            # new token, otherwise a long prompt leaves nothing to generate.
            total_len = max(min(max_length, prompt_len + 200), prompt_len + 1)

            with torch.no_grad():
                outputs = self.model.generate(
                    inputs,
                    max_length=total_len,
                    num_return_sequences=1,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    # Single unpadded sequence, so every position is attended to.
                    attention_mask=torch.ones_like(inputs),
                )

            # The output sequence starts with the prompt tokens; decode only
            # what follows them. Comparing decoded text against the prompt
            # breaks when the prompt was truncated or does not round-trip.
            new_tokens = outputs[0][prompt_len:]
            response = self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

            return response if response else "I need more information to answer this question."

        except Exception as e:
            return f"Generation error: {e}"
167
 
168
+ # --- Simple Agent Implementation ---
169
class BasicAgent:
    """Keyword-routed agent: calculator, then Wikipedia/web search, then the local model.

    Calling the instance with a question returns a string answer. Any
    exception raised while answering is reported as "Agent error: ...".
    """

    def __init__(self):
        """Load the language model and register the available tools by name."""
        print("BasicAgent initializing with HuggingFace model...")
        self.model = HuggingFaceModel("microsoft/DialoGPT-medium")  # Changed to medium for better performance
        self.tools = {
            "search": simple_search,
            "wikipedia": wikipedia_search,
            "calculator": calculator
        }

    def __call__(self, question: str) -> str:
        """
        Answer ``question`` using the first strategy that yields a usable result.

        Args:
            question: The question text.

        Returns:
            The calculator result, a Wikipedia or web search result, or text
            generated by the language model.
        """
        print(f"Question: {question[:60]}...")

        try:
            # Simple logic to determine if we need tools
            question_lower = question.lower()

            # Check if it's a math question
            if any(word in question_lower for word in ['calculate', 'compute', 'math', '+', '-', '*', '/', 'sum', 'total']):
                import re
                operators = ('+', '-', '*', '/')
                for match in re.findall(r'[\d\+\-\*/\.\(\)\s]+', question):
                    candidate = match.strip()
                    # A usable expression needs at least one digit and one
                    # operator; a lone hyphen or a bare number is not one.
                    if not any(ch.isdigit() for ch in candidate):
                        continue
                    if not any(op in candidate for op in operators):
                        continue
                    calc_result = str(self.tools["calculator"](candidate))
                    # The calculator reports failures as text. Do not return
                    # those as the answer; try the next candidate instead.
                    if calc_result.startswith(("Error", "Calculation error")):
                        continue
                    return f"The calculation result is: {calc_result}"

            # Check if it needs web search
            if any(word in question_lower for word in ['current', 'recent', 'latest', 'today', 'news', 'when', 'who', 'what']):
                # Try Wikipedia first for factual questions
                if any(word in question_lower for word in ['who is', 'what is', 'born', 'died', 'biography']):
                    wiki_result = self.tools["wikipedia"](question)
                    wiki_failed = wiki_result.startswith(
                        ("No Wikipedia results", "No detailed results", "Wikipedia search error")
                    )
                    if not wiki_failed:
                        return wiki_result

                # Fall back to web search
                search_result = self.tools["search"](question)
                search_failed = (
                    "No results found" in search_result
                    or search_result.startswith("Search error")
                )
                if not search_failed:
                    return search_result

            # For other questions (or when every tool failed), use the language model
            prompt = (
                f"Question: {question}\n\n"
                "Please provide a clear and accurate answer. "
                "If you're not sure about something, say so.\n\n"
                "Answer:"
            )
            response = self.model.generate(prompt, max_length=400)

            # If the response is too short or generic, try to enhance it
            if len(response.split()) < 5:
                enhanced_prompt = (
                    "You are a helpful assistant. Answer this question with specific details:\n\n"
                    f"{question}\n\n"
                    "Provide a comprehensive answer:"
                )
                response = self.model.generate(enhanced_prompt, max_length=500)

            return response.strip() if response.strip() else "I need more information to answer this question properly."

        except Exception as e:
            return f"Agent error: {e}"
233
 
 
255
  return f"Error fetching questions: {e}", None
256
 
257
  logs, answers = [], []
258
+ for i, item in enumerate(questions):
259
  task_id = item.get("task_id")
260
  question = item.get("question")
261
  if not task_id or question is None:
262
  continue
263
+
264
+ print(f"Processing question {i+1}/{len(questions)}: {task_id}")
265
  ans = agent(question)
266
  answers.append({"task_id": task_id, "submitted_answer": ans})
267
+ logs.append({"Task ID": task_id, "Question": question[:100] + "..." if len(question) > 100 else question, "Submitted Answer": ans[:200] + "..." if len(ans) > 200 else ans})
268
 
269
  if not answers:
270
  return "Agent produced no answers.", pd.DataFrame(logs)
 
287
  # --- Gradio Interface ---
288
# Build the page: heading, description, login, run button, then the two outputs.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown("This agent uses HuggingFace models locally (no API calls) to answer GAIA benchmark questions.")

    gr.LoginButton()

    with gr.Row():
        run_button = gr.Button(
            "Run Evaluation & Submit All Answers",
            variant="primary",
        )

    status_box = gr.Textbox(
        label="Status / Submission Result",
        lines=8,
        interactive=False,
    )
    result_table = gr.DataFrame(
        label="Questions & Agent Answers",
        wrap=True,
    )

    # Clicking the button runs the evaluation and fills both output components.
    run_button.click(run_and_submit_all, outputs=[status_box, result_table])

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    print("Launching Gradio app...")
    demo.launch(debug=True, share=False)
requirements.txt CHANGED
@@ -1,6 +1,10 @@
1
- smolagents
2
- gradio
3
- requests
4
- pandas
5
- litellm
6
- beautifulsoup4
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ transformers>=4.35.0
3
+ torch>=2.0.0
4
+ pandas>=1.5.0
5
+ requests>=2.28.0
6
+ beautifulsoup4>=4.11.0
7
+ wikipedia>=1.4.0
8
+ smolagents>=0.1.0
9
+ accelerate>=0.20.0
10
+ sentencepiece>=0.1.99