tatianija committed on
Commit 61401c1 · verified · parent: aa4ad12

Update app.py

Files changed (1):
  1. app.py +170 -237
app.py CHANGED
@@ -18,15 +18,153 @@ cached_answers = {}
 cached_questions = []
 processing_status = {"is_processing": False, "progress": 0, "total": 0}
 
-# --- Basic Agent Definition ---
-class BasicAgent:
-    def __init__(self, debug: bool = False):
+# --- Intelligent Agent with Conditional Search ---
+class IntelligentAgent:
+    def __init__(self, debug: bool = False, model_name: str = "meta-llama/Llama-3.1-8B-Instruct"):
         self.search = DuckDuckGoSearchTool()
+        self.client = InferenceClient(model=model_name)
         self.debug = debug
         if self.debug:
-            print("BasicAgent initialized.")
+            print(f"IntelligentAgent initialized with model: {model_name}")
+
+    def _should_search(self, question: str) -> bool:
+        """
+        Use LLM to determine if search is needed for the question.
+        Returns True if search is recommended, False otherwise.
+        """
+        decision_prompt = f"""You are an AI assistant that decides whether a web search is needed to answer questions accurately.
+
+Analyze this question and decide if it requires real-time information, recent data, or specific facts that might not be in your training data.
+
+SEARCH IS NEEDED for:
+- Current events, news, recent developments
+- Real-time data (weather, stock prices, sports scores)
+- Specific factual information that changes frequently
+- Recent product releases, company information
+- Current status of people, organizations, or projects
+- Location-specific current information
+
+SEARCH IS NOT NEEDED for:
+- General knowledge questions
+- Mathematical calculations
+- Programming concepts and syntax
+- Historical facts (older than 1 year)
+- Definitions of well-established concepts
+- How-to instructions for common tasks
+- Creative writing or opinion-based responses
+
+Question: "{question}"
+
+Respond with only "SEARCH" or "NO_SEARCH" followed by a brief reason (max 20 words).
+
+Example responses:
+- "SEARCH - Current weather data needed"
+- "NO_SEARCH - Mathematical concept, general knowledge sufficient"
+"""
+
+        try:
+            response = self.client.text_generation(
+                decision_prompt,
+                max_new_tokens=50,
+                temperature=0.1,
+                do_sample=False
+            )
+
+            decision = response.strip().upper()
+            should_search = decision.startswith("SEARCH")
+
+            if self.debug:
+                print(f"Decision for '{question}': {decision}")
+
+            return should_search
+
+        except Exception as e:
+            if self.debug:
+                print(f"Error in search decision: {e}, defaulting to search")
+            # Default to search if decision fails
+            return True
+
+    def _answer_with_llm(self, question: str) -> str:
+        """
+        Generate answer using LLM without search.
+        """
+        answer_prompt = f"""You are a helpful AI assistant. Answer the following question based on your knowledge. Be accurate, concise, and helpful. If you're not certain about something, acknowledge the uncertainty.
+
+Question: {question}
+
+Answer:"""
+
+        try:
+            response = self.client.text_generation(
+                answer_prompt,
+                max_new_tokens=500,
+                temperature=0.3,
+                do_sample=True
+            )
+            return response.strip()
+
+        except Exception as e:
+            return f"Sorry, I encountered an error generating the response: {e}"
+
+    def _answer_with_search(self, question: str) -> str:
+        """
+        Generate answer using search results and LLM.
+        """
+        try:
+            # Perform search
+            search_results = self.search(question)
+
+            if not search_results:
+                return "No search results found. Let me try to answer based on my knowledge:\n\n" + self._answer_with_llm(question)
+
+            # Format search results
+            formatted_results = []
+            for i, result in enumerate(search_results[:3]):  # Use top 3 results
+                title = result.get("title", "No title")
+                snippet = result.get("snippet", "").strip()
+                link = result.get("link", "")
+
+                formatted_results.append(f"Result {i+1}:\nTitle: {title}\nContent: {snippet}\nSource: {link}")
+
+            search_context = "\n\n".join(formatted_results)
+
+            # Generate answer using search context
+            answer_prompt = f"""You are a helpful AI assistant. Use the provided search results to answer the question accurately. Synthesize information from multiple sources when relevant, and cite sources when appropriate.
+
+Question: {question}
+
+Search Results:
+{search_context}
+
+Based on the search results above, provide a comprehensive answer to the question. If the search results don't fully answer the question, you can supplement with your general knowledge but clearly indicate what comes from the search results vs. your knowledge.
+
+Answer:"""
+
+            try:
+                response = self.client.text_generation(
+                    answer_prompt,
+                    max_new_tokens=600,
+                    temperature=0.3,
+                    do_sample=True
+                )
+                return response.strip()
+
+            except Exception as e:
+                # Fallback to simple search result formatting
+                top_result = search_results[0]
+                title = top_result.get("title", "No title")
+                snippet = top_result.get("snippet", "").strip()
+                link = top_result.get("link", "")
+
+                return f"**{title}**\n\n{snippet}\n\nSource: {link}"
+
+        except Exception as e:
+            return f"Search failed: {e}. Let me try to answer based on my knowledge:\n\n" + self._answer_with_llm(question)
 
     def __call__(self, question: str) -> str:
+        """
+        Main entry point - decide whether to search and generate appropriate response.
+        """
         if self.debug:
             print(f"Agent received question: {question}")
 
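Side note: every LLM call in the new agent goes through InferenceClient.text_generation. Assuming InferenceClient is imported from huggingface_hub at the top of app.py (the imports are not shown in this diff), that call pattern can be exercised on its own. A minimal sketch with an illustrative prompt; the gated Llama model may require a valid HF token:

    from huggingface_hub import InferenceClient

    # Same call pattern as _should_search above; the prompt is illustrative.
    client = InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct")
    reply = client.text_generation(
        "Respond with only SEARCH or NO_SEARCH: What is 2 + 2?",
        max_new_tokens=50,
        temperature=0.1,
        do_sample=False,
    )
    print(reply.strip())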
@@ -35,36 +173,21 @@ class BasicAgent:
             return "Please provide a valid question."
 
         try:
-            time.sleep(1)
-            results = self.search(question)
-
-            # Use truthfulness check and early return
-            if not results:
-                return "No results found for that query."
-
-            # Direct access with get() method chaining
-            top = results[0]
-            title = top.get("title") or "No title"
-            snippet = top.get("snippet", "").strip()
-            link = top.get("link", "")
-
-            # Build answer more efficiently
-            parts = [f"**{title}**"]
-            if snippet:
-                parts.append(snippet)
-            if link:
-                parts.append(f"Source: {link}")
-
-            answer = "\n".join(parts)
+            # Decide whether to search
+            if self._should_search(question):
+                if self.debug:
+                    print("Using search-based approach")
+                answer = self._answer_with_search(question)
+            else:
+                if self.debug:
+                    print("Using LLM-only approach")
+                answer = self._answer_with_llm(question)
 
-        except (IndexError, KeyError, AttributeError):
-            # More specific exception handling
-            answer = "Sorry, I couldn't process the search results properly."
         except Exception as e:
-            answer = f"Sorry, I couldn't fetch results due to: {e}"
+            answer = f"Sorry, I encountered an error: {e}"
 
         if self.debug:
-            print(f"Agent returning answer: {answer}")
+            print(f"Agent returning answer: {answer[:100]}...")
 
         return answer
 
@@ -106,9 +229,9 @@ def fetch_questions() -> Tuple[str, Optional[pd.DataFrame]]:
     except Exception as e:
         return f"An unexpected error occurred: {e}", None
 
-def generate_answers_async(progress_callback=None):
+def generate_answers_async(model_name: str = "meta-llama/Llama-3.1-8B-Instruct", progress_callback=None):
     """
-    Generate answers for all cached questions asynchronously.
+    Generate answers for all cached questions asynchronously using the intelligent agent.
     """
     global cached_answers, processing_status
 
@@ -120,7 +243,7 @@ def generate_answers_async(progress_callback=None):
     processing_status["total"] = len(cached_questions)
 
     try:
-        agent = BasicAgent()
+        agent = IntelligentAgent(debug=True, model_name=model_name)
         cached_answers = {}
 
         for i, item in enumerate(cached_questions):
@@ -154,7 +277,7 @@ def generate_answers_async(progress_callback=None):
     finally:
         processing_status["is_processing"] = False
 
-def start_answer_generation():
+def start_answer_generation(model_choice: str):
     """
     Start the answer generation process in a separate thread.
     """
@@ -164,211 +287,21 @@ def start_answer_generation():
     if not cached_questions:
         return "No questions available. Please fetch questions first.", None
 
-    # Start generation in background thread
-    thread = threading.Thread(target=generate_answers_async)
-    thread.daemon = True
-    thread.start()
-
-    return "Answer generation started. Check progress below.", None
-
-def get_generation_progress():
-    """
-    Get the current progress of answer generation.
-    """
-    if not processing_status["is_processing"] and processing_status["progress"] == 0:
-        return "Not started", None
-
-    if processing_status["is_processing"]:
-        progress = processing_status["progress"]
-        total = processing_status["total"]
-        status_msg = f"Generating answers... {progress}/{total} completed"
-        return status_msg, None
-    else:
-        # Generation completed
-        if cached_answers:
-            # Create DataFrame with results
-            display_data = []
-            for task_id, data in cached_answers.items():
-                display_data.append({
-                    "Task ID": task_id,
-                    "Question": data["question"][:100] + "..." if len(data["question"]) > 100 else data["question"],
-                    "Generated Answer": data["answer"][:200] + "..." if len(data["answer"]) > 200 else data["answer"]
-                })
-
-            df = pd.DataFrame(display_data)
-            status_msg = f"Answer generation completed! {len(cached_answers)} answers ready for submission."
-            return status_msg, df
-        else:
-            return "Answer generation completed but no answers were generated.", None
-
-def submit_cached_answers(profile: gr.OAuthProfile | None):
-    """
-    Submit the cached answers to the evaluation API.
-    """
-    global cached_answers
-
-    if not profile:
-        return "Please log in to Hugging Face first.", None
-
-    if not cached_answers:
-        return "No cached answers available. Please generate answers first.", None
-
-    username = profile.username
-    space_id = os.getenv("SPACE_ID")
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
-
-    # Prepare submission payload
-    answers_payload = []
-    for task_id, data in cached_answers.items():
-        answers_payload.append({
-            "task_id": task_id,
-            "submitted_answer": data["answer"]
-        })
-
-    submission_data = {
-        "username": username.strip(),
-        "agent_code": agent_code,
-        "answers": answers_payload
+    # Map model choice to actual model name
+    model_map = {
+        "Llama 3.1 8B": "meta-llama/Llama-3.1-8B-Instruct",
+        "Llama 3.1 70B": "meta-llama/Llama-3.1-70B-Instruct",
+        "Mistral 7B": "mistralai/Mistral-7B-Instruct-v0.3",
+        "CodeLlama 7B": "codellama/CodeLlama-7b-Instruct-hf"
     }
 
-    # Submit to API
-    api_url = DEFAULT_API_URL
-    submit_url = f"{api_url}/submit"
+    selected_model = model_map.get(model_choice, "meta-llama/Llama-3.1-8B-Instruct")
 
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+    # Start generation in background thread
+    thread = threading.Thread(target=generate_answers_async, args=(selected_model,))
+    thread.daemon = True
+    thread.start()
 
-    try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
-        response.raise_for_status()
-        result_data = response.json()
-
-        final_status = (
-            f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
-        )
-
-        # Create results DataFrame
-        results_log = []
-        for task_id, data in cached_answers.items():
-            results_log.append({
-                "Task ID": task_id,
-                "Question": data["question"],
-                "Submitted Answer": data["answer"]
-            })
-
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
-
-    except requests.exceptions.HTTPError as e:
-        error_detail = f"Server responded with status {e.response.status_code}."
-        try:
-            error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except:
-            error_detail += f" Response: {e.response.text[:500]}"
-        return f"Submission Failed: {error_detail}", None
-
-    except requests.exceptions.Timeout:
-        return "Submission Failed: The request timed out.", None
-
-    except Exception as e:
-        return f"Submission Failed: {e}", None
+    return f"Answer generation started using {model_choice}. Check progress below.", None
 
-def clear_cache():
-    """
-    Clear all cached data.
-    """
-    global cached_answers, cached_questions, processing_status
-    cached_answers = {}
-    cached_questions = []
-    processing_status = {"is_processing": False, "progress": 0, "total": 0}
-    return "Cache cleared successfully.", None
-
-# --- Enhanced Gradio Interface ---
-with gr.Blocks(title="Enhanced Agent Evaluation Runner") as demo:
-    gr.Markdown("# Enhanced Agent Evaluation Runner with Answer Caching")
-
-    with gr.Row():
-        gr.LoginButton()
-        clear_btn = gr.Button("Clear Cache", variant="secondary")
-
-    with gr.Tab("Step 1: Fetch Questions"):
-        gr.Markdown("### Fetch Questions from API")
-        fetch_btn = gr.Button("Fetch Questions", variant="primary")
-        fetch_status = gr.Textbox(label="Fetch Status", lines=2, interactive=False)
-        questions_table = gr.DataFrame(label="Available Questions", wrap=True)
-
-        fetch_btn.click(
-            fn=fetch_questions,
-            outputs=[fetch_status, questions_table]
-        )
-
-    with gr.Tab("Step 2: Generate Answers"):
-        gr.Markdown("### Generate Answers (Background Processing)")
-
-        with gr.Row():
-            generate_btn = gr.Button("Start Answer Generation", variant="primary")
-            refresh_btn = gr.Button("Refresh Progress", variant="secondary")
-
-        generation_status = gr.Textbox(label="Generation Status", lines=2, interactive=False)
-        answers_preview = gr.DataFrame(label="Generated Answers Preview", wrap=True)
-
-        generate_btn.click(
-            fn=start_answer_generation,
-            outputs=[generation_status, answers_preview]
-        )
-
-        refresh_btn.click(
-            fn=get_generation_progress,
-            outputs=[generation_status, answers_preview]
-        )
-
-    with gr.Tab("Step 3: Submit Results"):
-        gr.Markdown("### Submit Generated Answers")
-        submit_btn = gr.Button("Submit Cached Answers", variant="primary")
-        submission_status = gr.Textbox(label="Submission Status", lines=5, interactive=False)
-        final_results = gr.DataFrame(label="Final Submission Results", wrap=True)
-
-        submit_btn.click(
-            fn=submit_cached_answers,
-            outputs=[submission_status, final_results]
-        )
-
-    # Clear cache functionality
-    clear_btn.click(
-        fn=clear_cache,
-        outputs=[fetch_status, questions_table]
-    )
-
-    # Auto-refresh progress every 5 seconds when generation is active
-    demo.load(
-        fn=get_generation_progress,
-        outputs=[generation_status, answers_preview]
-    )
-
-if __name__ == "__main__":
-    print("\n" + "-"*30 + " Enhanced App Starting " + "-"*30)
-
-    space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID")
-
-    if space_host_startup:
-        print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-    else:
-        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
-
-    if space_id_startup:
-        print(f"✅ SPACE_ID found: {space_id_startup}")
-        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
-    else:
-        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-
-    print("-"*(60 + len(" Enhanced App Starting ")) + "\n")
-
-    print("Launching Enhanced Gradio Interface...")
-    demo.launch(debug=True, share=False)
+def get_generation_progress():
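For reference, a quick way to smoke-test the new agent outside the Gradio UI. This is a sketch, not part of the commit: it assumes app.py is importable, that its (unshown) imports resolve DuckDuckGoSearchTool and InferenceClient, and that the default model is reachable with your HF credentials:

    from app import IntelligentAgent

    agent = IntelligentAgent(debug=True)  # defaults to meta-llama/Llama-3.1-8B-Instruct
    print(agent("What is the capital of France?"))       # likely routed to the LLM-only path
    print(agent("What is the weather in Paris today?"))  # likely routed to the search path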