Update app.py
app.py
CHANGED
@@ -1,240 +1,42 @@
-import requests
 import os
 import gradio as gr
-import
+import requests
 import pandas as pd
 import time
 import re
 
-
-
-from langchain_experimental.utilities import PythonREPL
-from langchain.tools import Tool
-
-# 1. Python REPL Tool
-python_repl = PythonREPL()
-repl_tool = Tool(
-    name="python_repl",
-    description="""
-    A Python REPL (Read-Eval-Print Loop) for executing Python code.
-    Use this tool for:
-    - Performing accurate calculations (arithmetic, complex math).
-    - Manipulating and analyzing data (e.g., lists, numbers).
-    - Executing small, self-contained Python scripts.
-    Input MUST be valid Python code, and all outputs must be printed.
-    """,
-    func=python_repl.run,
-)
-
-# 2. File Saver Tool
-def download_and_save_file(args: dict) -> str:
-    """
-    Downloads a file from a given URL and saves it to a specified local filename.
-    Input: JSON string with 'url' and 'local_filename' keys.
-    Example: {"url": "https://example.com/data.xlsx", "local_filename": "data.xlsx"}
-    """
-    try:
-        if isinstance(args, str):
-            import json
-            args = json.loads(args)
-        url = args.get("url")
-        local_filename = args.get("local_filename")
-        if not url or not local_filename:
-            return "Error: Both 'url' and 'local_filename' must be provided."
-        response = requests.get(url, stream=True, timeout=30)
-        response.raise_for_status()
-        os.makedirs(os.path.dirname(local_filename) or '.', exist_ok=True)
-        with open(local_filename, 'wb') as f:
-            for chunk in response.iter_content(chunk_size=8192):
-                f.write(chunk)
-        return f"File downloaded successfully to {local_filename}"
-    except Exception as e:
-        return f"An unexpected error occurred: {e}"
-
-file_saver_tool = Tool(
-    name="file_saver",
-    description="Downloads a file from a URL and saves it to a specified local filename. Input: JSON with 'url' and 'local_filename'.",
-    func=download_and_save_file,
-)
-
-# 3. Audio Transcriber Tool
-import speech_recognition as sr
-from pydub import AudioSegment
-
-def transcribe_audio_from_path(local_audio_path: str, language: str = "en-US") -> str:
-    """
-    Transcribes audio content from a local file path to text.
-    Only local file paths. Converts to WAV if needed.
-    """
-    r = sr.Recognizer()
-    temp_wav_path = "temp_audio_to_transcribe.wav"
-    transcribed_text = ""
-    try:
-        if local_audio_path.startswith("http://") or local_audio_path.startswith("https://"):
-            return "Error: This tool only accepts local file paths, not URLs. Please use 'file_saver' first."
-        if not os.path.exists(local_audio_path):
-            return f"Error: Local audio file not found at '{local_audio_path}'."
-        audio = AudioSegment.from_file(local_audio_path)
-        audio.export(temp_wav_path, format="wav")
-        with sr.AudioFile(temp_wav_path) as source:
-            audio_listened = r.record(source)
-            try:
-                transcribed_text = r.recognize_google(audio_listened, language=language)
-            except sr.UnknownValueError:
-                return "Could not understand audio (speech not clear or too short)."
-            except sr.RequestError as e:
-                return f"Could not request results from Google Speech Recognition service; {e}"
-    except Exception as e:
-        return f"An unexpected error occurred during audio processing or transcription: {e}"
-    finally:
-        if os.path.exists(temp_wav_path):
-            os.remove(temp_wav_path)
-    return transcribed_text.strip()
-
-audio_transcriber_tool = Tool(
-    name="audio_transcriber_tool",
-    description=(
-        "Transcribes audio content from a **local file path** to a text transcript. "
-        "Use for extracting spoken information from audio recordings downloaded using 'file_saver'."
-    ),
-    func=transcribe_audio_from_path,
-)
-
-# 4. Gemini Multimodal Tool (for images)
-import base64
-from langchain.tools import Tool
-from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain_core.messages import HumanMessage
-
-def analyze_image_with_gemini(args: dict) -> str:
-    """
-    Analyzes an image using Gemini Multimodal LLM to answer a given question.
-    Input: JSON with 'image_path' and 'question'.
-    """
-    try:
-        if isinstance(args, str):
-            import json
-            args = json.loads(args)
-        image_path = args.get("image_path")
-        question = args.get("question")
-        if not image_path or not question:
-            return "Error: Both 'image_path' and 'question' must be provided."
-        if not os.path.exists(image_path):
-            return f"Error: Local image file not found at '{image_path}'."
-        google_api_key = os.getenv("GOOGLE_API_KEY")
-        if not google_api_key:
-            return "Error: GOOGLE_API_KEY not found in environment variables for multimodal tool."
-        llm = ChatGoogleGenerativeAI(
-            model="gemini-2.0-flash",
-            google_api_key=google_api_key,
-            temperature=0.0
-        )
-        with open(image_path, "rb") as f:
-            image_bytes = f.read()
-        image_base64 = base64.b64encode(image_bytes).decode('utf-8')
-        message = HumanMessage(
-            content=[
-                {"type": "text", "text": question},
-                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
-            ]
-        )
-        response = llm.invoke([message])
-        return response.content
-    except Exception as e:
-        return f"Error in gemini_multimodal_tool: {e}"
-
-gemini_multimodal_tool = Tool(
-    name="gemini_multimodal_tool",
-    description="Analyze an image with Gemini LLM. Input: JSON with 'image_path' and 'question'.",
-    func=analyze_image_with_gemini,
-)
-
-# 5. Wikipedia Search Tool
-from langchain_community.document_loaders import WikipediaLoader
-
-def wiki_search(query: str) -> str:
-    """Search Wikipedia for a query and return up to 2 results."""
-    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
-    formatted_search_docs = "\n\n---\n\n".join(
-        [
-            f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
-            for doc in search_docs
-        ])
-    return formatted_search_docs
-
-wikipedia_search_tool2 = Tool(
-    name="wikipedia_search_tool2",
-    description="Search Wikipedia for a query and return up to 2 results.",
-    func=wiki_search,
-)
-
-# ========== END TOOLS SECTION ==========
-
-# --- AGENT SECTION ---
-from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain.memory import ConversationSummaryMemory
+from langchain_openai import ChatOpenAI
 from langchain.prompts import PromptTemplate
 from langchain.agents import AgentExecutor, create_react_agent
+from langchain.memory import ConversationSummaryMemory
 from typing import List, Optional
 
-
+# === TOOL IMPORTS ===
+from helper import repl_tool, file_saver_tool, audio_transcriber_tool, gemini_multimodal_tool, wikipedia_search_tool2
 
-#
-
-if not google_api_key:
-    raise RuntimeError("GOOGLE_API_KEY not found in environment. Please set it.")
-
-gemini_model = "gemini-2.0-flash"
-llm_client = ChatGoogleGenerativeAI(
-    model=gemini_model,
-    google_api_key=google_api_key,
-    temperature=0,
-)
-summary_memory = ConversationSummaryMemory(llm=llm_client, memory_key="chat_history")
+# Constants
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
-# Prompt
+# --- Prompt ---
 prompt = PromptTemplate(
     input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
     template="""
-You are a smart and helpful AI Agent/Assistant that excels at fact-based reasoning. You are allowed and encouraged to use one or more tools as needed to answer complex questions and perform tasks.
-
-
-
-
-
-
-{chat_history}
-New input: {input}
----
-{agent_scratchpad}
-"""
-)
-
-tools = [repl_tool, file_saver_tool, audio_transcriber_tool, gemini_multimodal_tool, wikipedia_search_tool2]
-
-summary_llm = ChatGoogleGenerativeAI(
-    model=gemini_model,
-    google_api_key=google_api_key,
-    temperature=0,
-    streaming=True
-)
-
-summary_react_agent = create_react_agent(
-    llm=summary_llm,
-    tools=tools,
-    prompt=prompt
+You are a smart and helpful AI Agent/Assistant that excels at fact-based reasoning. You are allowed and encouraged to use one or more tools as needed to answer complex questions and perform tasks.
+[ ...cut for brevity: insert your strict format rules and examples here ... ]
+{chat_history}
+New input: {input}
+---
+{agent_scratchpad}
+"""
 )
 
+# === AGENT DEFINITION ===
 class BasicAgent:
     def __init__(
         self,
-        agent,
-
-
-        handle_parsing_errors: bool = True,
-        max_iterations: int = 9,
-        memory: Optional[ConversationSummaryMemory] = None
-    ) -> None:
+        agent, tools: List, verbose: bool = False, handle_parsing_errors: bool = True,
+        max_iterations: int = 9, memory: Optional[ConversationSummaryMemory] = None
+    ):
         self.agent = agent
         self.tools = tools
         self.verbose = verbose
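
One detail worth flagging in this hunk: current `create_react_agent` implementations validate that the prompt exposes `tools`, `tool_names`, and `agent_scratchpad` placeholders, and the committed `input_variables` list omits `tools`, so the `[ ...cut for brevity ... ]` block presumably carries that placeholder. A sketch of a minimal ReAct template that passes that check (illustrative, not the committed text):

```python
# Minimal ReAct prompt sketch (assumes langchain>=0.1; the committed template is
# cut for brevity in the diff, so this wording is illustrative only).
from langchain.prompts import PromptTemplate

react_prompt_sketch = PromptTemplate.from_template(
    """Answer the question, calling a tool when it helps.

Available tools:
{tools}

Use this format:
Question: the input question
Thought: reason about what to do next
Action: one of [{tool_names}]
Action Input: the input for the action
Observation: the action's result
... (Thought/Action/Action Input/Observation may repeat)
Final Answer: the answer to the original question

{chat_history}
Question: {input}
{agent_scratchpad}"""
)
```
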
@@ -249,6 +51,7 @@ class BasicAgent:
             max_iterations=self.max_iterations,
             memory=self.memory
         )
+
     def __call__(self, question: str) -> str:
         result = self.agent_obj.invoke(
             {"input": question},
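
The body of `__init__` between the two hunks is outside the diff context, but the surviving tail (`max_iterations=...`, `memory=...`) reads like the closing arguments of an `AgentExecutor`. A hypothetical reconstruction, assuming exactly the attributes set above:

```python
# Hypothetical sketch of the elided __init__ tail (not the committed lines).
from langchain.agents import AgentExecutor

def _build_executor(self) -> AgentExecutor:
    # Mirrors the context lines visible in the hunk above.
    return AgentExecutor(
        agent=self.agent,
        tools=self.tools,
        verbose=self.verbose,
        handle_parsing_errors=self.handle_parsing_errors,
        max_iterations=self.max_iterations,
        memory=self.memory,
    )
```
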
@@ -256,10 +59,11 @@ class BasicAgent:
         )
         return result['output']
 
-def run_and_submit_all( profile: gr.OAuthProfile | None):
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
+
     if profile:
-        username= f"{profile.username}"
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
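
`__call__` forwards the question to `AgentExecutor.invoke`, which returns a dict whose `output` key carries the final answer, so the class can be used like a function. A usage sketch, borrowing the constructor arguments from the call site later in this diff:

```python
# Usage sketch; summary_react_agent, tools and summary_memory are assumed to be
# the objects built inside run_and_submit_all in this commit.
agent = BasicAgent(summary_react_agent, tools, True, True, 30, summary_memory)
answer = agent("What is the capital of France?")  # returns result['output']
print(answer)
```
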
@@ -269,10 +73,42 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
 
-
+    # OpenAI API key only!
+    openai_api_key = os.getenv("OPENAI_API_KEY")
+    if not openai_api_key:
+        print("OpenAI API key not found in environment variables.")
+        return "OpenAI API key not found. Please set OPENAI_API_KEY environment variable.", None
+
+    # Use GPT-4o (or another allowed OpenAI model)
+    llm_client = ChatOpenAI(model='gpt-4o', temperature=0, api_key=openai_api_key)
+
+    # Tools: only offline/tools not requiring other APIs
+    tools = [
+        repl_tool,
+        file_saver_tool,
+        audio_transcriber_tool,
+        gemini_multimodal_tool,  # If this is purely local or adapted for OpenAI images, otherwise remove!
+        wikipedia_search_tool2
+    ]
+
+    summary_memory = ConversationSummaryMemory(llm=llm_client, memory_key="chat_history")
+
+    summary_react_agent = create_react_agent(
+        llm=llm_client,
+        tools=tools,
+        prompt=prompt
+    )
+
+    # 1. Instantiate Agent
+    try:
+        agent = BasicAgent(summary_react_agent, tools, True, True, 30, summary_memory)
+    except Exception as e:
+        print(f"Error instantiating agent: {e}")
+        return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
 
+    # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=15)
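
Since this hunk moves model construction inside `run_and_submit_all` and switches the backend to OpenAI, a quick standalone check of the key and model name can save a full evaluation run. A sketch, using the same model string as the commit:

```python
# Smoke test for the OpenAI setup (assumes OPENAI_API_KEY is exported).
import os
from langchain_openai import ChatOpenAI

assert os.getenv("OPENAI_API_KEY"), "set OPENAI_API_KEY first"
llm = ChatOpenAI(model="gpt-4o", temperature=0)
print(llm.invoke("Reply with the single word: pong").content)
```
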
@@ -283,8 +119,10 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
+        print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
 
+    # 3. Run your Agent
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
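
The fetch step above reduces to a single GET against the scoring API. The loop that follows reads `task_id` and `file_name` from each item, so each question is presumably a small dict (the exact schema belongs to the API, not this file):

```python
# Standalone sketch of the fetch step; the URL comes from DEFAULT_API_URL above.
import requests

resp = requests.get("https://agents-course-unit4-scoring.hf.space/questions", timeout=15)
resp.raise_for_status()
questions_data = resp.json()  # assumed: list of dicts with "task_id", "question", "file_name"
print(f"Fetched {len(questions_data)} questions.")
```
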
@@ -294,22 +132,26 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         file_name = item.get("file_name")
         full_question_for_agent = question_text
         if file_name:
-            attachment_url = f"{
+            attachment_url = f"{DEFAULT_API_URL}/files/{task_id}"
             full_question_for_agent += f"\n\nAttachment '{file_name}' available at EXACT URL: {attachment_url}"
-        print(f"Running agent on task {task_id}: {full_question_for_agent}",flush=True)
+        print(f"Running agent on task {task_id}: {full_question_for_agent}", flush=True)
         try:
             submitted_answer = agent(full_question_for_agent)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-            time.sleep(
+            time.sleep(2)  # Decrease or remove if not rate-limited!
         except Exception as e:
             print(f"Error running agent on task {task_id}: {e}")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
     if not answers_payload:
+        print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    print(status_update)
+
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
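
The submission body assembled above has a fixed three-field shape. Spelled out with placeholder values:

```python
# Illustrative payload for POST {DEFAULT_API_URL}/submit (values are placeholders).
import requests

submission_data = {
    "username": "your-hf-username",
    "agent_code": "https://huggingface.co/spaces/<SPACE_ID>/tree/main",
    "answers": [
        {"task_id": "example-task-id", "submitted_answer": "42"},
    ],
}
resp = requests.post("https://agents-course-unit4-scoring.hf.space/submit",
                     json=submission_data, timeout=60)
```
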
@@ -322,29 +164,27 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
                 f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
                 f"Message: {result_data.get('message', 'No message received.')}"
             )
-        cleaned_final_status = re.sub(r'[^\x20-\x7E\n\r\t]+', '', final_status)
-        cleaned_final_status = cleaned_final_status.strip()
+        cleaned_final_status = re.sub(r'[^\x20-\x7E\n\r\t]+', '', final_status).strip()
         results_df = pd.DataFrame(results_log)
         return cleaned_final_status, results_df
     except Exception as e:
-
+        print(f"Error submitting answers: {e}")
         results_df = pd.DataFrame(results_log)
-        return
+        return f"Submission Failed: {e}", results_df
 
+# --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
     gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
-        1.
-        2.
-        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+        1. Log in to your Hugging Face account using the button below.
+        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
-        **
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution.
+        **Note:** Only OpenAI API key is needed!
         """
     )
+
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
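
The diff is cut off after `status_output`. A Space like this one typically closes the Blocks context by adding a results table, wiring the button, and launching the app; a sketch of that tail, with `results_table` being a name assumed here rather than taken from the commit:

```python
# Hypothetical tail (not part of the visible diff). Assumes the names from app.py,
# notably run_and_submit_all, plus a DataFrame component for per-question results.
import gradio as gr

with gr.Blocks() as demo_sketch:
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # Gradio injects the gr.OAuthProfile argument from the login state, so no inputs are listed.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    demo_sketch.launch()
```
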