Update app.py
Browse files
app.py
CHANGED
@@ -1,64 +1,263 @@
|
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
-
import requests
|
4 |
import inspect
|
5 |
import pandas as pd
|
6 |
-
from agent import GaiaAgent
|
7 |
import time
|
8 |
-
|
9 |
-
import json
|
10 |
|
11 |
-
|
12 |
-
MODEL_RPM_LIMIT = 15
|
13 |
|
14 |
-
|
|
|
15 |
|
16 |
-
#
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
#
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
-
|
|
|
|
|
|
|
|
|
32 |
"""
|
33 |
-
|
34 |
-
|
35 |
-
filename (str): The name of the file to load from.
|
36 |
-
Returns:
|
37 |
-
list: The loaded list of dictionaries, or an empty list if an error occurs.
|
38 |
"""
|
|
|
|
|
|
|
39 |
try:
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
"""
|
56 |
-
|
57 |
-
|
58 |
"""
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
if profile:
|
63 |
username= f"{profile.username}"
|
64 |
print(f"User logged in: {username}")
|
@@ -70,18 +269,10 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
70 |
questions_url = f"{api_url}/questions"
|
71 |
submit_url = f"{api_url}/submit"
|
72 |
|
73 |
-
|
74 |
-
try:
|
75 |
-
#agent = BasicAgent()
|
76 |
-
agent = GaiaAgent(model, MODEL_RPM_LIMIT )
|
77 |
-
except Exception as e:
|
78 |
-
print(f"Error instantiating agent: {e}")
|
79 |
-
return f"Error initializing agent: {e}", None
|
80 |
-
# In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
|
81 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
82 |
print(agent_code)
|
83 |
|
84 |
-
# 2. Fetch Questions
|
85 |
print(f"Fetching questions from: {questions_url}")
|
86 |
try:
|
87 |
response = requests.get(questions_url, timeout=15)
|
@@ -91,53 +282,34 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
91 |
print("Fetched questions list is empty.")
|
92 |
return "Fetched questions list is empty or invalid format.", None
|
93 |
print(f"Fetched {len(questions_data)} questions.")
|
94 |
-
except requests.exceptions.RequestException as e:
|
95 |
-
print(f"Error fetching questions: {e}")
|
96 |
-
return f"Error fetching questions: {e}", None
|
97 |
-
except requests.exceptions.JSONDecodeError as e:
|
98 |
-
print(f"Error decoding JSON response from questions endpoint: {e}")
|
99 |
-
print(f"Response text: {response.text[:500]}")
|
100 |
-
return f"Error decoding server response for questions: {e}", None
|
101 |
except Exception as e:
|
102 |
-
|
103 |
-
return f"An unexpected error occurred fetching questions: {e}", None
|
104 |
|
105 |
-
# 3. Run your Agent
|
106 |
results_log = []
|
107 |
answers_payload = []
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
127 |
-
time.sleep(60)
|
128 |
-
|
129 |
-
if not answers_payload:
|
130 |
-
print("Agent did not produce any answers to submit.")
|
131 |
-
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
132 |
-
else:
|
133 |
-
answers_payload = load_json_list("answers_payload.json")
|
134 |
|
135 |
-
|
136 |
-
|
137 |
-
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
138 |
-
print(status_update)
|
139 |
|
140 |
-
|
141 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
142 |
try:
|
143 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
@@ -150,38 +322,15 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
150 |
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
151 |
f"Message: {result_data.get('message', 'No message received.')}"
|
152 |
)
|
153 |
-
|
|
|
154 |
results_df = pd.DataFrame(results_log)
|
155 |
-
return
|
156 |
-
except requests.exceptions.HTTPError as e:
|
157 |
-
error_detail = f"Server responded with status {e.response.status_code}."
|
158 |
-
try:
|
159 |
-
error_json = e.response.json()
|
160 |
-
error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
|
161 |
-
except requests.exceptions.JSONDecodeError:
|
162 |
-
error_detail += f" Response: {e.response.text[:500]}"
|
163 |
-
status_message = f"Submission Failed: {error_detail}"
|
164 |
-
print(status_message)
|
165 |
-
results_df = pd.DataFrame(results_log)
|
166 |
-
return status_message, results_df
|
167 |
-
except requests.exceptions.Timeout:
|
168 |
-
status_message = "Submission Failed: The request timed out."
|
169 |
-
print(status_message)
|
170 |
-
results_df = pd.DataFrame(results_log)
|
171 |
-
return status_message, results_df
|
172 |
-
except requests.exceptions.RequestException as e:
|
173 |
-
status_message = f"Submission Failed: Network error - {e}"
|
174 |
-
print(status_message)
|
175 |
-
results_df = pd.DataFrame(results_log)
|
176 |
-
return status_message, results_df
|
177 |
except Exception as e:
|
178 |
status_message = f"An unexpected error occurred during submission: {e}"
|
179 |
-
print(status_message)
|
180 |
results_df = pd.DataFrame(results_log)
|
181 |
return status_message, results_df
|
182 |
|
183 |
-
|
184 |
-
# --- Build Gradio Interface using Blocks ---
|
185 |
with gr.Blocks() as demo:
|
186 |
gr.Markdown("# Basic Agent Evaluation Runner")
|
187 |
gr.Markdown(
|
@@ -193,18 +342,13 @@ with gr.Blocks() as demo:
|
|
193 |
---
|
194 |
**Disclaimers:**
|
195 |
Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
|
196 |
-
This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution.
|
197 |
"""
|
198 |
)
|
199 |
-
|
200 |
gr.LoginButton()
|
201 |
-
|
202 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
203 |
-
|
204 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
205 |
-
# Removed max_rows=10 from DataFrame constructor
|
206 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
207 |
-
|
208 |
run_button.click(
|
209 |
fn=run_and_submit_all,
|
210 |
outputs=[status_output, results_table]
|
@@ -212,24 +356,19 @@ with gr.Blocks() as demo:
|
|
212 |
|
213 |
if __name__ == "__main__":
|
214 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
215 |
-
# Check for SPACE_HOST and SPACE_ID at startup for information
|
216 |
space_host_startup = os.getenv("SPACE_HOST")
|
217 |
-
space_id_startup = os.getenv("SPACE_ID")
|
218 |
-
|
219 |
if space_host_startup:
|
220 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
221 |
print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
|
222 |
else:
|
223 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
224 |
-
|
225 |
-
if space_id_startup: # Print repo URLs if SPACE_ID is found
|
226 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
227 |
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
228 |
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
229 |
else:
|
230 |
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
231 |
-
|
232 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
233 |
-
|
234 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
235 |
demo.launch(debug=True, share=False)
|
|
|
1 |
+
import requests
|
2 |
import os
|
3 |
import gradio as gr
|
|
|
4 |
import inspect
|
5 |
import pandas as pd
|
|
|
6 |
import time
|
7 |
+
import re
|
|
|
8 |
|
9 |
+
# === TOOLS SECTION (merged from helper.py) ===
|
|
|
10 |
|
11 |
+
from langchain_experimental.utilities import PythonREPL
|
12 |
+
from langchain.tools import Tool
|
13 |
|
14 |
+
# 1. Python REPL Tool
|
15 |
+
python_repl = PythonREPL()
|
16 |
+
repl_tool = Tool(
|
17 |
+
name="python_repl",
|
18 |
+
description="""
|
19 |
+
A Python REPL (Read-Eval-Print Loop) for executing Python code.
|
20 |
+
Use this tool for:
|
21 |
+
- Performing accurate calculations (arithmetic, complex math).
|
22 |
+
- Manipulating and analyzing data (e.g., lists, numbers).
|
23 |
+
- Executing small, self-contained Python scripts.
|
24 |
+
Input MUST be valid Python code, and all outputs must be printed.
|
25 |
+
""",
|
26 |
+
func=python_repl.run,
|
27 |
+
)
|
28 |
|
29 |
+
# 2. File Saver Tool
|
30 |
+
def download_and_save_file(args: dict) -> str:
    """Download a file from a URL and save it under a local filename.

    Args:
        args: Either a dict or a JSON string encoding an object with the
            keys ``'url'`` and ``'local_filename'``.
            Example: {"url": "https://example.com/data.xlsx", "local_filename": "data.xlsx"}

    Returns:
        A human-readable status string. Failures are reported as strings
        (never raised) so the calling agent can read the message.
    """
    import json  # local import, as in the original implementation

    if isinstance(args, str):
        try:
            args = json.loads(args)
        except json.JSONDecodeError as e:
            return f"Error: Input is not valid JSON: {e}"
    if not isinstance(args, dict):
        return "Error: Input must be a JSON object with 'url' and 'local_filename' keys."

    url = args.get("url")
    local_filename = args.get("local_filename")
    if not url or not local_filename:
        return "Error: Both 'url' and 'local_filename' must be provided."

    try:
        # The context manager releases the streamed connection even when
        # raise_for_status() or the file write fails.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            # dirname is '' for a bare filename; fall back to the current dir.
            os.makedirs(os.path.dirname(local_filename) or ".", exist_ok=True)
            with open(local_filename, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)
    except Exception as e:
        return f"An unexpected error occurred: {e}"
    return f"File downloaded successfully to {local_filename}"
|
53 |
+
|
54 |
+
file_saver_tool = Tool(
|
55 |
+
name="file_saver",
|
56 |
+
description="Downloads a file from a URL and saves it to a specified local filename. Input: JSON with 'url' and 'local_filename'.",
|
57 |
+
func=download_and_save_file,
|
58 |
+
)
|
59 |
|
60 |
+
# 3. Audio Transcriber Tool
|
61 |
+
import speech_recognition as sr
|
62 |
+
from pydub import AudioSegment
|
63 |
+
|
64 |
+
def transcribe_audio_from_path(local_audio_path: str, language: str = "en-US") -> str:
    """Transcribe the speech in a local audio file to text.

    The file is converted to WAV in a unique temporary file, which is then
    passed to Google Speech Recognition.

    Args:
        local_audio_path: Path to an audio file on the local filesystem.
            URLs are rejected; download the file with 'file_saver' first.
        language: Language code passed to the recognizer.

    Returns:
        The transcript with surrounding whitespace removed, or a
        human-readable error string if validation, conversion or recognition
        fails. Errors are returned rather than raised.
    """
    import tempfile  # local import: only this tool needs it

    # Validate before doing any work so bad input never creates temp files.
    if not isinstance(local_audio_path, str) or not local_audio_path.strip():
        return "Error: A local audio file path must be provided."
    local_audio_path = local_audio_path.strip()
    if local_audio_path.startswith(("http://", "https://")):
        return "Error: This tool only accepts local file paths, not URLs. Please use 'file_saver' first."
    if not os.path.exists(local_audio_path):
        return f"Error: Local audio file not found at '{local_audio_path}'."

    # A unique temp file avoids collisions between concurrent transcriptions,
    # which a fixed filename in the working directory would cause.
    fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        recognizer = sr.Recognizer()
        audio = AudioSegment.from_file(local_audio_path)
        audio.export(temp_wav_path, format="wav")
        with sr.AudioFile(temp_wav_path) as source:
            audio_listened = recognizer.record(source)
        try:
            transcribed_text = recognizer.recognize_google(audio_listened, language=language)
        except sr.UnknownValueError:
            return "Could not understand audio (speech not clear or too short)."
        except sr.RequestError as e:
            return f"Could not request results from Google Speech Recognition service; {e}"
    except Exception as e:
        return f"An unexpected error occurred during audio processing or transcription: {e}"
    finally:
        # Always clean up the intermediate WAV file.
        if os.path.exists(temp_wav_path):
            os.remove(temp_wav_path)
    return transcribed_text.strip()
|
93 |
|
94 |
+
audio_transcriber_tool = Tool(
|
95 |
+
name="audio_transcriber_tool",
|
96 |
+
description=(
|
97 |
+
"Transcribes audio content from a **local file path** to a text transcript. "
|
98 |
+
"Use for extracting spoken information from audio recordings downloaded using 'file_saver'."
|
99 |
+
),
|
100 |
+
func=transcribe_audio_from_path,
|
101 |
+
)
|
102 |
+
|
103 |
+
# 4. Gemini Multimodal Tool (for images)
|
104 |
+
import base64
|
105 |
+
from langchain.tools import Tool
|
106 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
107 |
+
from langchain_core.messages import HumanMessage
|
108 |
+
|
109 |
+
def analyze_image_with_gemini(args: dict) -> str:
    """Ask the Gemini multimodal model a question about a local image.

    Args:
        args: Either a dict or a JSON string encoding an object with the
            keys ``'image_path'`` (local file path) and ``'question'``.

    Returns:
        The model's response content, or a human-readable error string when
        the input is invalid, the file is missing, ``GOOGLE_API_KEY`` is not
        set, or the model call fails. Errors are returned rather than raised.
    """
    import json  # local import, as in the original implementation
    import mimetypes  # local import: only this tool needs it

    try:
        if isinstance(args, str):
            args = json.loads(args)
        image_path = args.get("image_path")
        question = args.get("question")
        if not image_path or not question:
            return "Error: Both 'image_path' and 'question' must be provided."
        if not os.path.exists(image_path):
            return f"Error: Local image file not found at '{image_path}'."
        google_api_key = os.getenv("GOOGLE_API_KEY")
        if not google_api_key:
            return "Error: GOOGLE_API_KEY not found in environment variables for multimodal tool."

        # Declare the real image type (JPEG, GIF, ...) instead of always
        # claiming PNG; fall back to PNG when the extension is unknown.
        mime_type, _ = mimetypes.guess_type(image_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash",
            google_api_key=google_api_key,
            temperature=0.0,
        )
        with open(image_path, "rb") as f:
            image_base64 = base64.b64encode(f.read()).decode("utf-8")
        message = HumanMessage(
            content=[
                {"type": "text", "text": question},
                {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{image_base64}"}},
            ]
        )
        response = llm.invoke([message])
        return response.content
    except Exception as e:
        return f"Error in gemini_multimodal_tool: {e}"
|
145 |
+
|
146 |
+
gemini_multimodal_tool = Tool(
|
147 |
+
name="gemini_multimodal_tool",
|
148 |
+
description="Analyze an image with Gemini LLM. Input: JSON with 'image_path' and 'question'.",
|
149 |
+
func=analyze_image_with_gemini,
|
150 |
+
)
|
151 |
+
|
152 |
+
# 5. Wikipedia Search Tool
|
153 |
+
from langchain_community.document_loaders import WikipediaLoader
|
154 |
+
|
155 |
+
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return up to 2 results.

    Args:
        query: The search text. Must be a non-empty string.

    Returns:
        The matching documents, each wrapped in a ``<Document>`` element and
        separated by ``---`` lines. If the query is empty, the lookup fails,
        or nothing is found, a human-readable message is returned instead,
        matching the other tools, which report errors as strings.
    """
    if not isinstance(query, str) or not query.strip():
        return "Error: A non-empty search query must be provided."
    query = query.strip()
    try:
        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    except Exception as e:
        return f"Error searching Wikipedia for '{query}': {e}"
    if not search_docs:
        return f"No Wikipedia results found for '{query}'."
    # The opening tag is no longer self-closed, so it pairs with </Document>.
    return "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}">\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
|
164 |
+
|
165 |
+
wikipedia_search_tool2 = Tool(
|
166 |
+
name="wikipedia_search_tool2",
|
167 |
+
description="Search Wikipedia for a query and return up to 2 results.",
|
168 |
+
func=wiki_search,
|
169 |
+
)
|
170 |
+
|
171 |
+
# ========== END TOOLS SECTION ==========
|
172 |
+
|
173 |
+
# --- AGENT SECTION ---
|
174 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
175 |
+
from langchain.memory import ConversationSummaryMemory
|
176 |
+
from langchain.prompts import PromptTemplate
|
177 |
+
from langchain.agents import AgentExecutor, create_react_agent
|
178 |
+
from typing import List, Optional
|
179 |
+
|
180 |
+
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
181 |
+
|
182 |
+
# Set up LLM (Google Gemini - requires GOOGLE_API_KEY env variable)
|
183 |
+
google_api_key = os.getenv("GOOGLE_API_KEY")
|
184 |
+
if not google_api_key:
|
185 |
+
raise RuntimeError("GOOGLE_API_KEY not found in environment. Please set it.")
|
186 |
+
|
187 |
+
gemini_model = "gemini-2.0-flash"
|
188 |
+
llm_client = ChatGoogleGenerativeAI(
|
189 |
+
model=gemini_model,
|
190 |
+
google_api_key=google_api_key,
|
191 |
+
temperature=0,
|
192 |
+
)
|
193 |
+
summary_memory = ConversationSummaryMemory(llm=llm_client, memory_key="chat_history")
|
194 |
+
|
195 |
+
# Prompt
|
196 |
+
# ReAct prompt. create_react_agent requires the variables `tools`,
# `tool_names` and `agent_scratchpad` to be present in the prompt, and the
# agent's output parser expects the Thought/Action/Action Input/Final Answer
# layout, so the template spells that format out explicitly.
prompt = PromptTemplate(
    input_variables=["input", "agent_scratchpad", "chat_history", "tools", "tool_names"],
    template="""
You are a smart and helpful AI Agent/Assistant that excels at fact-based reasoning. You are allowed and encouraged to use one or more tools as needed to answer complex questions and perform tasks.
Your FINAL ANSWER must be one of these formats and ONLY the answer itself (no intro phrases):
- A number (e.g., '26', '1977', '519')
- As few words as possible (e.g., 'Paris', 'down', 'LUX')
- A comma-separated list of numbers and/or strings (e.g., '10,20,30', 'apple,banana,orange')
---
You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question
---
Previous conversation history:
{chat_history}
Question: {input}
---
Thought:{agent_scratchpad}
"""
)
|
212 |
+
|
213 |
+
tools = [repl_tool, file_saver_tool, audio_transcriber_tool, gemini_multimodal_tool, wikipedia_search_tool2]
|
214 |
|
215 |
+
summary_llm = ChatGoogleGenerativeAI(
|
216 |
+
model=gemini_model,
|
217 |
+
google_api_key=google_api_key,
|
218 |
+
temperature=0,
|
219 |
+
streaming=True
|
220 |
+
)
|
221 |
+
|
222 |
+
summary_react_agent = create_react_agent(
|
223 |
+
llm=summary_llm,
|
224 |
+
tools=tools,
|
225 |
+
prompt=prompt
|
226 |
+
)
|
227 |
+
|
228 |
+
class BasicAgent:
    """Callable wrapper that runs questions through an ``AgentExecutor``.

    The constructor arguments are kept as instance attributes and forwarded
    unchanged to ``AgentExecutor``; the resulting executor is stored on
    ``agent_obj``.
    """

    def __init__(
        self,
        agent,
        tools: List,
        verbose: bool = False,
        handle_parsing_errors: bool = True,
        max_iterations: int = 9,
        memory: Optional[ConversationSummaryMemory] = None
    ) -> None:
        """Store the configuration and build the underlying executor."""
        self.agent, self.tools = agent, tools
        self.verbose, self.handle_parsing_errors = verbose, handle_parsing_errors
        self.max_iterations, self.memory = max_iterations, memory

        # Same keyword arguments as before, gathered in one mapping.
        executor_settings = {
            "agent": self.agent,
            "tools": self.tools,
            "verbose": self.verbose,
            "handle_parsing_errors": self.handle_parsing_errors,
            "max_iterations": self.max_iterations,
            "memory": self.memory,
        }
        self.agent_obj = AgentExecutor(**executor_settings)

    def __call__(self, question: str) -> str:
        """Invoke the executor with ``question`` and return its ``'output'`` entry."""
        payload = {"input": question}
        run_config = {"configurable": {"session_id": "test-session"}}
        outcome = self.agent_obj.invoke(payload, config=run_config)
        return outcome["output"]
|
258 |
+
|
259 |
+
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
260 |
+
space_id = os.getenv("SPACE_ID")
|
261 |
if profile:
|
262 |
username= f"{profile.username}"
|
263 |
print(f"User logged in: {username}")
|
|
|
269 |
questions_url = f"{api_url}/questions"
|
270 |
submit_url = f"{api_url}/submit"
|
271 |
|
272 |
+
agent = BasicAgent(summary_react_agent, tools, True, True, 30, summary_memory)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
273 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
274 |
print(agent_code)
|
275 |
|
|
|
276 |
print(f"Fetching questions from: {questions_url}")
|
277 |
try:
|
278 |
response = requests.get(questions_url, timeout=15)
|
|
|
282 |
print("Fetched questions list is empty.")
|
283 |
return "Fetched questions list is empty or invalid format.", None
|
284 |
print(f"Fetched {len(questions_data)} questions.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
285 |
except Exception as e:
|
286 |
+
return f"Error fetching questions: {e}", None
|
|
|
287 |
|
|
|
288 |
results_log = []
|
289 |
answers_payload = []
|
290 |
+
print(f"Running agent on {len(questions_data)} questions...")
|
291 |
+
for item in questions_data:
|
292 |
+
task_id = item.get("task_id")
|
293 |
+
question_text = item.get("question")
|
294 |
+
file_name = item.get("file_name")
|
295 |
+
full_question_for_agent = question_text
|
296 |
+
if file_name:
|
297 |
+
attachment_url = f"{api_url}/files/{task_id}"
|
298 |
+
full_question_for_agent += f"\n\nAttachment '{file_name}' available at EXACT URL: {attachment_url}"
|
299 |
+
print(f"Running agent on task {task_id}: {full_question_for_agent}",flush=True)
|
300 |
+
try:
|
301 |
+
submitted_answer = agent(full_question_for_agent)
|
302 |
+
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
303 |
+
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
304 |
+
time.sleep(5) # for demo, zmień na 60 przy real eval!
|
305 |
+
except Exception as e:
|
306 |
+
print(f"Error running agent on task {task_id}: {e}")
|
307 |
+
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
308 |
|
309 |
+
if not answers_payload:
|
310 |
+
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
|
|
|
|
311 |
|
312 |
+
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
313 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
314 |
try:
|
315 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
|
|
322 |
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
323 |
f"Message: {result_data.get('message', 'No message received.')}"
|
324 |
)
|
325 |
+
cleaned_final_status = re.sub(r'[^\x20-\x7E\n\r\t]+', '', final_status)
|
326 |
+
cleaned_final_status = cleaned_final_status.strip()
|
327 |
results_df = pd.DataFrame(results_log)
|
328 |
+
return cleaned_final_status, results_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
except Exception as e:
|
330 |
status_message = f"An unexpected error occurred during submission: {e}"
|
|
|
331 |
results_df = pd.DataFrame(results_log)
|
332 |
return status_message, results_df
|
333 |
|
|
|
|
|
334 |
with gr.Blocks() as demo:
|
335 |
gr.Markdown("# Basic Agent Evaluation Runner")
|
336 |
gr.Markdown(
|
|
|
342 |
---
|
343 |
**Disclaimers:**
|
344 |
Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
|
345 |
+
This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution.
|
346 |
"""
|
347 |
)
|
|
|
348 |
gr.LoginButton()
|
|
|
349 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
|
|
350 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
|
|
351 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
|
|
352 |
run_button.click(
|
353 |
fn=run_and_submit_all,
|
354 |
outputs=[status_output, results_table]
|
|
|
356 |
|
357 |
if __name__ == "__main__":
    # Startup banner.
    title = " App Starting "
    rule = "-" * 30
    print("\n" + rule + title + rule)

    # Read both variables up front; they are only used for the diagnostics below.
    space_host_startup = os.environ.get("SPACE_HOST")
    space_id_startup = os.environ.get("SPACE_ID")

    # Report the runtime host, if the environment provides one.
    if not space_host_startup:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    else:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")

    # Report the repository URLs, if the Space id is known.
    if not space_id_startup:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
    else:
        repo_url = f"https://huggingface.co/spaces/{space_id_startup}"
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: {repo_url}")
        print(f" Repo Tree URL: {repo_url}/tree/main")

    # Closing rule, as wide as the banner line.
    print("-" * (60 + len(title)) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
|