dlaima committed
Commit 6a52f23 · verified · 1 Parent(s): 53194ec

Update app.py

Files changed (1)
  1. app.py +152 -73
app.py CHANGED
@@ -2,27 +2,19 @@
 import os
 import gradio as gr
 import requests
-#from smolagents.agent import Agent
-from smolagents import Tool
-
-from smolagents import Agent
-
-
+import pandas as pd
+
+#from smolagents.agent import CodeAgent
+#from smolagents.models import HfApiModel
+from smolagents import Tool
+from smolagents import CodeAgent, HfApiModel
+
 from audio_transcriber import AudioTranscriptionTool
 from image_analyzer import ImageAnalysisTool
 from wikipedia_searcher import WikipediaSearcher
 
 
-# Hugging Face API setup
-HF_API_TOKEN = os.getenv("HF_API_TOKEN")
-HF_CHAT_MODEL_URL = "https://api-inference.huggingface.com/models/HuggingFaceH4/zephyr-7b-beta"
-
-HEADERS = {
-    "Authorization": f"Bearer {HF_API_TOKEN}",
-    "Content-Type": "application/json"
-}
-
-# Static system prompt
+# System prompt
 SYSTEM_PROMPT = """You are an agent solving the GAIA benchmark and you are required to provide exact answers.
 Rules to follow:
 1. Return only the exact requested answer: no explanation and no reasoning.
@@ -39,9 +31,11 @@ Examples of good responses:
 Never include phrases like "the answer is..." or "Based on my research".
 Only return the exact answer."""
 
-# Agent tools
+
+# Tool definitions
 audio_tool = AudioTranscriptionTool()
 image_tool = ImageAnalysisTool()
+
 wiki_tool = Tool.from_function(
     name="wikipedia_search",
     description="Search for facts using Wikipedia.",
@@ -52,68 +46,153 @@ wiki_tool = Tool.from_function(
 
 tools = [audio_tool, image_tool, wiki_tool]
 
-agent = Agent(
-    tools=tools,
-    system_prompt=SYSTEM_PROMPT
-)
 
-def query_hf_model(prompt: str) -> str:
+# Agent factory
+def MyAgent():
+    return CodeAgent(
+        tools=tools,
+        system_prompt=SYSTEM_PROMPT,
+        model=HfApiModel(
+            api_url="https://api-inference.huggingface.com/models/HuggingFaceH4/zephyr-7b-beta",
+            api_key=os.getenv("HF_API_TOKEN")
+        )
+    )
+
+
+# Main run and submission logic
+def run_and_submit_all(profile: gr.OAuthProfile | None):
+    space_id = os.getenv("SPACE_ID")
+
+    if profile:
+        username = profile.username
+        print(f"User logged in: {username}")
+    else:
+        print("User not logged in.")
+        return "Please Login to Hugging Face with the button.", None
+
+    api_url = os.getenv("GAIA_API_URL", "https://gaia-benchmark.com/api")
+    questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"
+
+    try:
+        agent = MyAgent()
+    except Exception as e:
+        print(f"Error initializing agent: {e}")
+        return f"Error initializing agent: {e}", None
+
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(f"Agent code URL: {agent_code}")
+
+    print(f"Fetching questions from: {questions_url}")
+    try:
+        response = requests.get(questions_url, timeout=15)
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data:
+            return "Fetched questions list is empty or invalid format.", None
+        print(f"Fetched {len(questions_data)} questions.")
+    except Exception as e:
+        return f"Error fetching questions: {e}", None
+
+    results_log = []
+    answers_payload = []
+    print(f"Running agent on {len(questions_data)} questions...")
+
+    for item in questions_data:
+        task_id = item.get("task_id")
+        if not task_id:
+            continue
+        try:
+            submitted_answer = agent(item)
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({
+                "Task ID": task_id,
+                "Question": item.get("question", ""),
+                "Submitted Answer": submitted_answer
+            })
+        except Exception as e:
+            error_msg = f"AGENT ERROR: {e}"
+            results_log.append({
+                "Task ID": task_id,
+                "Question": item.get("question", ""),
+                "Submitted Answer": error_msg
+            })
+
+    if not answers_payload:
+        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload
+    }
+
+    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
-        response = requests.post(
-            HF_CHAT_MODEL_URL,
-            headers=HEADERS,
-            json={
-                "inputs": {
-                    "past_user_inputs": [],
-                    "text": prompt
-                },
-                "parameters": {
-                    "max_new_tokens": 256,
-                    "return_full_text": False
-                }
-            }
+        response = requests.post(submit_url, json=submission_data, timeout=60)
+        response.raise_for_status()
+        result_data = response.json()
+        final_status = (
+            f"Submission Successful!\n"
+            f"User: {result_data.get('username')}\n"
+            f"Overall Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"Message: {result_data.get('message', 'No message received.')}"
         )
-        result = response.json()
-        if isinstance(result, dict) and "error" in result:
-            return f"HF API Error: {result['error']}"
-        return result[0]["generated_text"].strip()
+        results_df = pd.DataFrame(results_log)
+        return final_status, results_df
+    except requests.exceptions.HTTPError as e:
+        try:
+            detail = e.response.json().get("detail", e.response.text)
+        except Exception:
+            detail = e.response.text[:500]
+        return f"Submission Failed: {detail}", pd.DataFrame(results_log)
+    except requests.exceptions.Timeout:
+        return "Submission Failed: The request timed out.", pd.DataFrame(results_log)
     except Exception as e:
-        return f"Error querying Hugging Face model: {e}"
-
-def run_and_submit_all(question, file):
-    if file:
-        file_path = file.name
-        if file_path.endswith((".mp3", ".wav")):
-            transcript = audio_tool.forward(file_path)
-            question = f"{question}\n\nTranscription of audio: {transcript}"
-        elif file_path.endswith((".png", ".jpg", ".jpeg")):
-            image_answer = image_tool.forward(file_path, question)
-            return image_answer
-        elif file_path.endswith(".py"):
-            try:
-                with open(file_path, "r") as f:
-                    code = f.read()
-                question = f"{question}\n\nPython code:\n{code}"
-            except Exception as e:
-                return f"Error reading code file: {e}"
-        else:
-            return "Unsupported file type."
-
-    full_prompt = f"{SYSTEM_PROMPT}\nQUESTION:\n{question}"
-    return query_hf_model(full_prompt)
-
-with gr.Blocks(title="GAIA Agent with HF API") as demo:
-    gr.Markdown("### GAIA Evaluation Agent (Hugging Face-based)")
-
-    with gr.Row():
-        question_input = gr.Textbox(label="Question", placeholder="Enter your question here...", lines=3)
-        file_input = gr.File(label="Optional File (Audio, Image, or Python)", file_types=[".mp3", ".wav", ".jpg", ".jpeg", ".png", ".py"])
-
-    submit_button = gr.Button("Run Agent")
-    output_box = gr.Textbox(label="Answer")
-
-    submit_button.click(fn=run_and_submit_all, inputs=[question_input, file_input], outputs=output_box)
+        return f"An unexpected error occurred during submission: {e}", pd.DataFrame(results_log)
 
+
+# Gradio UI setup
+with gr.Blocks() as demo:
+    gr.Markdown("# Basic Agent Evaluation Runner")
+    gr.Markdown("""
+    **Instructions:**
+    1. Clone this space, modify code to define your agent's logic, tools, and packages.
+    2. Log in to your Hugging Face account using the button below.
+    3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see your score.
+    **Note:** Submitting can take some time.
+    """)
+
+    gr.LoginButton()
+    run_button = gr.Button("Run Evaluation & Submit All Answers")
+
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
+
+# App startup logs
 if __name__ == "__main__":
-    demo.launch()
+    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
+    space_host = os.getenv("SPACE_HOST")
+    space_id = os.getenv("SPACE_ID")
+
+    if space_host:
+        print(f"✅ SPACE_HOST found: {space_host}")
+        print(f"   Runtime URL should be: https://{space_host}.hf.space")
+    else:
+        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
+
+    if space_id:
+        print(f"✅ SPACE_ID found: {space_id}")
+        print(f"   Repo URL: https://huggingface.co/spaces/{space_id}")
+        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id}/tree/main")
+    else:
+        print("ℹ️ SPACE_ID environment variable not found (running locally?).")
+
+    print("-" * (60 + len(" App Starting ")) + "\n")
+    print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True, share=False)
+
 