arbnori45 committed on
Commit
977704f
·
verified ·
1 Parent(s): 2caebe4

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -221
app.py DELETED
@@ -1,221 +0,0 @@
1
- import os
2
- import gradio as gr
3
- import requests
4
- import pandas as pd
5
- from dotenv import load_dotenv
6
- from functions import *
7
- from langchain_core.messages import HumanMessage
8
- import traceback
9
- import time
10
-
11
# Presumably reads a local .env file into the process environment
# (python-dotenv); confirm against the deployment setup.
load_dotenv()

# Base URL of the scoring service. The "/questions" and "/submit" endpoints
# used by run_and_submit_all are built from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
-
15
# Prefixes that mark intermediate planner output rather than a final answer.
# Kept as a tuple so it can be passed straight to str.startswith.
_PLANNER_PREFIXES = (
    "SEARCH:",
    "CALCULATE:",
    "DEFINE:",
    "WIKIPEDIA:",
    "REVERSE:",
    "DIRECT:",
)


def _extract_answer(result):
    """Return the final answer text from an agent result, or "UNKNOWN".

    Walks ``result["messages"]`` from newest to oldest and returns the first
    non-empty string content that is neither the echoed human question nor a
    planner directive.
    """
    try:
        messages = result["messages"]
    except (KeyError, TypeError):
        return "UNKNOWN"

    for msg in reversed(messages or []):
        content = getattr(msg, "content", None)
        if not isinstance(content, str):
            continue
        text = content.strip()
        if not text:
            continue
        # The question itself is part of the message list; never submit it
        # back as the answer. NOTE(review): relies on langchain_core messages
        # exposing a ``type`` tag ("human" for HumanMessage); objects without
        # the attribute are treated as candidate answers, as before.
        if getattr(msg, "type", None) == "human":
            continue
        # Compare on the stripped text so leading whitespace cannot hide a
        # planner prefix.
        if text.upper().startswith(_PLANNER_PREFIXES):
            continue
        return text
    return "UNKNOWN"


def _preview(text, limit=100):
    """Return *text* shortened to *limit* characters plus "..." for display."""
    text = str(text)
    return text[:limit] + "..." if len(text) > limit else text


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every fetched question and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in (in which case nothing is fetched or submitted).

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with one row per processed question, or ``None`` when the
        run stopped before any question was processed.
    """
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = profile.username
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Broad catch is deliberate: this is the UI boundary and any failure must
    # be reported as a status string rather than crash the handler.
    try:
        graph = build_graph()
        agent = graph.invoke
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    agent_code = (
        f"https://huggingface.co/spaces/{space_id}/tree/main"
        if space_id
        else "Repo URL not available"
    )
    print(f"Agent code repo: {agent_code}")

    # Fetch questions
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None

    # A non-list payload (e.g. an error object) would break the loop below,
    # so it is rejected together with the empty case.
    if not questions_data or not isinstance(questions_data, list):
        print("Fetched questions list is empty.")
        return "Fetched questions list is empty or invalid format.", None
    print(f"Fetched {len(questions_data)} questions.")

    results_log = []
    answers_payload = []

    print(f"\n{'=' * 60}")
    print(f"Running agent on {len(questions_data)} questions...")
    print(f"{'=' * 60}\n")

    # Add delay between questions to avoid rate limiting
    question_delay = 3.0  # seconds between questions
    asked_any = False

    for idx, item in enumerate(questions_data, 1):
        task_id = item.get("task_id") if isinstance(item, dict) else None
        question_text = item.get("question") if isinstance(item, dict) else None
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        # Wait only when an earlier question was actually sent to the agent.
        if asked_any:
            print(f"Waiting {question_delay}s before next question to avoid rate limits...")
            time.sleep(question_delay)
        asked_any = True

        print(f"\n--- Question {idx}/{len(questions_data)} ---")
        print(f"Task ID: {task_id}")
        print(f"Question: {question_text}")

        try:
            start_time = time.time()
            input_messages = [HumanMessage(content=question_text)]

            # Invoke the agent with the question
            result = agent({"messages": input_messages})
            answer = _extract_answer(result)

            elapsed_time = time.time() - start_time
            print(f"Answer: {answer}")
            print(f"Time taken: {elapsed_time:.2f}s")

            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({
                "Task ID": task_id,
                "Question": _preview(question_text),
                "Submitted Answer": answer,
                "Time (s)": f"{elapsed_time:.2f}",
            })

        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            print(f"Traceback: {traceback.format_exc()}")

            # Still submit UNKNOWN for errors; the table shows the error text
            # while the payload carries the neutral placeholder.
            answers_payload.append({"task_id": task_id, "submitted_answer": "UNKNOWN"})
            results_log.append({
                "Task ID": task_id,
                "Question": _preview(question_text),
                "Submitted Answer": f"ERROR: {str(e)[:50]}",
                "Time (s)": "N/A",
            })

    print(f"\n{'=' * 60}")
    print("Completed processing all questions")
    print(f"{'=' * 60}\n")

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # Summary before submission
    unknown_count = sum(
        1 for ans in answers_payload if ans["submitted_answer"] == "UNKNOWN"
    )
    print("\nSummary before submission:")
    print(f"Total questions: {len(answers_payload)}")
    print(f"UNKNOWN answers: {unknown_count}")
    print(f"Attempted answers: {len(answers_payload) - unknown_count}")

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    print(f"\nSubmitting {len(answers_payload)} answers for user '{username}'...")

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()

        score = result_data.get('score', 0)
        correct_count = result_data.get('correct_count', 0)
        total_attempted = result_data.get('total_attempted', 0)

        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {score}% "
            f"({correct_count}/{total_attempted} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )

        print("\n" + "=" * 60)
        print("SUBMISSION RESULTS:")
        print(f"Score: {score}%")
        print(f"Correct: {correct_count}/{total_attempted}")
        print("=" * 60)

        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        status_message = f"Submission Failed: {e}"
        print(status_message)
        return status_message, pd.DataFrame(results_log)
169
-
170
# Gradio UI: a login button, one trigger button, and two read-only outputs
# that receive the (status, table) pair returned by run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown("# Enhanced GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        This enhanced agent is optimized for GAIA benchmark questions with improved:
        - Planning logic for better tool selection
        - Search capabilities with more comprehensive results
        - Mathematical expression parsing
        - Answer extraction from search results
        - Error handling and logging

        Target: >50% accuracy on GAIA questions
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Components are created in display order; do not reorder.
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # No explicit inputs: the handler's only parameter is the OAuth profile.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
197
-
198
-
199
if __name__ == "__main__":
    # Startup diagnostics: report the Space-related environment variables
    # before handing control to the Gradio server.
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f" SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST is documented as already carrying the ".hf.space"
        # suffix; strip it first so the printed URL never doubles it.
        runtime_host = space_host_startup.removesuffix(".hf.space")
        print(f" Runtime URL should be: https://{runtime_host}.hf.space")
    else:
        print("SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f" SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Closing rule matches the width of the opening banner line.
    print("-" * (60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Enhanced GAIA Agent Evaluation...")
    demo.launch(debug=True, share=False)