Datawithsarah committed on
Commit 1daff82 · Parent(s): ae06836
Files changed (3):
  1. agent.py +226 -104
  2. app.py +172 -79
  3. requirements.txt +7 -14
agent.py CHANGED
@@ -1,144 +1,266 @@
- """LangGraph Agent"""
- import os
  from dotenv import load_dotenv
- from langgraph.graph import START, StateGraph, MessagesState
- from langgraph.prebuilt import tools_condition
- from langgraph.prebuilt import ToolNode
- from langchain_anthropic import ChatAnthropic
- from langchain_google_genai import ChatGoogleGenerativeAI
- from langchain_groq import ChatGroq
- from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
- from langchain_community.tools.tavily_search import TavilySearchResults
  from langchain_community.document_loaders import WikipediaLoader
  from langchain_community.document_loaders import ArxivLoader
- from langchain_community.vectorstores import SupabaseVectorStore
- from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
- from langchain_core.tools import tool
- from langchain.tools.retriever import create_retriever_tool
- from supabase.client import Client, create_client
- import re
-
- load_dotenv()
-
- # === Tools ===
- @tool
- def multiply(a: int, b: int) -> int:
-     """Multiplies two integers and returns the result."""
-     return a * b

  @tool
  def add(a: int, b: int) -> int:
-     """Adds two integers and returns the sum."""
      return a + b

  @tool
- def subtract(a: int, b: int) -> int:
-     """Subtracts the second integer from the first and returns the result."""
      return a - b

  @tool
- def divide(a: int, b: int) -> float:
-     """Divides the first integer by the second and returns the result as a float."""
      if b == 0:
-         raise ValueError("Cannot divide by zero.")
      return a / b

  @tool
- def modulus(a: int, b: int) -> int:
-     """Returns the remainder of dividing the first integer by the second."""
      return a % b

  @tool
  def wiki_search(query: str) -> str:
-     """Searches Wikipedia for a query and returns the top 2 results as a formatted string."""
-     search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
-     return "\n\n---\n\n".join([doc.page_content for doc in search_docs])

  @tool
  def web_search(query: str) -> str:
-     """Uses Tavily to search the web for a query and returns the top 3 result snippets."""
-     search_docs = TavilySearchResults(max_results=3).invoke(query=query)
-     return "\n\n---\n\n".join([doc.page_content for doc in search_docs])

  @tool
- def arvix_search(query: str) -> str:
-     """Searches Arxiv for academic papers related to the query and returns the top 3 abstracts."""
-     search_docs = ArxivLoader(query=query, load_max_docs=3).load()
-     return "\n\n---\n\n".join([doc.page_content[:1000] for doc in search_docs])
-
- # === System Prompt ===
- with open("system_prompt.txt", "r", encoding="utf-8") as f:
-     system_prompt = f.read()
- sys_msg = SystemMessage(content=system_prompt)
-
- # === Embeddings & Vector Store ===
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
- supabase: Client = create_client(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY"))
- vector_store = SupabaseVectorStore(
-     client=supabase,
-     embedding=embeddings,
-     table_name="Vector_Test",
-     query_name="match_documents_langchain",
- )
-
- # === Tools ===
- tools = [multiply, add, subtract, divide, modulus, wiki_search, web_search, arvix_search]
-
- # === LangGraph Builder ===
- def build_graph(provider: str = "huggingface"):
-     if provider == "huggingface":
-         llm = ChatHuggingFace(
-             llm=HuggingFaceEndpoint(
-                 repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
-                 temperature=0,
-                 huggingfacehub_api_token=os.getenv("HF_TOKEN")
-             )
-         )
-     else:
-         raise ValueError("Only 'huggingface' (Qwen3) is supported in this build.")
-
-     llm_with_tools = llm.bind_tools(tools)
-
-     def retriever(state: MessagesState):
-         query = state["messages"][-1].content
-         similar = vector_store.similarity_search(query)
-         return {
-             "messages": [
-                 sys_msg,
-                 state["messages"][-1],
-                 HumanMessage(content=f"Reference: {similar[0].page_content}")
-             ]
-         }
-
-     def assistant(state: MessagesState):
-         response = llm_with_tools.invoke(state["messages"])
-         return {"messages": state["messages"] + [response]}
-
-     def formatter(state: MessagesState):
-         last = state["messages"][-1].content.strip()
-         cleaned = re.sub(r"<.*?>", "", last)
-         cleaned = re.sub(r"(Final\s*Answer:|Answer:)", "", cleaned, flags=re.IGNORECASE)
-         cleaned = cleaned.strip().split("\n")[0].strip()
-         return {"messages": [AIMessage(content=cleaned)]}

      builder = StateGraph(MessagesState)
-     builder.add_node("retriever", retriever)
      builder.add_node("assistant", assistant)
      builder.add_node("tools", ToolNode(tools))
-     builder.add_node("formatter", formatter)
-
-     builder.add_edge(START, "retriever")
-     builder.add_edge("retriever", "assistant")
      builder.add_conditional_edges("assistant", tools_condition)
      builder.add_edge("tools", "assistant")
-     builder.add_edge("assistant", "formatter")

      return builder.compile()

- # === Run Test ===
  if __name__ == "__main__":
-     graph = build_graph()
-     result = graph.invoke({"messages": [HumanMessage(content="What is the capital of France?")]})
-     for m in result["messages"]:
-         m.pretty_print()
  from dotenv import load_dotenv
+
+ from langchain_openai import ChatOpenAI
+ from langchain_core.tools import tool
  from langchain_community.document_loaders import WikipediaLoader
  from langchain_community.document_loaders import ArxivLoader
+ from langchain_community.tools.tavily_search import TavilySearchResults
+ from langchain_tavily import TavilyExtract
+ from youtube_transcript_api import YouTubeTranscriptApi
+
+ from langchain_core.messages import SystemMessage, HumanMessage
+ from langgraph.graph import START, StateGraph, MessagesState
+ from langgraph.prebuilt import ToolNode
+ from langgraph.prebuilt import tools_condition
+ import base64
+ import httpx
+
+ load_dotenv()

  @tool
  def add(a: int, b: int) -> int:
+     """
+     Add b to a.
+
+     Args:
+         a: first int number
+         b: second int number
+     """
      return a + b

  @tool
+ def subtract(a: int, b: int) -> int:
+     """
+     Subtract b from a.
+
+     Args:
+         a: first int number
+         b: second int number
+     """
      return a - b

  @tool
+ def multiply(a: int, b: int) -> int:
+     """
+     Multiply a by b.
+
+     Args:
+         a: first int number
+         b: second int number
+     """
+     return a * b
+
+ @tool
+ def divide(a: int, b: int) -> float:
+     """
+     Divide a by b.
+
+     Args:
+         a: first int number
+         b: second int number
+     """
      if b == 0:
+         raise ValueError("Can't divide by zero.")
      return a / b

  @tool
+ def mod(a: int, b: int) -> int:
+     """
+     Remainder of a divided by b.
+
+     Args:
+         a: first int number
+         b: second int number
+     """
      return a % b

  @tool
  def wiki_search(query: str) -> str:
+     """
+     Search Wikipedia.
+
+     Args:
+         query: what to search for
+     """
+     search_docs = WikipediaLoader(query=query, load_max_docs=3).load()
+     formatted_search_docs = "".join(
+         [
+             f'<START source="{doc.metadata["source"]}">{doc.page_content[:1000]}<END>'
+             for doc in search_docs
+         ])
+     return {"wiki_results": formatted_search_docs}
+
+ @tool
+ def arvix_search(query: str) -> str:
+     """
+     Search arXiv, an online archive of preprint and postprint manuscripts
+     across many fields of science.
+
+     Args:
+         query: what to search for
+     """
+     search_docs = ArxivLoader(query=query, load_max_docs=3).load()
+     formatted_search_docs = "".join(
+         [
+             f'<START source="{doc.metadata["source"]}">{doc.page_content[:1000]}<END>'
+             for doc in search_docs
+         ])
+     return {"arvix_results": formatted_search_docs}

  @tool
  def web_search(query: str) -> str:
+     """
+     Search the web.
+
+     Args:
+         query: what to search for
+     """
+     search_docs = TavilySearchResults(max_results=3, include_answer=True).invoke({"query": query})
+     formatted_search_docs = "".join(
+         [
+             f'<START source="{doc["url"]}">{doc["content"][:1000]}<END>'
+             for doc in search_docs
+         ])
+     return {"web_results": formatted_search_docs}

  @tool
+ def open_web_page(url: str) -> str:
+     """
+     Open a web page and get its content.
+
+     Args:
+         url: web page URL as a string
+     """
+     search_docs = TavilyExtract().invoke({"urls": [url]})
+     formatted_search_docs = f'<START source="{search_docs["results"][0]["url"]}">{search_docs["results"][0]["raw_content"][:1000]}<END>'
+     return {"web_page_content": formatted_search_docs}

+ @tool
+ def youtube_transcript(url: str) -> str:
+     """
+     Get the transcript of a YouTube video.
+
+     Args:
+         url: YouTube video URL as a string
+     """
+     video_id = url.partition("https://www.youtube.com/watch?v=")[2]
+     transcript = YouTubeTranscriptApi.get_transcript(video_id)
+     transcript_text = " ".join([item["text"] for item in transcript])
+     return {"youtube_transcript": transcript_text}
+
+
+ tools = [
+     add,
+     subtract,
+     multiply,
+     divide,
+     mod,
+     wiki_search,
+     arvix_search,
+     web_search,
+     open_web_page,
+     youtube_transcript,
+ ]

+ # System prompt
+ system_prompt = f"""
+ You are a general AI assistant. I will ask you a question.
+ First, provide a step-by-step explanation of your reasoning to arrive at the answer.
+ Then, respond with your final answer in a single line, formatted as follows: "FINAL ANSWER: [YOUR FINAL ANSWER]".
+ [YOUR FINAL ANSWER] should be a number, a string, or a comma-separated list of numbers and/or strings, depending on the question.
+ If the answer is a number, do not use commas or units (e.g., $, %) unless specified.
+ If the answer is a string, do not use articles or abbreviations (e.g., for cities), and write digits in plain text unless specified.
+ If the answer is a comma-separated list, apply the above rules for each element based on whether it is a number or a string.
+ """
+ system_message = SystemMessage(content=system_prompt)

+ # Build graph
+ def build_graph():
+     """Build the LangGraph agent graph."""
+
+     # Language model and tools
+     llm = ChatOpenAI(
+         model="gpt-4.1",
+         temperature=0,
+         max_retries=2
+     )
+     llm_with_tools = llm.bind_tools(tools, strict=True)
+
+     # Nodes
+     def assistant(state: MessagesState):
+         """Assistant node."""
+         return {"messages": [llm_with_tools.invoke([system_message] + state["messages"])]}
+
+     # Graph
      builder = StateGraph(MessagesState)
      builder.add_node("assistant", assistant)
      builder.add_node("tools", ToolNode(tools))
+     builder.add_edge(START, "assistant")
      builder.add_conditional_edges("assistant", tools_condition)
      builder.add_edge("tools", "assistant")

+     # Compile graph
      return builder.compile()

+
+ # Testing and solving particular tasks
  if __name__ == "__main__":

+     agent = build_graph()
+
+     question = """
+     Review the chess position provided in the image. It is black's turn.
+     Provide the correct next move for black which guarantees a win.
+     Please provide your response in algebraic notation.
+     """
+     content_urls = {
+         "image": "https://agents-course-unit4-scoring.hf.space/files/cca530fc-4052-43b2-b130-b30968d8aa44",
+         "audio": None
+     }
+
+     # Define user message and add all the content
+     content = [
+         {
+             "type": "text",
+             "text": question
+         }
+     ]
+     if content_urls["image"]:
+         image_data = base64.b64encode(httpx.get(content_urls["image"]).content).decode("utf-8")
+         content.append(
+             {
+                 "type": "image",
+                 "source_type": "base64",
+                 "data": image_data,
+                 "mime_type": "image/jpeg"
+             }
+         )
+     if content_urls["audio"]:
+         audio_data = base64.b64encode(httpx.get(content_urls["audio"]).content).decode("utf-8")
+         content.append(
+             {
+                 "type": "audio",
+                 "source_type": "base64",
+                 "data": audio_data,
+                 "mime_type": "audio/wav"
+             }
+         )
+     messages = {
+         "role": "user",
+         "content": content
+     }
+
+     # Run agent on the question
+     messages = agent.invoke({"messages": messages})
+     for message in messages["messages"]:
+         message.pretty_print()
+
+     answer = messages["messages"][-1].content
+     index = answer.find("FINAL ANSWER: ")
+
+     print("\n")
+     print("="*30)
+     if index == -1:
+         print(answer)
+     else:
+         print(answer[index+14:])
+     print("="*30)
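For reference, a minimal sketch of driving the compiled graph on a plain-text question, mirroring the FINAL ANSWER parsing in the __main__ block above (assumes OPENAI_API_KEY and TAVILY_API_KEY are set in the environment; the sample question is illustrative):

    from langchain_core.messages import HumanMessage
    from agent import build_graph

    agent = build_graph()
    result = agent.invoke({"messages": [HumanMessage(content="What is 12 mod 5?")]})

    answer = result["messages"][-1].content
    marker = "FINAL ANSWER: "
    index = answer.find(marker)
    # Fall back to the raw reply if the model omitted the marker.
    print(answer if index == -1 else answer[index + len(marker):])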
app.py CHANGED
@@ -1,130 +1,223 @@
  import os
  import gradio as gr
- import pandas as pd
  import requests
- from dotenv import load_dotenv
- from langchain_core.messages import HumanMessage
  from agent import build_graph

- load_dotenv()
-
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
- cached_answers = []

- class ChatAgent:
      def __init__(self):
-         print("ChatAgent initialized with Qwen LangGraph workflow.")
-         self.graph = build_graph("huggingface")  # Uses Qwen endpoint
-
      def __call__(self, question: str) -> str:
-         print(f"Processing question: {question[:60]}...")
          messages = [HumanMessage(content=question)]
-         results = self.graph.invoke({"messages": messages})
-         answer = results['messages'][-1].content.strip()
-         return answer
-
- def run_agent_only(profile: gr.OAuthProfile | None):
-     global cached_answers
-     cached_answers = []
-     results_log = []

-     if not profile:
-         return "Please login first.", None

      try:
-         agent = ChatAgent()
      except Exception as e:
-         return f"Agent Init Error: {e}", None

      try:
-         response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
          questions_data = response.json()
-     except Exception as e:
          return f"Error fetching questions: {e}", None

      for item in questions_data:
          task_id = item.get("task_id")
-         question = item.get("question")
-         file_name = item.get("file_name")
-
-         if not task_id or question is None:
              continue
-
          try:
-             user_message = question
-             if file_name:
-                 user_message += f"\n\nFile to use: {file_name}"
-
-             answer = agent(user_message)
-             cached_answers.append({"task_id": task_id, "submitted_answer": answer})
-             results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
          except Exception as e:
-             results_log.append({
-                 "Task ID": task_id,
-                 "Question": question,
-                 "Submitted Answer": f"AGENT ERROR: {e}"
-             })
-
-     return "Agent finished. Now click 'Submit Cached Answers'", pd.DataFrame(results_log)

- def submit_cached_answers(profile: gr.OAuthProfile | None):
-     global cached_answers
-     if not profile or not cached_answers:
-         return "No cached answers to submit. Run the agent first.", None

-     space_id = os.getenv("SPACE_ID")
-     username = profile.username
-     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-
-     payload = {
-         "username": username,
-         "agent_code": agent_code,
-         "answers": cached_answers
-     }

      try:
-         response = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, timeout=60)
-         result = response.json()
          final_status = (
-             f"Submission Successful!\nUser: {result.get('username')}\n"
-             f"Score: {result.get('score', 'N/A')}% "
-             f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})"
          )
-         return final_status, None
      except Exception as e:
-         return f"Submission failed: {e}", None

- # --- Gradio UI ---
  with gr.Blocks() as demo:
-     gr.Markdown("# LangGraph ChatAgent Evaluation")
-     gr.Markdown("Run the agent on all tasks, then submit for scoring.")
      gr.LoginButton()

-     run_button = gr.Button("\U0001F9E0 Run Agent")
-     submit_button = gr.Button("\U0001F4E4 Submit Answers")

-     status_box = gr.Textbox(label="Status", lines=3)
-     table = gr.DataFrame(label="Results", wrap=True)

-     run_button.click(fn=run_agent_only, outputs=[status_box, table])
-     submit_button.click(fn=submit_cached_answers, outputs=[status_box, table])

  if __name__ == "__main__":
      print("\n" + "-"*30 + " App Starting " + "-"*30)
      space_host_startup = os.getenv("SPACE_HOST")
-     space_id_startup = os.getenv("SPACE_ID")

      if space_host_startup:
          print(f"✅ SPACE_HOST found: {space_host_startup}")
-         print(f" Runtime URL: https://{space_host_startup}.hf.space")
      else:
-         print("ℹ️ No SPACE_HOST found.")

-     if space_id_startup:
          print(f"✅ SPACE_ID found: {space_id_startup}")
          print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
      else:
-         print("ℹ️ No SPACE_ID found.")

-     print("Launching Gradio Interface...")
      demo.launch(debug=True, share=False)
  import os
  import gradio as gr
  import requests
+ import inspect
+ import pandas as pd
  from agent import build_graph
+ from langchain_core.messages import HumanMessage
+ import time
+ import csv

+ # (Keep Constants as is)
+ # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

+ # --- Basic Agent Definition ---
+ class BasicAgent:
      def __init__(self):
+         print("BasicAgent initialized.")
+         self.agent = build_graph()
+
      def __call__(self, question: str) -> str:
+         print(f"Agent received question (first 50 chars): {question[:50]}...")
          messages = [HumanMessage(content=question)]
+         messages = self.agent.invoke({"messages": messages})
+         answer = messages['messages'][-1].content
+
+         index = answer.find("FINAL ANSWER: ")
+         if index == -1:
+             return answer
+         return answer[index+14:]
+
+ # --- Upload answers solved locally ---
+ def csv_to_dict(file_path):
+     result = {}
+     with open(file_path, 'r') as file:
+         csv_reader = csv.reader(file)
+         header = next(csv_reader)  # Skip header row
+         for row in csv_reader:
+             result[row[0]] = row[1]
+     return result
+
+
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
+     """
+     Fetches all questions, runs the BasicAgent on them, submits all answers,
+     and displays the results.
+     """
+     # --- Determine HF Space Runtime URL and Repo URL ---
+     space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
+
+     if profile:
+         username = f"{profile.username}"
+         print(f"User logged in: {username}")
+     else:
+         print("User not logged in.")
+         return "Please log in to Hugging Face with the button.", None

+     api_url = DEFAULT_API_URL
+     questions_url = f"{api_url}/questions"
+     submit_url = f"{api_url}/submit"

+     # 1. Instantiate Agent (modify this part to create your agent)
      try:
+         agent = BasicAgent()
      except Exception as e:
+         print(f"Error instantiating agent: {e}")
+         return f"Error initializing agent: {e}", None
+
+     # In the case of an app running as a Hugging Face space, this link points toward your codebase (useful for others, so please keep it public)
+     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+     print(agent_code)

+     # 2. Fetch questions
+     print(f"Fetching questions from: {questions_url}")
      try:
+         response = requests.get(questions_url, timeout=15)
+         response.raise_for_status()
          questions_data = response.json()
+         if not questions_data:
+             print("Fetched questions list is empty.")
+             return "Fetched questions list is empty or invalid format.", None
+         print(f"Fetched {len(questions_data)} questions.")
+     except requests.exceptions.RequestException as e:
+         print(f"Error fetching questions: {e}")
          return f"Error fetching questions: {e}", None
+     except requests.exceptions.JSONDecodeError as e:
+         print(f"Error decoding JSON response from questions endpoint: {e}")
+         print(f"Response text: {response.text[:500]}")
+         return f"Error decoding server response for questions: {e}", None
+     except Exception as e:
+         print(f"An unexpected error occurred fetching questions: {e}")
+         return f"An unexpected error occurred fetching questions: {e}", None

+     # 3. Run your agent
+     results_log = []
+     answers_payload = []
+
+     answers = csv_to_dict("answers.csv")
+
+     print(f"Running agent on {len(questions_data)} questions...")
      for item in questions_data:
          task_id = item.get("task_id")
+         question_text = item.get("question")
+         if not task_id or question_text is None:
+             print(f"Skipping item with missing task_id or question: {item}")
              continue
          try:
+             # submitted_answer = agent(question_text)
+             submitted_answer = answers[task_id]
+
+             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+             time.sleep(10)
          except Exception as e:
+             print(f"Error running agent on task {task_id}: {e}")
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+             time.sleep(10)

+     if not answers_payload:
+         print("Agent did not produce any answers to submit.")
+         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

+     # 4. Prepare submission
+     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+     print(status_update)

+     # 5. Submit answers
+     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
      try:
+         response = requests.post(submit_url, json=submission_data, timeout=60)
+         response.raise_for_status()
+         result_data = response.json()
          final_status = (
+             f"Submission Successful!\n"
+             f"User: {result_data.get('username')}\n"
+             f"Overall Score: {result_data.get('score', 'N/A')}% "
+             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+             f"Message: {result_data.get('message', 'No message received.')}"
          )
+         print("Submission successful.")
+         results_df = pd.DataFrame(results_log)
+         return final_status, results_df
+     except requests.exceptions.HTTPError as e:
+         error_detail = f"Server responded with status {e.response.status_code}."
+         try:
+             error_json = e.response.json()
+             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+         except requests.exceptions.JSONDecodeError:
+             error_detail += f" Response: {e.response.text[:500]}"
+         status_message = f"Submission Failed: {error_detail}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except requests.exceptions.Timeout:
+         status_message = "Submission Failed: The request timed out."
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except requests.exceptions.RequestException as e:
+         status_message = f"Submission Failed: Network error - {e}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
      except Exception as e:
+         status_message = f"Unexpected error occurred during submission: {e}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df

+
+ # --- Build Gradio interface using Blocks ---
  with gr.Blocks() as demo:
+     gr.Markdown("# Basic Agent Evaluation Runner")
+     gr.Markdown(
+         """
+         **Instructions:**
+         1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
+         2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+         3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+         ---
+         **Disclaimers:**
+         Once you click the "Submit" button, it can take quite some time (this is the time the agent needs to go through all the questions).
+         This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to avoid the long wait on the submit button, you could cache the answers and submit them in a separate action, or even answer the questions asynchronously.
+         """
+     )
+
      gr.LoginButton()

+     run_button = gr.Button("Run Evaluation & Submit All Answers")

+     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+     # Removed max_rows=10 from DataFrame constructor
+     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

+     run_button.click(
+         fn=run_and_submit_all,
+         outputs=[status_output, results_table]
+     )

  if __name__ == "__main__":
      print("\n" + "-"*30 + " App Starting " + "-"*30)
+     # Check for SPACE_HOST and SPACE_ID at startup for information
      space_host_startup = os.getenv("SPACE_HOST")
+     space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup

      if space_host_startup:
          print(f"✅ SPACE_HOST found: {space_host_startup}")
+         print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
      else:
+         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

+     if space_id_startup:  # Print repo URLs if SPACE_ID is found
          print(f"✅ SPACE_ID found: {space_id_startup}")
          print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+         print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
      else:
+         print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+
+     print("-"*(60 + len(" App Starting ")) + "\n")

+     print("Launching Gradio interface for Basic Agent evaluation...")
      demo.launch(debug=True, share=False)
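As a reference for the cached-answers path above: csv_to_dict() skips a single header row and maps the first column to the second, so answers.csv is assumed to be a two-column file like the one this sketch writes (the task IDs and answers are illustrative placeholders, not real results):

    import csv

    from app import csv_to_dict  # assumes this runs next to app.py

    rows = [
        ("task_id", "submitted_answer"),  # header row, skipped by csv_to_dict()
        ("task-0001", "example answer one"),
        ("task-0002", "42"),
    ]
    with open("answers.csv", "w", newline="") as f:
        csv.writer(f).writerows(rows)

    print(csv_to_dict("answers.csv"))
    # {'task-0001': 'example answer one', 'task-0002': '42'}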
requirements.txt CHANGED
@@ -1,21 +1,14 @@
  gradio
  requests
  langchain
- langchain-community
  langchain-core
- langchain-google-genai
- langchain-huggingface
- langchain-groq
- langchain-anthropic
  langchain-tavily
- langchain-chroma
  langgraph
- huggingface_hub
- sentence-transformers
- supabase
- arxiv
- pymupdf
  wikipedia
- pgvector
- python-dotenv
- tqdm
  gradio
  requests
+ python-dotenv
  langchain
  langchain-core
+ langchain-community
  langchain-tavily
+ langchain-google-genai
+ langchain-openai
  langgraph
  wikipedia
+ arxiv
+ youtube_transcript_api
+ httpx