Arbnor Tefiki committed on
Commit
94b3868
·
1 Parent(s): f40578f

First commit

Browse files
Files changed (5) hide show
  1. app.py +158 -0
  2. custom_tools.py +96 -0
  3. functions.py +140 -0
  4. index.html +0 -19
  5. style.css +0 -28
app.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import pandas as pd
5
+ from dotenv import load_dotenv
6
+ from functions import *
7
+ from langchain_core.messages import HumanMessage
8
+
9
# Load environment variables (e.g. API tokens) from a local .env file, if one exists.
load_dotenv()

# Base URL of the scoring service; the /questions and /submit endpoints are built from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
+
13
def _extract_answer(result) -> str:
    """Return the agent's final answer from a graph result.

    Scans ``result["messages"]`` from newest to oldest and returns the stripped
    content of the first message whose ``content`` is a string. Returns
    ``"UNKNOWN"`` when there are no messages or none carries string content.
    """
    if "messages" not in result or not result["messages"]:
        return "UNKNOWN"
    for message in reversed(result["messages"]):
        if isinstance(getattr(message, "content", None), str):
            return message.content.strip()
    return "UNKNOWN"


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every question from the scoring API and submit the answers.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` with one row per attempted question, or ``None``
        when the run stopped before any question was attempted (no login,
        agent construction failure, or question download failure).
    """
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = profile.username
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        graph = build_graph()
        agent = graph.invoke
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Repo URL not available"
    print(f"Agent code repo: {agent_code}")

    # Fetch questions
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        # The loop below calls .get() on every entry, so anything that is not
        # a non-empty list is rejected here instead of crashing later.
        if not isinstance(questions_data, list) or not questions_data:
            print("Fetched questions list is empty or invalid format.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None

    results_log = []
    answers_payload = []

    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        if not isinstance(item, dict):
            print(f"Skipping malformed question entry: {item}")
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            input_messages = [HumanMessage(content=question_text)]
            result = agent({"messages": input_messages})
            answer = _extract_answer(result)

            print("Answered with:", answer)
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": answer
            })
        except Exception as e:
            # A failing question is logged for display but not submitted.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    print(f"Submitting {len(answers_payload)} answers for user '{username}'...")

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except Exception as e:
        status_message = f"Submission Failed: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
113
+
114
# Gradio UI: a login button, a run button, and two outputs (status text and results table).
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        Modify the code here to define your agent's logic, the tools, the necessary packages, etc...
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Receives the first element of run_and_submit_all's return tuple.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Receives the second element (a DataFrame, or None on early exit).
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs are wired to the callback; only its two outputs are bound.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
134
+
135
+
136
if __name__ == "__main__":
    # Print a startup banner with whatever Space metadata the environment exposes,
    # then launch the Gradio app.
    print("\n" + "-"*30 + " App Starting " + "-"*30)

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f" SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST normally already ends in ".hf.space"; strip it first so the
        # printed URL never ends in ".hf.space.hf.space".
        space_subdomain = space_host_startup.removesuffix(".hf.space")
        print(f" Runtime URL should be: https://{space_subdomain}.hf.space")
    else:
        print("SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f" SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
custom_tools.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from duckduckgo_search import DDGS
3
+ from langchain_core.tools import tool
4
+
5
@tool
def reverse_text(input: str) -> str:
    """Reverse the characters in a text or string.

    Args:
        input: The text or string to reverse.
    """
    # The parameter name `input` shadows the builtin but is part of the tool's
    # argument schema, so it is kept unchanged.
    return input[::-1]
13
+
14
@tool
def web_search(query: str) -> str:
    """Perform a web search using DuckDuckGo and return the top 3 summarized results.

    Args:
        query: The search query to look up.
    """
    try:
        with DDGS() as ddgs:
            # Keep only hits that have both a title and a snippet, formatted one per entry.
            formatted = [
                f"{hit.get('title', '')}: {hit.get('body', '')} (URL: {hit.get('href', '')})"
                for hit in ddgs.text(query, max_results=3)
                if hit.get("title", "") and hit.get("body", "")
            ]
    except Exception as err:
        # Failures are reported as text so the agent keeps running.
        return f"Web search error: {err}"

    if formatted:
        return "\n\n---\n\n".join(formatted)
    return "No results found."
35
+
36
@tool
def calculate(expression: str) -> str:
    """Evaluate a simple math expression and return the result.

    Args:
        expression: A string containing the math expression to evaluate.
            A caret (``^``) is interpreted as exponentiation.
    """
    try:
        allowed_names = {
            "abs": abs,
            "round": round,
            "min": min,
            "max": max,
            "pow": pow,
        }
        # In Python `^` is bitwise XOR, which silently gives a wrong result for
        # ints and raises for floats; in a math expression it means "power".
        normalized = expression.replace("^", "**")
        # NOTE(security): eval() on model-generated text is not a real sandbox.
        # Disabling __builtins__ can be bypassed through attribute access on
        # literals. Replace with an AST-based evaluator before exposing this
        # tool to untrusted input.
        result = eval(normalized, {"__builtins__": None}, allowed_names)
        return str(result)
    except Exception as e:
        return f"Calculation error: {e}"
55
+
56
@tool
def wikipedia_summary(query: str) -> str:
    """Retrieve a summary of a topic from Wikipedia.

    Args:
        query: The subject or topic to summarize.
    """
    # Local import: urllib.parse is not among this module's top-level imports.
    from urllib.parse import quote

    try:
        # The title is a single URL path segment. Escape everything, including
        # "/", "?" and "#", so titles such as "AC/DC" do not change the path.
        title = quote(query.strip(), safe="")
        response = requests.get(
            f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}", timeout=10
        )
        response.raise_for_status()
        data = response.json()
        return data.get("extract", "No summary found.")
    except Exception as e:
        # Network, HTTP and decoding failures are reported as text for the agent.
        return f"Wikipedia error: {e}"
72
+
73
@tool
def define_term(term: str) -> str:
    """Provide a dictionary-style definition of a given term using an online API.

    Args:
        term: The word or term to define.
    """
    # Local import: urllib.parse is not among this module's top-level imports.
    from urllib.parse import quote

    try:
        # The term is a single URL path segment; escape reserved characters
        # so it cannot alter the request path or query string.
        escaped_term = quote(term.strip(), safe="")
        response = requests.get(
            f"https://api.dictionaryapi.dev/api/v2/entries/en/{escaped_term}", timeout=10
        )
        response.raise_for_status()
        data = response.json()
        # Only the first definition of the first meaning of the first entry is used.
        meanings = data[0].get("meanings", [])
        if meanings:
            defs = meanings[0].get("definitions", [])
            if defs:
                return defs[0].get("definition", "Definition not found.")
        return "Definition not found."
    except Exception as e:
        # Also covers an unexpected response shape (e.g. an empty list).
        return f"Definition error: {e}"
94
+
95
# List of tools to register with your agent.
# Every @tool-decorated function defined above in this module is included.
TOOLS = [web_search, calculate, wikipedia_summary, define_term, reverse_text]
functions.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ from langgraph.graph import START, StateGraph, MessagesState
4
+ from langgraph.prebuilt import ToolNode
5
+ from langchain_core.messages import HumanMessage, SystemMessage
6
+ from huggingface_hub import InferenceClient
7
+ from custom_tools import TOOLS
8
+ from langchain_core.messages import AIMessage
9
+
10
# Hugging Face API token read from the environment; None when the variable is unset.
HF_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
# Module-level inference client shared by the planner and assistant nodes.
client = InferenceClient(token=HF_TOKEN)
12
+
13
# System prompt for the planning step. The "I need to ..." phrases it asks the
# model to emit are the same phrases tools_condition() looks for (case-insensitively)
# when deciding whether to route to the tools node.
planner_prompt = SystemMessage(content="""
You are a planning assistant. Your job is to decide how to answer a question.

- If the answer is easy and factual, answer it directly.
- If you are not 100% certain or the answer requires looking up real-world information, say:
I need to search this.

- If the question contains math or expressions like +, -, /, ^, say:
I need to calculate this.

- If a word should be explained, say:
I need to define this.

-If the question asks about a person, historical event, or specific topic, say:
I need to look up wikipedia.

-If the questions asks for backwards pronounciation or reversing text, say:
I need to reverse text.

Only respond with one line explaining what you will do.
Do not try to answer yet.

e.g:
Q: How many studio albums did Mercedes Sosa release between 2000 and 2009?
A: I need to search this.

Q: What does the word 'ephemeral' mean?
A: I need to define this.

Q: What is 23 * 6 + 3?
A: I need to calculate this.

Q: Reverse this: 'tfel drow eht'
A: I need to reverse text.

Q: What bird species are seen in this video?
A: UNKNOWN
""")
51
+
52
def planner_node(state: MessagesState):
    """Ask the chat model how the question should be handled.

    The planner prompt is placed in front of the conversation, the messages
    are converted to role/content dicts, and the model's reply is returned
    as a single SystemMessage under the "messages" key.

    Raises:
        ValueError: If the conversation contains a message that is neither a
            SystemMessage nor a HumanMessage.
    """
    # Checked in order, so SystemMessage is tested before HumanMessage.
    role_by_type = ((SystemMessage, "system"), (HumanMessage, "user"))

    payload = []
    for message in [planner_prompt, *state["messages"]]:
        role = next(
            (name for kind, name in role_by_type if isinstance(message, kind)),
            None,
        )
        if role is None:
            raise ValueError(f"Unsupported message type: {type(message)}")
        payload.append({"role": role, "content": message.content})

    completion = client.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        messages=payload,
    )

    plan = completion.choices[0].message.content.strip()
    print("Planner output:\n", plan)

    return {"messages": [SystemMessage(content=plan)]}
75
+
76
# System prompt for the answering step, which runs after a tool result has been
# appended to the conversation. (The em dash below had been mis-encoded.)
answer_prompt = SystemMessage(content="""
You are now given the result of a tool (like a search, calculator, or text reversal).
Use the tool result and the original question to give the final answer.
If the tool result is unhelpful or unclear, respond with 'UNKNOWN'.
Respond with only the answer — no explanations.
""")
82
+
83
def assistant_node(state: MessagesState):
    """Produce the final answer from the conversation plus the tool result.

    The answer prompt is placed in front of the state's messages, everything
    is converted to role/content dicts for the chat model, and the reply is
    returned as one AIMessage under the "messages" key.

    Raises:
        ValueError: If a message is neither a SystemMessage nor a HumanMessage.
    """
    def role_of(message):
        # Map a LangChain message object to the chat-completion role string.
        if isinstance(message, SystemMessage):
            return "system"
        if isinstance(message, HumanMessage):
            return "user"
        raise ValueError(f"Unsupported message type: {type(message)}")

    conversation = [answer_prompt] + state["messages"]
    chat_payload = [
        {"role": role_of(entry), "content": entry.content}
        for entry in conversation
    ]

    completion = client.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        messages=chat_payload,
    )

    final_text = completion.choices[0].message.content.strip()
    print("Final answer output:\n", final_text)

    return {"messages": [AIMessage(content=final_text)]}
105
+
106
def tools_condition(state: MessagesState) -> str:
    """Decide where the graph goes after the planner.

    Returns "tools" when the newest message contains (case-insensitively) one
    of the planner's "I need to ..." phrases, otherwise "end".
    """
    trigger_phrases = (
        "i need to search",
        "i need to calculate",
        "i need to define",
        "i need to reverse text",
        "i need to look up wikipedia",
    )
    latest_text = state["messages"][-1].content.lower()

    for phrase in trigger_phrases:
        if phrase in latest_text:
            return "tools"
    return "end"
119
+
120
class PatchedToolNode(ToolNode):
    """ToolNode variant that feeds the tool output back as a HumanMessage.

    The assistant node only accepts System/Human messages, so the first
    message produced by the underlying ToolNode is re-wrapped as a
    HumanMessage prefixed with "Tool result:".
    """

    def invoke(self, state: MessagesState, config=None, **kwargs) -> dict:
        """Run the wrapped ToolNode and append its output to the conversation.

        Args:
            state: Graph state holding the conversation under "messages".
            config: Optional runnable config; forwarded to the parent so that
                callbacks/tracing configured by the caller are not dropped.
            **kwargs: Extra keyword arguments forwarded to the parent.

        Returns:
            A dict whose "messages" list is the incoming messages plus one
            HumanMessage carrying the tool output ("UNKNOWN" when the parent
            produced no messages).
        """
        # NOTE(review): the stock ToolNode appears to require the last message
        # to be an AIMessage carrying tool_calls, while the planner emits a
        # plain SystemMessage — confirm this path actually executes a tool.
        result = super().invoke(state, config, **kwargs)
        produced = result.get("messages")
        tool_output = produced[0].content if produced else "UNKNOWN"

        # Append tool result as a HumanMessage so assistant sees it
        new_messages = state["messages"] + [HumanMessage(content=f"Tool result:\n{tool_output}")]
        return {"messages": new_messages}
128
+
129
def build_graph():
    """Assemble and compile the planner -> (tools -> assistant) graph.

    Flow: START -> planner. The planner's reply is routed by
    ``tools_condition``: "tools" goes to the tools node and then the
    assistant, "end" finishes the run with the planner's reply as the last
    message.

    Returns:
        The compiled graph (its ``invoke`` method is used by the app).
    """
    # Local import: END is not among this module's top-level imports.
    from langgraph.graph import END

    builder = StateGraph(MessagesState)

    builder.add_node("planner", planner_node)
    builder.add_node("assistant", assistant_node)
    builder.add_node("tools", PatchedToolNode(TOOLS))

    builder.add_edge(START, "planner")
    # tools_condition returns "tools" or "end"; "end" is not a node name, so
    # it must be mapped explicitly onto the END sentinel.
    builder.add_conditional_edges(
        "planner",
        tools_condition,
        {"tools": "tools", "end": END},
    )
    builder.add_edge("tools", "assistant")
    # Terminate explicitly after the assistant rather than leaving a dead-end node.
    builder.add_edge("assistant", END)

    return builder.compile()
index.html DELETED
@@ -1,19 +0,0 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
style.css DELETED
@@ -1,28 +0,0 @@
1
- body {
2
- padding: 2rem;
3
- font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
- }
5
-
6
- h1 {
7
- font-size: 16px;
8
- margin-top: 0;
9
- }
10
-
11
- p {
12
- color: rgb(107, 114, 128);
13
- font-size: 15px;
14
- margin-bottom: 10px;
15
- margin-top: 5px;
16
- }
17
-
18
- .card {
19
- max-width: 620px;
20
- margin: 0 auto;
21
- padding: 16px;
22
- border: 1px solid lightgray;
23
- border-radius: 16px;
24
- }
25
-
26
- .card p:last-child {
27
- margin-bottom: 0;
28
- }