Markiian Tsalyk
committed on
Commit
·
12c47a4
1
Parent(s):
81917a3
LlamaIndex agent
Browse files- __pycache__/agent.cpython-313.pyc +0 -0
- __pycache__/f918266a-b3e0-4914-865d-4faa564f1aef.cpython-313.pyc +0 -0
- __pycache__/open_router_chat.cpython-313.pyc +0 -0
- __pycache__/tools.cpython-313.pyc +0 -0
- __pycache__/wikipedia_tables_parser.cpython-313.pyc +0 -0
- app.py +55 -36
- f918266a-b3e0-4914-865d-4faa564f1aef.py +5 -0
- llama_index_agent.py +175 -0
- requirements.txt +13 -1
- tools.py +272 -0
- wikipedia_tables_parser.py +106 -0
__pycache__/agent.cpython-313.pyc
ADDED
Binary file (1.75 kB). View file
|
|
__pycache__/f918266a-b3e0-4914-865d-4faa564f1aef.cpython-313.pyc
ADDED
Binary file (367 Bytes). View file
|
|
__pycache__/open_router_chat.cpython-313.pyc
ADDED
Binary file (1.62 kB). View file
|
|
__pycache__/tools.cpython-313.pyc
ADDED
Binary file (10.2 kB). View file
|
|
__pycache__/wikipedia_tables_parser.cpython-313.pyc
ADDED
Binary file (4.68 kB). View file
|
|
app.py
CHANGED
@@ -3,6 +3,8 @@ import gradio as gr
|
|
3 |
import requests
|
4 |
import inspect
|
5 |
import pandas as pd
|
|
|
|
|
6 |
|
7 |
# (Keep Constants as is)
|
8 |
# --- Constants ---
|
@@ -10,25 +12,18 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
10 |
|
11 |
# --- Basic Agent Definition ---
|
12 |
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
def __call__(self, question: str) -> str:
|
17 |
-
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
18 |
-
fixed_answer = "This is a default answer."
|
19 |
-
print(f"Agent returning fixed answer: {fixed_answer}")
|
20 |
-
return fixed_answer
|
21 |
-
|
22 |
-
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
23 |
"""
|
24 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
25 |
and displays the results.
|
26 |
"""
|
27 |
# --- Determine HF Space Runtime URL and Repo URL ---
|
28 |
-
space_id = os.getenv("SPACE_ID")
|
29 |
|
30 |
if profile:
|
31 |
-
username= f"{profile.username}"
|
32 |
print(f"User logged in: {username}")
|
33 |
else:
|
34 |
print("User not logged in.")
|
@@ -40,7 +35,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
40 |
|
41 |
# 1. Instantiate Agent ( modify this part to create your agent)
|
42 |
try:
|
43 |
-
agent =
|
44 |
except Exception as e:
|
45 |
print(f"Error instantiating agent: {e}")
|
46 |
return f"Error initializing agent: {e}", None
|
@@ -55,16 +50,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
55 |
response.raise_for_status()
|
56 |
questions_data = response.json()
|
57 |
if not questions_data:
|
58 |
-
|
59 |
-
|
60 |
print(f"Fetched {len(questions_data)} questions.")
|
61 |
except requests.exceptions.RequestException as e:
|
62 |
print(f"Error fetching questions: {e}")
|
63 |
return f"Error fetching questions: {e}", None
|
64 |
except requests.exceptions.JSONDecodeError as e:
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
except Exception as e:
|
69 |
print(f"An unexpected error occurred fetching questions: {e}")
|
70 |
return f"An unexpected error occurred fetching questions: {e}", None
|
@@ -76,23 +71,44 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
76 |
for item in questions_data:
|
77 |
task_id = item.get("task_id")
|
78 |
question_text = item.get("question")
|
|
|
79 |
if not task_id or question_text is None:
|
80 |
print(f"Skipping item with missing task_id or question: {item}")
|
81 |
continue
|
82 |
try:
|
|
|
|
|
83 |
submitted_answer = agent(question_text)
|
84 |
-
answers_payload.append(
|
85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
except Exception as e:
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
if not answers_payload:
|
91 |
print("Agent did not produce any answers to submit.")
|
92 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
93 |
|
94 |
-
# 4. Prepare Submission
|
95 |
-
submission_data = {
|
|
|
|
|
|
|
|
|
96 |
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
97 |
print(status_update)
|
98 |
|
@@ -162,20 +178,19 @@ with gr.Blocks() as demo:
|
|
162 |
|
163 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
164 |
|
165 |
-
status_output = gr.Textbox(
|
|
|
|
|
166 |
# Removed max_rows=10 from DataFrame constructor
|
167 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
168 |
|
169 |
-
run_button.click(
|
170 |
-
fn=run_and_submit_all,
|
171 |
-
outputs=[status_output, results_table]
|
172 |
-
)
|
173 |
|
174 |
if __name__ == "__main__":
|
175 |
-
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
176 |
# Check for SPACE_HOST and SPACE_ID at startup for information
|
177 |
space_host_startup = os.getenv("SPACE_HOST")
|
178 |
-
space_id_startup = os.getenv("SPACE_ID")
|
179 |
|
180 |
if space_host_startup:
|
181 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
@@ -183,14 +198,18 @@ if __name__ == "__main__":
|
|
183 |
else:
|
184 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
185 |
|
186 |
-
if space_id_startup:
|
187 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
188 |
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
189 |
-
print(
|
|
|
|
|
190 |
else:
|
191 |
-
print(
|
|
|
|
|
192 |
|
193 |
-
print("-"*(60 + len(" App Starting ")) + "\n")
|
194 |
|
195 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
196 |
-
demo.launch(debug=True, share=False)
|
|
|
3 |
import requests
|
4 |
import inspect
|
5 |
import pandas as pd
|
6 |
+
from llama_index_agent import LlamaIndexAgent
|
7 |
+
|
8 |
|
9 |
# (Keep Constants as is)
|
10 |
# --- Constants ---
|
|
|
12 |
|
13 |
# --- Basic Agent Definition ---
|
14 |
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
15 |
+
|
16 |
+
|
17 |
+
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
"""
|
19 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
20 |
and displays the results.
|
21 |
"""
|
22 |
# --- Determine HF Space Runtime URL and Repo URL ---
|
23 |
+
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
24 |
|
25 |
if profile:
|
26 |
+
username = f"{profile.username}"
|
27 |
print(f"User logged in: {username}")
|
28 |
else:
|
29 |
print("User not logged in.")
|
|
|
35 |
|
36 |
# 1. Instantiate Agent ( modify this part to create your agent)
|
37 |
try:
|
38 |
+
agent = LlamaIndexAgent()
|
39 |
except Exception as e:
|
40 |
print(f"Error instantiating agent: {e}")
|
41 |
return f"Error initializing agent: {e}", None
|
|
|
50 |
response.raise_for_status()
|
51 |
questions_data = response.json()
|
52 |
if not questions_data:
|
53 |
+
print("Fetched questions list is empty.")
|
54 |
+
return "Fetched questions list is empty or invalid format.", None
|
55 |
print(f"Fetched {len(questions_data)} questions.")
|
56 |
except requests.exceptions.RequestException as e:
|
57 |
print(f"Error fetching questions: {e}")
|
58 |
return f"Error fetching questions: {e}", None
|
59 |
except requests.exceptions.JSONDecodeError as e:
|
60 |
+
print(f"Error decoding JSON response from questions endpoint: {e}")
|
61 |
+
print(f"Response text: {response.text[:500]}")
|
62 |
+
return f"Error decoding server response for questions: {e}", None
|
63 |
except Exception as e:
|
64 |
print(f"An unexpected error occurred fetching questions: {e}")
|
65 |
return f"An unexpected error occurred fetching questions: {e}", None
|
|
|
71 |
for item in questions_data:
|
72 |
task_id = item.get("task_id")
|
73 |
question_text = item.get("question")
|
74 |
+
file_name = item.get("file_name")
|
75 |
if not task_id or question_text is None:
|
76 |
print(f"Skipping item with missing task_id or question: {item}")
|
77 |
continue
|
78 |
try:
|
79 |
+
if len(file_name) > 0:
|
80 |
+
question_text = f"{question_text}\nAttached file: {file_name}"
|
81 |
submitted_answer = agent(question_text)
|
82 |
+
answers_payload.append(
|
83 |
+
{"task_id": task_id, "submitted_answer": submitted_answer}
|
84 |
+
)
|
85 |
+
results_log.append(
|
86 |
+
{
|
87 |
+
"Task ID": task_id,
|
88 |
+
"Question": question_text,
|
89 |
+
"Submitted Answer": submitted_answer,
|
90 |
+
}
|
91 |
+
)
|
92 |
except Exception as e:
|
93 |
+
print(f"Error running agent on task {task_id}: {e}")
|
94 |
+
results_log.append(
|
95 |
+
{
|
96 |
+
"Task ID": task_id,
|
97 |
+
"Question": question_text,
|
98 |
+
"Submitted Answer": f"AGENT ERROR: {e}",
|
99 |
+
}
|
100 |
+
)
|
101 |
|
102 |
if not answers_payload:
|
103 |
print("Agent did not produce any answers to submit.")
|
104 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
105 |
|
106 |
+
# 4. Prepare Submission
|
107 |
+
submission_data = {
|
108 |
+
"username": username.strip(),
|
109 |
+
"agent_code": agent_code,
|
110 |
+
"answers": answers_payload,
|
111 |
+
}
|
112 |
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
113 |
print(status_update)
|
114 |
|
|
|
178 |
|
179 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
180 |
|
181 |
+
status_output = gr.Textbox(
|
182 |
+
label="Run Status / Submission Result", lines=5, interactive=False
|
183 |
+
)
|
184 |
# Removed max_rows=10 from DataFrame constructor
|
185 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
186 |
|
187 |
+
run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
|
|
|
|
|
|
|
188 |
|
189 |
if __name__ == "__main__":
|
190 |
+
print("\n" + "-" * 30 + " App Starting " + "-" * 30)
|
191 |
# Check for SPACE_HOST and SPACE_ID at startup for information
|
192 |
space_host_startup = os.getenv("SPACE_HOST")
|
193 |
+
space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
|
194 |
|
195 |
if space_host_startup:
|
196 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
|
|
198 |
else:
|
199 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
200 |
|
201 |
+
if space_id_startup: # Print repo URLs if SPACE_ID is found
|
202 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
203 |
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
204 |
+
print(
|
205 |
+
f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
|
206 |
+
)
|
207 |
else:
|
208 |
+
print(
|
209 |
+
"ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
|
210 |
+
)
|
211 |
|
212 |
+
print("-" * (60 + len(" App Starting ")) + "\n")
|
213 |
|
214 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
215 |
+
demo.launch(debug=True, share=False)
|
f918266a-b3e0-4914-865d-4faa564f1aef.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def simple_func():
    """Return the fixed sample value 124.5."""
    value = 124.5
    return value


# Emit the value on stdout when the file is executed.
print(simple_func())
|
llama_index_agent.py
ADDED
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
|
4 |
+
load_dotenv()
|
5 |
+
|
6 |
+
|
7 |
+
from llama_index.core.agent import ReActAgent
|
8 |
+
from llama_index.core.tools import FunctionTool
|
9 |
+
from llama_index.llms.openrouter import OpenRouter
|
10 |
+
|
11 |
+
import tools
|
12 |
+
|
13 |
+
|
14 |
+
class LlamaIndexAgent:
    """
    Two-stage ReAct agent built on an OpenRouter LLM and the helpers in ``tools``.

    ``self.agent`` has access to every tool and produces a draft response;
    ``self.small_agent`` only has the ``final_answer`` tool and is asked to
    turn that draft into the final reply.
    """

    def __init__(
        self,
        # Alternative model names kept for reference:
        # model_name: str = "meta-llama/llama-4-maverick:free",
        # model_name: str = "meta-llama/llama-4-scout:free",
        # model_name: str = "microsoft/phi-4-reasoning-plus:free",
        model_name: str = "google/gemini-2.5-flash-preview",
        temperature: float = 0.7,
        verbose: bool = True,
    ):
        """
        Initialize the LlamaIndex agent with OpenRouter LLM.

        The API key is not a parameter: it is read from the
        ``OPENROUTER_API_KEY`` environment variable.

        Args:
            model_name: Model name to use from OpenRouter
            temperature: Temperature setting for the LLM
            verbose: Whether to output verbose logs
        """
        self.llm = OpenRouter(
            api_key=os.getenv("OPENROUTER_API_KEY"),
            model=model_name,
            temperature=temperature,
        )

        # (callable, tool name, tool description) in the order the tools are
        # handed to the main agent.
        tool_specs = [
            (tools.reverse_text, "reverse_text", "Reverses the given text"),
            (
                tools.final_answer,
                "final_answer",
                "Use this to provide your final answer to the user's question",
            ),
            (
                tools.web_search,
                "web_search",
                "Use this to search the web for the given query",
            ),
            (
                tools.wikipedia_search,
                "wikipedia_search",
                "Use this to search the wikipedia for the given query",
            ),
            (tools.multiply, "multiply", "Use this to multiply two numbers"),
            (tools.length, "length", "Use this to get the length of an iterable"),
            (
                tools.execute_python_file,
                "execute_python_file",
                "Use this to execute a python file",
            ),
            (
                tools.trascript_youtube,
                "transcript_youtube",
                "Use this to get the transcript of a YouTube video",
            ),
            (
                tools.classify_fruit_vegitable,
                "classify_fruit_vegitable",
                "Use this to classify items to fruits and vegitables",
            ),
            (
                tools.fetch_historical_event_data,
                "fetch_historical_event_data",
                "Use this to fetch data about historical event that occured in certain year such as Olympics games, Footbal games, NBA etc.",
            ),
        ]
        # dict preserves insertion order, so list(values()) keeps the order above.
        tools_by_name = {
            name: FunctionTool.from_defaults(fn=fn, name=name, description=description)
            for fn, name, description in tool_specs
        }

        # Create the agent
        self.agent = ReActAgent.from_tools(
            list(tools_by_name.values()),
            llm=self.llm,
            verbose=verbose,
            max_iterations=20,
            system_prompt="""
                You are a helpful AI assistant that can use tools to answer the user's questions.
                You have set of tools that you are free to use.
                When you have the complete answer to the user's question, always use the final_answer tool to present it.
            """,
        )

        # Second agent that only formats: it shares the same final_answer tool.
        self.small_agent = ReActAgent.from_tools(
            [tools_by_name["final_answer"]],
            llm=self.llm,
            verbose=verbose,
            max_iterations=10,
            system_prompt="You are approached to prepare answer for the user question in desired format. You always need to use final_answer tool, it will help you.",
        )

    def __call__(self, query_text: str, **kwds) -> str:
        """
        Process a user query through the agent.

        Args:
            query_text: User's query text

        Returns:
            The formatting agent's response text.
        """
        try:
            response = self.agent.chat(query_text).response
        except Exception as exc:
            # Best effort: a failure of the main agent must not abort the run,
            # so the formatting agent still gets a chance with an empty draft.
            # Unlike a bare ``except:``, KeyboardInterrupt/SystemExit propagate.
            print(f"Main agent failed, continuing with an empty response: {exc}")
            response = ""

        final_response = self.small_agent.chat(
            f"Question: {query_text}\nResponse: {response}"
        )
        return final_response.response
|
143 |
+
|
144 |
+
|
145 |
+
if __name__ == "__main__":
    # Manual smoke test: run the agent on the queries that are not commented out.
    agent = LlamaIndexAgent()

    # Queries
    # Every entry ends with a comma so that un-commenting neighbours never
    # triggers implicit concatenation of adjacent string literals.
    example_queries = [
        # '.rewsna eht sa "tfel" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI',
        # "What is the weather in Lviv now?",
        # "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.",
        # "Given this table defining * on the set S = {a, b, c, d, e}\n\n|*|a|b|c|d|e|\n|---|---|---|---|---|---|\n|a|a|b|c|b|d|\n|b|b|c|a|e|c|\n|c|c|a|b|b|a|\n|d|b|e|b|e|d|\n|e|d|b|a|d|c|\n\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.",
        # "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?",
        # "Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.",
        "What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer.",
        # "Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations.",
        # "How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?",
        # "Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.",
        # "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?",
        # "What is the final numeric output from the attached Python code? File name: f918266a-b3e0-4914-865d-4faa564f1aef.py",
        # """Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\n\nWhat does Teal'c say in response to the question \"Isn't that hot?\"""",
        # "What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?",
        # """
        # I'm making a grocery list for my mom, but she's a professor of botany and she's a real stickler when it comes to categorizing things. I need to add different foods to different categories on the grocery list, but if I make a mistake, she won't buy anything inserted in the wrong category. Here's the list I have so far:\n\nmilk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts\n\nI need to make headings for the fruits and vegetables. Could you please create a list of just the vegetables from my list? If you could do that, then I can figure out how to categorize the rest of the list into the appropriate categories. But remember that my mom is a real stickler, so make sure that no botanical fruits end up on the vegetable list, or she won't get them when she's at the store. Please alphabetize the list of vegetables, and place each item in a comma separated list.
        # """,
        # """
        # On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?
        # """,
    ]

    for query in example_queries:
        print(f"\nQuery: {query}")
        response = agent(query)
        print(f"Response: {response}")
|
requirements.txt
CHANGED
@@ -1,2 +1,14 @@
|
|
1 |
gradio
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
gradio
|
2 |
+
gradio[oauth]
|
3 |
+
requests
|
4 |
+
pandas
|
5 |
+
smolagents
|
6 |
+
openai
|
7 |
+
langchain
|
8 |
+
langchain-openai
|
9 |
+
langchain-community
|
10 |
+
llama-index
|
11 |
+
llama-index-llms-openrouter
|
12 |
+
wikipedia
|
13 |
+
youtube-transcript-api
|
14 |
+
python-dotenv
|
tools.py
ADDED
@@ -0,0 +1,272 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from smolagents import DuckDuckGoSearchTool
|
2 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
3 |
+
import wikipedia
|
4 |
+
from wikipedia_tables_parser import fetch_wikipedia_tables
|
5 |
+
import pandas as pd
|
6 |
+
from typing import Any
|
7 |
+
import os
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
|
10 |
+
load_dotenv()
|
11 |
+
import importlib.util
|
12 |
+
import sys
|
13 |
+
import io
|
14 |
+
import contextlib
|
15 |
+
from llama_index.llms.openrouter import OpenRouter
|
16 |
+
from llama_index.core.types import ChatMessage
|
17 |
+
|
18 |
+
|
19 |
+
llm = OpenRouter(
|
20 |
+
api_key=os.getenv("OPENROUTER_API_KEY"),
|
21 |
+
model="google/gemini-2.5-flash-preview",
|
22 |
+
temperature=0.7,
|
23 |
+
)
|
24 |
+
|
25 |
+
|
26 |
+
def reverse_text(text: str, **kwargs) -> str:
    """
    Produce the input with its characters in reverse order.

    Intended for text that looks unreadable as given; reversing it may
    reveal the actual content. The reversed text is also printed.

    Args:
        text: text to be reversed

    Return:
        The reversed text.

    Raises:
        ValueError: if the value cannot be sliced or printed.
    """
    try:
        flipped = text[::-1]
        print(flipped)
    except Exception as e:
        raise ValueError(f"Can't reverse text: {e}")
    return flipped
|
42 |
+
|
43 |
+
|
44 |
+
def fetch_historical_event_data(event_name: str, year: str, **kwargs) -> str:
    """
    Look up the Wikipedia page for "<event_name> in <year>" and return its data.

    Example events: Olympics games, Footbal games, NBA etc.

    Args:
        event_name: String name of the event
        year: String year of the event

    Return:
        String holding the page text followed by the tables found on the page.
    """
    page = wikipedia.page(f"{event_name} in {year}")
    page_url = page.url
    page_text = page.content

    # Prefer pandas' HTML table reader; if it fails for any reason, fall back
    # to the BeautifulSoup-based parser.
    try:
        tables = pd.read_html(page_url)
    except Exception:
        tables = fetch_wikipedia_tables(page_url)

    return f"Content: {page_text}\nTables: {tables}"
|
68 |
+
|
69 |
+
|
70 |
+
def classify_fruit_vegitable(item: str, **kwargs) -> str:
    """
    Ask the module-level LLM whether an item is a fruit or a vegetable.

    Args:
        item: Item to classify

    Returns:
        The content of the LLM reply; the prompt requests only `fruit` or
        `vegetable`, but the reply is returned as-is without validation.
    """
    prompt = f"Classify whether it is fruit or vegetable: {item}. Return only `fruit` or `vegetable` without explanations"
    reply = llm.chat(messages=[ChatMessage(content=prompt)])
    return reply.message.content
|
88 |
+
|
89 |
+
|
90 |
+
def web_search(query: str, **kwargs) -> str:
    """
    Run a DuckDuckGo search for the query and return the results.

    Not meant for Wikipedia look-ups; use the wikipedia_search tool for those.
    The query must be a human-language string, not a URL or key.
    The results are also printed.

    Args:
        query: query to search in WEB

    Return:
        String with web search results.
    """
    search_tool = DuckDuckGoSearchTool()
    hits = search_tool.forward(query)
    print(hits)
    return hits
|
105 |
+
|
106 |
+
|
107 |
+
def wikipedia_search(query: str, **kwargs) -> Any:
    """
    Returns wikipedia search results for the provided query.

    The page whose title matches the query is loaded; its text and the
    tables found on the page are combined into one string.

    Args:
        query: query to search in WIKIPEDIA

    Return:
        String of the form "Content: <page text>\\nTables: <tables>".
    """
    page = wikipedia.page(query)

    url = page.url
    content = page.content
    try:
        tables = pd.read_html(url)
    except Exception:
        # Fall back to the BeautifulSoup-based parser when pandas cannot read
        # the page. Catching Exception rather than using a bare ``except:``
        # lets KeyboardInterrupt/SystemExit propagate.
        tables = fetch_wikipedia_tables(url)

    return f"Content: {content}\nTables: {tables}"
|
129 |
+
|
130 |
+
|
131 |
+
def multiply(a: float, b: float, **kwargs) -> float:
    """
    Compute the product of two numbers.

    Args:
        a: First number
        b: Second number

    Return:
        The product of the two numbers.
    """
    product = a * b
    return product
|
143 |
+
|
144 |
+
|
145 |
+
def length(iterable: Any, **kwargs) -> int:
    """
    Count the items of a sized object via ``len``.

    Args:
        iterable: Any object supporting ``len``

    Return:
        The length of the iterable.
    """
    size = len(iterable)
    return size
|
156 |
+
|
157 |
+
|
158 |
+
def execute_python_file(file_path: str) -> Any:
    """
    Executes a Python file and returns its result.

    The file is loaded and executed as a module while stdout/stderr are
    captured. If the module defines a variable named ``result`` that value is
    returned; otherwise the stripped captured stdout is returned.

    NOTE(security): this runs arbitrary code from ``file_path`` inside the
    current process. Only call it on files you trust.

    Args:
        file_path (str): Path to the Python file to execute.

    Returns:
        Any: ``module.result`` if present, else the captured stdout (str).

    Raises:
        FileNotFoundError: If the specified file does not exist.
        ImportError: If no module spec/loader can be created for the file.
        Exception: Anything raised by the executed file. When the file wrote
            to stderr, that output is appended to the message where the
            exception class can be rebuilt from a single message argument.

    Example:
        >>> # If example.py contains: result = 2 + 3
        >>> execute_python_file('example.py')
        5
    """
    # Verify file exists
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    file_dir = os.path.dirname(os.path.abspath(file_path))
    # splitext removes only the trailing extension ("a.py.py" -> "a.py").
    module_name = os.path.splitext(os.path.basename(file_path))[0]

    # Remember the import state so it can be restored exactly afterwards.
    original_sys_path = sys.path.copy()
    not_loaded = object()
    previous_module = sys.modules.get(module_name, not_loaded)
    sys.path.insert(0, file_dir)

    stdout_capture = io.StringIO()
    stderr_capture = io.StringIO()

    try:
        spec = importlib.util.spec_from_file_location(module_name, file_path)
        if spec is None or spec.loader is None:
            raise ImportError(f"Could not load module spec from {file_path}")

        module = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = module

        # Execute the module with its output redirected into the buffers.
        with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(
            stderr_capture
        ):
            spec.loader.exec_module(module)

        if hasattr(module, "result"):
            return module.result

        output = stdout_capture.getvalue().strip()
        print(f"RESULT PYTHON: {output}")
        return output

    except Exception as exc:
        error_output = stderr_capture.getvalue()
        if not error_output:
            raise
        try:
            enriched = type(exc)(f"{str(exc)}\nProgram output: {error_output}")
        except Exception:
            # Some exception classes need more than a message to construct;
            # in that case surface the original exception unchanged.
            enriched = None
        if enriched is None:
            raise
        raise enriched from None
    finally:
        # Restore in place so other references to sys.path stay valid.
        sys.path[:] = original_sys_path

        # Do not evict a module that was already imported under this name.
        if previous_module is not_loaded:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous_module
|
234 |
+
|
235 |
+
|
236 |
+
def trascript_youtube(video_id: str, **kwargs) -> list:
    """
    Fetch the transcript of a YouTube video.

    Args:
        video_id: ID of youtube video (Pass in the video ID, NOT the video URL. For a video with the URL https://www.youtube.com/watch?v=12345 the ID is 12345.)

    Return:
        The ``snippets`` of the fetched transcript.
    """
    transcript = YouTubeTranscriptApi().fetch(video_id)
    return transcript.snippets
|
250 |
+
|
251 |
+
|
252 |
+
def final_answer(query: str, answer: str, **kwargs) -> str:
    """
    Build the closing prompt that wraps the agent's answer. Meant to be the last step.

    Args:
        query: The initial query of the user
        answer: The answer to format and return to the user
    Return:
        Text containing the query, the answer and instructions for adapting
        the answer to the requested format.
    """
    formatted = f"""
    User query: {query}
    Final answer from agent: {answer}
    Adapt final answer to user request.
    There might be requested exact number, then you need to compress the output so that it was only number without any comments or explanations (float or integer).
    And on the other hand, the question might request some exact string value. Don't explain it, just return this value (For example, insted of `In response to the question, desired person is X` return only `X`)
    """
    return formatted
|
269 |
+
|
270 |
+
|
271 |
+
# print(wikipedia_search("Mercedes Sosa studio albums"))
|
272 |
+
# execute_python_file("f918266a-b3e0-4914-865d-4faa564f1aef.py")
|
wikipedia_tables_parser.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import requests
|
3 |
+
from bs4 import BeautifulSoup
|
4 |
+
|
5 |
+
|
6 |
+
def fetch_wikipedia_tables(
    url: str,
    handle_special_chars: bool = True,
) -> list[pd.DataFrame]:
    """
    Collect the tables of a Wikipedia page; any failure yields an empty list.

    Parameters:
    -----------
    url : str
        The Wikipedia URL to fetch tables from.
    handle_special_chars : bool, default True
        Whether each table is passed through ``_clean_table`` before being returned.

    Returns:
    --------
    list of pd.DataFrame
        The tables found on the page, or ``[]`` when none were found or an
        error occurred (a message is printed in both cases).
    """
    try:
        tables = _fetch_tables_with_bs4(url)

        if handle_special_chars:
            # Normalise special characters and formatting in every table.
            tables = [_clean_table(tbl) for tbl in tables]

        if not tables:
            print(f"No tables found at {url}")
            return []
        return tables
    except Exception as e:
        print(f"Error fetching tables: {e}")
        return []
|
41 |
+
|
42 |
+
|
43 |
+
def _fetch_tables_with_bs4(url: str, timeout: float = 30.0) -> list[pd.DataFrame]:
    """
    Fetch the ``wikitable`` tables of a page using BeautifulSoup.

    Args:
        url: Page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        One DataFrame per non-empty table. Any error (network, HTTP status,
        parsing) is printed and an empty list is returned.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        tables = []

        for table in soup.find_all("table", {"class": "wikitable"}):
            # Extract headers.
            # NOTE(review): this collects every <th> in the table, including
            # row-header cells, so the count often differs from the column
            # count and the table then gets default integer column labels.
            headers = [th.text.strip() for th in table.find_all("th")]

            # If no headers found in th tags, try first tr
            first_row = table.find("tr")
            if not headers and first_row is not None:
                headers = [
                    cell.text.strip() for cell in first_row.find_all(["th", "td"])
                ]

            # Extract rows; the first row is treated as the header row
            # whenever any headers were found.
            rows = table.find_all("tr")
            if headers:
                rows = rows[1:]

            data = []
            for row in rows:
                row_data = [cell.text.strip() for cell in row.find_all(["td", "th"])]
                if row_data:  # Skip empty rows
                    data.append(row_data)

            # Create DataFrame
            if data:
                if headers and len(headers) == len(data[0]):
                    df = pd.DataFrame(data, columns=headers)
                else:
                    df = pd.DataFrame(data)
                tables.append(df)

        return tables
    except Exception as e:
        print(f"Error in BeautifulSoup fallback: {e}")
        return []
|
84 |
+
|
85 |
+
|
86 |
+
def _clean_table(df: pd.DataFrame) -> pd.DataFrame:
|
87 |
+
"""Clean a table by handling special characters and formatting issues."""
|
88 |
+
# Make a copy to avoid modifying the original
|
89 |
+
df = df.copy()
|
90 |
+
|
91 |
+
# Handle all string columns
|
92 |
+
for col in df.columns:
|
93 |
+
if df[col].dtype == "object":
|
94 |
+
# Replace common problematic characters
|
95 |
+
df[col] = df[col].astype(str).str.replace(";", "", regex=False)
|
96 |
+
df[col] = df[col].str.replace("−", "-", regex=False) # Replace minus sign
|
97 |
+
df[col] = df[col].str.replace(
|
98 |
+
"\xa0", " ", regex=False
|
99 |
+
) # Replace non-breaking space
|
100 |
+
df[col] = df[col].str.replace("\n", " ", regex=False) # Replace newlines
|
101 |
+
df[col] = df[col].str.strip() # Strip whitespace
|
102 |
+
|
103 |
+
# Remove reference tags like [1], [2], etc.
|
104 |
+
df[col] = df[col].str.replace(r"\[\d+\]", "", regex=True)
|
105 |
+
|
106 |
+
return df
|