Giustino98 committed
Commit c4b829b · 1 Parent(s): 81917a3

first submission
Files changed (8)
  1. .gitignore +120 -0
  2. app.py +109 -195
  3. app_for_submission.py +227 -0
  4. math_tools.py +44 -0
  5. multimodal_tools.py +174 -0
  6. serpapi_tools.py +53 -0
  7. tools.py +69 -0
  8. youtube_tools.py +25 -0
.gitignore ADDED
@@ -0,0 +1,120 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # PEP 582; __pypackages__
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # IDE / Editor specific files
+ .idea/
+ .vscode/
+ *.project
+ *.pydevproject
+ .project
+ .settings/
+ *.sublime-workspace
+
+ # dotenv
+ .env
+
+ # OS specific files
+ .DS_Store
+ Thumbs.db
app.py CHANGED
@@ -1,196 +1,110 @@
  import os
- import gradio as gr
- import requests
- import inspect
- import pandas as pd
-
- # (Keep Constants as is)
- # --- Constants ---
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-
- # --- Basic Agent Definition ---
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
- class BasicAgent:
-     def __init__(self):
-         print("BasicAgent initialized.")
-     def __call__(self, question: str) -> str:
-         print(f"Agent received question (first 50 chars): {question[:50]}...")
-         fixed_answer = "This is a default answer."
-         print(f"Agent returning fixed answer: {fixed_answer}")
-         return fixed_answer
-
- def run_and_submit_all( profile: gr.OAuthProfile | None):
-     """
-     Fetches all questions, runs the BasicAgent on them, submits all answers,
-     and displays the results.
-     """
-     # --- Determine HF Space Runtime URL and Repo URL ---
-     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
-
-     if profile:
-         username= f"{profile.username}"
-         print(f"User logged in: {username}")
-     else:
-         print("User not logged in.")
-         return "Please Login to Hugging Face with the button.", None
-
-     api_url = DEFAULT_API_URL
-     questions_url = f"{api_url}/questions"
-     submit_url = f"{api_url}/submit"
-
-     # 1. Instantiate Agent ( modify this part to create your agent)
-     try:
-         agent = BasicAgent()
-     except Exception as e:
-         print(f"Error instantiating agent: {e}")
-         return f"Error initializing agent: {e}", None
-     # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
-     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-     print(agent_code)
-
-     # 2. Fetch Questions
-     print(f"Fetching questions from: {questions_url}")
-     try:
-         response = requests.get(questions_url, timeout=15)
-         response.raise_for_status()
-         questions_data = response.json()
-         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
-         print(f"Fetched {len(questions_data)} questions.")
-     except requests.exceptions.RequestException as e:
-         print(f"Error fetching questions: {e}")
-         return f"Error fetching questions: {e}", None
-     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
-     except Exception as e:
-         print(f"An unexpected error occurred fetching questions: {e}")
-         return f"An unexpected error occurred fetching questions: {e}", None
-
-     # 3. Run your Agent
-     results_log = []
-     answers_payload = []
-     print(f"Running agent on {len(questions_data)} questions...")
-     for item in questions_data:
-         task_id = item.get("task_id")
-         question_text = item.get("question")
-         if not task_id or question_text is None:
-             print(f"Skipping item with missing task_id or question: {item}")
-             continue
-         try:
-             submitted_answer = agent(question_text)
-             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-         except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
-
-     if not answers_payload:
-         print("Agent did not produce any answers to submit.")
-         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-
-     # 4. Prepare Submission
-     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-     print(status_update)
-
-     # 5. Submit
-     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
-     try:
-         response = requests.post(submit_url, json=submission_data, timeout=60)
-         response.raise_for_status()
-         result_data = response.json()
-         final_status = (
-             f"Submission Successful!\n"
-             f"User: {result_data.get('username')}\n"
-             f"Overall Score: {result_data.get('score', 'N/A')}% "
-             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-             f"Message: {result_data.get('message', 'No message received.')}"
-         )
-         print("Submission successful.")
-         results_df = pd.DataFrame(results_log)
-         return final_status, results_df
-     except requests.exceptions.HTTPError as e:
-         error_detail = f"Server responded with status {e.response.status_code}."
-         try:
-             error_json = e.response.json()
-             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-         except requests.exceptions.JSONDecodeError:
-             error_detail += f" Response: {e.response.text[:500]}"
-         status_message = f"Submission Failed: {error_detail}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except requests.exceptions.Timeout:
-         status_message = "Submission Failed: The request timed out."
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except requests.exceptions.RequestException as e:
-         status_message = f"Submission Failed: Network error - {e}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except Exception as e:
-         status_message = f"An unexpected error occurred during submission: {e}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-
-
- # --- Build Gradio Interface using Blocks ---
- with gr.Blocks() as demo:
-     gr.Markdown("# Basic Agent Evaluation Runner")
-     gr.Markdown(
-         """
-         **Instructions:**
-
-         1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-         2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-         3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-
-         ---
-         **Disclaimers:**
-         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-         This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
-         """
-     )
-
-     gr.LoginButton()
-
-     run_button = gr.Button("Run Evaluation & Submit All Answers")
-
-     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-     # Removed max_rows=10 from DataFrame constructor
-     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-
-     run_button.click(
-         fn=run_and_submit_all,
-         outputs=[status_output, results_table]
-     )
-
- if __name__ == "__main__":
-     print("\n" + "-"*30 + " App Starting " + "-"*30)
-     # Check for SPACE_HOST and SPACE_ID at startup for information
-     space_host_startup = os.getenv("SPACE_HOST")
-     space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
-
-     if space_host_startup:
-         print(f"✅ SPACE_HOST found: {space_host_startup}")
-         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-     else:
-         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
-
-     if space_id_startup: # Print repo URLs if SPACE_ID is found
-         print(f"✅ SPACE_ID found: {space_id_startup}")
-         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
-     else:
-         print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-
-     print("-"*(60 + len(" App Starting ")) + "\n")
-
-     print("Launching Gradio Interface for Basic Agent Evaluation...")
-     demo.launch(debug=True, share=False)
 
  import os
+ # Import the load_dotenv function from the dotenv library
+ from dotenv import load_dotenv
+ from langchain_google_genai import ChatGoogleGenerativeAI
+
+ from multimodal_tools import extract_text_tool, analyze_image_tool, analyze_audio_tool
+
+ # Load environment variables from .env file
+ load_dotenv()
+ # Read your API key from the environment variable or set it manually
+ api_key = os.getenv("GEMINI_API_KEY")
+ langfuse_secret_key = os.getenv("LANGFUSE_SECRET_KEY")
+ langfuse_public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
+
+ from typing import TypedDict, Annotated
+ from langgraph.graph.message import add_messages
+ from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
+ from langgraph.prebuilt import ToolNode
+ from langgraph.graph import START, StateGraph
+ from langgraph.prebuilt import tools_condition
+ from langchain_community.tools.tavily_search import TavilySearchResults  # Import Tavily
+ from langchain_community.tools import DuckDuckGoSearchRun
+ # from langfuse import Langfuse  # Langfuse is initialized by CallbackHandler directly
+ from langfuse.callback import CallbackHandler
+ from youtube_tools import youtube_transcript_tool
+ from math_tools import add_tool, subtract_tool, multiply_tool, divide_tool
+ from serpapi_tools import serpapi_search_tool
+ from IPython.display import Image, display
+
+
+ # Initialize Langfuse CallbackHandler for LangGraph/Langchain (tracing)
+ langfuse_handler = CallbackHandler(
+     public_key=langfuse_public_key,
+     secret_key=langfuse_secret_key,
+     host="http://localhost:3000"
+ )
+
+ # Create the LLM
+ chat = ChatGoogleGenerativeAI(
+     model="gemini-2.5-pro-preview-05-06",
+     temperature=0,
+     max_retries=2,
+     google_api_key=api_key,
+     thinking_budget=0
+ )
+
+ search_tool = TavilySearchResults(
+     name="tavily_web_search",  # You can customize the name if you want
+     description="Runs an advanced web search with Tavily for up-to-date, comprehensive information. Useful for complex questions or ones that need recent data. Running several searches with reworded queries can improve the results.",  # Description shown to the LLM
+     max_results=5
+ )
+
+ tools = [
+     extract_text_tool,
+     analyze_image_tool,
+     analyze_audio_tool,
+     youtube_transcript_tool,
+     add_tool,
+     subtract_tool,
+     multiply_tool,
+     divide_tool,
+     search_tool
+ ]
+ chat_with_tools = chat.bind_tools(tools)
+
+
+ class AgentState(TypedDict):
+     messages: Annotated[list[AnyMessage], add_messages]
+
+ def assistant(state: AgentState):
+     sys_msg = "You are a helpful assistant with access to tools. Understand user requests accurately. Use your tools when needed to answer effectively. Strictly follow all user instructions and constraints." \
+         " Pay attention: your output needs to contain only the final answer without any reasoning since it will be strictly evaluated against a dataset which contains only the specific response." \
+         " Your final output needs to be just the string or integer containing the answer, not an array or technical stuff."
+     return {
+         "messages": [chat_with_tools.invoke([sys_msg] + state["messages"])]
+     }
+
+
+ ## The graph
+ builder = StateGraph(AgentState)
+
+ # Define nodes: these do the work
+ builder.add_node("assistant", assistant)
+ builder.add_node("tools", ToolNode(tools))
+
+ # Define edges: these determine how the control flow moves
+ builder.add_edge(START, "assistant")
+ builder.add_conditional_edges(
+     "assistant",
+     # If the latest message requires a tool, route to tools
+     # Otherwise, provide a direct response
+     tools_condition,
+ )
+ builder.add_edge("tools", "assistant")
+ alfred = builder.compile()
+
+ """ # Save the graph image to a file
+ graph_image_bytes = alfred.get_graph(xray=True).draw_mermaid_png()
+ with open("alfred_graph.png", "wb") as f:
+     f.write(graph_image_bytes)
+ print("The graph image has been saved as alfred_graph.png")
+
+ messages = [HumanMessage(content="Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.")]
+ response = alfred.invoke(input={"messages": messages}, config={"callbacks": [langfuse_handler]})
+
+ print("🎩 Alfred's Response:")
+ print(response['messages'][-1].content)
+ """
app_for_submission.py ADDED
@@ -0,0 +1,227 @@
+ import os
+ import gradio as gr
+ import requests
+ import inspect
+ import pandas as pd
+ from app import alfred
+ from langfuse.callback import CallbackHandler
+ from typing import Optional
+ from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
+ # (Keep Constants as is)
+ # --- Constants ---
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+
+
+ langfuse_secret_key = os.getenv("LANGFUSE_SECRET_KEY")
+ langfuse_public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
+
+ # Initialize Langfuse CallbackHandler for LangGraph/Langchain (tracing)
+ langfuse_handler = CallbackHandler(
+     public_key=langfuse_public_key,
+     secret_key=langfuse_secret_key,
+     host="http://localhost:3000"
+ )
+
+ # --- Basic Agent Definition ---
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+ """ class BasicAgent:
+     def __init__(self):
+         print("BasicAgent initialized.")
+     def __call__(self, question: str, file_name: str | None = None) -> str:
+         print(f"Agent received question (first 50 chars): {question[:50]}...")
+         if file_name:
+             print(f"Agent received file_name: {file_name}")
+             # Here you can add the logic that uses file_name when it is provided.
+             # For now, we append it to the default answer as a demonstration.
+         fixed_answer = "This is a default answer."
+         if file_name:
+             fixed_answer += f" (File to use: {file_name})"
+         print(f"Agent returning fixed answer: {fixed_answer}")
+         return fixed_answer """
+
+ def run_and_submit_all(profile: Optional[gr.OAuthProfile]):
+     """
+     Fetches all questions, runs the agent on them, submits all answers,
+     and displays the results.
+     """
+     # --- Determine HF Space Runtime URL and Repo URL ---
+     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
+
+     if profile:
+         username = f"{profile.username}"
+         print(f"User logged in: {username}")
+     else:
+         print("User not logged in.")
+         return "Please Login to Hugging Face with the button.", None
+
+     api_url = DEFAULT_API_URL
+     questions_url = f"{api_url}/questions"
+     submit_url = f"{api_url}/submit"
+
+     # 1. Instantiate Agent (modify this part to create your agent)
+     try:
+         agent = alfred
+     except Exception as e:
+         print(f"Error instantiating agent: {e}")
+         return f"Error initializing agent: {e}", None
+     # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
+     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+     print(agent_code)
+
+     # 2. Fetch Questions
+     print(f"Fetching questions from: {questions_url}")
+     try:
+         response = requests.get(questions_url, timeout=15)
+         response.raise_for_status()
+         questions_data = response.json()
+         if not questions_data:
+             print("Fetched questions list is empty.")
+             return "Fetched questions list is empty or invalid format.", None
+         print(f"Fetched {len(questions_data)} questions.")
+     except requests.exceptions.RequestException as e:
+         print(f"Error fetching questions: {e}")
+         return f"Error fetching questions: {e}", None
+     except requests.exceptions.JSONDecodeError as e:
+         print(f"Error decoding JSON response from questions endpoint: {e}")
+         print(f"Response text: {response.text[:500]}")
+         return f"Error decoding server response for questions: {e}", None
+     except Exception as e:
+         print(f"An unexpected error occurred fetching questions: {e}")
+         return f"An unexpected error occurred fetching questions: {e}", None
+
+     # 3. Run your Agent
+     results_log = []
+     answers_payload = []
+     print(f"Running agent on {len(questions_data)} questions...")
+     for item in questions_data:
+         task_id = item.get("task_id")
+         question_text = item.get("question")
+         file_name = item.get("file_name")  # Extract file_name
+
+         if not task_id or question_text is None:
+             print(f"Skipping item with missing task_id or question: {item}")
+             continue
+         try:
+             if file_name and isinstance(file_name, str) and file_name.strip():
+                 messages = HumanMessage(content=question_text + " Path: files/" + file_name)
+             else:
+                 messages = HumanMessage(content=question_text)
+             submitted_answer = alfred.invoke(input={"messages": messages}, config={"callbacks": [langfuse_handler]})
+             answers_payload.append({
+                 "task_id": task_id,
+                 "submitted_answer": submitted_answer['messages'][-1].content[-1]
+                 if isinstance(submitted_answer['messages'][-1].content, list)
+                 else submitted_answer['messages'][-1].content
+             })
+             results_log.append({"Task ID": task_id, "Question": question_text, "File Name": file_name if file_name and file_name.strip() else "N/A", "Submitted Answer": submitted_answer['messages'][-1].content})
+         except Exception as e:
+             print(f"Error running agent on task {task_id}: {e}")
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+
+     if not answers_payload:
+         print("Agent did not produce any answers to submit.")
+         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+
+     # 4. Prepare Submission
+     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+     print(status_update)
+
+     # 5. Submit
+     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+     try:
+         response = requests.post(submit_url, json=submission_data, timeout=60)
+         response.raise_for_status()
+         result_data = response.json()
+         final_status = (
+             f"Submission Successful!\n"
+             f"User: {result_data.get('username')}\n"
+             f"Overall Score: {result_data.get('score', 'N/A')}% "
+             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+             f"Message: {result_data.get('message', 'No message received.')}"
+         )
+         print("Submission successful.")
+         results_df = pd.DataFrame(results_log)
+         return final_status, results_df
+     except requests.exceptions.HTTPError as e:
+         error_detail = f"Server responded with status {e.response.status_code}."
+         try:
+             error_json = e.response.json()
+             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+         except requests.exceptions.JSONDecodeError:
+             error_detail += f" Response: {e.response.text[:500]}"
+         status_message = f"Submission Failed: {error_detail}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except requests.exceptions.Timeout:
+         status_message = "Submission Failed: The request timed out."
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except requests.exceptions.RequestException as e:
+         status_message = f"Submission Failed: Network error - {e}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except Exception as e:
+         status_message = f"An unexpected error occurred during submission: {e}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+
+
+ # --- Build Gradio Interface using Blocks ---
+ with gr.Blocks() as demo:
+     gr.Markdown("# Basic Agent Evaluation Runner")
+     gr.Markdown(
+         """
+         **Instructions:**
+
+         1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
+         2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+         3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+
+         ---
+         **Disclaimers:**
+         Once you click the "Run Evaluation & Submit All Answers" button, it can take quite some time (this is the time the agent needs to go through all the questions).
+         This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to avoid the long wait on the submit button, you could cache the answers and submit them in a separate action, or even answer the questions asynchronously.
+         """
+     )
+
+     gr.LoginButton()
+
+     run_button = gr.Button("Run Evaluation & Submit All Answers")
+
+     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+     # Removed max_rows=10 from DataFrame constructor
+     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+
+     run_button.click(
+         fn=run_and_submit_all,
+         outputs=[status_output, results_table]
+     )
+
+ if __name__ == "__main__":
+     print("\n" + "-"*30 + " App Starting " + "-"*30)
+     # Check for SPACE_HOST and SPACE_ID at startup for information
+     space_host_startup = os.getenv("SPACE_HOST")
+     space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+
+     if space_host_startup:
+         print(f"✅ SPACE_HOST found: {space_host_startup}")
+         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
+     else:
+         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
+
+     if space_id_startup: # Print repo URLs if SPACE_ID is found
+         print(f"✅ SPACE_ID found: {space_id_startup}")
+         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+     else:
+         print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+
+     print("-"*(60 + len(" App Starting ")) + "\n")
+
+     print("Launching Gradio Interface for Basic Agent Evaluation...")
+     demo.launch(debug=True, share=False)
math_tools.py ADDED
@@ -0,0 +1,44 @@
+ from langchain.tools import Tool
+ import operator
+
+ def add(a: float, b: float) -> float:
+     """Adds two numbers."""
+     return operator.add(a, b)
+
+ def subtract(a: float, b: float) -> float:
+     """Subtracts the second number from the first."""
+     return operator.sub(a, b)
+
+ def multiply(a: float, b: float) -> float:
+     """Multiplies two numbers."""
+     return operator.mul(a, b)
+
+ def divide(a: float, b: float) -> float:
+     """Divides the first number by the second. Returns an error message if division by zero."""
+     if b == 0:
+         return "Error: Cannot divide by zero."
+     return operator.truediv(a, b)
+
+ add_tool = Tool(
+     name="calculator_add",
+     func=add,
+     description="Adds two numbers. Input should be two numbers (a, b)."
+ )
+
+ subtract_tool = Tool(
+     name="calculator_subtract",
+     func=subtract,
+     description="Subtracts the second number from the first. Input should be two numbers (a, b)."
+ )
+
+ multiply_tool = Tool(
+     name="calculator_multiply",
+     func=multiply,
+     description="Multiplies two numbers. Input should be two numbers (a, b)."
+ )
+
+ divide_tool = Tool(
+     name="calculator_divide",
+     func=divide,
+     description="Divides the first number by the second. Input should be two numbers (a, b)."
+ )
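
One caveat: the plain langchain.tools.Tool wrapper is designed for a single string input, so wrapping these two-argument functions may not bind cleanly when the LLM calls them with named arguments. A minimal alternative sketch using the same @tool decorator already used in multimodal_tools.py (the function name add_structured is only illustrative, not part of the commit):

from langchain_core.tools import tool

@tool("calculator_add", parse_docstring=True)
def add_structured(a: float, b: float) -> float:
    """Adds two numbers.

    Args:
        a (float): The first number.
        b (float): The second number.
    """
    return a + b

The decorator builds an argument schema from the type hints and docstring, so the model can pass a and b as separate fields instead of one concatenated string.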
multimodal_tools.py ADDED
@@ -0,0 +1,174 @@
+ import base64
+ import os
+ from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from langchain.tools import Tool
+ from langchain_core.tools import tool
+
+ api_key = os.getenv("GEMINI_API_KEY")
+
+ # Create the vision-capable LLM
+ vision_llm = ChatGoogleGenerativeAI(
+     model="gemini-2.5-flash-preview-05-20",
+     temperature=0,
+     max_retries=2,
+     google_api_key=api_key
+ )
+
+ def extract_text(img_path: str) -> str:
+     """
+     Extract text from an image file using a multimodal model.
+     Input needs to be the path of the image.
+     """
+     all_text = ""
+     try:
+         # Read image and encode as base64
+         with open(img_path, "rb") as image_file:
+             image_bytes = image_file.read()
+
+         image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+
+         # Prepare the prompt including the base64 image data
+         message = [
+             HumanMessage(
+                 content=[
+                     {
+                         "type": "text",
+                         "text": (
+                             "Extract all the text from this image. "
+                             "Return only the extracted text, no explanations."
+                         ),
+                     },
+                     {
+                         "type": "image_url",
+                         "image_url": {
+                             "url": f"data:image/png;base64,{image_base64}"
+                         },
+                     },
+                 ]
+             )
+         ]
+
+         # Call the vision-capable model
+         response = vision_llm.invoke(message)
+
+         # Append extracted text
+         all_text += response.content + "\n\n"
+
+         return all_text.strip()
+     except Exception as e:
+         # A butler should handle errors gracefully
+         error_msg = f"Error extracting text: {str(e)}"
+         print(error_msg)
+         return ""
+
+ @tool("analyze_image_tool", parse_docstring=True)
+ def analyze_image_tool(user_query: str, img_path: str) -> str:
+     """
+     Answer the question reasoning on the image.
+
+     Args:
+         user_query (str): The question to be answered.
+         img_path (str): Path to the image file.
+     """
+     all_text = ""
+     try:
+         # Read image and encode as base64
+         with open(img_path, "rb") as image_file:
+             image_bytes = image_file.read()
+
+         image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+
+         # Prepare the prompt including the base64 image data
+         message = [
+             HumanMessage(
+                 content=[
+                     {
+                         "type": "text",
+                         "text": (
+                             f"User query: {user_query}"
+                         ),
+                     },
+                     {
+                         "type": "image_url",
+                         "image_url": {
+                             "url": f"data:image/png;base64,{image_base64}"
+                         },
+                     },
+                 ]
+             )
+         ]
+
+         # Call the vision-capable model
+         response = vision_llm.invoke(message)
+
+         # Append extracted text
+         all_text += response.content + "\n\n"
+
+         return all_text.strip()
+     except Exception as e:
+         # A butler should handle errors gracefully
+         error_msg = f"Error analyzing image: {str(e)}"
+         print(error_msg)
+         return ""
+
+ @tool("analyze_audio_tool", parse_docstring=True)
+ def analyze_audio_tool(user_query: str, audio_path: str) -> str:
+     """
+     Answer the question by reasoning on the provided audio file.
+
+     Args:
+         user_query (str): The question to be answered.
+         audio_path (str): Path to the audio file (e.g., .mp3, .wav, .flac, .aac, .ogg).
+     """
+     try:
+         # Determine MIME type from file extension
+         _filename, file_extension = os.path.splitext(audio_path)
+         file_extension = file_extension.lower()
+
+         supported_formats = {
+             ".mp3": "audio/mp3", ".wav": "audio/wav", ".flac": "audio/flac",
+             ".aac": "audio/aac", ".ogg": "audio/ogg"
+         }
+
+         if file_extension not in supported_formats:
+             return (f"Error: Unsupported audio file format '{file_extension}'. "
+                     f"Supported extensions: {', '.join(supported_formats.keys())}.")
+         mime_type = supported_formats[file_extension]
+
+         # Read audio file and encode as base64
+         with open(audio_path, "rb") as audio_file:
+             audio_bytes = audio_file.read()
+         audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
+
+         # Prepare the prompt including the base64 audio data
+         message = [
+             HumanMessage(
+                 content=[
+                     {
+                         "type": "text",
+                         "text": f"User query: {user_query}",
+                     },
+                     {
+                         "type": "audio",
+                         "source_type": "base64",
+                         "mime_type": mime_type,
+                         "data": audio_base64
+                     },
+                 ]
+             )
+         ]
+
+         # Call the multimodal model
+         response = vision_llm.invoke(message)
+         return response.content.strip()
+     except Exception as e:
+         error_msg = f"Error analyzing audio: {str(e)}"
+         print(error_msg)
+         return ""
+
+ extract_text_tool = Tool(
+     name="extract_text_tool",
+     func=extract_text,
+     description="Extract text from an image file using a multimodal model."
+ )
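
For reference, a usage sketch (the file paths below are placeholders, not files in this repo): the plain Tool wrapper takes a single string argument, while the @tool-decorated functions take a dict of named arguments.

# Illustrative only; "files/example.png" and "files/clip.mp3" are hypothetical paths.
print(extract_text_tool.invoke("files/example.png"))
print(analyze_image_tool.invoke({
    "user_query": "What is written on the sign?",
    "img_path": "files/example.png",
}))
print(analyze_audio_tool.invoke({
    "user_query": "Summarize what the speaker says.",
    "audio_path": "files/clip.mp3",
}))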
serpapi_tools.py ADDED
@@ -0,0 +1,53 @@
+ import os
+ from langchain.tools import Tool
+ from serpapi import GoogleSearch
+ from dotenv import load_dotenv
+
+ # Load environment variables in case the API key lives in a .env file
+ load_dotenv()
+
+ SERPAPI_API_KEY = os.getenv("SERPAPI_API_KEY")
+
+ def _serpapi_search(query: str, num_results: int = 5, gl: str = "it", hl: str = "it") -> str:
+     """
+     Runs a web search through SerpAPI (Google Search) and returns the formatted results.
+     This tool is expensive, so other tools should be preferred when available.
+     Call this tool only when the other tools have not produced satisfactory results.
+
+     Args:
+         query: The search query.
+         num_results: The number of results to return.
+         gl: Country code used to geolocate the results (e.g. "it" for Italy).
+         hl: Language code for the search results (e.g. "it" for Italian).
+
+     Returns:
+         A formatted string with the search results, or an error message.
+     """
+     if not SERPAPI_API_KEY:
+         return "Error: the SERPAPI_API_KEY environment variable is not set."
+
+     params = {
+         "engine": "google",
+         "q": query,
+         "api_key": SERPAPI_API_KEY,
+         "num": num_results,
+         "gl": gl,
+         "hl": hl
+     }
+     search = GoogleSearch(params)
+     results = search.get_dict()
+     organic_results = results.get("organic_results", [])
+
+     if not organic_results:
+         return f"No results found for '{query}'."
+
+     formatted_results = "\n\n".join([f"Title: {res.get('title')}\nLink: {res.get('link')}\nSnippet: {res.get('snippet')}" for res in organic_results])
+     return formatted_results
+
+ serpapi_search_tool = Tool(
+     name="serpapi_web_search",
+     func=_serpapi_search,
+     description="Runs a web search through SerpAPI (Google Search) to find up-to-date information. The input should be the search query."
+                 " This tool is expensive, so other tools should be preferred when available."
+                 " Call this tool only when the other tools have not produced satisfactory results."
+ )
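
A minimal usage sketch (assumes SERPAPI_API_KEY is set; the query is illustrative). Note that the defaults target Italian results, so English-language questions may want gl/hl overridden:

# Direct call with explicit locale overrides.
print(_serpapi_search("LangGraph tools_condition documentation", num_results=3, gl="us", hl="en"))
# Or through the LangChain tool wrapper, which forwards a single query string.
print(serpapi_search_tool.invoke("LangGraph tools_condition documentation"))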
tools.py ADDED
@@ -0,0 +1,69 @@
+ from langchain.tools import Tool
+ from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled
+ import operator
+
+
+ def extract_youtube_transcript(youtube_url: str) -> str:
+     """
+     Extracts the transcript from a given YouTube video URL.
+     Returns the transcript as a single string or an error message if not found.
+     """
+     try:
+         video_id = youtube_url.split("v=")[1].split("&")[0]
+         transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
+         transcript = " ".join([item['text'] for item in transcript_list])
+         return transcript
+     except NoTranscriptFound:
+         return "Error: No transcript found for this video. It might be disabled or not available in English."
+     except TranscriptsDisabled:
+         return "Error: Transcripts are disabled for this video."
+     except Exception as e:
+         return f"Error extracting transcript: {str(e)}"
+
+ youtube_transcript_tool = Tool(
+     name="youtube_transcript_extractor",
+     func=extract_youtube_transcript,
+     description="Extracts the full transcript from a YouTube video given its URL. Input should be a valid YouTube video URL."
+ )
+
+ def add(a: float, b: float) -> float:
+     """Adds two numbers."""
+     return operator.add(a, b)
+
+ def subtract(a: float, b: float) -> float:
+     """Subtracts the second number from the first."""
+     return operator.sub(a, b)
+
+ def multiply(a: float, b: float) -> float:
+     """Multiplies two numbers."""
+     return operator.mul(a, b)
+
+ def divide(a: float, b: float) -> float:
+     """Divides the first number by the second. Returns an error message if division by zero."""
+     if b == 0:
+         return "Error: Cannot divide by zero."
+     return operator.truediv(a, b)
+
+ add_tool = Tool(
+     name="calculator_add",
+     func=add,
+     description="Adds two numbers. Input should be two numbers (a, b)."
+ )
+
+ subtract_tool = Tool(
+     name="calculator_subtract",
+     func=subtract,
+     description="Subtracts the second number from the first. Input should be two numbers (a, b)."
+ )
+
+ multiply_tool = Tool(
+     name="calculator_multiply",
+     func=multiply,
+     description="Multiplies two numbers. Input should be two numbers (a, b)."
+ )
+
+ divide_tool = Tool(
+     name="calculator_divide",
+     func=divide,
+     description="Divides the first number by the second. Input should be two numbers (a, b)."
+ )
youtube_tools.py ADDED
@@ -0,0 +1,25 @@
+ from langchain.tools import Tool
+ from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled
+
+ def extract_youtube_transcript(youtube_url: str) -> str:
+     """
+     Extracts the transcript from a given YouTube video URL.
+     Returns the transcript as a single string or an error message if not found.
+     """
+     try:
+         video_id = youtube_url.split("v=")[1].split("&")[0]
+         transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
+         transcript = " ".join([item['text'] for item in transcript_list])
+         return transcript
+     except NoTranscriptFound:
+         return "Error: No transcript found for this video. It might be disabled or not available in English."
+     except TranscriptsDisabled:
+         return "Error: Transcripts are disabled for this video."
+     except Exception as e:
+         return f"Error extracting transcript: {str(e)}"
+
+ youtube_transcript_tool = Tool(
+     name="youtube_transcript_extractor",
+     func=extract_youtube_transcript,
+     description="Extracts the full transcript from a YouTube video given its URL. Input should be a valid YouTube video URL."
+ )
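
One caveat worth noting: the split("v=") parsing only handles standard watch URLs, so youtu.be short links or URLs where v= is not the first query parameter will fall through to the generic error. A hedged sketch of a more tolerant ID extractor using only the standard library (the helper name _extract_video_id is new here, not part of the commit):

from urllib.parse import urlparse, parse_qs

def _extract_video_id(youtube_url: str) -> str | None:
    """Best-effort video ID extraction for watch URLs and youtu.be short links."""
    parsed = urlparse(youtube_url)
    if parsed.hostname == "youtu.be":
        # Short links carry the ID in the path, e.g. https://youtu.be/VIDEO_ID
        return parsed.path.lstrip("/") or None
    if parsed.hostname and parsed.hostname.endswith("youtube.com"):
        # Watch URLs carry the ID in the v= query parameter
        return parse_qs(parsed.query).get("v", [None])[0]
    return None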