Michele De Stefano committed
Commit 1b8aef5 · 1 Parent(s): 81917a3

Adapted the code so that it can run locally
README.md CHANGED
@@ -11,5 +11,25 @@ hf_oauth: true
  # optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
  hf_oauth_expiration_minutes: 480
  ---
+ # Evaluation application for Unit 4 of the HuggingFace Agents course
+ This is my implementation of the evaluation application.
+ Unlike the original application I cloned, this one is designed to run
+ locally, because I am using Ollama.

+ When running locally, you have to create a `.env` file in the root of the
+ project. This file is read by the `dotenv.load_dotenv()` call and must
+ contain the following variables:
+ ```commandline
+ HF_USERNAME="<your HuggingFace user name>"
+ HF_ACCESS_TOKEN="<your HuggingFace access token>"
+ SPACE_HOST="localhost"
+ SPACE_ID="<your space ID>"
+ ```
+ You can infer the space ID from the address of your space when you
+ access the `Files` section. For example, if you read
+ ```commandline
+ https://huggingface.co/spaces/aaa/bbb/tree/main
+ ```
+ then the `SPACE_ID` is `aaa/bbb` (where `aaa` should be the user name).
+ # Configuration
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
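
For reference, this is how `app.py` in this commit consumes those variables at import time (a short excerpt of the commit's own code):

```python
import os

import dotenv

# Read the `.env` file at the project root into the process environment.
dotenv.load_dotenv()

# The lookups performed right after loading the file.
HF_ACCESS_TOKEN = os.getenv("HF_ACCESS_TOKEN")
HF_USERNAME = os.getenv("HF_USERNAME")
SPACE_ID = os.getenv("SPACE_ID")  # e.g. "aaa/bbb"
```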
agent_factory.py ADDED
@@ -0,0 +1,221 @@
+ import re
+ from typing import Any, Literal
+
+ from langchain_community.tools import DuckDuckGoSearchResults
+ from langchain_core.messages import SystemMessage, AnyMessage
+ from langchain_core.runnables import Runnable
+ from langchain_core.tools import BaseTool
+ from langchain_ollama import ChatOllama
+ from langgraph.constants import START, END
+ from langgraph.graph import MessagesState, StateGraph
+ from langgraph.graph.graph import CompiledGraph
+ from langgraph.prebuilt import ToolNode
+ from pydantic import BaseModel
+
+ from tools import (
+     execute_python_script,
+     get_excel_table_content,
+     get_youtube_video_transcript,
+     reverse_string,
+     sum_list,
+     transcribe_audio_file,
+     web_page_info_retriever,
+     youtube_video_to_frame_captions,
+ )
+
+
+ class AgentFactory:
+     """
+     A factory for the agent. It is assumed that an Ollama server is running
+     on the machine where the factory is used.
+     """
+
+     __system_prompt: str = (
+         "You have to answer test questions and you need to score high.\n"
+         "Sometimes auxiliary files may be attached to the question, so the\n"
+         "question itself is presented as a JSON string with the following\n"
+         "fields:\n"
+         "1. task_id: unique hash identifier of the question.\n"
+         "2. question: the text of the question.\n"
+         "3. Level: a number with the question difficulty level. You can ignore "
+         "this field.\n"
+         "4. file_name: the name of the file needed to answer the question. "
+         "This is empty if the question does not refer to any file. "
+         "IMPORTANT: The text of the question may mention a file name that is "
+         "different from what is reported in the \"file_name\" JSON field. "
+         "YOU HAVE TO IGNORE THE FILE NAME MENTIONED IN \"question\" AND "
+         "YOU MUST USE THE FILE NAME PROVIDED IN THE \"file_name\" FIELD.\n"
+         "\n"
+         "Depending on the question, the format of your answer is a number\n"
+         "OR as few words as possible OR a comma separated list of numbers\n"
+         "and/or strings. If you are asked for a number, don't use commas to\n"
+         "write your number, nor units such as $ or percent sign unless\n"
+         "specified otherwise. If you are asked for a string, don't use\n"
+         "articles or abbreviations (e.g. for cities), and write the digits\n"
+         "in plain text unless specified otherwise. If you are asked for a\n"
+         "comma separated list, apply the above rules depending on whether\n"
+         "the element to be put in the list is a number or a string.\n"
+         "When you have to perform a sum, DON'T try to do that yourself.\n"
+         "Exploit the tool that is able to sum a list of numbers. If you\n"
+         "have to sum the results of previous sums, use the same tool again,\n"
+         "recursively. NEVER do the sums yourself.\n"
+         "Achieve the solution by dividing your reasoning into steps, and\n"
+         "provide an explanation for each step.\n"
+         "You are advised to cycle between reasoning and tool calling,\n"
+         "possibly multiple times. Provide an answer only when you are sure\n"
+         "you don't have to call any tool again. Provide the answer between\n"
+         "<ANSWER> and </ANSWER> tags. I stress that the final answer must\n"
+         "follow the rules explained above.\n"
+     )
+
+     __llm_for_decision: Runnable
+     __llm: Runnable
+     __tools: list[BaseTool]
+
+     def __init__(
+         self,
+         model: str = "qwen2.5-coder:32b",
+         # model: str = "mistral-small3.1",
+         # model: str = "phi4-mini",
+         temperature: float = 0.0,
+         num_ctx: int = 8192
+     ) -> None:
+         """
+         Constructor.
+
+         Args:
+             model: The name of the Ollama model to use.
+             temperature: Temperature parameter.
+             num_ctx: Size of the context window used to generate the
+                 next token.
+         """
+         search_tool = DuckDuckGoSearchResults(
+             description=(
+                 "A wrapper around Duck Duck Go Search. Useful for when you "
+                 "need to answer questions about information you can find on "
+                 "the web. Input should be a search query. It is advisable to "
+                 "use this tool to retrieve web page URLs and use another tool "
+                 "to analyze the pages. If the web source is suggested by the "
+                 "user query, prefer retrieving information from that source. "
+                 "For example, the query may suggest to search on Wikipedia or "
+                 "Medium. In those cases, prepend the query with "
+                 "'site: <name of the source>'. For example: "
+                 "'site: wikipedia.org'"
+             ),
+             output_format="list"
+         )
+         # NOTE: with_retry() returns a new runnable; the result is unused here
+         search_tool.with_retry()
+         self.__tools = [
+             execute_python_script,
+             get_excel_table_content,
+             get_youtube_video_transcript,
+             reverse_string,
+             search_tool,
+             sum_list,
+             transcribe_audio_file,
+             web_page_info_retriever,
+             youtube_video_to_frame_captions
+         ]
+         self.__llm_for_decision = ChatOllama(
+             model=model,
+             temperature=1.0,
+             num_ctx=num_ctx
+         )
+         self.__llm = ChatOllama(
+             model=model,
+             temperature=temperature,
+             num_ctx=num_ctx
+         ).bind_tools(tools=self.__tools)
+
+     def __decide_for_code_agent(self, state: MessagesState) -> str:
+         decision_messages = [
+             SystemMessage(
+                 content=(
+                     "Answer only yes or no. "
+                     "If you think the question can be easily answered "
+                     "by writing Python code and executing it then answer "
+                     "yes. If you think you can answer by exploiting other "
+                     "resources then answer no."
+                 )
+             ),
+             state["messages"][-1]
+         ]
+         answer = self.__llm_for_decision.invoke(decision_messages)
+         return answer.content
+
+     def __run_llm(self, state: MessagesState) -> dict[str, Any]:
+         answer = self.__llm.invoke(state["messages"])
+         # Remove thinking pattern if present
+         pattern = r'\n*<think>.*?</think>\n*'
+         answer.content = re.sub(
+             pattern, "", answer.content, flags=re.DOTALL
+         )
+         return {"messages": [answer]}
+
+     @staticmethod
+     def __extract_last_message(
+         state: list[AnyMessage] | dict[str, Any] | BaseModel,
+         messages_key: str
+     ) -> AnyMessage:
+         if isinstance(state, list):
+             last_message = state[-1]
+         elif isinstance(state, dict) and (messages := state.get(messages_key, [])):
+             last_message = messages[-1]
+         elif messages := getattr(state, messages_key, []):
+             last_message = messages[-1]
+         else:
+             raise ValueError(f"No messages found in input state to tool_edge: {state}")
+         return last_message
+
+     def __route_from_llm(
+         self,
+         state: list[AnyMessage] | dict[str, Any] | BaseModel,
+         messages_key: str = "messages",
+     ) -> Literal["tools", "extract_final_answer"]:
+         ai_message = self.__extract_last_message(state, messages_key)
+         if hasattr(ai_message, "tool_calls") and len(ai_message.tool_calls) > 0:
+             return "tools"
+         return "extract_final_answer"
+
+     @staticmethod
+     def __extract_final_answer(state: MessagesState) -> dict[str, Any]:
+         last_message = state["messages"][-1].content
+         pattern = r"<ANSWER>(?P<answer>.*?)</ANSWER>"
+         m = re.search(pattern, last_message, flags=re.DOTALL)
+         answer = m.group("answer").strip() if m else ""
+         return {"messages": [answer]}
+
+     @property
+     def system_prompt(self) -> SystemMessage:
+         """
+         Returns:
+             The system prompt to use with the agent.
+         """
+         return SystemMessage(content=self.__system_prompt)
+
+     def get(self) -> CompiledGraph:
+         """
+         Factory method.
+
+         Returns:
+             The instance of the agent.
+         """
+         graph_builder = StateGraph(MessagesState)
+
+         graph_builder.add_node("LLM", self.__run_llm)
+         graph_builder.add_node("tools", ToolNode(tools=self.__tools))
+         graph_builder.add_node(
+             "extract_final_answer",
+             self.__extract_final_answer
+         )
+
+         graph_builder.add_edge(start_key=START, end_key="LLM")
+         graph_builder.add_conditional_edges(
+             source="LLM",
+             path=self.__route_from_llm,
+             path_map={
+                 "tools": "tools",
+                 "extract_final_answer": "extract_final_answer"
+             }
+         )
+         graph_builder.add_edge(start_key="tools", end_key="LLM")
+         graph_builder.add_edge(start_key="extract_final_answer", end_key=END)
+
+         return graph_builder.compile()
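
As a quick orientation (this block is not part of the commit), the factory is exercised the same way throughout the test suite below; a minimal sketch, with a stand-in question string:

```python
from langchain_core.messages import HumanMessage
from langgraph.graph import MessagesState

from agent_factory import AgentFactory

# Build the compiled LangGraph agent; an Ollama server must be running.
agent_factory = AgentFactory()
agent = agent_factory.get()

# Seed the state with the system prompt plus the question to answer.
initial_state = MessagesState(
    messages=[
        agent_factory.system_prompt,
        HumanMessage(content='{"task_id": "...", "question": "..."}')  # stand-in
    ]
)

# The last message of the final state holds the text extracted from
# the <ANSWER>...</ANSWER> tags.
final_state = agent.invoke(input=initial_state)
print(final_state["messages"][-1].content)
```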
app.py CHANGED
@@ -1,25 +1,125 @@
+ import dotenv
+ import importlib.resources
+ import json
  import os
+ from typing import Any
+
  import gradio as gr
  import requests
- import inspect
  import pandas as pd
+ from pathlib import Path
+ from langchain_core.messages import HumanMessage
+ from langgraph.graph import MessagesState
+ from langgraph.graph.graph import CompiledGraph
+
+ from agent_factory import AgentFactory
+
+ dotenv.load_dotenv()
+
+ HF_ACCESS_TOKEN = os.getenv("HF_ACCESS_TOKEN")
+ HF_USERNAME = os.getenv("HF_USERNAME")

  # (Keep Constants as is)
  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

+ DATA_PATH = Path(str(importlib.resources.files("data")))
+ QUESTIONS_FILE_PATH = DATA_PATH / "questions.jsonl"
+ AGENT_ANSWERS_FILE_PATH = DATA_PATH / "agent-answers.jsonl"
+
+
  # --- Basic Agent Definition ---
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
  class BasicAgent:
+
+     __agent_factory: AgentFactory
+     __agent: CompiledGraph
+
      def __init__(self):
+         self.__agent_factory = AgentFactory()
+         self.__agent = self.__agent_factory.get()
          print("BasicAgent initialized.")
+
      def __call__(self, question: str) -> str:
          print(f"Agent received question (first 50 chars): {question[:50]}...")
-         fixed_answer = "This is a default answer."
-         print(f"Agent returning fixed answer: {fixed_answer}")
-         return fixed_answer
-
- def run_and_submit_all( profile: gr.OAuthProfile | None):
+
+         initial_state = MessagesState(
+             messages=[
+                 self.__agent_factory.system_prompt,
+                 HumanMessage(content=question)
+             ]
+         )
+
+         final_state = self.__agent.invoke(input=initial_state)
+
+         answer = final_state["messages"][-1].content
+
+         print(f"Agent returning answer: {answer}")
+         return answer
+
+
+ def download_questions_and_files() -> dict[str, Any]:
+     api_url = DEFAULT_API_URL
+     questions_url = f"{api_url}/questions"
+     files_base_url = f"{api_url}/files"
+
+     print(f"Fetching questions from: {questions_url}")
+     try:
+         response = requests.get(questions_url, timeout=15)
+         response.raise_for_status()
+         questions_data = response.json()
+         if not questions_data:
+             print("Fetched questions list is empty.")
+             return {
+                 "error": "Fetched questions list is empty or invalid format."
+             }
+         print(f"Fetched {len(questions_data)} questions.")
+     except requests.exceptions.RequestException as e:
+         print(f"Error fetching questions: {e}")
+         return {
+             "error": f"Error fetching questions: {e}"
+         }
+     except requests.exceptions.JSONDecodeError as e:
+         print(f"Error decoding JSON response from questions endpoint: {e}")
+         print(f"Response text: {response.text[:500]}")
+         return {
+             "error": f"Error decoding server response for questions: {e}"
+         }
+     except Exception as e:
+         print(f"An unexpected error occurred fetching questions: {e}")
+         return {
+             "error": f"An unexpected error occurred fetching questions: {e}"
+         }
+
+     # Save input questions and related files into the data subdirectory
+     try:
+         with open(QUESTIONS_FILE_PATH, mode="w") as f:
+             for cur_question in questions_data:
+                 json.dump(cur_question, f)
+                 f.write("\n")
+
+                 file_name = cur_question["file_name"]
+                 if len(file_name) > 0:
+                     # Single quotes inside the f-string keep this compatible
+                     # with Python versions before 3.12
+                     file_url = f"{files_base_url}/{cur_question['task_id']}"
+                     response = requests.get(file_url)
+                     out_file_path = DATA_PATH / file_name
+                     with open(out_file_path, 'wb') as file:
+                         file.write(response.content)
+     except requests.exceptions.RequestException as e:
+         print(f"Error fetching question-related file: {e}")
+         return {
+             "error": f"Error fetching question-related file: {e}"
+         }
+     except Exception as e:
+         print(f"An unexpected error occurred fetching question-related file: {e}")
+         return {
+             "error": f"An unexpected error occurred fetching question-related file: {e}"
+         }
+
+     return questions_data
+
+
+ def run_and_submit_all() -> tuple[str, pd.DataFrame | None]:
      """
      Fetches all questions, runs the BasicAgent on them, submits all answers,
      and displays the results.
@@ -27,15 +127,10 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
      # --- Determine HF Space Runtime URL and Repo URL ---
      space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code

-     if profile:
-         username= f"{profile.username}"
-         print(f"User logged in: {username}")
-     else:
-         print("User not logged in.")
-         return "Please Login to Hugging Face with the button.", None
+     username = f"{HF_USERNAME}"
+     print(f"User: {username}")

      api_url = DEFAULT_API_URL
-     questions_url = f"{api_url}/questions"
      submit_url = f"{api_url}/submit"

      # 1. Instantiate Agent ( modify this part to create your agent)
@@ -44,38 +139,21 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
      except Exception as e:
          print(f"Error instantiating agent: {e}")
          return f"Error initializing agent: {e}", None
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
+     # In the case of an app running as a Hugging Face space, this link points
+     # towards your codebase (useful for others, so please keep it public)
      agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
      print(agent_code)

-     # 2. Fetch Questions
-     print(f"Fetching questions from: {questions_url}")
-     try:
-         response = requests.get(questions_url, timeout=15)
-         response.raise_for_status()
-         questions_data = response.json()
-         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
-         print(f"Fetched {len(questions_data)} questions.")
-     except requests.exceptions.RequestException as e:
-         print(f"Error fetching questions: {e}")
-         return f"Error fetching questions: {e}", None
-     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
-     except Exception as e:
-         print(f"An unexpected error occurred fetching questions: {e}")
-         return f"An unexpected error occurred fetching questions: {e}", None
+     # 2. Fetch Questions and related files (they get saved into the data directory)
+     questions_data = download_questions_and_files()

-     # 3. Run your Agent
+     # 3. Run your Agent and save the agent's answers for later review
      results_log = []
      answers_payload = []
      print(f"Running agent on {len(questions_data)} questions...")
      for item in questions_data:
          task_id = item.get("task_id")
-         question_text = item.get("question")
+         question_text = json.dumps(item)
          if not task_id or question_text is None:
              print(f"Skipping item with missing task_id or question: {item}")
              continue
@@ -91,6 +169,11 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
          print("Agent did not produce any answers to submit.")
          return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

+     with open(AGENT_ANSWERS_FILE_PATH, mode="w") as f:
+         for cur_answer in answers_payload:
+             json.dump(cur_answer, f)
+             f.write("\n")
+
      # 4. Prepare Submission
      submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
      status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
@@ -98,8 +181,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):

      # 5. Submit
      print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+     headers = {
+         "Authorization": f"Bearer {HF_ACCESS_TOKEN}",
+         "Content-Type": "application/json"
+     }
      try:
-         response = requests.post(submit_url, json=submission_data, timeout=60)
+         response = requests.post(
+             submit_url,
+             json=submission_data,
+             headers=headers,
+             timeout=60
+         )
          response.raise_for_status()
          result_data = response.json()
          final_status = (
@@ -193,4 +285,4 @@ if __name__ == "__main__":
      print("-"*(60 + len(" App Starting ")) + "\n")

      print("Launching Gradio Interface for Basic Agent Evaluation...")
-     demo.launch(debug=True, share=False)
+     demo.launch(debug=True, share=False)
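
The `demo` object itself is defined outside the hunks shown in this commit; for context, a hypothetical reconstruction of the kind of Gradio wiring it implies (widget names and labels here are assumptions, not part of the commit):

```python
import gradio as gr

# Hypothetical sketch: a single button drives the whole
# fetch -> run agent -> submit pipeline defined above.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # run_and_submit_all returns (status string, results DataFrame)
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
```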
data/__init__.py ADDED
File without changes
local_setup/__init__.py ADDED
File without changes
local_setup/nltk_config.py ADDED
@@ -0,0 +1,12 @@
+ import nltk
+
+
+ def download_nltk_packages() -> None:
+     nltk.download('punkt')
+     nltk.download('averaged_perceptron_tagger')
+     nltk.download('stopwords')
+     nltk.download('wordnet')
+
+
+ if __name__ == "__main__":
+     download_nltk_packages()
question_retriever.py ADDED
@@ -0,0 +1,42 @@
+ import importlib.resources
+ import json
+ from pathlib import Path
+
+
+ __questions_path = (
+     Path(str(importlib.resources.files("data"))) / "questions.jsonl"
+ )
+
+
+ def get_question(task_id: str) -> str | None:
+     """
+     Given the ID of one of the available questions, reads it from
+     the JSONL file where questions have been previously downloaded.
+
+     Args:
+         task_id: The hash code of the question.
+
+     Returns:
+         The JSON line with the required question, or None if the
+         task_id is unknown.
+     """
+     with open(__questions_path, 'r', encoding='utf-8') as file:
+         for line in file:
+             data = json.loads(line)
+             if data["task_id"] == task_id:
+                 return line
+
+     return None
+
+
+ def get_all_questions() -> list[dict]:
+     """
+     Retrieves the list of all questions previously downloaded.
+
+     Returns:
+         The list of questions previously downloaded, one dict per
+         JSONL line.
+     """
+     questions = []
+     with open(__questions_path, 'r', encoding='utf-8') as file:
+         for line in file:
+             questions.append(json.loads(line))
+     return questions
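
A quick usage sketch (not part of the commit; the task_id below is one of those used in the tests, and `data/questions.jsonl` is assumed to have been downloaded already):

```python
import json

from question_retriever import get_question, get_all_questions

# Look up a single question by its task_id hash; the raw JSON line is returned.
line = get_question(task_id="8e867cd7-cff9-4e6c-867a-ff5ddc2550be")
if line is not None:
    question = json.loads(line)
    print(question["question"])

# Or iterate over every previously downloaded question.
for q in get_all_questions():
    print(q["task_id"])
```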
requirements.txt CHANGED
@@ -1,2 +1,31 @@
- gradio
- requests
+ beautifulsoup4
+ python-dotenv
+ duckduckgo-search
+ faiss-cpu
+ gradio[oauth]
+ helium
+ langchain
+ langchain_community
+ langchain-core
+ # NOTE (editorial assumption): imported directly by
+ # tools/web_page_info_retriever.py but missing from the original list
+ langchain-huggingface
+ langchain-ollama
+ langchain-unstructured[local]
+ langgraph
+ opencv-python
+ pandas
+ pdfminer
+ pillow
+ pydantic
+ pytest
+ # https://www.youtube.com/watch?v=VgxnyKnB3qc
+ # https://github.com/juanbindez/pytubefix
+ pytubefix
+ requests
+ torch
+ transformers
+ ultralytics
+ # NOTE: For unstructured to work locally, also install the system
+ # requirements according to what's described here:
+ # https://docs.unstructured.io/open-source/installation/full-installation
+ unstructured[all-docs]
+ youtube-transcript-api
tests/__init__.py ADDED
File without changes
tests/data/__init__.py ADDED
File without changes
tests/download_data.py ADDED
@@ -0,0 +1,31 @@
+ import json
+
+ import requests
+
+ from app import DEFAULT_API_URL
+
+
+ def main() -> None:
+     api_url = DEFAULT_API_URL
+     questions_url = f"{api_url}/questions"
+     dest_file = "data/questions.jsonl"
+     try:
+         response = requests.get(questions_url, timeout=15)
+         response.raise_for_status()
+         questions_data = response.json()
+         if not questions_data:
+             print("Fetched questions list is empty.")
+             return
+         print(f"Fetched {len(questions_data)} questions.")
+     except requests.exceptions.RequestException as e:
+         print(f"Error fetching questions: {e}")
+         return
+
+     with open(dest_file, mode="w") as f:
+         for item in questions_data:
+             json.dump(item, f)
+             f.write("\n")
+
+     print("Done.")
+
+
+ if __name__ == "__main__":
+     main()
tests/resources/__init__.py ADDED
File without changes
tests/resources/african-penguins-kelp-gull.jpg ADDED
tests/resources/penguin.jpeg ADDED
tests/test_agent.py ADDED
@@ -0,0 +1,27 @@
+ from langchain_core.messages import HumanMessage
+ from langgraph.graph import MessagesState
+
+ from agent_factory import AgentFactory
+ from question_retriever import get_question
+
+
+ def test_agent() -> None:
+     # given
+     # grocery list
+     task_id = "3cef3a44-215e-4aed-8e3b-b1e3f08063b7"
+     question = get_question(task_id=task_id)
+
+     agent_factory = AgentFactory()
+     agent = agent_factory.get()
+
+     initial_state = MessagesState(
+         messages=[
+             agent_factory.system_prompt,
+             HumanMessage(content=question)
+         ]
+     )
+
+     final_state = agent.invoke(input=initial_state)
+     answer = final_state["messages"][-1].content
+
+     print(answer)
tests/test_download_questions_and_files.py ADDED
@@ -0,0 +1,6 @@
+ from app import download_questions_and_files
+
+
+ def test_download_questions_and_files() -> None:
+     download_questions_and_files()
+     print("Download success.")
tests/tools/__init__.py ADDED
File without changes
tests/tools/test_audio_transcriber.py ADDED
@@ -0,0 +1,53 @@
+ from langchain_core.messages import HumanMessage
+ from langgraph.graph import MessagesState
+
+ from agent_factory import AgentFactory
+ from question_retriever import get_question
+
+
+ def test_audio_transcriber() -> None:
+     # given
+     task_id = "1f975693-876d-457b-a649-393859e79bf3"
+     question = get_question(task_id=task_id)
+
+     agent_factory = AgentFactory()
+     agent = agent_factory.get()
+
+     initial_state = MessagesState(
+         messages=[
+             agent_factory.system_prompt,
+             HumanMessage(content=question)
+         ]
+     )
+
+     # when
+     final_state = agent.invoke(input=initial_state)
+
+     # then
+     answer = final_state["messages"][-1].content
+
+     assert answer == "132,133,134,197,245"
+
+
+ def test_audio_transcriber_pie_recipe() -> None:
+     # given
+     task_id = "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3"
+     question = get_question(task_id=task_id)
+
+     agent_factory = AgentFactory()
+     agent = agent_factory.get()
+
+     initial_state = MessagesState(
+         messages=[
+             agent_factory.system_prompt,
+             HumanMessage(content=question)
+         ]
+     )
+
+     # when
+     final_state = agent.invoke(input=initial_state)
+
+     # then
+     answer = final_state["messages"][-1].content
+
+     assert answer == "cornstarch,granulated sugar,lemon juice,ripe strawberries,vanilla extract"
tests/tools/test_bird_classifier.py ADDED
@@ -0,0 +1,43 @@
+ import importlib.resources
+
+ from pathlib import Path
+ from PIL import Image
+ from transformers import pipeline
+
+ __resources_path = Path(str(importlib.resources.files("tests.resources")))
+
+
+ def test_bird_classifier_with_one_single_bird() -> None:
+     # given
+     img_file = __resources_path / "penguin.jpeg"
+     img = Image.open(img_file)
+
+     # when
+     pipe = pipeline(
+         task="image-classification",
+         model="dennisjooo/Birds-Classifier-EfficientNetB2"
+     )
+
+     result = pipe(img)
+     result = result[0]
+
+     # then
+     assert "penguin" in result["label"].lower()
+
+
+ def test_bird_classifier_with_multiple_birds() -> None:
+     # given
+     img_file = __resources_path / "african-penguins-kelp-gull.jpg"
+     img = Image.open(img_file)
+
+     # when
+     pipe = pipeline(
+         task="image-classification",
+         model="dennisjooo/Birds-Classifier-EfficientNetB2"
+     )
+
+     result = pipe(img)
+     result = result[0]
+
+     # then
+     assert "penguin" not in result["label"].lower()
tests/tools/test_excel_table_content_retriever.py ADDED
@@ -0,0 +1,29 @@
+ from langchain_core.messages import HumanMessage
+ from langgraph.graph import MessagesState
+
+ from agent_factory import AgentFactory
+ from question_retriever import get_question
+
+
+ def test_excel_table_content_retriever() -> None:
+     # given
+     task_id = "7bd855d8-463d-4ed5-93ca-5fe35145f733"
+     question = get_question(task_id=task_id)
+
+     agent_factory = AgentFactory()
+     agent = agent_factory.get()
+
+     initial_state = MessagesState(
+         messages=[
+             agent_factory.system_prompt,
+             HumanMessage(content=question)
+         ]
+     )
+
+     # when
+     final_state = agent.invoke(input=initial_state)
+
+     # then
+     answer = final_state["messages"][-1].content
+
+     assert answer.lower() == "89706"
tests/tools/test_math_reasoning.py ADDED
@@ -0,0 +1,30 @@
+ from langchain_core.messages import HumanMessage
+ from langgraph.graph import MessagesState
+
+ from agent_factory import AgentFactory
+ from question_retriever import get_question
+
+
+ def test_math_reasoning() -> None:
+     # given
+     # Table describing an operation
+     task_id = "6f37996b-2ac7-44b0-8e68-6d28256631b4"
+     question = get_question(task_id=task_id)
+
+     agent_factory = AgentFactory()
+     agent = agent_factory.get()
+
+     initial_state = MessagesState(
+         messages=[
+             agent_factory.system_prompt,
+             HumanMessage(content=question)
+         ]
+     )
+
+     # when
+     final_state = agent.invoke(input=initial_state)
+
+     # then
+     answer = final_state["messages"][-1].content
+
+     assert answer.lower() == "b,e"
tests/tools/test_python_script_executor.py ADDED
@@ -0,0 +1,29 @@
+ from langchain_core.messages import HumanMessage
+ from langgraph.graph import MessagesState
+
+ from agent_factory import AgentFactory
+ from question_retriever import get_question
+
+
+ def test_python_script_executor() -> None:
+     # given
+     task_id = "f918266a-b3e0-4914-865d-4faa564f1aef"
+     question = get_question(task_id=task_id)
+
+     agent_factory = AgentFactory()
+     agent = agent_factory.get()
+
+     initial_state = MessagesState(
+         messages=[
+             agent_factory.system_prompt,
+             HumanMessage(content=question)
+         ]
+     )
+
+     # when
+     final_state = agent.invoke(input=initial_state)
+
+     # then
+     answer = final_state["messages"][-1].content
+
+     assert answer.lower() == "0"
tests/tools/test_string_reverser.py ADDED
@@ -0,0 +1,27 @@
+ from langchain_core.messages import HumanMessage
+ from langgraph.graph import MessagesState
+
+ from agent_factory import AgentFactory
+ from question_retriever import get_question
+
+
+ def test_string_reverser() -> None:
+     # given
+     task_id = "2d83110e-a098-4ebb-9987-066c06fa42d0"
+     question = get_question(task_id=task_id)
+
+     agent_factory = AgentFactory()
+     agent = agent_factory.get()
+
+     initial_state = MessagesState(
+         messages=[
+             agent_factory.system_prompt,
+             HumanMessage(content=question)
+         ]
+     )
+
+     final_state = agent.invoke(input=initial_state)
+
+     answer = final_state["messages"][-1].content
+
+     assert answer.lower() == "right"
tests/tools/test_web_page_info_retriever.py ADDED
@@ -0,0 +1,21 @@
+ from tools import web_page_info_retriever
+
+
+ def test_web_document_info_retriever() -> None:
+     # given
+     web_url = "https://en.wikipedia.org/wiki/Albert_Einstein"
+     query = "Albert Einstein nobel prize"
+
+     # when
+     results = web_page_info_retriever.invoke({
+         "web_url": web_url,
+         "query": query
+     })
+
+     # then
+     all_text = " ".join(results)
+     assert "1922" in all_text
+     assert "1921" in all_text
+     assert "photoelectric" in all_text
+     assert "Nobel" in all_text
tests/tools/test_web_search.py ADDED
@@ -0,0 +1,28 @@
+ from langchain_core.messages import HumanMessage
+ from langgraph.graph import MessagesState
+
+ from agent_factory import AgentFactory
+ from question_retriever import get_question
+
+
+ def test_web_search() -> None:
+     # given
+     task_id = "8e867cd7-cff9-4e6c-867a-ff5ddc2550be"
+     question = get_question(task_id=task_id)
+
+     agent_factory = AgentFactory()
+     agent = agent_factory.get()
+
+     initial_state = MessagesState(
+         messages=[
+             agent_factory.system_prompt,
+             HumanMessage(content=question)
+         ]
+     )
+
+     # when
+     final_state = agent.invoke(input=initial_state)
+     answer = final_state["messages"][-1].content
+
+     # then
+     assert answer == "2"
tests/tools/test_whisper.py ADDED
@@ -0,0 +1,53 @@
+ import importlib.resources
+ import json
+
+ import torch
+
+ from pathlib import Path
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+
+ from question_retriever import get_question
+ from tools.data_helpers import get_file_path
+
+ __resources_path = Path(str(importlib.resources.files("data")))
+
+
+ def test_whisper() -> None:
+
+     task_id = "1f975693-876d-457b-a649-393859e79bf3"
+     question = json.loads(get_question(task_id=task_id))
+
+     audio_file = get_file_path(file_name=question["file_name"])
+
+     # Keep the GPU free for the Ollama LLM: force CPU inference
+     # cuda_available = torch.cuda.is_available()
+     cuda_available = False
+     device = "cuda:0" if cuda_available else "cpu"
+     torch_dtype = torch.float16 if cuda_available else torch.float32
+
+     model_id = "openai/whisper-large-v3-turbo"
+
+     model = AutoModelForSpeechSeq2Seq.from_pretrained(
+         model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+     )
+     model.to(device)
+
+     processor = AutoProcessor.from_pretrained(model_id)
+
+     pipe = pipeline(
+         "automatic-speech-recognition",
+         model=model,
+         tokenizer=processor.tokenizer,
+         feature_extractor=processor.feature_extractor,
+         torch_dtype=torch_dtype,
+         device=device,
+     )
+
+     sample = audio_file
+
+     generate_kwargs = {
+         "return_timestamps": True,
+     }
+
+     result = pipe(sample, generate_kwargs=generate_kwargs)
+
+     print(result["text"])
tests/tools/test_youtube_transcript.py ADDED
@@ -0,0 +1,28 @@
+ from langchain_core.messages import HumanMessage
+ from langgraph.graph import MessagesState
+
+ from agent_factory import AgentFactory
+ from question_retriever import get_question
+
+
+ def test_get_youtube_video_transcript() -> None:
+     # given
+     task_id = "9d191bce-651d-4746-be2d-7ef8ecadb9c2"
+     question = get_question(task_id=task_id)
+
+     agent_factory = AgentFactory()
+     agent = agent_factory.get()
+
+     initial_state = MessagesState(
+         messages=[
+             agent_factory.system_prompt,
+             HumanMessage(content=question)
+         ]
+     )
+
+     # when
+     final_state = agent.invoke(input=initial_state)
+     answer = final_state["messages"][-1].content
+
+     # then
+     assert answer.lower() == "extremely"
tests/tools/test_youtube_video_analysis.py ADDED
@@ -0,0 +1,28 @@
+ from langchain_core.messages import HumanMessage
+ from langgraph.graph import MessagesState
+
+ from agent_factory import AgentFactory
+ from question_retriever import get_question
+
+
+ def test_youtube_video_analysis() -> None:
+     # given
+     task_id = "a1e91b78-d3d8-4675-bb8d-62741b4b68a6"
+     question = get_question(task_id=task_id)
+
+     agent_factory = AgentFactory()
+     agent = agent_factory.get()
+
+     initial_state = MessagesState(
+         messages=[
+             agent_factory.system_prompt,
+             HumanMessage(content=question)
+         ]
+     )
+
+     # when
+     final_state = agent.invoke(input=initial_state)
+     answer = final_state["messages"][-1].content
+
+     # then
+     assert answer.lower() == "2"
tests/tools/test_youtube_video_frame_sampler.py ADDED
@@ -0,0 +1,31 @@
+ import json
+ import tempfile
+
+ from pathlib import Path
+ from tools.youtube_helpers import youtube_video_frame_sampler, youtube_video_to_frame_captions
+
+
+ def test_youtube_video_frame_sampler() -> None:
+     # given
+     temp_frames_dir = tempfile.TemporaryDirectory()
+     dest_dir = temp_frames_dir.name
+
+     # when
+     youtube_video_frame_sampler(
+         addr="https://www.youtube.com/watch?v=L1vXCYZAYYM",
+         dest_dir=dest_dir
+     )
+
+     # then
+     dest_path = Path(dest_dir)
+     assert len(list(dest_path.glob('*.jpg'))) == 61
+
+
+ def test_youtube_video_captions_generator() -> None:
+     # given, when
+     # NOTE: the tool is invoked through the Runnable interface, consistently
+     # with the other tool tests
+     captions_str = youtube_video_to_frame_captions.invoke({
+         "addr": "https://www.youtube.com/watch?v=L1vXCYZAYYM"
+     })
+
+     # then
+     captions = json.loads(captions_str)
+     assert isinstance(captions, list)
tools/__init__.py ADDED
@@ -0,0 +1,8 @@
+ from .audio_transcriber import transcribe_audio_file
+ from .excel_table_content_retriever import get_excel_table_content
+ from .math_tools import sum_list
+ from .python_script_executor import execute_python_script
+ from .string_reverser import reverse_string
+ from .web_page_info_retriever import web_page_info_retriever
+ from .youtube_video_transcript_retriever import get_youtube_video_transcript
+ from .youtube_helpers import youtube_video_to_frame_captions
tools/audio_transcriber.py ADDED
@@ -0,0 +1,54 @@
+ import torch
+
+ from langchain_core.tools import tool
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+
+ from .data_helpers import get_file_path
+
+
+ @tool(parse_docstring=True)
+ def transcribe_audio_file(file_name: str) -> str:
+     """
+     Transcribes an audio file to text.
+
+     Args:
+         file_name: The name of the audio file. This is simply the file name,
+             not the full path.
+
+     Returns:
+         The transcribed text.
+     """
+     # Specific setting for a local run where the GPU is kept busy by the
+     # LLM (Ollama): force CPU inference
+     cuda_available = False
+     device = "cuda:0" if cuda_available else "cpu"
+     torch_dtype = torch.float16 if cuda_available else torch.float32
+
+     model_id = "openai/whisper-large-v3-turbo"
+
+     model = AutoModelForSpeechSeq2Seq.from_pretrained(
+         model_id,
+         torch_dtype=torch_dtype,
+         low_cpu_mem_usage=True,
+         use_safetensors=True
+     )
+     model.to(device)
+
+     processor = AutoProcessor.from_pretrained(model_id)
+
+     pipe = pipeline(
+         "automatic-speech-recognition",
+         model=model,
+         tokenizer=processor.tokenizer,
+         feature_extractor=processor.feature_extractor,
+         torch_dtype=torch_dtype,
+         device=device,
+     )
+
+     generate_kwargs = {
+         "return_timestamps": True,
+     }
+
+     file_path = get_file_path(file_name)
+     result = pipe(file_path, generate_kwargs=generate_kwargs)
+
+     return result["text"]
tools/data_helpers.py ADDED
@@ -0,0 +1,16 @@
+ import importlib.resources
+ from pathlib import Path
+
+
+ def get_file_path(file_name: str) -> str:
+     """
+     Returns the full path of a question file.
+
+     Args:
+         file_name: The file name specified in the question.
+
+     Returns:
+         The full path of the file that was previously downloaded.
+     """
+     data_path = Path(str(importlib.resources.files("data")))
+     return str(data_path / file_name)
tools/excel_table_content_retriever.py ADDED
@@ -0,0 +1,23 @@
+ import pandas as pd
+ from langchain_core.tools import tool
+
+ from .data_helpers import get_file_path
+
+
+ @tool(parse_docstring=True)
+ def get_excel_table_content(file_name: str) -> str:
+     """
+     Given an Excel file name, returns its content as a string,
+     together with the explicit list of column names.
+     It assumes the file contains a table. Reads the first sheet with
+     pandas and returns its string representation.
+
+     Args:
+         file_name: Name of the Excel file.
+
+     Returns:
+         String representation of the content of the first sheet of the
+         file.
+     """
+     file_path = get_file_path(file_name)
+     df = pd.read_excel(io=file_path)
+     return str(df) + f"\nColumn names: {df.columns.tolist()}\n"
tools/math_tools.py ADDED
@@ -0,0 +1,15 @@
+ from langchain_core.tools import tool
+
+
+ @tool(parse_docstring=True, return_direct=True)
+ def sum_list(numbers: list[float]) -> float:
+     """
+     Sums the provided input numbers.
+
+     Args:
+         numbers: The sequence of numbers to sum.
+
+     Returns:
+         The sum of the input numbers.
+     """
+     return sum(numbers)
tools/python_script_executor.py ADDED
@@ -0,0 +1,27 @@
+ import subprocess as sub
+
+ from langchain_core.tools import tool
+
+ from tools.data_helpers import get_file_path
+
+
+ @tool(parse_docstring=True)
+ def execute_python_script(file_name: str) -> str:
+     """
+     Given the Python source file name, executes it and returns the output
+     captured from stdout.
+
+     Args:
+         file_name: The name of the Python source to execute. This is only the
+             file name, not the full path.
+
+     Returns:
+         The execution output, captured from stdout.
+     """
+     source_file = get_file_path(file_name)
+     # NOTE: the script runs with the same interpreter and no sandboxing;
+     # only use this on trusted task files
+     result = sub.run(
+         args=["python", source_file],
+         capture_output=True,
+         encoding="utf-8"
+     )
+     return result.stdout
tools/string_reverser.py ADDED
@@ -0,0 +1,16 @@
+ from langchain_core.tools import tool
+
+
+ @tool(parse_docstring=True)
+ def reverse_string(s: str) -> str:
+     """
+     Returns the reverse of a string. Use this tool when you suspect that
+     the provided prompt is written with characters in reverse order.
+
+     Args:
+         s: The input string.
+
+     Returns:
+         The output string: the input with its characters in reverse order.
+     """
+     return s[::-1]
tools/video_sampling.py ADDED
@@ -0,0 +1,102 @@
+ import json
+ import os
+ import cv2
+
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+
+ # model_id = "Salesforce/blip-image-captioning-base"
+ model_id = "Salesforce/blip-image-captioning-large"
+ captioning_processor = BlipProcessor.from_pretrained(model_id)
+ captioning_model = BlipForConditionalGeneration.from_pretrained(model_id)
+
+
+ def extract_frames(video_path, output_folder, interval_ms=2000) -> None:
+     """
+     Extracts frames from a video into an output folder at a specified time
+     interval. Frames are saved as *.jpg images.
+
+     Args:
+         video_path: The file name of the video to sample.
+         output_folder: The output directory for the extracted frames.
+         interval_ms: The sampling interval in milliseconds.
+             NOTE: No anti-aliasing filter is applied.
+     """
+     if not os.path.exists(output_folder):
+         os.makedirs(output_folder)
+
+     cap = cv2.VideoCapture(video_path)
+     fps = cap.get(cv2.CAP_PROP_FPS)  # Get fps
+     # Compute the sampling interval as a number of frames to skip
+     interval_frames = int(fps * interval_ms * 0.001)
+
+     frame_count = 0
+     saved_frame_count = 0
+
+     while True:
+         ret, frame = cap.read()
+         if not ret:
+             break
+
+         # Keep only selected frames
+         if frame_count % interval_frames == 0:
+             frame_filename = os.path.join(
+                 output_folder,
+                 f"frame_{saved_frame_count:04d}.jpg"
+             )
+             cv2.imwrite(frame_filename, frame)
+             saved_frame_count += 1
+
+         frame_count += 1
+
+     cap.release()
+
+
+ def extract_frame_captions(
+     video_path,
+     interval_ms=2000
+ ) -> str:
+     """
+     Extracts frame captions from a video at a specified time
+     interval.
+
+     Args:
+         video_path: The file name of the video to sample.
+         interval_ms: The sampling interval in milliseconds.
+             NOTE: No anti-aliasing filter is applied.
+
+     Returns:
+         The frame captions, as a JSON-encoded list of strings.
+     """
+     cap = cv2.VideoCapture(video_path)
+     fps = cap.get(cv2.CAP_PROP_FPS)  # Get fps
+     # Compute the sampling interval as a number of frames to skip
+     interval_frames = int(fps * interval_ms * 0.001)
+
+     frame_count = 0
+     saved_frame_count = 0
+
+     captions = []
+     while True:
+         ret, frame = cap.read()
+         if not ret:
+             break
+
+         # Keep only selected frames
+         if frame_count % interval_frames == 0:
+             frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+             inputs = captioning_processor(
+                 frame,
+                 text="Detailed image description:",
+                 return_tensors="pt"
+             )
+             out = captioning_model.generate(**inputs)
+             cur_caption = (
+                 captioning_processor.decode(out[0], skip_special_tokens=True)
+             )
+             captions.append(cur_caption)
+             saved_frame_count += 1
+
+         frame_count += 1
+
+     cap.release()
+     return json.dumps(captions)
tools/web_page_info_retriever.py ADDED
@@ -0,0 +1,59 @@
+ import faiss
+ from langchain_community.docstore.in_memory import InMemoryDocstore
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.vectorstores.utils import DistanceStrategy
+ from langchain_core.tools import tool
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_unstructured import UnstructuredLoader
+
+
+ @tool(parse_docstring=True)
+ def web_page_info_retriever(
+     web_url: str,
+     query: str,
+     k: int = 10
+ ) -> list[str]:
+     """
+     Retrieves information on the fly from a web page.
+
+     Args:
+         web_url: The url of the web page.
+         query: The user query.
+         k: The maximum number of documents to retrieve. Use a reasonable
+             number depending on the amount of context you want to retrieve.
+             Usually a number between 10 and 20 should suffice (but there is
+             no upper bound to this parameter).
+
+     Returns:
+         A list of strings containing the most relevant documents retrieved.
+     """
+     loader = UnstructuredLoader(web_url=web_url)
+     docs = loader.load()
+
+     embeddings = HuggingFaceEmbeddings(
+         model_name="sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
+         model_kwargs={"device": "cpu"}
+     )
+
+     index = faiss.IndexFlatIP(len(embeddings.embed_query("hello world")))
+
+     vector_store = FAISS(
+         embedding_function=embeddings,
+         index=index,
+         docstore=InMemoryDocstore(),
+         index_to_docstore_id={},
+         distance_strategy=DistanceStrategy.COSINE,
+     )
+     vector_store.add_documents(documents=docs)
+
+     retrieved_docs = vector_store.similarity_search_with_relevance_scores(
+         query=query,
+         k=k
+     )
+
+     # Sort by relevance score (highest first) and keep only the text
+     sorted_contents = [
+         t[0].page_content
+         for t in sorted(retrieved_docs, key=lambda x: x[1], reverse=True)
+     ]
+
+     return sorted_contents
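
For standalone experimentation, the tool can be invoked directly through the LangChain Runnable interface, mirroring tests/tools/test_web_page_info_retriever.py:

```python
from tools import web_page_info_retriever

# LangChain tools take a dict of their declared arguments via .invoke().
chunks = web_page_info_retriever.invoke({
    "web_url": "https://en.wikipedia.org/wiki/Albert_Einstein",
    "query": "Albert Einstein nobel prize",
    "k": 10,
})
print(chunks[0])  # most relevant chunk first
```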
tools/youtube_helpers.py ADDED
@@ -0,0 +1,62 @@
+ import os
+ import tempfile
+
+ from langchain_core.tools import tool
+ from pytubefix import YouTube
+
+ from .video_sampling import extract_frames, extract_frame_captions
+
+
+ def download_video(url, output_path):
+     """
+     Downloads the video into an output path.
+
+     Args:
+         url: The URL of the YouTube video.
+         output_path: The output folder where to download the video.
+
+     Returns:
+         The file name of the downloaded video.
+     """
+     yt = YouTube(url)
+     stream = yt.streams.get_lowest_resolution()
+     stream.download(output_path)
+     return os.path.join(output_path, stream.default_filename)
+
+
+ def youtube_video_frame_sampler(addr: str, dest_dir: str) -> None:
+     """
+     Downsamples a YouTube video into frames, saving them into a destination
+     directory.
+
+     Args:
+         addr: The URL of the YouTube video.
+         dest_dir: The destination directory.
+     """
+     temp_dir = tempfile.TemporaryDirectory()
+     download_path = temp_dir.name
+
+     video_path = download_video(addr, download_path)
+     extract_frames(video_path, dest_dir)
+
+
+ @tool(parse_docstring=True)
+ def youtube_video_to_frame_captions(addr: str) -> str:
+     """
+     Analyzes video frames from a YouTube video and obtains
+     captions for each frame. This is useful when we need to
+     answer questions on the images shown in the video. It adds
+     computer vision capabilities to the LLM.
+
+     Args:
+         addr: The URL of the YouTube video.
+
+     Returns:
+         The frame captions, as a JSON-encoded list of strings.
+     """
+     temp_dir = tempfile.TemporaryDirectory()
+     download_path = temp_dir.name
+
+     video_path = download_video(addr, download_path)
+     return extract_frame_captions(video_path)
tools/youtube_video_transcript_retriever.py ADDED
@@ -0,0 +1,24 @@
+ import youtube_transcript_api as yt_api
+ from langchain_core.tools import tool
+
+
+ @tool(parse_docstring=True)
+ def get_youtube_video_transcript(addr: str) -> str:
+     """
+     Given the address of a YouTube video, returns the transcript of the audio.
+     This is useful when we need to answer questions on what is said in
+     the video.
+
+     Args:
+         addr: The URL of the YouTube video.
+
+     Returns:
+         The transcript of the audio extracted from the video. Different items
+         of the transcript are separated by ";". These different items may be
+         sentences pronounced by different characters.
+     """
+     # Assumes a plain "watch?v=<id>" URL; see the sketch below for a more
+     # robust alternative
+     video_id = addr.split(sep="=")[1]
+     ytt_api = yt_api.YouTubeTranscriptApi()
+     fetched_data = ytt_api.fetch(video_id)
+     result_transcript = ";".join([t.text for t in fetched_data])
+     return result_transcript
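
The `split("=")` above assumes a plain `watch?v=<id>` URL and would break on extra query parameters or short links. A more defensive variant (a sketch, not part of the commit) could rely on the standard library:

```python
from urllib.parse import parse_qs, urlparse


def extract_video_id(addr: str) -> str:
    # Handles "https://www.youtube.com/watch?v=<id>&t=42"
    # as well as short "https://youtu.be/<id>" links.
    parsed = urlparse(addr)
    if parsed.hostname and "youtu.be" in parsed.hostname:
        return parsed.path.lstrip("/")
    return parse_qs(parsed.query)["v"][0]
```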