from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.tools import tool
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_tavily import TavilyExtract
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_core.messages import SystemMessage, HumanMessage
from langgraph.graph import START, StateGraph, MessagesState
from langgraph.prebuilt import ToolNode
from langgraph.prebuilt import tools_condition
import base64
import httpx

load_dotenv()


@tool
def add(a: int, b: int) -> int:
    """
    Add b to a.

    Args:
        a: first int number
        b: second int number
    """
    return a + b


@tool
def substract(a: int, b: int) -> int:
    """
    Subtract b from a.

    Args:
        a: first int number
        b: second int number
    """
    return a - b


@tool
def multiply(a: int, b: int) -> int:
    """
    Multiply a by b.

    Args:
        a: first int number
        b: second int number
    """
    return a * b


@tool
def divide(a: int, b: int) -> float:
    """
    Divide a by b.

    Args:
        a: first int number
        b: second int number
    """
    if b == 0:
        raise ValueError("Can't divide by zero.")
    return a / b


@tool
def mod(a: int, b: int) -> int:
    """
    Remainder of a divided by b.

    Args:
        a: first int number
        b: second int number
    """
    return a % b


@tool
def wiki_search(query: str) -> dict:
    """
    Search Wikipedia.

    Args:
        query: what to search for
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=3).load()
    formatted_search_docs = "".join(
        [f'{doc.page_content[:1000]}' for doc in search_docs]
    )
    return {"wiki_results": formatted_search_docs}


@tool
def arvix_search(query: str) -> dict:
    """
    Search arXiv, an online archive of preprint and postprint manuscripts
    from different fields of science.

    Args:
        query: what to search for
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_search_docs = "".join(
        [f'{doc.page_content[:1000]}' for doc in search_docs]
    )
    return {"arvix_results": formatted_search_docs}


@tool
def web_search(query: str) -> dict:
    """
    Search the web.

    Args:
        query: what to search for
    """
    search_docs = TavilySearchResults(max_results=3, include_answer=True).invoke({"query": query})
    formatted_search_docs = "".join(
        [f'{doc["content"][:1000]}' for doc in search_docs]
    )
    return {"web_results": formatted_search_docs}


@tool
def open_web_page(url: str) -> dict:
    """
    Open a web page and get its content.

    Args:
        url: web page URL as a string
    """
    search_docs = TavilyExtract().invoke({"urls": [url]})
    formatted_search_docs = f'{search_docs["results"][0]["raw_content"][:1000]}'
    return {"web_page_content": formatted_search_docs}


@tool
def youtube_transcript(url: str) -> dict:
    """
    Get the transcript of a YouTube video.

    Args:
        url: YouTube video URL as a string
    """
    video_id = url.partition("https://www.youtube.com/watch?v=")[2]
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    transcript_text = " ".join([item["text"] for item in transcript])
    return {"youtube_transcript": transcript_text}


tools = [
    add,
    substract,
    multiply,
    divide,
    mod,
    wiki_search,
    arvix_search,
    web_search,
    open_web_page,
    youtube_transcript,
]

# System prompt
system_prompt = """
You are a general AI assistant. I will ask you a question. First, provide a step-by-step explanation of your reasoning to arrive at the answer. Then, respond with your final answer in a single line, formatted as follows: "FINAL ANSWER: [YOUR FINAL ANSWER]".
[YOUR FINAL ANSWER] should be a number, a string, or a comma-separated list of numbers and/or strings, depending on the question. If the answer is a number, do not use commas or units (e.g., $, %) unless specified. If the answer is a string, do not use articles or abbreviations (e.g., for cities), and write digits in plain text unless specified. If the answer is a comma-separated list, apply the above rules to each element based on whether it is a number or a string.
"""
system_message = SystemMessage(content=system_prompt)


# Build graph
def build_graph():
    """Build the LangGraph graph of the agent."""
    # Language model and tools
    llm = ChatOpenAI(
        model="gpt-4.1",
        temperature=0,
        max_retries=2
    )
    llm_with_tools = llm.bind_tools(tools, strict=True)

    # Nodes
    def assistant(state: MessagesState):
        """Assistant node."""
        return {"messages": [llm_with_tools.invoke([system_message] + state["messages"])]}

    # Graph
    builder = StateGraph(MessagesState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")

    # Compile graph
    return builder.compile()


# Testing and solving particular tasks
if __name__ == "__main__":
    agent = build_graph()

    question = """
    Review the chess position provided in the image. It is black's turn.
    Provide the correct next move for black which guarantees a win.
    Please provide your response in algebraic notation.
    """
    content_urls = {
        "image": "https://agents-course-unit4-scoring.hf.space/files/cca530fc-4052-43b2-b130-b30968d8aa44",
        "audio": None
    }

    # Define user message and add all the content
    content = [
        {
            "type": "text",
            "text": question
        }
    ]
    if content_urls["image"]:
        image_data = base64.b64encode(httpx.get(content_urls["image"]).content).decode("utf-8")
        content.append(
            {
                "type": "image",
                "source_type": "base64",
                "data": image_data,
                "mime_type": "image/jpeg"
            }
        )
    if content_urls["audio"]:
        audio_data = base64.b64encode(httpx.get(content_urls["audio"]).content).decode("utf-8")
        content.append(
            {
                "type": "audio",
                "source_type": "base64",
                "data": audio_data,
                "mime_type": "audio/wav"
            }
        )
    messages = {
        "role": "user",
        "content": content
    }

    # Run agent on the question
    messages = agent.invoke({"messages": messages})
    for message in messages["messages"]:
        message.pretty_print()

    answer = messages["messages"][-1].content
    index = answer.find("FINAL ANSWER: ")
    print("\n")
    print("=" * 30)
    if index == -1:
        print(answer)
    else:
        print(answer[index + 14:])
    print("=" * 30)