Spaces:
Running
Running
import os | |
from langchain_openai import ChatOpenAI | |
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage | |
from langgraph.graph.message import add_messages | |
from langgraph.graph import MessagesState | |
from langgraph.graph import StateGraph, START, END | |
from typing import TypedDict, Annotated, Literal | |
from langchain_community.tools import BraveSearch # web search | |
from langchain_experimental.tools.python.tool import PythonAstREPLTool # for logic/math problems | |
from tools import (calculator_basic, datetime_tools, transcribe_audio, transcribe_youtube, query_image, webpage_content, read_excel) | |
from prompt import system_prompt | |
from langchain_core.runnables import RunnableConfig # for LangSmith tracking | |
# LangSmith to observe the agent | |
langsmith_api_key = os.getenv("LANGSMITH_API_KEY") | |
langsmith_tracing = os.getenv("LANGSMITH_TRACING") | |
# gpt-4o-mini: cheaper for debugging, temperature 0 for less randomness | |
# o4-mini: better reasoning for benchmarking, temperature 1 (default) | |
llm = ChatOpenAI( | |
model="o4-mini", | |
api_key=os.getenv("OPENAI_API_KEY"), | |
temperature=1 | |
) | |
python_tool = PythonAstREPLTool() | |
search_tool = BraveSearch.from_api_key( | |
api_key=os.getenv("BRAVE_SEARCH_API"), | |
search_kwargs={"count": 4}, # returns the 4 best results and their URL | |
description="Web search using Brave" | |
) | |
community_tools = [search_tool, python_tool] | |
custom_tools = calculator_basic + datetime_tools + [transcribe_audio, transcribe_youtube, query_image, webpage_content, read_excel] | |
tools = community_tools + custom_tools | |
llm_with_tools = llm.bind_tools(tools) | |
# Prepare tools by name | |
tools_by_name = {tool.name: tool for tool in tools} | |
class MessagesState(TypedDict): # creates the state (is like the agent's memory at any moment) | |
messages: Annotated[list[AnyMessage], add_messages] | |
# LLM node | |
def llm_call(state: MessagesState): | |
return { | |
"messages": [ | |
llm_with_tools.invoke( | |
[SystemMessage(content=system_prompt)] + state["messages"] | |
) | |
] | |
} | |
# Tool node | |
def tool_node(state: MessagesState): | |
"""Executes the tools""" | |
result = [] | |
for tool_call in state["messages"][-1].tool_calls: # gives a list of the tools the LLM decided to call | |
tool = tools_by_name[tool_call["name"]] # look up the actual tool function using a dictionary | |
observation = tool.invoke(tool_call["args"]) # executes the tool | |
result.append(ToolMessage(content=observation, tool_call_id=tool_call["id"])) # the result from the tool is added to the memory | |
return {"messages": result} # thanks to add_messages, LangGraph will automatically append the result to the agent's message history | |
# Conditional edge function to route to the tool node or end based upon whether the LLM made a tool call | |
def should_continue(state: MessagesState) -> Literal["Action", END]: | |
"""Decide if we should continue the loop or stop based upon whether the LLM made a tool call""" | |
last_message = state["messages"][-1] # looks at the last message (usually from the LLM) | |
# If the LLM makes a tool call, then perform an action | |
if last_message.tool_calls: | |
return "Action" | |
# Otherwise, we stop (reply to the user) | |
return END | |
# Build workflow | |
builder = StateGraph(MessagesState) | |
# Add nodes | |
builder.add_node("llm_call", llm_call) | |
builder.add_node("environment", tool_node) | |
# Add edges to connect nodes | |
builder.add_edge(START, "llm_call") | |
builder.add_conditional_edges( | |
"llm_call", | |
should_continue, | |
{"Action": "environment", # name returned by should_continue : Name of the next node | |
END: END} | |
) | |
# If tool calls -> "Action" -> environment (executes the tool) | |
# If no tool calls -> END | |
builder.add_edge("environment", "llm_call") # after running the tools go back to the LLM for another round of reasoning | |
gaia_agent = builder.compile() # converts my builder into a runnable agent by using gaia_agent.invoke() | |
# Wrapper class to initialize and call the LangGraph agent with a user question | |
class LangGraphAgent: | |
def __init__(self): | |
print("LangGraphAgent initialized.") | |
def __call__(self, question: str) -> str: | |
input_state = {"messages": [HumanMessage(content=question)]} # prepare the initial user message | |
print(f"Running LangGraphAgent with input: {question[:150]}...") | |
# tracing configuration for LangSmith | |
config = RunnableConfig( | |
config={ | |
"run_name": "GAIA Agent", | |
"tags": ["gaia", "langgraph", "agent"], | |
"metadata": {"user_input": question}, | |
"recursion_limit": 30 | |
} | |
) | |
result = gaia_agent.invoke(input_state, config) # prevents infinite looping when the LLM keeps calling tools over and over | |
final_response = result["messages"][-1].content | |
try: | |
return final_response.split("FINAL ANSWER:")[-1].strip() # parse out only what's after "FINAL ANSWER:" | |
except Exception: | |
print("Could not split on 'FINAL ANSWER:'") | |
return final_response |