KPatelis committed
Commit b3f9415 · 1 Parent(s): 81917a3

Agent implementation
.gitignore ADDED
@@ -0,0 +1 @@
+ .env
.python-version ADDED
@@ -0,0 +1 @@
+ 3.12
__pycache__/tools.cpython-312.pyc ADDED
Binary file (3.63 kB)
 
__pycache__/utils.cpython-312.pyc ADDED
Binary file (825 Bytes)
 
agent.py ADDED
@@ -0,0 +1,79 @@
+ import os
+ from dotenv import load_dotenv
+ from langgraph.graph import START, StateGraph, MessagesState
+ from langgraph.prebuilt import ToolNode, tools_condition
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
+ from langchain_community.vectorstores import SupabaseVectorStore
+ from langchain_core.messages import HumanMessage
+ from langchain.tools.retriever import create_retriever_tool
+ from supabase.client import Client, create_client
+ from utils import load_prompt
+ from tools import calculator, duck_web_search, wiki_search, arxiv_search
+ 
+ load_dotenv()
+ 
+ # Create retriever over the Supabase vector store
+ embeddings = HuggingFaceEmbeddings(model_name="Alibaba-NLP/gte-modernbert-base")  # dim=768
+ 
+ supabase: Client = create_client(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY"))
+ vector_store = SupabaseVectorStore(
+     client=supabase,
+     embedding=embeddings,
+     table_name="gaia_documents",
+     query_name="match_documents_langchain",
+ )
+ 
+ # Note: this tool is not bound to the agent below; retriever_node
+ # queries the vector store directly instead.
+ retriever = create_retriever_tool(
+     retriever=vector_store.as_retriever(),
+     name="modernbert_retriever",  # tool names should be valid identifiers (no spaces)
+     description="A retriever of similar questions from a vector store.",
+ )
+ 
+ tools = [calculator, duck_web_search, wiki_search, arxiv_search]
+ 
+ model_id = "Qwen/Qwen3-32B"
+ 
+ llm = HuggingFaceEndpoint(
+     repo_id=model_id,
+     temperature=0,
+     repetition_penalty=1.03,
+     provider="auto",
+     huggingfacehub_api_token=os.getenv("HF_INFERENCE_KEY"),
+ )
+ 
+ agent = ChatHuggingFace(llm=llm)
+ agent_with_tools = agent.bind_tools(tools)
+ 
+ def retriever_node(state: MessagesState):
+     """RAG node: surface the most similar stored question/answer pair."""
+     similar = vector_store.similarity_search(state["messages"][0].content)
+     if not similar:  # nothing retrieved; pass the state through unchanged
+         return {"messages": []}
+     response = [HumanMessage(content=f"Here I provide a similar question and answer for reference:\n\n{similar[0].page_content}")]
+     return {"messages": response}
+ 
+ def processor_node(state: MessagesState):
+     """Agent node that answers questions, calling tools as needed."""
+     system_prompt = load_prompt("prompt.yaml")
+     messages = state.get("messages", [])
+     response = [agent_with_tools.invoke([system_prompt] + messages)]
+     return {"messages": response}
+ 
+ def agent_graph():
+     builder = StateGraph(MessagesState)
+ 
+     ## Add nodes
+     builder.add_node("retriever_node", retriever_node)
+     builder.add_node("processor_node", processor_node)
+     builder.add_node("tools", ToolNode(tools))
+ 
+     ## Add edges
+     builder.add_edge(START, "retriever_node")
+     builder.add_edge("retriever_node", "processor_node")
+     builder.add_conditional_edges("processor_node", tools_condition)
+     builder.add_edge("tools", "processor_node")
+ 
+     # Compile the graph; the compiled graph (not the builder) exposes .invoke()
+     return builder.compile()
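
A minimal smoke test for this graph might look like the following (a hypothetical sketch, not part of the commit; it assumes the .env variables used above are set and that agent_graph() returns the compiled graph, as in the version shown here):

    # smoke_test.py -- hypothetical usage sketch
    from langchain_core.messages import HumanMessage
    from agent import agent_graph

    graph = agent_graph()  # compiled StateGraph: retriever -> processor <-> tools
    result = graph.invoke({"messages": [HumanMessage(content="What is 2 + 2?")]})
    print(result["messages"][-1].content)  # per prompt.yaml, should end with "FINAL ANSWER: 4"
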
app.py CHANGED
@@ -3,6 +3,8 @@ import gradio as gr
import requests
import inspect
import pandas as pd
+ from agent import agent_graph
+ from langchain_core.messages import HumanMessage

# (Keep Constants as is)
# --- Constants ---
@@ -12,10 +14,13 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
class BasicAgent:
    def __init__(self):
+         self.agent = agent_graph()
        print("BasicAgent initialized.")
    def __call__(self, question: str) -> str:
-         print(f"Agent received question (first 50 chars): {question[:50]}...")
-         fixed_answer = "This is a default answer."
+         messages = [HumanMessage(content=question)]
+         response = self.agent.invoke({"messages": messages})
+         # Strip the "FINAL ANSWER: " prefix from the model's reply
+         fixed_answer = response["messages"][-1].content.split("FINAL ANSWER:")[-1].strip()
        print(f"Agent returning fixed answer: {fixed_answer}")
        return fixed_answer
@@ -40,7 +45,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):

    # 1. Instantiate Agent ( modify this part to create your agent)
    try:
-         agent = BasicAgent()
+         agent = agent_graph()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
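
For a quick local check of the updated BasicAgent, something like this could work (a hypothetical sketch, not part of the commit; it assumes a populated .env and that app.py keeps its Gradio launch behind an if __name__ == "__main__" guard, as in the course template):

    # local_check.py -- hypothetical usage sketch
    from app import BasicAgent

    agent = BasicAgent()
    print(agent("What is the capital of France?"))  # expect a bare answer string such as "Paris"
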
create_vector_database.ipynb ADDED
@@ -0,0 +1,109 @@
+ {
+  "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "id": "a9f7a25f",
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stderr",
+      "output_type": "stream",
+      "text": [
+       "/home/kpatelis/projects/Agents_Course_Assignment/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+       "  from .autonotebook import tqdm as notebook_tqdm\n"
+      ]
+     }
+    ],
+    "source": [
+     "import os\n",
+     "import json\n",
+     "from dotenv import load_dotenv\n",
+     "from supabase.client import Client, create_client\n",
+     "from langchain_huggingface import HuggingFaceEmbeddings\n",
+     "from langchain.schema import Document\n",
+     "\n",
+     "load_dotenv()"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "id": "2c948d46",
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "supabase: Client = create_client(\n",
+     "    os.environ.get(\"SUPABASE_URL\"),\n",
+     "    os.environ.get(\"SUPABASE_SERVICE_KEY\"))\n",
+     "\n",
+     "embeddings = HuggingFaceEmbeddings(model_name=\"Alibaba-NLP/gte-modernbert-base\")"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 15,
+    "id": "f2c5492b",
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "with open('metadata.jsonl', 'r') as jsonl_file:\n",
+     "    json_list = list(jsonl_file)\n",
+     "\n",
+     "documents = []\n",
+     "for json_str in json_list:\n",
+     "    json_data = json.loads(json_str)\n",
+     "    content = f\"Question : {json_data['Question']}\\n\\nFinal answer : {json_data['Final answer']}\"\n",
+     "    embedding = embeddings.embed_query(content)\n",
+     "    document = {\n",
+     "        \"content\": content,\n",
+     "        \"metadata\": {\n",
+     "            \"source\": json_data['task_id']\n",
+     "        },\n",
+     "        \"embedding\": embedding,\n",
+     "    }\n",
+     "    documents.append(document)"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "id": "26ddbafd",
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "# pgvector must be enabled in Supabase to use it as a vector database\n",
+     "# The table must be created in Supabase beforehand, with the right column types\n",
+     "try:\n",
+     "    response = (\n",
+     "        supabase.table(\"gaia_documents\")\n",
+     "        .insert(documents)\n",
+     "        .execute()\n",
+     "    )\n",
+     "except Exception as exception:\n",
+     "    print(\"Error inserting data into Supabase:\", exception)"
+    ]
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": ".venv",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.12.3"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+ }
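
Once the rows are inserted, a quick retrieval sanity check could look like this (a hypothetical sketch, not part of the commit; it reuses the same .env values, table, and match_documents_langchain query that agent.py expects):

    # sanity_check.py -- hypothetical usage sketch
    import os
    from dotenv import load_dotenv
    from supabase.client import create_client
    from langchain_huggingface import HuggingFaceEmbeddings
    from langchain_community.vectorstores import SupabaseVectorStore

    load_dotenv()
    supabase = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_SERVICE_KEY"])
    embeddings = HuggingFaceEmbeddings(model_name="Alibaba-NLP/gte-modernbert-base")
    store = SupabaseVectorStore(
        client=supabase,
        embedding=embeddings,
        table_name="gaia_documents",
        query_name="match_documents_langchain",
    )
    hits = store.similarity_search("example question", k=1)
    print(hits[0].page_content)  # should print a stored "Question : ... Final answer : ..." pair
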
hello.py ADDED
@@ -0,0 +1,6 @@
+ def main():
+     print("Hello from agents-course-assignment!")
+ 
+ 
+ if __name__ == "__main__":
+     main()
metadata.jsonl ADDED
The diff for this file is too large to render.
prompt.yaml ADDED
@@ -0,0 +1,2 @@
+ title: "Agent"
+ prompt: "You are a helpful AI assistant, equipped with a set of tools. Your task is to answer questions provided by the user. You should always make use of tools to answer the questions. For any given question, think and formulate a response. You can describe your thought process and use tool calls to assist you in answering the question. The final answer should be either a number, a concise reply in as few words as possible, or a comma-separated list of numbers and/or strings. When the answer is a number, do not use commas, units, or currency signs (such as $ or the percent sign) unless specified otherwise. When the answer is a string, do not use articles or abbreviations (e.g. for city names). When the answer is a list, apply the above rules to each item, depending on whether it is a string or a number. Your answer should start with FINAL ANSWER: <answer>."
pyproject.toml ADDED
@@ -0,0 +1,7 @@
+ [project]
+ name = "agents-course-assignment"
+ version = "0.1.0"
+ description = "Add your description here"
+ readme = "README.md"
+ requires-python = ">=3.12"
+ dependencies = []
tools.py ADDED
@@ -0,0 +1,85 @@
+ import os
+ 
+ from langchain_community.tools import DuckDuckGoSearchRun
+ from langchain_community.tools.tavily_search import TavilySearchResults
+ from langchain_community.document_loaders import WikipediaLoader
+ from langchain_community.document_loaders import ArxivLoader
+ from langchain_core.tools import tool
+ 
+ @tool
+ def calculator(a: float, b: float, type: str) -> float:
+     """Performs mathematical calculations: addition, subtraction, multiplication, division, modulus.
+     Args:
+         a (float): first float number
+         b (float): second float number
+         type (str): the type of calculation to perform; one of addition, subtraction, multiplication, division, modulus
+     """
+ 
+     if type == "addition":
+         return a + b
+     elif type == "subtraction":
+         return a - b
+     elif type == "multiplication":
+         return a * b
+     elif type == "division":
+         if b == 0:
+             raise ValueError("Cannot divide by zero.")
+         return a / b
+     elif type == "modulus":
+         return a % b
+     else:
+         raise ValueError(f"{type} is not an option for type; choose one of addition, subtraction, multiplication, division, modulus")
+ 
+ @tool
+ def duck_web_search(query: str) -> dict:
+     """Use DuckDuckGo to search the web.
+ 
+     Args:
+         query: The search query.
+     """
+     search = DuckDuckGoSearchRun().invoke(query)
+ 
+     return {"duckduckgo_web_search": search}
+ 
+ @tool
+ def wiki_search(query: str) -> dict:
+     """Search Wikipedia for a query and return a maximum of 3 results.
+ 
+     Args:
+         query: The search query."""
+     documents = WikipediaLoader(query=query, load_max_docs=3).load()
+     processed_documents = "\n\n---\n\n".join(
+         [
+             f"Document title: {document.metadata.get('title', '')}. Summary: {document.metadata.get('summary', '')}. Document details: {document.page_content}"
+             for document in documents
+         ])
+     return {"wiki_results": processed_documents}
+ 
+ @tool
+ def arxiv_search(query: str) -> dict:
+     """Search Arxiv for a query and return a maximum of 3 results.
+ 
+     Args:
+         query: The search query."""
+     documents = ArxivLoader(query=query, load_max_docs=3).load()  # ArxivLoader capitalizes its metadata keys
+     processed_documents = "\n\n---\n\n".join(
+         [
+             f"Document title: {document.metadata.get('Title', '')}. Summary: {document.metadata.get('Summary', '')}. Document details: {document.page_content}"
+             for document in documents
+         ])
+     return {"arxiv_results": processed_documents}
+ 
+ @tool
+ def tavily_web_search(query: str) -> dict:
+     """Search the web using Tavily for a query and return a maximum of 3 results.
+ 
+     Args:
+         query: The search query."""
+     search_engine = TavilySearchResults(max_results=3)
+     search_documents = search_engine.invoke(input=query)
+     web_results = "\n\n---\n\n".join(
+         [
+             f"Document title: {document['title']}. Contents: {document['content']}. Relevance Score: {document['score']}"
+             for document in search_documents
+         ])
+     return {"web_results": web_results}
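
Each @tool-decorated function above is a LangChain structured tool, so it can be exercised on its own before being wired into the graph. A quick check might look like this (a hypothetical sketch, not part of the commit; tool inputs are passed as a dict matching the function signature):

    # tool_check.py -- hypothetical usage sketch
    from tools import calculator, wiki_search

    print(calculator.invoke({"a": 7.0, "b": 3.0, "type": "modulus"}))  # -> 1.0
    print(wiki_search.invoke({"query": "Alan Turing"})["wiki_results"][:200])
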
utils.py ADDED
@@ -0,0 +1,11 @@
+ import yaml
+ from langchain_core.messages import SystemMessage
+ 
+ def load_prompt(prompt_location):
+     """Load the system prompt from a YAML file and wrap it in a SystemMessage."""
+     with open(prompt_location) as f:
+         try:
+             prompt = yaml.safe_load(f)["prompt"]
+             return SystemMessage(content=prompt)
+         except yaml.YAMLError as exc:
+             print(exc)  # on a parse error this logs and returns None