0xrushi commited on
Commit
0f6be34
·
1 Parent(s): 81917a3
Files changed (4) hide show
  1. agent.py +126 -0
  2. app.py +15 -3
  3. requirements.txt +20 -1
  4. system_prompt.txt +18 -0
agent.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from langgraph.graph import START, StateGraph, MessagesState
4
+ from langgraph.prebuilt import tools_condition
5
+ from langgraph.prebuilt import ToolNode
6
+ from langchain_google_genai import ChatGoogleGenerativeAI
7
+ from langchain_groq import ChatGroq
8
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
9
+ from langchain_community.tools.tavily_search import TavilySearchResults
10
+ from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
11
+ from langchain_community.vectorstores import SupabaseVectorStore
12
+ from langchain_core.messages import SystemMessage, AIMessage, HumanMessage
13
+ from langchain_core.tools import tool
14
+ from langchain.tools.retriever import create_retriever_tool
15
+ from langchain_community.retrievers import BM25Retriever
16
+ from smolagents import DuckDuckGoSearchTool
17
+ from smolagents import Tool
18
+ from langchain.vectorstores import FAISS
19
+ import faiss
20
+
21
+ # Load environment variables
22
+ load_dotenv()
23
+
24
class QuestionRetrieverTool(Tool):
    """smolagents tool that looks up stored questions with a BM25 retriever.

    The retriever is built once from the documents passed to the constructor;
    ``forward`` returns the page content of the best matches as one string.
    """

    # No trailing comma after the value: ``name = "...",`` would turn the
    # attribute into a one-element tuple instead of a string.  The name is
    # written as an identifier because smolagents expects tool names to be
    # valid Python identifiers (no spaces).
    name = "question_search"
    description = "Retrieve similar questions from the vector store."
    inputs = {
        "query": {
            "type": "string",
            "description": "The question you want related questions for.",
        }
    }
    output_type = "string"

    def __init__(self, docs):
        """Index ``docs`` for keyword (BM25) retrieval.

        Args:
            docs: Documents handed to ``BM25Retriever.from_documents``.
        """
        # Let the Tool base class run its own initialisation first.
        super().__init__()
        self.is_initialized = False
        self.retriever = BM25Retriever.from_documents(docs)

    def forward(self, query: str) -> str:
        """Return the content of up to three documents matching ``query``.

        Args:
            query: The question text to search for.

        Returns:
            The matching page contents separated by blank lines, or a fixed
            "no match" message when the retriever finds nothing.
        """
        # ``invoke`` is the current retriever entry point; it replaces the
        # deprecated ``get_relevant_documents``.
        results = self.retriever.invoke(query)
        if not results:
            return "No matching Questions found."
        return "\n\n".join(doc.page_content for doc in results[:3])
45
+
46
+
47
@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia and return up to 2 documents."""
    # The docstring above doubles as the tool description, so it is kept as is.
    pages = WikipediaLoader(query=query, load_max_docs=2).load()

    # Render every page as a small <Document .../> header followed by its text.
    formatted = []
    for page in pages:
        header = f"<Document source=\"{page.metadata['source']}\" page=\"{page.metadata.get('page','')}\"/>"
        formatted.append(f"{header}\n{page.page_content}")

    return {"wiki_results": "\n---\n".join(formatted)}
53
+
54
@tool
def web_search(query: str) -> dict:
    """Search DDG and return up to 3 results."""
    # smolagents tools are plain callables, not LangChain runnables, so there
    # is no ``.invoke`` method.  DuckDuckGoSearchTool also returns a single
    # pre-formatted string rather than Document objects, so the text is passed
    # through unchanged instead of being read via ``.metadata``/``.page_content``.
    search_tool = DuckDuckGoSearchTool(max_results=3)
    return {"web_results": search_tool(query)}
60
+
61
+
62
# --- Load system prompt ---
# Read relative to the current working directory and wrapped in a
# SystemMessage so it can be prepended to a conversation.
with open("system_prompt.txt", "r", encoding="utf-8") as f:
    system_prompt = f.read()
sys_msg = SystemMessage(content=system_prompt)

# --- Retriever Tool ---
# FAISS calls ``docstore.add(...)`` / ``docstore.search(...)``; a plain dict has
# neither, so ``add_texts`` and ``similarity_search`` would fail.  Use the
# in-memory docstore instead.  Imported here to keep this section self-contained.
from langchain_community.docstore.in_memory import InMemoryDocstore

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
embedding_dim = 768  # for 'all-mpnet-base-v2'
empty_index = faiss.IndexFlatL2(embedding_dim)
vector_store = FAISS(
    embedding_function=embeddings,
    index=empty_index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# NOTE(review): some chat-model APIs reject tool names containing spaces --
# confirm "Question Search" is accepted before binding this tool to a model.
retriever_tool = create_retriever_tool(
    retriever=vector_store.as_retriever(),
    name="Question Search",
    description="Retrieve similar questions from the vector store.",
)

tools = [
    wiki_search,
    web_search,
    retriever_tool,
]
85
+
86
+ # --- Graph Builder ---
87
def build_graph():
    """Build and compile the LangGraph workflow.

    The graph consists of a single ``retriever`` node that is both the entry
    and the finish point.  It looks up the entry of the module-level
    ``vector_store`` closest to the latest message and replies with it as an
    ``AIMessage``.

    The chat model and tool-calling assistant node that used to be created
    here were never added to the graph, so they are no longer constructed.

    Returns:
        The compiled graph; invoke it with ``{"messages": [...]}``.
    """

    def retriever(state: MessagesState) -> dict:
        """Reply with the stored text most similar to the last message."""
        query = state["messages"][-1].content
        matches = vector_store.similarity_search(query, k=1)
        if not matches:
            # Empty store: answer explicitly instead of raising IndexError
            # on ``matches[0]``.
            return {"messages": [AIMessage(content="No similar question found.")]}

        content = matches[0].page_content
        # Keep only the text after the last "Final answer :" marker; when the
        # marker is absent, rpartition leaves the whole content in slot 2.
        answer = content.rpartition("Final answer :")[2].strip()
        return {"messages": [AIMessage(content=answer)]}

    builder = StateGraph(MessagesState)
    builder.add_node("retriever", retriever)

    builder.set_entry_point("retriever")
    builder.set_finish_point("retriever")

    # Compile graph
    return builder.compile()
app.py CHANGED
@@ -3,6 +3,8 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
@@ -11,13 +13,17 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
  # --- Basic Agent Definition ---
12
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
  class BasicAgent:
 
14
  def __init__(self):
15
  print("BasicAgent initialized.")
 
 
16
  def __call__(self, question: str) -> str:
17
  print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
 
21
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
@@ -73,9 +79,15 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
73
  results_log = []
74
  answers_payload = []
75
  print(f"Running agent on {len(questions_data)} questions...")
 
 
 
 
 
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
 
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from agent import vector_store, build_graph
7
+ from langchain_core.messages import HumanMessage
8
 
9
  # (Keep Constants as is)
10
  # --- Constants ---
 
13
  # --- Basic Agent Definition ---
14
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
15
  class BasicAgent:
16
+ """A langgraph agent."""
17
  def __init__(self):
18
  print("BasicAgent initialized.")
19
+ self.graph = build_graph()
20
+
21
  def __call__(self, question: str) -> str:
22
  print(f"Agent received question (first 50 chars): {question[:50]}...")
23
+ messages = [HumanMessage(content=question)]
24
+ messages = self.graph.invoke({"messages": messages})
25
+ answer = messages['messages'][-1].content
26
+ return answer[14:]
27
 
28
  def run_and_submit_all( profile: gr.OAuthProfile | None):
29
  """
 
79
  results_log = []
80
  answers_payload = []
81
  print(f"Running agent on {len(questions_data)} questions...")
82
+
83
+
84
+ questions_texts = [item.get("question") for item in questions_data if item.get("question")]
85
+ vector_store.add_texts(questions_texts)
86
+
87
  for item in questions_data:
88
  task_id = item.get("task_id")
89
  question_text = item.get("question")
90
+
91
  if not task_id or question_text is None:
92
  print(f"Skipping item with missing task_id or question: {item}")
93
  continue
requirements.txt CHANGED
@@ -1,2 +1,21 @@
1
  gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  gradio
2
+ requests
3
+ langchain
4
+ langchain-community
5
+ langchain-core
6
+ langchain-google-genai
7
+ langchain-huggingface
8
+ langchain-groq
9
+ langchain-tavily
10
+ langchain-chroma
11
+ langgraph
12
+ huggingface_hub
13
+ supabase
14
+ arxiv
15
+ pymupdf
16
+ wikipedia
17
+ pgvector
18
+ python-dotenv
19
+ smolagents
20
+ faiss-cpu
21
+ gradio[oauth]
system_prompt.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are a helpful assistant tasked with answering questions using a set of tools.
2
+
3
+ Process:
4
+ 1. If the question text is exactly byte-for-byte the same as a previously seen Q&A pair, immediately return its stored answer in the format below.
5
+ 2. Otherwise, think through which tools to use (internally, do not output your reasoning).
6
+ 3. Invoke tools with the exact syntax: TOOL_NAME(arg1=…, arg2=…).
7
+ 4. Use the tool outputs to determine your final answer.
8
+ 5. If any tool fails, return: FINAL ANSWER: Unable to retrieve data
9
+
10
+ Your **only** output must be:
11
+
12
+ FINAL ANSWER: [ANSWER]
13
+
14
+ Examples of valid outputs:
15
+ - FINAL ANSWER: FunkMonk
16
+ - FINAL ANSWER: Paris
17
+ - FINAL ANSWER: 128
18
+ - FINAL ANSWER: blue, red