File size: 4,996 Bytes
1669f2b
 
 
f00550f
1669f2b
 
f00550f
 
1669f2b
 
 
8ca5d55
 
1669f2b
 
 
85ecabb
1669f2b
8ca5d55
1669f2b
 
 
 
 
 
 
 
 
 
 
 
8ca5d55
85ecabb
1669f2b
 
85ecabb
 
 
 
 
 
 
 
 
 
f00550f
 
 
 
 
 
 
 
 
 
85ecabb
1669f2b
 
 
 
 
294f2c0
 
 
 
 
 
 
1669f2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85ecabb
1669f2b
f00550f
 
 
 
 
 
85ecabb
f00550f
 
1669f2b
 
f00550f
 
85ecabb
1669f2b
f00550f
1669f2b
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os

from dotenv import load_dotenv
from langchain_community.vectorstores import Chroma
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.tools import tool
from langchain_huggingface import (ChatHuggingFace, HuggingFaceEmbeddings,
                                   HuggingFaceEndpoint)
from langgraph.graph import START, MessagesState, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition

from tools import (absolute, add, analyze_excel_file, arvix_search,
                   audio_transcription, compound_interest, convert_temperature,
                   divide, exponential, factorial, floor_divide,
                   get_current_time_in_timezone, greatest_common_divisor,
                   is_prime, least_common_multiple, logarithm, modulus,
                   multiply, percentage_calculator, power, python_code_parser,
                   roman_calculator_converter, square_root, subtract,
                   web_search, wiki_search)

# Configuration: read variables from a .env file (if any) into the process
# environment, then pick up the Hugging Face API token from there.
load_dotenv()
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")


# Registry of every tool handed to the LLM and to the graph's ToolNode.
# The order is kept exactly as originally declared.
tools = [
    multiply,
    add,
    subtract,
    power,
    divide,
    modulus,
    square_root,
    floor_divide,
    absolute,
    logarithm,
    exponential,
    web_search,
    roman_calculator_converter,
    get_current_time_in_timezone,
    compound_interest,
    convert_temperature,
    factorial,
    greatest_common_divisor,
    is_prime,
    least_common_multiple,
    percentage_calculator,
    wiki_search,
    analyze_excel_file,
    arvix_search,
    audio_transcription,
    python_code_parser,
]


# System prompt text. It is assembled from adjacent string literals so that the
# trailing spaces and newlines that are part of the prompt stay visible and
# cannot be silently stripped by an editor.
system_prompt = (
    "\n"
    "You are a helpful assistant tasked with answering questions using a set of tools. \n"
    "Now, I will ask you a question. Report your thoughts, and finish your answer with the following template: \n"
    "FINAL ANSWER: [YOUR FINAL ANSWER]. \n"
    "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma "
    "separated list of numbers and/or strings. If you are asked for a number, "
    "don't use comma to write your number neither use units such as $ or percent "
    "sign unless specified otherwise. If you are asked for a string, don't use "
    "articles, neither abbreviations (e.g. for cities), and write the digits in "
    "plain text unless specified otherwise. If you are asked for a comma "
    "separated list, apply the above rules depending of whether the element to "
    "be put in the list is a number or a string.\n"
    'Your answer should only start with "FINAL ANSWER: ", then follows with the answer. \n'
)

# The prompt text wrapped as a SystemMessage, ready to be placed in a message list.
sys_msg = SystemMessage(content=system_prompt)

# Embedding model + Chroma collection used to look up previously stored
# questions. The collection is persisted in the relative directory "chroma_db".
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)
vector_store = Chroma(
    persist_directory="chroma_db",
    collection_name="langgraph-documents",
    embedding_function=embeddings,
)

def build_graph():
    """Build and compile the tool-calling agent graph.

    Graph layout::

        START -> retriever -> assistant -> (tools -> assistant)* -> END

    * ``retriever`` looks up the stored question most similar to the user's
      first message and, if one is found, adds it to the conversation as an
      extra human message.
    * ``assistant`` calls the tool-enabled chat model with the system message
      followed by the conversation so far.
    * ``tools`` executes whatever tool calls the model requested;
      ``tools_condition`` routes back to it for as long as the model keeps
      asking for tools.

    Returns:
        The compiled graph. Invoke it with ``{"messages": [HumanMessage(...)]}``.
    """
    # Raw text-generation endpoint. Alternative models that were considered:
    # Qwen/Qwen2.5-Coder-32B-Instruct, meta-llama/Llama-2-7b-chat-hf,
    # google/gemma-7b-it, mosaicml/mpt-7b-instruct, tiiuae/falcon-7b-instruct.
    llm_endpoint = HuggingFaceEndpoint(
        repo_id="mistralai/Mistral-7B-Instruct-v0.2",
        huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
        temperature=0.1,  # Lower temperature for more consistent responses
        max_new_tokens=1024,
        timeout=30,
    )

    # Wrap the endpoint to get chat-model behaviour, then expose the tools to it.
    llm = ChatHuggingFace(llm=llm_endpoint)
    llm_with_tools = llm.bind_tools(tools)

    def assistant(state: MessagesState):
        """Call the model with the system message followed by the conversation."""
        # Prepend the SystemMessage object, not the raw prompt string: a bare
        # str in a message list is coerced to a *human* message. The system
        # message is injected per call instead of being stored in the state,
        # so it is always first and never duplicated.
        return {"messages": [llm_with_tools.invoke([sys_msg] + state["messages"])]}

    def retriever(state: MessagesState):
        """Add the most similar stored question, if any, to the conversation."""
        history = state["messages"]
        if not history:
            # Nothing to search with; leave the state untouched.
            return {"messages": []}

        similar = vector_store.similarity_search(history[0].content)
        if not similar:
            return {"messages": []}

        example_msg = HumanMessage(
            content=f"Here is a similar question:\n\n{similar[0].page_content}"
        )
        # Return only the NEW message. MessagesState merges updates into the
        # existing list, so re-emitting the history would not reorder it and
        # would leave any extra leading message appended after the question.
        return {"messages": [example_msg]}

    builder = StateGraph(MessagesState)
    builder.add_node("retriever", retriever)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "retriever")
    builder.add_edge("retriever", "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")

    # Compile graph
    return builder.compile()

# Manual smoke test: run one sample question through the agent and print
# every message of the resulting conversation.
if __name__ == "__main__":
    sample_question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
    agent_graph = build_graph()
    final_state = agent_graph.invoke(
        {"messages": [HumanMessage(content=sample_question)]}
    )
    for message in final_state["messages"]:
        message.pretty_print()