### RAG in a straightforward way

In [1]:
from langchain_community.document_loaders import PyPDFLoader

# Source document: the PDF is loaded directly from its arXiv URL.
document_url = "https://arxiv.org/pdf/2312.10997.pdf"
loader = PyPDFLoader(document_url)
# `pages` is indexed per page below; each entry exposes its text as `page_content`.
pages = loader.load()

In [2]:
# Sanity check: show the first 100 characters of the first loaded page.
print(pages[0].page_content[0:100])

1
Retrieval-Augmented Generation for Large
Language Models: A Survey
Yunfan Gaoa, Yun Xiongb, Xinyu 


In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

 
# Chunking parameters, gathered in one place so they are easy to tune.
splitter_options = dict(
    chunk_size=400,            # upper bound on chunk length, as measured by length_function
    chunk_overlap=40,          # amount shared between neighbouring chunks
    length_function=len,       # length is counted with the built-in len()
    is_separator_regex=False,
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Split the loaded pages into chunks; each chunk keeps its page metadata.
chunks = text_splitter.split_documents(pages)
print(chunks)

[Document(metadata={'source': 'https://arxiv.org/pdf/2312.10997.pdf', 'page': 0}, page_content='1\nRetrieval-Augmented Generation for Large\nLanguage Models: A Survey\nYunfan Gaoa, Yun Xiongb, Xinyu Gaob, Kangxiang Jiab, Jinliu Panb, Yuxi Bic, Yi Daia, Jiawei Suna, Meng\nWangc, and Haofen Wanga,c\naShanghai Research Institute for Intelligent Autonomous Systems, Tongji University\nbShanghai Key Laboratory of Data Science, School of Computer Science, Fudan University'), Document(metadata={'source': 'https://arxiv.org/pdf/2312.10997.pdf', 'page': 0}, page_content='cCollege of Design and Innovation, Tongji University\nAbstract —Large Language Models (LLMs) showcase impres-\nsive capabilities but encounter challenges like hallucination,\noutdated knowledge, and non-transparent, untraceable reasoning\nprocesses. Retrieval-Augmented Generation (RAG) has emerged\nas a promising solution by incorporating knowledge from external'), Document(metadata={'source': 'https://arxiv.org/pdf/2312.10997.p

In [37]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# all-MiniLM-L6-v2 is a plain sentence-transformers model, so it is loaded with
# the generic HuggingFaceEmbeddings wrapper. The BGE-specific wrapper prepends a
# BGE retrieval instruction to every query, which this model was not trained
# with and which skews query vectors relative to the document vectors.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = {"device": "cpu"}
# Only options understood by SentenceTransformer.encode() belong here.
# Tokenizer-level settings (padding / max_length / truncation) are not encode()
# parameters; NOTE(review): input truncation is presumably governed by the
# model's own max sequence length -- confirm if long chunks are expected.
# Normalised vectors make inner-product and cosine similarity rank identically.
encode_kwargs = {"normalize_embeddings": True}
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

# chunk_text = list(map(lambda x: x.page_content, chunks))
# embeddings = embeddings.embed_documents(chunk_text)
# print(embeddings[0])



In [38]:
from langchain_community.vectorstores import FAISS



# Build a FAISS vector store from the chunks, embedding them with `embeddings`.
db = FAISS.from_documents(chunks, embeddings)

In [19]:
# Question about the paper that is sent through the chain further down.
query = "What is the main drawback of the RAG method based on the paper?"

# results = db.search(query=query, k=5, search_type="similarity")

# print(results[0])

In [20]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# RAG prompt: the paper excerpts are injected through `{context}` and the user
# question through `{question}`. Both keys must be supplied when the chain is
# invoked. A key the template does not reference is simply ignored, so without
# the `{context}` placeholder the excerpts would never reach the model.
chat_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a researcher who has just read a paper on a new method for solving a problem in your field. You are excited about the potential of the method, but you have some questions about the details of the method and its limitations."),
        (
            "human",
            "Use the following excerpts from the paper to answer the question. "
            "If the excerpts do not contain the answer, say so.\n\n"
            "Excerpts:\n{context}\n\n"
            "Question: {question}",
        ),
    ]
)


In [26]:
from dotenv import load_dotenv
import os

# Load environment variables via python-dotenv before reading the key.
load_dotenv()

# The key is read from the environment instead of being hard-coded in the notebook.
api_key = os.getenv("OPENAI_API_KEY")

# Chat model used for answering; replies are capped at 1000 tokens.
chat_model = ChatOpenAI(model_name="gpt-4o",
                        api_key=api_key,
                        temperature=0.9,
                        max_tokens=1000
                        )

# Pipeline: the formatted prompt is piped into the chat model.
chain = chat_prompt | chat_model


In [29]:
# Retrieval step: fetch only the chunks most similar to the question from the
# FAISS index instead of sending every chunk of the paper to the model.
retrieved_chunks = db.similarity_search(query, k=5)

# The prompt template must expose a `{context}` placeholder for the retrieved
# text to reach the model; keys the template does not reference are ignored.
response = chain.invoke(
    {
        "context": "\n\n".join(doc.page_content for doc in retrieved_chunks),
        "question": query
    }
)

In [30]:
# Display the text of the model's reply.
response.content

'According to the paper, one of the main drawbacks of the Retrieval-Augmented Generation (RAG) method is its reliance on the quality and relevance of the retrieved documents. If the retrieval component fails to find relevant information, the generation component may produce inaccurate or irrelevant outputs. This dependency highlights a few specific issues:\n\n1. **Retrieval Quality**: If the underlying retrieval algorithm is not robust or the index from which the documents are retrieved is not comprehensive and up to date, the entire process can be compromised.\n\n2. **Noise in Retrieved Documents**: The method might retrieve documents that contain irrelevant or even erroneous information, which could negatively influence the generated responses.\n\n3. **Computational Complexity**: Integrating retrieval and generation components can introduce additional computational overhead, which might not be feasible in real-time or resource-constrained environments.\n\n4. **Fine-Tuning Requirement

### Chatbot with RAG method


In [1]:
from langchain_openai import ChatOpenAI

# Chat model used by the chatbot cells in this section.
model = ChatOpenAI(model="gpt-4o")

In [5]:
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage

In [7]:
# Direct call to the model with an explicit message list: only the messages
# passed here are sent.
model.invoke(
    [
        HumanMessage(content="Hello, how are you?, I'm Bobby"),
        # AIMessage(content="Hello Bobby! I'm fine, how can I help you?")

    ]
)

AIMessage(content="Hello Bobby! I'm an AI, so I don't have feelings, but I'm here and ready to help you with whatever you need. How can I assist you today?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 15, 'total_tokens': 48, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c17d3befe7', 'finish_reason': 'stop', 'logprobs': None}, id='run-41ec5a6b-f78e-4262-9048-b18d73a048e0-0', usage_metadata={'input_tokens': 15, 'output_tokens': 33, 'total_tokens': 48})

In [8]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph


In [9]:
# Graph whose state follows the MessagesState schema (read via state["messages"]).
workflow = StateGraph(
    state_schema=MessagesState
)


In [10]:
def call_model(state:MessagesState):
    """Graph node: send the accumulated messages to the chat model.

    Args:
        state: Graph state; its "messages" entry is passed to `model.invoke`.

    Returns:
        dict: A state update of the form {"messages": <model reply>}.
    """
    return {"messages": model.invoke(state["messages"])}

In [11]:
# Wire the graph: START leads to the "model" node, which runs `call_model`.
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

<langgraph.graph.state.StateGraph at 0x70a0e1f96b10>

In [12]:
# Compile the graph with a MemorySaver checkpointer attached.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [27]:
# Run configuration passed to every app.invoke call below; it carries the
# thread_id that identifies the conversation.
config = {
    "configurable": {"thread_id": "1234"}
}

In [15]:
# First turn of the conversation on the thread selected by `config`.
query = "Hi! I'm Bobby, and you?"

input_message = [HumanMessage(content=query)]
output = app.invoke(
    {
        "messages": input_message
    }, 
    config
)

In [16]:
# Display the state returned by the first turn.
output

{'messages': [HumanMessage(content="Hi! I'm Bobby, and you?", additional_kwargs={}, response_metadata={}, id='82d2044f-9517-458a-95b8-f0bfa3fb9300'),
  AIMessage(content="Hello, Bobby! I'm an AI developed by OpenAI. How can I assist you today?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 15, 'total_tokens': 34, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c17d3befe7', 'finish_reason': 'stop', 'logprobs': None}, id='run-169455ca-94b7-49dd-b3d7-4f89fbddf7b3-0', usage_metadata={'input_tokens': 15, 'output_tokens': 19, 'total_tokens': 34})]}

In [20]:
# Pretty-print the first message of the returned state (the user's message).
output["messages"][0].pretty_print()


Hi! I'm Bobby, and you?


In [28]:
# Second turn on the same thread: only the new question is sent, with the same
# `config` as the first turn.
new_query = "What is my name?"

input_message = [HumanMessage(content=new_query)]
output = app.invoke(
    {
        "messages": input_message
    }, 
    config
)

In [30]:
# Display the state returned by the second turn.
output

{'messages': [HumanMessage(content="Hi! I'm Bobby, and you?", additional_kwargs={}, response_metadata={}, id='82d2044f-9517-458a-95b8-f0bfa3fb9300'),
  AIMessage(content="Hello, Bobby! I'm an AI developed by OpenAI. How can I assist you today?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 15, 'total_tokens': 34, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c17d3befe7', 'finish_reason': 'stop', 'logprobs': None}, id='run-169455ca-94b7-49dd-b3d7-4f89fbddf7b3-0', usage_metadata={'input_tokens': 15, 'output_tokens': 19, 'total_tokens': 34}),
  HumanMessage(content='What is my name', additional_kwargs={}, response_metadata={}, id='7eaa30f5-5969-4274-8914-ecc4d157eff0'),
  AIMessage(content='You mentioned that your name is Bobby. How can I assist you today, Bobby?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_toke

In [44]:
from redis import Redis
import json

# Redis client shared by the chat-history helpers below: localhost, port 6379, database 3.
r = Redis(host="localhost", port=6379, db=3)

def save_message(chat_id, message_type, message_content, metadata=None):
    """
    Append one chat message to the Redis list of a conversation.

    The message is stored as a JSON object with the keys "type", "content"
    and "metadata", pushed onto the end of the list stored at `chat:<chat_id>`.

    Args:
        chat_id (str): Unique identifier for the conversation (chat session).
        message_type (str): 'user' or 'ai' to denote who sent the message.
        message_content (str): The actual message content.
        metadata (dict, optional): Additional metadata like tokens used, model
            information, etc. A missing or empty value is stored as {}.
    """
    payload = json.dumps(
        {
            "type": message_type,
            "content": message_content,
            "metadata": metadata or {},
        }
    )

    # RPUSH appends, so the list order is the order in which messages were saved.
    r.rpush(f"chat:{chat_id}", payload)


def get_chat_history(chat_id):
    """
    Load every stored message of a conversation from Redis.

    Args:
        chat_id (str): Unique identifier for the conversation (chat session).

    Returns:
        list: The JSON-decoded entries of the list stored at `chat:<chat_id>`,
        in list order.
    """
    # The range 0..-1 selects the whole list.
    raw_entries = r.lrange(f"chat:{chat_id}", 0, -1)
    return list(map(json.loads, raw_entries))



In [47]:
# Dump id, type and content of every message in the conversation, one field
# per line, followed by a blank separator line.
for message in output["messages"]:
    print(message.id, message.type, message.content, "", sep="\n")

82d2044f-9517-458a-95b8-f0bfa3fb9300
run-169455ca-94b7-49dd-b3d7-4f89fbddf7b3-0
7eaa30f5-5969-4274-8914-ecc4d157eff0
run-fa57f662-b5b7-4918-911a-e68886bb70f3-0
be3f2b50-122f-4839-99ef-d207c2da7073
run-178f2c9b-07ed-4ef0-8178-69a109bb2d6f-0
