from langchain_community.vectorstores import Milvus
def query_pipeline(question, texts, tables, pictures, embeddings_model, llm_model):
    """
    Process a question through the RAG pipeline.

    Args:
        question: The user's question.
        texts: List of text documents.
        tables: List of table documents.
        pictures: List of image-description documents.
        embeddings_model: Embedding model used to index the documents.
        llm_model: Chat LLM used to generate the answer.

    Returns:
        str: The generated answer.
    """
    # Combine all document types into a single corpus
    all_docs = texts + tables + pictures

    # Embed the documents and index them in Milvus (expects a local Milvus server)
    vectorstore = Milvus.from_documents(
        all_docs,
        embeddings_model,
        connection_args={"host": "127.0.0.1", "port": "19530"},
    )
    retriever = vectorstore.as_retriever()

    # Retrieve the documents most relevant to the question
    relevant_docs = retriever.invoke(question)

    # Concatenate the retrieved passages into a single context block
    context = "\n\n".join(doc.page_content for doc in relevant_docs)

    # Build the prompt and generate the answer
    prompt = f"""
You are an AI assistant answering questions based on the provided context.
Use only the information from the context to answer the question.
If you don't know the answer, say "I don't know".

Context:
{context}

Question: {question}

Answer:
"""
    response = llm_model.invoke(prompt)
    return response.content
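

# ---------------------------------------------------------------------------
# Example usage: a minimal sketch, not part of the original file. It assumes
# a Milvus server reachable at 127.0.0.1:19530 (as hard-coded above), the
# optional langchain-huggingface and langchain-openai packages, and an
# OPENAI_API_KEY in the environment; the sample Documents and model names
# below are placeholders chosen for illustration.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from langchain_core.documents import Document
    from langchain_huggingface import HuggingFaceEmbeddings
    from langchain_openai import ChatOpenAI

    # Placeholder documents standing in for parsed text, tables, and image
    # descriptions (hypothetical inputs, not produced by this file).
    texts = [Document(page_content="Milvus is an open-source vector database.")]
    tables = [Document(page_content="metric: recall, value: 0.93")]
    pictures = [Document(page_content="Diagram of the document ingestion flow.")]

    embeddings_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    llm_model = ChatOpenAI(model="gpt-4o-mini")

    answer = query_pipeline(
        "What is Milvus?", texts, tables, pictures, embeddings_model, llm_model
    )
    print(answer)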