from langchain_community.vectorstores import Milvus


def query_pipeline(question, texts, tables, pictures, embeddings_model, llm_model):
    """
    Process a question through the RAG pipeline.

    Args:
        question: The user's question
        texts: List of text documents
        tables: List of table documents
        pictures: List of image description documents
        embeddings_model: Model for generating embeddings
        llm_model: LLM for generating answers

    Returns:
        str: The generated answer
    """
    # Combine all document types into a single corpus
    all_docs = texts + tables + pictures

    # Embed the documents, index them in Milvus, and expose the index
    # as a retriever
    vectorstore = Milvus.from_documents(
        all_docs,
        embeddings_model,
        connection_args={"host": "127.0.0.1", "port": "19530"},
    )
    retriever = vectorstore.as_retriever()

    # Retrieve the documents most relevant to the question
    relevant_docs = retriever.invoke(question)

    # Concatenate the retrieved passages into a single context string
    context = "\n\n".join(doc.page_content for doc in relevant_docs)

    # Generate a grounded answer, instructing the model to stay within
    # the retrieved context
    prompt = f"""
    You are an AI assistant answering questions based on the provided context.
    Use only the information from the context to answer the question.
    If you don't know the answer, say "I don't know".

    Context:
    {context}

    Question: {question}

    Answer:
    """
    response = llm_model.invoke(prompt)
    return response.content
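

# Example usage: a minimal sketch, not part of the pipeline above. The
# embedding and chat models here are assumptions -- any LangChain
# embeddings class and any chat model exposing .invoke() will work.
# Assumes a Milvus instance is reachable at 127.0.0.1:19530 and the
# documents below are placeholder content.
if __name__ == "__main__":
    from langchain_core.documents import Document
    from langchain_openai import ChatOpenAI, OpenAIEmbeddings

    texts = [Document(page_content="Milvus is an open-source vector database.")]
    tables = [Document(page_content="metric: recall, value: 0.95")]
    pictures = [Document(page_content="Diagram: documents -> embeddings -> Milvus index.")]

    answer = query_pipeline(
        "What is Milvus?",
        texts,
        tables,
        pictures,
        OpenAIEmbeddings(),               # assumed embedding model
        ChatOpenAI(model="gpt-4o-mini"),  # assumed chat model
    )
    print(answer)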