"""Minimal RAG pipeline: load a text file, split it into chunks, embed the
chunks on a remote Ollama server, index them in Chroma, and answer a query
with a retrieval chain backed by an OpenAI-compatible remote LLM."""

import os

from dotenv import load_dotenv
from langchain import hub
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

load_dotenv()

# ——— CONFIGURATION ———
DATA_PATH = "data.txt"
OLLAMA_URL = os.getenv(
    "OLLAMA_SERVER", "https://chandimaprabath-ollama-server.hf.space"
)
EMBED_MODEL = "nomic-embed-text:latest"
LLM_API_KEY = os.getenv("LLM_API_KEY")
LLM_API_BASE = "https://llm.chutes.ai/v1"
LLM_MODEL = "chutesai/Llama-4-Scout-17B-16E-Instruct"
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 0
TOP_K = 5
# ————————————————

# Pull the standard retrieval-QA chat prompt from the LangChain hub.
retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")


def main():
    # 1) Load the source document and split it into fixed-size chunks.
    docs = TextLoader(DATA_PATH).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
    )
    chunks = splitter.split_documents(docs)
    print(f"→ Split into {len(chunks)} chunks")

    # 2) Embed the chunks on the remote Ollama server and index them in Chroma.
    embedder = OllamaEmbeddings(base_url=OLLAMA_URL, model=EMBED_MODEL)
    vector_store = Chroma.from_documents(chunks, embedder)

    # 3) Configure the remote, OpenAI-compatible LLM.
    llm = ChatOpenAI(api_key=LLM_API_KEY, base_url=LLM_API_BASE, model=LLM_MODEL)

    # 4) Build the RAG chain with LCEL-style helpers.
    retriever = vector_store.as_retriever(search_kwargs={"k": TOP_K})
    combine_chain = create_stuff_documents_chain(
        llm=llm, prompt=retrieval_qa_chat_prompt
    )
    retrieval_chain = create_retrieval_chain(retriever, combine_chain)

    # 5) Run a query. The chain returns a dict with "input", "context",
    #    and "answer" keys; print only the generated answer.
    query = "Who jumped over the lazy dog?"
    print("🔎 Query:", query)
    result = retrieval_chain.invoke({"input": query})
    print("\n📝 Answer:\n", result["answer"])


if __name__ == "__main__":
    main()
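
# ——— OPTIONAL: persist the Chroma index (appendix sketch) ———
# A minimal sketch, not wired into main() above, showing how the index could
# be reused across runs instead of re-embedding on every start. Assumptions:
# the on-disk path CHROMA_DIR and the helper name build_or_load_index are
# hypothetical additions; persist_directory is a standard Chroma argument.
CHROMA_DIR = "chroma_db"  # hypothetical location for the persisted index


def build_or_load_index(chunks, embedder):
    """Load a persisted Chroma index if one exists; otherwise build and persist it."""
    if os.path.isdir(CHROMA_DIR):
        # Reuse the existing index; no re-embedding needed.
        return Chroma(persist_directory=CHROMA_DIR, embedding_function=embedder)
    # First run: embed the chunks and write the index to disk.
    return Chroma.from_documents(chunks, embedder, persist_directory=CHROMA_DIR)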