# day3/rag_system.py
from typing import List, Dict
import os
import chromadb
from pdf_loader import load_pdf
from optimal_chunker import OptimalChunker
from embeddings import embed_texts
# The LLM itself is created lazily, later (see _ensure_llm)
from langchain_groq import ChatGroq
from dotenv import load_dotenv
load_dotenv()
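
# NOTE (assumed interfaces, not defined in this file): the local helper modules
# are expected to look roughly like this, judging from how they are used below:
#   load_pdf(path: str) -> list[Document]             # each item exposes .page_content
#   OptimalChunker().fit_on_text(text: str)           # fits and returns the best strategy
#   OptimalChunker().transform() -> list[str]         # returns the resulting chunks
#   embed_texts(texts: list[str]) -> list[list[float]]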
class RAGPipeline:
    def __init__(self, persist_dir: str = "./chroma_db_space", collection_name: str = "pdf_docs"):
        # Vector DB (Chroma 1.x)
        self.client = chromadb.PersistentClient(path=persist_dir)
        self.col = self.client.get_or_create_collection(name=collection_name)
        # Chunker
        self.chunker = OptimalChunker()
        # The LLM is NOT created here (lazy)
        self.llm = None
    def _ensure_llm(self):
        """Create the LLM lazily (on the first query) if GROQ_API_KEY is set."""
        if self.llm is None:
            api_key = os.getenv("GROQ_API_KEY")
            if not api_key:
                # Fail with a clear message so the app can still start without the LLM
                raise RuntimeError(
                    "GROQ_API_KEY not found. Add it under Space Settings → Variables and secrets."
                )
            self.llm = ChatGroq(model="llama3-8b-8192", temperature=0.0, api_key=api_key)
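
    # Design note: deferring ChatGroq construction means the Space can boot and
    # index PDFs even when the secret is absent; the error only surfaces at query time.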
    # 1) Load  2) Chunk  3) Embed  4) Upsert to Chroma
    def index_document(self, pdf_path: str, doc_id_prefix: str = "doc") -> Dict:
        docs = load_pdf(pdf_path)
        text = "\n\n".join(d.page_content for d in docs)
        summary = self.chunker.fit_on_text(text)
        chunks = self.chunker.transform()
        vectors = embed_texts(chunks)  # list[list[float]]
        ids = [f"{doc_id_prefix}-{i}" for i in range(len(chunks))]
        self.col.add(
            ids=ids,
            documents=chunks,
            embeddings=vectors,
            metadatas=[{"source": pdf_path, "chunk": i} for i in range(len(chunks))],
        )
        return {"chunks_indexed": len(chunks), "best_strategy": summary}
    # 5) Retrieve  6) Ask LLM
    def query(self, question: str, k: int = 4) -> Dict:
        # Create the LLM only now (a missing secret raises a clear error here)
        self._ensure_llm()
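        # Assumption: query_texts makes Chroma embed the question with the
        # collection's embedding function (all-MiniLM-L6-v2 by default). That only
        # matches the stored vectors if embed_texts uses the same model; otherwise
        # pass query_embeddings=embed_texts([question]) instead.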
        results = self.col.query(query_texts=[question], n_results=k)
        chunks: List[str] = results["documents"][0] if results.get("documents") else []
        context = "\n\n".join(chunks)
        prompt = f"""You are an extraction assistant. Use ONLY the Context to answer.
Rules:
- If the answer is explicitly present in Context, return that substring EXACTLY.
- Do not paraphrase. Do not add words. Return a verbatim span from Context.
- If the answer is not in Context, reply exactly: I don't know
Question: {question}
Context:
{context}
Answer (verbatim from Context):"""
        resp = self.llm.invoke(prompt)
        answer = resp.content.strip() if hasattr(resp, "content") else str(resp)
        # Fallback: if the model answers "I don't know" but context was retrieved,
        # surface the top chunk rather than nothing
        if (not answer or answer.lower().startswith("i don't know")) and context.strip():
            answer = chunks[0] if chunks else "I don't know"
        return {"answer": answer, "used_chunks": len(chunks), "context_preview": context[:500]}
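

# Minimal usage sketch (hypothetical: "sample.pdf" and the question are
# placeholders; the query step requires GROQ_API_KEY to be set):
if __name__ == "__main__":
    rag = RAGPipeline()
    print(rag.index_document("sample.pdf"))
    print(rag.query("What is the document about?"))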