In [1]:
! pip install -r requirements.txt



In [2]:
from datasets import load_dataset, get_dataset_config_names

# --- RAG mini-wiki: list the configurations the dataset publishes ---
rag_config_names = get_dataset_config_names("rag-datasets/rag-mini-wikipedia")
print("RAG configs:", rag_config_names)

# --- RAG mini-wiki: raw passages ("text-corpus" config, "passages" split) ---
wiki_ds = load_dataset("rag-datasets/rag-mini-wikipedia", name="text-corpus", split="passages")
passages = wiki_ds["passage"]  # keep only the passage text column
print(f"Loaded {len(passages)} wiki passages.")

# --- SQuAD v2: only the first 1,000 training rows, to keep things fast ---
squad = load_dataset("rajpurkar/squad_v2", split="train[:1000]")
print(f"Loaded {len(squad)} SQuAD examples.")

# --- TriviaQA ("rc" config): first 1,000 validation rows ---
trivia = load_dataset("mandarjoshi/trivia_qa", name="rc", split="validation[:1000]")
print(f"Loaded {len(trivia)} TriviaQA examples.")


RAG configs: ['text-corpus', 'question-answer']
Loaded 3200 wiki passages.
Loaded 1000 SQuAD examples.


Resolving data files:   0%|          | 0/26 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/26 [00:00<?, ?it/s]

Loaded 1000 TriviaQA examples.


In [3]:
from sentence_transformers import SentenceTransformer
import faiss

# Sentence-embedding model used to vectorise the passages
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every passage; ask for a NumPy array so it can be handed to FAISS
embeddings = embedder.encode(
    passages,
    convert_to_numpy=True,
    show_progress_bar=True,
)

# Flat L2 index whose dimensionality matches the embedding width
dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(embeddings)
print(f"Indexed {index.ntotal} vectors of size {dim}.")


Batches:   0%|          | 0/100 [00:00<?, ?it/s]

Indexed 3200 vectors of size 384.


In [4]:
# ==== 4. Load & Test the LLM ====
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# 4.1) Checkpoint to load: Flan-T5 base
MODEL_NAME = "google/flan-t5-base"

# 4.2) Tokenizer and seq2seq model for that checkpoint
#      (run `huggingface-cli login` first if your environment requires authentication)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# 4.3) Wrap both in a text2text-generation pipeline
qa_pipeline = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1,  # -1 runs on CPU; use 0 if you have a GPU
)

# 4.4) Smoke test: send one prompt through and print the generated text
sanity_prompt = "Question: What is retrieval-augmented generation? Answer:"
sanity_result = qa_pipeline(sanity_prompt, max_length=50)
print(sanity_result[0]["generated_text"])


Device set to use cpu


a neural network


In [5]:
# ==== 5. Retrieval + Generation Function ====
def retrieve_and_answer(question: str, k: int = 5, max_context_chars: int = 200) -> str:
    """Answer `question` with the LLM, using the top-k retrieved passages as context.

    Relies on the notebook-level objects `embedder`, `index`, `passages`
    and `qa_pipeline` having been created by the earlier cells.

    Args:
        question: The user's natural-language question.
        k: Number of nearest passages to request from the index (must be >= 1).
        max_context_chars: Maximum number of characters of each passage that is
            copied into the prompt; longer passages are cut and marked with "...".

    Returns:
        The `generated_text` of the first pipeline result.

    Raises:
        ValueError: If `k` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # 1) Embed the question (encode() takes a batch, hence the one-element list)
    q_emb = embedder.encode([question], convert_to_numpy=True)

    # 2) Retrieve top-k passages. FAISS pads the id array with -1 when the index
    #    holds fewer than k vectors; without this filter `passages[-1]` would
    #    silently inject the *last* passage as bogus context.
    _, indices = index.search(q_emb, k)
    hit_ids = [int(idx) for idx in indices[0] if idx >= 0]

    # 3) Build a little context block, numbering only the real hits
    snippets = []
    for rank, idx in enumerate(hit_ids, start=1):
        text = passages[idx]
        # Only signal truncation when something was actually cut off
        if len(text) > max_context_chars:
            text = text[:max_context_chars] + "..."
        snippets.append(f"Context {rank}: {text}")
    contexts = "\n".join(snippets)

    # 4) Assemble prompt
    prompt = (
        "You are a helpful QA assistant. "
        "Use ONLY the following contexts to answer the question. "
        "If the answer is not contained in the contexts, respond with "
        "'Sorry, I don't know.'\n\n"
        f"{contexts}\n"
        f"Question: {question}\nAnswer:"
    )

    # 5) Generate (sampling disabled) and return the text of the first result
    out = qa_pipeline(prompt, max_length=200, do_sample=False)
    return out[0]["generated_text"]


# 5.1) End-to-end smoke test: retrieve, generate, print
sample_q = "Who conceptualized the theory of relativity?"
sample_answer = retrieve_and_answer(sample_q)
print("Answer:", sample_answer)


Answer: Isaac Newton


In [12]:
import gradio as gr

def chat_fn(user_message, history=None):
    """Gradio chat callback: answer `user_message` through the RAG pipeline.

    `gr.ChatInterface` keeps the conversation history itself: it appends the
    user's message and then whatever this function returns. The callback must
    therefore return ONLY the new assistant reply. Returning the whole history
    (or appending to the `history` list Gradio passed in) makes earlier turns
    show up again in the UI.

    Args:
        user_message: The text the user just submitted.
        history: Previous turns supplied by Gradio. Accepted because
            ChatInterface always passes it, but not used: retrieval here is
            single-turn and the list is deliberately left unmodified.

    Returns:
        The assistant's answer as a plain string.
    """
    # Retrieve supporting passages and generate the reply for this turn only
    return retrieve_and_answer(user_message, k=5)




In [14]:
# Launch the chat UI using the "messages" history format
gr.ChatInterface(
    fn=chat_fn,
    type="messages",            # history is exchanged as role/content dicts
    title="🔍 RAG QA Demo",
    description="I only answer from retrieved contexts; otherwise I'll say 'Sorry, I don't know.'"
).launch(share=True)            # share=True requests a public link; remove it to stay local-only

* Running on local URL:  http://127.0.0.1:7865

Could not create share link. Missing file: C:\Users\victo\.cache\huggingface\gradio\frpc\frpc_windows_amd64_v0.3. 

Please check your internet connection. This can happen if your antivirus software blocks the download of this file. You can install manually by following these steps: 

1. Download this file: https://cdn-media.huggingface.co/frpc-gradio-0.3/frpc_windows_amd64.exe
2. Rename the downloaded file to: frpc_windows_amd64_v0.3
3. Move the file to this location: C:\Users\victo\.cache\huggingface\gradio\frpc


