import signal
import sys

from llama_cpp import Llama

from step3_embed import embedder
from step4_faiss import load_index, search_index
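
# Paths to the local GGUF model and to the FAISS index / chunk store
# produced by the earlier indexing steps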
MODEL_PATH = "models/Nous-Hermes-2-Mistral-7B-DPO.Q4_K_M.gguf"
INDEX_PATH = "vectordb/index.faiss"
CHUNKS_PATH = "vectordb/chunks.pkl"
MAX_CONTEXT_CHARS = 3000  # cap on the retrieved context injected into the prompt
# Initialise the local LLM (llama.cpp)
llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=4)
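# n_ctx=2048 bounds prompt + completion length; n_threads is a CPU thread
# count to tune for your machine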
# Load the FAISS index and the matching text chunks
index, chunks = load_index(INDEX_PATH, CHUNKS_PATH)

# Quick sanity check that the expected document made it into the chunk store
if any("nicolas" in chunk.lower() for chunk in chunks):
    print("Nicolas is in the chunks")
# Exit cleanly on CTRL+C
def exit_gracefully(sig, frame):
    print("\n👋 Goodbye!")
    sys.exit(0)

signal.signal(signal.SIGINT, exit_gracefully)
print("🧠 RAG CLI interactif (CTRL+C pour quitter)") | |
print("Pose ta question :") | |
while True:
    question = input("\n❓> ").strip()
    if not question:
        continue

    # Embed the query with the same model used to build the index
    query_embedding = embedder.encode([question], convert_to_numpy=True)

    # Retrieve the top-3 most similar chunks
    indices, _ = search_index(index, query_embedding, top_k=3)
    for i in indices:
        print(f"\n--- Chunk {i} ---\n{chunks[i]}")

    # Concatenate the retrieved chunks and cap their length so the prompt
    # stays within the model's 2048-token context window
    context = "\n\n".join(chunks[i] for i in indices)
    truncated_context = context[:MAX_CONTEXT_CHARS]
prompt = f"""### Instruction: En te basant uniquement sur le contexte ci-dessous, réponds à la question de manière précise et en français. | |
Contexte : | |
{truncated_context} | |
Question : {question} | |
### Réponse:""" | |
    # Generate the answer, stopping if the model starts a new instruction block
    output = llm(prompt, max_tokens=128, stop=["### Instruction:"])
    response = output["choices"][0]["text"].strip().split("###")[0]
    print(f"\n💬 Answer: {response}")