import streamlit as st
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain

# ========== CONFIGURATION ========== #
PINECONE_API_KEY = st.secrets["PINECONE_API_KEY"]
GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
INDEX_NAME = "rag-granite-index"
NAMESPACE = "rag-ns"

# ========== SETUP ========== #
st.set_page_config(page_title="RAG Assistant", page_icon="🤖")
st.title("💬 RAG-Powered Q&A Assistant")

# Init Pinecone and connect to the existing index
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(INDEX_NAME)

# Init Groq LLM
llm = ChatGroq(
    model_name="llama3-70b-8192",
    api_key=GROQ_API_KEY
)

# Load the embedder once per session instead of on every query.
# It must match the model used to embed the documents at indexing time.
@st.cache_resource
def load_embedder():
    return SentenceTransformer("all-MiniLM-L6-v2")

# Prompt Template
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a smart assistant. Based on the provided context, answer the question in 1 to 2 lines only.

Context: {context}

Question: {question}

Answer:"""
)

llm_chain = LLMChain(llm=llm, prompt=prompt)

# ========== STREAMLIT UI ========== #
user_query = st.text_input("Ask a question about the document 👇")

if user_query:
    with st.spinner("Fetching answer..."):
        # Embed the query with the same model used to index the documents
        embedder = load_embedder()
        query_embedding = embedder.encode(user_query).tolist()

        # Retrieve the 3 most similar chunks from Pinecone
        results = index.query(
            namespace=NAMESPACE,
            vector=query_embedding,
            top_k=3,
            include_metadata=True
        )

        # Stitch the retrieved chunks into a single context string
        context = "\n\n".join(
            match["metadata"]["text"] for match in results["matches"]
        )

        # Ask the LLM to answer using only the retrieved context
        response = llm_chain.invoke({
            "context": context,
            "question": user_query
        })

    st.success("Answer:")
    st.write(response["text"])
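
# ---------------------------------------------------------------------
# The query path above assumes "rag-granite-index" already holds
# 384-dimensional all-MiniLM-L6-v2 vectors whose metadata stores the
# chunk text under the key "text". A minimal ingestion sketch under
# that assumption (the `chunks` list and chunk IDs are hypothetical):
#
#   embedder = SentenceTransformer("all-MiniLM-L6-v2")
#   chunks = ["first document chunk...", "second document chunk..."]
#   index.upsert(
#       vectors=[
#           {
#               "id": f"chunk-{i}",
#               "values": embedder.encode(text).tolist(),
#               "metadata": {"text": text},
#           }
#           for i, text in enumerate(chunks)
#       ],
#       namespace=NAMESPACE,
#   )
# ---------------------------------------------------------------------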
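
# ---------------------------------------------------------------------
# Running the app (a sketch, assuming this file is saved as app.py and
# the two API keys live in .streamlit/secrets.toml):
#
#   pip install streamlit pinecone langchain langchain-groq sentence-transformers
#   streamlit run app.py
#
# .streamlit/secrets.toml:
#   PINECONE_API_KEY = "..."
#   GROQ_API_KEY = "..."
# ---------------------------------------------------------------------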