import streamlit as st
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain

# ========== CONFIGURATION ========== #
PINECONE_API_KEY = st.secrets["PINECONE_API_KEY"]
GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
INDEX_NAME = "rag-granite-index"
NAMESPACE = "rag-ns"

# ========== SETUP ========== #
st.set_page_config(page_title="RAG Assistant", page_icon="🤖")
st.title("💬 RAG-Powered Q&A Assistant")

# Init Pinecone and connect to the existing index
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(INDEX_NAME)

# Init Groq LLM
llm = ChatGroq(
    model_name="llama3-70b-8192",
    api_key=GROQ_API_KEY
)

# Load the embedder once per session instead of on every query.
# It must match the model used to embed the documents at indexing time.
@st.cache_resource
def load_embedder():
    return SentenceTransformer("all-MiniLM-L6-v2")

# Prompt Template
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a smart assistant. Based on the provided context, answer the question in 1 to 2 lines only.

Context: {context}

Question: {question}

Answer:"""
)

llm_chain = LLMChain(llm=llm, prompt=prompt)

# ========== STREAMLIT UI ========== #
user_query = st.text_input("Ask a question about the document 👇")

if user_query:
    with st.spinner("Fetching answer..."):
        # Embed the query with the same model used to index the documents
        embedder = load_embedder()
        query_embedding = embedder.encode(user_query).tolist()

        # Retrieve the 3 most similar chunks from Pinecone
        results = index.query(
            namespace=NAMESPACE,
            vector=query_embedding,
            top_k=3,
            include_metadata=True
        )

        # Stitch the retrieved chunks into a single context string
        context = "\n\n".join(
            match["metadata"]["text"] for match in results["matches"]
        )

        # Ask the LLM to answer using only the retrieved context
        response = llm_chain.invoke({
            "context": context,
            "question": user_query
        })

    st.success("Answer:")
    st.write(response["text"])
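
# ---------------------------------------------------------------------
# The query path above assumes "rag-granite-index" already holds
# 384-dimensional all-MiniLM-L6-v2 vectors whose metadata stores the
# chunk text under the key "text". A minimal ingestion sketch under
# that assumption (the `chunks` list and chunk IDs are hypothetical):
#
#   embedder = SentenceTransformer("all-MiniLM-L6-v2")
#   chunks = ["first document chunk...", "second document chunk..."]
#   index.upsert(
#       vectors=[
#           {
#               "id": f"chunk-{i}",
#               "values": embedder.encode(text).tolist(),
#               "metadata": {"text": text},
#           }
#           for i, text in enumerate(chunks)
#       ],
#       namespace=NAMESPACE,
#   )
# ---------------------------------------------------------------------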
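
# ---------------------------------------------------------------------
# Running the app (a sketch, assuming this file is saved as app.py and
# the two API keys live in .streamlit/secrets.toml):
#
#   pip install streamlit pinecone langchain langchain-groq sentence-transformers
#   streamlit run app.py
#
# .streamlit/secrets.toml:
#   PINECONE_API_KEY = "..."
#   GROQ_API_KEY = "..."
# ---------------------------------------------------------------------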