# Quasa / app.py
import os
from glob import glob

import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
# Set page config
st.set_page_config(page_title="SMEHelpBot 🤖", layout="wide")
st.title("🤖 SMEHelpBot – Your AI Assistant for Small Businesses")
# Load API key
GROQ_API_KEY = st.secrets.get("GROQ_API_KEY") or os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("❌ Please set your GROQ_API_KEY in environment or .streamlit/secrets.toml")
    st.stop()
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
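# Note: exporting the key here lets ChatGroq below read GROQ_API_KEY from the
# environment, so it does not need to be passed to the client explicitly.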
# Load all PDFs from the 'docs' folder
pdf_paths = glob("docs/*.pdf")
if not pdf_paths:
    st.warning("📁 Please place some PDF files in the `docs/` folder.")
    st.stop()

st.info(f"📄 Loaded {len(pdf_paths)} document(s) from `docs/`")
# Load and split all PDFs
documents = []
for path in pdf_paths:
    loader = PyPDFLoader(path)
    documents.extend(loader.load())

splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(documents)
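# Note: chunk_size and chunk_overlap are measured in characters; 500/50 is a
# common starting point and may need tuning for longer or denser documents.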
# Create vector store from chunks
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
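# (all-MiniLM-L6-v2 is a small, CPU-friendly sentence-transformer producing 384-dimensional embeddings)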
vectorstore = FAISS.from_documents(chunks, embeddings)
retriever = vectorstore.as_retriever()
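# Optional sketch (not wired in): Streamlit re-runs this whole script on every
# interaction, so the FAISS index above is rebuilt for each question. Caching the
# build with st.cache_resource would avoid that; `build_vectorstore` is a
# hypothetical helper name used only for illustration.
#
#   @st.cache_resource
#   def build_vectorstore(paths):
#       docs = []
#       for p in paths:
#           docs.extend(PyPDFLoader(p).load())
#       chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(docs)
#       embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
#       return FAISS.from_documents(chunks, embeddings)
#
#   vectorstore = build_vectorstore(pdf_paths)
#   retriever = vectorstore.as_retriever()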
# Set up LLM with Groq
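# llama3-8b-8192 is a Groq-hosted Llama 3 8B model; temperature 0.3 keeps answers fairly deterministic.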
llm = ChatGroq(temperature=0.3, model_name="llama3-8b-8192")
# Build QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
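# The "stuff" chain type concatenates all retrieved chunks into a single prompt;
# return_source_documents=True lets us show the supporting snippets below.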
# Ask a question
user_question = st.text_input("💬 Ask your question about SME documents:")

if user_question:
    with st.spinner("🤔 Thinking..."):
        result = qa_chain.invoke({"query": user_question})

    st.success("✅ Answer:")
    st.write(result["result"])

    with st.expander("📄 Source Snippets"):
        for i, doc in enumerate(result["source_documents"]):
            st.markdown(f"**Source {i+1}:**\n{doc.page_content[:300]}...")
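# Run locally with `streamlit run app.py` (requires GROQ_API_KEY and PDFs in docs/).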