Spaces:
Sleeping
Sleeping
File size: 2,218 Bytes
1725afa 972a93c a8dec22 972a93c 7bf6ead 972a93c a8dec22 1725afa 972a93c a8dec22 7bf6ead 714b045 1725afa a8dec22 972a93c a8dec22 6409405 a8dec22 1725afa a8dec22 972a93c a8dec22 972a93c a8dec22 972a93c a8dec22 972a93c a8dec22 972a93c a8dec22 972a93c a8dec22 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# Standard library
import os
from glob import glob

# Third-party
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
# NOTE: loaders/embeddings/vectorstores moved to langchain_community in langchain 0.1+;
# use the community paths consistently (document_loaders already did).
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq  # Groq chat-model integration
# --- Page configuration -----------------------------------------------------
# (emoji restored from mojibake in the scraped source — "π€" was a mis-decoded 🤖)
st.set_page_config(page_title="SMEHelpBot 🤖", layout="wide")
st.title("🤖 SMEHelpBot — Your AI Assistant for Small Businesses")

# --- API key: prefer Streamlit secrets, fall back to the environment --------
GROQ_API_KEY = st.secrets.get("GROQ_API_KEY") or os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Without a key the app cannot call Groq, so stop rendering here.
    st.error("❌ Please set your GROQ_API_KEY in environment or .streamlit/secrets.toml")
    st.stop()
# Export so ChatGroq picks the key up from the environment.
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
# --- Load all PDFs from the 'docs' folder -----------------------------------
pdf_paths = glob("docs/*.pdf")
if not pdf_paths:
    # Nothing to index — tell the user and stop rendering.
    st.warning("📁 Please place some PDF files in the `docs/` folder.")
    st.stop()

# Load every page of every PDF into one document list.
documents = []
for path in pdf_paths:
    loader = PyPDFLoader(path)
    documents.extend(loader.load())

# Split into overlapping chunks sized for retrieval (500 chars, 50 overlap).
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(documents)
# --- Build the retrieval pipeline -------------------------------------------
# NOTE(review): this re-embeds and re-indexes every Streamlit rerun; consider
# wrapping index construction in a @st.cache_resource function — confirm with
# the deployment's memory budget before changing.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(chunks, embeddings)
retriever = vectorstore.as_retriever()

# --- LLM: Groq-hosted Llama 3 8B, low temperature for factual answers -------
llm = ChatGroq(temperature=0.3, model_name="llama3-8b-8192")

# --- RetrievalQA: "stuff" packs retrieved chunks into a single prompt -------
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,  # needed for the source-snippet expander below
)
# --- Question-answering UI ---------------------------------------------------
user_question = st.text_input("💬 Ask your question about SME documents:")
if user_question:
    with st.spinner("🤔 Thinking..."):
        # RetrievalQA expects the question under the "query" key and returns
        # {"result": answer, "source_documents": [...]}.
        result = qa_chain({"query": user_question})
    # (string restored — the scraped source had an unterminated, line-split literal)
    st.success("✅ Answer:")
    st.write(result["result"])
    # Show the first 300 chars of each retrieved chunk for transparency.
    with st.expander("📄 Source Snippets"):
        for i, doc in enumerate(result["source_documents"]):
            st.markdown(f"**Source {i+1}:**\n{doc.page_content[:300]}...")
|