File size: 2,217 Bytes
1725afa
972a93c
a8dec22
972a93c
 
 
 
 
7bf6ead
972a93c
a8dec22
1725afa
 
972a93c
a8dec22
7bf6ead
 
 
 
 
714b045
1725afa
a8dec22
 
972a93c
a8dec22
 
 
 
 
 
 
 
 
 
 
1725afa
a8dec22
 
972a93c
a8dec22
 
 
 
972a93c
a8dec22
 
972a93c
a8dec22
 
 
 
 
 
 
972a93c
a8dec22
 
972a93c
a8dec22
 
 
 
 
972a93c
a8dec22
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os
import streamlit as st
from glob import glob
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq  # βœ… Correct import

# Set page config
st.set_page_config(page_title="SMEHelpBot 🤖", layout="wide")
st.title("🤖 SMEHelpBot – Your AI Assistant for Small Businesses")

# Load API key (secrets.toml takes precedence over the environment)
GROQ_API_KEY = st.secrets.get("GROQ_API_KEY") or os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("❌ Please set your GROQ_API_KEY in environment or .streamlit/secrets.toml")
    st.stop()

os.environ["GROQ_API_KEY"] = GROQ_API_KEY


@st.cache_resource(show_spinner="🔧 Building knowledge base…")
def _build_qa_chain(pdf_paths: tuple[str, ...]):
    """Load and split the given PDFs, embed them, and build a RetrievalQA chain.

    Cached with ``st.cache_resource`` so the expensive pipeline (PDF parsing,
    HuggingFace embedding, FAISS index build) runs once per distinct set of
    documents instead of on every Streamlit rerun — without caching, every
    widget interaction (each question) rebuilt the whole index.

    Args:
        pdf_paths: Hashable, ordered tuple of PDF file paths (a tuple so it
            can serve as the cache key; a list would not be hashable).

    Returns:
        A ``RetrievalQA`` chain that returns source documents alongside answers.
    """
    # Load every page of every PDF into one document list.
    documents = []
    for path in pdf_paths:
        documents.extend(PyPDFLoader(path).load())

    # Chunk for retrieval: small chunks with overlap preserve local context.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(documents)

    # Create vector store from chunks
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(chunks, embeddings)
    retriever = vectorstore.as_retriever()

    # Set up LLM with Groq (reads GROQ_API_KEY from the environment).
    llm = ChatGroq(temperature=0.3, model_name="llama3-8b-8192")

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )


# Load all PDFs from the 'docs' folder
pdf_paths = glob("docs/*.pdf")

if not pdf_paths:
    st.warning("📁 Please place some PDF files in the `docs/` folder.")
    st.stop()

st.info(f"📄 Loaded {len(pdf_paths)} document(s) from `docs/`")

# Sort so the cache key is stable regardless of glob's ordering.
qa_chain = _build_qa_chain(tuple(sorted(pdf_paths)))

# Ask question
user_question = st.text_input("💬 Ask your question about SME documents:")

if user_question:
    with st.spinner("🤔 Thinking..."):
        # .invoke() is the supported Runnable API; calling the chain
        # directly (qa_chain({...})) is deprecated in LangChain.
        result = qa_chain.invoke({"query": user_question})
        st.success("✅ Answer:")
        st.write(result["result"])

        with st.expander("📄 Source Snippets"):
            for i, doc in enumerate(result["source_documents"]):
                st.markdown(f"**Source {i+1}:**\n{doc.page_content[:300]}...")