import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFaceEndpoint
import os
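# Assumed dependencies for the imports above: streamlit, langchain,
# langchain-community, faiss-cpu, sentence-transformers, pypdf, huggingface-hub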

# --- UI ---
st.set_page_config(page_title="SMEHelpBot", layout="wide")
st.title("πŸ€– SMEHelpBot – Your AI Assistant for Small Business")

uploaded_file = st.file_uploader("πŸ“„ Upload an industry-specific PDF (policy, FAQ, etc.):", type=["pdf"])
user_query = st.text_input("πŸ’¬ Ask a business-related question:")

# --- Process PDF + RAG ---
if uploaded_file:
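    # PyPDFLoader reads from a file path, so persist the upload to a temporary file first.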
    with open("temp.pdf", "wb") as f:
        f.write(uploaded_file.read())
    
    loader = PyPDFLoader("temp.pdf")
    pages = loader.load()

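    # Split the pages into ~500-character chunks with a 50-character overlap so
    # related sentences are not cut apart at chunk boundaries.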
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(pages)

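    # Embed each chunk locally with a small sentence-transformers model and
    # index the vectors in an in-memory FAISS store.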
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    db = FAISS.from_documents(chunks, embeddings)

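    # Expose the store as a retriever (top-k similarity search by default).
    # Note: the index is rebuilt on every Streamlit rerun; st.cache_resource
    # could cache it per uploaded file if that becomes a bottleneck.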
    retriever = db.as_retriever()

    # --- Hugging Face Inference API (Llama 3 via HuggingFaceEndpoint) ---
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets.get("HF_TOKEN") or "your_api_token_here"

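    # Hosted Llama 3 8B Instruct served through the HF Inference API. Note that
    # meta-llama repos are gated, so the token's account must have been granted
    # access to this model.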
    llm = HuggingFaceEndpoint(
        repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
        temperature=0.6,
        max_new_tokens=512
    )

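    # RetrievalQA fetches the top-matching chunks and stuffs them into the LLM
    # prompt (chain_type defaults to "stuff").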
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        return_source_documents=True
    )

    if user_query:
        with st.spinner("Generating response..."):
            result = qa_chain.invoke({"query": user_query})
            st.success(result["result"])

            with st.expander("πŸ“š Sources"):
                for doc in result["source_documents"]:
                    st.markdown(f"β€’ Page content: {doc.page_content[:300]}...")

else:
    st.info("Upload a PDF and type your question to get started.")