masadonline committed on
Commit
a8dec22
·
verified ·
1 Parent(s): d627e2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -37
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import streamlit as st
 
3
  from langchain_community.document_loaders import PyPDFLoader
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain.vectorstores import FAISS
@@ -7,11 +8,11 @@ from langchain.embeddings import HuggingFaceEmbeddings
7
  from langchain.chains import RetrievalQA
8
  from langchain_groq import ChatGroq # ✅ Correct import
9
 
10
- # Set up Streamlit UI
11
  st.set_page_config(page_title="SMEHelpBot 🤖", layout="wide")
12
  st.title("🤖 SMEHelpBot – Your AI Assistant for Small Businesses")
13
 
14
- # Set Groq API key (use .streamlit/secrets.toml or environment variable)
15
  GROQ_API_KEY = st.secrets.get("GROQ_API_KEY") or os.getenv("GROQ_API_KEY")
16
  if not GROQ_API_KEY:
17
  st.error("❌ Please set your GROQ_API_KEY in environment or .streamlit/secrets.toml")
@@ -19,46 +20,49 @@ if not GROQ_API_KEY:
19
 
20
  os.environ["GROQ_API_KEY"] = GROQ_API_KEY
21
 
22
- # Upload PDF
23
- uploaded_file = st.file_uploader("📄 Upload a PDF (e.g., SME policies, documents):", type=["pdf"])
24
- user_question = st.text_input("💬 Ask a question about the uploaded document:")
25
 
26
- if uploaded_file:
27
- with open("temp.pdf", "wb") as f:
28
- f.write(uploaded_file.read())
 
 
 
 
 
 
 
 
29
 
30
- # Load PDF and split into chunks
31
- loader = PyPDFLoader("temp.pdf")
32
- documents = loader.load()
33
 
34
- splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
35
- chunks = splitter.split_documents(documents)
 
 
36
 
37
- # Create vector store
38
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
39
- vectorstore = FAISS.from_documents(chunks, embeddings)
40
- retriever = vectorstore.as_retriever()
41
 
42
- # Load Groq LLaMA3
43
- llm = ChatGroq(temperature=0.3, model_name="llama3-8b-8192")
 
 
 
 
 
44
 
45
- # Set up RetrievalQA chain
46
- qa_chain = RetrievalQA.from_chain_type(
47
- llm=llm,
48
- chain_type="stuff",
49
- retriever=retriever,
50
- return_source_documents=True
51
- )
52
 
53
- # Handle user query
54
- if user_question:
55
- with st.spinner("🤔 Thinking..."):
56
- result = qa_chain({"query": user_question})
57
- st.success("✅ Answer:")
58
- st.write(result["result"])
59
 
60
- with st.expander("📄 Source Snippets"):
61
- for i, doc in enumerate(result["source_documents"]):
62
- st.markdown(f"**Source {i+1}:**\n{doc.page_content[:300]}...")
63
- else:
64
- st.info("📂 Upload a PDF and ask a question to get started.")
 
1
  import os
2
  import streamlit as st
3
+ from glob import glob
4
  from langchain_community.document_loaders import PyPDFLoader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain.vectorstores import FAISS
 
8
  from langchain.chains import RetrievalQA
9
  from langchain_groq import ChatGroq # ✅ Correct import
10
 
11
+ # Set page config
12
  st.set_page_config(page_title="SMEHelpBot 🤖", layout="wide")
13
  st.title("🤖 SMEHelpBot – Your AI Assistant for Small Businesses")
14
 
15
+ # Load API key
16
  GROQ_API_KEY = st.secrets.get("GROQ_API_KEY") or os.getenv("GROQ_API_KEY")
17
  if not GROQ_API_KEY:
18
  st.error("❌ Please set your GROQ_API_KEY in environment or .streamlit/secrets.toml")
 
20
 
21
  os.environ["GROQ_API_KEY"] = GROQ_API_KEY
22
 
23
+ # Load all PDFs from the 'docs' folder
24
+ pdf_paths = glob("docs/*.pdf")
 
25
 
26
+ if not pdf_paths:
27
+ st.warning("📁 Please place some PDF files in the `docs/` folder.")
28
+ st.stop()
29
+
30
+ st.info(f"📄 Loaded {len(pdf_paths)} document(s) from `docs/`")
31
+
32
+ # Load and split all PDFs
33
+ documents = []
34
+ for path in pdf_paths:
35
+ loader = PyPDFLoader(path)
36
+ documents.extend(loader.load())
37
 
38
+ splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
39
+ chunks = splitter.split_documents(documents)
 
40
 
41
+ # Create vector store from chunks
42
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
43
+ vectorstore = FAISS.from_documents(chunks, embeddings)
44
+ retriever = vectorstore.as_retriever()
45
 
46
+ # Set up LLM with Groq
47
+ llm = ChatGroq(temperature=0.3, model_name="llama3-8b-8192")
 
 
48
 
49
+ # Build QA chain
50
+ qa_chain = RetrievalQA.from_chain_type(
51
+ llm=llm,
52
+ chain_type="stuff",
53
+ retriever=retriever,
54
+ return_source_documents=True
55
+ )
56
 
57
+ # Ask question
58
+ user_question = st.text_input("💬 Ask your question about SME documents:")
 
 
 
 
 
59
 
60
+ if user_question:
61
+ with st.spinner("🤔 Thinking..."):
62
+ result = qa_chain({"query": user_question})
63
+ st.success("✅ Answer:")
64
+ st.write(result["result"])
 
65
 
66
+ with st.expander("📄 Source Snippets"):
67
+ for i, doc in enumerate(result["source_documents"]):
68
+ st.markdown(f"**Source {i+1}:**\n{doc.page_content[:300]}...")