masadonline committed on
Commit 210135b · verified · 1 Parent(s): 108920c

Update app.py

Files changed (1)
  1. app.py +29 -31
app.py CHANGED
@@ -1,60 +1,58 @@
- import os
  import streamlit as st
  from langchain_community.document_loaders import PyPDFLoader
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain.vectorstores import FAISS
  from langchain.embeddings import HuggingFaceEmbeddings
  from langchain.chains import RetrievalQA
- from langchain_community.chat_models import ChatGroq
-
- # Set Streamlit page config
- st.set_page_config(page_title="SMEHelpBot 🤖", layout="wide")
- st.title("🤖 SMEHelpBot – Your AI Assistant for Small Businesses")

- # File uploader
- uploaded_file = st.file_uploader("📄 Upload a PDF (e.g., SME policy, business doc, etc.):", type=["pdf"])
- user_question = st.text_input("💬 Ask a question related to your document or SME operations:")

- # Set Groq API key securely (use Streamlit secrets or env var)
- GROQ_API_KEY = st.secrets.get("GROQ_API_KEY") or os.getenv("GROQ_API_KEY") or "your_groq_api_key_here"

  if uploaded_file:
-     # Save uploaded file temporarily
      with open("temp.pdf", "wb") as f:
          f.write(uploaded_file.read())
-
-     # Load PDF and split into chunks
      loader = PyPDFLoader("temp.pdf")
-     documents = loader.load()

      splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
-     chunks = splitter.split_documents(documents)

-     # Create vector store using MiniLM embeddings
      embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-     vectorstore = FAISS.from_documents(chunks, embeddings)
-     retriever = vectorstore.as_retriever()

-     # Set up LLM using Groq + LLaMA3
-     os.environ["GROQ_API_KEY"] = GROQ_API_KEY
-     llm = ChatGroq(temperature=0.3, model_name="llama3-8b-8192")

-     # QA chain with retrieval
-     qa = RetrievalQA.from_chain_type(
          llm=llm,
-         chain_type="stuff",
          retriever=retriever,
          return_source_documents=True
      )

-     if user_question:
-         with st.spinner("Generating answer..."):
-             result = qa({"query": user_question})
              st.success(result["result"])

-         with st.expander("📄 Sources"):
              for doc in result["source_documents"]:
-                 st.markdown(f"• {doc.page_content[:300]}...")

  else:
-     st.info("Upload a PDF and enter a question to begin.")
 
  import streamlit as st
  from langchain_community.document_loaders import PyPDFLoader
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain.vectorstores import FAISS
  from langchain.embeddings import HuggingFaceEmbeddings
  from langchain.chains import RetrievalQA
+ from langchain_community.llms import HuggingFaceEndpoint
+ import os

+ # --- UI ---
+ st.set_page_config(page_title="SMEHelpBot", layout="wide")
+ st.title("🤖 SMEHelpBot – Your AI Assistant for Small Business")

+ uploaded_file = st.file_uploader("📄 Upload an industry-specific PDF (policy, FAQ, etc.):", type=["pdf"])
+ user_query = st.text_input("💬 Ask a business-related question:")

+ # --- Process PDF + RAG ---
  if uploaded_file:
      with open("temp.pdf", "wb") as f:
          f.write(uploaded_file.read())
+
      loader = PyPDFLoader("temp.pdf")
+     pages = loader.load()

      splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+     chunks = splitter.split_documents(pages)

      embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+     db = FAISS.from_documents(chunks, embeddings)

+     retriever = db.as_retriever()
+
+     # --- LLM: LLaMA 3 served via HuggingFaceEndpoint ---
+     os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets.get("HF_TOKEN") or "your_api_token_here"
+
+     llm = HuggingFaceEndpoint(
+         repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
+         temperature=0.6,
+         max_new_tokens=512
+     )

+     qa_chain = RetrievalQA.from_chain_type(
          llm=llm,
          retriever=retriever,
          return_source_documents=True
      )

+     if user_query:
+         with st.spinner("Generating response..."):
+             result = qa_chain({"query": user_query})
              st.success(result["result"])

+         with st.expander("📚 Sources"):
              for doc in result["source_documents"]:
+                 st.markdown(f"• Page content: {doc.page_content[:300]}...")

  else:
+     st.info("Upload a PDF and type your question to get started.")
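For reference, a minimal headless sketch of the pipeline this commit wires up, useful for smoke-testing the new HuggingFaceEndpoint configuration outside Streamlit. This is not part of the commit: it assumes the same packages as app.py are installed, that HUGGINGFACEHUB_API_TOKEN is exported in the environment, that the account has access to the gated meta-llama/Meta-Llama-3-8B-Instruct repo, and that "sample.pdf" is a hypothetical local file.

import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFaceEndpoint

# Fail fast if no token is exported (the Streamlit app reads it from st.secrets instead).
assert os.environ.get("HUGGINGFACEHUB_API_TOKEN"), "export a valid HF token first"

# Load and chunk a local PDF the same way app.py does ("sample.pdf" is a placeholder).
pages = PyPDFLoader("sample.pdf").load()
chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(pages)

# Same embedding model and vector store as the app.
db = FAISS.from_documents(chunks, HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"))

# Same endpoint settings as the commit introduces.
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    temperature=0.6,
    max_new_tokens=512,
)

qa = RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever(), return_source_documents=True)
result = qa({"query": "Summarise the refund policy."})  # hypothetical query
print(result["result"])

Note that app.py falls back to the literal string "your_api_token_here" when HF_TOKEN is absent from st.secrets, so a real token in .streamlit/secrets.toml (e.g. HF_TOKEN = "hf_...") is presumably expected when the app is deployed.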