masadonline committed on
Commit
6785822
·
verified ·
1 Parent(s): 1b3ae27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -39
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
- import streamlit as st
 
3
  import PyPDF2
4
  from pdfminer.high_level import extract_text
5
  from transformers import AutoTokenizer
@@ -7,9 +8,9 @@ from sentence_transformers import SentenceTransformer
7
  import faiss
8
  import numpy as np
9
  from groq import Groq
10
- import docx # to read .docx files
11
 
12
- # --- Helper Functions ---
13
 
14
  def extract_text_from_pdf(pdf_path):
15
  try:
@@ -23,7 +24,7 @@ def extract_text_from_pdf(pdf_path):
23
  text += page_text
24
  return text
25
  except Exception as e:
26
- st.warning(f"PyPDF2 failed with error: {e}. Trying pdfminer.six...")
27
  return extract_text(pdf_path)
28
 
29
  def extract_text_from_docx(docx_path):
@@ -34,7 +35,7 @@ def extract_text_from_docx(docx_path):
34
  full_text.append(para.text)
35
  return '\n'.join(full_text)
36
  except Exception as e:
37
- st.warning(f"Failed to read DOCX {docx_path}: {e}")
38
  return ""
39
 
40
  def chunk_text_with_tokenizer(text, tokenizer, chunk_size=150, chunk_overlap=30):
@@ -57,7 +58,7 @@ def retrieve_relevant_chunks(question, index, embeddings_model, text_chunks, k=3
57
 
58
  def generate_answer_with_groq(question, context):
59
  prompt = f"Based on the following context, answer the question: '{question}'\n\nContext:\n{context}"
60
- model_name = "llama-3.3-70b-versatile" # Adjust model if needed
61
  try:
62
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
63
  response = groq_client.chat.completions.create(
@@ -69,49 +70,34 @@ def generate_answer_with_groq(question, context):
69
  )
70
  return response.choices[0].message.content
71
  except Exception as e:
72
- st.error(f"Error generating answer with Groq API: {e}")
73
  return "I'm sorry, I couldn't generate an answer at this time."
74
 
75
- # --- Streamlit UI & Logic ---
76
 
77
- st.set_page_config(page_title="SMEHelpBot πŸ€–", layout="wide")
78
- st.title("πŸ€– SMEHelpBot – Your AI Assistant for Small Businesses")
79
-
80
- # GROQ API key check
81
- GROQ_API_KEY = st.secrets.get("GROQ_API_KEY") or os.getenv("GROQ_API_KEY")
82
- if not GROQ_API_KEY:
83
- st.error("❌ Please set your GROQ_API_KEY in environment or .streamlit/secrets.toml")
84
- st.stop()
85
-
86
- os.environ["GROQ_API_KEY"] = GROQ_API_KEY
87
-
88
- # Load and process all docs at startup
89
- @st.cache_data(show_spinner=True)
90
  def load_and_prepare_docs(folder_path="docs"):
 
91
  all_text = ""
92
  if not os.path.exists(folder_path):
93
- st.error(f"Folder '{folder_path}' does not exist!")
94
  return None, None, None
95
 
96
- # Collect all pdf and docx files
97
  files = [f for f in os.listdir(folder_path) if f.lower().endswith(('.pdf', '.docx', '.doc'))]
98
  if not files:
99
- st.error(f"No PDF or DOCX files found in folder '{folder_path}'.")
100
  return None, None, None
101
 
102
  for file in files:
103
  path = os.path.join(folder_path, file)
104
  if file.lower().endswith('.pdf'):
105
  text = extract_text_from_pdf(path)
106
- elif file.lower().endswith(('.docx', '.doc')):
107
- text = extract_text_from_docx(path)
108
  else:
109
- text = ""
110
  if text:
111
  all_text += text + "\n\n"
112
 
113
  if not all_text.strip():
114
- st.error("No text extracted from documents.")
115
  return None, None, None
116
 
117
  tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
@@ -121,26 +107,54 @@ def load_and_prepare_docs(folder_path="docs"):
121
  all_embeddings = embedding_model.encode(text_chunks) if text_chunks else None
122
 
123
  if all_embeddings is None or len(all_embeddings) == 0:
124
- st.error("No text chunks found to create embeddings.")
125
  return None, None, None
126
 
127
  embedding_dim = all_embeddings[0].shape[0]
128
  index = faiss.IndexFlatL2(embedding_dim)
129
  index.add(np.array(all_embeddings))
130
 
 
131
  return index, embedding_model, text_chunks
132
 
 
 
 
 
 
 
 
133
  index, embedding_model, text_chunks = load_and_prepare_docs()
134
 
135
- user_question = st.text_input("πŸ’¬ Ask your question about SME documents:")
 
 
 
 
 
 
 
 
 
 
 
136
 
137
- if st.button("Get Answer") and user_question:
138
  if index is None or embedding_model is None or text_chunks is None:
139
- st.error("The document knowledge base is not ready. Please check the errors above.")
140
- else:
141
- with st.spinner("Searching for relevant information and generating answer..."):
142
- relevant_chunks = retrieve_relevant_chunks(user_question, index, embedding_model, text_chunks)
143
- context = "\n\n".join(relevant_chunks)
144
- answer = generate_answer_with_groq(user_question, context)
145
- st.markdown("### Answer:")
146
- st.success(answer)
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ from flask import Flask, request
3
+ from twilio.twiml.messaging_response import MessagingResponse
4
  import PyPDF2
5
  from pdfminer.high_level import extract_text
6
  from transformers import AutoTokenizer
 
8
  import faiss
9
  import numpy as np
10
  from groq import Groq
11
+ import docx
12
 
13
+ # --- Helper functions from your code ---
14
 
15
  def extract_text_from_pdf(pdf_path):
16
  try:
 
24
  text += page_text
25
  return text
26
  except Exception as e:
27
+ print(f"PyPDF2 failed with error: {e}. Trying pdfminer.six...")
28
  return extract_text(pdf_path)
29
 
30
  def extract_text_from_docx(docx_path):
 
35
  full_text.append(para.text)
36
  return '\n'.join(full_text)
37
  except Exception as e:
38
+ print(f"Failed to read DOCX {docx_path}: {e}")
39
  return ""
40
 
41
  def chunk_text_with_tokenizer(text, tokenizer, chunk_size=150, chunk_overlap=30):
 
58
 
59
  def generate_answer_with_groq(question, context):
60
  prompt = f"Based on the following context, answer the question: '{question}'\n\nContext:\n{context}"
61
+ model_name = "llama-3.3-70b-versatile" # Adjust if needed
62
  try:
63
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
64
  response = groq_client.chat.completions.create(
 
70
  )
71
  return response.choices[0].message.content
72
  except Exception as e:
73
+ print(f"Error generating answer with Groq API: {e}")
74
  return "I'm sorry, I couldn't generate an answer at this time."
75
 
76
+ # --- Load and prepare docs on startup ---
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  def load_and_prepare_docs(folder_path="docs"):
79
+ print("Loading documents from", folder_path)
80
  all_text = ""
81
  if not os.path.exists(folder_path):
82
+ print(f"Folder '{folder_path}' does not exist!")
83
  return None, None, None
84
 
 
85
  files = [f for f in os.listdir(folder_path) if f.lower().endswith(('.pdf', '.docx', '.doc'))]
86
  if not files:
87
+ print(f"No PDF or DOCX files found in folder '{folder_path}'.")
88
  return None, None, None
89
 
90
  for file in files:
91
  path = os.path.join(folder_path, file)
92
  if file.lower().endswith('.pdf'):
93
  text = extract_text_from_pdf(path)
 
 
94
  else:
95
+ text = extract_text_from_docx(path)
96
  if text:
97
  all_text += text + "\n\n"
98
 
99
  if not all_text.strip():
100
+ print("No text extracted from documents.")
101
  return None, None, None
102
 
103
  tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
 
107
  all_embeddings = embedding_model.encode(text_chunks) if text_chunks else None
108
 
109
  if all_embeddings is None or len(all_embeddings) == 0:
110
+ print("No text chunks found to create embeddings.")
111
  return None, None, None
112
 
113
  embedding_dim = all_embeddings[0].shape[0]
114
  index = faiss.IndexFlatL2(embedding_dim)
115
  index.add(np.array(all_embeddings))
116
 
117
+ print("Documents loaded and FAISS index created.")
118
  return index, embedding_model, text_chunks
119
 
120
# --- Flask application and WhatsApp webhook wiring ---

from flask_cors import CORS

app = Flask(__name__)
# Cross-origin requests are optional here; only needed if the API is called
# from another domain (the Twilio webhook itself does not require it).
CORS(app)

# Build the knowledge base a single time at startup so each incoming
# message only pays for retrieval, not re-indexing.
index, embedding_model, text_chunks = load_and_prepare_docs()
128
 
129
@app.route("/whatsapp", methods=["POST"])
def whatsapp_reply():
    """Twilio WhatsApp webhook: answer an incoming question using the doc index.

    Reads the message text from the form-encoded Twilio POST, retrieves
    relevant chunks from the FAISS index, asks Groq for an answer, and
    returns TwiML so Twilio delivers the reply to the sender.
    """
    incoming_msg = request.values.get('Body', '').strip()
    from_number = request.values.get('From', '')
    print(f"Incoming message from {from_number}: {incoming_msg}")

    resp = MessagingResponse()
    msg = resp.message()

    if not incoming_msg:
        # Twilio sent an empty body (e.g. media-only message).
        msg.body("Please send a question.")
    elif index is None or embedding_model is None or text_chunks is None:
        # Startup indexing failed; nothing to search against.
        msg.body("Sorry, the knowledge base is not ready. Please try again later.")
    else:
        # Retrieve supporting context, then generate the answer with Groq.
        relevant = retrieve_relevant_chunks(incoming_msg, index, embedding_model, text_chunks)
        answer = generate_answer_with_groq(incoming_msg, "\n\n".join(relevant))
        msg.body(answer)

    return str(resp)
153
+
154
if __name__ == "__main__":
    import sys  # local import: only needed for the startup-failure exit path

    # Fail fast if the Groq credential is missing — generate_answer_with_groq
    # reads GROQ_API_KEY from the environment on every request.
    GROQ_API_KEY = os.getenv("GROQ_API_KEY")
    if not GROQ_API_KEY:
        print("Please set the GROQ_API_KEY environment variable before running.")
        # sys.exit is the canonical script exit; the bare `exit()` builtin is a
        # site-module convenience intended for interactive sessions.
        sys.exit(1)
    print("Starting WhatsApp SMEHelpBot server...")
    # Bind on all interfaces; PORT env var overrides the default 5000
    # (common on PaaS hosts that inject the listening port).
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 5000)))