masadonline committed on
Commit
013dd9f
·
verified ·
1 Parent(s): 3cdab77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -61
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import streamlit as st
2
  import os
3
  import json
4
- import tempfile
5
  import pdfplumber
6
  import faiss
7
  import numpy as np
@@ -22,68 +21,65 @@ EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
22
  LLM_MODEL = "llama3-8b-8192"
23
  embedder = SentenceTransformer(EMBEDDING_MODEL)
24
 
25
- # Streamlit app setup
26
  st.set_page_config(page_title="🧸 ToyShop Assistant", layout="wide")
27
  st.title("🧸 ToyShop RAG-Based Assistant")
28
 
29
- # --- Helper functions ---
30
-
31
  def extract_pdf_text(file):
32
  text = ""
33
  with pdfplumber.open(file) as pdf:
34
  for page in pdf.pages:
35
- page_text = page.extract_text()
36
- if page_text:
37
- text += page_text + "\n"
38
  return text.strip()
39
 
 
 
 
 
 
 
 
 
 
 
40
  def load_json_orders(json_file):
41
- valid_orders = []
42
  try:
43
  data = json.load(json_file)
44
- if isinstance(data, list):
45
- for i, order in enumerate(data):
46
- try:
47
- json.dumps(order) # test serialization
48
- valid_orders.append(order)
49
- except Exception as e:
50
- st.warning(f"⚠️ Skipping invalid order at index {i}: {e}")
51
- elif isinstance(data, dict):
52
- for k, order in data.items():
53
- try:
54
- json.dumps(order)
55
- valid_orders.append(order)
56
- except Exception as e:
57
- st.warning(f"⚠️ Skipping invalid order with key '{k}': {e}")
58
  except Exception as e:
59
- st.error(f"❌ Error parsing JSON file: {e}")
60
- return valid_orders
61
 
62
- def build_index(text_chunks):
63
- vectors = embedder.encode(text_chunks)
64
  index = faiss.IndexFlatL2(vectors.shape[1])
65
  index.add(np.array(vectors))
66
- return index, text_chunks
67
 
68
  def ask_llm(context, query):
69
- prompt = (
70
- f"You are a helpful assistant for an online toy shop.\n\n"
71
- f"Knowledge base:\n{context}\n\n"
72
- f"Question: {query}"
73
- )
74
- # For debugging: show the prompt being sent.
75
- st.expander("Prompt to LLM").code(prompt)
76
-
77
  response = client.chat.completions.create(
78
  model=LLM_MODEL,
79
  messages=[{"role": "user", "content": prompt}]
80
  )
81
- # Log full response for inspection (can be commented out in production)
82
- st.expander("Raw LLM API Response").json(response)
83
  return response.choices[0].message.content.strip()
84
 
85
- # --- File upload section ---
86
-
87
  st.subheader("πŸ“ Upload Customer Orders (JSON)")
88
  orders_file = st.file_uploader("Upload JSON file", type="json")
89
 
@@ -92,57 +88,50 @@ pdf_files = st.file_uploader("Upload one or more PDFs", type="pdf", accept_multi
92
 
93
  order_chunks, pdf_chunks = [], []
94
 
95
- # --- Process JSON ---
96
  if orders_file:
97
  orders = load_json_orders(orders_file)
98
  if orders:
99
- order_chunks = [json.dumps(order, ensure_ascii=False) for order in orders]
100
- st.success(f"βœ… Loaded {len(order_chunks)} customer order records.")
101
- # Attempt to flatten for viewing
102
  try:
103
  df = pd.json_normalize(orders)
104
  st.dataframe(df, use_container_width=True)
105
  except Exception:
106
- st.warning("⚠️ Nested JSON detected. Showing raw JSON preview instead.")
107
  st.json(orders)
108
- else:
109
- st.error("No valid orders found in the JSON file.")
110
 
111
- # --- Process PDFs ---
112
  if pdf_files:
113
- for pdf_file in pdf_files:
114
  try:
115
- text = extract_pdf_text(pdf_file)
116
- # Split into paragraphs (non-empty lines)
117
- paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
118
- pdf_chunks.extend(paragraphs)
119
- st.success(f"πŸ“„ Processed {pdf_file.name}")
120
  except Exception as e:
121
- st.error(f"❌ Failed to read {pdf_file.name}: {e}")
122
 
 
123
  combined_chunks = order_chunks + pdf_chunks
124
 
125
- # --- Question Answering Section ---
126
  if combined_chunks:
127
  index, sources = build_index(combined_chunks)
128
 
129
  st.subheader("❓ Ask a Question")
130
- user_query = st.text_input("What would you like to know?", placeholder="e.g. What is the status of order 123?")
131
 
132
  if user_query:
133
  query_vector = embedder.encode([user_query])
134
  D, I = index.search(query_vector, k=5)
135
- # Prepare context from the top-K results:
136
  context = "\n---\n".join([sources[i] for i in I[0]])
137
- st.expander("Combined Context").code(context)
138
 
139
  with st.spinner("πŸ€” Thinking..."):
140
  try:
141
  answer = ask_llm(context, user_query)
142
  st.markdown("### 🧠 Answer")
143
- # Use st.write() to render the answer as text.
144
  st.write(answer)
145
  except Exception as e:
146
- st.error(f"❌ GROQ API Error: {e}")
147
  else:
148
- st.info("πŸ“‚ Please upload both JSON orders and relevant PDFs to begin.")
 
1
  import streamlit as st
2
  import os
3
  import json
 
4
  import pdfplumber
5
  import faiss
6
  import numpy as np
 
21
  LLM_MODEL = "llama3-8b-8192"
22
  embedder = SentenceTransformer(EMBEDDING_MODEL)
23
 
24
+ # Streamlit UI
25
  st.set_page_config(page_title="🧸 ToyShop Assistant", layout="wide")
26
  st.title("🧸 ToyShop RAG-Based Assistant")
27
 
 
 
28
def extract_pdf_text(file):
    """Extract the text of every page of a PDF as one newline-joined string.

    Args:
        file: A path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        str: Page texts joined by newlines, stripped of surrounding
        whitespace. Pages with no extractable text are skipped.
    """
    page_texts = []
    with pdfplumber.open(file) as pdf:
        for page in pdf.pages:
            extracted = page.extract_text()
            if extracted:
                page_texts.append(extracted)
    return "\n".join(page_texts).strip()
36
 
37
def flatten_order(order):
    """Render a single order dict as newline-separated ``key: value`` lines.

    Nested dicts/lists are serialized compactly with ``json.dumps`` so each
    order becomes one flat text chunk suitable for embedding.

    Args:
        order: An order record; anything that is not a dict yields "".

    Returns:
        str: One line per key, joined with newlines.
    """
    if not isinstance(order, dict):
        return ""
    lines = []
    for key, value in order.items():
        rendered = (
            json.dumps(value, ensure_ascii=False)
            if isinstance(value, (dict, list))
            else value
        )
        lines.append(f"{key}: {rendered}")
    return "\n".join(lines)
46
+
47
def load_json_orders(json_file):
    """Parse an uploaded JSON file into a list of order dicts.

    Accepts either a JSON array of orders or a JSON object whose values are
    orders; any other top-level type yields an empty list. Entries that are
    not dicts are silently dropped.

    Args:
        json_file: File-like object with the JSON payload (e.g. a Streamlit
            upload buffer).

    Returns:
        list[dict]: The valid order records, or [] when parsing fails.
    """
    # Keep the try body minimal and the exception types narrow:
    # json.JSONDecodeError is a ValueError subclass; OSError covers read
    # failures on the upload buffer. (Previously a blanket `except Exception`
    # also hid bugs in the classification logic below.)
    try:
        data = json.load(json_file)
    except (ValueError, OSError) as e:
        st.error(f"❌ Error parsing JSON: {e}")
        return []
    if isinstance(data, dict):
        orders = list(data.values())
    elif isinstance(data, list):
        orders = data
    else:
        return []
    return [o for o in orders if isinstance(o, dict)]
61
 
62
def build_index(chunks):
    """Embed text chunks and build an exact L2 FAISS index over them.

    Args:
        chunks: List of text strings to embed with the module-level
            ``embedder``.

    Returns:
        tuple: ``(index, chunks)`` — the populated ``faiss.IndexFlatL2``
        and the chunk list in the same order as the index rows.
    """
    embeddings = np.asarray(embedder.encode(chunks))
    dim = embeddings.shape[1]
    flat_index = faiss.IndexFlatL2(dim)
    flat_index.add(embeddings)
    return flat_index, chunks
67
 
68
def ask_llm(context, query):
    """Send the retrieved context plus the user's question to the Groq LLM.

    Args:
        context: Concatenated knowledge-base chunks to ground the answer.
        query: The user's natural-language question.

    Returns:
        str: The model's reply text, stripped of surrounding whitespace.
    """
    # NOTE(review): continuation lines of the prompt appear unindented so no
    # stray leading whitespace reaches the model — confirm against the
    # original file formatting.
    prompt = f"""You are a helpful assistant for an online toy shop.

Knowledge base:
{context}

Question: {query}
"""

    # `client` and LLM_MODEL are module-level globals configured earlier in app.py.
    response = client.chat.completions.create(
        model=LLM_MODEL,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content.strip()
81
 
82
# Uploads

# Customer orders as JSON: either a list of order dicts or an object keyed
# by order id (both shapes are accepted by load_json_orders).
st.subheader("πŸ“ Upload Customer Orders (JSON)")
orders_file = st.file_uploader("Upload JSON file", type="json")
85
 
 
88
 
89
# Text chunks destined for the FAISS index, from orders and PDFs respectively.
order_chunks, pdf_chunks = [], []

# Handle JSON orders
if orders_file:
    orders = load_json_orders(orders_file)
    if orders:
        # One flattened "key: value" text chunk per order (see flatten_order).
        order_chunks = [flatten_order(o) for o in orders]
        st.success(f"βœ… Loaded {len(order_chunks)} valid orders.")

        # Tabular preview; pd.json_normalize can raise on irregular nesting,
        # in which case fall back to a raw JSON view.
        try:
            df = pd.json_normalize(orders)
            st.dataframe(df, use_container_width=True)
        except Exception:
            st.warning("⚠️ Unable to normalize JSON. Showing raw preview.")
            st.json(orders)
 
 
104
 
105
# Handle PDFs: extract each file's text and split it into paragraph chunks.
if pdf_files:
    for file in pdf_files:
        try:
            text = extract_pdf_text(file)
            # Filter out empty/whitespace-only fragments so blank chunks are
            # not embedded into the FAISS index (extract_text commonly yields
            # runs of consecutive newlines).
            pdf_chunks.extend(p.strip() for p in text.split("\n\n") if p.strip())
            st.success(f"πŸ“„ Processed: {file.name}")
        except Exception as e:
            # Best-effort per file: a corrupt PDF should not abort the others.
            st.error(f"❌ Error in {file.name}: {e}")
114
 
115
# Combine & build index, then answer questions over the combined corpus.
combined_chunks = order_chunks + pdf_chunks

if combined_chunks:
    index, sources = build_index(combined_chunks)

    st.subheader("❓ Ask a Question")
    user_query = st.text_input("What would you like to know?", placeholder="e.g., What is the status of order 105?")

    if user_query:
        query_vector = embedder.encode([user_query])
        # Cap k at the corpus size: FAISS pads missing neighbours with
        # index -1, which would otherwise alias sources[-1] (the last
        # chunk) into the context when fewer than 5 chunks exist.
        top_k = min(5, len(sources))
        D, I = index.search(query_vector, k=top_k)
        context = "\n---\n".join(sources[i] for i in I[0])

        with st.spinner("πŸ€” Thinking..."):
            try:
                answer = ask_llm(context, user_query)
                st.markdown("### 🧠 Answer")
                st.write(answer)
            except Exception as e:
                # Surface API failures (auth, rate limit, network) to the user.
                st.error(f"❌ GROQ Error: {e}")
else:
    st.info("πŸ“‚ Please upload orders (JSON) and info files (PDF) to get started.")