Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
import streamlit as st
|
2 |
import os
|
3 |
import json
|
4 |
-
import tempfile
|
5 |
import pdfplumber
|
6 |
import faiss
|
7 |
import numpy as np
|
@@ -22,68 +21,65 @@ EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
|
22 |
LLM_MODEL = "llama3-8b-8192"
|
23 |
embedder = SentenceTransformer(EMBEDDING_MODEL)
|
24 |
|
25 |
-
# Streamlit
|
26 |
st.set_page_config(page_title="π§Έ ToyShop Assistant", layout="wide")
|
27 |
st.title("π§Έ ToyShop RAG-Based Assistant")
|
28 |
|
29 |
-
# --- Helper functions ---
|
30 |
-
|
31 |
def extract_pdf_text(file):
|
32 |
text = ""
|
33 |
with pdfplumber.open(file) as pdf:
|
34 |
for page in pdf.pages:
|
35 |
-
|
36 |
-
if
|
37 |
-
text +=
|
38 |
return text.strip()
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
def load_json_orders(json_file):
|
41 |
-
valid_orders = []
|
42 |
try:
|
43 |
data = json.load(json_file)
|
44 |
-
if isinstance(data,
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
for k, order in data.items():
|
53 |
-
try:
|
54 |
-
json.dumps(order)
|
55 |
-
valid_orders.append(order)
|
56 |
-
except Exception as e:
|
57 |
-
st.warning(f"β οΈ Skipping invalid order with key '{k}': {e}")
|
58 |
except Exception as e:
|
59 |
-
st.error(f"β Error parsing JSON
|
60 |
-
|
61 |
|
62 |
-
def build_index(
|
63 |
-
vectors = embedder.encode(
|
64 |
index = faiss.IndexFlatL2(vectors.shape[1])
|
65 |
index.add(np.array(vectors))
|
66 |
-
return index,
|
67 |
|
68 |
def ask_llm(context, query):
|
69 |
-
prompt =
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
response = client.chat.completions.create(
|
78 |
model=LLM_MODEL,
|
79 |
messages=[{"role": "user", "content": prompt}]
|
80 |
)
|
81 |
-
# Log full response for inspection (can be commented out in production)
|
82 |
-
st.expander("Raw LLM API Response").json(response)
|
83 |
return response.choices[0].message.content.strip()
|
84 |
|
85 |
-
#
|
86 |
-
|
87 |
st.subheader("π Upload Customer Orders (JSON)")
|
88 |
orders_file = st.file_uploader("Upload JSON file", type="json")
|
89 |
|
@@ -92,57 +88,50 @@ pdf_files = st.file_uploader("Upload one or more PDFs", type="pdf", accept_multi
|
|
92 |
|
93 |
order_chunks, pdf_chunks = [], []
|
94 |
|
95 |
-
#
|
96 |
if orders_file:
|
97 |
orders = load_json_orders(orders_file)
|
98 |
if orders:
|
99 |
-
order_chunks = [
|
100 |
-
st.success(f"β
Loaded {len(order_chunks)}
|
101 |
-
|
102 |
try:
|
103 |
df = pd.json_normalize(orders)
|
104 |
st.dataframe(df, use_container_width=True)
|
105 |
except Exception:
|
106 |
-
st.warning("β οΈ
|
107 |
st.json(orders)
|
108 |
-
else:
|
109 |
-
st.error("No valid orders found in the JSON file.")
|
110 |
|
111 |
-
#
|
112 |
if pdf_files:
|
113 |
-
for
|
114 |
try:
|
115 |
-
text = extract_pdf_text(
|
116 |
-
|
117 |
-
|
118 |
-
pdf_chunks.extend(paragraphs)
|
119 |
-
st.success(f"π Processed {pdf_file.name}")
|
120 |
except Exception as e:
|
121 |
-
st.error(f"β
|
122 |
|
|
|
123 |
combined_chunks = order_chunks + pdf_chunks
|
124 |
|
125 |
-
# --- Question Answering Section ---
|
126 |
if combined_chunks:
|
127 |
index, sources = build_index(combined_chunks)
|
128 |
|
129 |
st.subheader("β Ask a Question")
|
130 |
-
user_query = st.text_input("What would you like to know?", placeholder="e.g
|
131 |
|
132 |
if user_query:
|
133 |
query_vector = embedder.encode([user_query])
|
134 |
D, I = index.search(query_vector, k=5)
|
135 |
-
# Prepare context from the top-K results:
|
136 |
context = "\n---\n".join([sources[i] for i in I[0]])
|
137 |
-
st.expander("Combined Context").code(context)
|
138 |
|
139 |
with st.spinner("π€ Thinking..."):
|
140 |
try:
|
141 |
answer = ask_llm(context, user_query)
|
142 |
st.markdown("### π§ Answer")
|
143 |
-
# Use st.write() to render the answer as text.
|
144 |
st.write(answer)
|
145 |
except Exception as e:
|
146 |
-
st.error(f"β GROQ
|
147 |
else:
|
148 |
-
st.info("π Please upload
|
|
|
1 |
import streamlit as st
|
2 |
import os
|
3 |
import json
|
|
|
4 |
import pdfplumber
|
5 |
import faiss
|
6 |
import numpy as np
|
|
|
21 |
LLM_MODEL = "llama3-8b-8192"
|
22 |
embedder = SentenceTransformer(EMBEDDING_MODEL)
|
23 |
|
24 |
+
# Streamlit UI — set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title="π§Έ ToyShop Assistant", layout="wide")
st.title("π§Έ ToyShop RAG-Based Assistant")
|
27 |
|
|
|
|
|
28 |
def extract_pdf_text(file):
    """Return the concatenated text of every page of an uploaded PDF.

    Pages for which pdfplumber yields no text are skipped; page texts are
    joined with newlines and the final result is stripped of surrounding
    whitespace.
    """
    pages_text = []
    with pdfplumber.open(file) as pdf:
        for page in pdf.pages:
            extracted = page.extract_text()
            if extracted:
                pages_text.append(extracted)
    return "\n".join(pages_text).strip()
|
36 |
|
37 |
+
def flatten_order(order):
    """Render one order dict as newline-separated "key: value" lines.

    Nested dict/list values are JSON-encoded (non-ASCII preserved);
    anything that is not a dict flattens to an empty string.
    """
    if not isinstance(order, dict):
        return ""

    def _render(value):
        # Nested structures become compact JSON; scalars are formatted as-is.
        if isinstance(value, (dict, list)):
            return json.dumps(value, ensure_ascii=False)
        return value

    return "\n".join(f"{key}: {_render(value)}" for key, value in order.items())
|
46 |
+
|
47 |
def load_json_orders(json_file):
    """Parse an uploaded JSON file into a list of order dicts.

    Accepts either a mapping (order-id -> order) or a list of orders;
    entries that are not dicts are silently dropped. Returns [] when the
    top-level value is neither dict nor list, or when parsing fails (a
    parse failure is also reported in the UI via st.error).
    """
    try:
        data = json.load(json_file)
    except Exception as e:
        st.error(f"β Error parsing JSON: {e}")
        return []
    if isinstance(data, dict):
        candidates = list(data.values())
    elif isinstance(data, list):
        candidates = data
    else:
        candidates = []
    return [entry for entry in candidates if isinstance(entry, dict)]
|
61 |
|
62 |
+
def build_index(chunks):
    """Embed *chunks* and build a FAISS flat-L2 index over the vectors.

    Returns (index, chunks) so callers can map search hits back to the
    original text. NOTE(review): assumes embedder.encode returns a 2-D
    array-like of shape (len(chunks), dim) — confirm against the
    SentenceTransformer API.
    """
    embeddings = embedder.encode(chunks)
    dim = embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dim)
    faiss_index.add(np.array(embeddings))
    return faiss_index, chunks
|
67 |
|
68 |
def ask_llm(context, query):
    """Send the retrieved context plus the user question to the chat model
    and return the stripped answer text.

    Raises whatever the client raises on API failure; the caller handles it.
    """
    prompt = f"""You are a helpful assistant for an online toy shop.

Knowledge base:
{context}

Question: {query}
"""
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(model=LLM_MODEL, messages=messages)
    answer = completion.choices[0].message.content
    return answer.strip()
|
81 |
|
82 |
+
# Uploads — orders arrive as a single JSON file; orders_file is None until
# the user uploads one.
st.subheader("π Upload Customer Orders (JSON)")
orders_file = st.file_uploader("Upload JSON file", type="json")
|
85 |
|
|
|
88 |
|
89 |
# Text chunks destined for the vector index: one per order, one per PDF paragraph.
order_chunks, pdf_chunks = [], []

# Handle JSON orders: parse the upload, flatten each order into one text
# chunk, and show a preview of the data.
if orders_file:
    orders = load_json_orders(orders_file)
    if orders:
        order_chunks = [flatten_order(o) for o in orders]
        st.success(f"β Loaded {len(order_chunks)} valid orders.")

        # Preview: try a flat DataFrame first; fall back to raw JSON when
        # the orders do not normalize into a table.
        try:
            df = pd.json_normalize(orders)
            st.dataframe(df, use_container_width=True)
        except Exception:
            st.warning("β οΈ Unable to normalize JSON. Showing raw preview.")
            st.json(orders)
|
|
|
|
|
104 |
|
105 |
+
# Handle PDFs: extract the text of each upload and split it into
# paragraph-sized chunks (blank-line separated) for indexing.
if pdf_files:
    for file in pdf_files:
        try:
            text = extract_pdf_text(file)
            pdf_chunks.extend(text.split("\n\n"))
            st.success(f"π Processed: {file.name}")
        except Exception as e:
            # Best-effort: a bad PDF is reported but does not stop the others.
            st.error(f"β Error in {file.name}: {e}")
|
114 |
|
115 |
+
# Combine & build index
combined_chunks = order_chunks + pdf_chunks

if combined_chunks:
    index, sources = build_index(combined_chunks)

    st.subheader("β Ask a Question")
    user_query = st.text_input("What would you like to know?", placeholder="e.g., What is the status of order 105?")

    if user_query:
        # Retrieve the 5 chunks nearest to the query embedding; I holds the
        # row indices back into `sources`, D the L2 distances (unused).
        query_vector = embedder.encode([user_query])
        D, I = index.search(query_vector, k=5)
        context = "\n---\n".join([sources[i] for i in I[0]])

        with st.spinner("π€ Thinking..."):
            try:
                answer = ask_llm(context, user_query)
                st.markdown("### π§ Answer")
                st.write(answer)
            except Exception as e:
                st.error(f"β GROQ Error: {e}")
else:
    # Nothing indexed yet — prompt the user to upload source material.
    st.info("π Please upload orders (JSON) and info files (PDF) to get started.")
|