Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import pdfplumber
|
|
5 |
import faiss
|
6 |
import numpy as np
|
7 |
import pandas as pd
|
|
|
8 |
from sentence_transformers import SentenceTransformer
|
9 |
from openai import OpenAI
|
10 |
from dotenv import load_dotenv
|
@@ -25,6 +26,8 @@ embedder = SentenceTransformer(EMBEDDING_MODEL)
|
|
25 |
st.set_page_config(page_title="π§Έ ToyShop Assistant", layout="wide")
|
26 |
st.title("π§Έ ToyShop RAG-Based Assistant")
|
27 |
|
|
|
|
|
28 |
def extract_pdf_text(file):
|
29 |
text = ""
|
30 |
with pdfplumber.open(file) as pdf:
|
@@ -42,6 +45,9 @@ def flatten_order(order):
|
|
42 |
flat.append(f"{k}: {json.dumps(v, ensure_ascii=False)}")
|
43 |
else:
|
44 |
flat.append(f"{k}: {v}")
|
|
|
|
|
|
|
45 |
return "\n".join(flat)
|
46 |
|
47 |
def load_json_orders(json_file):
|
@@ -79,7 +85,19 @@ Question: {query}
|
|
79 |
)
|
80 |
return response.choices[0].message.content.strip()
|
81 |
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
st.subheader("π Upload Customer Orders (JSON)")
|
84 |
orders_file = st.file_uploader("Upload JSON file", type="json")
|
85 |
|
@@ -87,14 +105,14 @@ st.subheader("π Upload FAQs / Product Info / Return Policy (PDFs)")
|
|
87 |
pdf_files = st.file_uploader("Upload one or more PDFs", type="pdf", accept_multiple_files=True)
|
88 |
|
89 |
order_chunks, pdf_chunks = [], []
|
|
|
90 |
|
91 |
-
# Handle JSON
|
92 |
if orders_file:
|
93 |
orders = load_json_orders(orders_file)
|
94 |
if orders:
|
95 |
order_chunks = [flatten_order(o) for o in orders]
|
96 |
st.success(f"β
Loaded {len(order_chunks)} valid orders.")
|
97 |
-
|
98 |
try:
|
99 |
df = pd.json_normalize(orders)
|
100 |
st.dataframe(df, use_container_width=True)
|
@@ -102,7 +120,7 @@ if orders_file:
|
|
102 |
st.warning("β οΈ Unable to normalize JSON. Showing raw preview.")
|
103 |
st.json(orders)
|
104 |
|
105 |
-
# Handle PDFs
|
106 |
if pdf_files:
|
107 |
for file in pdf_files:
|
108 |
try:
|
@@ -112,7 +130,7 @@ if pdf_files:
|
|
112 |
except Exception as e:
|
113 |
st.error(f"β Error in {file.name}: {e}")
|
114 |
|
115 |
-
#
|
116 |
combined_chunks = order_chunks + pdf_chunks
|
117 |
|
118 |
if combined_chunks:
|
@@ -122,9 +140,15 @@ if combined_chunks:
|
|
122 |
user_query = st.text_input("What would you like to know?", placeholder="e.g., What is the status of order 105?")
|
123 |
|
124 |
if user_query:
|
125 |
-
|
126 |
-
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
with st.spinner("π€ Thinking..."):
|
130 |
try:
|
|
|
5 |
import faiss
|
6 |
import numpy as np
|
7 |
import pandas as pd
|
8 |
+
import re
|
9 |
from sentence_transformers import SentenceTransformer
|
10 |
from openai import OpenAI
|
11 |
from dotenv import load_dotenv
|
|
|
26 |
st.set_page_config(page_title="π§Έ ToyShop Assistant", layout="wide")
|
27 |
st.title("π§Έ ToyShop RAG-Based Assistant")
|
28 |
|
29 |
+
# --- Helper Functions ---
|
30 |
+
|
31 |
def extract_pdf_text(file):
|
32 |
text = ""
|
33 |
with pdfplumber.open(file) as pdf:
|
|
|
45 |
flat.append(f"{k}: {json.dumps(v, ensure_ascii=False)}")
|
46 |
else:
|
47 |
flat.append(f"{k}: {v}")
|
48 |
+
# Add a natural language summary
|
49 |
+
if "order_id" in order and "status" in order:
|
50 |
+
flat.append(f"The status of order {order['order_id']} is {order['status']}.")
|
51 |
return "\n".join(flat)
|
52 |
|
53 |
def load_json_orders(json_file):
|
|
|
85 |
)
|
86 |
return response.choices[0].message.content.strip()
|
87 |
|
88 |
+
def preprocess_query(q):
    """Rewrite a user query into plainer wording before it is embedded.

    The field name ``order_id`` is replaced by the word ``order``, and any
    remaining underscores are turned into spaces.

    Args:
        q: The raw query string typed by the user.

    Returns:
        The rewritten query string.
    """
    # Handle the field name first so it does not become "order id" below.
    without_field_name = q.replace("order_id", "order")
    # Splitting on "_" and re-joining with " " swaps every underscore for a space.
    return " ".join(without_field_name.split("_"))
|
90 |
+
|
91 |
+
def get_order_by_id(orders, query):
    """Look up the order whose ID is mentioned in a free-text query.

    The query is scanned for the word "order", optionally followed by
    "id" (written as ``order_id``, ``order id``, ``order-id`` or
    ``orderid``), an optional ``:`` or ``#``, and then a run of digits.
    Matching ignores case, so "Order #105" and "order_id: 105" both work.

    Args:
        orders: Iterable of order dicts. ``None`` or an empty collection is
            treated as "no orders" instead of raising.
        query: The user's question as plain text.

    Returns:
        The result of ``flatten_order`` for the first order whose
        ``order_id`` (compared as a string) equals the digits found in the
        query, or ``None`` when the query names no order or no order matches.
    """
    match = re.search(
        r"order(?:[\s_-]?id)?\s*[:#]?\s*(\d+)",
        query,
        flags=re.IGNORECASE,
    )
    if match is None:
        return None

    oid = match.group(1)
    # `orders or ()` keeps a missing/None order list from raising TypeError.
    for order in orders or ():
        # order_id may be stored as int or str; compare on the string form.
        if str(order.get("order_id")) == oid:
            return flatten_order(order)
    return None
|
99 |
+
|
100 |
+
# --- Uploads ---
|
101 |
st.subheader("π Upload Customer Orders (JSON)")
|
102 |
orders_file = st.file_uploader("Upload JSON file", type="json")
|
103 |
|
|
|
105 |
pdf_files = st.file_uploader("Upload one or more PDFs", type="pdf", accept_multiple_files=True)
|
106 |
|
107 |
order_chunks, pdf_chunks = [], []
|
108 |
+
orders = []
|
109 |
|
110 |
+
# --- Handle JSON Orders ---
|
111 |
if orders_file:
|
112 |
orders = load_json_orders(orders_file)
|
113 |
if orders:
|
114 |
order_chunks = [flatten_order(o) for o in orders]
|
115 |
st.success(f"β
Loaded {len(order_chunks)} valid orders.")
|
|
|
116 |
try:
|
117 |
df = pd.json_normalize(orders)
|
118 |
st.dataframe(df, use_container_width=True)
|
|
|
120 |
st.warning("β οΈ Unable to normalize JSON. Showing raw preview.")
|
121 |
st.json(orders)
|
122 |
|
123 |
+
# --- Handle PDFs ---
|
124 |
if pdf_files:
|
125 |
for file in pdf_files:
|
126 |
try:
|
|
|
130 |
except Exception as e:
|
131 |
st.error(f"β Error in {file.name}: {e}")
|
132 |
|
133 |
+
# --- Build Index & Q&A ---
|
134 |
combined_chunks = order_chunks + pdf_chunks
|
135 |
|
136 |
if combined_chunks:
|
|
|
140 |
user_query = st.text_input("What would you like to know?", placeholder="e.g., What is the status of order 105?")
|
141 |
|
142 |
if user_query:
|
143 |
+
pre_q = preprocess_query(user_query)
|
144 |
+
direct_match = get_order_by_id(orders, user_query)
|
145 |
+
|
146 |
+
if direct_match:
|
147 |
+
context = direct_match
|
148 |
+
else:
|
149 |
+
query_vector = embedder.encode([pre_q])
|
150 |
+
D, I = index.search(query_vector, k=5)
|
151 |
+
context = "\n---\n".join([sources[i] for i in I[0]])
|
152 |
|
153 |
with st.spinner("π€ Thinking..."):
|
154 |
try:
|