masadonline committed on
Commit
92d0c75
·
verified ·
1 Parent(s): 013dd9f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -8
app.py CHANGED
@@ -5,6 +5,7 @@ import pdfplumber
5
  import faiss
6
  import numpy as np
7
  import pandas as pd
 
8
  from sentence_transformers import SentenceTransformer
9
  from openai import OpenAI
10
  from dotenv import load_dotenv
@@ -25,6 +26,8 @@ embedder = SentenceTransformer(EMBEDDING_MODEL)
25
  st.set_page_config(page_title="🧸 ToyShop Assistant", layout="wide")
26
  st.title("🧸 ToyShop RAG-Based Assistant")
27
 
 
 
28
  def extract_pdf_text(file):
29
  text = ""
30
  with pdfplumber.open(file) as pdf:
@@ -42,6 +45,9 @@ def flatten_order(order):
42
  flat.append(f"{k}: {json.dumps(v, ensure_ascii=False)}")
43
  else:
44
  flat.append(f"{k}: {v}")
 
 
 
45
  return "\n".join(flat)
46
 
47
  def load_json_orders(json_file):
@@ -79,7 +85,19 @@ Question: {query}
79
  )
80
  return response.choices[0].message.content.strip()
81
 
82
- # Uploads
 
 
 
 
 
 
 
 
 
 
 
 
83
  st.subheader("πŸ“ Upload Customer Orders (JSON)")
84
  orders_file = st.file_uploader("Upload JSON file", type="json")
85
 
@@ -87,14 +105,14 @@ st.subheader("πŸ“š Upload FAQs / Product Info / Return Policy (PDFs)")
87
  pdf_files = st.file_uploader("Upload one or more PDFs", type="pdf", accept_multiple_files=True)
88
 
89
  order_chunks, pdf_chunks = [], []
 
90
 
91
- # Handle JSON orders
92
  if orders_file:
93
  orders = load_json_orders(orders_file)
94
  if orders:
95
  order_chunks = [flatten_order(o) for o in orders]
96
  st.success(f"βœ… Loaded {len(order_chunks)} valid orders.")
97
-
98
  try:
99
  df = pd.json_normalize(orders)
100
  st.dataframe(df, use_container_width=True)
@@ -102,7 +120,7 @@ if orders_file:
102
  st.warning("⚠️ Unable to normalize JSON. Showing raw preview.")
103
  st.json(orders)
104
 
105
- # Handle PDFs
106
  if pdf_files:
107
  for file in pdf_files:
108
  try:
@@ -112,7 +130,7 @@ if pdf_files:
112
  except Exception as e:
113
  st.error(f"❌ Error in {file.name}: {e}")
114
 
115
- # Combine & build index
116
  combined_chunks = order_chunks + pdf_chunks
117
 
118
  if combined_chunks:
@@ -122,9 +140,15 @@ if combined_chunks:
122
  user_query = st.text_input("What would you like to know?", placeholder="e.g., What is the status of order 105?")
123
 
124
  if user_query:
125
- query_vector = embedder.encode([user_query])
126
- D, I = index.search(query_vector, k=5)
127
- context = "\n---\n".join([sources[i] for i in I[0]])
 
 
 
 
 
 
128
 
129
  with st.spinner("πŸ€” Thinking..."):
130
  try:
 
5
  import faiss
6
  import numpy as np
7
  import pandas as pd
8
+ import re
9
  from sentence_transformers import SentenceTransformer
10
  from openai import OpenAI
11
  from dotenv import load_dotenv
 
26
  st.set_page_config(page_title="🧸 ToyShop Assistant", layout="wide")
27
  st.title("🧸 ToyShop RAG-Based Assistant")
28
 
29
+ # --- Helper Functions ---
30
+
31
  def extract_pdf_text(file):
32
  text = ""
33
  with pdfplumber.open(file) as pdf:
 
45
  flat.append(f"{k}: {json.dumps(v, ensure_ascii=False)}")
46
  else:
47
  flat.append(f"{k}: {v}")
48
+ # Add a natural language summary
49
+ if "order_id" in order and "status" in order:
50
+ flat.append(f"The status of order {order['order_id']} is {order['status']}.")
51
  return "\n".join(flat)
52
 
53
  def load_json_orders(json_file):
 
85
  )
86
  return response.choices[0].message.content.strip()
87
 
88
def preprocess_query(q):
    """Normalize a user query before it is embedded for retrieval.

    Field-style tokens are rewritten as plain words: ``order_id`` (in any
    letter case) becomes ``order`` and every remaining underscore becomes
    a space.

    Args:
        q: The raw query string typed by the user.

    Returns:
        The normalized query string.
    """
    # Case-insensitive so "Order_ID 105" and "order_id 105" normalize the
    # same way instead of the former degrading to "Order ID 105".
    normalized = re.sub(r"order_id", "order", q, flags=re.IGNORECASE)
    return normalized.replace("_", " ")
90
+
91
def get_order_by_id(orders, query):
    """Return the flattened text of the order whose ID is named in *query*.

    The query is scanned for the word ``order`` (optionally followed by
    ``_id`` / `` id`` and a ``:`` or ``#``) and then a run of digits, e.g.
    ``order 105``, ``Order #105`` or ``order_id: 105``. The first order
    whose ``order_id`` stringifies to those digits is passed to
    ``flatten_order`` and that result is returned.

    Args:
        orders: Iterable of order records; entries that are not dicts are
            skipped. ``None`` is treated as empty.
        query: The raw user query. ``None`` is treated as an empty string.

    Returns:
        The value of ``flatten_order(order)`` for the matching order, or
        ``None`` when the query names no order ID or no order matches.
    """
    # IGNORECASE so "Order 105" / "ORDER #105" take the direct-lookup path
    # just like the lowercase spelling does.
    match = re.search(
        r"order(?:[\s_]*id)?\s*[:#]?\s*(\d+)",
        query or "",
        re.IGNORECASE,
    )
    if match is None:
        return None

    wanted_id = match.group(1)
    for order in orders or ():
        # Guard against malformed entries: only dicts have .get().
        if isinstance(order, dict) and str(order.get("order_id")) == wanted_id:
            return flatten_order(order)
    return None
99
+
100
+ # --- Uploads ---
101
  st.subheader("πŸ“ Upload Customer Orders (JSON)")
102
  orders_file = st.file_uploader("Upload JSON file", type="json")
103
 
 
105
  pdf_files = st.file_uploader("Upload one or more PDFs", type="pdf", accept_multiple_files=True)
106
 
107
  order_chunks, pdf_chunks = [], []
108
+ orders = []
109
 
110
+ # --- Handle JSON Orders ---
111
  if orders_file:
112
  orders = load_json_orders(orders_file)
113
  if orders:
114
  order_chunks = [flatten_order(o) for o in orders]
115
  st.success(f"βœ… Loaded {len(order_chunks)} valid orders.")
 
116
  try:
117
  df = pd.json_normalize(orders)
118
  st.dataframe(df, use_container_width=True)
 
120
  st.warning("⚠️ Unable to normalize JSON. Showing raw preview.")
121
  st.json(orders)
122
 
123
+ # --- Handle PDFs ---
124
  if pdf_files:
125
  for file in pdf_files:
126
  try:
 
130
  except Exception as e:
131
  st.error(f"❌ Error in {file.name}: {e}")
132
 
133
+ # --- Build Index & Q&A ---
134
  combined_chunks = order_chunks + pdf_chunks
135
 
136
  if combined_chunks:
 
140
  user_query = st.text_input("What would you like to know?", placeholder="e.g., What is the status of order 105?")
141
 
142
  if user_query:
143
+ pre_q = preprocess_query(user_query)
144
+ direct_match = get_order_by_id(orders, user_query)
145
+
146
+ if direct_match:
147
+ context = direct_match
148
+ else:
149
+ query_vector = embedder.encode([pre_q])
150
+ D, I = index.search(query_vector, k=5)
151
+ context = "\n---\n".join([sources[i] for i in I[0]])
152
 
153
  with st.spinner("πŸ€” Thinking..."):
154
  try: