# ToyShop RAG-Based Assistant — Streamlit app.
# (The original "Spaces: / Sleeping" lines were Hugging Face Spaces page residue, not source code.)
# --- Imports ---
import streamlit as st
import os
import json
import pdfplumber
import faiss
import numpy as np
import pandas as pd
import re
from sentence_transformers import SentenceTransformer
from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables (expects GROQ_API_KEY in .env or the process env).
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Groq exposes an OpenAI-compatible endpoint, so the stock OpenAI client works
# once pointed at Groq's base URL.
client = OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")

# Constants: local embedding model plus the Groq-hosted chat model.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL = "llama3-8b-8192"
embedder = SentenceTransformer(EMBEDDING_MODEL)

# Streamlit page chrome.
# NOTE(review): the emoji literals throughout this file (e.g. "π§Έ") look
# mojibake-garbled — probably 🧸 etc. originally. Kept byte-for-byte here;
# restore against the original source.
st.set_page_config(page_title="π§Έ ToyShop Assistant", layout="wide")
st.title("π§Έ ToyShop RAG-Based Assistant")
# --- Helper Functions ---
def extract_pdf_text(file):
    """Return the concatenated text of every page of a PDF file-like object."""
    pages = []
    with pdfplumber.open(file) as pdf:
        for page in pdf.pages:
            # extract_text() yields None for pages without a text layer; skip those.
            page_text = page.extract_text()
            if page_text:
                pages.append(page_text)
    return "\n".join(pages).strip()
# --- Order helpers (flattening and id lookup) ---
def flatten_order(order):
    """Render one order dict as a newline-joined text chunk for embedding.

    Nested values are serialized inline as JSON. Orders that carry both an
    order_id and a status additionally get a customer-facing natural-language
    summary appended, so the LLM can quote it verbatim for status questions.
    Non-dict inputs produce an empty string.
    """
    lines = []
    if isinstance(order, dict):
        for key, value in order.items():
            # Keep nested structures readable by JSON-encoding them inline.
            if isinstance(value, (dict, list)):
                lines.append(f"{key}: {json.dumps(value, ensure_ascii=False)}")
            else:
                lines.append(f"{key}: {value}")
        # Friendly natural-language summary for the assistant.
        if "order_id" in order and "status" in order:
            item_text = (
                ", ".join([item['name'] + ' (x' + str(item['quantity']) + ')' for item in order.get('items', [])])
                if order.get('items')
                else 'Not available'
            )
            summary = f"""
Dear {order.get("customer_name", "Customer")},
Here are the complete details of your order **#{order['order_id']}**:
- **Status**: {order['status']}
- **Items**: {item_text}
- **Total**: {order.get('total', 'N/A')}
- **Date**: {order.get('date', 'N/A')}
- **Shipping Address**: {order.get('shipping_address', 'N/A')}
We hope this helps! Let us know if you need anything else.
Thanks for shopping with us! π
""".strip()
            lines.append(summary)
    return "\n".join(lines)
# NOTE(review): a byte-identical `get_order_by_id` definition appeared here
# and again later in this file (after `preprocess_query`). Python binds only
# the last definition, so this earlier copy was dead, shadowed code — removed.
# The surviving definition below is the one the app actually uses.
def load_json_orders(json_file):
    """Parse an uploaded JSON file and return its orders as a list of dicts.

    Accepts either a JSON array of orders or a JSON object whose values are
    orders; anything that is not a dict is dropped. A parse failure is shown
    as a Streamlit error and yields an empty list.
    """
    try:
        payload = json.load(json_file)
    except Exception as e:
        # Malformed upload — report it in the UI instead of crashing the app.
        st.error(f"β Error parsing JSON: {e}")
        return []
    if isinstance(payload, dict):
        candidates = list(payload.values())
    elif isinstance(payload, list):
        candidates = payload
    else:
        # Scalar top-level JSON (string/number/bool) carries no orders.
        return []
    return [entry for entry in candidates if isinstance(entry, dict)]
def build_index(chunks):
    """Embed the text chunks and build an in-memory FAISS L2 index over them.

    Returns the index together with the chunk list, so search hits can be
    mapped back to their source text by position.
    """
    embeddings = np.array(embedder.encode(chunks))
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)
    return index, chunks
def ask_llm(context, query):
    """Ask the Groq-hosted LLM to answer *query* grounded in *context*."""
    prompt = f"""You are a helpful assistant for an online toy shop.
Knowledge base:
{context}
Question: {query}
"""
    completion = client.chat.completions.create(
        model=LLM_MODEL,
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content.strip()
def preprocess_query(q):
    """Normalize a user query: 'order_id' becomes 'order', underscores become spaces."""
    normalized = q.replace("order_id", "order")
    return normalized.replace("_", " ")
def get_order_by_id(orders, query):
    """Look up an order referenced by number inside *query*.

    Matches phrasings like 'order 105', 'order #105', or 'order_id: 105'.
    Returns the flattened order text for the first order whose order_id
    matches, or None when the query names no id or no order carries it.
    """
    id_match = re.search(r"order(?:_id)?\s*[:#]?\s*(\d+)", query)
    if id_match is None:
        return None
    wanted = id_match.group(1)
    for candidate in orders:
        # Stringify for comparison: order_id may be stored as int or str.
        if str(candidate.get("order_id")) == wanted:
            return flatten_order(candidate)
    return None
# --- Uploads ---
st.subheader("π Upload Customer Orders (JSON)")
orders_file = st.file_uploader("Upload JSON file", type="json")
st.subheader("π Upload FAQs / Product Info / Return Policy (PDFs)")
pdf_files = st.file_uploader("Upload one or more PDFs", type="pdf", accept_multiple_files=True)

order_chunks, pdf_chunks = [], []
orders = []

# --- Handle JSON Orders ---
if orders_file:
    orders = load_json_orders(orders_file)
    if orders:
        # One retrievable text chunk per order, plus a tabular preview.
        order_chunks = [flatten_order(o) for o in orders]
        st.success(f"β Loaded {len(order_chunks)} valid orders.")
        try:
            df = pd.json_normalize(orders)
            st.dataframe(df, use_container_width=True)
        except Exception:
            # Deeply irregular JSON can defeat normalization; fall back to raw view.
            st.warning("β οΈ Unable to normalize JSON. Showing raw preview.")
            st.json(orders)

# --- Handle PDFs ---
if pdf_files:
    for file in pdf_files:
        try:
            text = extract_pdf_text(file)
            # Paragraph-level chunks: split on blank lines.
            pdf_chunks.extend(text.split("\n\n"))
            st.success(f"π Processed: {file.name}")
        except Exception as e:
            st.error(f"β Error in {file.name}: {e}")

# --- Build Index & Q&A ---
combined_chunks = order_chunks + pdf_chunks
if combined_chunks:
    index, sources = build_index(combined_chunks)
    st.subheader("β Ask a Question")
    user_query = st.text_input("What would you like to know?", placeholder="e.g., What is the status of order 105?")
    if user_query:
        pre_q = preprocess_query(user_query)
        # Exact order-id hits bypass semantic search entirely.
        direct_match = get_order_by_id(orders, user_query)
        if direct_match:
            context = direct_match
        else:
            # Semantic fallback: nearest 5 chunks by L2 distance.
            query_vector = embedder.encode([pre_q])
            D, I = index.search(query_vector, k=5)
            context = "\n---\n".join([sources[i] for i in I[0]])
        with st.spinner("π€ Thinking..."):
            try:
                answer = ask_llm(context, user_query)
                st.markdown("### π§ Answer")
                st.write(answer)
            except Exception as e:
                st.error(f"β GROQ Error: {e}")
else:
    st.info("π Please upload orders (JSON) and info files (PDF) to get started.")