import streamlit as st
import os
import json
import pdfplumber
import faiss
import numpy as np
import pandas as pd
import re
from sentence_transformers import SentenceTransformer
from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Setup GROQ client (Groq exposes an OpenAI-compatible endpoint)
client = OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")

# Constants
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL = "llama3-8b-8192"

# Matches "order 105", "order_id: 105", "order #105", etc.
ORDER_ID_PATTERN = re.compile(r"order(?:_id)?\s*[:#]?\s*(\d+)")


@st.cache_resource(show_spinner=False)
def _load_embedder() -> SentenceTransformer:
    """Load the sentence-embedding model once per server process.

    Streamlit reruns the whole script on every widget interaction;
    without caching the model would be reloaded from disk each time.
    """
    return SentenceTransformer(EMBEDDING_MODEL)


embedder = _load_embedder()

# Streamlit UI
st.set_page_config(page_title="🧸 ToyShop Assistant", layout="wide")
st.title("🧸 ToyShop RAG-Based Assistant")


# --- Helper Functions ---
def extract_pdf_text(file) -> str:
    """Concatenate the extracted text of every page of a PDF.

    Pages where pdfplumber finds no text layer (e.g. scanned images)
    are skipped rather than contributing ``None``.
    """
    text = ""
    with pdfplumber.open(file) as pdf:
        for page in pdf.pages:
            content = page.extract_text()
            if content:
                text += content + "\n"
    return text.strip()


def flatten_order(order) -> str:
    """Render one order record as retrievable plain text.

    Emits every top-level key/value pair (nested values are JSON-encoded),
    followed by a friendly natural-language summary when the record looks
    like a complete order (has both ``order_id`` and ``status``).
    Non-dict inputs produce an empty string.
    """
    flat = []
    if isinstance(order, dict):
        for k, v in order.items():
            if isinstance(v, (dict, list)):
                flat.append(f"{k}: {json.dumps(v, ensure_ascii=False)}")
            else:
                flat.append(f"{k}: {v}")

        # Add a friendly natural-language summary for the assistant.
        # Kept inside the isinstance guard so malformed (non-dict)
        # records cannot raise here; item fields use .get so a partial
        # item dict degrades to '?' instead of crashing the upload.
        if "order_id" in order and "status" in order:
            if order.get("items"):
                items_text = ", ".join(
                    f"{item.get('name', '?')} (x{item.get('quantity', '?')})"
                    for item in order.get("items", [])
                )
            else:
                items_text = "Not available"
            summary = f"""
Dear {order.get("customer_name", "Customer")},

Here are the complete details of your order **#{order['order_id']}**:

- **Status**: {order['status']}
- **Items**: {items_text}
- **Total**: {order.get('total', 'N/A')}
- **Date**: {order.get('date', 'N/A')}
- **Shipping Address**: {order.get('shipping_address', 'N/A')}

We hope this helps! Let us know if you need anything else.

Thanks for shopping with us! 😊
""".strip()
            flat.append(summary)
    return "\n".join(flat)


def get_order_by_id(orders, query):
    """Return the flattened order whose id appears in *query*, else None.

    Recognizes phrasings like "order 105", "order_id: 105", "order #105".
    (The original file defined this function twice, identically; the
    duplicate has been removed.)
    """
    match = ORDER_ID_PATTERN.search(query)
    if match:
        oid = match.group(1)
        for order in orders:
            if str(order.get("order_id")) == oid:
                return flatten_order(order)
    return None


def load_json_orders(json_file):
    """Parse an uploaded JSON file into a list of order dicts.

    Accepts either a JSON object (values are taken as orders) or a JSON
    array. Non-dict entries are dropped. On parse failure an error is
    shown in the UI and an empty list is returned.
    """
    try:
        data = json.load(json_file)
        if isinstance(data, dict):
            orders = list(data.values())
        elif isinstance(data, list):
            orders = data
        else:
            return []
        valid_orders = [o for o in orders if isinstance(o, dict)]
        return valid_orders
    except Exception as e:
        st.error(f"❌ Error parsing JSON: {e}")
        return []


def build_index(chunks):
    """Embed *chunks* and build a FAISS L2 index over them.

    Returns ``(index, chunks)``. Vectors are forced to contiguous
    float32, which is what faiss requires.
    """
    vectors = np.ascontiguousarray(embedder.encode(chunks), dtype=np.float32)
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index, chunks


def ask_llm(context, query):
    """Ask the LLM to answer *query* grounded in *context*."""
    prompt = f"""You are a helpful assistant for an online toy shop.

Knowledge base:
{context}

Question: {query}
"""
    response = client.chat.completions.create(
        model=LLM_MODEL,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content.strip()


def preprocess_query(q):
    """Normalize a user query before embedding (order_id → order, _ → space)."""
    return q.replace("order_id", "order").replace("_", " ")


# --- Uploads ---
st.subheader("📁 Upload Customer Orders (JSON)")
orders_file = st.file_uploader("Upload JSON file", type="json")

st.subheader("📚 Upload FAQs / Product Info / Return Policy (PDFs)")
pdf_files = st.file_uploader("Upload one or more PDFs", type="pdf", accept_multiple_files=True)

order_chunks, pdf_chunks = [], []
orders = []

# --- Handle JSON Orders ---
if orders_file:
    orders = load_json_orders(orders_file)
    if orders:
        order_chunks = [flatten_order(o) for o in orders]
        st.success(f"✅ Loaded {len(order_chunks)} valid orders.")
        try:
            df = pd.json_normalize(orders)
            st.dataframe(df, use_container_width=True)
        except Exception:
            st.warning("⚠️ Unable to normalize JSON. Showing raw preview.")
            st.json(orders)

# --- Handle PDFs ---
if pdf_files:
    for file in pdf_files:
        try:
            text = extract_pdf_text(file)
            pdf_chunks.extend(text.split("\n\n"))
            st.success(f"📄 Processed: {file.name}")
        except Exception as e:
            st.error(f"❌ Error in {file.name}: {e}")

# --- Build Index & Q&A ---
combined_chunks = order_chunks + pdf_chunks
if combined_chunks:
    index, sources = build_index(combined_chunks)

    st.subheader("❓ Ask a Question")
    user_query = st.text_input(
        "What would you like to know?",
        placeholder="e.g., What is the status of order 105?",
    )

    if user_query:
        pre_q = preprocess_query(user_query)
        # Exact order-id lookup takes precedence over vector search.
        direct_match = get_order_by_id(orders, user_query)
        if direct_match:
            context = direct_match
        else:
            query_vector = np.ascontiguousarray(
                embedder.encode([pre_q]), dtype=np.float32
            )
            # Clamp k to the corpus size: faiss pads missing results
            # with -1, which would otherwise index sources[-1].
            k = min(5, len(sources))
            D, I = index.search(query_vector, k=k)
            context = "\n---\n".join(sources[i] for i in I[0] if i >= 0)

        with st.spinner("🤔 Thinking..."):
            try:
                answer = ask_llm(context, user_query)
                st.markdown("### 🧠 Answer")
                st.write(answer)
            except Exception as e:
                st.error(f"❌ GROQ Error: {e}")
else:
    st.info("📂 Please upload orders (JSON) and info files (PDF) to get started.")