File size: 5,884 Bytes
36c0c0f
b02d98a
1b72738
 
b02d98a
 
4dbf41f
92d0c75
b02d98a
85e6257
36c0c0f
0b8ee3b
12fd03c
0b8ee3b
85e6257
ab4f2f9
8a8a6d6
85e6257
b02d98a
8a8a6d6
 
85e6257
8a8a6d6
b02d98a
013dd9f
1b72738
 
 
92d0c75
 
1b72738
 
 
 
013dd9f
 
 
1b72738
 
0831006
 
013dd9f
 
 
 
 
 
 
 
0831006
 
92d0c75
0831006
 
 
 
 
 
 
 
 
 
 
 
 
 
 
013dd9f
 
0831006
 
 
 
 
 
 
 
 
 
1b72738
8a8a6d6
 
013dd9f
 
 
 
 
 
 
 
8a8a6d6
013dd9f
 
d899598
013dd9f
 
b02d98a
 
013dd9f
b02d98a
 
013dd9f
 
 
 
 
 
 
85e6257
b02d98a
 
 
1b72738
 
92d0c75
 
 
 
 
 
 
 
 
 
 
 
 
1b72738
 
b02d98a
8a8a6d6
1b72738
36c0c0f
1b72738
92d0c75
1b72738
92d0c75
1b72738
8a8a6d6
 
013dd9f
 
8a8a6d6
 
 
 
013dd9f
8a8a6d6
 
92d0c75
1b72738
013dd9f
1b72738
013dd9f
 
 
1b72738
013dd9f
4dbf41f
92d0c75
1b72738
12a98fd
1b72738
 
36c0c0f
1b72738
013dd9f
36c0c0f
1b72738
92d0c75
 
 
 
 
 
 
 
 
1b72738
8a8a6d6
1b72738
 
 
 
 
013dd9f
1b72738
013dd9f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import streamlit as st
import os
import json
import pdfplumber
import faiss
import numpy as np
import pandas as pd
import re
from sentence_transformers import SentenceTransformer
from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables (a local .env file is honoured if present).
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Constants
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL = "llama3-8b-8192"  # NOTE(review): confirm this model id is still served by Groq

# Streamlit UI. The page config is issued before any other Streamlit call so
# that the missing-key message below can be rendered on the page.
st.set_page_config(page_title="🧸 ToyShop Assistant", layout="wide")
st.title("🧸 ToyShop RAG-Based Assistant")

# Without a key the client cannot be used; show a readable message and halt
# this script run instead of letting the failure surface as a traceback.
if not GROQ_API_KEY:
    st.error("❌ GROQ_API_KEY is not set. Add it to your environment or .env file and reload.")
    st.stop()

# Setup GROQ client (the OpenAI SDK is pointed at Groq's /openai/v1 base URL).
client = OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")

# Embedding model shared by index construction and query encoding.
embedder = SentenceTransformer(EMBEDDING_MODEL)

# --- Helper Functions ---

def extract_pdf_text(file):
    """Return the text of every page of a PDF as a single string.

    Args:
        file: Anything ``pdfplumber.open`` accepts (a path or a file-like
            object).

    Returns:
        The page texts separated by a newline, with leading and trailing
        whitespace removed. Pages for which ``extract_text()`` returns
        nothing are skipped, so a PDF without extractable text yields ``""``.
    """
    with pdfplumber.open(file) as document:
        page_texts = [page.extract_text() for page in document.pages]
    # Keep only pages that actually produced text before joining.
    return "\n".join(page_text for page_text in page_texts if page_text).strip()

# --- Order helpers ---

def _format_order_items(items):
    """Return the comma-separated item list shown in the order summary."""
    if not isinstance(items, (list, tuple)) or not items:
        return "Not available"

    parts = []
    for item in items:
        if not isinstance(item, dict):
            # Tolerate plain values (e.g. a bare product name) in the list.
            parts.append(str(item))
            continue
        name = item.get("name", "Unknown item")
        if "quantity" in item:
            parts.append(f"{name} (x{item['quantity']})")
        else:
            parts.append(str(name))
    return ", ".join(parts)


def flatten_order(order):
    """Render one order record as plain text.

    Every top-level key becomes a ``key: value`` line; nested dict/list
    values are JSON-encoded. When the order has both ``order_id`` and
    ``status``, a customer-facing summary is appended after those lines.

    Args:
        order: A single order record. Anything that is not a dict is
            rendered as an empty string.

    Returns:
        The newline-joined text for the order.
    """
    if not isinstance(order, dict):
        return ""

    flat = []
    for key, value in order.items():
        if isinstance(value, (dict, list)):
            flat.append(f"{key}: {json.dumps(value, ensure_ascii=False)}")
        else:
            flat.append(f"{key}: {value}")

    # Add a friendly natural language summary for the assistant.
    if "order_id" in order and "status" in order:
        # Items are formatted by a helper that tolerates missing keys and
        # non-dict entries, so one malformed item cannot abort the summary.
        summary = "\n".join([
            f"Dear {order.get('customer_name', 'Customer')},",
            "",
            f"Here are the complete details of your order **#{order['order_id']}**:",
            f"- **Status**: {order['status']}",
            f"- **Items**: {_format_order_items(order.get('items'))}",
            f"- **Total**: {order.get('total', 'N/A')}",
            f"- **Date**: {order.get('date', 'N/A')}",
            f"- **Shipping Address**: {order.get('shipping_address', 'N/A')}",
            "",
            "We hope this helps! Let us know if you need anything else.",
            "",
            "Thanks for shopping with us! 😊",
        ])
        flat.append(summary)
    return "\n".join(flat)

def get_order_by_id(orders, query):
    """Return the flattened text of the order whose id is named in *query*.

    The query is scanned, case-insensitively, for the word "order"
    (optionally followed by "id" / "_id"), an optional ``:`` or ``#``, and a
    run of digits. The digits are compared with ``str(order["order_id"])``.

    Args:
        orders: Iterable of order records; entries that are not dicts are
            skipped.
        query: The user's question. ``None`` or an empty string never matches.

    Returns:
        ``flatten_order(order)`` for the first matching order, or ``None``
        when the query names no order id or no order carries that id.
    """
    # NOTE: this module defines get_order_by_id a second time further down;
    # the later definition is the one bound at call time. Keep both in sync
    # (or remove one of them).
    match = re.search(
        r"order(?:[\s_]*id)?\s*[:#]?\s*(\d+)", query or "", re.IGNORECASE
    )
    if match is None:
        return None

    oid = match.group(1)
    for order in orders:
        if isinstance(order, dict) and str(order.get("order_id")) == oid:
            return flatten_order(order)
    return None


def load_json_orders(json_file):
    """Parse an uploaded JSON document into a list of order dicts.

    Accepted top-level shapes:
      * a list of orders;
      * a single order object (recognised by its ``order_id`` key);
      * a mapping whose values are orders, or lists of orders such as
        ``{"orders": [...]}``.

    Args:
        json_file: A file-like object readable by ``json.load``.

    Returns:
        The entries that are dicts, in document order. An empty list is
        returned for any other top-level shape, or when parsing fails (in
        which case the error is also reported through ``st.error``).
    """
    try:
        data = json.load(json_file)
    except Exception as e:
        # UI boundary: report the problem to the user instead of crashing.
        st.error(f"❌ Error parsing JSON: {e}")
        return []

    if isinstance(data, list):
        candidates = data
    elif isinstance(data, dict):
        if "order_id" in data:
            # The document is one order, not a container of orders.
            candidates = [data]
        else:
            candidates = []
            for value in data.values():
                if isinstance(value, list):
                    # Wrapper object: unpack the list it carries.
                    candidates.extend(value)
                else:
                    candidates.append(value)
    else:
        return []

    return [entry for entry in candidates if isinstance(entry, dict)]

def build_index(chunks):
    """Embed *chunks* and build a flat L2 FAISS index over the vectors.

    Args:
        chunks: Non-empty list of text chunks to embed.

    Returns:
        ``(index, chunks)`` — the populated ``faiss.IndexFlatL2`` and the
        same chunk list, so that row ``i`` of the index maps to ``chunks[i]``.
    """
    # FAISS consumes C-contiguous float32 matrices; normalise explicitly
    # rather than relying on whatever dtype/layout the encoder returns.
    vectors = np.ascontiguousarray(embedder.encode(chunks), dtype=np.float32)
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index, chunks

def ask_llm(context, query):
    """Ask the chat model to answer *query* using *context* as knowledge base.

    Args:
        context: Text placed under "Knowledge base:" in the prompt.
        query: The user's question.

    Returns:
        The text of the first completion choice with surrounding whitespace
        removed, or ``""`` when the response carries no text content.

    Any exception raised by the client call propagates to the caller.
    """
    prompt = f"""You are a helpful assistant for an online toy shop.

Knowledge base:
{context}

Question: {query}
"""
    response = client.chat.completions.create(
        model=LLM_MODEL,
        messages=[{"role": "user", "content": prompt}],
    )
    # The message content is optional in the chat-completions response, so
    # guard against None before stripping.
    content = response.choices[0].message.content
    return (content or "").strip()

def preprocess_query(q):
    """Normalise a query: ``order_id`` becomes ``order``, underscores become spaces."""
    without_field_name = q.replace("order_id", "order")
    # Splitting on "_" and re-joining with a space swaps every remaining underscore.
    return " ".join(without_field_name.split("_"))

def get_order_by_id(orders, query):
    """Return the flattened text of the order whose id is named in *query*.

    The query is scanned, case-insensitively, for the word "order"
    (optionally followed by "id" / "_id"), an optional ``:`` or ``#``, and a
    run of digits. The digits are compared with ``str(order["order_id"])``.

    Args:
        orders: Iterable of order records; entries that are not dicts are
            skipped.
        query: The user's question. ``None`` or an empty string never matches.

    Returns:
        ``flatten_order(order)`` for the first matching order, or ``None``
        when the query names no order id or no order carries that id.
    """
    # NOTE: a function with this same name is also defined earlier in this
    # module; this later definition overrides it at import time. Keep both
    # in sync (or remove one of them).
    match = re.search(
        r"order(?:[\s_]*id)?\s*[:#]?\s*(\d+)", query or "", re.IGNORECASE
    )
    if match is None:
        return None

    oid = match.group(1)
    for order in orders:
        if isinstance(order, dict) and str(order.get("order_id")) == oid:
            return flatten_order(order)
    return None

# --- Uploads ---
# The emoji in both headers were stored mis-encoded and rendered as garbage;
# they are restored to real code points here.
st.subheader("📁 Upload Customer Orders (JSON)")
orders_file = st.file_uploader("Upload JSON file", type="json")

st.subheader("📚 Upload FAQs / Product Info / Return Policy (PDFs)")
pdf_files = st.file_uploader("Upload one or more PDFs", type="pdf", accept_multiple_files=True)

# Text chunks gathered from each source, and the parsed order records.
# They stay empty when the corresponding upload is absent.
order_chunks, pdf_chunks = [], []
orders = []

# --- Handle JSON Orders ---
if orders_file:
    orders = load_json_orders(orders_file)
    if orders:
        # One text chunk per order, later embedded alongside the PDF chunks.
        order_chunks = [flatten_order(o) for o in orders]
        # The check-mark emoji was stored mis-encoded; restored here.
        st.success(f"✅ Loaded {len(order_chunks)} valid orders.")
        try:
            df = pd.json_normalize(orders)
            st.dataframe(df, use_container_width=True)
        except Exception:
            # The table is only a preview: fall back to the raw JSON rather
            # than failing the whole page.
            st.warning("⚠️ Unable to normalize JSON. Showing raw preview.")
            st.json(orders)

# --- Handle PDFs ---
if pdf_files:
    for file in pdf_files:
        try:
            text = extract_pdf_text(file)
            # Split on blank lines and drop empty pieces, so that a PDF
            # without extractable text does not add a blank chunk to the index.
            file_chunks = [part.strip() for part in text.split("\n\n") if part.strip()]
            if file_chunks:
                pdf_chunks.extend(file_chunks)
                st.success(f"📄 Processed: {file.name}")
            else:
                st.warning(f"⚠️ No extractable text found in {file.name}")
        except Exception as e:
            # One unreadable upload must not prevent the others from loading.
            st.error(f"❌ Error in {file.name}: {e}")

# --- Build Index & Q&A ---
combined_chunks = order_chunks + pdf_chunks

if combined_chunks:
    index, sources = build_index(combined_chunks)

    st.subheader("❓ Ask a Question")
    user_query = st.text_input("What would you like to know?", placeholder="e.g., What is the status of order 105?")

    if user_query:
        pre_q = preprocess_query(user_query)
        # An explicit order id in the question bypasses vector search.
        direct_match = get_order_by_id(orders, user_query)

        if direct_match:
            context = direct_match
        else:
            query_vector = embedder.encode([pre_q])
            # Never request more neighbours than there are chunks: FAISS pads
            # missing results with label -1, which would index the *last*
            # chunk and duplicate it in the context.
            top_k = min(5, len(sources))
            _, neighbours = index.search(query_vector, k=top_k)
            context = "\n---\n".join(sources[i] for i in neighbours[0] if i >= 0)

        with st.spinner("🤔 Thinking..."):
            try:
                answer = ask_llm(context, user_query)
                st.markdown("### 🧠 Answer")
                st.write(answer)
            except Exception as e:
                # UI boundary: show the failure to the user instead of a traceback.
                st.error(f"❌ GROQ Error: {e}")
else:
    st.info("📂 Please upload orders (JSON) and info files (PDF) to get started.")