Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
import streamlit as st
|
2 |
-
import json
|
3 |
import os
|
|
|
|
|
|
|
4 |
import faiss
|
5 |
import numpy as np
|
6 |
import pandas as pd
|
@@ -8,11 +10,11 @@ from sentence_transformers import SentenceTransformer
|
|
8 |
from openai import OpenAI
|
9 |
from dotenv import load_dotenv
|
10 |
|
11 |
-
# Load
|
12 |
load_dotenv()
|
13 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
14 |
|
15 |
-
# Setup GROQ client
|
16 |
client = OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")
|
17 |
|
18 |
# Constants
|
@@ -20,65 +22,92 @@ EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
|
20 |
LLM_MODEL = "llama3-8b-8192"
|
21 |
embedder = SentenceTransformer(EMBEDDING_MODEL)
|
22 |
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
data = json.load(json_file)
|
25 |
if isinstance(data, list):
|
26 |
-
|
27 |
elif isinstance(data, dict):
|
28 |
-
|
29 |
else:
|
30 |
-
|
31 |
-
return rows
|
32 |
|
33 |
-
def build_index(
|
34 |
-
text_chunks = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
|
35 |
vectors = embedder.encode(text_chunks)
|
36 |
index = faiss.IndexFlatL2(vectors.shape[1])
|
37 |
index.add(np.array(vectors))
|
38 |
return index, text_chunks
|
39 |
|
40 |
def ask_llm(context, query):
|
41 |
-
prompt = f"You are a helpful assistant for an online toy shop.\n\
|
42 |
response = client.chat.completions.create(
|
43 |
model=LLM_MODEL,
|
44 |
messages=[{"role": "user", "content": prompt}]
|
45 |
)
|
46 |
-
return response.choices[0].message.content
|
|
|
|
|
47 |
|
48 |
-
|
49 |
-
st.
|
50 |
-
st.title("π¦ ToyShop Order Status Assistant")
|
51 |
|
52 |
-
|
|
|
53 |
|
54 |
-
|
|
|
|
|
|
|
|
|
55 |
try:
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
st.
|
65 |
|
66 |
-
|
67 |
|
68 |
-
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
D, I = index.search(query_vec, k=3)
|
73 |
-
context = "\n".join([text_chunks[i] for i in I[0]])
|
74 |
|
75 |
-
|
76 |
-
|
77 |
-
answer = ask_llm(context, query)
|
78 |
-
st.markdown("### π§ Answer")
|
79 |
-
st.write(answer)
|
80 |
-
except Exception as e:
|
81 |
-
st.error(f"LLM Error: {str(e)}")
|
82 |
|
83 |
-
|
84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
|
|
2 |
import os
|
3 |
+
import json
|
4 |
+
import tempfile
|
5 |
+
import pdfplumber
|
6 |
import faiss
|
7 |
import numpy as np
|
8 |
import pandas as pd
|
|
|
10 |
from openai import OpenAI
|
11 |
from dotenv import load_dotenv
|
12 |
|
13 |
+
# Read the GROQ API key out of the local .env file.
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# GROQ exposes an OpenAI-compatible endpoint, so the stock OpenAI client
# works once pointed at the GROQ base URL.
client = OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")

# Constants: sentence-transformer model for retrieval embeddings,
# GROQ-hosted model for answer generation.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL = "llama3-8b-8192"
embedder = SentenceTransformer(EMBEDDING_MODEL)

st.set_page_config(page_title="🧸 ToyShop Assistant", layout="wide")
st.title("🧸 ToyShop RAG-Based Assistant")

# --- Load and process uploaded files ---
30 |
+
def extract_pdf_text(file):
    """Extract the plain text of every page of an uploaded PDF.

    Args:
        file: Binary file-like object (e.g. a Streamlit ``UploadedFile``).

    Returns:
        All page texts joined with newlines, outer whitespace stripped.
    """
    text = ""
    with pdfplumber.open(file) as pdf:
        for page in pdf.pages:
            # pdfplumber's extract_text() returns None for pages with no
            # text layer (e.g. scanned images); guard so we never do
            # None + "\n" and crash on such pages.
            text += (page.extract_text() or "") + "\n"
    return text.strip()
|
36 |
+
|
37 |
+
def load_json_orders(json_file):
    """Parse an uploaded JSON file into a list of order records.

    A top-level JSON array is returned as-is; a top-level object yields
    its values; anything else yields an empty list.
    """
    payload = json.load(json_file)
    if isinstance(payload, dict):
        return list(payload.values())
    return payload if isinstance(payload, list) else []
|
|
|
45 |
|
46 |
+
def build_index(text_chunks):
    """Embed each chunk and store the vectors in a flat L2 FAISS index.

    Returns the ``(index, text_chunks)`` pair so callers can map search
    hits back to the original chunk text by position.
    """
    embeddings = embedder.encode(text_chunks)
    dimension = embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(np.array(embeddings))
    return faiss_index, text_chunks
|
51 |
|
52 |
def ask_llm(context, query):
    """Answer *query* with the GROQ-hosted LLM, grounded in *context*.

    The retrieved knowledge-base chunks are inlined into a single user
    message; the stripped completion text is returned.
    """
    prompt = (
        "You are a helpful assistant for an online toy shop."
        f"\n\nKnowledge base:\n{context}\n\nQuestion: {query}"
    )
    completion = client.chat.completions.create(
        model=LLM_MODEL,
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content.strip()
|
59 |
+
|
60 |
+
# --- File upload UI ---

st.subheader("📁 Upload Customer Orders (JSON)")
orders_file = st.file_uploader("Upload JSON file", type="json")

st.subheader("📄 Upload FAQ / Product Info / Return Policy (PDFs)")
pdf_files = st.file_uploader("Upload one or more PDFs", type="pdf", accept_multiple_files=True)

order_chunks, pdf_chunks = [], []

# --- Process files ---

if orders_file:
    try:
        orders = load_json_orders(orders_file)
        # Serialize each order record so it can be embedded as one chunk.
        order_chunks = [json.dumps(order, ensure_ascii=False) for order in orders]
        df = pd.DataFrame(orders)
        st.success(f"✅ Loaded {len(order_chunks)} customer order records.")
        st.dataframe(df, use_container_width=True)
    except Exception as e:
        st.error(f"❌ Error loading JSON: {e}")

if pdf_files:
    for pdf_file in pdf_files:
        try:
            text = extract_pdf_text(pdf_file)
            # Chunk by paragraph; drop blank/whitespace-only fragments so
            # they are never embedded or retrieved as context.
            pdf_chunks.extend(chunk for chunk in text.split("\n\n") if chunk.strip())
        except Exception as e:
            st.error(f"❌ Failed to read {pdf_file.name}: {e}")

combined_chunks = order_chunks + pdf_chunks

# --- Question Answering ---

if combined_chunks:
    # NOTE(review): the index is rebuilt on every Streamlit rerun; fine for
    # small uploads, consider st.cache_resource if corpora grow.
    index, sources = build_index(combined_chunks)

    st.subheader("❓ Ask a Question")
    user_query = st.text_input("What would you like to know?")

    if user_query:
        query_vector = embedder.encode([user_query])
        # Never ask FAISS for more neighbours than stored vectors: with
        # k > ntotal it pads the result with -1, and sources[-1] would
        # silently duplicate the last chunk. Clamp k and filter padding.
        k = min(5, len(sources))
        D, I = index.search(query_vector, k=k)
        context = "\n---\n".join(sources[i] for i in I[0] if i >= 0)

        with st.spinner("Thinking..."):
            try:
                answer = ask_llm(context, user_query)
                st.markdown("### 🧠 Answer")
                st.write(answer)
            except Exception as e:
                st.error(f"❌ GROQ API Error: {e}")
else:
    st.info("📂 Please upload both JSON orders and PDFs to begin.")
|