masadonline committed on
Commit
1b72738
·
verified ·
1 Parent(s): 8fe6699

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -40
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import streamlit as st
2
- import json
3
  import os
 
 
 
4
  import faiss
5
  import numpy as np
6
  import pandas as pd
@@ -8,11 +10,11 @@ from sentence_transformers import SentenceTransformer
8
  from openai import OpenAI
9
  from dotenv import load_dotenv
10
 
11
- # Load environment variables
12
  load_dotenv()
13
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
14
 
15
- # Setup GROQ client
16
  client = OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")
17
 
18
  # Constants
@@ -20,65 +22,92 @@ EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
20
  LLM_MODEL = "llama3-8b-8192"
21
  embedder = SentenceTransformer(EMBEDDING_MODEL)
22
 
23
- def load_orders_from_json(json_file):
 
 
 
 
 
 
 
 
 
 
 
 
24
  data = json.load(json_file)
25
  if isinstance(data, list):
26
- rows = data
27
  elif isinstance(data, dict):
28
- rows = list(data.values())
29
  else:
30
- rows = []
31
- return rows
32
 
33
- def build_index(chunks):
34
- text_chunks = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
35
  vectors = embedder.encode(text_chunks)
36
  index = faiss.IndexFlatL2(vectors.shape[1])
37
  index.add(np.array(vectors))
38
  return index, text_chunks
39
 
40
  def ask_llm(context, query):
41
- prompt = f"You are a helpful assistant for an online toy shop.\n\nHere is the customer order data:\n{context}\n\nQuestion: {query}"
42
  response = client.chat.completions.create(
43
  model=LLM_MODEL,
44
  messages=[{"role": "user", "content": prompt}]
45
  )
46
- return response.choices[0].message.content
 
 
47
 
48
- # Streamlit UI
49
- st.set_page_config(page_title="🧸 ToyShop Order Status Assistant", layout="wide")
50
- st.title("πŸ“¦ ToyShop Order Status Assistant")
51
 
52
- uploaded_file = st.file_uploader("Upload a Customer Orders JSON File", type="json")
 
53
 
54
- if uploaded_file:
 
 
 
 
55
  try:
56
- rows = load_orders_from_json(uploaded_file)
 
 
 
 
 
 
57
 
58
- if not rows:
59
- st.error("❌ No valid order data found in the JSON file.")
60
- else:
61
- st.success(f"βœ… Loaded {len(rows)} order records.")
62
- df = pd.DataFrame(rows)
63
- st.subheader("πŸ“‹ Customer Orders")
64
- st.dataframe(df, use_container_width=True)
65
 
66
- index, text_chunks = build_index(rows)
67
 
68
- query = st.text_input("Ask a question (e.g., 'What is the status of order #1002?')")
69
 
70
- if query:
71
- query_vec = embedder.encode([query])
72
- D, I = index.search(query_vec, k=3)
73
- context = "\n".join([text_chunks[i] for i in I[0]])
74
 
75
- with st.spinner("Generating answer..."):
76
- try:
77
- answer = ask_llm(context, query)
78
- st.markdown("### 🧠 Answer")
79
- st.write(answer)
80
- except Exception as e:
81
- st.error(f"LLM Error: {str(e)}")
82
 
83
- except Exception as e:
84
- st.error(f"❌ Failed to load or process JSON file: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
 
2
  import os
3
+ import json
4
+ import tempfile
5
+ import pdfplumber
6
  import faiss
7
  import numpy as np
8
  import pandas as pd
 
10
  from openai import OpenAI
11
  from dotenv import load_dotenv
12
 
13
+ # Load GROQ API key from .env
14
  load_dotenv()
15
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
16
 
17
+ # Setup GROQ LLM client
18
  client = OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")
19
 
20
  # Constants
 
22
  LLM_MODEL = "llama3-8b-8192"
23
  embedder = SentenceTransformer(EMBEDDING_MODEL)
24
 
25
# --- Streamlit page chrome ---
st.set_page_config(layout="wide", page_title="🧸 ToyShop Assistant")
st.title("🧸 ToyShop RAG-Based Assistant")
27
+
28
+ # --- Load and process uploaded files ---
29
+
30
def extract_pdf_text(file):
    """Extract all text from a PDF file-like object.

    Concatenates the text of every page (newline-separated) and returns
    the result stripped of surrounding whitespace.

    Args:
        file: A file-like object (or path) accepted by ``pdfplumber.open``.

    Returns:
        The extracted text as a single string; "" if no page has text.
    """
    page_texts = []
    with pdfplumber.open(file) as pdf:
        for page in pdf.pages:
            # extract_text() returns None for pages without a text layer
            # (e.g. scanned images); the original `text += None + "\n"`
            # raised TypeError on such pages. Skip them instead.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
    return "\n".join(page_texts).strip()
36
+
37
def load_json_orders(json_file):
    """Parse an uploaded JSON file into a list of order records.

    A top-level JSON array is returned as-is; a top-level object is
    flattened to the list of its values; any other top-level value
    (string, number, etc.) yields an empty list.
    """
    payload = json.load(json_file)
    if isinstance(payload, dict):
        return list(payload.values())
    if isinstance(payload, list):
        return payload
    return []
 
45
 
46
def build_index(text_chunks):
    """Embed ``text_chunks`` and build a FAISS L2 index over them.

    Returns ``(index, text_chunks)`` so callers can map search hits
    back to the original chunk strings by position.
    """
    embeddings = np.array(embedder.encode(text_chunks))
    dim = embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dim)
    faiss_index.add(embeddings)
    return faiss_index, text_chunks
51
 
52
def ask_llm(context, query):
    """Answer ``query`` with the GROQ-hosted LLM, grounded on ``context``.

    Sends one user message containing the retrieved knowledge-base
    chunks plus the question, and returns the stripped completion text.
    """
    user_message = (
        f"You are a helpful assistant for an online toy shop.\n\n"
        f"Knowledge base:\n{context}\n\nQuestion: {query}"
    )
    completion = client.chat.completions.create(
        messages=[{"role": "user", "content": user_message}],
        model=LLM_MODEL,
    )
    return completion.choices[0].message.content.strip()
59
+
60
# --- File upload UI ---

st.subheader("πŸ“ Upload Customer Orders (JSON)")
orders_file = st.file_uploader("Upload JSON file", type="json")

st.subheader("πŸ“š Upload FAQ / Product Info / Return Policy (PDFs)")
pdf_files = st.file_uploader("Upload one or more PDFs", type="pdf", accept_multiple_files=True)

order_chunks, pdf_chunks = [], []

# --- Process files ---

if orders_file:
    try:
        orders = load_json_orders(orders_file)
        # One JSON-serialized chunk per order so retrieval returns whole records.
        order_chunks = [json.dumps(order, ensure_ascii=False) for order in orders]
        df = pd.DataFrame(orders)
        st.success(f"βœ… Loaded {len(order_chunks)} customer order records.")
        st.dataframe(df, use_container_width=True)
    except Exception as e:
        st.error(f"❌ Error loading JSON: {e}")

if pdf_files:
    for pdf_file in pdf_files:
        try:
            text = extract_pdf_text(pdf_file)
            # Chunk by paragraph; drop blank/whitespace-only chunks so they
            # don't pollute the embedding index (the original split kept
            # empty strings produced by consecutive blank lines).
            pdf_chunks.extend(
                chunk.strip() for chunk in text.split("\n\n") if chunk.strip()
            )
        except Exception as e:
            st.error(f"❌ Failed to read {pdf_file.name}: {e}")

# Single knowledge base: structured orders + unstructured PDF paragraphs.
combined_chunks = order_chunks + pdf_chunks
91
 
92
# --- Question Answering ---

if combined_chunks:
    index, sources = build_index(combined_chunks)

    st.subheader("❓ Ask a Question")
    user_query = st.text_input("What would you like to know?")

    if user_query:
        query_vector = embedder.encode([user_query])
        # Never ask FAISS for more neighbours than the index holds: with
        # k > ntotal it pads the result with -1 labels, and sources[-1]
        # would silently duplicate the last chunk in the context.
        k = min(5, len(sources))
        D, I = index.search(query_vector, k=k)
        context = "\n---\n".join(sources[i] for i in I[0] if i >= 0)

        with st.spinner("Thinking..."):
            try:
                answer = ask_llm(context, user_query)
                st.markdown("### 🧠 Answer")
                st.write(answer)
            except Exception as e:
                st.error(f"❌ GROQ API Error: {e}")
else:
    # Either upload is enough to build the knowledge base (see
    # combined_chunks above), so the message must not demand both.
    st.info("πŸ“‚ Please upload JSON orders and/or PDFs to begin.")