masadonline committed
Commit 8a8a6d6 · verified · 1 Parent(s): 12fd03c

Update app.py

Files changed (1)
app.py +39 -25
app.py CHANGED
@@ -9,27 +9,25 @@ import pandas as pd
 from sentence_transformers import SentenceTransformer
 from openai import OpenAI
 from dotenv import load_dotenv
-import torch
 
 # Load environment variables
 load_dotenv()
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 
-# Setup GROQ LLM client
+# Setup GROQ client
 client = OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")
 
-# Load embedding model with device specification
-device = "cuda" if torch.cuda.is_available() else "cpu"
-embedder = SentenceTransformer("all-MiniLM-L6-v2", trust_remote_code=True)
-embedder.to(device)
-
-# LLM model name
+# Constants
+EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 LLM_MODEL = "llama3-8b-8192"
+embedder = SentenceTransformer(EMBEDDING_MODEL)
 
-# Streamlit setup
+# Streamlit app setup
 st.set_page_config(page_title="🧸 ToyShop Assistant", layout="wide")
 st.title("🧸 ToyShop RAG-Based Assistant")
 
+# --- Helper functions ---
+
 def extract_pdf_text(file):
     text = ""
     with pdfplumber.open(file) as pdf:
@@ -40,8 +38,17 @@ def extract_pdf_text(file):
     return text.strip()
 
 def load_json_orders(json_file):
-    data = json.load(json_file)
-    return data if isinstance(data, list) else list(data.values())
+    try:
+        data = json.load(json_file)
+        if isinstance(data, list):
+            return data
+        elif isinstance(data, dict):
+            return list(data.values())
+        else:
+            return []
+    except Exception as e:
+        st.error(f"Error parsing JSON: {e}")
+        return []
 
 def build_index(text_chunks):
     vectors = embedder.encode(text_chunks)
@@ -57,49 +64,56 @@ def ask_llm(context, query):
     )
     return response.choices[0].message.content.strip()
 
-# File upload
+# --- File upload section ---
+
 st.subheader("📁 Upload Customer Orders (JSON)")
 orders_file = st.file_uploader("Upload JSON file", type="json")
 
-st.subheader("📚 Upload FAQ / Product Info / Return Policy (PDFs)")
+st.subheader("📚 Upload FAQs / Product Info / Return Policy (PDFs)")
 pdf_files = st.file_uploader("Upload one or more PDFs", type="pdf", accept_multiple_files=True)
 
 order_chunks, pdf_chunks = [], []
 
-# Handle JSON
+# --- Process JSON ---
 if orders_file:
-    try:
-        orders = load_json_orders(orders_file)
+    orders = load_json_orders(orders_file)
+    if orders:
         order_chunks = [json.dumps(order, ensure_ascii=False) for order in orders]
         st.success(f"✅ Loaded {len(order_chunks)} customer order records.")
-        st.dataframe(pd.DataFrame(orders), use_container_width=True)
-    except Exception as e:
-        st.error(f"❌ Error loading JSON: {e}")
+
+        # Try to flatten for DataFrame view
+        try:
+            df = pd.json_normalize(orders)
+            st.dataframe(df, use_container_width=True)
+        except Exception:
+            st.warning("⚠️ Nested JSON detected. Showing raw JSON preview instead.")
+            st.json(orders)
 
-# Handle PDFs
+# --- Process PDFs ---
 if pdf_files:
     for pdf_file in pdf_files:
         try:
             text = extract_pdf_text(pdf_file)
-            pdf_chunks.extend(text.split("\n\n"))  # simple paragraph chunking
+            pdf_chunks.extend(text.split("\n\n"))  # paragraph-wise
+            st.success(f"📄 Processed {pdf_file.name}")
         except Exception as e:
             st.error(f"❌ Failed to read {pdf_file.name}: {e}")
 
-# Build index if we have content
 combined_chunks = order_chunks + pdf_chunks
 
+# --- Question Answering Section ---
 if combined_chunks:
     index, sources = build_index(combined_chunks)
 
     st.subheader("❓ Ask a Question")
-    user_query = st.text_input("What would you like to know?")
+    user_query = st.text_input("What would you like to know?", placeholder="e.g. What is the status of order 123?")
 
     if user_query:
         query_vector = embedder.encode([user_query])
         D, I = index.search(query_vector, k=5)
         context = "\n---\n".join([sources[i] for i in I[0]])
 
-        with st.spinner("Thinking..."):
+        with st.spinner("🤔 Thinking..."):
             try:
                 answer = ask_llm(context, user_query)
                 st.markdown("### 🧠 Answer")
@@ -107,4 +121,4 @@ if combined_chunks:
             except Exception as e:
                 st.error(f"❌ GROQ API Error: {e}")
 else:
-    st.info("📂 Please upload both JSON orders and PDFs to begin.")
+    st.info("📂 Please upload both JSON orders and relevant PDFs to begin.")
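Note on the helpers elided by the hunk boundaries above: the bodies of build_index and ask_llm are unchanged by this commit, so only their first and last lines appear as context. For readers tracing the retrieval flow, here is a minimal sketch of a build_index helper consistent with the visible calls (vectors = embedder.encode(text_chunks), D, I = index.search(query_vector, k=5), and the index, sources return pair). The faiss import and the IndexFlatL2 index type are assumptions, not taken from this commit.

import faiss
import numpy as np

def build_index(text_chunks):
    # Encode each chunk with the module-level SentenceTransformer ("embedder" in app.py).
    vectors = embedder.encode(text_chunks)
    vectors = np.asarray(vectors, dtype="float32")  # FAISS expects float32
    # Exact L2 index over the embedding dimension (384 for all-MiniLM-L6-v2).
    # The index type is an assumption; the commit does not show which one is used.
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    # Return the chunks alongside the index so search hits can be mapped back to text.
    return index, text_chunks

With this shape, index.search(embedder.encode([user_query]), k=5) yields distances D and row indices I, and sources[i] recovers the matching chunk, which is how the query path in the diff consumes the result.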
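ask_llm is likewise only partially visible (its closing parenthesis and the line return response.choices[0].message.content.strip()). A hedged sketch of a compatible implementation against the OpenAI-compatible Groq endpoint configured at the top of app.py; the prompt wording is illustrative and not the author's actual prompt.

def ask_llm(context, query):
    # Chat completion through the OpenAI client pointed at https://api.groq.com/openai/v1.
    response = client.chat.completions.create(
        model=LLM_MODEL,  # "llama3-8b-8192"
        messages=[
            # The message text below is an assumption for illustration only.
            {"role": "system", "content": "Answer the customer's question using only the provided ToyShop context."},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"},
        ],
    )
    return response.choices[0].message.content.strip()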