masadonline committed
Commit 12a98fd · verified · 1 Parent(s): 4dbf41f

Update app.py

Files changed (1)
  1. app.py +35 -44
app.py CHANGED
@@ -1,7 +1,6 @@
 import streamlit as st
-import pdfplumber
+import json
 import os
-import tempfile
 import faiss
 import numpy as np
 import pandas as pd
@@ -21,27 +20,25 @@ EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 LLM_MODEL = "llama3-8b-8192"
 embedder = SentenceTransformer(EMBEDDING_MODEL)
 
-def extract_rows_from_pdf(pdf_file_path):
-    rows = []
-    with pdfplumber.open(pdf_file_path) as pdf:
-        for page in pdf.pages:
-            tables = page.extract_tables()
-            for table in tables:
-                for row in table[1:]: # skip header
-                    cleaned = [str(cell).strip() if cell else "" for cell in row]
-                    if any(cleaned): # skip empty rows
-                        rows.append(cleaned)
+def load_orders_from_json(json_file):
+    data = json.load(json_file)
+    if isinstance(data, list):
+        rows = data
+    elif isinstance(data, dict):
+        rows = list(data.values())
+    else:
+        rows = []
     return rows
 
 def build_index(chunks):
-    text_chunks = [" | ".join(chunk) for chunk in chunks]
+    text_chunks = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
     vectors = embedder.encode(text_chunks)
     index = faiss.IndexFlatL2(vectors.shape[1])
     index.add(np.array(vectors))
     return index, text_chunks
 
 def ask_llm(context, query):
-    prompt = f"You are a helpful assistant for an online toy shop.\n\nHere is the order data:\n{context}\n\nQuestion: {query}"
+    prompt = f"You are a helpful assistant for an online toy shop.\n\nHere is the customer order data:\n{context}\n\nQuestion: {query}"
     response = client.chat.completions.create(
         model=LLM_MODEL,
         messages=[{"role": "user", "content": prompt}]
@@ -52,42 +49,36 @@ def ask_llm(context, query):
 st.set_page_config(page_title="🧸 ToyShop Order Status Assistant", layout="wide")
 st.title("📦 ToyShop Order Status Assistant")
 
-uploaded_file = st.file_uploader("Upload a Customer Order PDF", type="pdf")
+uploaded_file = st.file_uploader("Upload a Customer Orders JSON File", type="json")
 
 if uploaded_file:
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
-        tmp.write(uploaded_file.read())
-        pdf_path = tmp.name
-
-    st.success("✅ File uploaded successfully")
+    try:
+        rows = load_orders_from_json(uploaded_file)
 
-    rows = extract_rows_from_pdf(pdf_path)
-    if not rows:
-        st.error("❌ No tabular data found in the PDF.")
-    else:
-        st.info(f"📄 Extracted {len(rows)} order records.")
-
-        # Display records as table (if columns look uniform)
-        try:
+        if not rows:
+            st.error("❌ No valid order data found in the JSON file.")
+        else:
+            st.success(f"✅ Loaded {len(rows)} order records.")
             df = pd.DataFrame(rows)
-            st.subheader("📋 Extracted Order Records")
+            st.subheader("📋 Customer Orders")
            st.dataframe(df, use_container_width=True)
-        except:
-            st.text_area("Extracted Rows", "\n".join([" | ".join(r) for r in rows]), height=300)
 
-        index, text_chunks = build_index(rows)
+            index, text_chunks = build_index(rows)
+
+            query = st.text_input("Ask a question (e.g., 'What is the status of order #1002?')")
 
-        query = st.text_input("Ask a question (e.g., 'What is the status of order 27?')")
+            if query:
+                query_vec = embedder.encode([query])
+                D, I = index.search(query_vec, k=3)
+                context = "\n".join([text_chunks[i] for i in I[0]])
 
-        if query:
-            query_vec = embedder.encode([query])
-            D, I = index.search(query_vec, k=3)
-            context = "\n".join([text_chunks[i] for i in I[0]])
+                with st.spinner("Generating answer..."):
+                    try:
+                        answer = ask_llm(context, query)
+                        st.markdown("### 🧠 Answer")
+                        st.write(answer)
+                    except Exception as e:
+                        st.error(f"LLM Error: {str(e)}")
 
-            with st.spinner("Generating answer..."):
-                try:
-                    answer = ask_llm(context, query)
-                    st.markdown("### 🧠 Answer")
-                    st.write(answer)
-                except Exception as e:
-                    st.error(f"LLM Error: {str(e)}")
+    except Exception as e:
+        st.error(f"❌ Failed to load or process JSON file: {e}")