masadonline committed
Commit 12a98fd · verified · 1 Parent(s): 4dbf41f

Update app.py

Files changed (1)
  1. app.py +35 -44
app.py CHANGED
@@ -1,7 +1,6 @@
 import streamlit as st
-import pdfplumber
+import json
 import os
-import tempfile
 import faiss
 import numpy as np
 import pandas as pd
@@ -21,27 +20,25 @@ EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 LLM_MODEL = "llama3-8b-8192"
 embedder = SentenceTransformer(EMBEDDING_MODEL)
 
-def extract_rows_from_pdf(pdf_file_path):
-    rows = []
-    with pdfplumber.open(pdf_file_path) as pdf:
-        for page in pdf.pages:
-            tables = page.extract_tables()
-            for table in tables:
-                for row in table[1:]: # skip header
-                    cleaned = [str(cell).strip() if cell else "" for cell in row]
-                    if any(cleaned): # skip empty rows
-                        rows.append(cleaned)
+def load_orders_from_json(json_file):
+    data = json.load(json_file)
+    if isinstance(data, list):
+        rows = data
+    elif isinstance(data, dict):
+        rows = list(data.values())
+    else:
+        rows = []
     return rows
 
 def build_index(chunks):
-    text_chunks = [" | ".join(chunk) for chunk in chunks]
+    text_chunks = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
     vectors = embedder.encode(text_chunks)
     index = faiss.IndexFlatL2(vectors.shape[1])
     index.add(np.array(vectors))
     return index, text_chunks
 
 def ask_llm(context, query):
-    prompt = f"You are a helpful assistant for an online toy shop.\n\nHere is the order data:\n{context}\n\nQuestion: {query}"
+    prompt = f"You are a helpful assistant for an online toy shop.\n\nHere is the customer order data:\n{context}\n\nQuestion: {query}"
     response = client.chat.completions.create(
         model=LLM_MODEL,
         messages=[{"role": "user", "content": prompt}]
@@ -52,42 +49,36 @@ def ask_llm(context, query):
 st.set_page_config(page_title="🧸 ToyShop Order Status Assistant", layout="wide")
 st.title("📦 ToyShop Order Status Assistant")
 
-uploaded_file = st.file_uploader("Upload a Customer Order PDF", type="pdf")
+uploaded_file = st.file_uploader("Upload a Customer Orders JSON File", type="json")
 
 if uploaded_file:
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
-        tmp.write(uploaded_file.read())
-        pdf_path = tmp.name
-
-    st.success("✅ File uploaded successfully")
+    try:
+        rows = load_orders_from_json(uploaded_file)
 
-    rows = extract_rows_from_pdf(pdf_path)
-    if not rows:
-        st.error("❌ No tabular data found in the PDF.")
-    else:
-        st.info(f"📄 Extracted {len(rows)} order records.")
-
-        # Display records as table (if columns look uniform)
-        try:
+        if not rows:
+            st.error("❌ No valid order data found in the JSON file.")
+        else:
+            st.success(f"✅ Loaded {len(rows)} order records.")
             df = pd.DataFrame(rows)
-            st.subheader("📋 Extracted Order Records")
+            st.subheader("📋 Customer Orders")
            st.dataframe(df, use_container_width=True)
-        except:
-            st.text_area("Extracted Rows", "\n".join([" | ".join(r) for r in rows]), height=300)
 
-        index, text_chunks = build_index(rows)
+            index, text_chunks = build_index(rows)
+
+            query = st.text_input("Ask a question (e.g., 'What is the status of order #1002?')")
 
-        query = st.text_input("Ask a question (e.g., 'What is the status of order 27?')")
+            if query:
+                query_vec = embedder.encode([query])
+                D, I = index.search(query_vec, k=3)
+                context = "\n".join([text_chunks[i] for i in I[0]])
 
-        if query:
-            query_vec = embedder.encode([query])
-            D, I = index.search(query_vec, k=3)
-            context = "\n".join([text_chunks[i] for i in I[0]])
+                with st.spinner("Generating answer..."):
+                    try:
+                        answer = ask_llm(context, query)
+                        st.markdown("### 🧠 Answer")
+                        st.write(answer)
+                    except Exception as e:
+                        st.error(f"LLM Error: {str(e)}")
 
-            with st.spinner("Generating answer..."):
-                try:
-                    answer = ask_llm(context, query)
-                    st.markdown("### 🧠 Answer")
-                    st.write(answer)
-                except Exception as e:
-                    st.error(f"LLM Error: {str(e)}")
+    except Exception as e:
+        st.error(f"❌ Failed to load or process JSON file: {e}")