Update app.py
app.py
CHANGED
@@ -1,79 +1,73 @@
-
 import streamlit as st
-
-import
-import
-
-
 from dotenv import load_dotenv

 load_dotenv()
-GROQ_API_KEY = os.getenv("GROQ_API_KEY")

-
-
-
-

-
-

-st.
-uploaded_files = st.sidebar.file_uploader(
-    "Upload your customer order files",
-    type=["pdf", "docx", "txt", "xlsx", "html"],
-    accept_multiple_files=True
-)

-def extract_text(file):
-    if file.name.endswith(".pdf"):
-        reader = PdfReader(file)
-        return "\n".join(page.extract_text() or "" for page in reader.pages)
-    elif file.name.endswith(".docx"):
-        doc = docx.Document(file)
-        return "\n".join(p.text for p in doc.paragraphs)
-    elif file.name.endswith(".txt"):
-        return file.read().decode("utf-8")
-    elif file.name.endswith(".xlsx"):
-        df = pd.read_excel(file)
-        return df.to_string()
-    elif file.name.endswith(".html"):
-        soup = BeautifulSoup(file.read(), "html.parser")
-        return soup.get_text()
-    else:
-        return ""

-combined_text = ""
-if uploaded_files:
-    st.sidebar.success(f"{len(uploaded_files)} file(s) uploaded.")
-    for f in uploaded_files:
-        try:
-            combined_text += f"\n\n--- {f.name} ---\n\n"
-            combined_text += extract_text(f)
-        except Exception as e:
-            st.sidebar.error(f"Error reading {f.name}: {str(e)}")

-query

-if query and combined_text:
-    try:
-
-        system_prompt = (
-            "You are a helpful assistant for an online toy shop. "
-            "Answer customer queries based on the following order information:\n\n"
-            + combined_text
-        )
-        response = client.chat.completions.create(
-            model="llama3-8b-8192",
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": query}
-            ]
-        )
-        answer = response.choices[0].message.content
-        st.success("Answer:")
-        st.write(answer)
-    except Exception as e:
-        st.error(f"Error: {str(e)}")
-elif query:
-    st.warning("Please upload order files to enable RAG-based answers.")
+# app.py
 import streamlit as st
+import pdfplumber
+import os
+import tempfile
+import faiss
+import numpy as np
+from sentence_transformers import SentenceTransformer
+import openai
 from dotenv import load_dotenv

 load_dotenv()

+openai.api_key = os.getenv("GROQ_API_KEY")  # assumes GROQ is OpenAI-compatible
+
+MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+LLM_MODEL = "llama3-8b-8192"  # Change if needed
+
+model = SentenceTransformer(MODEL_NAME)
+
+# Function to extract table rows
+def extract_rows_from_pdf(pdf_file):
+    with pdfplumber.open(pdf_file) as pdf:
+        rows = []
+        for page in pdf.pages:
+            tables = page.extract_tables()
+            for table in tables:
+                for row in table[1:]:  # skip header
+                    cleaned = " | ".join([str(cell).strip() for cell in row])
+                    rows.append(cleaned)
+    return rows
+
+# Function to build FAISS index
+def build_index(chunks):
+    vectors = model.encode(chunks)
+    index = faiss.IndexFlatL2(vectors.shape[1])
+    index.add(np.array(vectors))
+    return index, vectors
+
+# Function to query LLM
+def ask_llm(context, query):
+    prompt = f"Context:\n{context}\n\nAnswer the question: {query}"
+    response = openai.ChatCompletion.create(
+        model=LLM_MODEL,
+        messages=[{"role": "user", "content": prompt}]
+    )
+    return response['choices'][0]['message']['content']
+
+# Streamlit UI
+st.title("📦 Order Status Helper")
+
+uploaded_file = st.file_uploader("Upload Customer Order PDF", type="pdf")

+if uploaded_file:
+    with tempfile.NamedTemporaryFile(delete=False) as tmp:
+        tmp.write(uploaded_file.read())
+        tmp_path = tmp.name

+    st.success("File uploaded and processed!")

+    st.session_state.rows = extract_rows_from_pdf(tmp_path)
+    st.session_state.index, st.session_state.vectors = build_index(st.session_state.rows)

+query = st.text_input("Ask a question (e.g., What is the status of ORD12345?)")

+if query:
+    query_vec = model.encode([query])
+    D, I = st.session_state.index.search(query_vec, k=3)
+    context = "\n".join([st.session_state.rows[i] for i in I[0]])

+    answer = ask_llm(context, query)
+    st.markdown("### 🧠 Answer")
+    st.write(answer)
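Two caveats in the new version: with the legacy openai (<1.0) SDK, setting openai.api_key alone still sends requests to api.openai.com, so Groq's OpenAI-compatible endpoint has to be selected explicitly via openai.api_base; and the "if query:" block reads st.session_state.index before checking that a PDF has been indexed, so asking a question before uploading raises an exception. A minimal sketch of both fixes, reusing the names defined in app.py above (model, ask_llm, st.session_state.rows / st.session_state.index):

# Sketch, not part of the commit: route the legacy openai (<1.0) client to Groq.
openai.api_key = os.getenv("GROQ_API_KEY")
openai.api_base = "https://api.groq.com/openai/v1"  # Groq's OpenAI-compatible base URL

# Guarded query path: only search once a PDF has been indexed.
query = st.text_input("Ask a question (e.g., What is the status of ORD12345?)")
if query:
    if "index" not in st.session_state:
        st.warning("Please upload an order PDF first.")
    else:
        query_vec = model.encode([query])  # float32 ndarray, as faiss expects
        D, I = st.session_state.index.search(query_vec, k=3)
        # faiss pads the result with -1 when the index holds fewer than k rows.
        context = "\n".join(st.session_state.rows[i] for i in I[0] if i != -1)
        st.markdown("### 🧠 Answer")
        st.write(ask_llm(context, query))

On openai>=1.0 the module-level ChatCompletion interface is removed; the equivalent there is client = openai.OpenAI(base_url="https://api.groq.com/openai/v1", api_key=os.getenv("GROQ_API_KEY")) followed by client.chat.completions.create(...).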