masadonline committed on
Commit
b02d98a
·
verified ·
1 Parent(s): 7592386

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -68
app.py CHANGED
@@ -1,79 +1,73 @@
1
- import os
2
  import streamlit as st
3
- from PyPDF2 import PdfReader
4
- import docx
5
- import pandas as pd
6
- from bs4 import BeautifulSoup
7
- from openai import OpenAI
 
 
8
  from dotenv import load_dotenv
9
 
10
  load_dotenv()
11
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
12
 
13
- client = OpenAI(
14
- api_key=GROQ_API_KEY,
15
- base_url="https://api.groq.com/openai/v1" # required for Groq
16
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- st.set_page_config(page_title="ToyShop Order Assistant", layout="wide")
19
- st.title("🧸 Online Toy Shop - Order Status Assistant")
 
 
20
 
21
- st.sidebar.header("Upload Customer Order Files")
22
- uploaded_files = st.sidebar.file_uploader(
23
- "Upload your customer order files",
24
- type=["pdf", "docx", "txt", "xlsx", "html"],
25
- accept_multiple_files=True
26
- )
27
 
28
- def extract_text(file):
29
- if file.name.endswith(".pdf"):
30
- reader = PdfReader(file)
31
- return "\n".join(page.extract_text() or "" for page in reader.pages)
32
- elif file.name.endswith(".docx"):
33
- doc = docx.Document(file)
34
- return "\n".join(p.text for p in doc.paragraphs)
35
- elif file.name.endswith(".txt"):
36
- return file.read().decode("utf-8")
37
- elif file.name.endswith(".xlsx"):
38
- df = pd.read_excel(file)
39
- return df.to_string()
40
- elif file.name.endswith(".html"):
41
- soup = BeautifulSoup(file.read(), "html.parser")
42
- return soup.get_text()
43
- else:
44
- return ""
45
 
46
- combined_text = ""
47
- if uploaded_files:
48
- st.sidebar.success(f"{len(uploaded_files)} file(s) uploaded.")
49
- for f in uploaded_files:
50
- try:
51
- combined_text += f"\n\n--- {f.name} ---\n\n"
52
- combined_text += extract_text(f)
53
- except Exception as e:
54
- st.sidebar.error(f"Error reading {f.name}: {str(e)}")
55
 
56
- query = st.text_input("Ask about your order (e.g., 'What is the status of order #123?')")
 
 
 
57
 
58
- if query and combined_text:
59
- with st.spinner("Thinking..."):
60
- try:
61
- system_prompt = (
62
- "You are a helpful assistant for an online toy shop. "
63
- "Answer customer queries based on the following order information:\n\n"
64
- + combined_text
65
- )
66
- response = client.chat.completions.create(
67
- model="llama3-8b-8192",
68
- messages=[
69
- {"role": "system", "content": system_prompt},
70
- {"role": "user", "content": query}
71
- ]
72
- )
73
- answer = response.choices[0].message.content
74
- st.success("Answer:")
75
- st.write(answer)
76
- except Exception as e:
77
- st.error(f"Error: {str(e)}")
78
- elif query:
79
- st.warning("Please upload order files to enable RAG-based answers.")
 
1
+ # app.py
2
  import streamlit as st
3
+ import pdfplumber
4
+ import os
5
+ import tempfile
6
+ import faiss
7
+ import numpy as np
8
+ from sentence_transformers import SentenceTransformer
9
+ import openai
10
  from dotenv import load_dotenv
11
 
12
  load_dotenv()
 
13
 
14
+ openai.api_key = os.getenv("GROQ_API_KEY") # assumes GROQ is OpenAI-compatible
15
+
16
+ MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
17
+ LLM_MODEL = "llama3-8b-8192" # Change if needed
18
+
19
+ model = SentenceTransformer(MODEL_NAME)
20
+
21
+ # Function to extract table rows
22
+ def extract_rows_from_pdf(pdf_file):
23
+ with pdfplumber.open(pdf_file) as pdf:
24
+ rows = []
25
+ for page in pdf.pages:
26
+ tables = page.extract_tables()
27
+ for table in tables:
28
+ for row in table[1:]: # skip header
29
+ cleaned = " | ".join([str(cell).strip() for cell in row])
30
+ rows.append(cleaned)
31
+ return rows
32
+
33
+ # Function to build FAISS index
34
+ def build_index(chunks):
35
+ vectors = model.encode(chunks)
36
+ index = faiss.IndexFlatL2(vectors.shape[1])
37
+ index.add(np.array(vectors))
38
+ return index, vectors
39
+
40
+ # Function to query LLM
41
+ def ask_llm(context, query):
42
+ prompt = f"Context:\n{context}\n\nAnswer the question: {query}"
43
+ response = openai.ChatCompletion.create(
44
+ model=LLM_MODEL,
45
+ messages=[{"role": "user", "content": prompt}]
46
+ )
47
+ return response['choices'][0]['message']['content']
48
+
49
+ # Streamlit UI
50
+ st.title("📦 Order Status Helper")
51
+
52
+ uploaded_file = st.file_uploader("Upload Customer Order PDF", type="pdf")
53
 
54
+ if uploaded_file:
55
+ with tempfile.NamedTemporaryFile(delete=False) as tmp:
56
+ tmp.write(uploaded_file.read())
57
+ tmp_path = tmp.name
58
 
59
+ st.success("File uploaded and processed!")
 
 
 
 
 
60
 
61
+ st.session_state.rows = extract_rows_from_pdf(tmp_path)
62
+ st.session_state.index, st.session_state.vectors = build_index(st.session_state.rows)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
+ query = st.text_input("Ask a question (e.g., What is the status of ORD12345?)")
 
 
 
 
 
 
 
 
65
 
66
+ if query:
67
+ query_vec = model.encode([query])
68
+ D, I = st.session_state.index.search(query_vec, k=3)
69
+ context = "\n".join([st.session_state.rows[i] for i in I[0]])
70
 
71
+ answer = ask_llm(context, query)
72
+ st.markdown("### 🧠 Answer")
73
+ st.write(answer)