masadonline committed
Commit 85e6257 · verified · 1 Parent(s): e90223f

Update app.py
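Switches from the pre-1.0 openai module API to an explicit OpenAI client pointed at Groq's OpenAI-compatible endpoint, skips None cells when flattening table rows, reports PDFs with no tabular data, and wraps the LLM call in a spinner with error handling.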

Files changed (1):
app.py +48 -35
app.py CHANGED
@@ -1,4 +1,3 @@
-# app.py
 import streamlit as st
 import pdfplumber
 import os
@@ -6,68 +5,82 @@ import tempfile
 import faiss
 import numpy as np
 from sentence_transformers import SentenceTransformer
-import openai
+from openai import OpenAI
 from dotenv import load_dotenv
 
+# Load environment variables
 load_dotenv()
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 
-openai.api_key = os.getenv("GROQ_API_KEY")  # assumes GROQ is OpenAI-compatible
+# Setup GROQ client
+client = OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")
 
-MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
-LLM_MODEL = "llama3-8b-8192"  # Change if needed
+# Constants
+EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+LLM_MODEL = "llama3-8b-8192"
+embedder = SentenceTransformer(EMBEDDING_MODEL)
 
-model = SentenceTransformer(MODEL_NAME)
-
-# Function to extract table rows
-def extract_rows_from_pdf(pdf_file):
-    with pdfplumber.open(pdf_file) as pdf:
-        rows = []
+# Extract table rows from PDF
+def extract_rows_from_pdf(pdf_file_path):
+    rows = []
+    with pdfplumber.open(pdf_file_path) as pdf:
         for page in pdf.pages:
             tables = page.extract_tables()
             for table in tables:
-                for row in table[1:]:  # skip header
-                    cleaned = " | ".join([str(cell).strip() for cell in row])
+                for row in table[1:]:
+                    cleaned = " | ".join([str(cell).strip() for cell in row if cell is not None])
                     rows.append(cleaned)
-        return rows
+    return rows
 
-# Function to build FAISS index
+# Build FAISS index
 def build_index(chunks):
-    vectors = model.encode(chunks)
+    vectors = embedder.encode(chunks)
     index = faiss.IndexFlatL2(vectors.shape[1])
     index.add(np.array(vectors))
     return index, vectors
 
-# Function to query LLM
+# Ask LLM
 def ask_llm(context, query):
-    prompt = f"Context:\n{context}\n\nAnswer the question: {query}"
-    response = openai.ChatCompletion.create(
+    prompt = f"You are a helpful assistant for an online toy shop.\n\nHere is the order data:\n{context}\n\nQuestion: {query}"
+    response = client.chat.completions.create(
         model=LLM_MODEL,
         messages=[{"role": "user", "content": prompt}]
     )
-    return response['choices'][0]['message']['content']
+    return response.choices[0].message.content
 
 # Streamlit UI
-st.title("📦 Order Status Helper")
+st.set_page_config(page_title="🧸 ToyShop Order Status Assistant", layout="wide")
+st.title("📦 ToyShop Order Status Assistant")
 
-uploaded_file = st.file_uploader("Upload Customer Order PDF", type="pdf")
+uploaded_file = st.file_uploader("Upload a Customer Order PDF", type="pdf")
 
 if uploaded_file:
-    with tempfile.NamedTemporaryFile(delete=False) as tmp:
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
         tmp.write(uploaded_file.read())
-        tmp_path = tmp.name
+        pdf_path = tmp.name
 
-    st.success("File uploaded and processed!")
+    st.success("✅ File uploaded successfully")
 
-    st.session_state.rows = extract_rows_from_pdf(tmp_path)
-    st.session_state.index, st.session_state.vectors = build_index(st.session_state.rows)
+    # Process file
+    rows = extract_rows_from_pdf(pdf_path)
+    if not rows:
+        st.error("❌ No tabular data found in the PDF.")
+    else:
+        st.info(f"📄 Extracted {len(rows)} rows of order data.")
 
-    query = st.text_input("Ask a question (e.g., What is the status of ORD12345?)")
+        index, _ = build_index(rows)
+
+        query = st.text_input("Ask a question (e.g., 'What is the status of order 27?')")
 
-    if query:
-        query_vec = model.encode([query])
-        D, I = st.session_state.index.search(query_vec, k=3)
-        context = "\n".join([st.session_state.rows[i] for i in I[0]])
+        if query:
+            query_vec = embedder.encode([query])
+            D, I = index.search(query_vec, k=3)
+            context = "\n".join([rows[i] for i in I[0]])
 
-        answer = ask_llm(context, query)
-        st.markdown("### 🧠 Answer")
-        st.write(answer)
+            with st.spinner("Generating answer..."):
+                try:
+                    answer = ask_llm(context, query)
+                    st.markdown("### 🧠 Answer")
+                    st.write(answer)
+                except Exception as e:
+                    st.error(f"LLM Error: {str(e)}")
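The retrieval half of the new flow can be exercised outside Streamlit. Below is a minimal sketch using the same embedding model and index type as the commit; the order rows and the query are hypothetical, made up for illustration:

# Standalone sketch of the retrieval step (hypothetical data).
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

rows = [
    "ORD12345 | Teddy Bear | Shipped",       # made-up order rows
    "ORD12346 | Toy Train | Processing",
]
vectors = embedder.encode(rows)              # one 384-dim vector per row
index = faiss.IndexFlatL2(vectors.shape[1])  # exact L2 index
index.add(np.array(vectors))

# The k nearest rows to the query become the LLM context.
D, I = index.search(embedder.encode(["What is the status of ORD12345?"]), k=1)
print(rows[I[0][0]])                         # -> "ORD12345 | Teddy Bear | Shipped"

To run the app itself, the usual Streamlit flow should apply: put GROQ_API_KEY=<your key> in a .env file next to app.py (picked up by load_dotenv()), then start it with streamlit run app.py.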