Update app.py
app.py
CHANGED
@@ -1,79 +1,73 @@
-
 import streamlit as st
-
-import
-import
-
-
 from dotenv import load_dotenv

 load_dotenv()
-GROQ_API_KEY = os.getenv("GROQ_API_KEY")

-
-
-
-

-
-

-st.
-uploaded_files = st.sidebar.file_uploader(
-    "Upload your customer order files",
-    type=["pdf", "docx", "txt", "xlsx", "html"],
-    accept_multiple_files=True
-)

-def extract_text(file):
-    if file.name.endswith(".pdf"):
-        reader = PdfReader(file)
-        return "\n".join(page.extract_text() or "" for page in reader.pages)
-    elif file.name.endswith(".docx"):
-        doc = docx.Document(file)
-        return "\n".join(p.text for p in doc.paragraphs)
-    elif file.name.endswith(".txt"):
-        return file.read().decode("utf-8")
-    elif file.name.endswith(".xlsx"):
-        df = pd.read_excel(file)
-        return df.to_string()
-    elif file.name.endswith(".html"):
-        soup = BeautifulSoup(file.read(), "html.parser")
-        return soup.get_text()
-    else:
-        return ""

-combined_text = ""
-if uploaded_files:
-    st.sidebar.success(f"{len(uploaded_files)} file(s) uploaded.")
-    for f in uploaded_files:
-        try:
-            combined_text += f"\n\n--- {f.name} ---\n\n"
-            combined_text += extract_text(f)
-        except Exception as e:
-            st.sidebar.error(f"Error reading {f.name}: {str(e)}")

-query

-if query and combined_text:
-    try:
-
-        system_prompt = (
-            "You are a helpful assistant for an online toy shop. "
-            "Answer customer queries based on the following order information:\n\n"
-            + combined_text
-        )
-        response = client.chat.completions.create(
-            model="llama3-8b-8192",
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": query}
-            ]
-        )
-        answer = response.choices[0].message.content
-        st.success("Answer:")
-        st.write(answer)
-    except Exception as e:
-        st.error(f"Error: {str(e)}")
-elif query:
-    st.warning("Please upload order files to enable RAG-based answers.")
+# app.py
 import streamlit as st
+import pdfplumber
+import os
+import tempfile
+import faiss
+import numpy as np
+from sentence_transformers import SentenceTransformer
+import openai
 from dotenv import load_dotenv

 load_dotenv()

+openai.api_key = os.getenv("GROQ_API_KEY")  # assumes GROQ is OpenAI-compatible
+
+MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+LLM_MODEL = "llama3-8b-8192"  # Change if needed
+
+model = SentenceTransformer(MODEL_NAME)
+
+# Function to extract table rows
+def extract_rows_from_pdf(pdf_file):
+    with pdfplumber.open(pdf_file) as pdf:
+        rows = []
+        for page in pdf.pages:
+            tables = page.extract_tables()
+            for table in tables:
+                for row in table[1:]:  # skip header
+                    cleaned = " | ".join([str(cell).strip() for cell in row])
+                    rows.append(cleaned)
+    return rows
+
+# Function to build FAISS index
+def build_index(chunks):
+    vectors = model.encode(chunks)
+    index = faiss.IndexFlatL2(vectors.shape[1])
+    index.add(np.array(vectors))
+    return index, vectors
+
+# Function to query LLM
+def ask_llm(context, query):
+    prompt = f"Context:\n{context}\n\nAnswer the question: {query}"
+    response = openai.ChatCompletion.create(
+        model=LLM_MODEL,
+        messages=[{"role": "user", "content": prompt}]
+    )
+    return response['choices'][0]['message']['content']
+
+# Streamlit UI
+st.title("📦 Order Status Helper")
+
+uploaded_file = st.file_uploader("Upload Customer Order PDF", type="pdf")

+if uploaded_file:
+    with tempfile.NamedTemporaryFile(delete=False) as tmp:
+        tmp.write(uploaded_file.read())
+        tmp_path = tmp.name

+    st.success("File uploaded and processed!")

+    st.session_state.rows = extract_rows_from_pdf(tmp_path)
+    st.session_state.index, st.session_state.vectors = build_index(st.session_state.rows)

+query = st.text_input("Ask a question (e.g., What is the status of ORD12345?)")

+if query:
+    query_vec = model.encode([query])
+    D, I = st.session_state.index.search(query_vec, k=3)
+    context = "\n".join([st.session_state.rows[i] for i in I[0]])

+    answer = ask_llm(context, query)
+    st.markdown("### 🧠 Answer")
+    st.write(answer)
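Two caveats in the new version: with the legacy openai (<1.0) SDK, setting openai.api_key alone still sends requests to api.openai.com, so Groq's OpenAI-compatible endpoint has to be selected explicitly via openai.api_base; and the "if query:" block reads st.session_state.index before checking that a PDF has been indexed, so asking a question before uploading raises an exception. A minimal sketch of both fixes, reusing the names defined in app.py above (model, ask_llm, st.session_state.rows / st.session_state.index):

# Sketch, not part of the commit: route the legacy openai (<1.0) client to Groq.
openai.api_key = os.getenv("GROQ_API_KEY")
openai.api_base = "https://api.groq.com/openai/v1"  # Groq's OpenAI-compatible base URL

# Guarded query path: only search once a PDF has been indexed.
query = st.text_input("Ask a question (e.g., What is the status of ORD12345?)")
if query:
    if "index" not in st.session_state:
        st.warning("Please upload an order PDF first.")
    else:
        query_vec = model.encode([query])  # float32 ndarray, as faiss expects
        D, I = st.session_state.index.search(query_vec, k=3)
        # faiss pads the result with -1 when the index holds fewer than k rows.
        context = "\n".join(st.session_state.rows[i] for i in I[0] if i != -1)
        st.markdown("### 🧠 Answer")
        st.write(ask_llm(context, query))

On openai>=1.0 the module-level ChatCompletion interface is removed; the equivalent there is client = openai.OpenAI(base_url="https://api.groq.com/openai/v1", api_key=os.getenv("GROQ_API_KEY")) followed by client.chat.completions.create(...).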