Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
# app.py
|
2 |
import streamlit as st
|
3 |
import os
|
@@ -78,12 +79,12 @@ with st.sidebar:
|
|
78 |
vector_store = process_input(input_data)
|
79 |
st.session_state.vectorstore = vector_store
|
80 |
st.success("File processed successfully. You can now ask questions.")
|
81 |
-
except
|
82 |
-
st.error(f"File upload failed:
|
83 |
-
except
|
84 |
-
st.error(f"File upload failed:
|
85 |
except Exception as e:
|
86 |
-
st.error(f"File upload failed:
|
87 |
|
88 |
st.subheader("Chat History")
|
89 |
for i, (q, a) in enumerate(st.session_state.history):
|
@@ -159,21 +160,21 @@ def process_input(input_data):
|
|
159 |
|
160 |
# Step 1: Read PDF file in memory
|
161 |
status.text("Reading PDF file...")
|
162 |
-
progress_bar.progress(0.
|
163 |
|
164 |
pdf_reader = PdfReader(BytesIO(input_data.read()))
|
165 |
documents = "".join([page.extract_text() or "" for page in pdf_reader.pages])
|
166 |
|
167 |
# Step 2: Split text
|
168 |
status.text("Splitting text into chunks...")
|
169 |
-
progress_bar.progress(0.
|
170 |
|
171 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
172 |
texts = text_splitter.split_text(documents)
|
173 |
|
174 |
# Step 3: Create embeddings
|
175 |
status.text("Creating embeddings...")
|
176 |
-
progress_bar.progress(0.
|
177 |
|
178 |
hf_embeddings = HuggingFaceEmbeddings(
|
179 |
model_name="sentence-transformers/all-mpnet-base-v2",
|
@@ -182,7 +183,7 @@ def process_input(input_data):
|
|
182 |
|
183 |
# Step 4: Initialize FAISS vector store
|
184 |
status.text("Building vector store...")
|
185 |
-
progress_bar.progress(0
|
186 |
|
187 |
dimension = len(hf_embeddings.embed_query("test"))
|
188 |
index = faiss.IndexFlatL2(dimension)
|
@@ -197,9 +198,8 @@ def process_input(input_data):
|
|
197 |
uuids = [str(uuid.uuid4()) for _ in texts]
|
198 |
vector_store.add_texts(texts, ids=uuids)
|
199 |
|
200 |
-
#
|
201 |
status.text("Processing complete!")
|
202 |
-
progress_bar.progress(1.0)
|
203 |
|
204 |
return vector_store
|
205 |
|
|
|
1 |
+
|
2 |
# app.py
|
3 |
import streamlit as st
|
4 |
import os
|
|
|
79 |
vector_store = process_input(input_data)
|
80 |
st.session_state.vectorstore = vector_store
|
81 |
st.success("File processed successfully. You can now ask questions.")
|
82 |
+
except PermissionError as e:
|
83 |
+
st.error(f"File upload failed: Permission error - {str(e)}. Check file system access.")
|
84 |
+
except OSError as e:
|
85 |
+
st.error(f"File upload failed: OS error - {str(e)}. Check server configuration.")
|
86 |
except Exception as e:
|
87 |
+
st.error(f"File upload failed: {str(e)} (Exception type: {type(e).__name__}). Please try again or check server logs.")
|
88 |
|
89 |
st.subheader("Chat History")
|
90 |
for i, (q, a) in enumerate(st.session_state.history):
|
|
|
160 |
|
161 |
# Step 1: Read PDF file in memory
|
162 |
status.text("Reading PDF file...")
|
163 |
+
progress_bar.progress(0.25)
|
164 |
|
165 |
pdf_reader = PdfReader(BytesIO(input_data.read()))
|
166 |
documents = "".join([page.extract_text() or "" for page in pdf_reader.pages])
|
167 |
|
168 |
# Step 2: Split text
|
169 |
status.text("Splitting text into chunks...")
|
170 |
+
progress_bar.progress(0.50)
|
171 |
|
172 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
173 |
texts = text_splitter.split_text(documents)
|
174 |
|
175 |
# Step 3: Create embeddings
|
176 |
status.text("Creating embeddings...")
|
177 |
+
progress_bar.progress(0.75)
|
178 |
|
179 |
hf_embeddings = HuggingFaceEmbeddings(
|
180 |
model_name="sentence-transformers/all-mpnet-base-v2",
|
|
|
183 |
|
184 |
# Step 4: Initialize FAISS vector store
|
185 |
status.text("Building vector store...")
|
186 |
+
progress_bar.progress(1.0)
|
187 |
|
188 |
dimension = len(hf_embeddings.embed_query("test"))
|
189 |
index = faiss.IndexFlatL2(dimension)
|
|
|
198 |
uuids = [str(uuid.uuid4()) for _ in texts]
|
199 |
vector_store.add_texts(texts, ids=uuids)
|
200 |
|
201 |
+
# Complete processing
|
202 |
status.text("Processing complete!")
|
|
|
203 |
|
204 |
return vector_store
|
205 |
|