alibidaran committed on
Commit
d0f7d18
·
verified ·
1 Parent(s): c630b43

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +26 -32
src/streamlit_app.py CHANGED
@@ -175,19 +175,13 @@ if 'chunks' not in st.session_state:
175
  if 'response' not in st.session_state:
176
  st.session_state.response=''
177
  # Sidebar document upload
178
- st.sidebar.title("Uploading your document 📄")
179
- uploaded_file = st.sidebar.file_uploader(
180
- "Upload your document 📄",
181
- type=["pdf"],
182
- label_visibility="collapsed"
183
- )
184
- upload_button=st.sidebar.button("Upload Document")
185
  uploaded_file = st.sidebar.file_uploader(
186
  "Upload your PDF",
187
  type=["pdf"],
188
  key="pdf_uploader",
189
  )
190
-
191
  def extract_pdf_text_from_bytes(file_bytes: bytes) -> str:
192
  reader = PdfReader(io.BytesIO(file_bytes))
193
  pages_text = []
@@ -195,30 +189,30 @@ def extract_pdf_text_from_bytes(file_bytes: bytes) -> str:
195
  txt = p.extract_text() or ""
196
  pages_text.append(txt)
197
  return "\n".join(pages_text)
198
-
199
- if uploaded_file is not None:
200
- with st.spinner("Reading & embedding your PDF..."):
201
- # Important: read bytes once on this rerun
202
- file_bytes = uploaded_file.read()
203
- # (Optional) if you ever re-use uploaded_file later, do: uploaded_file.seek(0)
204
-
205
- # Extract text purely in-memory (no /tmp files, no PyPDFLoader)
206
- file_text = extract_pdf_text_from_bytes(file_bytes)
207
-
208
- # Persist to session state
209
- st.session_state.file_text = file_text
210
-
211
- # Build embeddings (uses your existing text_splitter + encoder)
212
- chunks = text_splitter.split_text(file_text)
213
- embeddings = st.session_state.encoder.encode(
214
- chunks, convert_to_tensor=True, show_progress_bar=True
215
- ).cpu().numpy()
216
-
217
- st.session_state.embeddings = embeddings
218
- st.session_state.chunks = chunks
219
- st.session_state.doc_flag = True
220
-
221
- st.success(f"Loaded: {uploaded_file.name} — {len(st.session_state.chunks)} chunks")
222
 
223
  st.sidebar.write("Before making the your faviorate charecter sound, authenicate your code")
224
  Authenication=st.sidebar.button('Authenicate')
 
175
  if 'response' not in st.session_state:
176
  st.session_state.response=''
177
  # Sidebar document upload
178
+
 
 
 
 
 
 
179
  uploaded_file = st.sidebar.file_uploader(
180
  "Upload your PDF",
181
  type=["pdf"],
182
  key="pdf_uploader",
183
  )
184
+ upload_button=st.sidebar.button("Uploading your document 📄")
185
  def extract_pdf_text_from_bytes(file_bytes: bytes) -> str:
186
  reader = PdfReader(io.BytesIO(file_bytes))
187
  pages_text = []
 
189
  txt = p.extract_text() or ""
190
  pages_text.append(txt)
191
  return "\n".join(pages_text)
192
+ if upload_button:
193
+ if uploaded_file is not None:
194
+ with st.spinner("Reading & embedding your PDF..."):
195
+ # Important: read bytes once on this rerun
196
+ file_bytes = uploaded_file.read()
197
+ # (Optional) if you ever re-use uploaded_file later, do: uploaded_file.seek(0)
198
+
199
+ # Extract text purely in-memory (no /tmp files, no PyPDFLoader)
200
+ file_text = extract_pdf_text_from_bytes(file_bytes)
201
+
202
+ # Persist to session state
203
+ st.session_state.file_text = file_text
204
+
205
+ # Build embeddings (uses your existing text_splitter + encoder)
206
+ chunks = text_splitter.split_text(file_text)
207
+ embeddings = st.session_state.encoder.encode(
208
+ chunks, convert_to_tensor=True, show_progress_bar=True
209
+ ).cpu().numpy()
210
+
211
+ st.session_state.embeddings = embeddings
212
+ st.session_state.chunks = chunks
213
+ st.session_state.doc_flag = True
214
+
215
+ st.success(f"Loaded: {uploaded_file.name} — {len(st.session_state.chunks)} chunks")
216
 
217
  st.sidebar.write("Before making the your faviorate charecter sound, authenicate your code")
218
  Authenication=st.sidebar.button('Authenicate')