alibidaran committed on
Commit
0a333a5
·
verified ·
1 Parent(s): 9be1f67

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +36 -8
src/streamlit_app.py CHANGED
@@ -4,7 +4,9 @@ import openai
4
  import os
5
  import time
6
  #from roles import *
7
- from langchain_community.document_loaders import PyPDFLoader
 
 
8
  import tempfile
9
  from RAG import load_graph,text_splitter
10
  import torch
@@ -180,18 +182,44 @@ uploaded_file = st.sidebar.file_uploader(
180
  label_visibility="collapsed"
181
  )
182
  upload_button=st.sidebar.button("Upload Document")
183
- if upload_button:
184
- if uploaded_file is None:
185
- st.warning("Please upload a PDF file.")
186
- st.session_state.doc_flag = False
187
- else:
188
- file_text = get_text(uploaded_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  st.session_state.file_text = file_text
190
- embeddings,chunks = embed_document(file_text)
 
 
 
 
 
 
191
  st.session_state.embeddings = embeddings
192
  st.session_state.chunks = chunks
193
  st.session_state.doc_flag = True
194
 
 
 
195
  st.sidebar.write("Before making the your faviorate charecter sound, authenicate your code")
196
  Authenication=st.sidebar.button('Authenicate')
197
  if Authenication:
 
4
  import os
5
  import time
6
  #from roles import *
7
+ import io
8
+ from pypdf import PdfReader
9
+ #from langchain_community.document_loaders import PyPDFLoader
10
  import tempfile
11
  from RAG import load_graph,text_splitter
12
  import torch
 
182
  label_visibility="collapsed"
183
  )
184
  upload_button=st.sidebar.button("Upload Document")
185
+ uploaded_file = st.sidebar.file_uploader(
186
+ "Upload your PDF",
187
+ type=["pdf"],
188
+ key="pdf_uploader",
189
+ )
190
+
191
def extract_pdf_text_from_bytes(file_bytes: bytes) -> str:
    """Return the text of an in-memory PDF, one page per newline-separated segment.

    The bytes are wrapped in a ``BytesIO`` and handed to ``PdfReader``; no
    temporary file is written. A page whose ``extract_text()`` result is
    falsy contributes an empty string, so the page count is preserved in
    the joined output.
    """
    pdf = PdfReader(io.BytesIO(file_bytes))
    return "\n".join(page.extract_text() or "" for page in pdf.pages)
198
+
199
if uploaded_file is not None:
    # Local import: only this block needs hashing.
    import hashlib

    # getvalue() returns the full buffer regardless of the current stream
    # position, so it stays correct even if the file object was read before.
    file_bytes = uploaded_file.getvalue()
    fingerprint = hashlib.sha256(file_bytes).hexdigest()

    # Streamlit re-executes the whole script on every widget interaction.
    # Only re-parse and re-embed when the uploaded content changed or no
    # document is currently marked as loaded.
    already_loaded = (
        st.session_state.get("doc_fingerprint") == fingerprint
        and st.session_state.get("doc_flag")
    )

    if not already_loaded:
        with st.spinner("Reading & embedding your PDF..."):
            # Extract text purely in memory (no temp files).
            file_text = extract_pdf_text_from_bytes(file_bytes)
            st.session_state.file_text = file_text

            # A scanned/image-only PDF can yield no text at all; skip the
            # splitter and encoder in that case instead of encoding nothing.
            chunks = text_splitter.split_text(file_text) if file_text.strip() else []

            if chunks:
                embeddings = st.session_state.encoder.encode(
                    chunks, convert_to_tensor=True, show_progress_bar=True
                ).cpu().numpy()

                st.session_state.embeddings = embeddings
                st.session_state.chunks = chunks
                st.session_state.doc_flag = True
            else:
                # Do not advertise a loaded document when nothing was embedded.
                st.session_state.doc_flag = False

        st.session_state.doc_fingerprint = fingerprint

    if st.session_state.get("doc_flag"):
        st.success(f"Loaded: {uploaded_file.name} — {len(st.session_state.chunks)} chunks")
    else:
        st.warning("No extractable text was found in this PDF.")
222
+
223
  st.sidebar.write("Before making the your faviorate charecter sound, authenicate your code")
224
  Authenication=st.sidebar.button('Authenicate')
225
  if Authenication: