bluenevus committed on
Commit
8d615a7
·
1 Parent(s): ada2aea

Update app.py via AI Editor

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -15,10 +15,9 @@ import base64
15
  import datetime
16
  from werkzeug.utils import secure_filename
17
  import numpy as np
18
-
19
  import io
20
 
21
- from pdfminer.high_level import extract_text as pdf_extract_text
22
  import docx
23
  import openpyxl
24
 
@@ -341,7 +340,13 @@ def _extract_text_from_upload(filepath, ext):
341
  return text
342
  elif ext == ".pdf":
343
  try:
344
- text = pdf_extract_text(filepath)
 
 
 
 
 
 
345
  return text
346
  except Exception as e:
347
  logger.error(f"Error reading PDF {filepath}: {e}")
@@ -575,7 +580,6 @@ def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, strea
575
  session_lock = get_session_lock(session_id)
576
  with session_lock:
577
  load_session_state(session_id)
578
- state = get_session_state(session_id)
579
  state["messages"].append({"role": "assistant", "content": reply})
580
  state["stream_buffer"] = ""
581
  state["streaming"] = False
 
15
  import datetime
16
  from werkzeug.utils import secure_filename
17
  import numpy as np
 
18
  import io
19
 
20
+ import PyPDF2
21
  import docx
22
  import openpyxl
23
 
 
340
  return text
341
  elif ext == ".pdf":
342
  try:
343
+ text = ""
344
+ with open(filepath, "rb") as f:
345
+ reader = PyPDF2.PdfReader(f)
346
+ for page in reader.pages:
347
+ page_text = page.extract_text()
348
+ if page_text:
349
+ text += page_text + "\n"
350
  return text
351
  except Exception as e:
352
  logger.error(f"Error reading PDF {filepath}: {e}")
 
580
  session_lock = get_session_lock(session_id)
581
  with session_lock:
582
  load_session_state(session_id)
 
583
  state["messages"].append({"role": "assistant", "content": reply})
584
  state["stream_buffer"] = ""
585
  state["streaming"] = False