Spaces:
Paused
Paused
Update app.py via AI Editor
Browse files
app.py
CHANGED
@@ -15,10 +15,9 @@ import base64
|
|
15 |
import datetime
|
16 |
from werkzeug.utils import secure_filename
|
17 |
import numpy as np
|
18 |
-
|
19 |
import io
|
20 |
|
21 |
-
|
22 |
import docx
|
23 |
import openpyxl
|
24 |
|
@@ -341,7 +340,13 @@ def _extract_text_from_upload(filepath, ext):
|
|
341 |
return text
|
342 |
elif ext == ".pdf":
|
343 |
try:
|
344 |
-
text =
|
|
|
|
|
|
|
|
|
|
|
|
|
345 |
return text
|
346 |
except Exception as e:
|
347 |
logger.error(f"Error reading PDF {filepath}: {e}")
|
@@ -575,7 +580,6 @@ def main_callback(session_id, send_clicks, file_contents, new_chat_clicks, strea
|
|
575 |
session_lock = get_session_lock(session_id)
|
576 |
with session_lock:
|
577 |
load_session_state(session_id)
|
578 |
-
state = get_session_state(session_id)
|
579 |
state["messages"].append({"role": "assistant", "content": reply})
|
580 |
state["stream_buffer"] = ""
|
581 |
state["streaming"] = False
|
|
|
15 |
import datetime
|
16 |
from werkzeug.utils import secure_filename
|
17 |
import numpy as np
|
|
|
18 |
import io
|
19 |
|
20 |
+
import PyPDF2
|
21 |
import docx
|
22 |
import openpyxl
|
23 |
|
|
|
340 |
return text
|
341 |
elif ext == ".pdf":
|
342 |
try:
|
343 |
+
text = ""
|
344 |
+
with open(filepath, "rb") as f:
|
345 |
+
reader = PyPDF2.PdfReader(f)
|
346 |
+
for page in reader.pages:
|
347 |
+
page_text = page.extract_text()
|
348 |
+
if page_text:
|
349 |
+
text += page_text + "\n"
|
350 |
return text
|
351 |
except Exception as e:
|
352 |
logger.error(f"Error reading PDF {filepath}: {e}")
|
|
|
580 |
session_lock = get_session_lock(session_id)
|
581 |
with session_lock:
|
582 |
load_session_state(session_id)
|
|
|
583 |
state["messages"].append({"role": "assistant", "content": reply})
|
584 |
state["stream_buffer"] = ""
|
585 |
state["streaming"] = False
|