Spaces:
Paused
Paused
Update app.py via AI Editor
Browse files
app.py
CHANGED
@@ -16,6 +16,12 @@ import datetime
|
|
16 |
from werkzeug.utils import secure_filename
|
17 |
import numpy as np
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(threadName)s %(message)s")
|
20 |
logger = logging.getLogger("AskTricare")
|
21 |
|
@@ -231,7 +237,6 @@ def left_navbar_static():
|
|
231 |
], style={"padding": "1rem", "backgroundColor": "#f8f9fa", "height": "100vh", "overflowY": "auto"})
|
232 |
|
233 |
def chat_box_card():
|
234 |
-
# Explicit scrollbars and height
|
235 |
return dbc.Card(
|
236 |
dbc.CardBody([
|
237 |
html.Div(id="chat-window", style={
|
@@ -326,18 +331,46 @@ app.clientside_callback(
|
|
326 |
|
327 |
def _is_supported_doc(filename):
|
328 |
ext = os.path.splitext(filename)[1].lower()
|
329 |
-
return ext in [".txt", ".pdf", ".md", ".docx"]
|
330 |
|
331 |
def _extract_text_from_upload(filepath, ext):
|
332 |
-
|
333 |
-
|
334 |
with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
|
335 |
text = f.read()
|
336 |
return text
|
337 |
-
|
338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
339 |
return ""
|
340 |
-
|
|
|
341 |
return ""
|
342 |
|
343 |
@app.callback(
|
|
|
16 |
from werkzeug.utils import secure_filename
|
17 |
import numpy as np
|
18 |
|
19 |
+
import io
|
20 |
+
|
21 |
+
from pdfminer.high_level import extract_text as pdf_extract_text
|
22 |
+
import docx
|
23 |
+
import openpyxl
|
24 |
+
|
25 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(threadName)s %(message)s")
|
26 |
logger = logging.getLogger("AskTricare")
|
27 |
|
|
|
237 |
], style={"padding": "1rem", "backgroundColor": "#f8f9fa", "height": "100vh", "overflowY": "auto"})
|
238 |
|
239 |
def chat_box_card():
|
|
|
240 |
return dbc.Card(
|
241 |
dbc.CardBody([
|
242 |
html.Div(id="chat-window", style={
|
|
|
331 |
|
332 |
def _is_supported_doc(filename):
|
333 |
ext = os.path.splitext(filename)[1].lower()
|
334 |
+
return ext in [".txt", ".pdf", ".md", ".docx", ".xlsx"]
|
335 |
|
336 |
def _extract_text_from_upload(filepath, ext):
|
337 |
+
try:
|
338 |
+
if ext in [".txt", ".md"]:
|
339 |
with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
|
340 |
text = f.read()
|
341 |
return text
|
342 |
+
elif ext == ".pdf":
|
343 |
+
try:
|
344 |
+
text = pdf_extract_text(filepath)
|
345 |
+
return text
|
346 |
+
except Exception as e:
|
347 |
+
logger.error(f"Error reading PDF {filepath}: {e}")
|
348 |
+
return ""
|
349 |
+
elif ext == ".docx":
|
350 |
+
try:
|
351 |
+
doc = docx.Document(filepath)
|
352 |
+
paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
|
353 |
+
return "\n".join(paragraphs)
|
354 |
+
except Exception as e:
|
355 |
+
logger.error(f"Error reading DOCX {filepath}: {e}")
|
356 |
+
return ""
|
357 |
+
elif ext == ".xlsx":
|
358 |
+
try:
|
359 |
+
wb = openpyxl.load_workbook(filepath, read_only=True, data_only=True)
|
360 |
+
text_rows = []
|
361 |
+
for ws in wb.worksheets:
|
362 |
+
for row in ws.iter_rows(values_only=True):
|
363 |
+
row_strs = [str(cell) for cell in row if cell is not None]
|
364 |
+
if any(row_strs):
|
365 |
+
text_rows.append("\t".join(row_strs))
|
366 |
+
return "\n".join(text_rows)
|
367 |
+
except Exception as e:
|
368 |
+
logger.error(f"Error reading XLSX {filepath}: {e}")
|
369 |
+
return ""
|
370 |
+
else:
|
371 |
return ""
|
372 |
+
except Exception as e:
|
373 |
+
logger.error(f"Error extracting text from {filepath}: {e}")
|
374 |
return ""
|
375 |
|
376 |
@app.callback(
|