bluenevus committed on
Commit
da2c6ed
·
1 Parent(s): c058191

Update app.py via AI Editor

Browse files
Files changed (1) hide show
  1. app.py +40 -7
app.py CHANGED
@@ -16,6 +16,12 @@ import datetime
16
  from werkzeug.utils import secure_filename
17
  import numpy as np
18
 
 
 
 
 
 
 
19
  logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(threadName)s %(message)s")
20
  logger = logging.getLogger("AskTricare")
21
 
@@ -231,7 +237,6 @@ def left_navbar_static():
231
  ], style={"padding": "1rem", "backgroundColor": "#f8f9fa", "height": "100vh", "overflowY": "auto"})
232
 
233
  def chat_box_card():
234
- # Explicit scrollbars and height
235
  return dbc.Card(
236
  dbc.CardBody([
237
  html.Div(id="chat-window", style={
@@ -326,18 +331,46 @@ app.clientside_callback(
326
 
327
  def _is_supported_doc(filename):
328
  ext = os.path.splitext(filename)[1].lower()
329
- return ext in [".txt", ".pdf", ".md", ".docx"]
330
 
331
  def _extract_text_from_upload(filepath, ext):
332
- if ext in [".txt", ".md"]:
333
- try:
334
  with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
335
  text = f.read()
336
  return text
337
- except Exception as e:
338
- logger.error(f"Error reading {filepath}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  return ""
340
- else:
 
341
  return ""
342
 
343
  @app.callback(
 
16
  from werkzeug.utils import secure_filename
17
  import numpy as np
18
 
19
+ import io
20
+
21
+ from pdfminer.high_level import extract_text as pdf_extract_text
22
+ import docx
23
+ import openpyxl
24
+
25
  logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(threadName)s %(message)s")
26
  logger = logging.getLogger("AskTricare")
27
 
 
237
  ], style={"padding": "1rem", "backgroundColor": "#f8f9fa", "height": "100vh", "overflowY": "auto"})
238
 
239
  def chat_box_card():
 
240
  return dbc.Card(
241
  dbc.CardBody([
242
  html.Div(id="chat-window", style={
 
331
 
332
  def _is_supported_doc(filename):
333
  ext = os.path.splitext(filename)[1].lower()
334
+ return ext in [".txt", ".pdf", ".md", ".docx", ".xlsx"]
335
 
336
  def _extract_text_from_upload(filepath, ext):
337
+ try:
338
+ if ext in [".txt", ".md"]:
339
  with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
340
  text = f.read()
341
  return text
342
+ elif ext == ".pdf":
343
+ try:
344
+ text = pdf_extract_text(filepath)
345
+ return text
346
+ except Exception as e:
347
+ logger.error(f"Error reading PDF {filepath}: {e}")
348
+ return ""
349
+ elif ext == ".docx":
350
+ try:
351
+ doc = docx.Document(filepath)
352
+ paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
353
+ return "\n".join(paragraphs)
354
+ except Exception as e:
355
+ logger.error(f"Error reading DOCX {filepath}: {e}")
356
+ return ""
357
+ elif ext == ".xlsx":
358
+ try:
359
+ wb = openpyxl.load_workbook(filepath, read_only=True, data_only=True)
360
+ text_rows = []
361
+ for ws in wb.worksheets:
362
+ for row in ws.iter_rows(values_only=True):
363
+ row_strs = [str(cell) for cell in row if cell is not None]
364
+ if any(row_strs):
365
+ text_rows.append("\t".join(row_strs))
366
+ return "\n".join(text_rows)
367
+ except Exception as e:
368
+ logger.error(f"Error reading XLSX {filepath}: {e}")
369
+ return ""
370
+ else:
371
  return ""
372
+ except Exception as e:
373
+ logger.error(f"Error extracting text from {filepath}: {e}")
374
  return ""
375
 
376
  @app.callback(