Spaces:

MicroHealth
/

ask-tricare

Paused

App Files Files Community

bluenevus committed on May 1

Commit

da2c6ed

1 Parent(s): c058191

Update app.py via AI Editor

Browse files

Files changed (1) hide show

app.py +40 -7

app.py CHANGED Viewed

@@ -16,6 +16,12 @@ import datetime
 from werkzeug.utils import secure_filename
 import numpy as np
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(threadName)s %(message)s")
 logger = logging.getLogger("AskTricare")
@@ -231,7 +237,6 @@ def left_navbar_static():
     ], style={"padding": "1rem", "backgroundColor": "#f8f9fa", "height": "100vh", "overflowY": "auto"})
 def chat_box_card():
-    # Explicit scrollbars and height
     return dbc.Card(
         dbc.CardBody([
             html.Div(id="chat-window", style={
@@ -326,18 +331,46 @@ app.clientside_callback(
 def _is_supported_doc(filename):
     ext = os.path.splitext(filename)[1].lower()
-    return ext in [".txt", ".pdf", ".md", ".docx"]
 def _extract_text_from_upload(filepath, ext):
-    if ext in [".txt", ".md"]:
-        try:
             with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                 text = f.read()
             return text
-        except Exception as e:
-            logger.error(f"Error reading {filepath}: {e}")
             return ""
-    else:
         return ""
 @app.callback(

 from werkzeug.utils import secure_filename
 import numpy as np
+import io
+from pdfminer.high_level import extract_text as pdf_extract_text
+import docx
+import openpyxl
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(threadName)s %(message)s")
 logger = logging.getLogger("AskTricare")
     ], style={"padding": "1rem", "backgroundColor": "#f8f9fa", "height": "100vh", "overflowY": "auto"})
 def chat_box_card():
     return dbc.Card(
         dbc.CardBody([
             html.Div(id="chat-window", style={
 def _is_supported_doc(filename):
     """Return True when *filename* has an extension the uploader can parse.

     The extension check is case-insensitive. A missing or empty filename is
     reported as unsupported instead of raising from ``os.path.splitext``.

     Args:
         filename: Name of the uploaded file; may be None or "".

     Returns:
         bool: True for .txt, .pdf, .md, .docx and .xlsx files.
     """
     if not filename:
         # os.path.splitext(None) raises TypeError; treat "no name" as unsupported.
         return False
     ext = os.path.splitext(filename)[1].lower()
     return ext in (".txt", ".pdf", ".md", ".docx", ".xlsx")
 def _extract_text_from_upload(filepath, ext):
     """Extract plain text from an uploaded document on disk.

     Parsing failures are logged and reported as an empty string; this
     function does not raise for unreadable or malformed files.

     Args:
         filepath: Path of the saved upload.
         ext: File extension including the leading dot; compared against
             lower-case values such as ".txt", ".md", ".pdf", ".docx", ".xlsx".

     Returns:
         str: The extracted text, or "" when the extension is not handled
         or extraction fails.
     """
     try:
         if ext in (".txt", ".md"):
             # errors="ignore" drops undecodable bytes instead of failing.
             with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                 return f.read()
         if ext == ".pdf":
             try:
                 return pdf_extract_text(filepath)
             except Exception as e:
                 logger.error(f"Error reading PDF {filepath}: {e}")
                 return ""
         if ext == ".docx":
             try:
                 doc = docx.Document(filepath)
                 # Only body paragraphs with visible text are kept.
                 return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
             except Exception as e:
                 logger.error(f"Error reading DOCX {filepath}: {e}")
                 return ""
         if ext == ".xlsx":
             wb = None
             try:
                 wb = openpyxl.load_workbook(filepath, read_only=True, data_only=True)
                 text_rows = []
                 for ws in wb.worksheets:
                     for row in ws.iter_rows(values_only=True):
                         # Empty cells are skipped; remaining cells are tab-joined.
                         row_strs = [str(cell) for cell in row if cell is not None]
                         if any(row_strs):
                             text_rows.append("\t".join(row_strs))
                 return "\n".join(text_rows)
             except Exception as e:
                 logger.error(f"Error reading XLSX {filepath}: {e}")
                 return ""
             finally:
                 # Read-only workbooks keep the file open until closed explicitly.
                 if wb is not None:
                     wb.close()
         return ""
     except Exception as e:
         logger.error(f"Error extracting text from {filepath}: {e}")
         return ""
 @app.callback(