Spaces:
Paused
Paused
Update app.py via AI Editor
Browse files
app.py
CHANGED
@@ -463,15 +463,7 @@ def update_uploaded_doc_name(contents, filename, id_dict):
|
|
463 |
return filename, contents, "uploaded"
|
464 |
return "", None, "loaded"
|
465 |
|
466 |
-
def markdown_table_preview(md_text):
|
467 |
-
# Always use dcc.Markdown to render markdown tables, which supports tables natively
|
468 |
-
return dcc.Markdown(md_text, dangerously_allow_html=True, style={'whiteSpace': 'pre-wrap', 'fontFamily': 'monospace', 'overflowX': 'auto'})
|
469 |
-
|
470 |
-
def markdown_narrative_preview(md_text):
|
471 |
-
return html.Div(dcc.Markdown(md_text, dangerously_allow_html=True, style={'whiteSpace': 'pre-wrap', 'fontFamily': 'sans-serif'}))
|
472 |
-
|
473 |
def extract_markdown_tables(md_text):
|
474 |
-
# Extract all markdown tables from md_text as list of strings
|
475 |
tables = []
|
476 |
lines = md_text.split('\n')
|
477 |
in_table = False
|
@@ -492,12 +484,10 @@ def extract_markdown_tables(md_text):
|
|
492 |
return tables
|
493 |
|
494 |
def markdown_table_to_df(md_table):
|
495 |
-
# Convert a single markdown table string to pandas DataFrame
|
496 |
lines = [line.strip() for line in md_table.split('\n') if line.strip()]
|
497 |
if len(lines) < 2:
|
498 |
return None
|
499 |
header = [h.strip() for h in lines[0].strip('|').split('|')]
|
500 |
-
# Find the first line with at least 3 dashes, that's the separator
|
501 |
sep_idx = 1
|
502 |
while sep_idx < len(lines) and not re.match(r'^\|\s*:?-+:?\s*(\|\s*:?-+:?\s*)+\|?$', lines[sep_idx]):
|
503 |
sep_idx += 1
|
@@ -507,7 +497,6 @@ def markdown_table_to_df(md_table):
|
|
507 |
if not row.strip() or not row.strip().startswith('|'):
|
508 |
continue
|
509 |
cells = [c.strip() for c in row.strip('|').split('|')]
|
510 |
-
# Pad or trim cells to header length
|
511 |
if len(cells) < len(header):
|
512 |
cells += [''] * (len(header) - len(cells))
|
513 |
elif len(cells) > len(header):
|
@@ -516,6 +505,32 @@ def markdown_table_to_df(md_table):
|
|
516 |
df = pd.DataFrame(rows, columns=header)
|
517 |
return df
|
518 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
519 |
def markdown_tables_to_xlsx(md_text):
|
520 |
tables = extract_markdown_tables(md_text)
|
521 |
output = BytesIO()
|
@@ -659,7 +674,6 @@ def download_document(n_clicks, selected_doc_type):
|
|
659 |
|
660 |
if selected_doc_type in spreadsheet_types:
|
661 |
try:
|
662 |
-
# Convert markdown tables to xlsx
|
663 |
xlsx_bytes = markdown_tables_to_xlsx(current_document)
|
664 |
logging.info(f"{selected_doc_type} document downloaded as Excel.")
|
665 |
return dcc.send_bytes(xlsx_bytes.read(), f"{selected_doc_type}.xlsx")
|
|
|
463 |
return filename, contents, "uploaded"
|
464 |
return "", None, "loaded"
|
465 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
466 |
def extract_markdown_tables(md_text):
|
|
|
467 |
tables = []
|
468 |
lines = md_text.split('\n')
|
469 |
in_table = False
|
|
|
484 |
return tables
|
485 |
|
486 |
def markdown_table_to_df(md_table):
|
|
|
487 |
lines = [line.strip() for line in md_table.split('\n') if line.strip()]
|
488 |
if len(lines) < 2:
|
489 |
return None
|
490 |
header = [h.strip() for h in lines[0].strip('|').split('|')]
|
|
|
491 |
sep_idx = 1
|
492 |
while sep_idx < len(lines) and not re.match(r'^\|\s*:?-+:?\s*(\|\s*:?-+:?\s*)+\|?$', lines[sep_idx]):
|
493 |
sep_idx += 1
|
|
|
497 |
if not row.strip() or not row.strip().startswith('|'):
|
498 |
continue
|
499 |
cells = [c.strip() for c in row.strip('|').split('|')]
|
|
|
500 |
if len(cells) < len(header):
|
501 |
cells += [''] * (len(header) - len(cells))
|
502 |
elif len(cells) > len(header):
|
|
|
505 |
df = pd.DataFrame(rows, columns=header)
|
506 |
return df
|
507 |
|
508 |
+
def _unique_column_ids(columns):
    """Return one distinct string id per column, preserving order.

    The first occurrence of a name keeps the name itself as its id, so
    tables with unique headers get exactly the ids they had before. Later
    duplicates get a numeric suffix (``name_1``, ``name_2``, ...).
    """
    used = set()
    ids = []
    for col in columns:
        base = str(col)
        candidate = base
        suffix = 1
        while candidate in used:
            candidate = f"{base}_{suffix}"
            suffix += 1
        used.add(candidate)
        ids.append(candidate)
    return ids


def markdown_table_preview(md_text):
    """Render each markdown table in *md_text* as a Dash ``DataTable``.

    Tables are located with ``extract_markdown_tables`` and parsed with
    ``markdown_table_to_df``; tables that fail to parse or have no rows are
    skipped.

    Args:
        md_text: Markdown source that may contain zero or more tables.

    Returns:
        An ``html.Div`` holding one ``DataTable`` per usable table, or an
        ``html.Div`` with the text "No table found." when there is nothing
        to show.
    """
    tables = extract_markdown_tables(md_text)
    if not tables:
        return html.Div("No table found.")

    table_divs = []
    # Enumerate over *all* extracted tables so a table's component index
    # does not shift when an earlier table is skipped.
    for i, table in enumerate(tables):
        df = markdown_table_to_df(table)
        if df is None or df.empty:
            continue

        # DataTable needs a unique id per column, and a dict of records
        # cannot hold two values under the same key. Markdown headers may
        # repeat (or be blank), so ids are disambiguated by position while
        # the displayed name stays the original header text.
        col_ids = _unique_column_ids(df.columns)
        records = [dict(zip(col_ids, row)) for row in df.values.tolist()]

        table_divs.append(
            html.Div([
                DataTable(
                    columns=[
                        {"name": str(col), "id": col_id}
                        for col, col_id in zip(df.columns, col_ids)
                    ],
                    data=records,
                    style_table={'overflowX': 'auto'},
                    style_cell={'whiteSpace': 'normal', 'height': 'auto', 'textAlign': 'left', 'fontFamily': 'monospace', 'fontSize': '14px', 'maxWidth': '400px', 'minWidth': '80px', 'wordBreak': 'break-word'},
                    style_header={'fontWeight': 'bold'},
                    page_size=100,
                    id={'type': 'datatable-preview', 'index': i}
                )
            ], className="mb-4")
        )

    # Tables were detected but none produced a usable DataFrame: show the
    # same message as the no-table case instead of an empty container.
    if not table_divs:
        return html.Div("No table found.")
    return html.Div(table_divs)
|
530 |
+
|
531 |
+
def markdown_narrative_preview(md_text):
    """Return *md_text* rendered by ``dcc.Markdown`` inside an ``html.Div``.

    The markdown component is created with pre-wrapped whitespace and a
    sans-serif font.
    """
    # NOTE(review): raw HTML rendering is enabled here; confirm md_text
    # only ever comes from a trusted source.
    narrative_style = {'whiteSpace': 'pre-wrap', 'fontFamily': 'sans-serif'}
    rendered = dcc.Markdown(
        md_text,
        dangerously_allow_html=True,
        style=narrative_style,
    )
    return html.Div(rendered)
|
533 |
+
|
534 |
def markdown_tables_to_xlsx(md_text):
|
535 |
tables = extract_markdown_tables(md_text)
|
536 |
output = BytesIO()
|
|
|
674 |
|
675 |
if selected_doc_type in spreadsheet_types:
|
676 |
try:
|
|
|
677 |
xlsx_bytes = markdown_tables_to_xlsx(current_document)
|
678 |
logging.info(f"{selected_doc_type} document downloaded as Excel.")
|
679 |
return dcc.send_bytes(xlsx_bytes.read(), f"{selected_doc_type}.xlsx")
|