bluenevus committed on
Commit
55b788e
·
1 Parent(s): 56509bf

Update app.py via AI Editor

Browse files
Files changed (1) hide show
  1. app.py +26 -12
app.py CHANGED
@@ -463,15 +463,7 @@ def update_uploaded_doc_name(contents, filename, id_dict):
463
  return filename, contents, "uploaded"
464
  return "", None, "loaded"
465
 
466
- def markdown_table_preview(md_text):
467
- # Always use dcc.Markdown to render markdown tables, which supports tables natively
468
- return dcc.Markdown(md_text, dangerously_allow_html=True, style={'whiteSpace': 'pre-wrap', 'fontFamily': 'monospace', 'overflowX': 'auto'})
469
-
470
- def markdown_narrative_preview(md_text):
471
- return html.Div(dcc.Markdown(md_text, dangerously_allow_html=True, style={'whiteSpace': 'pre-wrap', 'fontFamily': 'sans-serif'}))
472
-
473
  def extract_markdown_tables(md_text):
474
- # Extract all markdown tables from md_text as list of strings
475
  tables = []
476
  lines = md_text.split('\n')
477
  in_table = False
@@ -492,12 +484,10 @@ def extract_markdown_tables(md_text):
492
  return tables
493
 
494
  def markdown_table_to_df(md_table):
495
- # Convert a single markdown table string to pandas DataFrame
496
  lines = [line.strip() for line in md_table.split('\n') if line.strip()]
497
  if len(lines) < 2:
498
  return None
499
  header = [h.strip() for h in lines[0].strip('|').split('|')]
500
- # Find the first line with at least 3 dashes, that's the separator
501
  sep_idx = 1
502
  while sep_idx < len(lines) and not re.match(r'^\|\s*:?-+:?\s*(\|\s*:?-+:?\s*)+\|?$', lines[sep_idx]):
503
  sep_idx += 1
@@ -507,7 +497,6 @@ def markdown_table_to_df(md_table):
507
  if not row.strip() or not row.strip().startswith('|'):
508
  continue
509
  cells = [c.strip() for c in row.strip('|').split('|')]
510
- # Pad or trim cells to header length
511
  if len(cells) < len(header):
512
  cells += [''] * (len(header) - len(cells))
513
  elif len(cells) > len(header):
@@ -516,6 +505,32 @@ def markdown_table_to_df(md_table):
516
  df = pd.DataFrame(rows, columns=header)
517
  return df
518
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
  def markdown_tables_to_xlsx(md_text):
520
  tables = extract_markdown_tables(md_text)
521
  output = BytesIO()
@@ -659,7 +674,6 @@ def download_document(n_clicks, selected_doc_type):
659
 
660
  if selected_doc_type in spreadsheet_types:
661
  try:
662
- # Convert markdown tables to xlsx
663
  xlsx_bytes = markdown_tables_to_xlsx(current_document)
664
  logging.info(f"{selected_doc_type} document downloaded as Excel.")
665
  return dcc.send_bytes(xlsx_bytes.read(), f"{selected_doc_type}.xlsx")
 
463
  return filename, contents, "uploaded"
464
  return "", None, "loaded"
465
 
 
 
 
 
 
 
 
466
  def extract_markdown_tables(md_text):
 
467
  tables = []
468
  lines = md_text.split('\n')
469
  in_table = False
 
484
  return tables
485
 
486
  def markdown_table_to_df(md_table):
 
487
  lines = [line.strip() for line in md_table.split('\n') if line.strip()]
488
  if len(lines) < 2:
489
  return None
490
  header = [h.strip() for h in lines[0].strip('|').split('|')]
 
491
  sep_idx = 1
492
  while sep_idx < len(lines) and not re.match(r'^\|\s*:?-+:?\s*(\|\s*:?-+:?\s*)+\|?$', lines[sep_idx]):
493
  sep_idx += 1
 
497
  if not row.strip() or not row.strip().startswith('|'):
498
  continue
499
  cells = [c.strip() for c in row.strip('|').split('|')]
 
500
  if len(cells) < len(header):
501
  cells += [''] * (len(header) - len(cells))
502
  elif len(cells) > len(header):
 
505
  df = pd.DataFrame(rows, columns=header)
506
  return df
507
 
508
def markdown_table_preview(md_text):
    """Render every markdown table in *md_text* as a Dash DataTable.

    Tables are located with ``extract_markdown_tables`` and parsed with
    ``markdown_table_to_df``. Tables that fail to parse (``None``) or that
    contain no rows are skipped.

    Args:
        md_text: Markdown source that may contain pipe-delimited tables.

    Returns:
        An ``html.Div`` wrapping one DataTable per renderable table, or an
        ``html.Div`` with the text "No table found." when nothing can be
        rendered (no tables at all, or every table was empty/unparsable).
    """
    table_divs = []
    # The enumerate index covers *all* extracted tables (including skipped
    # ones) so each DataTable keeps a stable pattern-matching id.
    for i, table in enumerate(extract_markdown_tables(md_text)):
        df = markdown_table_to_df(table)
        if df is None or df.empty:
            continue

        display_names = [str(col) for col in df.columns]
        column_ids = _unique_column_ids(display_names)
        # Relabel before exporting: with duplicate header names,
        # to_dict('records') would silently drop the colliding columns.
        df.columns = column_ids

        table_divs.append(
            html.Div([
                DataTable(
                    columns=[
                        {"name": name, "id": column_id}
                        for name, column_id in zip(display_names, column_ids)
                    ],
                    data=df.to_dict('records'),
                    style_table={'overflowX': 'auto'},
                    style_cell={'whiteSpace': 'normal', 'height': 'auto', 'textAlign': 'left', 'fontFamily': 'monospace', 'fontSize': '14px', 'maxWidth': '400px', 'minWidth': '80px', 'wordBreak': 'break-word'},
                    style_header={'fontWeight': 'bold'},
                    page_size=100,
                    id={'type': 'datatable-preview', 'index': i}
                )
            ], className="mb-4")
        )

    # Also covers the case where tables were detected but none was renderable,
    # which previously produced an empty, message-less Div.
    if not table_divs:
        return html.Div("No table found.")
    return html.Div(table_divs)


def _unique_column_ids(names):
    """Return one distinct, non-empty column id per header name.

    A header that is already unique and non-empty is returned unchanged, so
    ids only differ from the visible names for empty or repeated headers.
    """
    taken = set()
    ids = []
    for position, name in enumerate(names):
        base = name or f"column_{position}"
        candidate = base
        suffix = 1
        while candidate in taken:
            suffix += 1
            candidate = f"{base}_{suffix}"
        taken.add(candidate)
        ids.append(candidate)
    return ids
530
+
531
def markdown_narrative_preview(md_text):
    """Wrap *md_text* in a Div containing a sans-serif Markdown component.

    Args:
        md_text: Markdown source for the narrative document.

    Returns:
        An ``html.Div`` whose only child is the rendered ``dcc.Markdown``.
    """
    # NOTE(review): dangerously_allow_html=True passes raw HTML in md_text
    # through to the browser -- confirm md_text only comes from trusted input.
    narrative_style = {'whiteSpace': 'pre-wrap', 'fontFamily': 'sans-serif'}
    rendered = dcc.Markdown(
        md_text,
        dangerously_allow_html=True,
        style=narrative_style,
    )
    return html.Div(rendered)
533
+
534
  def markdown_tables_to_xlsx(md_text):
535
  tables = extract_markdown_tables(md_text)
536
  output = BytesIO()
 
674
 
675
  if selected_doc_type in spreadsheet_types:
676
  try:
 
677
  xlsx_bytes = markdown_tables_to_xlsx(current_document)
678
  logging.info(f"{selected_doc_type} document downloaded as Excel.")
679
  return dcc.send_bytes(xlsx_bytes.read(), f"{selected_doc_type}.xlsx")