"""Gradio demo that renders a predicted HTML table for visual inspection.

The page takes raw table HTML, normalizes it, and shows it with borders.
Two toggles allow (a) blanking cell contents so only the table structure is
visible and (b) overlaying dashed guide lines on merged cells so the
underlying grid stays recognizable.
"""
import re

import gradio as gr

from norm_html import normalized_html_table

# One pattern per cell tag: group 1 is the opening tag, group 2 the closing
# tag, and everything in between is discarded. ``\b`` keeps ``<th`` from
# matching ``<thead>``.
_CELL_CONTENT_PATTERNS = tuple(
    re.compile(
        rf'(<{tag}\b[^>]*>).*?(</{tag}\s*>)',
        flags=re.DOTALL | re.IGNORECASE,
    )
    for tag in ('td', 'th')
)

# Opening ``<table`` tags that do not already carry a ``border`` attribute.
_BORDERLESS_TABLE_RE = re.compile(
    r'<table\b(?![^>]*\bborder\s*=)',
    flags=re.IGNORECASE,
)


def clear_table_cells(pred):
    """Return *pred* with the content of every ``<td>``/``<th>`` removed.

    The opening and closing tags (including attributes such as ``colspan``)
    are kept, so the table structure is preserved while the text is dropped.
    """
    for pattern in _CELL_CONTENT_PATTERNS:
        pred = pattern.sub(r'\1\2', pred)
    return pred


def _span_of(cell, attr):
    """Return the integer value of a span attribute, or 1 if absent/invalid."""
    try:
        return int(cell.get(attr, 1))
    except (TypeError, ValueError):
        # Malformed markup such as colspan="" must not break rendering.
        return 1


def _append_guide_line(soup, cell, style):
    """Append an absolutely positioned, empty ``<div>`` with *style* to *cell*."""
    line = soup.new_tag('div')
    line['style'] = style
    cell.append(line)


def add_merged_cell_lines(html_content):
    """Overlay dashed guide lines on merged cells of the first table.

    For a cell with ``colspan=n`` this adds ``n - 1`` vertical dashed lines at
    equal horizontal offsets; for ``rowspan=n`` it adds ``n - 1`` horizontal
    dashed lines. Cells that are not merged are left untouched.

    Args:
        html_content: HTML string expected to contain a ``<table>``.

    Returns:
        The modified HTML as a string, or *html_content* unchanged when no
        table or no rows are found.
    """
    # Imported lazily so bs4 is only required when this feature is used.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    if not table:
        return html_content

    rows = table.find_all('tr')
    if not rows:
        return html_content

    for row in rows:
        for cell in row.find_all(['td', 'th']):
            colspan = _span_of(cell, 'colspan')
            rowspan = _span_of(cell, 'rowspan')

            # Only merged cells receive guide lines.
            if colspan <= 1 and rowspan <= 1:
                continue

            # The guide lines are absolutely positioned, so the cell itself
            # must establish the positioning context.
            style = cell.get('style', '')
            if 'position: relative' not in style:
                base = style.strip().rstrip(';').rstrip()
                cell['style'] = (
                    f"{base}; position: relative" if base
                    else "position: relative"
                )

            # Vertical separators between the columns the cell spans.
            for i in range(1, colspan):
                _append_guide_line(
                    soup,
                    cell,
                    " position: absolute; top: 0; bottom: 0;"
                    f" left: {i * 100 / colspan}%; width: 0;"
                    " border-left: 2px dashed #666;"
                    " pointer-events: none; z-index: 10; ",
                )

            # Horizontal separators between the rows the cell spans.
            for i in range(1, rowspan):
                _append_guide_line(
                    soup,
                    cell,
                    " position: absolute; left: 0; right: 0;"
                    f" top: {i * 100 / rowspan}%; height: 0;"
                    " border-top: 2px dashed #666;"
                    " pointer-events: none; z-index: 10; ",
                )

    return str(soup)


def add_table_border(pred_html):
    """Return *pred_html* with a visible border on every ``<table>``.

    NOTE(review): the original replacement literals could not be recovered
    from the reviewed copy of this file; this adds ``border="1"`` to tables
    that have no ``border`` attribute. Confirm against the canonical source.
    """
    return _BORDERLESS_TABLE_RE.sub('<table border="1"', pred_html)


def show_html(pred_input, show_structure, show_merged_cell):
    """Build the HTML shown in the preview pane.

    Args:
        pred_input: Raw table HTML entered by the user.
        show_structure: When true, cell contents are blanked out.
        show_merged_cell: When true, merged cells get dashed guide lines.

    Returns:
        The normalized, bordered (and optionally annotated) HTML string.
    """
    html = normalized_html_table(pred_input)
    if show_structure:
        html = clear_table_cells(html)
    # NOTE(review): assumed to apply regardless of ``show_structure``.
    html = add_table_border(html)
    if show_merged_cell:
        html = add_merged_cell_lines(html)
    return html


# NOTE(review): widget labels and layout are reconstructed; only the component
# variable names and the placeholder text are taken from the original wiring.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            pred_input = gr.Textbox(label="Table HTML", lines=12)
            show_structure = gr.Checkbox(label="Show structure only", value=False)
            show_merged_cell = gr.Checkbox(label="Mark merged cells", value=False)
            show_html_btn = gr.Button("Show HTML")
        with gr.Column():
            pred_html = gr.HTML("<p>input HTML here.</p>")

    # Re-render on button press and whenever either toggle changes.
    show_html_btn.click(
        show_html,
        inputs=[pred_input, show_structure, show_merged_cell],
        outputs=[pred_html],
    )
    show_structure.change(
        show_html,
        inputs=[pred_input, show_structure, show_merged_cell],
        outputs=[pred_html],
    )
    show_merged_cell.change(
        show_html,
        inputs=[pred_input, show_structure, show_merged_cell],
        outputs=[pred_html],
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)