Spaces:
Running
Running
import gradio as gr | |
from norm_html import normalized_html_table | |
import re | |
def clear_table_cells(pred):
    """Empty every ``<td>`` and ``<th>`` cell while keeping the tags themselves.

    Args:
        pred: HTML markup as a string.

    Returns:
        The markup with everything between each opening ``<td ...>``/``<th ...>``
        tag and the next matching closing tag removed. Attributes on the
        opening tags are preserved. Matching is case-insensitive and spans
        newlines.
    """
    flags = re.DOTALL | re.IGNORECASE
    # ``\b`` right after the tag name stops ``<th`` from matching ``<thead>``
    # (and ``<td`` from matching any longer tag name); without it the header
    # section's ``<tr><th>`` would be swallowed up to the first ``</th>``.
    pred = re.sub(r'(<td\b[^>]*>).*?(</td>)', r'\1\2', pred, flags=flags)
    pred = re.sub(r'(<th\b[^>]*>).*?(</th>)', r'\1\2', pred, flags=flags)
    return pred
def add_merged_cell_lines(html_content):
    """Mark merged cells of the first table with dashed divider lines.

    For every ``<td>``/``<th>`` whose ``colspan`` or ``rowspan`` is greater
    than 1, absolutely positioned ``<div>`` elements are appended to the cell:
    ``colspan - 1`` vertical dashed lines and ``rowspan - 1`` horizontal dashed
    lines, evenly spaced, so the grid positions covered by the merge stay
    visible.

    Args:
        html_content: HTML markup as a string.

    Returns:
        The markup re-serialized by BeautifulSoup with the divider elements
        added, or ``html_content`` unchanged when it contains no ``<table>``
        or the table has no ``<tr>``.
    """
    from bs4 import BeautifulSoup

    def span_of(cell, attr):
        """Return the integer value of *attr*, or 1 when missing or invalid."""
        try:
            return int(cell.get(attr, 1))
        except (TypeError, ValueError):
            # Hand-typed markup may carry values such as colspan="" or
            # colspan="2px"; treat those as an unmerged cell instead of
            # aborting the whole render.
            return 1

    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    if not table:
        return html_content

    rows = table.find_all('tr')
    if not rows:
        return html_content

    for row in rows:
        for cell in row.find_all(['td', 'th']):
            colspan = span_of(cell, 'colspan')
            rowspan = span_of(cell, 'rowspan')

            # Only merged cells get divider lines.
            if colspan <= 1 and rowspan <= 1:
                continue

            # The dividers are absolutely positioned, so the cell itself must
            # be the positioning container.
            style = cell.get('style', '')
            if 'position: relative' not in style:
                existing = style.strip('; ')
                # Join without producing an empty ``;;`` declaration.
                cell['style'] = (
                    f"{existing}; position: relative"
                    if existing
                    else "position: relative"
                )

            # Vertical lines at each internal column boundary.
            for i in range(1, colspan):
                line = soup.new_tag('div')
                line['style'] = f"""
                    position: absolute;
                    top: 0;
                    bottom: 0;
                    left: {i * 100 / colspan}%;
                    width: 0;
                    border-left: 2px dashed #666;
                    pointer-events: none;
                    z-index: 10;
                """
                cell.append(line)

            # Horizontal lines at each internal row boundary.
            for i in range(1, rowspan):
                line = soup.new_tag('div')
                line['style'] = f"""
                    position: absolute;
                    left: 0;
                    right: 0;
                    top: {i * 100 / rowspan}%;
                    height: 0;
                    border-top: 2px dashed #666;
                    pointer-events: none;
                    z-index: 10;
                """
                cell.append(line)

    return str(soup)
def show_html(pred_input, show_structure, show_merged_cell):
    """Build the HTML shown in the preview from the user's table markup.

    Args:
        pred_input: Table HTML entered by the user.
        show_structure: When truthy, cell contents are cleared so only the
            table structure remains.
        show_merged_cell: When truthy, dashed divider lines are added to
            merged cells.

    Returns:
        The markup after ``normalized_html_table``, the optional cell
        clearing, border styling, and the optional merged-cell dividers.
    """
    rendered = normalized_html_table(pred_input)
    if show_structure:
        rendered = clear_table_cells(rendered)
    # NOTE(review): borders are applied regardless of show_structure; the
    # original indentation was not recoverable, confirm this is intended.
    rendered = add_table_border(rendered)
    return add_merged_cell_lines(rendered) if show_merged_cell else rendered
def add_table_border(pred_html):
    """Add inline border styling to the table and to each of its cells.

    Args:
        pred_html: HTML markup as a string.

    Returns:
        The markup with every bare ``<table>`` tag given a class and a
        collapsed 3px border, and every ``<td``/``<th`` opening tag given a
        2px border ``style`` attribute inserted right after the tag name.
    """
    pred_html = pred_html.replace(
        "<table>",
        "<table class='table table-bordered' style='border-collapse: collapse; border: 3px solid #333;'>",
    )
    # Match the tag name as a whole word: a plain ``replace("<th", ...)`` also
    # rewrites ``<thead`` into ``<th style='...'ead>`` and corrupts the markup.
    return re.sub(r"<(td|th)\b", r"<\1 style='border: 2px solid #333;'", pred_html)
# Script entry point: build the Gradio UI (HTML input box, two option
# checkboxes, a render button and an HTML preview) and start the server.
# NOTE(review): the layout nesting below was reconstructed from a flattened
# copy of the file; confirm which Row/Column each component belongs to.
if __name__ == "__main__":
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    # Free-text input holding the table markup to preview.
                    pred_input = gr.Textbox(label='HTML Table', placeholder='type table html code here', interactive=True)
                with gr.Row():
                    # Both rendering options are enabled by default.
                    show_structure = gr.Checkbox(label="只显示表格结构", value=True)
                    show_merged_cell = gr.Checkbox(label="显示合并单元格虚线", value=True)
                show_html_btn = gr.Button("显示HTML")
            # Preview area, seeded with a placeholder table.
            pred_html = gr.HTML("<table><td>input HTML here.</td></table>")
        # Clicking the button or toggling either checkbox re-renders the
        # preview through show_html with the same inputs and output.
        show_html_btn.click(show_html, inputs=[pred_input, show_structure, show_merged_cell], outputs=[pred_html])
        show_structure.change(show_html, inputs=[pred_input, show_structure, show_merged_cell], outputs=[pred_html])
        show_merged_cell.change(show_html, inputs=[pred_input, show_structure, show_merged_cell], outputs=[pred_html])
    # Listen on all interfaces, port 7860, with Gradio's debug mode enabled.
    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)