import gradio as gr
from norm_html import normalized_html_table
import re
def clear_table_cells(pred):
    """Strip the contents of every table cell, keeping only the cell tags.

    Everything between an opening ``<td ...>`` / ``<th ...>`` tag and the next
    matching closing tag is removed, so only the table structure remains.
    Attributes on the opening tag (e.g. ``colspan``) are preserved.

    Args:
        pred: HTML string containing the table markup.

    Returns:
        The HTML string with all ``td`` and ``th`` cells emptied.
    """
    for tag in ("td", "th"):
        # ``\b`` keeps ``<th`` from matching ``<thead>``, which would otherwise
        # delete everything up to the first ``</th>``.
        # DOTALL lets cell content span multiple lines; matching is non-greedy
        # so each cell is cleared independently.
        pred = re.sub(
            rf'(<{tag}\b[^>]*>).*?(</{tag}>)',
            r'\1\2',
            pred,
            flags=re.DOTALL | re.IGNORECASE,
        )
    return pred
def _span_count(cell, attr):
    """Return a cell's ``colspan``/``rowspan`` as an int, or 1 if missing or malformed."""
    try:
        return int(cell.get(attr, 1))
    except (TypeError, ValueError):
        # Malformed values (e.g. "" or "2x") are treated as "not merged"
        # instead of aborting the whole rendering.
        return 1


def _vertical_line_style(offset_percent):
    """Build the inline CSS for a dashed vertical divider at the given left offset."""
    return (
        "\n"
        "position: absolute;\n"
        "top: 0;\n"
        "bottom: 0;\n"
        f"left: {offset_percent}%;\n"
        "width: 0;\n"
        "border-left: 2px dashed #666;\n"
        "pointer-events: none;\n"
        "z-index: 10;\n"
    )


def _horizontal_line_style(offset_percent):
    """Build the inline CSS for a dashed horizontal divider at the given top offset."""
    return (
        "\n"
        "position: absolute;\n"
        "left: 0;\n"
        "right: 0;\n"
        f"top: {offset_percent}%;\n"
        "height: 0;\n"
        "border-top: 2px dashed #666;\n"
        "pointer-events: none;\n"
        "z-index: 10;\n"
    )


def add_merged_cell_lines(html_content):
    """Mark merged cells with dashed lines showing the grid cells they cover.

    For every ``td``/``th`` with ``colspan`` or ``rowspan`` greater than 1, the
    cell is made ``position: relative`` and absolutely positioned ``div``
    elements are appended: ``colspan - 1`` evenly spaced vertical dashed lines
    and ``rowspan - 1`` evenly spaced horizontal dashed lines.

    Only rows under the first ``<table>`` found in the document are processed.

    Args:
        html_content: HTML string to annotate.

    Returns:
        The annotated HTML serialized from the parsed document, or
        ``html_content`` unchanged when it contains no table or no rows.
    """
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    if not table:
        return html_content

    rows = table.find_all('tr')
    if not rows:
        return html_content

    for row in rows:
        for cell in row.find_all(['td', 'th']):
            colspan = _span_count(cell, 'colspan')
            rowspan = _span_count(cell, 'rowspan')

            # Only merged cells get guide lines.
            if colspan <= 1 and rowspan <= 1:
                continue

            # The dividers are absolutely positioned, so the cell must be
            # their positioning context.
            style = cell.get('style', '')
            if 'position: relative' not in style:
                cell['style'] = f"{style}; position: relative;".strip('; ')

            # One vertical line per internal column boundary.
            for i in range(1, colspan):
                line = soup.new_tag('div')
                line['style'] = _vertical_line_style(i * 100 / colspan)
                cell.append(line)

            # One horizontal line per internal row boundary.
            for i in range(1, rowspan):
                line = soup.new_tag('div')
                line['style'] = _horizontal_line_style(i * 100 / rowspan)
                cell.append(line)

    return str(soup)
def show_html(pred_input, show_structure, show_merged_cell):
    """Build the HTML preview for the given table markup.

    The input is first passed through ``normalized_html_table``. Cell contents
    are then cleared when ``show_structure`` is truthy, ``add_table_border`` is
    applied, and merged cells are annotated when ``show_merged_cell`` is truthy.

    Args:
        pred_input: Raw HTML table string entered by the user.
        show_structure: If truthy, empty every cell so only the layout shows.
        show_merged_cell: If truthy, draw dashed guides inside merged cells.

    Returns:
        The HTML string to display.
    """
    rendered = normalized_html_table(pred_input)
    if show_structure:
        rendered = clear_table_cells(rendered)
    # NOTE(review): the border step is assumed to run unconditionally (not only
    # in structure mode) — confirm against the original indentation.
    rendered = add_table_border(rendered)
    return add_merged_cell_lines(rendered) if show_merged_cell else rendered
def add_table_border(pred_html):
# NOTE(review): judging by its name, this is meant to add border markup to the
# table HTML — confirm. The string literals below appear to have lost their
# HTML content: as written, replace("", "") returns the string unchanged.
pred_html = pred_html.replace("", "")
# NOTE(review): the literal opened on the next line is not closed until the
# following line, so the remainder of this function (and whatever code came
# after it) seems to be missing here — restore from the original file.
pred_html = pred_html.replace(" | input HTML here. |
")
# Re-run show_html and refresh pred_html whenever the button is clicked or
# either of the two option components changes value. All three handlers use
# the same inputs and output.
# NOTE(review): show_html_btn, pred_input, show_structure, show_merged_cell,
# pred_html and demo are not defined in the visible code — presumably created
# in a Gradio Blocks layout; confirm.
show_html_btn.click(show_html, inputs=[pred_input, show_structure, show_merged_cell], outputs=[pred_html])
show_structure.change(show_html, inputs=[pred_input, show_structure, show_merged_cell], outputs=[pred_html])
show_merged_cell.change(show_html, inputs=[pred_input, show_structure, show_merged_cell], outputs=[pred_html])
# Start the app on port 7860 with debug mode enabled. server_name="0.0.0.0"
# makes it listen on all network interfaces, not just localhost.
demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)