File size: 4,506 Bytes
4ec6e8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import gradio as gr
from norm_html import normalized_html_table
import re
        
def clear_table_cells(pred):
    """Empty every ``<td>`` and ``<th>`` cell while keeping the tags themselves.

    Args:
        pred: HTML string containing table markup.

    Returns:
        The same HTML with the text/markup between each opening ``<td>``/``<th>``
        tag and its nearest closing tag removed. Attributes on the opening tag
        (e.g. ``colspan``) are preserved. Matching is case-insensitive and
        spans newlines.
    """
    for tag in ("td", "th"):
        # The ``\b`` after the tag name is essential: without it ``<th`` also
        # matches ``<thead>``, which would wipe out everything from ``<thead>``
        # up to the first ``</th>`` and leave unbalanced markup behind.
        pattern = rf'(<{tag}\b[^>]*>).*?(</{tag}>)'
        pred = re.sub(pattern, r'\1\2', pred, flags=re.DOTALL | re.IGNORECASE)
    return pred

def _parse_span(cell, attr):
    """Return the integer ``colspan``/``rowspan`` of *cell*, or 1 if missing or malformed."""
    try:
        return int(cell.get(attr, 1))
    except (TypeError, ValueError):
        # Hand-typed HTML may contain values such as colspan="" or colspan="x";
        # treat those as an unmerged cell instead of crashing the preview.
        return 1


def add_merged_cell_lines(html_content):
    """Overlay dashed guide lines on merged cells of the first table.

    For every ``<td>``/``<th>`` with ``colspan`` or ``rowspan`` greater than 1,
    the cell is made ``position: relative`` and absolutely positioned ``<div>``
    elements are appended: ``colspan - 1`` vertical dashed lines and
    ``rowspan - 1`` horizontal dashed lines, evenly spaced by percentage.

    Args:
        html_content: HTML string expected to contain a ``<table>``.

    Returns:
        The re-serialized HTML with the guide lines added. If no ``<table>``
        or no ``<tr>`` is found, *html_content* is returned unchanged.
    """
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    if not table:
        return html_content

    # Collect all rows
    rows = table.find_all('tr')
    if not rows:
        return html_content

    for row in rows:
        for cell in row.find_all(['td', 'th']):
            colspan = _parse_span(cell, 'colspan')
            rowspan = _parse_span(cell, 'rowspan')

            # Only merged cells get guide lines
            if colspan > 1 or rowspan > 1:
                # The cell must be a positioning context for the absolute lines
                style = cell.get('style', '')
                if 'position: relative' not in style:
                    cell['style'] = f"{style}; position: relative;".strip('; ')

                # Vertical lines marking the column boundaries inside a colspan
                if colspan > 1:
                    for i in range(1, colspan):
                        line = soup.new_tag('div')
                        line['style'] = f"""
                            position: absolute;
                            top: 0;
                            bottom: 0;
                            left: {i * 100 / colspan}%;
                            width: 0;
                            border-left: 2px dashed #666;
                            pointer-events: none;
                            z-index: 10;
                        """
                        cell.append(line)

                # Horizontal lines marking the row boundaries inside a rowspan
                if rowspan > 1:
                    for i in range(1, rowspan):
                        line = soup.new_tag('div')
                        line['style'] = f"""
                            position: absolute;
                            left: 0;
                            right: 0;
                            top: {i * 100 / rowspan}%;
                            height: 0;
                            border-top: 2px dashed #666;
                            pointer-events: none;
                            z-index: 10;
                        """
                        cell.append(line)

    return str(soup)

def show_html(pred_input, show_structure, show_merged_cell):
    """Build the HTML shown in the preview from the user's table markup.

    Args:
        pred_input: Table HTML typed by the user; passed to
            ``normalized_html_table`` first.
        show_structure: When truthy, cell contents are cleared so only the
            table structure remains.
        show_merged_cell: When truthy, dashed guide lines are added to
            merged cells.

    Returns:
        The processed HTML string with borders applied.
    """
    rendered = normalized_html_table(pred_input)
    if show_structure:
        rendered = clear_table_cells(rendered)

    # Borders are always applied; merged-cell guides are optional.
    rendered = add_table_border(rendered)
    return add_merged_cell_lines(rendered) if show_merged_cell else rendered

def add_table_border(pred_html):
    """Add inline border styling to a table and its cells.

    Args:
        pred_html: HTML string containing table markup.

    Returns:
        The HTML with every bare ``<table>`` tag given a class and a 3px
        border, and every ``<td``/``<th`` opening tag given a 2px border via
        an inline ``style`` attribute inserted right after the tag name.
    """
    pred_html = pred_html.replace("<table>", "<table class='table table-bordered' style='border-collapse: collapse; border: 3px solid #333;'>")
    # The word boundary keeps ``<thead>`` intact; a plain ``replace("<th", ...)``
    # would rewrite it into ``<th style='...'ead>`` and corrupt the markup.
    return re.sub(r"<(td|th)\b", r"<\1 style='border: 2px solid #333;'", pred_html)

if __name__ == "__main__":
    # Layout: a left column with the HTML input and the display options,
    # and the rendered table preview next to it in the same row.
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    pred_input = gr.Textbox(
                        label='HTML Table',
                        placeholder='type table html code here',
                        interactive=True,
                    )
                with gr.Row():
                    show_structure = gr.Checkbox(label="只显示表格结构", value=True)
                    show_merged_cell = gr.Checkbox(label="显示合并单元格虚线", value=True)
                    show_html_btn = gr.Button("显示HTML")

            pred_html = gr.HTML("<table><td>input HTML here.</td></table>")

        # Clicking the button or toggling either checkbox re-renders the
        # preview with the same handler, inputs and output.
        render_inputs = [pred_input, show_structure, show_merged_cell]
        render_outputs = [pred_html]
        for bind in (show_html_btn.click, show_structure.change, show_merged_cell.change):
            bind(show_html, inputs=render_inputs, outputs=render_outputs)

    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)