wufan committed on
Commit
4ec6e8c
·
verified ·
1 Parent(s): bc11e8d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from norm_html import normalized_html_table
3
+ import re
4
+
5
# Matches one <td>/<th> element: opening tag, its content, and the closing tag.
# The lookahead after the tag name requires whitespace, '>' or '/' so that
# longer tag names sharing the prefix (notably <thead>) are NOT treated as a
# cell; the back-reference makes the closing tag agree with the opening one.
_CELL_CONTENT_RE = re.compile(
    r'(<(t[dh])(?=[\s>/])[^>]*>).*?(</\2\s*>)',
    flags=re.DOTALL | re.IGNORECASE,
)


def clear_table_cells(pred):
    """Return *pred* with the content of every ``<td>``/``<th>`` cell removed.

    Opening tags (including their attributes such as ``colspan``) and closing
    tags are kept, so only the table structure remains. Matching is
    case-insensitive and spans newlines.

    Args:
        pred: HTML markup as a string.

    Returns:
        The markup with each cell reduced to ``<td ...></td>`` / ``<th ...></th>``.
    """
    return _CELL_CONTENT_RE.sub(r'\1\3', pred)
10
+
11
def _span_count(cell, attr):
    """Return the ``colspan``/``rowspan`` attribute *attr* of *cell* as an int.

    A missing, empty or non-numeric value (e.g. ``colspan=""`` or
    ``colspan="2px"``) yields 1, i.e. the cell is treated as not merged,
    instead of raising ``ValueError``.
    """
    try:
        return int(cell.get(attr, 1))
    except (TypeError, ValueError):
        return 1


def add_merged_cell_lines(html_content):
    """Add dashed guide lines to the merged cells of the first table.

    For every ``<td>``/``<th>`` with ``colspan > 1`` or ``rowspan > 1`` the cell
    is made ``position: relative`` and absolutely positioned ``<div>`` elements
    are appended: ``colspan - 1`` vertical dashed lines and ``rowspan - 1``
    horizontal dashed lines, evenly spaced, marking where the un-merged cell
    boundaries would be.

    Args:
        html_content: HTML markup as a string.

    Returns:
        The re-serialized markup with guide lines added. If the markup contains
        no ``<table>`` or the table has no ``<tr>``, *html_content* is returned
        unchanged.
    """
    # bs4 is only needed by this function, so it is imported here.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    if not table:
        return html_content

    rows = table.find_all('tr')
    if not rows:
        return html_content

    for row in rows:
        for cell in row.find_all(['td', 'th']):
            colspan = _span_count(cell, 'colspan')
            rowspan = _span_count(cell, 'rowspan')

            # Only merged cells get guide lines.
            if colspan <= 1 and rowspan <= 1:
                continue

            # The guide lines are absolutely positioned, so the cell must be
            # their positioning context.
            style = cell.get('style', '')
            if 'position: relative' not in style:
                # Drop surrounding ';' / spaces first so that an existing
                # style ending in ';' does not produce ';;'.
                base = style.strip('; ')
                if base:
                    cell['style'] = f"{base}; position: relative"
                else:
                    cell['style'] = "position: relative"

            # Vertical lines at each internal column boundary
            # (the range is empty when colspan <= 1).
            for i in range(1, colspan):
                line = soup.new_tag('div')
                line['style'] = f"""
                    position: absolute;
                    top: 0;
                    bottom: 0;
                    left: {i * 100 / colspan}%;
                    width: 0;
                    border-left: 2px dashed #666;
                    pointer-events: none;
                    z-index: 10;
                """
                cell.append(line)

            # Horizontal lines at each internal row boundary
            # (the range is empty when rowspan <= 1).
            for i in range(1, rowspan):
                line = soup.new_tag('div')
                line['style'] = f"""
                    position: absolute;
                    left: 0;
                    right: 0;
                    top: {i * 100 / rowspan}%;
                    height: 0;
                    border-top: 2px dashed #666;
                    pointer-events: none;
                    z-index: 10;
                """
                cell.append(line)

    return str(soup)
74
+
75
def show_html(pred_input, show_structure, show_merged_cell):
    """Build the HTML shown in the preview for the given table markup.

    The input is first passed through ``normalized_html_table``. When
    *show_structure* is truthy the cell contents are cleared, then borders are
    added, and when *show_merged_cell* is truthy dashed guide lines are added
    to merged cells.

    Args:
        pred_input: Table HTML entered by the user.
        show_structure: If truthy, strip cell contents and show structure only.
        show_merged_cell: If truthy, mark merged cells with dashed lines.

    Returns:
        The resulting HTML string.
    """
    rendered = normalized_html_table(pred_input)
    if show_structure:
        rendered = clear_table_cells(rendered)

    rendered = add_table_border(rendered)
    return add_merged_cell_lines(rendered) if show_merged_cell else rendered
84
+
85
def add_table_border(pred_html):
    """Inject border styling into the ``<table>``, ``<td>`` and ``<th>`` tags.

    The attributes are inserted directly after the tag name, so any existing
    attributes of the tag are kept after them. Tag names are matched
    case-insensitively and must be followed by whitespace, ``>`` or ``/``, so
    tags that merely share a prefix (e.g. ``<thead>``) are left untouched, and
    ``<table ...>`` tags that already carry attributes are styled as well.

    Args:
        pred_html: HTML markup as a string.

    Returns:
        The markup with the border attributes added.
    """
    # Lookahead that ends the tag name; without it "<th" would also hit "<thead".
    tag_end = r'(?=[\s>/])'
    pred_html = re.sub(
        r'<table' + tag_end,
        r"\g<0> class='table table-bordered' style='border-collapse: collapse; border: 3px solid #333;'",
        pred_html,
        flags=re.IGNORECASE,
    )
    pred_html = re.sub(
        r'<t[dh]' + tag_end,
        r"\g<0> style='border: 2px solid #333;'",
        pred_html,
        flags=re.IGNORECASE,
    )
    return pred_html
90
+
91
if __name__ == "__main__":

    with gr.Blocks() as demo:

        with gr.Row():
            with gr.Column():
                # Input area: the raw table HTML.
                with gr.Row():
                    pred_input = gr.Textbox(
                        label='HTML Table',
                        placeholder='type table html code here',
                        interactive=True,
                    )
                # Rendering options and the render button.
                with gr.Row():
                    show_structure = gr.Checkbox(label="只显示表格结构", value=True)
                    show_merged_cell = gr.Checkbox(label="显示合并单元格虚线", value=True)
                    show_html_btn = gr.Button("显示HTML")

            # Preview of the rendered table.
            pred_html = gr.HTML("<table><td>input HTML here.</td></table>")

        # The button and both checkboxes re-render the preview with the same
        # inputs and output.
        render_inputs = [pred_input, show_structure, show_merged_cell]
        render_outputs = [pred_html]
        for register in (
            show_html_btn.click,
            show_structure.change,
            show_merged_cell.change,
        ):
            register(show_html, inputs=render_inputs, outputs=render_outputs)

    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)