Zihao-Li committed on
Commit
029f30f
·
verified ·
1 Parent(s): 3daca56

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -18
app.py CHANGED
@@ -3,26 +3,24 @@ import json
3
  import os
4
  import tempfile
5
 
6
- # ======== 设置路径 ========
7
- LANG_DIR = "./human_eval" # 含有语言对子文件夹的目录
8
- SAVE_DIR = "./annotations" # 保存标注记录的目录
9
  os.makedirs(SAVE_DIR, exist_ok=True)
10
 
11
- # ======== 初始化数据结构 ========
12
  data = []
13
  user_annotations = []
 
14
 
15
- # ======== 获取可用语言对列表 ========
16
  language_options = sorted([f for f in os.listdir(LANG_DIR)])
17
 
18
 
19
- # ======== 加载选择的语言对数据 ========
20
  def load_data_for_lang(lang_pair):
21
- global data, user_annotations
22
  file_path = os.path.join(LANG_DIR, lang_pair, f"{lang_pair}.json")
23
  with open(file_path, "r", encoding="utf-8") as f:
24
  data = json.load(f)
25
  user_annotations = []
 
26
  return (
27
  0,
28
  data[0]["source"],
@@ -31,7 +29,45 @@ def load_data_for_lang(lang_pair):
31
  )
32
 
33
 
34
- # ======== 读取当前样本 ========
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  def load_sample(i):
36
  if not data:
37
  return "", ""
@@ -39,13 +75,14 @@ def load_sample(i):
39
  return entry["source"], entry["hypothesis"]
40
 
41
 
42
- # ======== 提交打分并进入下一条 ========
43
  def annotate(index, score, comment, annotator):
 
44
  index = int(index)
45
  entry = data[index]
46
  record = {
47
  "index": index,
48
  "annotator": annotator,
 
49
  "source": entry["source"],
50
  "hypothesis": entry["hypothesis"],
51
  "score": score,
@@ -85,19 +122,20 @@ def annotate(index, score, comment, annotator):
85
  )
86
 
87
 
88
- # ======== 导出打分结果 ========
89
  def export_results():
 
 
90
  tmp = tempfile.NamedTemporaryFile(
91
  delete=False, suffix=".json", mode="w", encoding="utf-8"
92
  )
93
  json.dump(user_annotations, tmp, ensure_ascii=False, indent=2)
94
  tmp.close()
95
- return tmp.name
96
 
97
 
98
- # ======== UI 构建 ========
99
  with gr.Blocks() as demo:
100
- gr.Markdown("## Direct Assessment Annotation Tool")
101
 
102
  with gr.Row():
103
  lang_choice = gr.Dropdown(
@@ -107,6 +145,12 @@ with gr.Blocks() as demo:
107
  )
108
  load_button = gr.Button("🔄 Load Data")
109
 
 
 
 
 
 
 
110
  with gr.Row():
111
  annotator = gr.Textbox(
112
  label="Annotator ID",
@@ -122,16 +166,18 @@ with gr.Blocks() as demo:
122
  comment = gr.Textbox(lines=2, placeholder="Optional comment...", label="Comment")
123
  output = gr.Textbox(label="Status", interactive=False)
124
  next_button = gr.Button("Submit and Next")
125
-
126
- export_button = gr.Button("📥 Export My Results")
127
  export_file = gr.File(label="Download your results", visible=False)
128
 
129
- # 行为绑定
130
  load_button.click(
131
  fn=load_data_for_lang,
132
  inputs=[lang_choice],
133
  outputs=[idx, source, hyp, progress],
134
  )
 
 
 
 
 
135
  next_button.click(
136
  fn=annotate,
137
  inputs=[idx, score, comment, annotator],
@@ -146,9 +192,15 @@ with gr.Blocks() as demo:
146
  export_file,
147
  ],
148
  )
149
- export_button.click(fn=export_results, outputs=export_file)
 
 
 
 
 
 
 
150
  idx.change(fn=load_sample, inputs=idx, outputs=[source, hyp])
151
  demo.load(fn=load_sample, inputs=[idx], outputs=[source, hyp])
152
 
153
- # ======== 启动应用 ========
154
  demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
3
  import os
4
  import tempfile
5
 
6
+ LANG_DIR = "./human_eval"
7
+ SAVE_DIR = "./annotations"
 
8
  os.makedirs(SAVE_DIR, exist_ok=True)
9
 
 
10
  data = []
11
  user_annotations = []
12
+ current_lang = ""
13
 
 
14
  language_options = sorted([f for f in os.listdir(LANG_DIR)])
15
 
16
 
 
17
  def load_data_for_lang(lang_pair):
18
+ global data, user_annotations, current_lang
19
  file_path = os.path.join(LANG_DIR, lang_pair, f"{lang_pair}.json")
20
  with open(file_path, "r", encoding="utf-8") as f:
21
  data = json.load(f)
22
  user_annotations = []
23
+ current_lang = lang_pair
24
  return (
25
  0,
26
  data[0]["source"],
 
29
  )
30
 
31
 
32
def restore_previous_annotations(file_obj):
    """Resume an annotation session from a previously exported JSON file.

    The uploaded file is expected to be the list of records produced by
    ``export_results``; the language pair is read from the first record's
    ``"lang_pair"`` field and the next sample to annotate is the one after
    the last record's ``"index"``.

    Module-level state (``data``, ``user_annotations``, ``current_lang``) is
    only replaced once the upload has been fully validated, so a bad file
    leaves the running session untouched.

    Args:
        file_obj: The value delivered by the upload widget: ``None`` when
            the file was cleared, a path string, or an object exposing the
            path as ``.name``.

    Returns:
        Always a 5-tuple matching the outputs wired to this callback
        ``(idx, source, hyp, progress, lang_choice)``. On failure the
        language dropdown is left unchanged via ``gr.update()``.
    """
    global data, user_annotations, current_lang

    # The upload widget fires its change event with None when it is cleared;
    # there is nothing to restore, so leave every output as it is.
    if file_obj is None:
        return gr.update(), gr.update(), gr.update(), gr.update(), gr.update()

    # Accept both a tempfile-like wrapper (``.name``) and a bare path string.
    upload_path = getattr(file_obj, "name", file_obj)

    try:
        with open(upload_path, "r", encoding="utf-8") as f:
            restored = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return (
            0,
            "",
            "",
            "❌ Could not read the uploaded annotation file.",
            gr.update(),
        )

    if not isinstance(restored, list) or not restored:
        return 0, "", "", "No annotations found.", gr.update()

    first_record = restored[0]
    restored_lang = (
        first_record.get("lang_pair") if isinstance(first_record, dict) else None
    )

    # ``lang_pair`` comes from an uploaded file and is used to build a path:
    # only accept a bare directory name so it cannot escape LANG_DIR.
    lang_is_safe = (
        isinstance(restored_lang, str)
        and restored_lang not in ("", ".", "..")
        and os.path.basename(restored_lang) == restored_lang
    )
    file_path = (
        os.path.join(LANG_DIR, restored_lang, f"{restored_lang}.json")
        if lang_is_safe
        else None
    )
    if file_path is None or not os.path.exists(file_path):
        return (
            0,
            "",
            "",
            "❌ Language pair info missing or file not found.",
            gr.update(),
        )

    # Resume right after the last annotated sample.
    try:
        last_index = int(restored[-1]["index"]) + 1
    except (KeyError, TypeError, ValueError):
        last_index = -1
    if last_index < 0:
        return (
            0,
            "",
            "",
            "❌ Uploaded annotations are malformed (bad or missing index).",
            gr.update(),
        )

    with open(file_path, "r", encoding="utf-8") as f:
        restored_data = json.load(f)

    # Everything validated: commit the new session state in one go.
    data = restored_data
    user_annotations = restored
    current_lang = restored_lang

    total = len(data)
    if last_index >= total:
        return (
            last_index,
            "",
            "",
            f"✅ Already completed {total} samples of {restored_lang}.",
            restored_lang,
        )

    entry = data[last_index]
    return (
        last_index,
        entry["source"],
        entry["hypothesis"],
        f"Restored {restored_lang}: {last_index}/{total}",
        restored_lang,
    )
69
+
70
+
71
  def load_sample(i):
72
  if not data:
73
  return "", ""
 
75
  return entry["source"], entry["hypothesis"]
76
 
77
 
 
78
  def annotate(index, score, comment, annotator):
79
+ global current_lang
80
  index = int(index)
81
  entry = data[index]
82
  record = {
83
  "index": index,
84
  "annotator": annotator,
85
+ "lang_pair": current_lang,
86
  "source": entry["source"],
87
  "hypothesis": entry["hypothesis"],
88
  "score": score,
 
122
  )
123
 
124
 
 
125
def export_results():
    """Write the collected annotations to a temporary JSON file for download.

    Returns:
        A 2-tuple ``(path, update)``: the path of the written file and a
        ``gr.update`` that makes the download component visible and points
        it at that file. Two values are returned because the click handler
        lists ``export_file`` twice in its outputs.

    Raises:
        ValueError: If there are no annotations to export.
    """
    if not user_annotations:
        raise ValueError("No annotations to export.")

    # delete=False keeps the file on disk after closing so it can be served;
    # the ``with`` block guarantees the handle is closed even if dumping fails.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".json", mode="w", encoding="utf-8"
    ) as tmp:
        json.dump(user_annotations, tmp, ensure_ascii=False, indent=2)

    return tmp.name, gr.update(visible=True, value=tmp.name)
134
 
135
 
136
+ # ======== UI ========
137
  with gr.Blocks() as demo:
138
+ gr.Markdown("## 📝 Direct Assessment Annotation Tool")
139
 
140
  with gr.Row():
141
  lang_choice = gr.Dropdown(
 
145
  )
146
  load_button = gr.Button("🔄 Load Data")
147
 
148
+ with gr.Row():
149
+ upload_file = gr.File(
150
+ label="📤 Upload Previous Annotations", file_types=[".json"]
151
+ )
152
+ export_button = gr.Button("📥 Export My Results")
153
+
154
  with gr.Row():
155
  annotator = gr.Textbox(
156
  label="Annotator ID",
 
166
  comment = gr.Textbox(lines=2, placeholder="Optional comment...", label="Comment")
167
  output = gr.Textbox(label="Status", interactive=False)
168
  next_button = gr.Button("Submit and Next")
 
 
169
  export_file = gr.File(label="Download your results", visible=False)
170
 
 
171
  load_button.click(
172
  fn=load_data_for_lang,
173
  inputs=[lang_choice],
174
  outputs=[idx, source, hyp, progress],
175
  )
176
+ upload_file.change(
177
+ fn=restore_previous_annotations,
178
+ inputs=[upload_file],
179
+ outputs=[idx, source, hyp, progress, lang_choice],
180
+ )
181
  next_button.click(
182
  fn=annotate,
183
  inputs=[idx, score, comment, annotator],
 
192
  export_file,
193
  ],
194
  )
195
+ export_button.click(
196
+ fn=export_results,
197
+ inputs=[],
198
+ outputs=[
199
+ export_file,
200
+ export_file,
201
+ ], # export_file is bound twice; the second binding updates its visibility and path
202
+ )
203
  idx.change(fn=load_sample, inputs=idx, outputs=[source, hyp])
204
  demo.load(fn=load_sample, inputs=[idx], outputs=[source, hyp])
205
 
 
206
  demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))