Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
@@ -3,26 +3,24 @@ import json
|
|
3 |
import os
|
4 |
import tempfile
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
SAVE_DIR = "./annotations" # 保存标注记录的目录
|
9 |
os.makedirs(SAVE_DIR, exist_ok=True)
|
10 |
|
11 |
-
# ======== 初始化数据结构 ========
|
12 |
data = []
|
13 |
user_annotations = []
|
|
|
14 |
|
15 |
-
# ======== 获取可用语言对列表 ========
|
16 |
language_options = sorted([f for f in os.listdir(LANG_DIR)])
|
17 |
|
18 |
|
19 |
-
# ======== 加载选择的语言对数据 ========
|
20 |
def load_data_for_lang(lang_pair):
|
21 |
-
global data, user_annotations
|
22 |
file_path = os.path.join(LANG_DIR, lang_pair, f"{lang_pair}.json")
|
23 |
with open(file_path, "r", encoding="utf-8") as f:
|
24 |
data = json.load(f)
|
25 |
user_annotations = []
|
|
|
26 |
return (
|
27 |
0,
|
28 |
data[0]["source"],
|
@@ -31,7 +29,45 @@ def load_data_for_lang(lang_pair):
|
|
31 |
)
|
32 |
|
33 |
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
def load_sample(i):
|
36 |
if not data:
|
37 |
return "", ""
|
@@ -39,13 +75,14 @@ def load_sample(i):
|
|
39 |
return entry["source"], entry["hypothesis"]
|
40 |
|
41 |
|
42 |
-
# ======== 提交打分并进入下一条 ========
|
43 |
def annotate(index, score, comment, annotator):
|
|
|
44 |
index = int(index)
|
45 |
entry = data[index]
|
46 |
record = {
|
47 |
"index": index,
|
48 |
"annotator": annotator,
|
|
|
49 |
"source": entry["source"],
|
50 |
"hypothesis": entry["hypothesis"],
|
51 |
"score": score,
|
@@ -85,19 +122,20 @@ def annotate(index, score, comment, annotator):
|
|
85 |
)
|
86 |
|
87 |
|
88 |
-
# ======== 导出打分结果 ========
|
89 |
def export_results():
    """Write the annotations collected so far to a temporary JSON file.

    Returns:
        The path of the ``.json`` file. It is created with ``delete=False``,
        so it still exists after this function returns.
    """
    # The context manager closes the handle even if serialisation raises;
    # a manual close() placed after json.dump would be skipped in that case.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".json", mode="w", encoding="utf-8"
    ) as tmp:
        json.dump(user_annotations, tmp, ensure_ascii=False, indent=2)
    return tmp.name
|
96 |
|
97 |
|
98 |
-
# ======== UI
|
99 |
with gr.Blocks() as demo:
|
100 |
-
gr.Markdown("## Direct Assessment Annotation Tool")
|
101 |
|
102 |
with gr.Row():
|
103 |
lang_choice = gr.Dropdown(
|
@@ -107,6 +145,12 @@ with gr.Blocks() as demo:
|
|
107 |
)
|
108 |
load_button = gr.Button("🔄 Load Data")
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
with gr.Row():
|
111 |
annotator = gr.Textbox(
|
112 |
label="Annotator ID",
|
@@ -122,16 +166,18 @@ with gr.Blocks() as demo:
|
|
122 |
comment = gr.Textbox(lines=2, placeholder="Optional comment...", label="Comment")
|
123 |
output = gr.Textbox(label="Status", interactive=False)
|
124 |
next_button = gr.Button("Submit and Next")
|
125 |
-
|
126 |
-
export_button = gr.Button("📥 Export My Results")
|
127 |
export_file = gr.File(label="Download your results", visible=False)
|
128 |
|
129 |
-
# 行为绑定
|
130 |
load_button.click(
|
131 |
fn=load_data_for_lang,
|
132 |
inputs=[lang_choice],
|
133 |
outputs=[idx, source, hyp, progress],
|
134 |
)
|
|
|
|
|
|
|
|
|
|
|
135 |
next_button.click(
|
136 |
fn=annotate,
|
137 |
inputs=[idx, score, comment, annotator],
|
@@ -146,9 +192,15 @@ with gr.Blocks() as demo:
|
|
146 |
export_file,
|
147 |
],
|
148 |
)
|
149 |
-
export_button.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
idx.change(fn=load_sample, inputs=idx, outputs=[source, hyp])
|
151 |
demo.load(fn=load_sample, inputs=[idx], outputs=[source, hyp])
|
152 |
|
153 |
-
# ======== 启动应用 ========
|
154 |
demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
|
|
|
3 |
import os
|
4 |
import tempfile
|
5 |
|
6 |
+
# Root folder with one sub-directory per language pair; the loaders read
# "<pair>/<pair>.json" from it.
LANG_DIR = "./human_eval"
# Directory for saved annotation records; created at import time if missing.
SAVE_DIR = "./annotations"
os.makedirs(SAVE_DIR, exist_ok=True)

# Module-level session state, rebound by the handlers through `global`.
data = []  # samples of the currently loaded language pair
user_annotations = []  # annotation records produced in this session
current_lang = ""  # name of the loaded language pair ("" until one is loaded)

# Keep only entries that actually ship "<name>/<name>.json". Stray files in
# LANG_DIR (README, .DS_Store, ...) would otherwise be listed as language
# pairs and fail as soon as they are opened.
language_options = sorted(
    name
    for name in os.listdir(LANG_DIR)
    if os.path.isfile(os.path.join(LANG_DIR, name, f"{name}.json"))
)
|
15 |
|
16 |
|
|
|
17 |
def load_data_for_lang(lang_pair):
|
18 |
+
global data, user_annotations, current_lang
|
19 |
file_path = os.path.join(LANG_DIR, lang_pair, f"{lang_pair}.json")
|
20 |
with open(file_path, "r", encoding="utf-8") as f:
|
21 |
data = json.load(f)
|
22 |
user_annotations = []
|
23 |
+
current_lang = lang_pair
|
24 |
return (
|
25 |
0,
|
26 |
data[0]["source"],
|
|
|
29 |
)
|
30 |
|
31 |
|
32 |
+
def restore_previous_annotations(file_obj):
    """Resume an annotation session from a previously exported JSON file.

    Reads the uploaded annotation list, reloads the dataset of the language
    pair named in its first record, and moves to the sample that follows the
    last annotated one.

    Args:
        file_obj: Value delivered by the upload component: an object exposing
            the file path as ``.name``, a plain path string, or ``None`` when
            the upload was cleared.

    Returns:
        A 5-tuple ``(index, source, hypothesis, status, lang_pair)``, one item
        per output the handler is bound to. On failure paths the last item is
        ``gr.update()`` so the language selection is left as it was.

    Raises:
        json.JSONDecodeError: If the uploaded file or the dataset file is not
            valid JSON.
    """
    global data, user_annotations, current_lang

    # Clearing the upload widget also fires the change event with no file;
    # leave every output untouched in that case.
    if file_obj is None:
        return tuple(gr.update() for _ in range(5))

    # Accept both file wrappers (".name") and plain path strings.
    upload_path = getattr(file_obj, "name", file_obj)
    with open(upload_path, "r", encoding="utf-8") as f:
        restored = json.load(f)

    # Every early return carries five values: a shorter tuple would not match
    # the five bound outputs.
    if not restored:
        return 0, "", "", "No annotations found.", gr.update()

    restored_lang = restored[0].get("lang_pair", None)
    # The name comes from an uploaded file, so only accept a direct child of
    # LANG_DIR; this rejects values such as "../x" before a path is built.
    lang_is_known = (
        isinstance(restored_lang, str)
        and bool(restored_lang)
        and restored_lang in os.listdir(LANG_DIR)
    )
    file_path = (
        os.path.join(LANG_DIR, restored_lang, f"{restored_lang}.json")
        if lang_is_known
        else None
    )
    if file_path is None or not os.path.exists(file_path):
        return (
            0,
            "",
            "",
            "❌ Language pair info missing or file not found.",
            gr.update(),
        )

    with open(file_path, "r", encoding="utf-8") as f:
        restored_data = json.load(f)

    # Commit to the shared state only after everything validated, so a bad
    # upload cannot wipe the annotations of the running session.
    data = restored_data
    user_annotations = restored
    current_lang = restored_lang

    # Back to last index
    last_index = user_annotations[-1]["index"] + 1
    if last_index >= len(data):
        return (
            last_index,
            "",
            "",
            f"✅ Already completed {len(data)} samples of {restored_lang}.",
            restored_lang,
        )

    return (
        last_index,
        data[last_index]["source"],
        data[last_index]["hypothesis"],
        f"Restored {restored_lang}: {last_index}/{len(data)}",
        restored_lang,
    )
|
69 |
+
|
70 |
+
|
71 |
def load_sample(i):
|
72 |
if not data:
|
73 |
return "", ""
|
|
|
75 |
return entry["source"], entry["hypothesis"]
|
76 |
|
77 |
|
|
|
78 |
def annotate(index, score, comment, annotator):
|
79 |
+
global current_lang
|
80 |
index = int(index)
|
81 |
entry = data[index]
|
82 |
record = {
|
83 |
"index": index,
|
84 |
"annotator": annotator,
|
85 |
+
"lang_pair": current_lang,
|
86 |
"source": entry["source"],
|
87 |
"hypothesis": entry["hypothesis"],
|
88 |
"score": score,
|
|
|
122 |
)
|
123 |
|
124 |
|
|
|
125 |
def export_results():
    """Write the session's annotations to a temporary JSON file for download.

    Returns:
        A pair ``(path, update)``: the path of the written ``.json`` file and
        a ``gr.update`` that makes the file component visible and points it at
        the same path. The file is created with ``delete=False``, so it still
        exists after this function returns.

    Raises:
        ValueError: If there are no annotations to export.
    """
    if not user_annotations:
        raise ValueError("No annotations to export.")
    # The context manager closes the handle even if serialisation raises;
    # a manual close() placed after json.dump would be skipped in that case.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".json", mode="w", encoding="utf-8"
    ) as tmp:
        json.dump(user_annotations, tmp, ensure_ascii=False, indent=2)
    return tmp.name, gr.update(visible=True, value=tmp.name)
|
134 |
|
135 |
|
136 |
+
# ======== UI ========
|
137 |
with gr.Blocks() as demo:
|
138 |
+
gr.Markdown("## 📝 Direct Assessment Annotation Tool")
|
139 |
|
140 |
with gr.Row():
|
141 |
lang_choice = gr.Dropdown(
|
|
|
145 |
)
|
146 |
load_button = gr.Button("🔄 Load Data")
|
147 |
|
148 |
+
with gr.Row():
|
149 |
+
upload_file = gr.File(
|
150 |
+
label="📤 Upload Previous Annotations", file_types=[".json"]
|
151 |
+
)
|
152 |
+
export_button = gr.Button("📥 Export My Results")
|
153 |
+
|
154 |
with gr.Row():
|
155 |
annotator = gr.Textbox(
|
156 |
label="Annotator ID",
|
|
|
166 |
comment = gr.Textbox(lines=2, placeholder="Optional comment...", label="Comment")
|
167 |
output = gr.Textbox(label="Status", interactive=False)
|
168 |
next_button = gr.Button("Submit and Next")
|
|
|
|
|
169 |
export_file = gr.File(label="Download your results", visible=False)
|
170 |
|
|
|
171 |
load_button.click(
|
172 |
fn=load_data_for_lang,
|
173 |
inputs=[lang_choice],
|
174 |
outputs=[idx, source, hyp, progress],
|
175 |
)
|
176 |
+
upload_file.change(
|
177 |
+
fn=restore_previous_annotations,
|
178 |
+
inputs=[upload_file],
|
179 |
+
outputs=[idx, source, hyp, progress, lang_choice],
|
180 |
+
)
|
181 |
next_button.click(
|
182 |
fn=annotate,
|
183 |
inputs=[idx, score, comment, annotator],
|
|
|
192 |
export_file,
|
193 |
],
|
194 |
)
|
195 |
+
export_button.click(
|
196 |
+
fn=export_results,
|
197 |
+
inputs=[],
|
198 |
+
outputs=[
|
199 |
+
export_file,
|
200 |
+
export_file,
|
201 |
+
], # 绑定两次 export_file,第二个用于更新它的可见性和路径
|
202 |
+
)
|
203 |
idx.change(fn=load_sample, inputs=idx, outputs=[source, hyp])
|
204 |
demo.load(fn=load_sample, inputs=[idx], outputs=[source, hyp])
|
205 |
|
|
|
206 |
demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
|