Spaces:
Sleeping
Sleeping
import gradio as gr | |
import json | |
import os | |
import tempfile | |
LANG_DIR = "./human_eval"
SAVE_DIR = "./annotations"
os.makedirs(SAVE_DIR, exist_ok=True)

# The module-level globals data, user_annotations and current_lang have been
# removed; that information now lives in the per-session app_state.


def _discover_language_pairs(lang_dir):
    """Return the sorted language pairs available under *lang_dir*.

    A language pair is a sub-directory ``<name>`` that contains the dataset
    file ``<name>/<name>.json``. Stray files and directories without a dataset
    are skipped so they never appear as unusable dropdown choices.

    Returns an empty list when *lang_dir* is missing or is not a directory,
    instead of crashing at import time.
    """
    try:
        entries = os.listdir(lang_dir)
    except (FileNotFoundError, NotADirectoryError):
        return []
    return sorted(
        name
        for name in entries
        if os.path.isfile(os.path.join(lang_dir, name, f"{name}.json"))
    )


language_options = _discover_language_pairs(LANG_DIR)
# --- Function changes ---
# Every function that reads or modifies data, user_annotations or current_lang
# must take app_state as an input and, usually, return it as an output.
def load_data_for_lang(lang_pair, current_app_state):
    """Load the samples of one language pair and start a fresh session state.

    Args:
        lang_pair: Name of the language pair; the dataset is read from
            ``LANG_DIR/<lang_pair>/<lang_pair>.json``.
        current_app_state: The session state dict. It is returned unchanged
            when nothing could be loaded.

    Returns:
        A 5-tuple ``(index, source, hypothesis, progress_text, app_state)``.
        On success the state holds the loaded data, an empty annotation list
        and the new language pair.
    """
    # The dropdown yields None/"" when no language is available; joining that
    # into a path would raise a TypeError.
    if not lang_pair:
        return 0, "", "", "No language pair selected.", current_app_state

    file_path = os.path.join(LANG_DIR, lang_pair, f"{lang_pair}.json")
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            loaded_data = json.load(f)
    except (OSError, ValueError) as err:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        # Keep the previous state so existing annotations are not lost.
        return 0, "", "", f"❌ Could not load {lang_pair}: {err}", current_app_state

    new_app_state = {
        "data": loaded_data,
        "user_annotations": [],  # Reset annotations for the new language
        "current_lang": lang_pair,
    }
    if not loaded_data:
        return 0, "", "", f"0/0 loaded from {lang_pair}", new_app_state
    return (
        0,
        loaded_data[0]["source"],
        loaded_data[0]["hypothesis"],
        f"0/{len(loaded_data)} loaded from {lang_pair}",
        new_app_state,
    )
def restore_previous_annotations(file_obj, current_app_state):
    """Resume an annotation session from a previously exported JSON file.

    Args:
        file_obj: The uploaded file, either a path string or an object whose
            ``name`` attribute is the path; ``None`` when nothing was uploaded.
        current_app_state: The session state dict. It is returned unchanged
            whenever the upload cannot be restored.

    Returns:
        A 6-tuple ``(index, source, hypothesis, progress_text, app_state,
        lang_pair)``. ``index`` is the first sample after the highest
        annotated index; ``lang_pair`` is the value for the language dropdown.
    """

    def failure(message):
        # Keep the dropdown on the language the (unchanged) state refers to,
        # falling back to the first available language.
        fallback_lang = current_app_state.get("current_lang") or (
            language_options[0] if language_options else ""
        )
        return 0, "", "", message, current_app_state, fallback_lang

    if file_obj is None:
        return failure("No file uploaded.")

    # The upload may arrive as a plain path or as a file wrapper with `.name`.
    upload_path = file_obj if isinstance(file_obj, (str, os.PathLike)) else file_obj.name
    try:
        with open(upload_path, "r", encoding="utf-8") as f:
            uploaded_annotations = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return failure("❌ Could not read the uploaded annotation file.")

    if not uploaded_annotations:
        return failure("No annotations found in file.")
    if not isinstance(uploaded_annotations, list) or not all(
        isinstance(rec, dict) for rec in uploaded_annotations
    ):
        return failure("❌ Annotation file must contain a JSON list of records.")

    restored_lang = uploaded_annotations[0].get("lang_pair")
    # The language name comes from an uploaded file: only accept a known
    # language pair so it cannot point outside LANG_DIR.
    if not restored_lang or restored_lang not in language_options:
        return failure("❌ Language pair info missing or file not found.")
    file_path = os.path.join(LANG_DIR, restored_lang, f"{restored_lang}.json")
    if not os.path.exists(file_path):
        return failure("❌ Language pair info missing or file not found.")

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            loaded_data = json.load(f)
    except (OSError, ValueError):
        return failure(f"❌ Could not read data for {restored_lang}.")

    new_app_state = {
        "data": loaded_data,
        "user_annotations": uploaded_annotations,
        "current_lang": restored_lang,
    }

    # Resume after the highest annotated index rather than trusting that the
    # uploaded records are sorted.
    last_index = max(rec.get("index", -1) for rec in uploaded_annotations) + 1

    if last_index >= len(loaded_data):
        return (
            last_index,
            "",
            "",
            f"✅ Already completed {len(loaded_data)} samples of {restored_lang}.",
            new_app_state,
            restored_lang,
        )
    return (
        last_index,
        loaded_data[last_index]["source"],
        loaded_data[last_index]["hypothesis"],
        f"Restored {restored_lang}: {last_index}/{len(loaded_data)}",
        new_app_state,
        restored_lang,  # Updates the language dropdown
    )
def load_sample(i, current_app_state):
    """Return the ``(source, hypothesis)`` texts of sample *i*.

    The state is only read, never modified, so it is not part of the result.
    Two empty strings are returned when no data is loaded or when *i* falls
    outside the loaded data.
    """
    samples = current_app_state["data"]
    if not samples:
        return "", ""

    position = int(i)
    if not 0 <= position < len(samples):
        return "", ""

    record = samples[position]
    return record["source"], record["hypothesis"]
def annotate(index, score, comment, annotator, current_app_state):
    """Save the annotation for the current sample and move to the next one.

    Args:
        index: Position of the sample being annotated (converted with ``int``).
        score: Quality score given by the annotator.
        comment: Free-text comment.
        annotator: Annotator identifier; a record is unique per
            ``(index, annotator)``.
        current_app_state: The session state dict; it is not mutated.

    Returns:
        A 9-tuple matching the outputs of the "Next" button: status text,
        new index, progress text, updates for the score slider, the comment
        box, the previous button and the next button, an update for the
        export file component, and the new state.
    """
    index = int(index)
    app_data = current_app_state["data"]
    app_current_lang = current_app_state["current_lang"]

    # Reject negative indices as well: Python would otherwise wrap around and
    # silently annotate a sample counted from the end of the list.
    if not 0 <= index < len(app_data):
        return (
            "Error: Index out of bounds.",
            index,
            "Error annotating.",
            gr.update(),
            gr.update(),
            gr.update(),
            gr.update(),
            gr.update(visible=False),
            current_app_state,  # Return the original state
        )

    def belongs_to(rec, position):
        # .get() tolerates restored records that lack some keys.
        return rec.get("index") == position and rec.get("annotator") == annotator

    entry = app_data[index]
    record = {
        "index": index,
        "annotator": annotator,
        "lang_pair": app_current_lang,
        "source": entry["source"],
        "hypothesis": entry["hypothesis"],
        "score": score,
        "comment": comment,
    }

    # Build a new list (the state's own list is left untouched): drop any
    # earlier record for this sample/annotator, add the new one, keep order.
    app_user_annotations = [
        rec
        for rec in current_app_state["user_annotations"]
        if not belongs_to(rec, index)
    ]
    app_user_annotations.append(record)
    app_user_annotations.sort(key=lambda rec: rec.get("index", -1))

    new_app_state = {
        "data": app_data,
        "user_annotations": app_user_annotations,
        "current_lang": app_current_lang,
    }

    completed = index + 1
    if completed >= len(app_data):
        return (
            "🎉 All samples annotated!",
            index,
            f"✅ Completed {completed}/{len(app_data)} samples.",
            gr.update(interactive=False),
            gr.update(interactive=False),
            gr.update(interactive=False),
            gr.update(interactive=False),
            gr.update(visible=True),
            new_app_state,
        )

    next_index = index + 1
    # Pre-fill the widgets when the next sample was already annotated.
    existing = next(
        (rec for rec in app_user_annotations if belongs_to(rec, next_index)), {}
    )
    prev_score = existing.get("score", 0)
    prev_comment = existing.get("comment", "")

    progress_text = f"{completed}/{len(app_data)} annotated by {annotator}"
    return (
        "✅ Saved",
        next_index,
        progress_text,
        gr.update(value=prev_score, interactive=True),
        gr.update(value=prev_comment, interactive=True),
        # next_index is at least 1 here, so going back is always possible.
        gr.update(interactive=True),
        gr.update(interactive=True),
        gr.update(visible=False),
        new_app_state,
    )
def go_previous(index, annotator, current_app_state):
    """Step back to the previous sample and show its saved annotation, if any.

    Args:
        index: Current sample position (converted with ``int``).
        annotator: Annotator identifier used to look up the saved record.
        current_app_state: The session state dict; it is only read.

    Returns:
        An 8-tuple matching the outputs of the "Previous" button: new index,
        source text, hypothesis text, score, comment, progress text, and
        updates for the previous and next buttons. The state is not returned
        because it is not modified.
    """
    index = int(index)
    app_data = current_app_state["data"]
    app_user_annotations = current_app_state["user_annotations"]

    if not app_data:  # No data loaded
        return (
            0,
            "",
            "",
            0,
            "",
            "No data loaded.",
            gr.update(interactive=False),
            gr.update(interactive=False),
        )

    # Clamp into the valid range so a stale or out-of-range index can neither
    # wrap around nor raise an IndexError.
    prev_index = min(max(index - 1, 0), len(app_data) - 1)
    entry = app_data[prev_index]

    # .get() tolerates restored records that lack some keys.
    saved = next(
        (
            rec
            for rec in app_user_annotations
            if rec.get("index") == prev_index and rec.get("annotator") == annotator
        ),
        {},
    )

    return (
        prev_index,
        entry["source"],
        entry["hypothesis"],
        saved.get("score", 0),
        saved.get("comment", ""),
        f"{prev_index}/{len(app_data)} annotated by {annotator}",
        gr.update(interactive=prev_index > 0),  # Cannot go before the first sample
        gr.update(interactive=True),
    )
def export_results(current_app_state):
    """Write the session's annotations to a temporary JSON file for download.

    Returns a pair ``(path, update)`` for the download component. When there
    is nothing to export, a warning is shown instead of raising (which would
    crash the app), and ``(None, <hide update>)`` is returned.
    """
    records = current_app_state["user_annotations"]
    if records:
        # delete=False keeps the file on disk after the handle is closed so
        # it can still be served for download.
        with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf-8", suffix=".json", delete=False
        ) as handle:
            handle.write(json.dumps(records, ensure_ascii=False, indent=2))
            download_path = handle.name
        return download_path, gr.update(visible=True, value=download_path)

    gr.Warning("No annotations to export.")
    return None, gr.update(visible=False)
# ======== UI ========
with gr.Blocks() as demo:
    # Per-session state: loaded samples, the user's annotations and the
    # language pair they belong to.
    app_state = gr.State(
        value={"data": [], "user_annotations": [], "current_lang": ""}
    )
    gr.Markdown("## 📝 Direct Assessment Annotation Tool")
    with gr.Row():
        lang_choice = gr.Dropdown(
            label="Choose Language Pair",
            choices=language_options,
            value=language_options[0] if language_options else None,  # Handle empty options
        )
        load_button = gr.Button("🔄 Load Data")
    with gr.Row():
        upload_file = gr.File(
            label="📤 Upload Previous Annotations", file_types=[".json"]
        )
        export_button = gr.Button("📥 Export My Results")
    with gr.Row():
        annotator = gr.Textbox(
            label="Annotator ID",
            placeholder="Enter your name or ID",
            value="annotator_1",
        )
        progress = gr.Textbox(label="Progress", interactive=False)
    idx = gr.Number(value=0, visible=False, label="Current Index")
    source = gr.Textbox(label="Source Sentence", interactive=False, lines=3)
    hyp = gr.Textbox(label="Machine Translation", interactive=False, lines=3)
    score = gr.Slider(0, 100, step=1, label="Translation Quality Score", value=0)
    comment = gr.Textbox(lines=2, placeholder="Optional comment...", label="Comment")
    output = gr.Textbox(label="Status", interactive=False)
    previous_button = gr.Button("⏪Previous", interactive=False)  # Initially disabled
    next_button = gr.Button("⏩Next", interactive=False)  # Initially disabled
    export_file = gr.File(label="Download your results", visible=False, interactive=False)

    # --- Component event handlers ---
    # Finishing a language disables the score slider and the comment box (see
    # the outputs of the Next handler), so loading or restoring data must
    # re-enable them together with the navigation buttons.
    load_button.click(
        fn=load_data_for_lang,
        inputs=[lang_choice, app_state],
        outputs=[idx, source, hyp, progress, app_state],
    ).then(
        # Enable Next, disable Previous, re-enable score and comment
        lambda: (
            gr.update(interactive=True),
            gr.update(interactive=False),
            gr.update(interactive=True),
            gr.update(interactive=True),
        ),
        outputs=[next_button, previous_button, score, comment],
    )
    upload_file.change(
        fn=restore_previous_annotations,
        inputs=[upload_file, app_state],
        # lang_choice is an output so the dropdown follows the restored language
        outputs=[idx, source, hyp, progress, app_state, lang_choice],
    ).then(
        # Enable Next; Previous depends on the restored index
        lambda x: (
            gr.update(interactive=True),
            gr.update(interactive=x != 0),
            gr.update(interactive=True),
            gr.update(interactive=True),
        ),
        inputs=[idx],
        outputs=[next_button, previous_button, score, comment],
    )
    next_button.click(
        fn=annotate,
        inputs=[idx, score, comment, annotator, app_state],
        outputs=[
            output,
            idx,
            progress,
            score,  # Reset/update score for the next item
            comment,  # Reset/update comment for the next item
            previous_button,
            next_button,
            export_file,  # The download component
            app_state,
        ],
    )
    previous_button.click(
        fn=go_previous,
        inputs=[idx, annotator, app_state],
        outputs=[
            idx,
            source,
            hyp,
            score,
            comment,
            progress,
            previous_button,
            next_button,
            # No app_state output: go_previous does not modify it
        ],
    )
    export_button.click(
        fn=export_results,
        inputs=[app_state],
        outputs=[
            export_file,  # For the file content/path
            export_file,  # For updating visibility/value
        ],
    )
    # Reload the displayed sample whenever the index changes, e.g. after
    # loading data or saving an annotation.
    idx.change(fn=load_sample, inputs=[idx, app_state], outputs=[source, hyp])

    # Optional initial load. It is currently not wired to any event; to use
    # it, register it with demo.load using inputs=[app_state] and
    # outputs=[idx, source, hyp, progress, app_state, lang_choice,
    # next_button, previous_button].
    def initial_load_or_message(current_app_state):
        """Load the first available language, or report that none exists."""
        if language_options:
            lang_pair_to_load = language_options[0]
            idx_val, src_val, hyp_val, prog_val, new_app_state = load_data_for_lang(
                lang_pair_to_load, current_app_state
            )
            return (
                idx_val,
                src_val,
                hyp_val,
                prog_val,
                new_app_state,
                lang_pair_to_load,
                gr.update(interactive=True),
                gr.update(interactive=False),
            )
        else:
            return (
                0,
                "No languages found in LANG_DIR.",
                "",
                "Please add language files.",
                current_app_state,
                None,
                gr.update(interactive=False),
                gr.update(interactive=False),
            )

    # On page load nothing is loaded yet, so both navigation buttons start
    # disabled; the Load button enables them.
    demo.load(
        lambda: (gr.update(interactive=False), gr.update(interactive=False)),
        outputs=[next_button, previous_button],
    )
# Script entry point: listen on every interface, on the port given by the
# PORT environment variable (7860 when it is not set).
if __name__ == "__main__":
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)