import gradio as gr import json import os import tempfile LANG_DIR = "./human_eval" SAVE_DIR = "./annotations" os.makedirs(SAVE_DIR, exist_ok=True) # 全局变量 data, user_annotations, current_lang 已被移除 language_options = sorted([f for f in os.listdir(LANG_DIR)]) # --- 函数修改 --- # 每个需要访问或修改 data, user_annotations, current_lang 的函数 # 都需要将 app_state 作为输入,并通常作为输出 def load_data_for_lang(lang_pair, current_app_state): # 接收 app_state file_path = os.path.join(LANG_DIR, lang_pair, f"{lang_pair}.json") with open(file_path, "r", encoding="utf-8") as f: loaded_data = json.load(f) new_app_state = { # 创建新的状态 "data": loaded_data, "user_annotations": [], # Reset annotations for new language "current_lang": lang_pair } if not loaded_data: return 0, "", "", f"0/0 loaded from {lang_pair}", new_app_state return ( 0, loaded_data[0]["source"], loaded_data[0]["hypothesis"], f"0/{len(loaded_data)} loaded from {lang_pair}", new_app_state, # 返回更新后的状态 ) def restore_previous_annotations(file_obj, current_app_state): # 接收 app_state if file_obj is None: # Check if a file was actually uploaded return 0, "", "", "No file uploaded.", current_app_state, language_options[0] if language_options else "" with open(file_obj.name, "r", encoding="utf-8") as f: uploaded_annotations = json.load(f) if not uploaded_annotations: return 0, "", "", "No annotations found in file.", current_app_state, language_options[0] if language_options else "" restored_lang = uploaded_annotations[0].get("lang_pair", None) if not restored_lang or not os.path.exists( os.path.join(LANG_DIR, restored_lang, f"{restored_lang}.json") ): return 0, "", "", "❌ Language pair info missing or file not found.", current_app_state, language_options[0] if language_options else "" file_path = os.path.join(LANG_DIR, restored_lang, f"{restored_lang}.json") with open(file_path, "r", encoding="utf-8") as f: loaded_data = json.load(f) new_app_state = { # 创建新的状态 "data": loaded_data, "user_annotations": uploaded_annotations, "current_lang": restored_lang } last_index = 0 if new_app_state["user_annotations"]: # Check if there are any annotations last_index = new_app_state["user_annotations"][-1].get("index", -1) + 1 if last_index >= len(new_app_state["data"]): return ( last_index, "", "", f"✅ Already completed {len(new_app_state['data'])} samples of {restored_lang}.", new_app_state, # 返回状态 restored_lang, ) return ( last_index, new_app_state["data"][last_index]["source"], new_app_state["data"][last_index]["hypothesis"], f"Restored {restored_lang}: {last_index}/{len(new_app_state['data'])}", new_app_state, # 返回状态 restored_lang, # To update the dropdown ) def load_sample(i, current_app_state): # 接收 app_state # 这个函数只读取状态,不修改,所以不需要返回 app_state # 但仍然需要接收它来获取数据 data_from_state = current_app_state["data"] if not data_from_state or int(i) >= len(data_from_state) or int(i) < 0 : return "", "" entry = data_from_state[int(i)] return entry["source"], entry["hypothesis"] def annotate(index, score, comment, annotator, current_app_state): # 接收 app_state index = int(index) app_data = current_app_state["data"] app_current_lang = current_app_state["current_lang"] app_user_annotations = list(current_app_state["user_annotations"]) # Create a mutable copy if index >= len(app_data): # Safety check return ( "Error: Index out of bounds.", index, f"Error annotating.", gr.update(), gr.update(), gr.update(), gr.update(), gr.update(visible=False), current_app_state # Return original state ) entry = app_data[index] record = { "index": index, "annotator": annotator, "lang_pair": app_current_lang, "source": entry["source"], "hypothesis": entry["hypothesis"], "score": score, "comment": comment, } # Update user_annotations in the copied list app_user_annotations = [ rec for rec in app_user_annotations if not (rec["index"] == index and rec["annotator"] == annotator) # More robust removal ] app_user_annotations.append(record) app_user_annotations.sort(key=lambda x: x["index"]) # Keep sorted if needed # 更新状态 new_app_state = { "data": app_data, "user_annotations": app_user_annotations, "current_lang": app_current_lang } completed = index + 1 if completed >= len(app_data): return ( "🎉 All samples annotated!", index, # or completed f"✅ Completed {completed}/{len(app_data)} samples.", gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False), gr.update(visible=True), new_app_state, # 返回状态 ) next_index = index + 1 next_entry = app_data[next_index] prev_score, prev_comment = 0, "" for rec in new_app_state["user_annotations"]: if rec["index"] == next_index and rec["annotator"] == annotator: prev_score = rec["score"] prev_comment = rec["comment"] break progress_text = f"{completed}/{len(app_data)} annotated by {annotator}" is_at_start = next_index == 0 return ( "✅ Saved", next_index, progress_text, gr.update(value=prev_score, interactive=True), gr.update(value=prev_comment, interactive=True), gr.update(interactive=not is_at_start), gr.update(interactive=True), gr.update(visible=False), new_app_state, # 返回状态 ) def go_previous(index, annotator, current_app_state): # 接收 app_state index = int(index) app_data = current_app_state["data"] app_user_annotations = current_app_state["user_annotations"] if not app_data: # No data loaded return 0, "", "", 0, "", "No data loaded.", gr.update(interactive=False), gr.update(interactive=False) if index <= 0: prev_index = 0 is_at_start = True else: prev_index = index - 1 is_at_start = prev_index == 0 entry = app_data[prev_index] prev_score, prev_comment = 0, "" for rec in app_user_annotations: if rec["index"] == prev_index and rec["annotator"] == annotator: prev_score = rec["score"] prev_comment = rec["comment"] break progress_text = f"{prev_index}/{len(app_data)} annotated by {annotator}" if not app_data: progress_text = "No data loaded." # This function doesn't change the state, so no need to return current_app_state unless it was modified # However, to be consistent with other functions that MIGHT modify state or if you plan to, # it's good practice to include it. In this specific case, it's only reading. return ( prev_index, entry["source"], entry["hypothesis"], prev_score, prev_comment, progress_text, gr.update(interactive=not is_at_start), gr.update(interactive=True), # No app_state in outputs here if it's not being changed ) def export_results(current_app_state): # 接收 app_state app_user_annotations = current_app_state["user_annotations"] if not app_user_annotations: # raise ValueError("No annotations to export.") # This will crash the app # Instead, show a message or disable the button if no annotations gr.Warning("No annotations to export.") return None, gr.update(visible=False) # Create a temporary file for download with tempfile.NamedTemporaryFile( delete=False, suffix=".json", mode="w", encoding="utf-8" ) as tmp: json.dump(app_user_annotations, tmp, ensure_ascii=False, indent=2) tmp_path = tmp.name # Get the path before closing # The file is closed when exiting the 'with' block return tmp_path, gr.update(visible=True, value=tmp_path) # ======== UI ======== with gr.Blocks() as demo: # Define session state app_state = gr.State( value={"data": [], "user_annotations": [], "current_lang": ""} ) gr.Markdown("## 📝 Direct Assessment Annotation Tool") with gr.Row(): lang_choice = gr.Dropdown( label="Choose Language Pair", choices=language_options, value=language_options[0] if language_options else None, # Handle empty options ) load_button = gr.Button("🔄 Load Data") with gr.Row(): upload_file = gr.File( label="📤 Upload Previous Annotations", file_types=[".json"] ) export_button = gr.Button("📥 Export My Results") with gr.Row(): annotator = gr.Textbox( label="Annotator ID", placeholder="Enter your name or ID", value="annotator_1", ) progress = gr.Textbox(label="Progress", interactive=False) idx = gr.Number(value=0, visible=False, label="Current Index") # Added label for clarity if made visible source = gr.Textbox(label="Source Sentence", interactive=False, lines=3) hyp = gr.Textbox(label="Machine Translation", interactive=False, lines=3) score = gr.Slider(0, 100, step=1, label="Translation Quality Score", value=0) comment = gr.Textbox(lines=2, placeholder="Optional comment...", label="Comment") output = gr.Textbox(label="Status", interactive=False) previous_button = gr.Button("⏪Previous", interactive=False) # Initially disabled next_button = gr.Button("⏩Next", interactive=False) # Initially disabled export_file = gr.File(label="Download your results", visible=False, interactive=False) # --- Component Event Handlers --- # Add app_state to inputs and outputs where necessary load_button.click( fn=load_data_for_lang, inputs=[lang_choice, app_state], # Add app_state outputs=[idx, source, hyp, progress, app_state], # Add app_state ).then( lambda: (gr.update(interactive=True), gr.update(interactive=False)), # Enable Next, Disable Prev outputs=[next_button, previous_button] ) upload_file.change( fn=restore_previous_annotations, inputs=[upload_file, app_state], # Add app_state outputs=[idx, source, hyp, progress, app_state, lang_choice], # Add app_state and lang_choice to update dropdown ).then( lambda x: (gr.update(interactive=True), gr.update(interactive=x!=0)), # Enable Next, Prev based on index inputs=[idx], outputs=[next_button, previous_button] ) next_button.click( fn=annotate, inputs=[idx, score, comment, annotator, app_state], # Add app_state outputs=[ output, idx, progress, score, # To reset/update score for next item comment, # To reset/update comment for next item previous_button, next_button, export_file, # This is the download component app_state, # Add app_state ], ) previous_button.click( fn=go_previous, inputs=[idx, annotator, app_state], # Add app_state outputs=[ idx, source, hyp, score, comment, progress, previous_button, next_button, # No app_state output if go_previous doesn't modify it ], ) export_button.click( fn=export_results, inputs=[app_state], # Add app_state outputs=[ export_file, # For the file content/path export_file, # For updating visibility/value ], ) # This loads the sample when the index changes, e.g., after load_data or annotate idx.change(fn=load_sample, inputs=[idx, app_state], outputs=[source, hyp]) # Initial load logic (optional, if you want something on app start) # Consider what should happen if language_options is empty def initial_load_or_message(current_app_state): if language_options: # This duplicates load_data_for_lang slightly, could be refactored lang_pair_to_load = language_options[0] idx_val, src_val, hyp_val, prog_val, new_app_state = load_data_for_lang(lang_pair_to_load, current_app_state) return idx_val, src_val, hyp_val, prog_val, new_app_state, lang_pair_to_load, gr.update(interactive=True), gr.update(interactive=False) else: return 0, "No languages found in LANG_DIR.", "", "Please add language files.", current_app_state, None, gr.update(interactive=False), gr.update(interactive=False) # demo.load is tricky with state management if the function also needs to return the state. # It's often better to trigger an initial load via a button or specific logic after UI setup. # If you must use demo.load and it needs to initialize state, ensure it correctly returns the state. # For simplicity, let's ensure load_button handles enabling next/prev: # demo.load( # fn=initial_load_or_message, # inputs=[app_state], # outputs=[idx, source, hyp, progress, app_state, lang_choice, next_button, previous_button] # ) # A simpler demo.load without initial data loading: demo.load(lambda: (gr.update(interactive=False), gr.update(interactive=False)), outputs=[next_button, previous_button]) if __name__ == "__main__": demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))