import gradio as gr
import os
import json
import pandas as pd
from huggingface_hub import HfApi

# ==============================================================================
# 数据定义 (Data Definition)
# ==============================================================================
DIMENSIONS_DATA = [
    {
        "title": "语义和语用特征",
        "audio": "audio/sample1.wav",
        "desc": "这是“语义和语用特征”维度的文本描述示例。",
        "sub_dims": [
            "记忆一致性:回应者是否能够正确记忆并延续对话信息?是否存在对上下文的误解或不自洽?",
            "逻辑连贯性:回应者是否在语义与对话结构上保持前后一致、合乎逻辑?是否存在前后矛盾的情况?",
            "常见多音字处理:是否能在上下文中正确使用常见多音字?",
            "多语言混杂:是否存在自然的语言切换现象?如中英混杂、文化性表达。",
            "语言不精确性:是否出现打断、自纠正等类似人类的语言行为?是否存在如“差不多”、“可能吧”这类表达不确定性的用法?",
            "填充词使用:如“呃”、“嗯”等自然语流中的停顿或过渡词,使用是否得体且自然?",
            "隐喻与语用用意:是否展现出复杂的语用功能(如讽刺、劝阻、暗示等),以及对话外含义层次的理解能力?"
        ],
        "reference": """

🔴 记忆一致性: 在说话人明确表示自己已经步入中年后,回应者仍做出了他是青少年的错误假定

🔴 逻辑连贯性: 回应者在第一轮对话中说他说的话并不重要,但在第二轮对话中说他说的话“能够改变你的一生”

🔴 常见多音字处理: 该条对话中未出现多音字

🟢 多语言混杂: 回应者在回复中夹杂了"I see",回复中存在多语言混杂

🔴 语言不精确性: 回应者使用的语言中未夹杂任何表达不确定性的用法

🟢 填充词使用: 回应者在回复中使用了“嗯”这个填充词

🔴 隐喻与语用用意: 回应者误将说话人的挖苦当成了真心的赞扬

""" }, { "title": "非生理性副语言特征", "audio": "audio/sample1.wav", "desc": "这是“非生理性副语言特征”维度的文本描述示例。", "sub_dims": [ "节奏:回应者是否存在自然的停顿?语速是否存在自然、流畅的变化?", "语调:在表达疑问、惊讶、强调时,回应者的音调是否会自然上扬或下降?是否表现出符合语境的变化?", "重读:是否存在句中关键词上有意识地加重语气?", "辅助性发声:是否存在叹气、短哼、笑声等辅助情绪的非语言性发声?这些发声是否在语境中正确表达了情绪或意图?" ], "reference": """

🟢 节奏: 回应者的语速变化、停顿都较为自然

🔴 语调: 回应者的音调不存在显著变化

🔴 重读: 回应者语气不存在显著变化

🔴 辅助性发声: 尽管回应者发出了叹气的声音,但是该发声并未传递出语境下应有的失落情绪

""" }, { "title": "生理性副语言特征", "audio": "audio/sample1.wav", "desc": "这是“生理性副语言特征”维度的文本描述示例。", "sub_dims": [ "微生理杂音:回应中是否出现如呼吸声、口水音、气泡音等无意识发声?这些发声是否自然地穿插在恰当的语流节奏当中?", "发音不稳定性:回应者是否出现连读、颤音、鼻音等不稳定发音?", "口音:(如果存在的话)回应者的口音是否自然?是否存在机械式的元辅音发音风格?" ], "reference": """

🔴 微生理杂音: 回应中不存在任何无意识发声

🔴 发音不稳定性: 回应者的咬字清晰、发音标准

🟢 口音: 回应者的口音自然

""" }, { "title": "机械人格", "audio": "audio/sample1.wav", "desc": "这是“机械人格”维度的文本描述示例。", "sub_dims": [ "谄媚现象:回应者是否频繁地赞同用户、重复用户的说法、不断表示感谢或道歉?是否存在“无论用户说什么都肯定或支持”的语气模式?", "书面化表达:回应的内容是否缺乏口语化特征?句式是否整齐划一、结构完整却缺乏真实交流中的松散感或灵活性?是否使用抽象或泛泛的措辞来回避具体问题?" ], "reference": """

🟢 谄媚现象: 回应者并未明显表现出谄媚现象的特征

🔴 书面化表达: 回应的内容结构过于缜密,符合书面用语特征

""" }, { "title": "情感表达", "audio": "audio/sample1.wav", "desc": "这是“情感表达”维度的文本描述示例。", "sub_dims": [ "语义层面:回应者的语言内容是否体现出符合上下文的情绪反应?是否表达了人类对某些情境应有的情感态度?", "声学层面:回应者的声音情绪是否与语义一致?语调是否有自然的高低起伏来表达情绪变化?是否出现回应内容与声音传达出的情绪不吻合的现象?" ], "reference": """

🟢 语义层面: 说话者阐述了一件伤心的事情,而回应者的语言内容中体现出了恰当的悲伤情绪

🔴 声学层面: 回应者的语音特征与情感表达不匹配。语言内容中表达出了悲伤的情感,但语音特征平淡、缺少变化

""" } ] DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA] QUESTION_SET = [ {"audio": "audio/Ses02F_impro01.wav", "desc": "这是第一个测试文件的描述",}, {"audio": "audio/Ses02F_impro02.wav", "desc": "这是第二个测试文件的描述",}, {"audio": "audio/Ses02F_impro03.wav", "desc": "这是第三个测试文件的描述",}, ] MAX_SUB_DIMS = max(len(d['sub_dims']) for d in DIMENSIONS_DATA) # ============================================================================== # 功能函数定义 (Function Definitions) # ============================================================================== def start_challenge(): return gr.update(visible=False), gr.update(visible=True) def toggle_education_other(choice): is_other = (choice == "其他(请注明)") return gr.update(visible=is_other, interactive=is_other, value="") def check_info_complete(age, gender, education, education_other): if age and gender and education: if education == "其他(请注明)" and not education_other.strip(): return gr.update(interactive=False) return gr.update(interactive=True) return gr.update(interactive=False) def show_sample_page_and_init(age, gender, education, education_other, user_data): final_edu = education_other if education == "其他(请注明)" else education user_data.update({"age": age, "gender": gender, "education": final_edu}) first_dim_title = DIMENSION_TITLES[0] return gr.update(visible=False), gr.update(visible=True), user_data, first_dim_title def update_sample_view(dimension_title): dim_data = next((d for d in DIMENSIONS_DATA if d["title"] == dimension_title), None) if dim_data: return ( gr.update(value=dim_data["audio"]), gr.update(value=dim_data["desc"]), gr.update(choices=dim_data["sub_dims"], value=[], interactive=True), gr.update(value=dim_data["reference"]) ) return gr.update(), gr.update(), gr.update(), gr.update() def update_test_dimension_view(d_idx, selections): dimension = DIMENSIONS_DATA[d_idx] progress_d = f"维度 {d_idx + 1} / {len(DIMENSIONS_DATA)}: **{dimension['title']}**" existing_scores = selections.get(dimension['title'], {}) slider_updates = [] for i in range(MAX_SUB_DIMS): if i < len(dimension['sub_dims']): sub_dim_label = dimension['sub_dims'][i] value = existing_scores.get(sub_dim_label, 0) slider_updates.append(gr.update(visible=True, label=sub_dim_label, value=value)) else: slider_updates.append(gr.update(visible=False, value=0)) prev_btn_update = gr.update(interactive=(d_idx > 0)) next_btn_update = gr.update( value="进入最终判断" if d_idx == len(DIMENSIONS_DATA) - 1 else "下一维度", interactive=True ) return [gr.update(value=progress_d), prev_btn_update, next_btn_update] + slider_updates def init_test_question(user_data, q_idx): d_idx = 0 question = QUESTION_SET[q_idx] progress_q = f"第 {q_idx + 1} / {len(QUESTION_SET)} 题" initial_updates = update_test_dimension_view(d_idx, {}) dim_title_update, prev_btn_update, next_btn_update = initial_updates[:3] slider_updates = initial_updates[3:] return ( gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), q_idx, d_idx, {}, gr.update(value=progress_q), dim_title_update, gr.update(value=question['audio']), gr.update(value=question['desc']), prev_btn_update, next_btn_update, gr.update(interactive=False), gr.update(interactive=False), ) + tuple(slider_updates) def navigate_dimensions(direction, q_idx, d_idx, selections, *slider_values): current_dim_data = DIMENSIONS_DATA[d_idx] current_sub_dims = current_dim_data['sub_dims'] scores = {sub_dim: slider_values[i] for i, sub_dim in enumerate(current_sub_dims)} selections[current_dim_data['title']] = scores new_d_idx = d_idx + (1 if direction == 
"next" else -1) if direction == "next" and d_idx == len(DIMENSIONS_DATA) - 1: return ( gr.update(visible=False), gr.update(visible=True), q_idx, d_idx, selections, gr.update(), gr.update(value=""), gr.update(), gr.update(), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=False), gr.update(value="下一维度", interactive=False), ) + (gr.update(),) * MAX_SUB_DIMS else: view_updates = update_test_dimension_view(new_d_idx, selections) dim_title_update, prev_btn_update, next_btn_update = view_updates[:3] slider_updates = view_updates[3:] return ( gr.update(), gr.update(), q_idx, new_d_idx, selections, gr.update(), dim_title_update, gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=False), prev_btn_update, next_btn_update, ) + tuple(slider_updates) def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_results, user_data): selections["final_choice"] = final_choice final_question_result = { "question_id": q_idx, "audio_file": QUESTION_SET[q_idx]['audio'], "user_data": user_data, "selections": selections } all_results.append(final_question_result) q_idx += 1 if q_idx < len(QUESTION_SET): init_q_updates = init_test_question(user_data, q_idx) return init_q_updates + (all_results, gr.update(value="")) else: result_str = "### 测试全部完成!\n\n你的提交结果概览:\n" for res in all_results: result_str += f"\n#### 题目: {res['audio_file']}\n" result_str += f"##### 最终判断: **{res['selections'].get('final_choice', '未选择')}**\n" for dim_title, dim_data in res['selections'].items(): if dim_title == 'final_choice': continue result_str += f"- **{dim_title}**:\n" for sub_dim, score in dim_data.items(): result_str += f" - *{sub_dim[:20]}...*: {score}/5\n" # This function now handles the upload to Hugging Face save_all_results_to_file(all_results, user_data) return ( gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), q_idx, d_idx, {}, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), ) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str) # MODIFIED FUNCTION TO SAVE TO HUGGING FACE DATASET def save_all_results_to_file(all_results, user_data): """ Packages results and uploads them as a single JSON file to a Hugging Face Dataset. """ # IMPORTANT: Change this to your Hugging Face username and dataset repo name repo_id = "Hu6ery/Turing-Test-Submissions" # Create a unique filename for the submission username = user_data.get("age", "user") filename = f"submission_{username}_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.json" # Package all data into a single dictionary final_data_package = { "user_info": user_data, "results": all_results } # Convert the dictionary to a JSON string in memory json_string = json.dumps(final_data_package, ensure_ascii=False, indent=4) # Get the Hugging Face token from the environment secrets hf_token = os.getenv("HF_TOKEN") if not hf_token: print("HF_TOKEN not found. Cannot upload to the Hub. 
# MODIFIED FUNCTION TO SAVE TO HUGGING FACE DATASET
def save_all_results_to_file(all_results, user_data):
    """
    Packages results and uploads them as a single JSON file to a Hugging Face Dataset.
    """
    # IMPORTANT: Change this to your Hugging Face username and dataset repo name
    repo_id = "Hu6ery/Turing-Test-Submissions"

    # Create a unique filename for the submission.
    # (The age bracket is the only quasi-identifier collected, so it stands in
    # for a username here.)
    username = user_data.get("age", "user")
    filename = f"submission_{username}_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.json"

    # Package all data into a single dictionary
    final_data_package = {
        "user_info": user_data,
        "results": all_results
    }

    # Convert the dictionary to a JSON string in memory
    json_string = json.dumps(final_data_package, ensure_ascii=False, indent=4)

    # Get the Hugging Face token from the environment secrets
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        print("HF_TOKEN not found. Cannot upload to the Hub. Please set it in Space secrets.")
        return

    try:
        # Instantiate the HfApi client
        api = HfApi()
        # Upload the JSON string as a file to the specified dataset repository
        api.upload_file(
            path_or_fileobj=bytes(json_string, "utf-8"),
            path_in_repo=f"data/{filename}",  # We recommend saving to a subfolder
            repo_id=repo_id,
            repo_type="dataset",
            token=hf_token,
            commit_message=f"Add new submission from {username}"
        )
        print(f"Successfully uploaded results to dataset: {repo_id}")
    except Exception as e:
        print(f"Error uploading to Hugging Face Hub: {e}")


def toggle_reference_view(current):
    # Swap between the interactive checklist view and the reference-answer view;
    # the button label flips between "参考" and "返回" accordingly.
    if current == "参考":
        return gr.update(visible=False), gr.update(visible=True), gr.update(value="返回")
    else:
        return gr.update(visible=True), gr.update(visible=False), gr.update(value="参考")


def back_to_welcome():
    # The return order must match the outputs bound to back_to_welcome_btn:
    # 7 page-visibility updates first, then the 5 reset state values.
    return (
        gr.update(visible=True),   # welcome_page
        gr.update(visible=False),  # info_page
        gr.update(visible=False),  # sample_page
        gr.update(visible=False),  # pretest_page
        gr.update(visible=False),  # test_page
        gr.update(visible=False),  # final_judgment_page
        gr.update(visible=False),  # result_page
        {},                        # user_data_state
        0,                         # current_question_index
        0,                         # current_test_dimension_index
        {},                        # current_question_selections
        [],                        # test_results
    )
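# Optional helper (a sketch, not part of the app's runtime flow): pull every
# submission back down from the same dataset repo for offline analysis.
# snapshot_download is part of huggingface_hub's public API; the function name
# download_all_submissions and the local_dir default are our own choices.
def download_all_submissions(local_dir="submissions"):
    from huggingface_hub import snapshot_download
    # token is only needed if the submissions dataset is private.
    return snapshot_download(
        repo_id="Hu6ery/Turing-Test-Submissions",
        repo_type="dataset",
        local_dir=local_dir,
        token=os.getenv("HF_TOKEN"),
    )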
# ==============================================================================
# Gradio 界面定义 (Gradio UI Definition)
# ==============================================================================
with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px !important}") as demo:
    # --- 状态变量 (State Variables) ---
    user_data_state = gr.State({})
    current_question_index = gr.State(0)
    current_test_dimension_index = gr.State(0)
    current_question_selections = gr.State({})
    test_results = gr.State([])

    # --- 页面 (Pages) ---
    welcome_page = gr.Column(visible=True)
    info_page = gr.Column(visible=False)
    sample_page = gr.Column(visible=False)
    pretest_page = gr.Column(visible=False)
    test_page = gr.Column(visible=False)
    final_judgment_page = gr.Column(visible=False)
    result_page = gr.Column(visible=False)
    pages = {
        "welcome": welcome_page, "info": info_page, "sample": sample_page,
        "pretest": pretest_page, "test": test_page,
        "final_judgment": final_judgment_page, "result": result_page
    }

    with welcome_page:
        gr.Markdown("# AI 识破者\n你将听到一系列对话,请判断哪个回应者是 AI。")
        start_btn = gr.Button("开始挑战", variant="primary")

    with info_page:
        gr.Markdown("## 请提供一些基本信息")
        age_input = gr.Radio(["18岁以下", "18-25岁", "26-35岁", "36-50岁", "50岁以上"], label="年龄")
        gender_input = gr.Radio(["男", "女", "其他"], label="性别")
        education_input = gr.Radio(["高中及以下", "本科", "硕士", "博士", "其他(请注明)"], label="学历")
        education_other_input = gr.Textbox(label="请填写你的学历", visible=False, interactive=False)
        submit_info_btn = gr.Button("提交并开始学习样例", variant="primary", interactive=False)

    with sample_page:
        gr.Markdown("## 样例分析\n请选择一个维度进行学习。所有维度共用同一个样例音频。")
        sample_dimension_selector = gr.Radio(DIMENSION_TITLES, label="选择学习维度", value=DIMENSION_TITLES[0])
        with gr.Row():
            with gr.Column(scale=1):
                sample_audio = gr.Audio(label="样例音频", value=DIMENSIONS_DATA[0]["audio"])
                sample_desc = gr.Textbox(label="文本描述", interactive=False, value=DIMENSIONS_DATA[0]["desc"])
            with gr.Column(scale=2):
                with gr.Column(visible=True) as interactive_view:
                    interactive_checkbox_group = gr.CheckboxGroup(label="维度特征", choices=DIMENSIONS_DATA[0]["sub_dims"], interactive=True)
                with gr.Column(visible=False) as reference_view:
                    gr.Markdown("### 参考答案解析")
                    reference_text = gr.Markdown(value=DIMENSIONS_DATA[0]["reference"])
        reference_btn = gr.Button("参考")
        go_to_pretest_btn = gr.Button("我明白了,开始测试", variant="primary")

    with pretest_page:
        gr.Markdown("## 测试说明\n"
                    "- 对于每一道题,你都需要对全部 **5 个维度** 进行评估。\n"
                    "- 在每个维度下,请为出现的每个特征 **从0到5打分**。\n"
                    "- 完成5个维度的打分后,你将需要做出“人类”或“机器人”的 **最终判断**。\n"
                    "- 你可以使用“上一维度”和“下一维度”按钮在5个维度间自由切换和修改分数。")
        go_to_test_btn = gr.Button("开始测试", variant="primary")

    with test_page:
        gr.Markdown("## 正式测试")
        question_progress_text = gr.Markdown()
        test_dimension_title = gr.Markdown()
        test_audio = gr.Audio(label="测试音频")
        test_desc = gr.Textbox(label="文本描述", interactive=False)
        gr.Markdown("--- \n ### 请为以下特征打分 (0-5分)")
        test_sliders = [
            gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True)
            for i in range(MAX_SUB_DIMS)
        ]
        with gr.Row():
            prev_dim_btn = gr.Button("上一维度")
            next_dim_btn = gr.Button("下一维度", variant="primary")

    with final_judgment_page:
        gr.Markdown("## 最终判断")
        gr.Markdown("您已完成对所有维度的评分。请根据您的综合印象,做出最终判断。")
        final_human_robot_radio = gr.Radio(["👤 人类", "🤖 机器人"], label="请判断回应者类型 (必填)", interactive=False)
        submit_final_answer_btn = gr.Button("提交本题答案", variant="primary", interactive=False)

    with result_page:
        gr.Markdown("## 测试完成")
        result_text = gr.Markdown()
        back_to_welcome_btn = gr.Button("返回主界面", variant="primary")

    # ==============================================================================
    # 事件绑定 (Event Binding) & IO 列表定义
    # ==============================================================================
    test_init_outputs = [
        pretest_page, test_page, final_judgment_page, result_page,
        current_question_index, current_test_dimension_index, current_question_selections,
        question_progress_text, test_dimension_title, test_audio, test_desc,
        prev_dim_btn, next_dim_btn,
        final_human_robot_radio, submit_final_answer_btn,
    ] + test_sliders

    nav_inputs = [current_question_index, current_test_dimension_index, current_question_selections] + test_sliders
    nav_outputs = [
        test_page, final_judgment_page,
        current_question_index, current_test_dimension_index, current_question_selections,
        question_progress_text, test_dimension_title, test_audio, test_desc,
        final_human_robot_radio, submit_final_answer_btn,
        prev_dim_btn, next_dim_btn,
    ] + test_sliders

    full_outputs_with_results = test_init_outputs + [test_results, result_text]

    start_btn.click(fn=start_challenge, outputs=[welcome_page, info_page])

    for comp in [age_input, gender_input, education_input, education_other_input]:
        comp.change(fn=check_info_complete,
                    inputs=[age_input, gender_input, education_input, education_other_input],
                    outputs=submit_info_btn)
    education_input.change(fn=toggle_education_other, inputs=education_input, outputs=education_other_input)

    submit_info_btn.click(fn=show_sample_page_and_init,
                          inputs=[age_input, gender_input, education_input, education_other_input, user_data_state],
                          outputs=[info_page, sample_page, user_data_state, sample_dimension_selector])

    sample_dimension_selector.change(fn=update_sample_view,
                                     inputs=sample_dimension_selector,
                                     outputs=[sample_audio, sample_desc, interactive_checkbox_group, reference_text])
    reference_btn.click(fn=toggle_reference_view, inputs=reference_btn,
                        outputs=[interactive_view, reference_view, reference_btn])
    go_to_pretest_btn.click(lambda: (gr.update(visible=False), gr.update(visible=True)),
                            outputs=[sample_page, pretest_page])

    go_to_test_btn.click(
        fn=lambda user: init_test_question(user, 0) + ([], gr.update()),
        inputs=[user_data_state],
        outputs=full_outputs_with_results
    )

    prev_dim_btn.click(
        fn=lambda q, d, s, *sliders: navigate_dimensions("prev", q, d, s, *sliders),
        inputs=nav_inputs,
        outputs=nav_outputs
    )
    next_dim_btn.click(
        fn=lambda q, d, s, *sliders: navigate_dimensions("next", q, d, s, *sliders),
        inputs=nav_inputs,
        outputs=nav_outputs
    )
    submit_final_answer_btn.click(
        fn=submit_question_and_advance,
        inputs=[current_question_index, current_test_dimension_index, current_question_selections,
                final_human_robot_radio, test_results, user_data_state],
        outputs=full_outputs_with_results
    )

    back_to_welcome_btn.click(fn=back_to_welcome,
                              outputs=list(pages.values()) + [user_data_state, current_question_index,
                                                              current_test_dimension_index,
                                                              current_question_selections, test_results])

# ==============================================================================
# 程序入口 (Entry Point)
# ==============================================================================
if __name__ == "__main__":
    if not os.path.exists("audio"):
        os.makedirs("audio")
    # A quick check to see if we're in a deployed Space, to avoid local errors.
    if "SPACE_ID" in os.environ:
        print("Running in a Hugging Face Space, checking for audio files...")
        # In a real deployment, you'd ensure the audio files are in the repo.
        # This is just a placeholder check.
        all_files = [q["audio"] for q in QUESTION_SET] + [d["audio"] for d in DIMENSIONS_DATA]
        for audio_file in set(all_files):
            if not os.path.exists(audio_file):
                print(f"⚠️ Warning: Audio file not found: {audio_file}")
    demo.launch(debug=True)