# NOTE: removed a Hugging Face Spaces page banner ("Spaces: Sleeping") that was
# captured along with the source when this file was scraped.
import gradio as gr
import os
import json
import pandas as pd
from huggingface_hub import HfApi
# ==============================================================================
# Data definition
# ==============================================================================
# Learning/sample dimensions. Each entry holds:
#   title     - dimension name shown in the UI
#   audio     - sample clip (shared by all dimensions on the sample page)
#   desc      - short textual description of the sample
#   sub_dims  - sub-dimension prompts; they become the checkbox/slider labels
#               AND the keys under which scores are stored in `selections`
#   reference - HTML reference analysis (🟢 = human-like trait present,
#               🔴 = trait absent or handled poorly)
DIMENSIONS_DATA = [
    {
        "title": "语义和语用特征",
        "audio": "audio/sample1.wav",
        "desc": "这是“语义和语用特征”维度的文本描述示例。",
        "sub_dims": [
            "记忆一致性:回应者是否能够正确记忆并延续对话信息?是否存在对上下文的误解或不自洽?", "逻辑连贯性:回应者在语义与对话结构上保持前后一致、合乎逻辑?是否存在前后矛盾的情况?",
            "常见多音字处理:是否能在上下文中正确使用常见多音字?", "多语言混杂:是否存在自然的语言切换现象?如中英混杂、文化化表达。",
            "语言不精确性:是否出现打断、自纠正等人类似语言行为?是否存在如“差不多”、“可能吧”这类表达不确定性的用法?", "填充词使用:如“呃”、“嗯”等自然语流中的停顿或过渡词,使用是否得体且自然?",
            "隐喻与语用用意:是否展现出复杂的语用功能(如讽刺、劝阻、暗示等),以及对潜在含义层次的理解能力?"
        ],
        "reference": """
        <p>🔴 <strong>记忆一致性:</strong> 在说话人明确提出自己已经中年后,回应者仍做出了他是青少年的错误假定</p>
        <p>🔴 <strong>逻辑连贯性:</strong> 回应者在第一轮对话中说他说的话并不重要,但在第二轮对话中说他说的话“能够改变你的一生”</p>
        <p>🔴 <strong>常见多音字处理:</strong> 该条对话中未出现多音字</p>
        <p>🟢 <strong>多语言混杂:</strong> 回应者在回复中夹杂了"I see",回复中存在多语言混杂</p>
        <p>🔴 <strong>语言不精确性:</strong> 回应者使用的语言中未夹杂任何的不确定性</p>
        <p>🟢 <strong>填充词使用:</strong> 回应者在回复中使用了“嗯”这个填充词</p>
        <p>🔴 <strong>隐喻与语用用意:</strong> 回应者误将说话人的挖苦当成了真心的赞扬</p>
        """
    },
    {
        "title": "非生理性副语言特征",
        "audio": "audio/sample1.wav",
        "desc": "这是“非生理性副语言特征”维度的文本描述示例。",
        "sub_dims": [
            "节奏:回应者是否存在自然的停顿?语速是否存在自然、流畅的变化?", "语调:在表达疑问、惊讶、强调时,回应者的音调是否会自然上扬或下降?是否表现出符合语境的变化?",
            "重读:是否存在句中关键词上有意识地加重语气?", "辅助性发声:是否存在叹气、短哼、笑声等辅助情绪的非语言性发声?这些发声是否在语境中正确表达了情绪或意图?"
        ],
        "reference": """
        <p>🟢 <strong>节奏:</strong> 回应者的语速变化、停顿都较为自然</p>
        <p>🔴 <strong>语调:</strong> 回应者的音调不存在显著变化</p>
        <p>🔴 <strong>重读:</strong> 回应者语气不存在显著变化</p>
        <p>🔴 <strong>辅助性发声:</strong> 尽管回应者发出了叹气的声音,但是该发声并未传递出语境下应有的失落情绪</p>
        """
    },
    {
        "title": "生理性副语言特征",
        "audio": "audio/sample1.wav",
        "desc": "这是“生理性副语言特征”维度的文本描述示例。",
        "sub_dims": [
            "微生理杂音:回应中是否出现如呼吸声、口水音、气泡音等无意识发声?这些发声是否自然地穿插在恰当的语流节奏当中?",
            "发音不稳定性:回应者是否出现连读、颤音、鼻音等不稳定发音?", "口音:(如果存在的话)回应者的口音是否自然?是否存在机械式的元辅音发音风格?"
        ],
        "reference": """
        <p>🔴 <strong>微生理杂音:</strong> 回应中不存在任何无意识发声</p>
        <p>🔴 <strong>发音不稳定性:</strong> 回应者的咬字清晰、发音标准</p>
        <p>🟢 <strong>口音:</strong> 回应者的口音自然</p>
        """
    },
    {
        "title": "机械人格",
        "audio": "audio/sample1.wav",
        "desc": "这是“机械人格”维度的文本描述示例。",
        "sub_dims": [
            "谄媚现象:回应者是否频繁地赞同用户、重复用户的说法、不断表示感谢或道歉?是否存在“无论用户说什么都肯定或支持”的语气模式?",
            "书面化表达:回应的内容是否缺乏口语化特征?句式是否整齐划一、结构完整却缺乏真实交流中的松散感或灵活性?是否使用抽象或泛泛的措辞来回避具体问题?"
        ],
        "reference": """
        <p>🟢 <strong>谄媚现象:</strong> 回应者并未明显表现出谄媚现象的特征</p>
        <p>🔴 <strong>书面化表达:</strong> 回应的内容结构过于缜密,符合书面用语特征</p>
        """
    },
    {
        "title": "情感表达",
        "audio": "audio/sample1.wav",
        "desc": "这是“情感表达”维度的文本描述示例。",
        "sub_dims": [
            "语义层面:回应者的语言内容是否体现出符合上下文的情绪反应?是否表达了人类对某些情境应有的情感态度?",
            "声学层面:回应者的声音情绪是否与语义一致?语调是否有自然的高低起伏来表达情绪变化?是否出现回应内容与声音传达出的情绪不吻合的现象?"
        ],
        # BUGFIX: the 🟢/🔴 markers below were swapped relative to the text of
        # each finding (and to the 🟢=present / 🔴=absent convention used above):
        # the semantic layer is described as appropriate (🟢), the acoustic
        # layer as mismatched (🔴).
        "reference": """
        <p>🟢 <strong>语义层面:</strong> 说话者阐述了一件伤心的事情,而回应者的语言内容中体现出了恰当的悲伤情绪</p>
        <p>🔴 <strong>声学层面:</strong> 回应者的语音特征与情感表达不匹配。语言内容中表达出了悲伤的情感,但语音特征平淡、缺少变化</p>
        """
    }
]
# Dimension titles in display order (choices for the sample-page radio).
DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
# The formal test questions; each entry pairs an audio clip with a description.
QUESTION_SET = [
    {"audio": "audio/Ses02F_impro01.wav", "desc": "这是第一个测试文件的描述",},
    {"audio": "audio/Ses02F_impro02.wav", "desc": "这是第二个测试文件的描述",},
    {"audio": "audio/Ses02F_impro03.wav", "desc": "这是第三个测试文件的描述",},
]
# Largest sub-dimension count over all dimensions; fixes how many slider
# widgets the test page pre-creates (extras are hidden per dimension).
MAX_SUB_DIMS = max(len(d['sub_dims']) for d in DIMENSIONS_DATA)
# ==============================================================================
# Function definitions
# ==============================================================================
def start_challenge():
    """Hide the welcome page and reveal the info page."""
    hide_welcome = gr.update(visible=False)
    show_info = gr.update(visible=True)
    return hide_welcome, show_info
def toggle_education_other(choice):
    """Show (and enable) the free-text education box only for '其他(请注明)'.

    The box value is always cleared so stale text never leaks into results.
    """
    show_box = choice == "其他(请注明)"
    return gr.update(visible=show_box, interactive=show_box, value="")
def check_info_complete(age, gender, education, education_other):
    """Enable the submit button only when the demographics form is complete.

    '其他(请注明)' additionally requires a non-blank free-text entry.
    """
    if not (age and gender and education):
        return gr.update(interactive=False)
    missing_detail = education == "其他(请注明)" and not education_other.strip()
    return gr.update(interactive=not missing_detail)
def show_sample_page_and_init(age, gender, education, education_other, user_data):
    """Persist the demographics, then switch from the info page to the sample page.

    Returns updates for (info_page, sample_page), the mutated user_data state,
    and the first dimension title to preselect in the sample selector.
    """
    if education == "其他(请注明)":
        resolved_education = education_other
    else:
        resolved_education = education
    user_data["age"] = age
    user_data["gender"] = gender
    user_data["education"] = resolved_education
    return gr.update(visible=False), gr.update(visible=True), user_data, DIMENSION_TITLES[0]
def update_sample_view(dimension_title):
    """Refresh the sample-page widgets for the chosen learning dimension.

    Returns updates for (audio, description, checkbox group, reference text).
    An unknown title leaves every component untouched.
    """
    for candidate in DIMENSIONS_DATA:
        if candidate["title"] != dimension_title:
            continue
        audio_upd = gr.update(value=candidate["audio"])
        desc_upd = gr.update(value=candidate["desc"])
        checks_upd = gr.update(choices=candidate["sub_dims"], value=[], interactive=True)
        ref_upd = gr.update(value=candidate["reference"])
        return audio_upd, desc_upd, checks_upd, ref_upd
    return gr.update(), gr.update(), gr.update(), gr.update()
def update_test_dimension_view(d_idx, selections):
    """Build component updates for dimension `d_idx` of the current question.

    Returns a list: [progress markdown, prev-button, next-button] followed by
    one update per slider slot. Visible sliders carry the sub-dimension label
    and any score previously saved in `selections`; surplus slots are hidden
    and reset to 0.
    """
    dim = DIMENSIONS_DATA[d_idx]
    header = f"维度 {d_idx + 1} / {len(DIMENSIONS_DATA)}: **{dim['title']}**"
    saved_scores = selections.get(dim['title'], {})
    sub_dims = dim['sub_dims']
    slider_updates = []
    for slot in range(MAX_SUB_DIMS):
        if slot >= len(sub_dims):
            slider_updates.append(gr.update(visible=False, value=0))
            continue
        label = sub_dims[slot]
        slider_updates.append(
            gr.update(visible=True, label=label, value=saved_scores.get(label, 0))
        )
    on_last_dim = d_idx == len(DIMENSIONS_DATA) - 1
    prev_btn_update = gr.update(interactive=d_idx > 0)
    next_btn_update = gr.update(
        value="进入最终判断" if on_last_dim else "下一维度",
        interactive=True
    )
    return [gr.update(value=header), prev_btn_update, next_btn_update] + slider_updates
def init_test_question(user_data, q_idx):
    """Prepare all component updates for the start of test question `q_idx`.

    Always begins at dimension 0 with an empty selections dict. The order of
    the returned tuple is position-coupled to `test_init_outputs` and must not
    change.
    """
    d_idx = 0
    question = QUESTION_SET[q_idx]
    progress_q = f"第 {q_idx + 1} / {len(QUESTION_SET)} 题"
    # Reuse the dimension-view builder with empty selections for dimension 0.
    initial_updates = update_test_dimension_view(d_idx, {})
    dim_title_update, prev_btn_update, next_btn_update = initial_updates[:3]
    slider_updates = initial_updates[3:]
    return (
        gr.update(visible=False),   # pretest_page
        gr.update(visible=True),    # test_page
        gr.update(visible=False),   # final_judgment_page
        gr.update(visible=False),   # result_page
        q_idx, d_idx, {},           # question idx, dimension idx, fresh selections
        gr.update(value=progress_q),
        dim_title_update,
        gr.update(value=question['audio']),
        gr.update(value=question['desc']),
        prev_btn_update,
        next_btn_update,
        gr.update(interactive=False),   # final judgment radio locked until all dims scored
        gr.update(interactive=False),   # submit button likewise
    ) + tuple(slider_updates)
def navigate_dimensions(direction, q_idx, d_idx, selections, *slider_values):
    """Save the current dimension's slider scores, then move prev/next.

    `direction` is "next" or "prev". Moving "next" past the last dimension
    switches to the final-judgment page instead of advancing. The returned
    tuple is position-coupled to `nav_outputs` and must not be reordered.
    """
    current_dim_data = DIMENSIONS_DATA[d_idx]
    current_sub_dims = current_dim_data['sub_dims']
    # Only the first len(sub_dims) sliders are visible for this dimension;
    # surplus slider values are ignored.
    scores = {sub_dim: slider_values[i] for i, sub_dim in enumerate(current_sub_dims)}
    selections[current_dim_data['title']] = scores
    new_d_idx = d_idx + (1 if direction == "next" else -1)
    if direction == "next" and d_idx == len(DIMENSIONS_DATA) - 1:
        # All dimensions scored: reveal the final-judgment page.
        return (
            gr.update(visible=False),    # test_page hidden
            gr.update(visible=True),     # final_judgment_page shown
            q_idx, d_idx, selections,
            gr.update(),                 # question progress unchanged
            gr.update(value=""),         # clear dimension title
            gr.update(),                 # audio unchanged
            gr.update(),                 # description unchanged
            gr.update(interactive=True),   # enable final judgment radio
            gr.update(interactive=True),   # enable submit button
            gr.update(interactive=False),  # disable prev button
            gr.update(value="下一维度", interactive=False),
        ) + (gr.update(),) * MAX_SUB_DIMS
    else:
        view_updates = update_test_dimension_view(new_d_idx, selections)
        dim_title_update, prev_btn_update, next_btn_update = view_updates[:3]
        slider_updates = view_updates[3:]
        return (
            gr.update(), gr.update(),    # page visibilities unchanged
            q_idx, new_d_idx, selections,
            gr.update(),
            dim_title_update,
            gr.update(),
            gr.update(),
            gr.update(interactive=False),  # final radio stays locked mid-test
            gr.update(interactive=False),  # submit stays locked mid-test
            prev_btn_update,
            next_btn_update,
        ) + tuple(slider_updates)
def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_results, user_data):
    """Record the finished question, then load the next one or finish the test.

    On the last question this builds a markdown overview of every submission
    and triggers the Hugging Face upload. The returned tuple is
    position-coupled to `full_outputs_with_results`.
    """
    selections["final_choice"] = final_choice
    final_question_result = {
        "question_id": q_idx, "audio_file": QUESTION_SET[q_idx]['audio'],
        "user_data": user_data, "selections": selections
    }
    all_results.append(final_question_result)
    q_idx += 1
    if q_idx < len(QUESTION_SET):
        # More questions left: reinitialise the test page for the next one
        # and clear the (unused yet) result text.
        init_q_updates = init_test_question(user_data, q_idx)
        return init_q_updates + (all_results, gr.update(value=""))
    else:
        # Last question done: build the results summary shown on result_page.
        result_str = "### 测试全部完成!\n\n你的提交结果概览:\n"
        for res in all_results:
            result_str += f"\n#### 题目: {res['audio_file']}\n"
            result_str += f"##### 最终判断: **{res['selections'].get('final_choice', '未选择')}**\n"
            for dim_title, dim_data in res['selections'].items():
                if dim_title == 'final_choice': continue
                result_str += f"- **{dim_title}**:\n"
                for sub_dim, score in dim_data.items():
                    # Truncate long sub-dimension labels for readability.
                    result_str += f"  - *{sub_dim[:20]}...*: {score}/5\n"
        # This function now handles the upload to Hugging Face
        save_all_results_to_file(all_results, user_data)
        return (
            gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True),
            q_idx, d_idx, {},
            gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
            gr.update(), gr.update(),
        ) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)
# Persists results by uploading them to a Hugging Face Dataset repository.
def save_all_results_to_file(all_results, user_data):
    """Package the collected results and upload them as one JSON file to a
    Hugging Face Dataset repository.

    Returns None. Silently skips the upload when HF_TOKEN is not set; any
    upload error is printed and swallowed so the UI flow never breaks.
    """
    # IMPORTANT: Change this to your Hugging Face username and dataset repo name
    repo_id = "Hu6ery/Turing-Test-Submissions"
    # NOTE(review): the age bracket is used as the "username" because no real
    # identifier is collected — confirm this is intended.
    username = user_data.get("age", "user")
    # Unique, timestamped filename for this submission.
    filename = f"submission_{username}_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.json"
    # Package all data into a single dictionary.
    final_data_package = {
        "user_info": user_data,
        "results": all_results
    }
    # Serialise in memory; keep CJK text readable (no \u escapes).
    json_string = json.dumps(final_data_package, ensure_ascii=False, indent=4)
    # The write token comes from the Space's environment secrets.
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        print("HF_TOKEN not found. Cannot upload to the Hub. Please set it in Space secrets.")
        return
    try:
        api = HfApi()
        # BUGFIX: previously uploaded to the literal path "data/(unknown)",
        # so every submission overwrote the previous one. Use the generated
        # unique filename instead.
        api.upload_file(
            path_or_fileobj=bytes(json_string, "utf-8"),
            path_in_repo=f"data/{filename}",  # keep submissions in a subfolder
            repo_id=repo_id,
            repo_type="dataset",
            token=hf_token,
            commit_message=f"Add new submission from {username}"
        )
        print(f"Successfully uploaded results to dataset: {repo_id}")
    except Exception as e:
        # Best-effort upload: log and continue rather than crash the UI.
        print(f"Error uploading to Hugging Face Hub: {e}")
def toggle_reference_view(current):
    """Flip between the interactive checkbox view and the reference-answer view.

    `current` is the toggle button's own label: "参考" means the interactive
    view is showing, so switch to the reference view (and relabel the button
    "返回"); anything else switches back.
    """
    on_interactive = current == "参考"
    return (
        gr.update(visible=not on_interactive),
        gr.update(visible=on_interactive),
        gr.update(value="返回" if on_interactive else "参考"),
    )
def back_to_welcome():
    """Reset all state and page visibility so only the welcome page shows."""
    reset_state = [{}, 0, 0, {}, []]  # user data, q idx, dim idx, selections, results
    hidden_pages = [gr.update(visible=False) for _ in range(6)]
    return tuple([gr.update(visible=True)] + reset_state + hidden_pages)
# ==============================================================================
# Gradio UI definition
# ==============================================================================
with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px !important}") as demo:
    # --- State variables ---
    user_data_state = gr.State({})               # demographics from the info page
    current_question_index = gr.State(0)
    current_test_dimension_index = gr.State(0)
    current_question_selections = gr.State({})   # {dimension title: {sub_dim label: score}}
    test_results = gr.State([])                  # one entry per completed question
    # --- Pages (one visible at a time) ---
    welcome_page = gr.Column(visible=True)
    info_page = gr.Column(visible=False)
    sample_page = gr.Column(visible=False)
    pretest_page = gr.Column(visible=False)
    test_page = gr.Column(visible=False)
    final_judgment_page = gr.Column(visible=False)
    result_page = gr.Column(visible=False)
    pages = {
        "welcome": welcome_page, "info": info_page, "sample": sample_page,
        "pretest": pretest_page, "test": test_page, "final_judgment": final_judgment_page,
        "result": result_page
    }
    with welcome_page:
        gr.Markdown("# AI 识破者\n你将听到一系列对话,请判断哪个回应者是 AI。")
        start_btn = gr.Button("开始挑战", variant="primary")
    with info_page:
        gr.Markdown("## 请提供一些基本信息")
        age_input = gr.Radio(["18岁以下", "18-25岁", "26-35岁", "36-50岁", "50岁以上"], label="年龄")
        gender_input = gr.Radio(["男", "女", "其他"], label="性别")
        education_input = gr.Radio(["高中及以下", "本科", "硕士", "博士", "其他(请注明)"], label="学历")
        education_other_input = gr.Textbox(label="请填写你的学历", visible=False, interactive=False)
        submit_info_btn = gr.Button("提交并开始学习样例", variant="primary", interactive=False)
    with sample_page:
        gr.Markdown("## 样例分析\n请选择一个维度进行学习。所有维度共用同一个样例音频。")
        sample_dimension_selector = gr.Radio(DIMENSION_TITLES, label="选择学习维度", value=DIMENSION_TITLES[0])
        with gr.Row():
            with gr.Column(scale=1):
                sample_audio = gr.Audio(label="样例音频", value=DIMENSIONS_DATA[0]["audio"])
                sample_desc = gr.Textbox(label="文本描述", interactive=False, value=DIMENSIONS_DATA[0]["desc"])
            with gr.Column(scale=2):
                # Two stacked views; toggle_reference_view swaps their visibility.
                with gr.Column(visible=True) as interactive_view:
                    interactive_checkbox_group = gr.CheckboxGroup(label="维度特征", choices=DIMENSIONS_DATA[0]["sub_dims"], interactive=True)
                with gr.Column(visible=False) as reference_view:
                    gr.Markdown("### 参考答案解析")
                    reference_text = gr.Markdown(value=DIMENSIONS_DATA[0]["reference"])
        reference_btn = gr.Button("参考")
        go_to_pretest_btn = gr.Button("我明白了,开始测试", variant="primary")
    with pretest_page:
        gr.Markdown("## 测试说明\n"
                    "- 对于每一道题,你都需要对全部 **5 个维度** 进行评估。\n"
                    "- 在每个维度下,请为出现的每个特征 **从0到5打分**。\n"
                    "- 完成5个维度的打分后,你将需要做出“人类”或“机器人”的 **最终判断**。\n"
                    "- 你可以使用“上一维度”和“下一维度”按钮在5个维度间自由切换和修改分数。")
        go_to_test_btn = gr.Button("开始测试", variant="primary")
    with test_page:
        gr.Markdown("## 正式测试")
        question_progress_text = gr.Markdown()
        test_dimension_title = gr.Markdown()
        test_audio = gr.Audio(label="测试音频")
        test_desc = gr.Textbox(label="文本描述", interactive=False)
        gr.Markdown("--- \n ### 请为以下特征打分 (0-5分)")
        # Pre-create the maximum number of sliders; per-dimension code shows
        # only the ones it needs and hides the rest.
        test_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
        with gr.Row():
            prev_dim_btn = gr.Button("上一维度")
            next_dim_btn = gr.Button("下一维度", variant="primary")
    with final_judgment_page:
        gr.Markdown("## 最终判断")
        gr.Markdown("您已完成对所有维度的评分。请根据您的综合印象,做出最终判断。")
        final_human_robot_radio = gr.Radio(["👤 人类", "🤖 机器人"], label="请判断回应者类型 (必填)", interactive=False)
        submit_final_answer_btn = gr.Button("提交本题答案", variant="primary", interactive=False)
    with result_page:
        gr.Markdown("## 测试完成")
        result_text = gr.Markdown()
        back_to_welcome_btn = gr.Button("返回主界面", variant="primary")
    # ==========================================================================
    # Event binding & IO list definitions.
    # These output lists define the positional contracts that
    # init_test_question / navigate_dimensions / submit_question_and_advance
    # return tuples against — keep them in sync.
    # ==========================================================================
    test_init_outputs = [
        pretest_page, test_page, final_judgment_page, result_page,
        current_question_index, current_test_dimension_index, current_question_selections,
        question_progress_text, test_dimension_title, test_audio, test_desc,
        prev_dim_btn, next_dim_btn,
        final_human_robot_radio, submit_final_answer_btn,
    ] + test_sliders
    nav_inputs = [current_question_index, current_test_dimension_index, current_question_selections] + test_sliders
    nav_outputs = [
        test_page, final_judgment_page,
        current_question_index, current_test_dimension_index, current_question_selections,
        question_progress_text, test_dimension_title, test_audio, test_desc,
        final_human_robot_radio, submit_final_answer_btn,
        prev_dim_btn, next_dim_btn,
    ] + test_sliders
    full_outputs_with_results = test_init_outputs + [test_results, result_text]
    start_btn.click(fn=start_challenge, outputs=[welcome_page, info_page])
    # Re-validate the form whenever any demographics field changes.
    for comp in [age_input, gender_input, education_input, education_other_input]:
        comp.change(fn=check_info_complete, inputs=[age_input, gender_input, education_input, education_other_input], outputs=submit_info_btn)
    education_input.change(fn=toggle_education_other, inputs=education_input, outputs=education_other_input)
    submit_info_btn.click(fn=show_sample_page_and_init, inputs=[age_input, gender_input, education_input, education_other_input, user_data_state], outputs=[info_page, sample_page, user_data_state, sample_dimension_selector])
    sample_dimension_selector.change(fn=update_sample_view, inputs=sample_dimension_selector, outputs=[sample_audio, sample_desc, interactive_checkbox_group, reference_text])
    # The button's own label is used as the toggle state.
    reference_btn.click(fn=toggle_reference_view, inputs=reference_btn, outputs=[interactive_view, reference_view, reference_btn])
    go_to_pretest_btn.click(lambda: (gr.update(visible=False), gr.update(visible=True)), outputs=[sample_page, pretest_page])
    go_to_test_btn.click(
        # Start at question 0 with fresh results and an untouched result text.
        fn=lambda user: init_test_question(user, 0) + ([], gr.update()),
        inputs=[user_data_state],
        outputs=full_outputs_with_results
    )
    prev_dim_btn.click(
        fn=lambda q,d,s, *sliders: navigate_dimensions("prev", q,d,s, *sliders),
        inputs=nav_inputs, outputs=nav_outputs
    )
    next_dim_btn.click(
        fn=lambda q,d,s, *sliders: navigate_dimensions("next", q,d,s, *sliders),
        inputs=nav_inputs, outputs=nav_outputs
    )
    submit_final_answer_btn.click(
        fn=submit_question_and_advance,
        inputs=[current_question_index, current_test_dimension_index, current_question_selections, final_human_robot_radio, test_results, user_data_state],
        outputs=full_outputs_with_results
    )
    back_to_welcome_btn.click(fn=back_to_welcome, outputs=list(pages.values()) + [user_data_state, current_question_index, current_test_dimension_index, current_question_selections, test_results])
# ==============================================================================
# Entry point
# ==============================================================================
if __name__ == "__main__":
    # Make sure the audio directory exists before the UI references files in it.
    if not os.path.exists("audio"):
        os.makedirs("audio")
    # When deployed as a Hugging Face Space, warn about any referenced audio
    # file missing from the repository (placeholder check only).
    if "SPACE_ID" in os.environ:
        print("Running in a Hugging Face Space, checking for audio files...")
        referenced = {q["audio"] for q in QUESTION_SET} | {d["audio"] for d in DIMENSIONS_DATA}
        for audio_file in referenced:
            if not os.path.exists(audio_file):
                print(f"⚠️ Warning: Audio file not found: {audio_file}")
    demo.launch(debug=True)