Spaces:
Running
Running
Commit
·
814fa89
1
Parent(s):
df5152a
修改webui为英文
Browse files
src/podcast_transcribe/webui/app.py
CHANGED
@@ -62,14 +62,14 @@ def parse_rss_feed(rss_url: str):
|
|
62 |
if not rss_url:
|
63 |
print("RSS地址为空")
|
64 |
return {
|
65 |
-
status_message_area: gr.update(value="
|
66 |
podcast_title_display: gr.update(value="", visible=False),
|
67 |
episode_dropdown: gr.update(choices=[], value=None, interactive=False),
|
68 |
podcast_data_state: None,
|
69 |
audio_player: gr.update(value=None),
|
70 |
current_audio_url_state: None,
|
71 |
episode_shownotes: gr.update(value="", visible=False),
|
72 |
-
transcription_output_df: gr.update(value=None, headers=["
|
73 |
transcribe_button: gr.update(interactive=False),
|
74 |
selected_episode_index_state: None
|
75 |
}
|
@@ -86,24 +86,24 @@ def parse_rss_feed(rss_url: str):
|
|
86 |
for i, episode in enumerate(podcast_data.episodes):
|
87 |
# 使用 (标题 (时长), guid 或索引) 作为选项
|
88 |
# 如果 guid 不可靠或缺失,可以使用索引
|
89 |
-
label = f"{episode.title or '
|
90 |
# 将 episode 对象直接作为值传递,或仅传递一个唯一标识符
|
91 |
# 为了简单起见,我们使用索引作为唯一ID,因为我们需要从 podcast_data_state 中检索完整的 episode
|
92 |
choices.append((label, i))
|
93 |
|
94 |
# 显示播客标题
|
95 |
-
podcast_title = f"## 🎙️ {podcast_data.title or '
|
96 |
if podcast_data.author:
|
97 |
-
podcast_title += f"\n
|
98 |
if podcast_data.description:
|
99 |
# 限制描述长度,避免界面过长
|
100 |
description = podcast_data.description[:300]
|
101 |
if len(podcast_data.description) > 300:
|
102 |
description += "..."
|
103 |
-
podcast_title += f"\n\n
|
104 |
|
105 |
return {
|
106 |
-
status_message_area: gr.update(value=f"
|
107 |
podcast_title_display: gr.update(value=podcast_title, visible=True),
|
108 |
episode_dropdown: gr.update(choices=choices, value=None, interactive=True),
|
109 |
podcast_data_state: podcast_data,
|
@@ -116,12 +116,12 @@ def parse_rss_feed(rss_url: str):
|
|
116 |
}
|
117 |
elif podcast_data: # 有 channel 信息但没有 episodes
|
118 |
print("解析成功但未找到剧集")
|
119 |
-
podcast_title = f"## 🎙️ {podcast_data.title or '
|
120 |
if podcast_data.author:
|
121 |
-
podcast_title += f"\n
|
122 |
|
123 |
return {
|
124 |
-
status_message_area: gr.update(value="
|
125 |
podcast_title_display: gr.update(value=podcast_title, visible=True),
|
126 |
episode_dropdown: gr.update(choices=[], value=None, interactive=False),
|
127 |
podcast_data_state: podcast_data, # 仍然存储,以防万一
|
@@ -135,7 +135,7 @@ def parse_rss_feed(rss_url: str):
|
|
135 |
else:
|
136 |
print(f"解析RSS失败: {rss_url}")
|
137 |
return {
|
138 |
-
status_message_area: gr.update(value=f"
|
139 |
podcast_title_display: gr.update(value="", visible=False),
|
140 |
episode_dropdown: gr.update(choices=[], value=None, interactive=False),
|
141 |
podcast_data_state: None,
|
@@ -150,7 +150,7 @@ def parse_rss_feed(rss_url: str):
|
|
150 |
print(f"解析 RSS 时发生错误: {e}")
|
151 |
traceback.print_exc()
|
152 |
return {
|
153 |
-
status_message_area: gr.update(value=f"
|
154 |
podcast_title_display: gr.update(value="", visible=False),
|
155 |
episode_dropdown: gr.update(choices=[], value=None, interactive=False),
|
156 |
podcast_data_state: None,
|
@@ -172,7 +172,7 @@ def load_episode_audio(selected_episode_index: int, podcast_data: PodcastChannel
|
|
172 |
return {
|
173 |
audio_player: gr.update(value=None),
|
174 |
current_audio_url_state: None,
|
175 |
-
status_message_area: gr.update(value="
|
176 |
episode_shownotes: gr.update(value="", visible=False),
|
177 |
transcription_output_df: gr.update(value=None),
|
178 |
local_audio_file_path: None,
|
@@ -199,29 +199,29 @@ def load_episode_audio(selected_episode_index: int, podcast_data: PodcastChannel
|
|
199 |
# 清理多余空白
|
200 |
clean_shownotes = re.sub(r'\s+', ' ', clean_shownotes).strip()
|
201 |
|
202 |
-
episode_shownotes_content = f"### 📝
|
203 |
if episode.published_date:
|
204 |
-
episode_shownotes_content += f"
|
205 |
if episode.duration:
|
206 |
-
episode_shownotes_content += f"
|
207 |
|
208 |
-
episode_shownotes_content += f"
|
209 |
elif episode.summary:
|
210 |
# 如果没有shownotes,使用summary
|
211 |
-
episode_shownotes_content = f"### 📝
|
212 |
if episode.published_date:
|
213 |
-
episode_shownotes_content += f"
|
214 |
if episode.duration:
|
215 |
-
episode_shownotes_content += f"
|
216 |
|
217 |
-
episode_shownotes_content += f"
|
218 |
else:
|
219 |
# 最基本的信息
|
220 |
-
episode_shownotes_content = f"### 📝
|
221 |
if episode.published_date:
|
222 |
-
episode_shownotes_content += f"
|
223 |
if episode.duration:
|
224 |
-
episode_shownotes_content += f"
|
225 |
|
226 |
if audio_url:
|
227 |
# 更新状态消息
|
@@ -287,9 +287,9 @@ def load_episode_audio(selected_episode_index: int, podcast_data: PodcastChannel
|
|
287 |
print(f"音频已下载到临时文件: {temp_filepath}")
|
288 |
|
289 |
return {
|
290 |
-
audio_player: gr.update(value=temp_filepath, label=f"
|
291 |
current_audio_url_state: audio_url,
|
292 |
-
status_message_area: gr.update(value=f"
|
293 |
episode_shownotes: gr.update(value=episode_shownotes_content, visible=True),
|
294 |
transcription_output_df: gr.update(value=None),
|
295 |
local_audio_file_path: temp_filepath,
|
@@ -302,7 +302,7 @@ def load_episode_audio(selected_episode_index: int, podcast_data: PodcastChannel
|
|
302 |
return {
|
303 |
audio_player: gr.update(value=None),
|
304 |
current_audio_url_state: None,
|
305 |
-
status_message_area: gr.update(value=f"
|
306 |
episode_shownotes: gr.update(value=episode_shownotes_content, visible=True),
|
307 |
transcription_output_df: gr.update(value=None),
|
308 |
local_audio_file_path: None,
|
@@ -314,7 +314,7 @@ def load_episode_audio(selected_episode_index: int, podcast_data: PodcastChannel
|
|
314 |
return {
|
315 |
audio_player: gr.update(value=None),
|
316 |
current_audio_url_state: None,
|
317 |
-
status_message_area: gr.update(value=f"
|
318 |
episode_shownotes: gr.update(value=episode_shownotes_content, visible=True),
|
319 |
transcription_output_df: gr.update(value=None),
|
320 |
local_audio_file_path: None,
|
@@ -326,7 +326,7 @@ def load_episode_audio(selected_episode_index: int, podcast_data: PodcastChannel
|
|
326 |
return {
|
327 |
audio_player: gr.update(value=None),
|
328 |
current_audio_url_state: None,
|
329 |
-
status_message_area: gr.update(value="
|
330 |
episode_shownotes: gr.update(value="", visible=False),
|
331 |
transcription_output_df: gr.update(value=None),
|
332 |
local_audio_file_path: None,
|
@@ -339,7 +339,7 @@ def load_episode_audio(selected_episode_index: int, podcast_data: PodcastChannel
|
|
339 |
return {
|
340 |
audio_player: gr.update(value=None),
|
341 |
current_audio_url_state: None,
|
342 |
-
status_message_area: gr.update(value=f"
|
343 |
episode_shownotes: gr.update(value="", visible=False),
|
344 |
transcription_output_df: gr.update(value=None),
|
345 |
local_audio_file_path: None,
|
@@ -354,7 +354,7 @@ def disable_buttons_before_transcription(local_audio_file_path: str):
|
|
354 |
parse_button: gr.update(interactive=False),
|
355 |
episode_dropdown: gr.update(interactive=False),
|
356 |
transcribe_button: gr.update(interactive=False),
|
357 |
-
status_message_area: gr.update(value="
|
358 |
}
|
359 |
|
360 |
def start_transcription(local_audio_file_path: str, podcast_data: PodcastChannel, selected_episode_index: int, progress=gr.Progress(track_tqdm=True)):
|
@@ -365,7 +365,7 @@ def start_transcription(local_audio_file_path: str, podcast_data: PodcastChannel
|
|
365 |
print("没有可用的本地音频文件")
|
366 |
return {
|
367 |
transcription_output_df: gr.update(value=None),
|
368 |
-
status_message_area: gr.update(value="
|
369 |
parse_button: gr.update(interactive=True),
|
370 |
episode_dropdown: gr.update(interactive=True),
|
371 |
transcribe_button: gr.update(interactive=True)
|
@@ -373,16 +373,16 @@ def start_transcription(local_audio_file_path: str, podcast_data: PodcastChannel
|
|
373 |
|
374 |
try:
|
375 |
# 先更新状态消息并禁用按钮
|
376 |
-
progress(0, desc="
|
377 |
|
378 |
# 使用progress回调来更新进度
|
379 |
-
progress(0.2, desc="
|
380 |
|
381 |
# 从文件加载音频
|
382 |
audio_segment = AudioSegment.from_file(local_audio_file_path)
|
383 |
print(f"音频加载完成,时长: {len(audio_segment)/1000}秒")
|
384 |
|
385 |
-
progress(0.4, desc="
|
386 |
|
387 |
# 获取当前选中的剧集信息
|
388 |
episode_info = None
|
@@ -399,7 +399,7 @@ def start_transcription(local_audio_file_path: str, podcast_data: PodcastChannel
|
|
399 |
segmentation_batch_size=64,
|
400 |
parallel=True)
|
401 |
print(f"转录完成,结果: {result is not None}, 段落数: {len(result.segments) if result and result.segments else 0}")
|
402 |
-
progress(0.9, desc="
|
403 |
|
404 |
if result and result.segments:
|
405 |
formatted_segments = []
|
@@ -407,28 +407,28 @@ def start_transcription(local_audio_file_path: str, podcast_data: PodcastChannel
|
|
407 |
time_str = f"{seg.start:.2f}s - {seg.end:.2f}s"
|
408 |
formatted_segments.append([seg.speaker, seg.speaker_name, seg.text, time_str])
|
409 |
|
410 |
-
progress(1.0, desc="
|
411 |
return {
|
412 |
transcription_output_df: gr.update(value=formatted_segments),
|
413 |
-
status_message_area: gr.update(value=f"
|
414 |
parse_button: gr.update(interactive=True),
|
415 |
episode_dropdown: gr.update(interactive=True),
|
416 |
transcribe_button: gr.update(interactive=True)
|
417 |
}
|
418 |
elif result: # 有 result 但没有 segments
|
419 |
-
progress(1.0, desc="
|
420 |
return {
|
421 |
transcription_output_df: gr.update(value=None),
|
422 |
-
status_message_area: gr.update(value="
|
423 |
parse_button: gr.update(interactive=True),
|
424 |
episode_dropdown: gr.update(interactive=True),
|
425 |
transcribe_button: gr.update(interactive=True)
|
426 |
}
|
427 |
else: # result 为 None
|
428 |
-
progress(1.0, desc="
|
429 |
return {
|
430 |
transcription_output_df: gr.update(value=None),
|
431 |
-
status_message_area: gr.update(value="
|
432 |
parse_button: gr.update(interactive=True),
|
433 |
episode_dropdown: gr.update(interactive=True),
|
434 |
transcribe_button: gr.update(interactive=True)
|
@@ -436,17 +436,17 @@ def start_transcription(local_audio_file_path: str, podcast_data: PodcastChannel
|
|
436 |
except Exception as e:
|
437 |
print(f"转录过程中发生错误: {e}")
|
438 |
traceback.print_exc()
|
439 |
-
progress(1.0, desc="
|
440 |
return {
|
441 |
transcription_output_df: gr.update(value=None),
|
442 |
-
status_message_area: gr.update(value=f"
|
443 |
parse_button: gr.update(interactive=True),
|
444 |
episode_dropdown: gr.update(interactive=True),
|
445 |
transcribe_button: gr.update(interactive=True)
|
446 |
}
|
447 |
|
448 |
# --- Gradio 界面定义 ---
|
449 |
-
with gr.Blocks(title="
|
450 |
.status-message-container {
|
451 |
min-height: 50px;
|
452 |
height: auto;
|
@@ -468,7 +468,7 @@ with gr.Blocks(title="播客转录工具 v2", css="""
|
|
468 |
box-shadow: 0 4px 8px rgba(0,0,0,0.1);
|
469 |
}
|
470 |
""") as demo:
|
471 |
-
gr.Markdown("# 🎙️
|
472 |
|
473 |
# 状态管理
|
474 |
podcast_data_state = gr.State(None) # 存储解析后的 PodcastChannel 对象
|
@@ -478,11 +478,11 @@ with gr.Blocks(title="播客转录工具 v2", css="""
|
|
478 |
|
479 |
with gr.Row():
|
480 |
rss_url_input = gr.Textbox(
|
481 |
-
label="
|
482 |
-
placeholder="
|
483 |
elem_id="rss-url-input"
|
484 |
)
|
485 |
-
parse_button = gr.Button("🔗
|
486 |
|
487 |
status_message_area = gr.Markdown(
|
488 |
"",
|
@@ -499,7 +499,7 @@ with gr.Blocks(title="播客转录工具 v2", css="""
|
|
499 |
)
|
500 |
|
501 |
episode_dropdown = gr.Dropdown(
|
502 |
-
label="
|
503 |
choices=[],
|
504 |
interactive=False, # 初始时不可交互,解析成功后设为 True
|
505 |
elem_id="episode-dropdown"
|
@@ -515,16 +515,16 @@ with gr.Blocks(title="播客转录工具 v2", css="""
|
|
515 |
)
|
516 |
|
517 |
audio_player = gr.Audio(
|
518 |
-
label="
|
519 |
interactive=False, # 音频源由程序控制,用户不能直接修改
|
520 |
elem_id="audio-player"
|
521 |
)
|
522 |
|
523 |
-
transcribe_button = gr.Button("🔊
|
524 |
|
525 |
-
gr.Markdown("## 📝
|
526 |
transcription_output_df = gr.DataFrame(
|
527 |
-
headers=["
|
528 |
interactive=False,
|
529 |
wrap=True, # 允许文本换行
|
530 |
row_count=(10, "dynamic"), # 显示10行,可滚动
|
|
|
62 |
if not rss_url:
|
63 |
print("RSS地址为空")
|
64 |
return {
|
65 |
+
status_message_area: gr.update(value="Error: Please enter an RSS URL."),
|
66 |
podcast_title_display: gr.update(value="", visible=False),
|
67 |
episode_dropdown: gr.update(choices=[], value=None, interactive=False),
|
68 |
podcast_data_state: None,
|
69 |
audio_player: gr.update(value=None),
|
70 |
current_audio_url_state: None,
|
71 |
episode_shownotes: gr.update(value="", visible=False),
|
72 |
+
transcription_output_df: gr.update(value=None, headers=["Speaker", "Text", "Time"]),
|
73 |
transcribe_button: gr.update(interactive=False),
|
74 |
selected_episode_index_state: None
|
75 |
}
|
|
|
86 |
for i, episode in enumerate(podcast_data.episodes):
|
87 |
# 使用 (标题 (时长), guid 或索引) 作为选项
|
88 |
# 如果 guid 不可靠或缺失,可以使用索引
|
89 |
+
label = f"{episode.title or 'Untitled'} (Duration: {episode.duration or 'Unknown'})"
|
90 |
# 将 episode 对象直接作为值传递,或仅传递一个唯一标识符
|
91 |
# 为了简单起见,我们使用索引作为唯一ID,因为我们需要从 podcast_data_state 中检索完整的 episode
|
92 |
choices.append((label, i))
|
93 |
|
94 |
# 显示播客标题
|
95 |
+
podcast_title = f"## 🎙️ {podcast_data.title or 'Unknown Podcast'}"
|
96 |
if podcast_data.author:
|
97 |
+
podcast_title += f"\n**Host/Producer:** {podcast_data.author}"
|
98 |
if podcast_data.description:
|
99 |
# 限制描述长度,避免界面过长
|
100 |
description = podcast_data.description[:300]
|
101 |
if len(podcast_data.description) > 300:
|
102 |
description += "..."
|
103 |
+
podcast_title += f"\n\n**Podcast Description:** {description}"
|
104 |
|
105 |
return {
|
106 |
+
status_message_area: gr.update(value=f"Successfully parsed {len(podcast_data.episodes)} episodes. Please select an episode."),
|
107 |
podcast_title_display: gr.update(value=podcast_title, visible=True),
|
108 |
episode_dropdown: gr.update(choices=choices, value=None, interactive=True),
|
109 |
podcast_data_state: podcast_data,
|
|
|
116 |
}
|
117 |
elif podcast_data: # 有 channel 信息但没有 episodes
|
118 |
print("解析成功但未找到剧集")
|
119 |
+
podcast_title = f"## 🎙️ {podcast_data.title or 'Unknown Podcast'}"
|
120 |
if podcast_data.author:
|
121 |
+
podcast_title += f"\n**Host/Producer:** {podcast_data.author}"
|
122 |
|
123 |
return {
|
124 |
+
status_message_area: gr.update(value="Parsing successful, but no episodes found."),
|
125 |
podcast_title_display: gr.update(value=podcast_title, visible=True),
|
126 |
episode_dropdown: gr.update(choices=[], value=None, interactive=False),
|
127 |
podcast_data_state: podcast_data, # 仍然存储,以防万一
|
|
|
135 |
else:
|
136 |
print(f"解析RSS失败: {rss_url}")
|
137 |
return {
|
138 |
+
status_message_area: gr.update(value=f"Failed to parse RSS: {rss_url}. Please check the URL or network connection."),
|
139 |
podcast_title_display: gr.update(value="", visible=False),
|
140 |
episode_dropdown: gr.update(choices=[], value=None, interactive=False),
|
141 |
podcast_data_state: None,
|
|
|
150 |
print(f"解析 RSS 时发生错误: {e}")
|
151 |
traceback.print_exc()
|
152 |
return {
|
153 |
+
status_message_area: gr.update(value=f"Serious error occurred while parsing RSS: {e}"),
|
154 |
podcast_title_display: gr.update(value="", visible=False),
|
155 |
episode_dropdown: gr.update(choices=[], value=None, interactive=False),
|
156 |
podcast_data_state: None,
|
|
|
172 |
return {
|
173 |
audio_player: gr.update(value=None),
|
174 |
current_audio_url_state: None,
|
175 |
+
status_message_area: gr.update(value="Please parse RSS first and select an episode."),
|
176 |
episode_shownotes: gr.update(value="", visible=False),
|
177 |
transcription_output_df: gr.update(value=None),
|
178 |
local_audio_file_path: None,
|
|
|
199 |
# 清理多余空白
|
200 |
clean_shownotes = re.sub(r'\s+', ' ', clean_shownotes).strip()
|
201 |
|
202 |
+
episode_shownotes_content = f"### 📝 Episode Details\n\n**Title:** {episode.title or 'Untitled'}\n\n"
|
203 |
if episode.published_date:
|
204 |
+
episode_shownotes_content += f"**Published Date:** {episode.published_date.strftime('%Y-%m-%d')}\n\n"
|
205 |
if episode.duration:
|
206 |
+
episode_shownotes_content += f"**Duration:** {episode.duration}\n\n"
|
207 |
|
208 |
+
episode_shownotes_content += f"**Episode Description:**\n\n{clean_shownotes}"
|
209 |
elif episode.summary:
|
210 |
# 如果没有shownotes,使用summary
|
211 |
+
episode_shownotes_content = f"### 📝 Episode Details\n\n**Title:** {episode.title or 'Untitled'}\n\n"
|
212 |
if episode.published_date:
|
213 |
+
episode_shownotes_content += f"**Published Date:** {episode.published_date.strftime('%Y-%m-%d')}\n\n"
|
214 |
if episode.duration:
|
215 |
+
episode_shownotes_content += f"**Duration:** {episode.duration}\n\n"
|
216 |
|
217 |
+
episode_shownotes_content += f"**Episode Summary:**\n\n{episode.summary}"
|
218 |
else:
|
219 |
# 最基本的信息
|
220 |
+
episode_shownotes_content = f"### 📝 Episode Details\n\n**Title:** {episode.title or 'Untitled'}\n\n"
|
221 |
if episode.published_date:
|
222 |
+
episode_shownotes_content += f"**Published Date:** {episode.published_date.strftime('%Y-%m-%d')}\n\n"
|
223 |
if episode.duration:
|
224 |
+
episode_shownotes_content += f"**Duration:** {episode.duration}\n\n"
|
225 |
|
226 |
if audio_url:
|
227 |
# 更新状态消息
|
|
|
287 |
print(f"音频已下载到临时文件: {temp_filepath}")
|
288 |
|
289 |
return {
|
290 |
+
audio_player: gr.update(value=temp_filepath, label=f"Now Playing: {episode.title or 'Untitled'}"),
|
291 |
current_audio_url_state: audio_url,
|
292 |
+
status_message_area: gr.update(value=f"Episode loaded: {episode.title or 'Untitled'}."),
|
293 |
episode_shownotes: gr.update(value=episode_shownotes_content, visible=True),
|
294 |
transcription_output_df: gr.update(value=None),
|
295 |
local_audio_file_path: temp_filepath,
|
|
|
302 |
return {
|
303 |
audio_player: gr.update(value=None),
|
304 |
current_audio_url_state: None,
|
305 |
+
status_message_area: gr.update(value=f"Error: Failed to download audio: {e}"),
|
306 |
episode_shownotes: gr.update(value=episode_shownotes_content, visible=True),
|
307 |
transcription_output_df: gr.update(value=None),
|
308 |
local_audio_file_path: None,
|
|
|
314 |
return {
|
315 |
audio_player: gr.update(value=None),
|
316 |
current_audio_url_state: None,
|
317 |
+
status_message_area: gr.update(value=f"Error: Selected episode '{episode.title}' does not provide a valid audio URL."),
|
318 |
episode_shownotes: gr.update(value=episode_shownotes_content, visible=True),
|
319 |
transcription_output_df: gr.update(value=None),
|
320 |
local_audio_file_path: None,
|
|
|
326 |
return {
|
327 |
audio_player: gr.update(value=None),
|
328 |
current_audio_url_state: None,
|
329 |
+
status_message_area: gr.update(value="Error: Invalid episode index selected."),
|
330 |
episode_shownotes: gr.update(value="", visible=False),
|
331 |
transcription_output_df: gr.update(value=None),
|
332 |
local_audio_file_path: None,
|
|
|
339 |
return {
|
340 |
audio_player: gr.update(value=None),
|
341 |
current_audio_url_state: None,
|
342 |
+
status_message_area: gr.update(value=f"Serious error occurred while loading audio: {e}"),
|
343 |
episode_shownotes: gr.update(value="", visible=False),
|
344 |
transcription_output_df: gr.update(value=None),
|
345 |
local_audio_file_path: None,
|
|
|
354 |
parse_button: gr.update(interactive=False),
|
355 |
episode_dropdown: gr.update(interactive=False),
|
356 |
transcribe_button: gr.update(interactive=False),
|
357 |
+
status_message_area: gr.update(value="Starting transcription process, please wait...")
|
358 |
}
|
359 |
|
360 |
def start_transcription(local_audio_file_path: str, podcast_data: PodcastChannel, selected_episode_index: int, progress=gr.Progress(track_tqdm=True)):
|
|
|
365 |
print("没有可用的本地音频文件")
|
366 |
return {
|
367 |
transcription_output_df: gr.update(value=None),
|
368 |
+
status_message_area: gr.update(value="Error: No valid audio file for transcription. Please select an episode first."),
|
369 |
parse_button: gr.update(interactive=True),
|
370 |
episode_dropdown: gr.update(interactive=True),
|
371 |
transcribe_button: gr.update(interactive=True)
|
|
|
373 |
|
374 |
try:
|
375 |
# 先更新状态消息并禁用按钮
|
376 |
+
progress(0, desc="Initializing transcription process...")
|
377 |
|
378 |
# 使用progress回调来更新进度
|
379 |
+
progress(0.2, desc="Loading audio file...")
|
380 |
|
381 |
# 从文件加载音频
|
382 |
audio_segment = AudioSegment.from_file(local_audio_file_path)
|
383 |
print(f"音频加载完成,时长: {len(audio_segment)/1000}秒")
|
384 |
|
385 |
+
progress(0.4, desc="Audio loaded, starting transcription (this may take a while)...")
|
386 |
|
387 |
# 获取当前选中的剧集信息
|
388 |
episode_info = None
|
|
|
399 |
segmentation_batch_size=64,
|
400 |
parallel=True)
|
401 |
print(f"转录完成,结果: {result is not None}, 段落数: {len(result.segments) if result and result.segments else 0}")
|
402 |
+
progress(0.9, desc="Transcription completed, formatting results...")
|
403 |
|
404 |
if result and result.segments:
|
405 |
formatted_segments = []
|
|
|
407 |
time_str = f"{seg.start:.2f}s - {seg.end:.2f}s"
|
408 |
formatted_segments.append([seg.speaker, seg.speaker_name, seg.text, time_str])
|
409 |
|
410 |
+
progress(1.0, desc="Transcription results generated!")
|
411 |
return {
|
412 |
transcription_output_df: gr.update(value=formatted_segments),
|
413 |
+
status_message_area: gr.update(value=f"Transcription completed! {len(result.segments)} segments generated. {result.num_speakers} speakers detected."),
|
414 |
parse_button: gr.update(interactive=True),
|
415 |
episode_dropdown: gr.update(interactive=True),
|
416 |
transcribe_button: gr.update(interactive=True)
|
417 |
}
|
418 |
elif result: # 有 result 但没有 segments
|
419 |
+
progress(1.0, desc="Transcription completed, but no text segments")
|
420 |
return {
|
421 |
transcription_output_df: gr.update(value=None),
|
422 |
+
status_message_area: gr.update(value="Transcription completed, but no text segments were generated."),
|
423 |
parse_button: gr.update(interactive=True),
|
424 |
episode_dropdown: gr.update(interactive=True),
|
425 |
transcribe_button: gr.update(interactive=True)
|
426 |
}
|
427 |
else: # result 为 None
|
428 |
+
progress(1.0, desc="Transcription failed")
|
429 |
return {
|
430 |
transcription_output_df: gr.update(value=None),
|
431 |
+
status_message_area: gr.update(value="Transcription failed, no results obtained."),
|
432 |
parse_button: gr.update(interactive=True),
|
433 |
episode_dropdown: gr.update(interactive=True),
|
434 |
transcribe_button: gr.update(interactive=True)
|
|
|
436 |
except Exception as e:
|
437 |
print(f"转录过程中发生错误: {e}")
|
438 |
traceback.print_exc()
|
439 |
+
progress(1.0, desc="Transcription failed: processing error")
|
440 |
return {
|
441 |
transcription_output_df: gr.update(value=None),
|
442 |
+
status_message_area: gr.update(value=f"Serious error occurred during transcription: {e}"),
|
443 |
parse_button: gr.update(interactive=True),
|
444 |
episode_dropdown: gr.update(interactive=True),
|
445 |
transcribe_button: gr.update(interactive=True)
|
446 |
}
|
447 |
|
448 |
# --- Gradio 界面定义 ---
|
449 |
+
with gr.Blocks(title="Podcast Transcriber v2", css="""
|
450 |
.status-message-container {
|
451 |
min-height: 50px;
|
452 |
height: auto;
|
|
|
468 |
box-shadow: 0 4px 8px rgba(0,0,0,0.1);
|
469 |
}
|
470 |
""") as demo:
|
471 |
+
gr.Markdown("# 🎙️ Podcast Transcriber")
|
472 |
|
473 |
# 状态管理
|
474 |
podcast_data_state = gr.State(None) # 存储解析后的 PodcastChannel 对象
|
|
|
478 |
|
479 |
with gr.Row():
|
480 |
rss_url_input = gr.Textbox(
|
481 |
+
label="Podcast RSS URL",
|
482 |
+
placeholder="e.g., https://your-podcast-feed.com/rss.xml",
|
483 |
elem_id="rss-url-input"
|
484 |
)
|
485 |
+
parse_button = gr.Button("🔗 Parse RSS", elem_id="parse-rss-button")
|
486 |
|
487 |
status_message_area = gr.Markdown(
|
488 |
"",
|
|
|
499 |
)
|
500 |
|
501 |
episode_dropdown = gr.Dropdown(
|
502 |
+
label="Select Episode",
|
503 |
choices=[],
|
504 |
interactive=False, # 初始时不可交互,解析成功后设为 True
|
505 |
elem_id="episode-dropdown"
|
|
|
515 |
)
|
516 |
|
517 |
audio_player = gr.Audio(
|
518 |
+
label="Podcast Audio Player",
|
519 |
interactive=False, # 音频源由程序控制,用户不能直接修改
|
520 |
elem_id="audio-player"
|
521 |
)
|
522 |
|
523 |
+
transcribe_button = gr.Button("🔊 Start Transcription", elem_id="transcribe-button", interactive=False)
|
524 |
|
525 |
+
gr.Markdown("## 📝 Transcription Results")
|
526 |
transcription_output_df = gr.DataFrame(
|
527 |
+
headers=["Speaker ID", "Speaker Name", "Transcription Text", "Time Range"],
|
528 |
interactive=False,
|
529 |
wrap=True, # 允许文本换行
|
530 |
row_count=(10, "dynamic"), # 显示10行,可滚动
|