Upload app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,36 @@ logger = logging.getLogger(__name__)
|
|
12 |
# Load environment variables
|
13 |
load_dotenv()
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def get_api_key(key_name, ui_value):
|
16 |
return ui_value if ui_value else os.getenv(key_name)
|
17 |
|
@@ -38,6 +68,10 @@ def process_inputs(
|
|
38 |
api_key_label,
|
39 |
llm_model_name,
|
40 |
longform,
|
|
|
|
|
|
|
|
|
41 |
):
|
42 |
try:
|
43 |
logger.info("Starting podcast generation process")
|
@@ -124,9 +158,16 @@ def process_inputs(
|
|
124 |
"output_language": output_language,
|
125 |
"creativity": creativity_level,
|
126 |
"user_instructions": user_instructions,
|
127 |
-
"
|
128 |
-
|
129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
}
|
131 |
|
132 |
# Generate podcast
|
@@ -140,7 +181,10 @@ def process_inputs(
|
|
140 |
text=text_input if text_input else None,
|
141 |
image_paths=image_paths if image_paths else None,
|
142 |
tts_model=tts_model,
|
143 |
-
conversation_config=conversation_config
|
|
|
|
|
|
|
144 |
)
|
145 |
|
146 |
logger.info("Podcast generation completed")
|
@@ -298,8 +342,8 @@ with gr.Blocks(
|
|
298 |
|
299 |
conversation_style = gr.Textbox(
|
300 |
label="对话风格",
|
301 |
-
value="
|
302 |
-
info="
|
303 |
)
|
304 |
|
305 |
# Roles and Structure
|
@@ -312,20 +356,26 @@ with gr.Blocks(
|
|
312 |
)
|
313 |
roles_person1 = gr.Textbox(
|
314 |
label="第一位发言者的角色",
|
315 |
-
value="
|
316 |
-
info="
|
317 |
)
|
318 |
|
319 |
roles_person2 = gr.Textbox(
|
320 |
label="第二位发言者的角色",
|
321 |
-
value="
|
322 |
-
info="
|
323 |
)
|
324 |
|
325 |
dialogue_structure = gr.Textbox(
|
326 |
label="对话结构",
|
327 |
-
value="
|
328 |
-
info="
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
)
|
330 |
|
331 |
# Podcast Identity
|
@@ -380,12 +430,29 @@ with gr.Blocks(
|
|
380 |
</h3>
|
381 |
""",
|
382 |
)
|
|
|
|
|
|
|
|
|
|
|
383 |
tts_model = gr.Radio(
|
384 |
choices=["openai", "elevenlabs", "edge"],
|
385 |
value="openai",
|
386 |
label="文本转语音模型",
|
387 |
info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
|
388 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
389 |
|
390 |
# Advanced Settings
|
391 |
gr.Markdown(
|
@@ -440,7 +507,8 @@ with gr.Blocks(
|
|
440 |
dialogue_structure, podcast_name,
|
441 |
podcast_tagline, output_language, tts_model,
|
442 |
creativity_level, user_instructions,
|
443 |
-
api_key_label, llm_model_name, longform
|
|
|
444 |
],
|
445 |
outputs=audio_output
|
446 |
)
|
|
|
12 |
# Load environment variables
|
13 |
load_dotenv()
|
14 |
|
15 |
+
# 定义语音选项
|
16 |
+
VOICE_OPTIONS = [
|
17 |
+
{"id": "3b55b3d84d2f453a98d8ca9bb24182d6", "name": "邓紫琪"},
|
18 |
+
{"id": "fa756c4628b94b7394d1822e5848cf59", "name": "杨幂"},
|
19 |
+
{"id": "08f18a5692544543a6ca5fdd1eaa328c", "name": "宋雨琦"},
|
20 |
+
{"id": "f2ed19ca0ea246bf9cbc6382be00e4fc", "name": "王志文"},
|
21 |
+
{"id": "738d0cc1a3e9430a9de2b544a466a7fc", "name": "雷军"},
|
22 |
+
{"id": "1512d05841734931bf905d0520c272b1", "name": "周杰伦"},
|
23 |
+
{"id": "e4642e5edccd4d9ab61a69e82d4f8a14", "name": "蔡徐坤"},
|
24 |
+
{"id": "e04a3dc718864c999ef7db3035764aa8", "name": "刘华强"},
|
25 |
+
{"id": "7c66db6e457c4d53b1fe428a8c547953", "name": "郭德纲"},
|
26 |
+
{"id": "f6f293aabfe24e46aff0fc309c233d31", "name": "曹操"},
|
27 |
+
{"id": "22e8eb5f1f424c749592cd9db3927368", "name": "李云龙"},
|
28 |
+
{"id": "5e680ebc2eeb4f78a2224f2e1003b8c6", "name": "刘备"},
|
29 |
+
{"id": "zh-HK-HiuGaaiNeural", "name": "曉佳(粤语女声)"},
|
30 |
+
{"id": "zh-HK-HiuMaanNeural", "name": "曉曼(粤语女声)"},
|
31 |
+
{"id": "zh-HK-WanLungNeural", "name": "雲龍(粤语男声)"},
|
32 |
+
{"id": "zh-CN-XiaoxiaoNeural", "name": "晓晓(活泼女声)"},
|
33 |
+
{"id": "zh-CN-XiaoyiNeural", "name": "晓伊(女声)"},
|
34 |
+
{"id": "zh-CN-YunjianNeural", "name": "云健(解说男声)"},
|
35 |
+
{"id": "zh-CN-YunxiNeural", "name": "云希(阳光男声)"},
|
36 |
+
{"id": "zh-CN-YunxiaNeural", "name": "云夏(少年男声)"},
|
37 |
+
{"id": "zh-CN-YunyangNeural", "name": "云扬(专业男声)"},
|
38 |
+
{"id": "zh-CN-liaoning-XiaobeiNeural", "name": "晓贝(辽宁女声)"},
|
39 |
+
{"id": "zh-TW-HsiaoChenNeural", "name": "曉臻(湾湾女声)"},
|
40 |
+
{"id": "zh-TW-YunJheNeural", "name": "雲哲(湾湾男声)"},
|
41 |
+
{"id": "zh-TW-HsiaoYuNeural", "name": "曉雨(湾湾女声)"},
|
42 |
+
{"id": "zh-CN-shaanxi-XiaoniNeural", "name": "晓妮(陕西女声)"},
|
43 |
+
]
|
44 |
+
|
45 |
def get_api_key(key_name, ui_value):
|
46 |
return ui_value if ui_value else os.getenv(key_name)
|
47 |
|
|
|
68 |
api_key_label,
|
69 |
llm_model_name,
|
70 |
longform,
|
71 |
+
engagement_techniques,
|
72 |
+
tts_openai_question,
|
73 |
+
tts_openai_answer,
|
74 |
+
ending_message,
|
75 |
):
|
76 |
try:
|
77 |
logger.info("Starting podcast generation process")
|
|
|
158 |
"output_language": output_language,
|
159 |
"creativity": creativity_level,
|
160 |
"user_instructions": user_instructions,
|
161 |
+
"engagement_techniques": engagement_techniques,
|
162 |
+
'text_to_speech': {
|
163 |
+
'ending_message': ending_message,
|
164 |
+
'openai': {
|
165 |
+
'default_voices': {
|
166 |
+
'question': tts_openai_question,
|
167 |
+
'answer': tts_openai_answer
|
168 |
+
},
|
169 |
+
},
|
170 |
+
},
|
171 |
}
|
172 |
|
173 |
# Generate podcast
|
|
|
181 |
text=text_input if text_input else None,
|
182 |
image_paths=image_paths if image_paths else None,
|
183 |
tts_model=tts_model,
|
184 |
+
conversation_config=conversation_config,
|
185 |
+
api_key_label=api_key_label,
|
186 |
+
llm_model_name=llm_model_name,
|
187 |
+
longform=longform,
|
188 |
)
|
189 |
|
190 |
logger.info("Podcast generation completed")
|
|
|
342 |
|
343 |
conversation_style = gr.Textbox(
|
344 |
label="对话风格",
|
345 |
+
value="engaging,fast-paced,enthusiastic",
|
346 |
+
info="用于对话的风格列表(以逗号分隔)默认:生动活泼,节奏明快,热情洋溢"
|
347 |
)
|
348 |
|
349 |
# Roles and Structure
|
|
|
356 |
)
|
357 |
roles_person1 = gr.Textbox(
|
358 |
label="第一位发言者的角色",
|
359 |
+
value="main summarizer",
|
360 |
+
info="在对话中,第一个说话人扮演的角色,默认:主要负责总结的人"
|
361 |
)
|
362 |
|
363 |
roles_person2 = gr.Textbox(
|
364 |
label="第二位发言者的角色",
|
365 |
+
value="questioner/clarifier",
|
366 |
+
info="在对话中,第二个说话人所扮演的角色或承担的任务,默认:提问者/释疑者"
|
367 |
)
|
368 |
|
369 |
dialogue_structure = gr.Textbox(
|
370 |
label="对话结构",
|
371 |
+
value="Introduction,Main Content Summary,Conclusion",
|
372 |
+
info="对话结构的各个部分(用逗号隔开)默认:引言,主要内容的概括,总结"
|
373 |
+
)
|
374 |
+
|
375 |
+
engagement_techniques = gr.Textbox(
|
376 |
+
label="沟通技巧",
|
377 |
+
value="rhetorical questions,anecdotes,analogies,humor",
|
378 |
+
info="一些沟通和交流方式(用逗号隔开)"
|
379 |
)
|
380 |
|
381 |
# Podcast Identity
|
|
|
430 |
</h3>
|
431 |
""",
|
432 |
)
|
433 |
+
ending_message = gr.Textbox(
|
434 |
+
label="结束语",
|
435 |
+
value="撒由那拉!",
|
436 |
+
info="结束语"
|
437 |
+
)
|
438 |
tts_model = gr.Radio(
|
439 |
choices=["openai", "elevenlabs", "edge"],
|
440 |
value="openai",
|
441 |
label="文本转语音模型",
|
442 |
info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
|
443 |
)
|
444 |
+
tts_openai_question = gr.Dropdown(
|
445 |
+
choices=[voice["name"] for voice in VOICE_OPTIONS],
|
446 |
+
value=VOICE_OPTIONS[12]["name"], # 默认选择选项
|
447 |
+
label="OpenAI TTS 主持人",
|
448 |
+
info="选择OpenAI TTS 主持人角色语音"
|
449 |
+
)
|
450 |
+
tts_openai_answer = gr.Dropdown(
|
451 |
+
choices=[voice["name"] for voice in VOICE_OPTIONS],
|
452 |
+
value=VOICE_OPTIONS[15]["name"], # 默认选择选项
|
453 |
+
label="OpenAI TTS 嘉宾",
|
454 |
+
info="选择OpenAI TTS 嘉宾角色语音"
|
455 |
+
)
|
456 |
|
457 |
# Advanced Settings
|
458 |
gr.Markdown(
|
|
|
507 |
dialogue_structure, podcast_name,
|
508 |
podcast_tagline, output_language, tts_model,
|
509 |
creativity_level, user_instructions,
|
510 |
+
api_key_label, llm_model_name, longform,
|
511 |
+
engagement_techniques, tts_openai_question, tts_openai_answer, ending_message,
|
512 |
],
|
513 |
outputs=audio_output
|
514 |
)
|