deeme committed on
Commit
8b55878
·
verified ·
1 Parent(s): cebcaa6

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -13
app.py CHANGED
@@ -12,6 +12,36 @@ logger = logging.getLogger(__name__)
12
  # Load environment variables
13
  load_dotenv()
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def get_api_key(key_name, ui_value):
16
  return ui_value if ui_value else os.getenv(key_name)
17
 
@@ -38,6 +68,10 @@ def process_inputs(
38
  api_key_label,
39
  llm_model_name,
40
  longform,
 
 
 
 
41
  ):
42
  try:
43
  logger.info("Starting podcast generation process")
@@ -124,9 +158,16 @@ def process_inputs(
124
  "output_language": output_language,
125
  "creativity": creativity_level,
126
  "user_instructions": user_instructions,
127
- "api_key_label": api_key_label,
128
- "llm_model_name": llm_model_name,
129
- "longform": longform,
 
 
 
 
 
 
 
130
  }
131
 
132
  # Generate podcast
@@ -140,7 +181,10 @@ def process_inputs(
140
  text=text_input if text_input else None,
141
  image_paths=image_paths if image_paths else None,
142
  tts_model=tts_model,
143
- conversation_config=conversation_config
 
 
 
144
  )
145
 
146
  logger.info("Podcast generation completed")
@@ -298,8 +342,8 @@ with gr.Blocks(
298
 
299
  conversation_style = gr.Textbox(
300
  label="对话风格",
301
- value="生动活泼,节奏明快,热情洋溢",
302
- info="用于对话的风格列表(以逗号分隔)"
303
  )
304
 
305
  # Roles and Structure
@@ -312,20 +356,26 @@ with gr.Blocks(
312
  )
313
  roles_person1 = gr.Textbox(
314
  label="第一位发言者的角色",
315
- value="主要负责总结的人",
316
- info="在对话中,第一个说话人扮演的角色"
317
  )
318
 
319
  roles_person2 = gr.Textbox(
320
  label="第二位发言者的角色",
321
- value="提问者/释疑者",
322
- info="在对话中,第二个说话人所扮演的角色或承担的任务"
323
  )
324
 
325
  dialogue_structure = gr.Textbox(
326
  label="对话结构",
327
- value="引言,主要内容的概括,总结",
328
- info="对话结构的各个部分(用逗号隔开)"
 
 
 
 
 
 
329
  )
330
 
331
  # Podcast Identity
@@ -380,12 +430,29 @@ with gr.Blocks(
380
  </h3>
381
  """,
382
  )
 
 
 
 
 
383
  tts_model = gr.Radio(
384
  choices=["openai", "elevenlabs", "edge"],
385
  value="openai",
386
  label="文本转语音模型",
387
  info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
388
  )
 
 
 
 
 
 
 
 
 
 
 
 
389
 
390
  # Advanced Settings
391
  gr.Markdown(
@@ -440,7 +507,8 @@ with gr.Blocks(
440
  dialogue_structure, podcast_name,
441
  podcast_tagline, output_language, tts_model,
442
  creativity_level, user_instructions,
443
- api_key_label, llm_model_name, longform
 
444
  ],
445
  outputs=audio_output
446
  )
 
12
  # Load environment variables
13
  load_dotenv()
14
 
15
+ # 定义语音选项
16
+ VOICE_OPTIONS = [
17
+ {"id": "3b55b3d84d2f453a98d8ca9bb24182d6", "name": "邓紫琪"},
18
+ {"id": "fa756c4628b94b7394d1822e5848cf59", "name": "杨幂"},
19
+ {"id": "08f18a5692544543a6ca5fdd1eaa328c", "name": "宋雨琦"},
20
+ {"id": "f2ed19ca0ea246bf9cbc6382be00e4fc", "name": "王志文"},
21
+ {"id": "738d0cc1a3e9430a9de2b544a466a7fc", "name": "雷军"},
22
+ {"id": "1512d05841734931bf905d0520c272b1", "name": "周杰伦"},
23
+ {"id": "e4642e5edccd4d9ab61a69e82d4f8a14", "name": "蔡徐坤"},
24
+ {"id": "e04a3dc718864c999ef7db3035764aa8", "name": "刘华强"},
25
+ {"id": "7c66db6e457c4d53b1fe428a8c547953", "name": "郭德纲"},
26
+ {"id": "f6f293aabfe24e46aff0fc309c233d31", "name": "曹操"},
27
+ {"id": "22e8eb5f1f424c749592cd9db3927368", "name": "李云龙"},
28
+ {"id": "5e680ebc2eeb4f78a2224f2e1003b8c6", "name": "刘备"},
29
+ {"id": "zh-HK-HiuGaaiNeural", "name": "曉佳(粤语女声)"},
30
+ {"id": "zh-HK-HiuMaanNeural", "name": "曉曼(粤语女声)"},
31
+ {"id": "zh-HK-WanLungNeural", "name": "雲龍(粤语男声)"},
32
+ {"id": "zh-CN-XiaoxiaoNeural", "name": "晓晓(活泼女声)"},
33
+ {"id": "zh-CN-XiaoyiNeural", "name": "晓伊(女声)"},
34
+ {"id": "zh-CN-YunjianNeural", "name": "云健(解说男声)"},
35
+ {"id": "zh-CN-YunxiNeural", "name": "云希(阳光男声)"},
36
+ {"id": "zh-CN-YunxiaNeural", "name": "云夏(少年男声)"},
37
+ {"id": "zh-CN-YunyangNeural", "name": "云扬(专业男声)"},
38
+ {"id": "zh-CN-liaoning-XiaobeiNeural", "name": "晓贝(辽宁女声)"},
39
+ {"id": "zh-TW-HsiaoChenNeural", "name": "曉臻(湾湾女声)"},
40
+ {"id": "zh-TW-YunJheNeural", "name": "雲哲(湾湾男声)"},
41
+ {"id": "zh-TW-HsiaoYuNeural", "name": "曉雨(湾湾女声)"},
42
+ {"id": "zh-CN-shaanxi-XiaoniNeural", "name": "晓妮(陕西女声)"},
43
+ ]
44
+
45
  def get_api_key(key_name, ui_value):
46
  return ui_value if ui_value else os.getenv(key_name)
47
 
 
68
  api_key_label,
69
  llm_model_name,
70
  longform,
71
+ engagement_techniques,
72
+ tts_openai_question,
73
+ tts_openai_answer,
74
+ ending_message,
75
  ):
76
  try:
77
  logger.info("Starting podcast generation process")
 
158
  "output_language": output_language,
159
  "creativity": creativity_level,
160
  "user_instructions": user_instructions,
161
+ "engagement_techniques": engagement_techniques,
162
+ 'text_to_speech': {
163
+ 'ending_message': ending_message,
164
+ 'openai': {
165
+ 'default_voices': {
166
+ 'question': tts_openai_question,
167
+ 'answer': tts_openai_answer
168
+ },
169
+ },
170
+ },
171
  }
172
 
173
  # Generate podcast
 
181
  text=text_input if text_input else None,
182
  image_paths=image_paths if image_paths else None,
183
  tts_model=tts_model,
184
+ conversation_config=conversation_config,
185
+ api_key_label=api_key_label,
186
+ llm_model_name=llm_model_name,
187
+ longform=longform,
188
  )
189
 
190
  logger.info("Podcast generation completed")
 
342
 
343
  conversation_style = gr.Textbox(
344
  label="对话风格",
345
+ value="engaging,fast-paced,enthusiastic",
346
+ info="用于对话的风格列表(以逗号分隔)默认:生动活泼,节奏明快,热情洋溢"
347
  )
348
 
349
  # Roles and Structure
 
356
  )
357
  roles_person1 = gr.Textbox(
358
  label="第一位发言者的角色",
359
+ value="main summarizer",
360
+ info="在对话中,第一个说话人扮演的角色,默认:主要负责总结的人"
361
  )
362
 
363
  roles_person2 = gr.Textbox(
364
  label="第二位发言者的角色",
365
+ value="questioner/clarifier",
366
+ info="在对话中,第二个说话人所扮演的角色或承担的任务,默认:提问者/释疑者"
367
  )
368
 
369
  dialogue_structure = gr.Textbox(
370
  label="对话结构",
371
+ value="Introduction,Main Content Summary,Conclusion",
372
+ info="对话结构的各个部分(用逗号隔开)默认:引言,主要内容的概括,总结"
373
+ )
374
+
375
+ engagement_techniques = gr.Textbox(
376
+ label="沟通技巧",
377
+ value="rhetorical questions,anecdotes,analogies,humor",
378
+ info="一些沟通和交流方式(用逗号隔开)"
379
  )
380
 
381
  # Podcast Identity
 
430
  </h3>
431
  """,
432
  )
433
+ ending_message = gr.Textbox(
434
+ label="结束语",
435
+ value="撒由那拉!",
436
+ info="结束语"
437
+ )
438
  tts_model = gr.Radio(
439
  choices=["openai", "elevenlabs", "edge"],
440
  value="openai",
441
  label="文本转语音模型",
442
  info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
443
  )
444
+ tts_openai_question = gr.Dropdown(
445
+ choices=[voice["name"] for voice in VOICE_OPTIONS],
446
+ value=VOICE_OPTIONS[12]["name"], # 默认选择选项
447
+ label="OpenAI TTS 主持人",
448
+ info="选择OpenAI TTS 主持人角色语音"
449
+ )
450
+ tts_openai_answer = gr.Dropdown(
451
+ choices=[voice["name"] for voice in VOICE_OPTIONS],
452
+ value=VOICE_OPTIONS[15]["name"], # 默认选择选项
453
+ label="OpenAI TTS 嘉宾",
454
+ info="选择OpenAI TTS 嘉宾角色语音"
455
+ )
456
 
457
  # Advanced Settings
458
  gr.Markdown(
 
507
  dialogue_structure, podcast_name,
508
  podcast_tagline, output_language, tts_model,
509
  creativity_level, user_instructions,
510
+ api_key_label, llm_model_name, longform,
511
+ engagement_techniques, tts_openai_question, tts_openai_answer, ending_message,
512
  ],
513
  outputs=audio_output
514
  )