Spaces:

deeme
/

pod

Running

App Files Files Community

deeme committed on Dec 3, 2024

Commit

8b55878

verified ·

1 Parent(s): cebcaa6

Upload app.py

Browse files

Files changed (1) hide show

app.py +81 -13

app.py CHANGED Viewed

@@ -12,6 +12,36 @@ logger = logging.getLogger(__name__)
 # Load environment variables
 load_dotenv()
 def get_api_key(key_name, ui_value):
     """Resolve an API key, preferring the UI-supplied value.
     Returns ``ui_value`` when it is truthy; otherwise falls back to
     ``os.getenv(key_name)``, which yields ``None`` if the variable is unset.
     """
     if ui_value:
         return ui_value
     return os.getenv(key_name)
@@ -38,6 +68,10 @@ def process_inputs(
     api_key_label,
     llm_model_name,
     longform,
 ):
     try:
         logger.info("Starting podcast generation process")
@@ -124,9 +158,16 @@ def process_inputs(
             "output_language": output_language,
             "creativity": creativity_level,
             "user_instructions": user_instructions,
-            "api_key_label": api_key_label,
-            "llm_model_name": llm_model_name,
-            "longform": longform,
         }
         # Generate podcast
@@ -140,7 +181,10 @@ def process_inputs(
             text=text_input if text_input else None,
             image_paths=image_paths if image_paths else None,
             tts_model=tts_model,
-            conversation_config=conversation_config
         )
         logger.info("Podcast generation completed")
@@ -298,8 +342,8 @@ with gr.Blocks(
             conversation_style = gr.Textbox(
                 label="对话风格",
-                value="生动活泼,节奏明快,热情洋溢",
-                info="用于对话的风格列表（以逗号分隔）"
             )
             # Roles and Structure
@@ -312,20 +356,26 @@ with gr.Blocks(
             )
             roles_person1 = gr.Textbox(
                 label="第一位发言者的角色",
-                value="主要负责总结的人",
-                info="在对话中，第一个说话人扮演的角色"
             )
             roles_person2 = gr.Textbox(
                 label="第二位发言者的角色",
-                value="提问者/释疑者",
-                info="在对话中，第二个说话人所扮演的角色或承担的任务"
             )
             dialogue_structure = gr.Textbox(
                 label="对话结构",
-                value="引言,主要内容的概括,总结",
-                info="对话结构的各个部分（用逗号隔开）"
             )
             # Podcast Identity
@@ -380,12 +430,29 @@ with gr.Blocks(
                 </h3>
                 """,
             )
             tts_model = gr.Radio(
                 choices=["openai", "elevenlabs", "edge"],
                 value="openai",
                 label="文本转语音模型",
                 info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
             )
             # Advanced Settings
             gr.Markdown(
@@ -440,7 +507,8 @@ with gr.Blocks(
             dialogue_structure, podcast_name,
             podcast_tagline, output_language, tts_model,
             creativity_level, user_instructions,
-            api_key_label, llm_model_name, longform
         ],
         outputs=audio_output
     )

 # Load environment variables
 load_dotenv()
+# 定义语音选项
+VOICE_OPTIONS = [
+    {"id": "3b55b3d84d2f453a98d8ca9bb24182d6", "name": "邓紫琪"},
+    {"id": "fa756c4628b94b7394d1822e5848cf59", "name": "杨幂"},
+    {"id": "08f18a5692544543a6ca5fdd1eaa328c", "name": "宋雨琦"},
+    {"id": "f2ed19ca0ea246bf9cbc6382be00e4fc", "name": "王志文"},
+    {"id": "738d0cc1a3e9430a9de2b544a466a7fc", "name": "雷军"},
+    {"id": "1512d05841734931bf905d0520c272b1", "name": "周杰伦"},
+    {"id": "e4642e5edccd4d9ab61a69e82d4f8a14", "name": "蔡徐坤"},
+    {"id": "e04a3dc718864c999ef7db3035764aa8", "name": "刘华强"},
+    {"id": "7c66db6e457c4d53b1fe428a8c547953", "name": "郭德纲"},
+    {"id": "f6f293aabfe24e46aff0fc309c233d31", "name": "曹操"},
+    {"id": "22e8eb5f1f424c749592cd9db3927368", "name": "李云龙"},
+    {"id": "5e680ebc2eeb4f78a2224f2e1003b8c6", "name": "刘备"},
+    {"id": "zh-HK-HiuGaaiNeural", "name": "曉佳(粤语女声)"},
+    {"id": "zh-HK-HiuMaanNeural", "name": "曉曼(粤语女声)"},
+    {"id": "zh-HK-WanLungNeural", "name": "雲龍(粤语男声)"},
+    {"id": "zh-CN-XiaoxiaoNeural", "name": "晓晓(活泼女声)"},
+    {"id": "zh-CN-XiaoyiNeural", "name": "晓伊(女声)"},
+    {"id": "zh-CN-YunjianNeural", "name": "云健(解说男声)"},
+    {"id": "zh-CN-YunxiNeural", "name": "云希(阳光男声)"},
+    {"id": "zh-CN-YunxiaNeural", "name": "云夏(少年男声)"},
+    {"id": "zh-CN-YunyangNeural", "name": "云扬(专业男声)"},
+    {"id": "zh-CN-liaoning-XiaobeiNeural", "name": "晓贝(辽宁女声)"},
+    {"id": "zh-TW-HsiaoChenNeural", "name": "曉臻(湾湾女声)"},
+    {"id": "zh-TW-YunJheNeural", "name": "雲哲(湾湾男声)"},
+    {"id": "zh-TW-HsiaoYuNeural", "name": "曉雨(湾湾女声)"},
+    {"id": "zh-CN-shaanxi-XiaoniNeural", "name": "晓妮(陕西女声)"},
+]
 def get_api_key(key_name, ui_value):
     """Resolve an API key, preferring the UI-supplied value.
     Returns ``ui_value`` when it is truthy; otherwise falls back to
     ``os.getenv(key_name)``, which yields ``None`` if the variable is unset.
     """
     if ui_value:
         return ui_value
     return os.getenv(key_name)
     api_key_label,
     llm_model_name,
     longform,
+    engagement_techniques,
+    tts_openai_question,
+    tts_openai_answer,
+    ending_message,
 ):
     try:
         logger.info("Starting podcast generation process")
             "output_language": output_language,
             "creativity": creativity_level,
             "user_instructions": user_instructions,
+            "engagement_techniques": engagement_techniques,
+            'text_to_speech': {
+                'ending_message': ending_message,
+                'openai': {
+                    'default_voices': {
+                        'question': tts_openai_question,
+                        'answer': tts_openai_answer
+                    },
+                },
+            },
         }
         # Generate podcast
             text=text_input if text_input else None,
             image_paths=image_paths if image_paths else None,
             tts_model=tts_model,
+            conversation_config=conversation_config,
+            api_key_label=api_key_label,
+            llm_model_name=llm_model_name,
+            longform=longform,
         )
         logger.info("Podcast generation completed")
             conversation_style = gr.Textbox(
                 label="对话风格",
+                value="engaging,fast-paced,enthusiastic",
+                info="用于对话的风格列表（以逗号分隔）默认：生动活泼,节奏明快,热情洋溢"
             )
             # Roles and Structure
             )
             roles_person1 = gr.Textbox(
                 label="第一位发言者的角色",
+                value="main summarizer",
+                info="在对话中，第一个说话人扮演的角色，默认：主要负责总结的人"
             )
             roles_person2 = gr.Textbox(
                 label="第二位发言者的角色",
+                value="questioner/clarifier",
+                info="在对话中，第二个说话人所扮演的角色或承担的任务，默认：提问者/释疑者"
             )
             dialogue_structure = gr.Textbox(
                 label="对话结构",
+                value="Introduction,Main Content Summary,Conclusion",
+                info="对话结构的各个部分（用逗号隔开）默认：引言,主要内容的概括,总结"
+            )
+            engagement_techniques = gr.Textbox(
+                label="沟通技巧",
+                value="rhetorical questions,anecdotes,analogies,humor",
+                info="一些沟通和交流方式（用逗号隔开）"
             )
             # Podcast Identity
                 </h3>
                 """,
             )
+            ending_message = gr.Textbox(
+                label="结束语",
+                value="撒由那拉!",
+                info="结束语"
+            )
             tts_model = gr.Radio(
                 choices=["openai", "elevenlabs", "edge"],
                 value="openai",
                 label="文本转语音模型",
                 info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
             )
+            tts_openai_question = gr.Dropdown(
+                choices=[voice["name"] for voice in VOICE_OPTIONS],
+                value=VOICE_OPTIONS[12]["name"],  # 默认选择选项
+                label="OpenAI TTS 主持人",
+                info="选择OpenAI TTS 主持人角色语音"
+            )
+            tts_openai_answer = gr.Dropdown(
+                choices=[voice["name"] for voice in VOICE_OPTIONS],
+                value=VOICE_OPTIONS[15]["name"],  # 默认选择选项
+                label="OpenAI TTS 嘉宾",
+                info="选择OpenAI TTS 嘉宾角色语音"
+            )
             # Advanced Settings
             gr.Markdown(
             dialogue_structure, podcast_name,
             podcast_tagline, output_language, tts_model,
             creativity_level, user_instructions,
+            api_key_label, llm_model_name, longform,
+            engagement_techniques, tts_openai_question, tts_openai_answer, ending_message,
         ],
         outputs=audio_output
     )