Spaces:

MicroHealth
/

ai-podcast-builder

Paused

App Files Community

bluenevus committed on Apr 12

Commit

89d4917

verified ·

1 Parent(s): 38f82cf

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -8

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import numpy as np
 import edge_tts
 import asyncio
 import io
 # Set up logging
 import logging
@@ -22,15 +23,18 @@ def generate_podcast_script(api_key, content, duration):
     {content}
     The podcast should last approximately {duration}. Include natural speech patterns,
-    humor, and occasional off-topic chit-chat. Use speech fillers like "um", "ah",
-    "yes", "I see", "Ok now". Vary the emotional tone.
     Format the script as alternating lines of dialogue without speaker labels.
-    Do not include any other text, markdown, or formatting. Only include the alternating dialogue lines.
     Ensure the conversation flows naturally and stays relevant to the topic.
     """
     response = model.generate_content(prompt)
-    return response.text
 async def text_to_speech(text, voice):
     communicate = edge_tts.Communicate(text, voice)
@@ -65,7 +69,13 @@ async def render_podcast(api_key, script, voice1, voice2):
 async def get_voice_list():
     voices = await edge_tts.list_voices()
-    return [voice["Name"] for voice in voices]
 # Gradio Interface
 with gr.Blocks() as demo:
@@ -79,11 +89,28 @@ with gr.Blocks() as demo:
     duration = gr.Radio(["1-5 min", "5-10 min", "10-15 min"], label="Estimated podcast duration")
-    voice_list = asyncio.run(get_voice_list())
     with gr.Row():
-        voice1_select = gr.Dropdown(label="Select Voice 1", choices=voice_list)
-        voice2_select = gr.Dropdown(label="Select Voice 2", choices=voice_list)
     generate_btn = gr.Button("Generate Script")
     script_output = gr.Textbox(label="Generated Script", lines=10)
@@ -91,6 +118,13 @@ with gr.Blocks() as demo:
     render_btn = gr.Button("Render Podcast")
     audio_output = gr.Audio(label="Generated Podcast")
     def generate_script_wrapper(api_key, content, duration):
         return generate_podcast_script(api_key, content, duration)

 import edge_tts
 import asyncio
 import io
+import re
 # Set up logging
 import logging
     {content}
     The podcast should last approximately {duration}. Include natural speech patterns,
+    humor, and occasional off-topic chit-chat. Use speech fillers like um, ah,
+    yes, I see, Ok now. Vary the emotional tone.
     Format the script as alternating lines of dialogue without speaker labels.
+    Do not use any special characters, markdown, or formatting. Only include the alternating dialogue lines.
     Ensure the conversation flows naturally and stays relevant to the topic.
+    Limit the script length to match the requested duration of {duration}.
     """
     response = model.generate_content(prompt)
+    # Remove any special characters that might be read aloud
+    clean_text = re.sub(r'[^a-zA-Z0-9\s.,?!]', '', response.text)
+    return clean_text
 async def text_to_speech(text, voice):
     communicate = edge_tts.Communicate(text, voice)
 async def get_voice_list():
     voices = await edge_tts.list_voices()
+    voice_dict = {}
+    for voice in voices:
+        lang = voice["Locale"].split("-")[0]
+        if lang not in voice_dict:
+            voice_dict[lang] = []
+        voice_dict[lang].append(voice["Name"])
+    return voice_dict
 # Gradio Interface
 with gr.Blocks() as demo:
     duration = gr.Radio(["1-5 min", "5-10 min", "10-15 min"], label="Estimated podcast duration")
+    voice_dict = asyncio.run(get_voice_list())
+    languages = list(voice_dict.keys())
+    language_names = {
+        'en': 'English',
+        'es': 'Spanish',
+        'fr': 'French',
+        'de': 'German',
+        'it': 'Italian',
+        'ja': 'Japanese',
+        'ko': 'Korean',
+        'pt': 'Portuguese',
+        'ru': 'Russian',
+        'zh': 'Chinese'
+    }
     with gr.Row():
+        lang1_select = gr.Dropdown(label="Select Language 1", choices=[f"{language_names.get(lang, lang)} ({lang})" for lang in languages])
+        voice1_select = gr.Dropdown(label="Select Voice 1")
+    with gr.Row():
+        lang2_select = gr.Dropdown(label="Select Language 2", choices=[f"{language_names.get(lang, lang)} ({lang})" for lang in languages])
+        voice2_select = gr.Dropdown(label="Select Voice 2")
     generate_btn = gr.Button("Generate Script")
     script_output = gr.Textbox(label="Generated Script", lines=10)
     render_btn = gr.Button("Render Podcast")
     audio_output = gr.Audio(label="Generated Podcast")
+    def update_voices(lang):
+        lang_code = lang.split('(')[-1].strip(')')
+        return gr.Dropdown.update(choices=voice_dict[lang_code])
+    lang1_select.change(update_voices, inputs=[lang1_select], outputs=[voice1_select])
+    lang2_select.change(update_voices, inputs=[lang2_select], outputs=[voice2_select])
     def generate_script_wrapper(api_key, content, duration):
         return generate_podcast_script(api_key, content, duration)