Spaces:

MicroHealth
/

ai-podcast-builder

Paused

App Files Community

bluenevus committed on Apr 12

Commit

38f82cf

verified ·

1 Parent(s): a727789

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -15

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import google.generativeai as genai
 import numpy as np
 import edge_tts
 import asyncio
 # Set up logging
 import logging
@@ -17,19 +18,14 @@ def generate_podcast_script(api_key, content, duration):
     model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
     prompt = f"""
-    Create a podcast script for two people (Host 1 and Host 2) discussing the following content:
     {content}
     The podcast should last approximately {duration}. Include natural speech patterns,
     humor, and occasional off-topic chit-chat. Use speech fillers like "um", "ah",
     "yes", "I see", "Ok now". Vary the emotional tone.
-    Format the script as follows, with each line representing a single speaker's dialogue:
-    Host 1: Dialog
-    Host 2: Dialog
-    Host 1: Dialog
-    Host 2: Dialog
     Do not include any other text, markdown, or formatting. Only include the alternating dialogue lines.
     Ensure the conversation flows naturally and stays relevant to the topic.
     """
@@ -38,19 +34,21 @@ def generate_podcast_script(api_key, content, duration):
 async def text_to_speech(text, voice):
     communicate = edge_tts.Communicate(text, voice)
-    audio_data = await communicate.to_wav()
-    return audio_data
 async def render_podcast(api_key, script, voice1, voice2):
     lines = script.split('\n')
     audio_segments = []
-    for line in lines:
-        if line.startswith("Host 1:"):
-            audio = await text_to_speech(line[7:], voice1)
-            audio_segments.append(audio)
-        elif line.startswith("Host 2:"):
-            audio = await text_to_speech(line[7:], voice2)
             audio_segments.append(audio)
     if not audio_segments:

 import numpy as np
 import edge_tts
 import asyncio
+import io
 # Set up logging
 import logging
     model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
     prompt = f"""
+    Create a podcast script for two people discussing the following content:
     {content}
     The podcast should last approximately {duration}. Include natural speech patterns,
     humor, and occasional off-topic chit-chat. Use speech fillers like "um", "ah",
     "yes", "I see", "Ok now". Vary the emotional tone.
+    Format the script as alternating lines of dialogue without speaker labels.
     Do not include any other text, markdown, or formatting. Only include the alternating dialogue lines.
     Ensure the conversation flows naturally and stays relevant to the topic.
     """
 async def text_to_speech(text, voice):
     communicate = edge_tts.Communicate(text, voice)
+    audio = io.BytesIO()
+    async for chunk in communicate.stream():
+        if chunk["type"] == "audio":
+            audio.write(chunk["data"])
+    audio.seek(0)
+    return audio.read()
 async def render_podcast(api_key, script, voice1, voice2):
     lines = script.split('\n')
     audio_segments = []
+    for i, line in enumerate(lines):
+        if line.strip():  # Skip empty lines
+            voice = voice1 if i % 2 == 0 else voice2
+            audio = await text_to_speech(line, voice)
             audio_segments.append(audio)
     if not audio_segments: