Update app.py
Browse files
app.py
CHANGED
@@ -14,23 +14,39 @@ logger = logging.getLogger(__name__)
|
|
14 |
# Initialize Gemini AI
|
15 |
genai.configure(api_key='YOUR_GEMINI_API_KEY')
|
16 |
|
17 |
-
def generate_podcast_script(api_key, content, duration):
|
18 |
genai.configure(api_key=api_key)
|
19 |
model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
response = model.generate_content(prompt)
|
35 |
# Remove any special characters that might be read aloud
|
36 |
clean_text = re.sub(r'[^a-zA-Z0-9\s.,?!]', '', response.text)
|
@@ -45,14 +61,19 @@ async def text_to_speech(text, voice):
|
|
45 |
audio.seek(0)
|
46 |
return audio.read()
|
47 |
|
48 |
-
async def render_podcast(api_key, script, voice1, voice2):
|
49 |
lines = [line for line in script.split('\n') if line.strip()]
|
50 |
audio_segments = []
|
51 |
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
if not audio_segments:
|
58 |
logger.warning("No valid audio segments were generated.")
|
@@ -128,6 +149,8 @@ with gr.Blocks() as demo:
|
|
128 |
|
129 |
duration = gr.Radio(["1-5 min", "5-10 min", "10-15 min"], label="Estimated podcast duration")
|
130 |
|
|
|
|
|
131 |
voice_dict = asyncio.run(get_voice_list())
|
132 |
languages = list(voice_dict.keys())
|
133 |
languages.insert(0, "None") # Add "None" option for single speaker
|
@@ -166,18 +189,19 @@ with gr.Blocks() as demo:
|
|
166 |
lang1_select.change(update_voices, inputs=[lang1_select], outputs=[voice1_select])
|
167 |
lang2_select.change(update_voices, inputs=[lang2_select], outputs=[voice2_select])
|
168 |
|
169 |
-
def generate_script_wrapper(api_key, content, duration):
|
170 |
-
return generate_podcast_script(api_key, content, duration)
|
171 |
|
172 |
-
async def render_podcast_wrapper(api_key, script, voice1, voice2):
|
173 |
-
|
174 |
-
# Use only voice1 for all lines
|
175 |
-
return await render_podcast(api_key, script, voice1, voice1)
|
176 |
-
else:
|
177 |
-
return await render_podcast(api_key, script, voice1, voice2)
|
178 |
|
179 |
-
generate_btn.click(generate_script_wrapper, inputs=[api_key_input, content_input, duration], outputs=script_output)
|
180 |
-
render_btn.click(render_podcast_wrapper, inputs=[api_key_input, script_output, voice1_select, voice2_select], outputs=audio_output)
|
|
|
|
|
|
|
|
|
|
|
181 |
|
182 |
if __name__ == "__main__":
|
183 |
demo.launch()
|
|
|
14 |
# Initialize Gemini AI
|
15 |
genai.configure(api_key='YOUR_GEMINI_API_KEY')
|
16 |
|
17 |
+
def generate_podcast_script(api_key, content, duration, num_hosts):
|
18 |
genai.configure(api_key=api_key)
|
19 |
model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
|
20 |
|
21 |
+
if num_hosts == 1:
|
22 |
+
prompt = f"""
|
23 |
+
Create a podcast script for one person discussing the following content:
|
24 |
+
{content}
|
25 |
+
|
26 |
+
The podcast should last approximately {duration}. Include natural speech patterns,
|
27 |
+
humor, and occasional off-topic thoughts. Use occasional speech fillers like um, ah,
|
28 |
+
yes, I see, Ok now. Vary the emotional tone.
|
29 |
+
Format the script as a monologue without speaker labels.
|
30 |
+
Separate each paragraph with a blank line.
|
31 |
+
Do not use any special characters or markdown. Only include the monologue with proper punctuation.
|
32 |
+
Ensure the content flows naturally and stays relevant to the topic.
|
33 |
+
Limit the script length to match the requested duration of {duration}.
|
34 |
+
"""
|
35 |
+
else:
|
36 |
+
prompt = f"""
|
37 |
+
Create a podcast script for two people discussing the following content:
|
38 |
+
{content}
|
39 |
+
|
40 |
+
The podcast should last approximately {duration}. Include natural speech patterns,
|
41 |
+
humor, and occasional off-topic chit-chat. Use occasional speech fillers like um, ah,
|
42 |
+
yes, I see, Ok now. Vary the emotional tone.
|
43 |
+
Format the script as alternating lines of dialogue without speaker labels.
|
44 |
+
Separate each line with a blank line.
|
45 |
+
Do not use any special characters or markdown. Only include the alternating dialogue lines with proper punctuation.
|
46 |
+
Ensure the conversation flows naturally and stays relevant to the topic.
|
47 |
+
Limit the script length to match the requested duration of {duration}.
|
48 |
+
"""
|
49 |
+
|
50 |
response = model.generate_content(prompt)
|
51 |
# Remove any special characters that might be read aloud
|
52 |
clean_text = re.sub(r'[^a-zA-Z0-9\s.,?!]', '', response.text)
|
|
|
61 |
audio.seek(0)
|
62 |
return audio.read()
|
63 |
|
64 |
+
async def render_podcast(api_key, script, voice1, voice2, num_hosts):
|
65 |
lines = [line for line in script.split('\n') if line.strip()]
|
66 |
audio_segments = []
|
67 |
|
68 |
+
if num_hosts == 1:
|
69 |
+
for line in lines:
|
70 |
+
audio = await text_to_speech(line, voice1)
|
71 |
+
audio_segments.append(audio)
|
72 |
+
else:
|
73 |
+
for i, line in enumerate(lines):
|
74 |
+
voice = voice1 if i % 2 == 0 else voice2
|
75 |
+
audio = await text_to_speech(line, voice)
|
76 |
+
audio_segments.append(audio)
|
77 |
|
78 |
if not audio_segments:
|
79 |
logger.warning("No valid audio segments were generated.")
|
|
|
149 |
|
150 |
duration = gr.Radio(["1-5 min", "5-10 min", "10-15 min"], label="Estimated podcast duration")
|
151 |
|
152 |
+
num_hosts = gr.Radio([1, 2], label="Number of podcast hosts", value=2)
|
153 |
+
|
154 |
voice_dict = asyncio.run(get_voice_list())
|
155 |
languages = list(voice_dict.keys())
|
156 |
languages.insert(0, "None") # Add "None" option for single speaker
|
|
|
189 |
lang1_select.change(update_voices, inputs=[lang1_select], outputs=[voice1_select])
|
190 |
lang2_select.change(update_voices, inputs=[lang2_select], outputs=[voice2_select])
|
191 |
|
192 |
+
def generate_script_wrapper(api_key, content, duration, num_hosts):
    """Gradio callback that delegates script generation.

    Forwards the four form values, unchanged and in the same order, to
    ``generate_podcast_script`` and hands its result back to Gradio.

    Args:
        api_key: Gemini API key entered by the user.
        content: Source material the podcast should discuss.
        duration: Selected duration label (one of the duration radio choices).
        num_hosts: Selected number of hosts (1 or 2 from the hosts radio).

    Returns:
        Whatever ``generate_podcast_script`` returns (presumably the cleaned
        script text -- confirm against that function's return statement).
    """
    script = generate_podcast_script(api_key, content, duration, num_hosts)
    return script
|
194 |
|
195 |
+
async def render_podcast_wrapper(api_key, script, voice1, voice2, num_hosts):
    """Gradio callback that delegates audio rendering.

    Awaits ``render_podcast`` with the same arguments, in the same order,
    and returns its result. Voice selection for one versus two hosts is
    left entirely to ``render_podcast``.

    Args:
        api_key: Gemini API key entered by the user.
        script: Script text to be rendered to audio.
        voice1: Voice chosen for the first speaker.
        voice2: Voice chosen for the second speaker.
        num_hosts: Selected number of hosts (1 or 2 from the hosts radio).

    Returns:
        Whatever ``render_podcast`` returns (presumably the rendered audio
        for the audio output component -- confirm against that function).
    """
    rendered = await render_podcast(api_key, script, voice1, voice2, num_hosts)
    return rendered
|
|
|
|
|
|
|
|
|
197 |
|
198 |
+
generate_btn.click(generate_script_wrapper, inputs=[api_key_input, content_input, duration, num_hosts], outputs=script_output)
|
199 |
+
render_btn.click(render_podcast_wrapper, inputs=[api_key_input, script_output, voice1_select, voice2_select, num_hosts], outputs=audio_output)
|
200 |
+
|
201 |
+
def update_second_voice_visibility(num_hosts):
    """Show or hide the second speaker's controls based on host count.

    Args:
        num_hosts: Current value of the hosts radio.

    Returns:
        A pair of ``gr.update`` objects, one per output component, both
        setting ``visible`` to True only when ``num_hosts`` equals 2.
    """
    # Both outputs share the same visibility rule, so evaluate it once.
    show_second_speaker = num_hosts == 2
    language_update = gr.update(visible=show_second_speaker)
    voice_update = gr.update(visible=show_second_speaker)
    return language_update, voice_update
|
203 |
+
|
204 |
+
num_hosts.change(update_second_voice_visibility, inputs=[num_hosts], outputs=[lang2_select, voice2_select])
|
205 |
|
206 |
if __name__ == "__main__":
|
207 |
demo.launch()
|