File size: 17,817 Bytes
84fed9b f304938 11fdf2c f304938 11fdf2c 84fed9b c2d6c29 26af193 f304938 84fed9b 26af193 f304938 26af193 3c6d05f f304938 3c6d05f f304938 26af193 3c6d05f f304938 3c6d05f 84fed9b 3c6d05f f304938 3c6d05f f304938 3c6d05f 84fed9b bc010fb 8ea10e8 bc010fb 8ea10e8 bc010fb 84fed9b e3bea0f 26af193 84fed9b 11fdf2c 84fed9b 11fdf2c 84fed9b 11fdf2c 84fed9b 11fdf2c f3b14e5 84fed9b c2d6c29 644ec15 26af193 644ec15 26af193 644ec15 26af193 644ec15 37f0f07 644ec15 26af193 37f0f07 644ec15 37f0f07 26af193 644ec15 84fed9b 644ec15 26af193 644ec15 11fdf2c 644ec15 74245b5 644ec15 3c6d05f 644ec15 3c6d05f 644ec15 3c6d05f 11fdf2c 84fed9b 26af193 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 |
import dash
from dash import dcc, html, Input, Output, State, callback
import dash_bootstrap_components as dbc
import google.generativeai as genai
import numpy as np
import edge_tts
import asyncio
import io
import re
import base64
import logging
from dash.exceptions import PreventUpdate
import pandas as pd
import time
import os
from pydub import AudioSegment
# Set up logging: INFO-level root configuration plus the module-level logger
# that the callbacks in this file use to report failures.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Initialize Dash app with the Bootstrap stylesheet so the
# dash_bootstrap_components widgets used in the layout are styled.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# Initialize Gemini AI with environment variable.
# NOTE(review): os.environ.get returns None when GEMINI_API_KEY is unset, so a
# missing key is not detected here; presumably it only surfaces on the first
# model call -- confirm, and consider failing fast at startup.
genai.configure(api_key=os.environ.get('GEMINI_API_KEY'))
def _clean_script_text(text):
    """Remove characters a text-to-speech voice would read aloud or trip over."""
    # \w is Unicode-aware for str patterns, so letters and digits of every
    # script survive (the UI offers many non-English voices). Apostrophes and
    # hyphens are kept because dropping them changes words ("we're" -> "were").
    # Underscores are part of \w but are markdown noise, so strip them too.
    return re.sub(r"[^\w\s.,?!'’-]|_", '', text)


def generate_podcast_script(content, duration, num_hosts):
    """Ask Gemini for a podcast script about *content* and return speakable text.

    Args:
        content: Source material the podcast should discuss.
        duration: Human-readable target length (e.g. "1-5 min"); it is
            interpolated into the prompt verbatim.
        num_hosts: 1 produces a monologue prompt; any other value produces
            the two-person dialogue prompt.

    Returns:
        The model's response text with markdown/special characters removed.

    Raises:
        ValueError: If *content* is empty or whitespace only, so that the
            literal string "None" is never sent to the model.
    """
    if not content or not content.strip():
        raise ValueError("No content provided for the podcast script.")
    model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
    if num_hosts == 1:
        prompt = f"""
Create a podcast script for one person discussing the following content:
{content}
The podcast should last approximately {duration}. Include natural speech patterns,
humor, and occasional off-topic thoughts. Use occasional speech fillers like um, ah,
yes, I see, Ok now. Vary the emotional tone.
Format the script as a monologue without speaker labels.
Separate each paragraph with a blank line.
Do not use any special characters or markdown. Only include the monologue with proper punctuation.
Ensure the content flows naturally and stays relevant to the topic.
Limit the script length to match the requested duration of {duration}.
"""
    else:
        prompt = f"""
Create a podcast script for two people discussing the following content:
{content}
The podcast should last approximately {duration}. Include natural speech patterns,
humor, and occasional off-topic chit-chat. Use occasional speech fillers like um, ah,
yes, I see, Ok now. Vary the emotional tone.
Format the script as alternating lines of dialogue without speaker labels.
Separate each line with a blank line.
Do not use any special characters or markdown. Only include the alternating dialogue lines with proper punctuation.
Ensure the conversation flows naturally and stays relevant to the topic.
Limit the script length to match the requested duration of {duration}.
"""
    response = model.generate_content(prompt)
    # Remove any special characters that might be read aloud
    return _clean_script_text(response.text)
async def text_to_speech(text, voice):
    """Synthesize *text* with the edge-tts *voice* and return the audio bytes.

    Only stream chunks whose "type" is "audio" contribute to the result; their
    "data" payloads are concatenated in arrival order.
    """
    synthesizer = edge_tts.Communicate(text, voice)
    audio_parts = [
        piece["data"]
        async for piece in synthesizer.stream()
        if piece["type"] == "audio"
    ]
    return b"".join(audio_parts)
async def render_podcast(script, voice1, voice2, num_hosts):
    """Render *script* to speech and return ``(sample_rate, int16 PCM samples)``.

    Every non-blank line of the script is synthesized separately. With one
    host all lines use *voice1*; otherwise even-indexed lines use *voice1*
    and odd-indexed lines use *voice2*.

    edge-tts delivers MP3-encoded audio, so each clip is decoded with pydub
    and normalised to 24 kHz mono 16-bit before the raw PCM is joined.
    Reinterpreting the compressed bytes directly as int16 samples produces
    noise (or a ValueError when the byte count is odd).

    Args:
        script: Script text, one spoken line per text line; ``None`` or an
            empty string is treated as "nothing to render".
        voice1: Voice for the first (or only) host.
        voice2: Voice for the second host.
        num_hosts: 1 for a monologue, anything else for alternating voices.

    Returns:
        A ``(24000, numpy.ndarray)`` tuple of mono int16 samples. One second
        of silence is returned when no audio could be produced.
    """
    sample_rate = 24000
    lines = [line for line in (script or '').split('\n') if line.strip()]
    pcm_chunks = []
    for i, line in enumerate(lines):
        voice = voice1 if num_hosts == 1 or i % 2 == 0 else voice2
        encoded = await text_to_speech(line, voice)
        if not encoded:
            continue
        clip = AudioSegment.from_file(io.BytesIO(encoded), format="mp3")
        # Force one common PCM layout so the chunks can be joined byte-wise.
        clip = clip.set_frame_rate(sample_rate).set_channels(1).set_sample_width(2)
        pcm_chunks.append(clip.raw_data)
    if not pcm_chunks:
        logging.getLogger(__name__).warning("No valid audio segments were generated.")
        # Return silence if no valid audio was generated
        return (sample_rate, np.zeros(sample_rate, dtype=np.int16))
    podcast_audio = np.frombuffer(b''.join(pcm_chunks), dtype=np.int16)
    return (sample_rate, podcast_audio)
async def get_voice_list():
    """Fetch the edge-tts voice catalogue grouped by locale.

    Voices are stored under their short identifier (e.g. "en-US-AriaNeural"),
    which is the form the layout's default voice values use, so the defaults
    actually match an entry in the generated dropdown options. The long
    "Name" is used only if a voice entry has no "ShortName".

    Returns:
        dict mapping a locale code (the voice's "Locale" field) to the list
        of voice identifiers available for that locale.
    """
    voices = await edge_tts.list_voices()
    voice_dict = {}
    for voice in voices:
        identifier = voice.get("ShortName") or voice["Name"]
        voice_dict.setdefault(voice["Locale"], []).append(identifier)
    return voice_dict
# Locale code -> human-readable language name shown in the language dropdowns.
# The voice-option callbacks look the locale code back up from the display name.
language_names = {
    'af-ZA': 'Afrikaans (South Africa)',
    'am-ET': 'Amharic (Ethiopia)',
    'ar-AE': 'Arabic (UAE)',
    'ar-BH': 'Arabic (Bahrain)',
    'ar-DZ': 'Arabic (Algeria)',
    'ar-EG': 'Arabic (Egypt)',
    'ar-IQ': 'Arabic (Iraq)',
    'ar-JO': 'Arabic (Jordan)',
    'ar-KW': 'Arabic (Kuwait)',
    'ar-LB': 'Arabic (Lebanon)',
    'ar-LY': 'Arabic (Libya)',
    'ar-MA': 'Arabic (Morocco)',
    'ar-OM': 'Arabic (Oman)',
    'ar-QA': 'Arabic (Qatar)',
    'ar-SA': 'Arabic (Saudi Arabia)',
    'ar-SY': 'Arabic (Syria)',
    'ar-TN': 'Arabic (Tunisia)',
    'ar-YE': 'Arabic (Yemen)',
    'az-AZ': 'Azerbaijani (Azerbaijan)',
    'bg-BG': 'Bulgarian (Bulgaria)',
    'bn-BD': 'Bengali (Bangladesh)',
    'bn-IN': 'Bengali (India)',
    'bs-BA': 'Bosnian (Bosnia and Herzegovina)',
    'ca-ES': 'Catalan (Spain)',
    'cs-CZ': 'Czech (Czech Republic)',
    'cy-GB': 'Welsh (United Kingdom)',
    'da-DK': 'Danish (Denmark)',
    'de-AT': 'German (Austria)',
    'de-CH': 'German (Switzerland)',
    'de-DE': 'German (Germany)',
    'el-GR': 'Greek (Greece)',
    'en-AU': 'English (Australia)',
    'en-CA': 'English (Canada)',
    'en-GB': 'English (United Kingdom)',
    'en-GH': 'English (Ghana)',
    'en-HK': 'English (Hong Kong SAR)',
    'en-IE': 'English (Ireland)',
    'en-IN': 'English (India)',
    'en-KE': 'English (Kenya)',
    'en-NG': 'English (Nigeria)',
    'en-NZ': 'English (New Zealand)',
    'en-PH': 'English (Philippines)',
    'en-SG': 'English (Singapore)',
    'en-TZ': 'English (Tanzania)',
    'en-US': 'English (United States)',
    'en-ZA': 'English (South Africa)',
    'es-AR': 'Spanish (Argentina)',
    'es-BO': 'Spanish (Bolivia)',
    'es-CL': 'Spanish (Chile)',
    'es-CO': 'Spanish (Colombia)',
    'es-CR': 'Spanish (Costa Rica)',
    'es-CU': 'Spanish (Cuba)',
    'es-DO': 'Spanish (Dominican Republic)',
    'es-EC': 'Spanish (Ecuador)',
    'es-ES': 'Spanish (Spain)',
    'es-GQ': 'Spanish (Equatorial Guinea)',
    'es-GT': 'Spanish (Guatemala)',
    'es-HN': 'Spanish (Honduras)',
    'es-MX': 'Spanish (Mexico)',
    'es-NI': 'Spanish (Nicaragua)',
    'es-PA': 'Spanish (Panama)',
    'es-PE': 'Spanish (Peru)',
    'es-PR': 'Spanish (Puerto Rico)',
    'es-PY': 'Spanish (Paraguay)',
    'es-SV': 'Spanish (El Salvador)',
    'es-US': 'Spanish (United States)',
    'es-UY': 'Spanish (Uruguay)',
    'es-VE': 'Spanish (Venezuela)',
    'et-EE': 'Estonian (Estonia)',
    'eu-ES': 'Basque (Spain)',
    'fa-IR': 'Persian (Iran)',
    'fi-FI': 'Finnish (Finland)',
    'fil-PH': 'Filipino (Philippines)',
    'fr-BE': 'French (Belgium)',
    'fr-CA': 'French (Canada)',
    'fr-CH': 'French (Switzerland)',
    'fr-FR': 'French (France)',
    'ga-IE': 'Irish (Ireland)',
    'gl-ES': 'Galician (Spain)',
    'gu-IN': 'Gujarati (India)',
    'he-IL': 'Hebrew (Israel)',
    'hi-IN': 'Hindi (India)',
    'hr-HR': 'Croatian (Croatia)',
    'hu-HU': 'Hungarian (Hungary)',
    'hy-AM': 'Armenian (Armenia)',
    'id-ID': 'Indonesian (Indonesia)',
    'is-IS': 'Icelandic (Iceland)',
    'it-IT': 'Italian (Italy)',
    'ja-JP': 'Japanese (Japan)',
    'jv-ID': 'Javanese (Indonesia)',
    'ka-GE': 'Georgian (Georgia)',
    'kk-KZ': 'Kazakh (Kazakhstan)',
    'km-KH': 'Khmer (Cambodia)',
    'kn-IN': 'Kannada (India)',
    'ko-KR': 'Korean (Korea)',
    'lo-LA': 'Lao (Laos)',
    'lt-LT': 'Lithuanian (Lithuania)',
    'lv-LV': 'Latvian (Latvia)',
    'mk-MK': 'Macedonian (North Macedonia)',
    'ml-IN': 'Malayalam (India)',
    'mn-MN': 'Mongolian (Mongolia)',
    'mr-IN': 'Marathi (India)',
    'ms-MY': 'Malay (Malaysia)',
    'mt-MT': 'Maltese (Malta)',
    'my-MM': 'Burmese (Myanmar)',
    'nb-NO': 'Norwegian (Bokmål, Norway)',
    'ne-NP': 'Nepali (Nepal)',
    'nl-BE': 'Dutch (Belgium)',
    'nl-NL': 'Dutch (Netherlands)',
    'pl-PL': 'Polish (Poland)',
    'ps-AF': 'Pashto (Afghanistan)',
    'pt-BR': 'Portuguese (Brazil)',
    'pt-PT': 'Portuguese (Portugal)',
    'ro-RO': 'Romanian (Romania)',
    'ru-RU': 'Russian (Russia)',
    'si-LK': 'Sinhala (Sri Lanka)',
    'sk-SK': 'Slovak (Slovakia)',
    'sl-SI': 'Slovenian (Slovenia)',
    'so-SO': 'Somali (Somalia)',
    'sq-AL': 'Albanian (Albania)',
    'sr-RS': 'Serbian (Serbia)',
    'sv-SE': 'Swedish (Sweden)',
    'sw-KE': 'Swahili (Kenya)',
    'sw-TZ': 'Swahili (Tanzania)',
    'ta-IN': 'Tamil (India)',
    'ta-LK': 'Tamil (Sri Lanka)',
    'ta-MY': 'Tamil (Malaysia)',
    'ta-SG': 'Tamil (Singapore)',
    'te-IN': 'Telugu (India)',
    'th-TH': 'Thai (Thailand)',
    'tr-TR': 'Turkish (Turkey)',
    'uk-UA': 'Ukrainian (Ukraine)',
    'ur-IN': 'Urdu (India)',
    'ur-PK': 'Urdu (Pakistan)',
    'uz-UZ': 'Uzbek (Uzbekistan)',
    'vi-VN': 'Vietnamese (Vietnam)',
    'wuu-CN': 'Wu Chinese (China)',
    'yue-CN': 'Cantonese (China)',
    'zh-CN': 'Chinese (Mandarin, Simplified)',
    'zh-HK': 'Chinese (Cantonese, Traditional)',
    'zh-TW': 'Chinese (Taiwanese Mandarin)',
    'zu-ZA': 'Zulu (South Africa)',
}
# Get voice list (this should be run once at startup).
# asyncio.run drives the get_voice_list coroutine to completion at import time;
# the resulting locale -> voices dict is read by the voice-option callbacks.
voice_dict = asyncio.run(get_voice_list())
# Layout
def _language_options():
    """Build one dropdown option per display name in language_names."""
    return [{"label": lang, "value": lang} for lang in language_names.values()]


# Dashed drop-zone styling for the document upload widget.
_upload_style = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px 0'
}

# Left column: content input, document upload, duration/host choices,
# language/voice pickers and the "Generate Script" button.
_input_column = dbc.Col(
    [
        dbc.Card([
            dbc.CardBody([
                dbc.Textarea(
                    id="content-input",
                    placeholder="Paste your content or upload a document",
                    rows=5,
                    className="my-3",
                ),
                dcc.Upload(
                    id='document-upload',
                    children=html.Div(['Drag and Drop or ', html.A('Select a File')]),
                    style=_upload_style,
                ),
                dbc.RadioItems(
                    id="duration",
                    options=[
                        {"label": "1-5 min", "value": "1-5 min"},
                        {"label": "5-10 min", "value": "5-10 min"},
                        {"label": "10-15 min", "value": "10-15 min"}
                    ],
                    value="1-5 min",
                    inline=True,
                    className="my-3",
                ),
                dbc.RadioItems(
                    id="num-hosts",
                    options=[
                        {"label": "1 host", "value": 1},
                        {"label": "2 hosts", "value": 2}
                    ],
                    value=2,
                    inline=True,
                    className="my-3",
                ),
                dbc.Select(
                    id="lang1-select",
                    options=_language_options(),
                    value="English (United States)",
                    className="my-2",
                ),
                # Voice options are filled in by the language callbacks.
                dbc.Select(
                    id="voice1-select",
                    value="en-US-AriaNeural",
                    className="my-2",
                ),
                dbc.Select(
                    id="lang2-select",
                    options=_language_options(),
                    value="English (United States)",
                    className="my-2",
                ),
                dbc.Select(
                    id="voice2-select",
                    value="en-US-BrianNeural",
                    className="my-2",
                ),
                dbc.Button("Generate Script", id="generate-btn", color="primary", className="mt-3"),
            ])
        ])
    ],
    width=7,  # Adjust the width as needed
)

# Right column: generated script, "Generate Podcast" button, audio player
# and the download target.
_output_column = dbc.Col(
    [
        dbc.Card([
            dbc.CardBody([
                dcc.Loading(
                    id="loading-script",
                    type="default",
                    children=[
                        dbc.Progress(id="script-progress", value=0, className="my-3"),
                        dbc.Textarea(id="script-output", rows=20, className="my-3"),
                    ],
                ),
                dbc.Button("Generate Podcast", id="generate-podcast-btn", color="success", className="mt-3"),
                dcc.Loading(
                    id="loading-podcast",
                    type="default",
                    children=[
                        dbc.Progress(id="podcast-progress", value=0, className="my-3"),
                        html.Div(id="audio-output", className="my-3"),
                    ],
                ),
                dcc.Download(id="download-audio")
            ])
        ])
    ],
    width=5,  # Adjust the width as needed
)

app.layout = dbc.Container(
    [
        html.H1("AI Podcast Generator", className="my-4"),
        dbc.Row([_input_column, _output_column]),
    ],
    fluid=True,
)
# Callbacks (continued)
@app.callback(
    Output("voice1-select", "options"),
    Input("lang1-select", "value"),
)
def update_voice1_options(lang):
    """Return the voice dropdown options for the first host's language.

    *lang* is a display name from language_names; it is mapped back to its
    locale code (first match wins) and the voices stored for that locale in
    voice_dict become the options. The literal string "None" and unknown
    names yield an empty list.
    """
    if lang == "None":
        return []
    locale_code = None
    for code, display_name in language_names.items():
        if display_name == lang:
            locale_code = code
            break
    available = voice_dict.get(locale_code, [])
    return [{"label": name, "value": name} for name in available]
@app.callback(
    Output("voice2-select", "options"),
    Input("lang2-select", "value"),
)
def update_voice2_options(lang):
    """Return the voice dropdown options for the second host's language.

    *lang* is a display name from language_names; it is mapped back to its
    locale code (first match wins) and the voices stored for that locale in
    voice_dict become the options. The literal string "None" and unknown
    names yield an empty list.
    """
    if lang == "None":
        return []
    locale_code = None
    for code, display_name in language_names.items():
        if display_name == lang:
            locale_code = code
            break
    available = voice_dict.get(locale_code, [])
    return [{"label": name, "value": name} for name in available]
@app.callback(
    [Output("script-output", "value"),
     Output("script-progress", "value")],
    Input("generate-btn", "n_clicks"),
    [State("content-input", "value"),
     State("duration", "value"),
     State("num-hosts", "value")],
    prevent_initial_call=True
)
def generate_script(n_clicks, content, duration, num_hosts):
    """Generate the podcast script when "Generate Script" is clicked.

    Args:
        n_clicks: Click counter of the button; ``None`` means no click yet.
        content: Text from the content textarea (may be ``None`` or blank).
        duration: Selected duration label.
        num_hosts: Selected number of hosts.

    Returns:
        ``(script_text, progress)``: the script and 100 on success, or an
        "Error: ..." message and 0 on failure or missing content.

    Raises:
        PreventUpdate: If the callback fires without a click.
    """
    if n_clicks is None:
        raise PreventUpdate
    # Nothing to summarise: tell the user instead of prompting the model with
    # an empty or "None" document.
    if not content or not content.strip():
        return "Error: Please paste some content or upload a document first.", 0
    # The previous 5 second sleep loop only simulated progress that a
    # single-return callback cannot report, so it has been removed.
    try:
        script = generate_podcast_script(content, duration, num_hosts)
    except Exception as e:
        # Top-level UI boundary: log with traceback and show the message.
        logger.exception("Error generating script: %s", e)
        return f"Error: {str(e)}", 0
    return script, 100
@app.callback(
    [Output("audio-output", "children"),
     Output("download-audio", "data"),
     Output("podcast-progress", "value")],
    Input("generate-podcast-btn", "n_clicks"),
    [State("script-output", "value"),
     State("voice1-select", "value"),
     State("voice2-select", "value"),
     State("num-hosts", "value")],
    prevent_initial_call=True
)
def render_and_download_podcast(n_clicks, script, voice1, voice2, num_hosts):
    """Render the script to MP3, embed a player and trigger a download.

    Args:
        n_clicks: Click counter of "Generate Podcast"; ``None`` means no click.
        script: Script text from the script textarea (may be ``None``/blank).
        voice1: Voice for the first (or only) host.
        voice2: Voice for the second host.
        num_hosts: Selected number of hosts.

    Returns:
        ``(player_or_error, download_data, progress)``: an ``html.Audio``
        element with an inline base64 MP3, the ``dcc.send_bytes`` payload for
        "podcast.mp3" and 100 on success; an error ``html.Div``, ``None`` and
        0 otherwise.

    Raises:
        PreventUpdate: If the callback fires without a click.
    """
    if n_clicks is None:
        raise PreventUpdate
    # Without a script there is nothing to speak; say so explicitly instead of
    # surfacing an AttributeError or rendering a second of silence.
    if not script or not script.strip():
        return html.Div("Error: Generate or enter a script before rendering the podcast."), None, 0
    try:
        # Run the async function in a synchronous context
        sample_rate, audio_data = asyncio.run(render_podcast(script, voice1, voice2, num_hosts))
        # Wrap the PCM samples so pydub can encode them
        pcm_audio = AudioSegment(
            audio_data.tobytes(),
            frame_rate=sample_rate,
            sample_width=audio_data.dtype.itemsize,
            channels=1
        )
        # Encode to MP3 in memory; getvalue() returns the whole buffer
        # regardless of the stream position.
        buffer = io.BytesIO()
        pcm_audio.export(buffer, format="mp3")
        mp3_bytes = buffer.getvalue()
        # Create base64 audio for playback
        audio_base64 = base64.b64encode(mp3_bytes).decode('utf-8')
        audio_src = f"data:audio/mp3;base64,{audio_base64}"
        return html.Audio(src=audio_src, controls=True), dcc.send_bytes(mp3_bytes, "podcast.mp3"), 100
    except Exception as e:
        # Top-level UI boundary: log with traceback and show the message.
        logger.exception("Error rendering podcast: %s", e)
        return html.Div(f"Error: {str(e)}"), None, 0
@app.callback(
    [Output("lang2-select", "style"),
     Output("voice2-select", "style")],
    Input("num-hosts", "value"),
)
def update_second_voice_visibility(num_hosts):
    """Show the second host's language/voice selectors only for two hosts.

    Returns a pair of style dicts (language select, voice select) whose
    "display" is "block" when *num_hosts* equals 2 and "none" otherwise.
    """
    display = "block" if num_hosts == 2 else "none"
    return {"display": display}, {"display": display}
@app.callback(
    Output("content-input", "value"),
    Input("document-upload", "contents"),
    State("document-upload", "filename"),
    prevent_initial_call=True
)
def update_content(contents, filename):
    """Load an uploaded document into the content textarea.

    The file type is chosen from the filename's extension (case-insensitive)
    rather than from a substring match, so names such as "cmd_notes.pdf" are
    no longer mistaken for markdown and "DATA.CSV" is recognised.

    Args:
        contents: Upload payload of the form "<header>,<base64 data>", or
            ``None`` when nothing was uploaded.
        filename: Name of the uploaded file.

    Returns:
        The document text (tables are rendered with ``DataFrame.to_string``),
        or a human-readable message for unsupported or unreadable files.

    Raises:
        PreventUpdate: If *contents* is ``None``, so the textarea keeps its
            current value instead of being cleared.
    """
    if contents is None:
        raise PreventUpdate
    try:
        # Split only on the first comma: everything after it is the payload.
        _, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)
        extension = os.path.splitext(filename or '')[1].lower()
        if extension == '.csv':
            # Assume that the user uploaded a CSV file
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
            return df.to_string()
        if extension in ('.xls', '.xlsx', '.xlsm'):
            # Assume that the user uploaded an excel file
            df = pd.read_excel(io.BytesIO(decoded))
            return df.to_string()
        if extension in ('.txt', '.md', '.markdown'):
            # Assume that the user uploaded a text or markdown file
            return decoded.decode('utf-8')
        return 'Unsupported file type. Please upload a CSV, Excel, text, or markdown file.'
    except Exception as e:
        # Top-level UI boundary: log with traceback and show the message.
        logger.exception("Error processing uploaded file: %s", e)
        return f'There was an error processing this file: {str(e)}'
# Run the app
if __name__ == '__main__':
    # The server binds to every interface (0.0.0.0), and the interactive
    # debugger lets anyone who can reach it execute code. Debug mode is
    # therefore opt-in via DASH_DEBUG=1/true/yes instead of always on.
    debug_mode = os.environ.get('DASH_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    print("Starting the Dash application...")
    app.run(debug=debug_mode, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")
|