|
import dash |
|
from dash import dcc, html, Input, Output, State, callback |
|
import dash_bootstrap_components as dbc |
|
import google.generativeai as genai |
|
import numpy as np |
|
import edge_tts |
|
import asyncio |
|
import io |
|
import re |
|
import base64 |
|
import logging |
|
from dash.exceptions import PreventUpdate |
|
import pandas as pd |
|
import time |
|
import os |
|
from pydub import AudioSegment |
|
|
|
|
|
# Process-wide logging at INFO level; `logger` is the module's own logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The Dash application, styled with the stock Bootstrap theme.
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)

# Hand the Gemini client the API key found in the GEMINI_API_KEY environment
# variable (the value is None when the variable is not set).
genai.configure(api_key=os.environ.get('GEMINI_API_KEY'))
|
|
|
def generate_podcast_script(content, duration, num_hosts):
    """Ask Gemini for a podcast script about *content* and return it cleaned up.

    Args:
        content: Source text the podcast should discuss.
        duration: Human-readable target length (e.g. "1-5 min"); it is
            interpolated into the prompt verbatim.
        num_hosts: 1 requests a monologue; any other value requests a
            two-person dialogue.

    Returns:
        The model's text with markdown and symbol characters removed.

    Raises:
        Whatever the Gemini client raises while generating or while reading
        ``response.text``; this function does not catch anything.
    """
    model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')

    if num_hosts == 1:
        prompt = f"""
        Create a podcast script for one person discussing the following content:
        {content}

        The podcast should last approximately {duration}. Include natural speech patterns,
        humor, and occasional off-topic thoughts. Use occasional speech fillers like um, ah,
        yes, I see, Ok now. Vary the emotional tone.
        Format the script as a monologue without speaker labels.
        Separate each paragraph with a blank line.
        Do not use any special characters or markdown. Only include the monologue with proper punctuation.
        Ensure the content flows naturally and stays relevant to the topic.
        Limit the script length to match the requested duration of {duration}.
        """
    else:
        prompt = f"""
        Create a podcast script for two people discussing the following content:
        {content}

        The podcast should last approximately {duration}. Include natural speech patterns,
        humor, and occasional off-topic chit-chat. Use occasional speech fillers like um, ah,
        yes, I see, Ok now. Vary the emotional tone.
        Format the script as alternating lines of dialogue without speaker labels.
        Separate each line with a blank line.
        Do not use any special characters or markdown. Only include the alternating dialogue lines with proper punctuation.
        Ensure the conversation flows naturally and stays relevant to the topic.
        Limit the script length to match the requested duration of {duration}.
        """

    response = model.generate_content(prompt)

    # Remove markdown/symbol noise before the text is displayed and spoken.
    # `\w` is Unicode-aware for str patterns, so accented and non-Latin letters
    # survive; apostrophes and hyphens are kept so contractions ("don't") and
    # compounds ("off-topic") stay intact. Underscores are part of `\w` but are
    # markdown emphasis markers, so they are dropped explicitly.
    return re.sub(r"[^\w\s.,?!'\u2019-]|_", '', response.text)
|
|
|
async def text_to_speech(text, voice):
    """Synthesize *text* with the given edge_tts *voice*.

    Returns:
        All audio payload bytes emitted by the stream, concatenated in order.
        Stream events whose type is not "audio" are ignored.
    """
    stream = edge_tts.Communicate(text, voice).stream()
    collected = io.BytesIO()
    async for event in stream:
        if event["type"] != "audio":
            continue
        collected.write(event["data"])
    return collected.getvalue()
|
|
|
async def render_podcast(script, voice1, voice2, num_hosts):
    """Turn a script into one mono 16-bit PCM track.

    Every non-blank line of *script* is synthesized separately. With one host
    all lines use *voice1*; otherwise lines alternate, starting with *voice1*.

    Args:
        script: Script text, one utterance per line; ``None`` is treated as
            empty.
        voice1: Voice for the first (or only) host.
        voice2: Voice for the second host; unused when ``num_hosts == 1``.
        num_hosts: Number of speakers (1 or 2).

    Returns:
        ``(sample_rate, samples)`` where ``sample_rate`` is 24000 and
        ``samples`` is a one-dimensional ``np.int16`` array. One second of
        silence is returned when no audio could be produced.
    """
    sample_rate = 24000
    spoken_lines = [line for line in (script or '').split('\n') if line.strip()]

    pcm_parts = []
    for index, line in enumerate(spoken_lines):
        voice = voice1 if num_hosts == 1 or index % 2 == 0 else voice2
        encoded = await text_to_speech(line, voice)
        if not encoded:
            continue
        # edge_tts delivers compressed MP3 data, so each clip has to be
        # decoded before its bytes can be treated as PCM samples. Joining the
        # encoded bytes and viewing them as int16 yields noise (or a
        # ValueError when the byte count is odd).
        clip = AudioSegment.from_file(io.BytesIO(encoded), format="mp3")
        # Normalise every clip so the concatenated buffer really is
        # 24 kHz / mono / 16-bit, which is what the return value promises.
        clip = clip.set_frame_rate(sample_rate).set_channels(1).set_sample_width(2)
        pcm_parts.append(clip.raw_data)

    if not pcm_parts:
        logger.warning("No valid audio segments were generated.")
        return (sample_rate, np.zeros(sample_rate, dtype=np.int16))

    return (sample_rate, np.frombuffer(b''.join(pcm_parts), dtype=np.int16))
|
|
|
async def get_voice_list():
    """Fetch the edge_tts voice catalogue grouped by locale.

    Returns:
        A dict mapping each voice's ``Locale`` (e.g. ``"en-US"``) to the list
        of voice identifiers available for it, in catalogue order.
    """
    voices = await edge_tts.list_voices()
    voice_dict = {}
    for voice in voices:
        # Prefer the short identifier (e.g. "en-US-AriaNeural"): that is the
        # form used for the default dropdown selections in the layout, so the
        # defaults match an actual option. Fall back to the long name if a
        # catalogue entry has no short one.
        identifier = voice.get("ShortName") or voice["Name"]
        voice_dict.setdefault(voice["Locale"], []).append(identifier)
    return voice_dict
|
|
|
|
|
# Locale code -> human-readable language name shown in the language dropdowns.
# Insertion order is the order in which the options are listed.
language_names = {
    'af-ZA': 'Afrikaans (South Africa)',
    'am-ET': 'Amharic (Ethiopia)',
    'ar-AE': 'Arabic (UAE)',
    'ar-BH': 'Arabic (Bahrain)',
    'ar-DZ': 'Arabic (Algeria)',
    'ar-EG': 'Arabic (Egypt)',
    'ar-IQ': 'Arabic (Iraq)',
    'ar-JO': 'Arabic (Jordan)',
    'ar-KW': 'Arabic (Kuwait)',
    'ar-LB': 'Arabic (Lebanon)',
    'ar-LY': 'Arabic (Libya)',
    'ar-MA': 'Arabic (Morocco)',
    'ar-OM': 'Arabic (Oman)',
    'ar-QA': 'Arabic (Qatar)',
    'ar-SA': 'Arabic (Saudi Arabia)',
    'ar-SY': 'Arabic (Syria)',
    'ar-TN': 'Arabic (Tunisia)',
    'ar-YE': 'Arabic (Yemen)',
    'az-AZ': 'Azerbaijani (Azerbaijan)',
    'bg-BG': 'Bulgarian (Bulgaria)',
    'bn-BD': 'Bengali (Bangladesh)',
    'bn-IN': 'Bengali (India)',
    'bs-BA': 'Bosnian (Bosnia and Herzegovina)',
    'ca-ES': 'Catalan (Spain)',
    'cs-CZ': 'Czech (Czech Republic)',
    'cy-GB': 'Welsh (United Kingdom)',
    'da-DK': 'Danish (Denmark)',
    'de-AT': 'German (Austria)',
    'de-CH': 'German (Switzerland)',
    'de-DE': 'German (Germany)',
    'el-GR': 'Greek (Greece)',
    'en-AU': 'English (Australia)',
    'en-CA': 'English (Canada)',
    'en-GB': 'English (United Kingdom)',
    'en-GH': 'English (Ghana)',
    'en-HK': 'English (Hong Kong SAR)',
    'en-IE': 'English (Ireland)',
    'en-IN': 'English (India)',
    'en-KE': 'English (Kenya)',
    'en-NG': 'English (Nigeria)',
    'en-NZ': 'English (New Zealand)',
    'en-PH': 'English (Philippines)',
    'en-SG': 'English (Singapore)',
    'en-TZ': 'English (Tanzania)',
    'en-US': 'English (United States)',
    'en-ZA': 'English (South Africa)',
    'es-AR': 'Spanish (Argentina)',
    'es-BO': 'Spanish (Bolivia)',
    'es-CL': 'Spanish (Chile)',
    'es-CO': 'Spanish (Colombia)',
    'es-CR': 'Spanish (Costa Rica)',
    'es-CU': 'Spanish (Cuba)',
    'es-DO': 'Spanish (Dominican Republic)',
    'es-EC': 'Spanish (Ecuador)',
    'es-ES': 'Spanish (Spain)',
    'es-GQ': 'Spanish (Equatorial Guinea)',
    'es-GT': 'Spanish (Guatemala)',
    'es-HN': 'Spanish (Honduras)',
    'es-MX': 'Spanish (Mexico)',
    'es-NI': 'Spanish (Nicaragua)',
    'es-PA': 'Spanish (Panama)',
    'es-PE': 'Spanish (Peru)',
    'es-PR': 'Spanish (Puerto Rico)',
    'es-PY': 'Spanish (Paraguay)',
    'es-SV': 'Spanish (El Salvador)',
    'es-US': 'Spanish (United States)',
    'es-UY': 'Spanish (Uruguay)',
    'es-VE': 'Spanish (Venezuela)',
    'et-EE': 'Estonian (Estonia)',
    'eu-ES': 'Basque (Spain)',
    'fa-IR': 'Persian (Iran)',
    'fi-FI': 'Finnish (Finland)',
    'fil-PH': 'Filipino (Philippines)',
    'fr-BE': 'French (Belgium)',
    'fr-CA': 'French (Canada)',
    'fr-CH': 'French (Switzerland)',
    'fr-FR': 'French (France)',
    'ga-IE': 'Irish (Ireland)',
    'gl-ES': 'Galician (Spain)',
    'gu-IN': 'Gujarati (India)',
    'he-IL': 'Hebrew (Israel)',
    'hi-IN': 'Hindi (India)',
    'hr-HR': 'Croatian (Croatia)',
    'hu-HU': 'Hungarian (Hungary)',
    'hy-AM': 'Armenian (Armenia)',
    'id-ID': 'Indonesian (Indonesia)',
    'is-IS': 'Icelandic (Iceland)',
    'it-IT': 'Italian (Italy)',
    'ja-JP': 'Japanese (Japan)',
    'jv-ID': 'Javanese (Indonesia)',
    'ka-GE': 'Georgian (Georgia)',
    'kk-KZ': 'Kazakh (Kazakhstan)',
    'km-KH': 'Khmer (Cambodia)',
    'kn-IN': 'Kannada (India)',
    'ko-KR': 'Korean (Korea)',
    'lo-LA': 'Lao (Laos)',
    'lt-LT': 'Lithuanian (Lithuania)',
    'lv-LV': 'Latvian (Latvia)',
    'mk-MK': 'Macedonian (North Macedonia)',
    'ml-IN': 'Malayalam (India)',
    'mn-MN': 'Mongolian (Mongolia)',
    'mr-IN': 'Marathi (India)',
    'ms-MY': 'Malay (Malaysia)',
    'mt-MT': 'Maltese (Malta)',
    'my-MM': 'Burmese (Myanmar)',
    'nb-NO': 'Norwegian (Bokmål, Norway)',
    'ne-NP': 'Nepali (Nepal)',
    'nl-BE': 'Dutch (Belgium)',
    'nl-NL': 'Dutch (Netherlands)',
    'pl-PL': 'Polish (Poland)',
    'ps-AF': 'Pashto (Afghanistan)',
    'pt-BR': 'Portuguese (Brazil)',
    'pt-PT': 'Portuguese (Portugal)',
    'ro-RO': 'Romanian (Romania)',
    'ru-RU': 'Russian (Russia)',
    'si-LK': 'Sinhala (Sri Lanka)',
    'sk-SK': 'Slovak (Slovakia)',
    'sl-SI': 'Slovenian (Slovenia)',
    'so-SO': 'Somali (Somalia)',
    'sq-AL': 'Albanian (Albania)',
    'sr-RS': 'Serbian (Serbia)',
    'sv-SE': 'Swedish (Sweden)',
    'sw-KE': 'Swahili (Kenya)',
    'sw-TZ': 'Swahili (Tanzania)',
    'ta-IN': 'Tamil (India)',
    'ta-LK': 'Tamil (Sri Lanka)',
    'ta-MY': 'Tamil (Malaysia)',
    'ta-SG': 'Tamil (Singapore)',
    'te-IN': 'Telugu (India)',
    'th-TH': 'Thai (Thailand)',
    'tr-TR': 'Turkish (Turkey)',
    'uk-UA': 'Ukrainian (Ukraine)',
    'ur-IN': 'Urdu (India)',
    'ur-PK': 'Urdu (Pakistan)',
    'uz-UZ': 'Uzbek (Uzbekistan)',
    'vi-VN': 'Vietnamese (Vietnam)',
    'wuu-CN': 'Wu Chinese (China)',
    'yue-CN': 'Cantonese (China)',
    'zh-CN': 'Chinese (Mandarin, Simplified)',
    'zh-HK': 'Chinese (Cantonese, Traditional)',
    'zh-TW': 'Chinese (Taiwanese Mandarin)',
    'zu-ZA': 'Zulu (South Africa)',
}
|
|
|
|
|
# Voice catalogue (locale code -> list of voice names), resolved once while the
# module loads so the dropdown callbacks can read it without calling edge_tts.
voice_dict = asyncio.run(get_voice_list())
|
|
|
|
|
# Left column: source content, podcast options, and language/voice pickers.
# The voice dropdowns start without options; the language callbacks fill them.
_input_column = dbc.Col([
    dbc.Card([
        dbc.CardBody([
            dbc.Textarea(
                id="content-input",
                placeholder="Paste your content or upload a document",
                rows=5,
                className="my-3",
            ),
            dcc.Upload(
                id='document-upload',
                children=html.Div(['Drag and Drop or ', html.A('Select a File')]),
                style={
                    'width': '100%',
                    'height': '60px',
                    'lineHeight': '60px',
                    'borderWidth': '1px',
                    'borderStyle': 'dashed',
                    'borderRadius': '5px',
                    'textAlign': 'center',
                    'margin': '10px 0'
                },
            ),
            dbc.RadioItems(
                id="duration",
                options=[
                    {"label": "1-5 min", "value": "1-5 min"},
                    {"label": "5-10 min", "value": "5-10 min"},
                    {"label": "10-15 min", "value": "10-15 min"}
                ],
                value="1-5 min",
                inline=True,
                className="my-3",
            ),
            dbc.RadioItems(
                id="num-hosts",
                options=[
                    {"label": "1 host", "value": 1},
                    {"label": "2 hosts", "value": 2}
                ],
                value=2,
                inline=True,
                className="my-3",
            ),
            dbc.Select(
                id="lang1-select",
                options=[{"label": lang, "value": lang} for lang in language_names.values()],
                value="English (United States)",
                className="my-2",
            ),
            dbc.Select(
                id="voice1-select",
                value="en-US-AriaNeural",
                className="my-2",
            ),
            dbc.Select(
                id="lang2-select",
                options=[{"label": lang, "value": lang} for lang in language_names.values()],
                value="English (United States)",
                className="my-2",
            ),
            dbc.Select(
                id="voice2-select",
                value="en-US-BrianNeural",
                className="my-2",
            ),
            dbc.Button("Generate Script", id="generate-btn", color="primary", className="mt-3"),
        ])
    ])
], width=7)

# Right column: generated script, podcast rendering controls, and audio output.
_output_column = dbc.Col([
    dbc.Card([
        dbc.CardBody([
            dcc.Loading(
                id="loading-script",
                type="default",
                children=[
                    dbc.Progress(id="script-progress", value=0, className="my-3"),
                    dbc.Textarea(id="script-output", rows=20, className="my-3"),
                ],
            ),
            dbc.Button("Generate Podcast", id="generate-podcast-btn", color="success", className="mt-3"),
            dcc.Loading(
                id="loading-podcast",
                type="default",
                children=[
                    dbc.Progress(id="podcast-progress", value=0, className="my-3"),
                    html.Div(id="audio-output", className="my-3"),
                ],
            ),
            dcc.Download(id="download-audio")
        ])
    ])
], width=5)

# Page: a title above a single row holding the two columns.
app.layout = dbc.Container([
    html.H1("AI Podcast Generator", className="my-4"),
    dbc.Row([
        _input_column,
        _output_column,
    ]),
], fluid=True)
|
|
|
|
|
@app.callback(
    Output("voice1-select", "options"),
    Input("lang1-select", "value")
)
def update_voice1_options(lang):
    """Return the first host's voice options for the chosen display language.

    *lang* is a display name from ``language_names``; it is mapped back to its
    locale code and the voices stored under that code in ``voice_dict`` become
    the dropdown options. The literal string "None" and unknown names both
    produce an empty list.
    """
    if lang == "None":
        return []

    # Reverse lookup: first locale code whose display name equals the selection.
    locale_code = None
    for code, display_name in language_names.items():
        if display_name == lang:
            locale_code = code
            break

    return [
        {"label": voice_name, "value": voice_name}
        for voice_name in voice_dict.get(locale_code, [])
    ]
|
|
|
@app.callback(
    Output("voice2-select", "options"),
    Input("lang2-select", "value")
)
def update_voice2_options(lang):
    """Return the second host's voice options for the chosen display language.

    *lang* is a display name from ``language_names``; it is mapped back to its
    locale code and the voices stored under that code in ``voice_dict`` become
    the dropdown options. The literal string "None" and unknown names both
    produce an empty list.
    """
    if lang == "None":
        return []

    # Reverse lookup: first locale code whose display name equals the selection.
    locale_code = None
    for code, display_name in language_names.items():
        if display_name == lang:
            locale_code = code
            break

    return [
        {"label": voice_name, "value": voice_name}
        for voice_name in voice_dict.get(locale_code, [])
    ]
|
|
|
@app.callback(
    [Output("script-output", "value"),
     Output("script-progress", "value")],
    Input("generate-btn", "n_clicks"),
    [State("content-input", "value"),
     State("duration", "value"),
     State("num-hosts", "value")],
    prevent_initial_call=True
)
def generate_script(n_clicks, content, duration, num_hosts):
    """Generate the podcast script when "Generate Script" is clicked.

    Args:
        n_clicks: Click count of the generate button; ``None`` means no click.
        content: Text from the content textarea.
        duration: Selected duration label.
        num_hosts: Selected number of hosts.

    Returns:
        ``(script_text, progress)``: the script and 100 on success, or an
        ``"Error: ..."`` message and 0 on failure.

    Raises:
        PreventUpdate: When the callback fires without a click.
    """
    if n_clicks is None:
        raise PreventUpdate

    # Without source material the model would be prompted with "None" or an
    # empty string, so report the problem instead of spending an API call.
    if not content or not content.strip():
        return "Error: Please paste some content or upload a document first.", 0

    # No artificial delay here: a callback only delivers its outputs once it
    # returns, so sleeping beforehand cannot animate the progress bar and just
    # makes the user wait longer.
    try:
        script = generate_podcast_script(content, duration, num_hosts)
    except Exception as e:
        # Top-level UI boundary: log with traceback and show the message.
        logger.exception(f"Error generating script: {str(e)}")
        return f"Error: {str(e)}", 0
    return script, 100
|
|
|
@app.callback(
    [Output("audio-output", "children"),
     Output("download-audio", "data"),
     Output("podcast-progress", "value")],
    Input("generate-podcast-btn", "n_clicks"),
    [State("script-output", "value"),
     State("voice1-select", "value"),
     State("voice2-select", "value"),
     State("num-hosts", "value")],
    prevent_initial_call=True
)
def render_and_download_podcast(n_clicks, script, voice1, voice2, num_hosts):
    """Render the script to audio, embed a player, and trigger a download.

    Returns:
        ``(player, download, progress)``. On success: an ``html.Audio`` whose
        source is a base64 data URI, a ``podcast.mp3`` download payload, and
        100. On failure: a ``html.Div`` with the error text, ``None``, and 0.

    Raises:
        PreventUpdate: When the callback fires without a click.
    """
    if n_clicks is None:
        raise PreventUpdate

    try:
        # The renderer is a coroutine; run it to completion on a fresh loop.
        sample_rate, audio_data = asyncio.run(
            render_podcast(script, voice1, voice2, num_hosts)
        )

        # Wrap the sample array as a mono pydub segment.
        segment = AudioSegment(
            audio_data.tobytes(),
            frame_rate=sample_rate,
            sample_width=audio_data.dtype.itemsize,
            channels=1,
        )

        # Encode to MP3 entirely in memory.
        mp3_buffer = io.BytesIO()
        segment.export(mp3_buffer, format="mp3")
        mp3_bytes = mp3_buffer.getvalue()

        # Inline the encoded audio as a data URI for the in-page player.
        encoded = base64.b64encode(mp3_bytes).decode('utf-8')
        player = html.Audio(src=f"data:audio/mp3;base64,{encoded}", controls=True)
        download = dcc.send_bytes(mp3_bytes, "podcast.mp3")
        return player, download, 100
    except Exception as e:
        message = str(e)
        logger.error(f"Error rendering podcast: {message}")
        return html.Div(f"Error: {message}"), None, 0
|
|
|
@app.callback(
    [Output("lang2-select", "style"),
     Output("voice2-select", "style")],
    Input("num-hosts", "value")
)
def update_second_voice_visibility(num_hosts):
    """Show the second host's language/voice selectors only for two hosts.

    Returns:
        Two style dicts (language selector, voice selector) whose ``display``
        is ``"block"`` when ``num_hosts == 2`` and ``"none"`` otherwise.
    """
    display = "block" if num_hosts == 2 else "none"
    return {"display": display}, {"display": display}
|
|
|
@app.callback(
    Output("content-input", "value"),
    Input("document-upload", "contents"),
    State("document-upload", "filename"),
    prevent_initial_call=True
)
def update_content(contents, filename):
    """Load an uploaded document into the content textarea.

    Args:
        contents: Upload payload in the form ``"<header>,<base64 data>"``.
        filename: Name of the uploaded file; its extension selects the parser.

    Returns:
        The text for the textarea: a rendered table for CSV/Excel files, the
        decoded text for text/markdown files, or a message when the type is
        unsupported or the file cannot be processed.

    Raises:
        PreventUpdate: When there is no upload payload, so the current
            textarea content is left untouched rather than cleared.
    """
    if contents is None:
        raise PreventUpdate

    # Split on the first comma only; everything after it is the payload.
    _, _, content_string = contents.partition(',')

    # Decide by the real extension, case-insensitively. A substring test would
    # treat names such as "cmd_report.pdf" as markdown and miss "NOTES.TXT".
    extension = os.path.splitext(filename or '')[1].lower()

    try:
        # Decoding sits inside the try so malformed uploads produce the same
        # friendly message as parser failures.
        decoded = base64.b64decode(content_string)

        if extension == '.csv':
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
            return df.to_string()
        if extension in ('.xls', '.xlsx', '.xlsm'):
            df = pd.read_excel(io.BytesIO(decoded))
            return df.to_string()
        if extension in ('.txt', '.md', '.markdown'):
            return decoded.decode('utf-8')
        return 'Unsupported file type. Please upload a CSV, Excel, text, or markdown file.'
    except Exception as e:
        logger.error(f"Error processing uploaded file: {str(e)}")
        return f'There was an error processing this file: {str(e)}'
|
|
|
|
|
if __name__ == '__main__':
    print("Starting the Dash application...")
    # The server listens on every interface, so debug mode (interactive
    # debugger, dev tools, hot reload) must not be on by default. Opt in for
    # local development with DASH_DEBUG=1 (also accepts true/yes/on).
    debug_enabled = os.environ.get('DASH_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")
|
|