# Spaces: Running
import json
import os
import tempfile

import gradio as gr
import requests
from pydub import AudioSegment
from pydub.playback import play
# --- Configuration ---
# Endpoint that turns text into speech (queried with a ``text`` parameter).
TALKBOT_TTS_URL = "https://talkbot.ir/TTS-tkun"
# Chat-completions endpoint used to generate the podcast script.
TALKBOT_API_BASE_URL = "https://talkbot.ir/api/v1/chat/completions"
# The API key is a secret and must not live in source control: read it from
# the TALKBOT_API_KEY environment variable. The fallback placeholder is the
# value create_podcast() checks for to report a missing key.
# NOTE(review): a key was previously committed here in plain text; it should
# be treated as leaked and revoked.
TALKBOT_API_KEY = os.environ.get("TALKBOT_API_KEY", "YOUR_DEFAULT_API_KEY_HERE")
# Model identifier sent in the chat-completions request body.
MODEL_NAME = "deepseek-v3-0324"
# --- Functions --- | |
def get_tts_audio_link(text: str, timeout: float = 30.0) -> str:
    """Return the URL from which the TalkBot TTS audio for *text* can be fetched.

    Sends a GET request to ``TALKBOT_TTS_URL`` with ``text`` as a query
    parameter and returns the URL of the resulting response.

    Args:
        text: The text to be synthesized.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The ``url`` attribute of the response.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status code.
        requests.exceptions.RequestException: On connection failures or
            timeouts.
    """
    # stream=True: only the status and final URL are needed here, so the
    # response body is not downloaded; the caller fetches the audio itself.
    with requests.get(
        TALKBOT_TTS_URL,
        params={"text": text},
        stream=True,
        timeout=timeout,
    ) as response:
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.url
def generate_podcast_script_ai(prompt: str, timeout: float = 60.0) -> str:
    """Generate a podcast script with the TalkBot chat-completions API.

    Posts *prompt* as the user message (together with a fixed Persian system
    message) to ``TALKBOT_API_BASE_URL`` and returns the stripped content of
    the first choice.

    Args:
        prompt: The user prompt describing the podcast to write.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The generated script on success. On failure no exception is raised;
        instead a string starting with ``"Error generating script:"`` or
        ``"Error parsing AI response:"`` is returned.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {TALKBOT_API_KEY}",
    }
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "شما یک هوش مصنوعی برای تولید متن پادکست هستید. خروجی شما باید متن پادکست باشد."},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.7,
        "max_tokens": 1000,
    }

    try:
        response = requests.post(
            TALKBOT_API_BASE_URL,
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.RequestException as e:
        return f"Error generating script: {e}"
    except ValueError as e:
        # Older requests versions raise a plain ValueError subclass when the
        # body is not valid JSON; report the raw text instead of re-parsing.
        return f"Error parsing AI response: {e}. Full response: {response.text}"

    try:
        return result['choices'][0]['message']['content'].strip()
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # Reuse the already-parsed payload rather than calling
        # response.json() a second time inside the error handler.
        return f"Error parsing AI response: {e}. Full response: {result}"
def _split_script_by_voice(script: str) -> tuple[str, str]:
    """Return the (voice 1, voice 2) text found in *script*, each stripped."""
    markers = {"صدای اول:": 1, "صدای دوم:": 2}
    pieces: dict[int, list[str]] = {1: [], 2: []}
    current_voice = None
    for line in script.split('\n'):
        # A marker line switches the active voice; the marker itself is
        # removed and the rest of the line belongs to that voice.
        for marker, voice in markers.items():
            if marker in line:
                current_voice = voice
                line = line.replace(marker, "")
                break
        # Lines before the first marker belong to nobody and are dropped.
        if current_voice is not None:
            pieces[current_voice].append(line.strip())
    return " ".join(pieces[1]).strip(), " ".join(pieces[2]).strip()


def _download_tts_wav(text: str, destination: str, timeout: float = 60.0) -> None:
    """Synthesize *text* with TalkBot TTS and save the audio to *destination*."""
    wav_link = get_tts_audio_link(text)
    response = requests.get(wav_link, timeout=timeout)
    response.raise_for_status()
    with open(destination, "wb") as f:
        f.write(response.content)


def create_podcast(podcast_topic: str) -> tuple[str, str | None, gr.Audio | None]:
    """Generate a two-voice podcast about *podcast_topic* and export it as MP3.

    Steps: ask the AI for a script, split it into the sections marked
    'صدای اول:' and 'صدای دوم:', fetch TTS audio for each section, then
    concatenate voice 1 followed by voice 2 into ``podcast_output.mp3``.

    Args:
        podcast_topic: The topic the podcast should be about.

    Returns:
        ``(script, mp3_path, audio_component)`` on success. On any failure
        the first element is an error message and the other two are ``None``.
    """
    if not TALKBOT_API_KEY or TALKBOT_API_KEY == "YOUR_DEFAULT_API_KEY_HERE":
        return "خطا: کلید API Talkbot تنظیم نشده است. لطفاً آن را در کد وارد کنید.", None, None

    # 1. Generate Podcast Script
    gr.Info("در حال تولید متن پادکست توسط هوش مصنوعی...")
    ai_prompt = f"یک متن پادکست کوتاه و جذاب در مورد '{podcast_topic}' با دو بخش مجزا برای دو گوینده (صدای اول و صدای دوم) بنویسید. هر بخش را با عنوان 'صدای اول:' و 'صدای دوم:' مشخص کنید. متن پادکست باید حدود 150-250 کلمه باشد."
    generated_script = generate_podcast_script_ai(ai_prompt)
    # Error results always *start* with "Error"; a substring test would
    # reject valid scripts that merely mention the word.
    if generated_script.startswith("Error"):
        return generated_script, None, None

    # 2. Extract Voices
    gr.Info("در حال تفکیک و تولید صداها...")
    voice1_text, voice2_text = _split_script_by_voice(generated_script)
    if not voice1_text or not voice2_text:
        return f"خطا: متن پادکست تولید شده شامل 'صدای اول:' یا 'صدای دوم:' استاندارد نیست. متن کامل: \n{generated_script}", None, None

    # Intermediate WAV files live in a private temporary directory so they
    # are removed on every exit path, including failures.
    with tempfile.TemporaryDirectory() as work_dir:
        voice1_path = os.path.join(work_dir, "voice1.wav")
        voice2_path = os.path.join(work_dir, "voice2.wav")

        # 3. Generate Audio for each voice
        try:
            gr.Info("در حال دریافت صدای اول...")
            _download_tts_wav(voice1_text, voice1_path)
            gr.Info("در حال دریافت صدای دوم...")
            _download_tts_wav(voice2_text, voice2_path)
        except requests.exceptions.HTTPError as e:
            failed_url = e.request.url if e.request is not None else "unknown"
            return f"خطا در دریافت صدا از TTS: {e}. URL: {failed_url}", None, None
        except Exception as e:
            return f"خطای unexpected در دریافت صدا: {e}", None, None

        # 4. Merge Audio files
        gr.Info("در حال ترکیب صداها و تولید فایل نهایی MP3...")
        try:
            audio1 = AudioSegment.from_wav(voice1_path)
            audio2 = AudioSegment.from_wav(voice2_path)
            # Plain concatenation: all of voice 1, then all of voice 2.
            # Interleaving turn by turn would require keeping the script's
            # per-turn structure instead of one text per voice.
            final_podcast_audio = audio1 + audio2
            output_mp3_path = "podcast_output.mp3"
            final_podcast_audio.export(output_mp3_path, format="mp3")
            gr.Info("تولید پادکست با موفقیت انجام شد!")
            return generated_script, output_mp3_path, gr.Audio(output_mp3_path, type="filepath", label="پادکست نهایی")
        except Exception as e:
            return f"خطا در ترکیب فایلهای صوتی: {e}", None, None
# --- Gradio Interface --- | |
with gr.Blocks() as demo:

    def on_generate_button_click(topic):
        """Run the podcast pipeline and map its result onto the three outputs."""
        script_text, mp3_path, audio_widget = create_podcast(topic)
        # No MP3 path means the pipeline failed: show the message only and
        # keep the download widget hidden.
        if not mp3_path:
            return script_text, None, gr.File(visible=False)
        return script_text, audio_widget, gr.File(value=mp3_path, visible=True)

    # Page header.
    gr.Markdown(
        """
# تولیدکننده پادکست هوشمند 🎙️
با وارد کردن یک موضوع، هوش مصنوعی ما یک متن پادکست تولید میکند و سپس آن را با دو صدای مجزا به یک فایل MP3 پادکست تبدیل میکند.
"""
    )

    # Input row: topic box and the trigger button.
    with gr.Row():
        topic_input = gr.Textbox(
            value="مزایای یادگیری زبان پایتون",
            placeholder="مثال: تاریخچه هوش مصنوعی، فواید مدیتیشن، آینده سفر فضایی",
            label="موضوع پادکست",
        )
        generate_button = gr.Button("شروع تولید پادکست 🚀")

    # Output column: generated script, audio player, download link.
    with gr.Column():
        script_output = gr.Textbox(
            label="متن پادکست تولید شده",
            interactive=False,
            lines=10,
        )
        audio_output = gr.Audio(
            None,
            type="filepath",
            label="پادکست نهایی (MP3)",
            format="mp3",
        )
        download_link = gr.File(
            label="دانلود فایل MP3",
            file_count="single",
            visible=False,
        )

    generate_button.click(
        fn=on_generate_button_click,
        inputs=topic_input,
        outputs=[script_output, audio_output, download_link],
    )

# Start the web UI only when executed as a script.
if __name__ == "__main__":
    demo.launch()