Spaces:
Running
Running
File size: 8,166 Bytes
319ea69 3a12f33 c296f8a 4a922a8 c296f8a 3a12f33 c296f8a 3a12f33 c296f8a 3a12f33 c296f8a 3a12f33 c296f8a 319ea69 c296f8a 319ea69 c296f8a 319ea69 c296f8a 319ea69 c296f8a 3a12f33 319ea69 c296f8a 319ea69 c296f8a 319ea69 c296f8a 319ea69 c296f8a 319ea69 c296f8a 319ea69 3a12f33 319ea69 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 |
import gradio as gr
import requests
import json
import os
from pydub import AudioSegment
from pydub.playback import play
# --- Configuration ---
# Endpoint that turns text into speech; the text is passed as a query parameter.
TALKBOT_TTS_URL = "https://talkbot.ir/TTS-tkun"
# Chat-completions endpoint used to generate the podcast script.
TALKBOT_API_BASE_URL = "https://talkbot.ir/api/v1/chat/completions"
# The API key is a secret and must never be committed to source control.
# It is read from the TALKBOT_API_KEY environment variable (for example a
# Space secret). When the variable is missing, the placeholder below is used,
# which create_podcast() detects and reports as "key not configured".
TALKBOT_API_KEY = os.environ.get("TALKBOT_API_KEY", "YOUR_DEFAULT_API_KEY_HERE")
# Model identifier sent in the chat-completions request body.
MODEL_NAME = "deepseek-v3-0324"
# --- Functions ---
def get_tts_audio_link(text: str, timeout: float = 60.0) -> str:
    """Return the URL of the TalkBot TTS audio for *text*.

    Issues a GET request to ``TALKBOT_TTS_URL`` with ``text`` as a query
    parameter and returns the final URL of the response (``response.url``),
    which the caller then downloads as a WAV file.

    Args:
        text: The text to synthesize.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The URL the request resolved to.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status.
        requests.exceptions.RequestException: On connection failures or
            timeouts.
    """
    # stream=True defers fetching the body: only the status and final URL are
    # needed here, and the caller downloads the audio itself afterwards.
    with requests.get(
        TALKBOT_TTS_URL,
        params={"text": text},
        timeout=timeout,
        stream=True,
    ) as response:
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.url
def generate_podcast_script_ai(prompt: str) -> str:
    """Generate a podcast script with the TalkBot chat-completions API.

    Posts *prompt* (together with a fixed Persian system message) to
    ``TALKBOT_API_BASE_URL`` and returns the stripped content of the first
    choice in the response.

    This function does not raise on failure. Instead it returns a message
    that starts with ``"Error generating script:"`` (transport/HTTP problems)
    or ``"Error parsing AI response:"`` (unexpected response body); callers
    detect failure through that ``"Error"`` prefix.

    Args:
        prompt: The user message sent to the model.

    Returns:
        The generated script text, or an error message as described above.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {TALKBOT_API_KEY}",
    }
    data = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "شما یک هوش مصنوعی برای تولید متن پادکست هستید. خروجی شما باید متن پادکست باشد."},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.7,
        "max_tokens": 1000,
    }

    try:
        # A timeout keeps a stalled server from hanging the UI forever.
        response = requests.post(
            TALKBOT_API_BASE_URL, headers=headers, json=data, timeout=120
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return f"Error generating script: {e}"

    try:
        result = response.json()
    except ValueError as e:
        # The body was not valid JSON; show the raw text instead.
        return f"Error parsing AI response: {e}. Full response: {response.text}"

    try:
        return result['choices'][0]['message']['content'].strip()
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # TypeError/AttributeError cover a null or non-dict payload and a
        # null "content" field; reuse the already-parsed result for the report.
        return f"Error parsing AI response: {e}. Full response: {result}"
def _download_tts_audio(text: str, destination: str) -> None:
    """Synthesize *text* via TalkBot TTS and save the audio bytes to *destination*."""
    audio_link = get_tts_audio_link(text)
    audio_response = requests.get(audio_link, timeout=120)
    audio_response.raise_for_status()
    with open(destination, "wb") as f:
        f.write(audio_response.content)


def create_podcast(podcast_topic: str) -> tuple[str, str | None, gr.Audio | None]:
    """Generate a two-voice podcast MP3 about *podcast_topic*.

    Steps:
      1. Ask the AI for a script with sections marked ``صدای اول:`` and
         ``صدای دوم:``.
      2. Split the script into the text of each voice.
      3. Fetch a WAV file for each voice from the TTS service.
      4. Concatenate voice 1 followed by voice 2 and export
         ``podcast_output.mp3``.

    Args:
        podcast_topic: Topic inserted into the AI prompt.

    Returns:
        ``(script, mp3_path, audio_component)`` on success. On any failure
        the first element is an error message and the other two are ``None``.
    """
    import tempfile

    if not TALKBOT_API_KEY or TALKBOT_API_KEY == "YOUR_DEFAULT_API_KEY_HERE":
        return "خطا: کلید API Talkbot تنظیم نشده است. لطفاً آن را در کد وارد کنید.", None, None

    # 1. Generate Podcast Script
    gr.Info("در حال تولید متن پادکست توسط هوش مصنوعی...")
    ai_prompt = f"یک متن پادکست کوتاه و جذاب در مورد '{podcast_topic}' با دو بخش مجزا برای دو گوینده (صدای اول و صدای دوم) بنویسید. هر بخش را با عنوان 'صدای اول:' و 'صدای دوم:' مشخص کنید. متن پادکست باید حدود 150-250 کلمه باشد."
    generated_script = generate_podcast_script_ai(ai_prompt)
    # Failure messages from generate_podcast_script_ai begin with "Error";
    # checking the prefix (not a substring) avoids rejecting a valid script
    # that merely contains that word.
    if generated_script.startswith("Error"):
        return generated_script, None, None

    # 2. Extract Voices: a marker line switches the active speaker, and
    # unmarked lines are appended to whichever speaker is currently active.
    gr.Info("در حال تفکیک و تولید صداها...")
    voice_parts = {1: [], 2: []}
    current_voice = None
    for line in generated_script.split('\n'):
        if "صدای اول:" in line:
            current_voice = 1
            line = line.replace("صدای اول:", "")
        elif "صدای دوم:" in line:
            current_voice = 2
            line = line.replace("صدای دوم:", "")
        if current_voice is not None:
            voice_parts[current_voice].append(line.strip())
    voice1_text = " ".join(voice_parts[1]).strip()
    voice2_text = " ".join(voice_parts[2]).strip()

    # Also rejects a marker that is present but followed by no text, so empty
    # text is never sent to the TTS service.
    if not voice1_text or not voice2_text:
        return f"خطا: متن پادکست تولید شده شامل 'صدای اول:' یا 'صدای دوم:' استاندارد نیست. متن کامل: \n{generated_script}", None, None

    # The intermediate WAV files live in a per-call temporary directory so
    # that simultaneous requests do not overwrite each other's files and the
    # files are removed on every exit path. The final MP3 path is still fixed.
    with tempfile.TemporaryDirectory() as work_dir:
        voice1_path = os.path.join(work_dir, "voice1.wav")
        voice2_path = os.path.join(work_dir, "voice2.wav")

        # 3. Generate Audio for each voice
        try:
            gr.Info("در حال دریافت صدای اول...")
            _download_tts_audio(voice1_text, voice1_path)
            gr.Info("در حال دریافت صدای دوم...")
            _download_tts_audio(voice2_text, voice2_path)
        except requests.exceptions.HTTPError as e:
            return f"خطا در دریافت صدا از TTS: {e}. URL: {e.request.url}", None, None
        except Exception as e:
            return f"خطای unexpected در دریافت صدا: {e}", None, None

        # 4. Merge Audio files
        gr.Info("در حال ترکیب صداها و تولید فایل نهایی MP3...")
        try:
            audio1 = AudioSegment.from_wav(voice1_path)
            audio2 = AudioSegment.from_wav(voice2_path)
            # Plain concatenation: all of voice 1, then all of voice 2.
            # Interleaving individual turns would require keeping the
            # per-turn segments from the parsing step above.
            final_podcast_audio = audio1 + audio2
            output_mp3_path = "podcast_output.mp3"
            final_podcast_audio.export(output_mp3_path, format="mp3")
            gr.Info("تولید پادکست با موفقیت انجام شد!")
            return generated_script, output_mp3_path, gr.Audio(output_mp3_path, type="filepath", label="پادکست نهایی")
        except Exception as e:
            return f"خطا در ترکیب فایلهای صوتی: {e}", None, None
# --- Gradio Interface ---
with gr.Blocks() as demo:
    # Page header describing what the app does.
    gr.Markdown(
        """
# تولیدکننده پادکست هوشمند 🎙️
با وارد کردن یک موضوع، هوش مصنوعی ما یک متن پادکست تولید میکند و سپس آن را با دو صدای مجزا به یک فایل MP3 پادکست تبدیل میکند.
"""
    )

    # Input row: the topic box and the button that starts generation.
    with gr.Row():
        topic_input = gr.Textbox(
            label="موضوع پادکست",
            placeholder="مثال: تاریخچه هوش مصنوعی، فواید مدیتیشن، آینده سفر فضایی",
            value="مزایای یادگیری زبان پایتون"
        )
        generate_button = gr.Button("شروع تولید پادکست 🚀")

    # Output column: generated script, audio player and a download link that
    # stays hidden until an MP3 exists.
    with gr.Column():
        script_output = gr.Textbox(label="متن پادکست تولید شده", interactive=False, lines=10)
        audio_output = gr.Audio(None, type="filepath", label="پادکست نهایی (MP3)", format="mp3")
        download_link = gr.File(label="دانلود فایل MP3", file_count="single", visible=False)

    def on_generate_button_click(topic):
        """Run create_podcast for *topic* and map its result onto the three outputs."""
        script_text, mp3_path, audio_player = create_podcast(topic)
        # No MP3 path means generation failed: show the message in the script
        # box, clear the player and keep the download link hidden.
        if not mp3_path:
            return script_text, None, gr.File(visible=False)
        return script_text, audio_player, gr.File(value=mp3_path, visible=True)

    generate_button.click(
        fn=on_generate_button_click,
        inputs=topic_input,
        outputs=[script_output, audio_output, download_link]
    )

if __name__ == "__main__":
    demo.launch()
|