talk-dial / app.py
# -*- coding: utf-8 -*-
import gradio as gr
import requests
import json
from pydub import AudioSegment
import os

# API Keys and Endpoints
TTS_URL = "https://talkbot.ir/TTS-tkun"
TEXT_GEN_URL = "https://talkbot.ir/api/v1/chat/completions"
TEXT_GEN_API_KEY = "sk-4fb613f56acfccf731e801b904cd89f5"
TEXT_GEN_MODEL = "deepseek-v3-0324"
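
# NOTE: TTS_URL is expected to return the URL of the synthesized audio file as plain
# text (the code below checks that the response body starts with "http" before
# downloading it). Loading TEXT_GEN_API_KEY from an environment variable rather than
# hard-coding it here would be safer.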


def generate_podcast(topic):
    """
    Generates a podcast based on a given topic.

    Returns a (script, error_message, audio_path) tuple: on success error_message
    is None; on failure script and audio_path are None.
    """
    try:
        # 1. Generate the podcast script using the text generation API
        print("Generating podcast script...")
        headers = {
            "Authorization": f"Bearer {TEXT_GEN_API_KEY}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": TEXT_GEN_MODEL,
            "messages": [
                # System prompt (Persian): "As a podcast producer, create an engaging
                # podcast script with two speakers ('صدای اول' = first voice,
                # 'صدای دوم' = second voice) about the topic below. The script must
                # be a dialogue between the two speakers." The parsing step below
                # relies on these exact speaker labels.
                {"role": "system", "content": "به عنوان یک تولیدکننده پادکست، یک متن جذاب برای پادکست با دو گوینده (صدای اول و صدای دوم) در مورد موضوع زیر ایجاد کنید. متن باید شامل گفتگو بین دو گوینده باشد."},
                # User message: "Topic: {topic}"
                {"role": "user", "content": f"موضوع: {topic}"}
            ],
            "stream": False
        }
        response = requests.post(TEXT_GEN_URL, headers=headers, json=payload)
        response.raise_for_status()  # Raise an exception for bad status codes
        # Parse the reply assuming an OpenAI-compatible chat completions schema.
        script_data = response.json()
        podcast_script = script_data['choices'][0]['message']['content']
        print("Podcast script generated.")

        # 2. Split the script into sections for each voice (this is a simplified approach).
        # You might need a more sophisticated parsing method for real-world scenarios.
        lines = podcast_script.split('\n')
        voice1_lines = []
        voice2_lines = []
        current_voice = 1  # Assume the first line belongs to voice 1
        for line in lines:
            if line.strip().startswith("صدای اول:"):  # "First voice:"
                voice1_lines.append(line.replace("صدای اول:", "", 1).strip())
                current_voice = 1
            elif line.strip().startswith("صدای دوم:"):  # "Second voice:"
                voice2_lines.append(line.replace("صدای دوم:", "", 1).strip())
                current_voice = 2
            elif line.strip():  # Lines without explicit voice indicators
                if current_voice == 1:
                    voice1_lines.append(line.strip())
                else:
                    voice2_lines.append(line.strip())

        voice1_text = " ".join(voice1_lines)
        voice2_text = " ".join(voice2_lines)

        if not voice1_text.strip() and not voice2_text.strip():
            # Error message (Persian): "Failed to parse the podcast script. Please check the generated text."
            return None, "خطا در تجزیه متن پادکست. لطفاً متن تولید شده را بررسی کنید.", None

        # 3. Generate audio for each voice
        print("Generating audio for voice 1...")
        voice1_audio_url = requests.get(TTS_URL, params={"text": voice1_text}).text.strip()
        print(f"Voice 1 audio URL: {voice1_audio_url}")
        print("Generating audio for voice 2...")
        voice2_audio_url = requests.get(TTS_URL, params={"text": voice2_text}).text.strip()
        print(f"Voice 2 audio URL: {voice2_audio_url}")

        if not voice1_audio_url.startswith("http") or not voice2_audio_url.startswith("http"):
            # Error message (Persian): "TTS audio generation failed. Please check the text."
            return None, "خطا در تولید صدای TTS. لطفاً متن را بررسی کنید.", None

        # 4. Download audio files
        print("Downloading audio files...")
        voice1_audio_response = requests.get(voice1_audio_url)
        voice1_audio_response.raise_for_status()
        voice1_audio_path = "voice1.wav"
        with open(voice1_audio_path, "wb") as f:
            f.write(voice1_audio_response.content)

        voice2_audio_response = requests.get(voice2_audio_url)
        voice2_audio_response.raise_for_status()
        voice2_audio_path = "voice2.wav"
        with open(voice2_audio_path, "wb") as f:
            f.write(voice2_audio_response.content)
        print("Audio files downloaded.")

        # 5. Combine audio files (simple interleaving for demonstration).
        # For a proper podcast with alternating speakers, you would need more complex pydub manipulation.
        print("Combining audio files...")
        audio1 = AudioSegment.from_wav(voice1_audio_path)
        audio2 = AudioSegment.from_wav(voice2_audio_path)

        # A simple way to interleave: this assumes segments are short and alternate.
        # For longer narrations, you'd need to split the audio files further.
        combined_audio = AudioSegment.empty()
        max_len = max(len(audio1), len(audio2))
        segment_length = 5000  # Example segment length in milliseconds
        for i in range(0, max_len, segment_length):
            # Slicing past the end of an AudioSegment yields an empty (zero-length) segment.
            segment1 = audio1[i:i + segment_length]
            segment2 = audio2[i:i + segment_length]
            if len(segment1) > 0:
                combined_audio += segment1
            if len(segment2) > 0:
                combined_audio += segment2

        output_mp3_path = "podcast.mp3"
        combined_audio.export(output_mp3_path, format="mp3")
        print(f"Podcast saved as {output_mp3_path}")

        # 6. Clean up temporary files
        os.remove(voice1_audio_path)
        os.remove(voice2_audio_path)

        return podcast_script, None, output_mp3_path

    except requests.exceptions.RequestException as e:
        # Error message (Persian): "Error connecting to the APIs: {e}"
        return None, f"خطا در اتصال به API ها: {e}", None
    except Exception as e:
        # Error message (Persian): "Unknown error: {e}"
        return None, f"خطای نامشخص: {e}", None


# Gradio Interface
def gradio_interface(topic):
    script, error, audio_path = generate_podcast(topic)
    if error:
        # Show the error message in the script textbox and clear the audio output.
        return error, None
    else:
        return script, audio_path


with gr.Blocks() as demo:
    gr.Markdown("## ساخت پادکست با هوش مصنوعی")  # "Create a podcast with AI"
    topic_input = gr.Textbox(label="موضوع پادکست خود را وارد کنید")  # "Enter your podcast topic"
    generate_button = gr.Button("شروع تولید پادکست")  # "Start podcast generation"
    script_output = gr.Textbox(label="متن پادکست", interactive=False)  # "Podcast script"
    audio_output = gr.Audio(label="پادکست تولید شده", type="filepath")  # "Generated podcast"

    generate_button.click(
        fn=gradio_interface,
        inputs=topic_input,
        outputs=[script_output, audio_output]
    )


if __name__ == "__main__":
    demo.launch()
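
# Running locally (assumed setup): `pip install gradio requests pydub` plus ffmpeg
# for MP3 export, then `python app.py` and open the local URL that Gradio prints.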