# -*- coding: utf-8 -*-
import gradio as gr
import requests
import json
from pydub import AudioSegment
import os
# API Keys and Endpoints
TTS_URL = "https://talkbot.ir/TTS-tkun"
TEXT_GEN_URL = "https://talkbot.ir/api/v1/chat/completions"
TEXT_GEN_API_KEY = "sk-4fb613f56acfccf731e801b904cd89f5"
TEXT_GEN_MODEL = "deepseek-v3-0324"
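# NOTE: the API key above is hard-coded for demonstration; in practice it is safer to
# read it from an environment variable (e.g. via os.environ). The TTS endpoint is assumed
# to return the URL of the generated audio file as plain text, and TEXT_GEN_URL is
# assumed to expose an OpenAI-compatible chat-completions API, as used below.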
def generate_podcast(topic):
"""
Generates a podcast based on a given topic.
"""
try:
# 1. Generate podcast script using the text generation API
print("Generating podcast script...")
headers = {
"Authorization": f"Bearer {TEXT_GEN_API_KEY}",
"Content-Type": "application/json"
}
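        # The system prompt (in Persian) asks the model to act as a podcast producer and
        # write an engaging dialogue between two hosts, labelling each line with
        # "صدای اول:" (Voice 1) or "صدای دوم:" (Voice 2) so it can be split per speaker below.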
payload = {
"model": TEXT_GEN_MODEL,
"messages": [
{"role": "system", "content": "به عنوان یک تولیدکننده پادکست، یک متن جذاب برای پادکست با دو گوینده (صدای اول و صدای دوم) در مورد موضوع زیر ایجاد کنید. متن باید شامل گفتگو بین دو گوینده باشد."},
{"role": "user", "content": f"موضوع: {topic}"}
],
"stream": False
}
response = requests.post(TEXT_GEN_URL, headers=headers, json=payload)
response.raise_for_status() # Raise an exception for bad status codes
script_data = response.json()
podcast_script = script_data['choices'][0]['message']['content']
print("Podcast script generated.")
# 2. Split the script into sections for each voice (This is a simplified approach)
# You might need a more sophisticated parsing method for real-world scenarios
lines = podcast_script.split('\n')
voice1_lines = []
voice2_lines = []
current_voice = 1 # Assuming the first line is for voice 1
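        # Route each line to a speaker: a "صدای اول:"/"صدای دوم:" prefix switches the
        # current voice, and unlabelled lines stay with whichever voice was seen last.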
for line in lines:
if line.strip().startswith("صدای اول:"):
voice1_lines.append(line.replace("صدای اول:", "", 1).strip())
current_voice = 1
elif line.strip().startswith("صدای دوم:"):
voice2_lines.append(line.replace("صدای دوم:", "", 1).strip())
current_voice = 2
            elif line.strip():  # Lines without an explicit voice label stay with the current speaker
if current_voice == 1:
voice1_lines.append(line.strip())
else:
voice2_lines.append(line.strip())
voice1_text = " ".join(voice1_lines)
voice2_text = " ".join(voice2_lines)
if not voice1_text.strip() and not voice2_text.strip():
return None, "خطا در تجزیه متن پادکست. لطفاً متن generado شده را بررسی کنید.", None
# 3. Generate audio for each voice
print("Generating audio for voice 1...")
voice1_audio_url = requests.get(TTS_URL, params={"text": voice1_text}).text.strip()
print(f"Voice 1 audio URL: {voice1_audio_url}")
print("Generating audio for voice 2...")
voice2_audio_url = requests.get(TTS_URL, params={"text": voice2_text}).text.strip()
print(f"Voice 2 audio URL: {voice2_audio_url}")
if not voice1_audio_url.startswith("http") or not voice2_audio_url.startswith("http"):
return None, "خطا در تولید صدای TTS. لطفاً متن را بررسی کنید.", None
# 4. Download audio files
print("Downloading audio files...")
voice1_audio_response = requests.get(voice1_audio_url)
voice1_audio_response.raise_for_status()
voice1_audio_path = "voice1.wav"
with open(voice1_audio_path, "wb") as f:
f.write(voice1_audio_response.content)
voice2_audio_response = requests.get(voice2_audio_url)
voice2_audio_response.raise_for_status()
voice2_audio_path = "voice2.wav"
with open(voice2_audio_path, "wb") as f:
f.write(voice2_audio_response.content)
print("Audio files downloaded.")
        # 5. Combine audio files (simple fixed-length interleaving for demonstration).
        # For a podcast with properly alternating speakers you would need finer-grained
        # pydub work; build_dialogue_audio below sketches a per-line approach.
print("Combining audio files...")
audio1 = AudioSegment.from_wav(voice1_audio_path)
audio2 = AudioSegment.from_wav(voice2_audio_path)
        # Naive interleaving: alternate fixed-length chunks from the two narrations,
        # assuming they are roughly aligned. Longer narrations would need to be split
        # per dialogue line instead.
combined_audio = AudioSegment.empty()
max_len = max(len(audio1), len(audio2))
segment_length = 5000 # Example segment length in milliseconds
for i in range(0, max_len, segment_length):
segment1 = audio1[i : i + segment_length]
segment2 = audio2[i : i + segment_length]
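            # Slices past the end of a narration are empty AudioSegments, which are
            # falsy, so the shorter narration simply stops contributing chunks.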
if segment1:
combined_audio += segment1
if segment2:
combined_audio += segment2
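        # Exporting to mp3 relies on ffmpeg (or libav) being available to pydub.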
output_mp3_path = "podcast.mp3"
combined_audio.export(output_mp3_path, format="mp3")
print(f"Podcast saved as {output_mp3_path}")
# 6. Clean up temporary files
os.remove(voice1_audio_path)
os.remove(voice2_audio_path)
return podcast_script, None, output_mp3_path
except requests.exceptions.RequestException as e:
return None, f"خطا در اتصال به API ها: {e}", None
except Exception as e:
return None, f"خطای نامشخص: {e}", None
# Gradio Interface
def gradio_interface(topic):
script, error, audio_path = generate_podcast(topic)
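    # On failure, show the error message in the script textbox and leave the audio empty.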
if error:
return error, None
else:
return script, audio_path
with gr.Blocks() as demo:
gr.Markdown("## ساخت پادکست با هوش مصنوعی")
topic_input = gr.Textbox(label="موضوع پادکست خود را وارد کنید")
generate_button = gr.Button("شروع تولید پادکست")
script_output = gr.Textbox(label="متن پادکست", interactive=False)
audio_output = gr.Audio(label="پادکست تولید شده", type="filepath")
generate_button.click(
fn=gradio_interface,
inputs=topic_input,
outputs=[script_output, audio_output]
)
if __name__ == "__main__":
demo.launch()