Spaces:

suprimedev
/

talk-dial

Running

App Files Files Community

talk-dial / app.py

suprimedev

Update app.py

4a922a8 verified 2 months ago

raw

history blame

8.17 kB

	import gradio as gr
	import requests
	import json
	import os
	from pydub import AudioSegment
	from pydub.playback import play

	# --- Configuration ---
	# Endpoint that turns text into speech (queried with a ``text`` parameter).
	TALKBOT_TTS_URL = "https://talkbot.ir/TTS-tkun"
	# Chat-completions endpoint used to generate the podcast script.
	TALKBOT_API_BASE_URL = "https://talkbot.ir/api/v1/chat/completions"
	# The API key is a secret and must never be committed to the repository.
	# It is read from the TALKBOT_API_KEY environment variable (on Hugging Face
	# Spaces: Settings -> Secrets). An empty value means "not configured".
	# NOTE(review): the key that used to be hard-coded here is public in the
	# repository history and should be revoked and rotated.
	TALKBOT_API_KEY = os.environ.get("TALKBOT_API_KEY", "").strip()
	# Model identifier sent with every chat-completions request.
	MODEL_NAME = "deepseek-v3-0324"

	# --- Functions ---

	def get_tts_audio_link(text: str, timeout: float = 30.0) -> str:
	    """Return the final URL of the TalkBot TTS audio for ``text``.

	    The TTS endpoint is queried with ``text`` as a query parameter and the
	    URL of the final response (after any redirects) is returned.

	    Args:
	        text: The text to synthesize. Must contain non-whitespace characters.
	        timeout: Seconds to wait for the server before giving up.

	    Returns:
	        The URL of the response, as reported by ``requests``.

	    Raises:
	        ValueError: If ``text`` is empty or only whitespace.
	        requests.exceptions.RequestException: On network errors, timeouts,
	            or a non-success HTTP status.
	    """
	    if not text or not text.strip():
	        raise ValueError("text must be a non-empty string")

	    # stream=True avoids downloading the audio body here: only the final URL
	    # is needed. The context manager releases the connection afterwards.
	    with requests.get(
	        TALKBOT_TTS_URL,
	        params={"text": text},
	        stream=True,
	        timeout=timeout,
	    ) as response:
	        response.raise_for_status()  # Raise an exception for HTTP errors
	        return response.url

	def generate_podcast_script_ai(prompt: str, timeout: float = 60.0) -> str:
	    """Generate a podcast script with the TalkBot chat-completions API.

	    Args:
	        prompt: The user prompt describing the podcast to write.
	        timeout: Seconds to wait for the server before giving up.

	    Returns:
	        The generated script with surrounding whitespace removed. On failure
	        no exception is raised; instead a message starting with ``"Error"``
	        is returned, which is the convention callers in this file check for.
	    """
	    headers = {
	        "Content-Type": "application/json",
	        "Authorization": f"Bearer {TALKBOT_API_KEY}",
	    }
	    data = {
	        "model": MODEL_NAME,
	        "messages": [
	            {"role": "system", "content": "شما یک هوش مصنوعی برای تولید متن پادکست هستید. خروجی شما باید متن پادکست باشد."},
	            {"role": "user", "content": prompt},
	        ],
	        "temperature": 0.7,
	        "max_tokens": 1000,
	    }

	    try:
	        response = requests.post(
	            TALKBOT_API_BASE_URL, headers=headers, json=data, timeout=timeout
	        )
	        response.raise_for_status()
	        result = response.json()
	    except requests.exceptions.RequestException as e:
	        return f"Error generating script: {e}"
	    except ValueError as e:
	        # Older versions of requests raise a plain ValueError for a body
	        # that is not valid JSON; report it instead of crashing.
	        return f"Error parsing AI response: {e}. Full response: {response.text}"

	    try:
	        return result["choices"][0]["message"]["content"].strip()
	    except (KeyError, IndexError, TypeError, AttributeError) as e:
	        # TypeError/AttributeError cover payloads with the wrong shape,
	        # e.g. a list at the top level or a null "content" field.
	        return f"Error parsing AI response: {e}. Full response: {result}"

	def _split_voices(script: str) -> tuple[str, str]:
	    """Split a generated script into the text of voice one and voice two.

	    A line containing the first-voice or second-voice marker switches the
	    current speaker; the marker is removed and the rest of the line, plus all
	    following lines up to the next marker, belong to that speaker. Lines
	    before the first marker are ignored.
	    """
	    first_marker = "صدای اول:"
	    second_marker = "صدای دوم:"
	    collected = {1: [], 2: []}
	    current_voice = None

	    for line in script.splitlines():
	        if first_marker in line:
	            current_voice = 1
	            line = line.replace(first_marker, "")
	        elif second_marker in line:
	            current_voice = 2
	            line = line.replace(second_marker, "")
	        elif current_voice is None:
	            continue

	        cleaned = line.strip()
	        if cleaned:
	            collected[current_voice].append(cleaned)

	    return " ".join(collected[1]), " ".join(collected[2])


	def _download_tts_wav(text: str, destination: str, timeout: float = 60.0) -> None:
	    """Fetch the TTS audio for ``text`` and write its bytes to ``destination``."""
	    wav_link = get_tts_audio_link(text)
	    audio_response = requests.get(wav_link, timeout=timeout)
	    audio_response.raise_for_status()
	    with open(destination, "wb") as f:
	        f.write(audio_response.content)


	def create_podcast(podcast_topic: str) -> tuple[str, str | None, gr.Audio | None]:
	    """Generate a two-voice podcast about ``podcast_topic`` as an MP3 file.

	    Steps: ask the AI for a script, split it into the two speakers' parts,
	    fetch TTS audio for each part, concatenate both recordings (voice one
	    first, then voice two) and export the result as MP3.

	    Args:
	        podcast_topic: The subject the podcast should be about.

	    Returns:
	        ``(script, mp3_path, audio_component)`` on success. On any failure
	        the first element is an error message and the other two are ``None``.
	    """
	    import tempfile

	    if not TALKBOT_API_KEY or TALKBOT_API_KEY == "YOUR_DEFAULT_API_KEY_HERE":
	        return "خطا: کلید API Talkbot تنظیم نشده است. لطفاً آن را در کد وارد کنید.", None, None

	    # 1. Generate Podcast Script
	    gr.Info("در حال تولید متن پادکست توسط هوش مصنوعی...")
	    ai_prompt = f"یک متن پادکست کوتاه و جذاب در مورد '{podcast_topic}' با دو بخش مجزا برای دو گوینده (صدای اول و صدای دوم) بنویسید. هر بخش را با عنوان 'صدای اول:' و 'صدای دوم:' مشخص کنید. متن پادکست باید حدود 150-250 کلمه باشد."

	    generated_script = generate_podcast_script_ai(ai_prompt)

	    # The script generator reports failures as messages that START with
	    # "Error"; a substring test would reject valid scripts mentioning it.
	    if generated_script.startswith("Error"):
	        return generated_script, None, None

	    # 2. Extract the two voices
	    gr.Info("در حال تفکیک و تولید صداها...")
	    voice1_text, voice2_text = _split_voices(generated_script)

	    if not voice1_text or not voice2_text:
	        return f"خطا: متن پادکست تولید شده شامل 'صدای اول:' یا 'صدای دوم:' استاندارد نیست. متن کامل: \n{generated_script}", None, None

	    # Intermediate WAV files live in a private temporary directory so that
	    # concurrent requests cannot overwrite each other and nothing is left
	    # behind when a step fails.
	    with tempfile.TemporaryDirectory() as work_dir:
	        voice1_path = os.path.join(work_dir, "voice1.wav")
	        voice2_path = os.path.join(work_dir, "voice2.wav")

	        # 3. Generate Audio for each voice
	        try:
	            gr.Info("در حال دریافت صدای اول...")
	            _download_tts_wav(voice1_text, voice1_path)

	            gr.Info("در حال دریافت صدای دوم...")
	            _download_tts_wav(voice2_text, voice2_path)
	        except requests.exceptions.HTTPError as e:
	            failed_url = e.request.url if e.request is not None else None
	            return f"خطا در دریافت صدا از TTS: {e}. URL: {failed_url}", None, None
	        except Exception as e:
	            return f"خطای unexpected در دریافت صدا: {e}", None, None

	        # 4. Merge Audio files
	        gr.Info("در حال ترکیب صداها و تولید فایل نهایی MP3...")
	        try:
	            audio1 = AudioSegment.from_wav(voice1_path)
	            audio2 = AudioSegment.from_wav(voice2_path)

	            # Plain concatenation: all of voice one, then all of voice two.
	            # Interleaving by dialogue turn would require per-turn parsing.
	            final_podcast_audio = audio1 + audio2

	            # The MP3 must outlive this function (the UI serves it), so it
	            # gets its own uniquely named file outside the work directory.
	            with tempfile.NamedTemporaryFile(
	                prefix="podcast_", suffix=".mp3", delete=False
	            ) as mp3_file:
	                output_mp3_path = mp3_file.name
	            final_podcast_audio.export(output_mp3_path, format="mp3")
	        except Exception as e:
	            return f"خطا در ترکیب فایل‌های صوتی: {e}", None, None

	    gr.Info("تولید پادکست با موفقیت انجام شد!")
	    return generated_script, output_mp3_path, gr.Audio(output_mp3_path, type="filepath", label="پادکست نهایی")

	# --- Gradio Interface ---

	def on_generate_button_click(topic):
	    """Run the podcast pipeline and map its result onto the three outputs.

	    Returns the script (or error message), the audio component, and a file
	    component that is shown only when an MP3 path was produced.
	    """
	    script_result, audio_path, audio_component = create_podcast(topic)
	    if not audio_path:
	        # Failure: show the message, clear the player, hide the download.
	        return script_result, None, gr.File(visible=False)
	    return script_result, audio_component, gr.File(value=audio_path, visible=True)


	# --- Layout and event wiring ---
	with gr.Blocks() as demo:
	    gr.Markdown(
	        """
	        # تولیدکننده پادکست هوشمند 🎙️
	        با وارد کردن یک موضوع، هوش مصنوعی ما یک متن پادکست تولید می‌کند و سپس آن را با دو صدای مجزا به یک فایل MP3 پادکست تبدیل می‌کند.
	        """
	    )

	    # Input row: topic box next to the start button.
	    with gr.Row():
	        topic_input = gr.Textbox(
	            label="موضوع پادکست",
	            placeholder="مثال: تاریخچه هوش مصنوعی، فواید مدیتیشن، آینده سفر فضایی",
	            value="مزایای یادگیری زبان پایتون",
	        )
	        generate_button = gr.Button("شروع تولید پادکست 🚀")

	    # Output column: script text, audio player, hidden download slot.
	    with gr.Column():
	        script_output = gr.Textbox(label="متن پادکست تولید شده", interactive=False, lines=10)
	        audio_output = gr.Audio(None, type="filepath", label="پادکست نهایی (MP3)", format="mp3")
	        download_link = gr.File(label="دانلود فایل MP3", file_count="single", visible=False)

	    generate_button.click(
	        fn=on_generate_button_click,
	        inputs=topic_input,
	        outputs=[script_output, audio_output, download_link],
	    )

	# Start the web server only when this file is executed directly.
	if __name__ == "__main__":
	    demo.launch()