Spaces:

KarhuSuomi
/

google-tts

Running

App Files Files Community

google-tts / app.py

KarhuSuomi

Update app.py

8ad63c1 verified 3 months ago

raw

history blame contribute delete

2.91 kB

	import gradio as gr
	from google.cloud import texttospeech
	import pysrt
	import os
	from pydub import AudioSegment
	from dotenv import load_dotenv
	from google.oauth2 import service_account
	import json

	# Load environment variables (via python-dotenv) so os.getenv can see them.
	load_dotenv()

	def get_google_client():
	    """Build a Text-to-Speech client from credentials held in the environment.

	    Reads the service-account JSON document from ``GOOGLE_CREDENTIALS_JSON``
	    and the quota project from ``GOOGLE_QUOTA_PROJECT_ID``.

	    Returns:
	        A ``texttospeech.TextToSpeechClient`` bound to those credentials.

	    Raises:
	        ValueError: If ``GOOGLE_CREDENTIALS_JSON`` is unset or empty.
	    """
	    raw_credentials = os.getenv("GOOGLE_CREDENTIALS_JSON")
	    if raw_credentials:
	        # The variable holds the JSON text itself, not a path to a key file.
	        account_info = json.loads(raw_credentials)
	        credentials = service_account.Credentials.from_service_account_info(
	            account_info
	        )
	        quota_project = os.getenv("GOOGLE_QUOTA_PROJECT_ID")
	        return texttospeech.TextToSpeechClient(
	            credentials=credentials,
	            client_options={"quota_project_id": quota_project},
	        )

	    raise ValueError("Google Cloud credentials not configured")

	# Shared Text-to-Speech client, created once when this module is imported.
	# get_google_client() raises ValueError here if GOOGLE_CREDENTIALS_JSON is
	# not set, so a missing configuration fails at import time.
	client = get_google_client()

	def srt_to_speech(srt_file, language_code, voice_name, gender):
	    """Synthesize an SRT file into a single MP3 that follows the subtitle timeline.

	    Each subtitle is sent to Google Cloud Text-to-Speech and the resulting
	    clip is placed at the subtitle's start time by padding the track with
	    silence. If the audio assembled so far already runs past a subtitle's
	    start time, the clip is appended immediately with no padding.

	    Args:
	        srt_file: The uploaded subtitle file, either an object exposing the
	            path as ``.name`` or the path itself.
	        language_code: Language code passed to the voice selection,
	            e.g. ``"fi-FI"``.
	        voice_name: Voice name passed to the voice selection,
	            e.g. ``"fi-FI-Standard-A"``.
	        gender: Member name of ``texttospeech.SsmlVoiceGender``
	            (``"FEMALE"``, ``"MALE"`` or ``"NEUTRAL"``).

	    Returns:
	        Path of the exported MP3 file (``"output_with_timeline.mp3"``).

	    Raises:
	        gr.Error: If no file was uploaded.
	        KeyError: If ``gender`` is not a ``SsmlVoiceGender`` member name.
	    """
	    import io  # local import: only used to decode clips from memory

	    if srt_file is None:
	        raise gr.Error("No SRT file was uploaded")

	    # Accept both a file wrapper carrying the path in ``.name`` and a bare path.
	    srt_path = getattr(srt_file, "name", srt_file)
	    subs = pysrt.open(srt_path)

	    # Voice and encoding do not depend on the subtitle, so build them once.
	    voice = texttospeech.VoiceSelectionParams(
	        language_code=language_code,
	        name=voice_name,
	        ssml_gender=texttospeech.SsmlVoiceGender[gender],
	    )
	    audio_config = texttospeech.AudioConfig(
	        audio_encoding=texttospeech.AudioEncoding.MP3
	    )

	    final_audio = AudioSegment.empty()

	    for sub in subs:
	        # Empty cues have nothing to speak; skip them instead of calling the API.
	        if not sub.text.strip():
	            continue

	        response = client.synthesize_speech(
	            input=texttospeech.SynthesisInput(text=sub.text),
	            voice=voice,
	            audio_config=audio_config,
	        )

	        # Pad with silence up to this subtitle's start time. Both
	        # ``sub.start.ordinal`` and ``len(final_audio)`` are milliseconds, which
	        # is also the unit ``AudioSegment.silent`` expects. Measuring against
	        # the audio assembled so far keeps clips on the timeline even when the
	        # spoken clip is shorter or longer than the cue it belongs to.
	        gap_ms = sub.start.ordinal - len(final_audio)
	        if gap_ms > 0:
	            final_audio += AudioSegment.silent(duration=gap_ms)

	        # Decode the MP3 bytes from memory; no temp files to collide or leak.
	        final_audio += AudioSegment.from_file(
	            io.BytesIO(response.audio_content), format="mp3"
	        )

	    # Save the final audio.
	    output_file = "output_with_timeline.mp3"
	    final_audio.export(output_file, format="mp3")
	    return output_file

	# Gradio interface: the four inputs are listed in the same order as the
	# parameters of srt_to_speech (file, language code, voice name, gender).
	iface = gr.Interface(
	    fn=srt_to_speech,
	    inputs=[
	        gr.File(label="Lataa SRT/tiedosto"),
	        # NOTE(review): this label is Chinese ("language code") while the rest
	        # of the UI is Finnish; left unchanged here, confirm the intended text.
	        gr.Dropdown(["fi-FI", "en-US"], label="语言代码", value="fi-FI"),
	        gr.Dropdown(
	            ["fi-FI-Standard-A", "fi-FI-Wavenet-A", "en-US-Standard-A"],
	            label="Äänen nimi",
	            value="fi-FI-Standard-A",
	        ),
	        # Label fixed: "Sukupoli" was a misspelling of "Sukupuoli" (gender).
	        gr.Dropdown(["FEMALE", "MALE", "NEUTRAL"], label="Sukupuoli", value="FEMALE"),
	    ],
	    outputs=gr.Audio(label="Luotu äänitetty tekstitys"),
	    title="SRT-äänityökalu",
	    description="Lataa SRT-tekstitystiedosto ja luo äänitetty versio ajoituksella",
	)

	if __name__ == "__main__":
	    import inspect

	    # The original had a stray ``ssr=False`` assignment after the blocking
	    # ``launch()`` call, so it never acted as a launch option. Presumably the
	    # intent was to turn off server-side rendering; pass it as ``ssr_mode`` only
	    # when the installed Gradio accepts that keyword, so older releases still
	    # start normally.
	    launch_options = {}
	    if "ssr_mode" in inspect.signature(iface.launch).parameters:
	        launch_options["ssr_mode"] = False
	    iface.launch(**launch_options)