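# NOTE: the original first lines read "Spaces: Running Running" -- apparently
# page-header residue from the web file viewer this source was copied from,
# not part of the program.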
import io
import json
import os

import gradio as gr
import pysrt
from dotenv import load_dotenv
from google.cloud import texttospeech
from google.oauth2 import service_account
from pydub import AudioSegment
# Load environment variables (python-dotenv reads a local .env file when one
# is present) before any configuration is read via os.getenv below.
load_dotenv()
def get_google_client():
    """Build a Google Cloud Text-to-Speech client from environment settings.

    The service-account key is read as a JSON string from the
    ``GOOGLE_CREDENTIALS_JSON`` environment variable; the quota project is
    read from ``GOOGLE_QUOTA_PROJECT_ID`` and passed through unchanged
    (``None`` when the variable is unset).

    Returns:
        A ``texttospeech.TextToSpeechClient`` using those credentials.

    Raises:
        ValueError: If ``GOOGLE_CREDENTIALS_JSON`` is unset, empty, or does
            not contain valid JSON.
    """
    creds_json = os.getenv("GOOGLE_CREDENTIALS_JSON")
    if not creds_json:
        raise ValueError("Google Cloud credentials not configured")

    # Parse separately so a malformed secret yields an actionable message
    # instead of a bare JSON parser error (JSONDecodeError is itself a
    # ValueError, so callers catching ValueError are unaffected).
    try:
        service_account_info = json.loads(creds_json)
    except json.JSONDecodeError as err:
        raise ValueError("GOOGLE_CREDENTIALS_JSON is not valid JSON") from err

    credentials = service_account.Credentials.from_service_account_info(
        service_account_info
    )
    return texttospeech.TextToSpeechClient(
        credentials=credentials,
        client_options={"quota_project_id": os.getenv("GOOGLE_QUOTA_PROJECT_ID")},
    )
# Shared Text-to-Speech client, created once at import time and reused by
# every request; a missing or invalid credential therefore fails at startup.
client = get_google_client()
def srt_to_speech(srt_file, language_code, voice_name, gender):
    """Synthesize an SRT subtitle file into a single MP3 that follows its timing.

    Each non-empty cue is sent to Google Cloud Text-to-Speech and the clips
    are joined, padding with silence so that every clip starts at its cue's
    start time whenever the preceding speech has already finished.

    Args:
        srt_file: The uploaded subtitle file, either a path string or an
            object exposing the path as ``.name`` (both are accepted).
        language_code: Language code passed to the voice selection,
            e.g. ``"fi-FI"``.
        voice_name: Voice name passed to the voice selection.
        gender: Name of a ``texttospeech.SsmlVoiceGender`` member
            (``"FEMALE"``, ``"MALE"`` or ``"NEUTRAL"``).

    Returns:
        Path of the exported MP3 file (``"output_with_timeline.mp3"``).

    Raises:
        gr.Error: If no file was uploaded.
        KeyError: If ``gender`` is not a valid ``SsmlVoiceGender`` name.
    """
    if srt_file is None:
        raise gr.Error("Please upload an SRT file first.")

    # Accept both a plain path and a file-like wrapper carrying `.name`.
    srt_path = getattr(srt_file, "name", srt_file)
    subs = pysrt.open(srt_path)

    # Voice and encoding are identical for every cue, so build them once.
    voice = texttospeech.VoiceSelectionParams(
        language_code=language_code,
        name=voice_name,
        ssml_gender=texttospeech.SsmlVoiceGender[gender],
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )

    final_audio = AudioSegment.empty()
    for sub in subs:
        # Skip blank cues rather than sending empty input to the API.
        if not sub.text.strip():
            continue

        response = client.synthesize_speech(
            input=texttospeech.SynthesisInput(text=sub.text),
            voice=voice,
            audio_config=audio_config,
        )

        # Decode straight from memory: no fixed-name temp files that could
        # collide between concurrent requests or be left behind on errors.
        segment = AudioSegment.from_file(
            io.BytesIO(response.audio_content), format="mp3"
        )

        # Both `sub.start.ordinal` and `len(final_audio)` are milliseconds,
        # which is also the unit `AudioSegment.silent` expects. Measuring the
        # gap against the audio produced so far (instead of the previous
        # cue's end) keeps cues aligned even when a clip is shorter or longer
        # than its subtitle slot, and does not rely on cue numbering.
        gap_ms = sub.start.ordinal - len(final_audio)
        if gap_ms > 0:
            final_audio += AudioSegment.silent(
                duration=gap_ms, frame_rate=segment.frame_rate
            )

        final_audio += segment

    # Export the assembled timeline.
    output_file = "output_with_timeline.mp3"
    final_audio.export(output_file, format="mp3")
    return output_file
# Gradio interface: one SRT upload plus language/voice/gender selectors,
# wired to srt_to_speech; the resulting audio file is shown in a player.
iface = gr.Interface(
    fn=srt_to_speech,
    inputs=[
        gr.File(label="Lataa SRT/tiedosto"),
        gr.Dropdown(["fi-FI", "en-US"], label="语言代码", value="fi-FI"),
        gr.Dropdown(
            ["fi-FI-Standard-A", "fi-FI-Wavenet-A", "en-US-Standard-A"],
            label="Äänen nimi",
            value="fi-FI-Standard-A",
        ),
        # Label typo fixed: "Sukupoli" -> "Sukupuoli" (Finnish for "gender").
        gr.Dropdown(["FEMALE", "MALE", "NEUTRAL"], label="Sukupuoli", value="FEMALE"),
    ],
    outputs=gr.Audio(label="Luotu äänitetty tekstitys"),
    title="SRT-äänityökalu",
    description="Lataa SRT-tekstitystiedosto ja luo äänitetty versio ajoituksella",
)
if __name__ == "__main__":
    iface.launch()
    # NOTE(review): this assignment does not configure anything -- it only
    # binds a module-level name, and only after launch() has returned. It
    # looks like a stray attempt to disable server-side rendering; confirm
    # whether a launch() argument (possibly `ssr_mode=False` on recent Gradio
    # versions) was intended instead.
    ssr = False