Spaces:
Running
Running
File size: 2,910 Bytes
89fafa1 ccd704d 89fafa1 49b03ce 89fafa1 49b03ce 89fafa1 b495876 89fafa1 b495876 89fafa1 b495876 89fafa1 8ad63c1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import gradio as gr
from google.cloud import texttospeech
import pysrt
import os
from pydub import AudioSegment
from dotenv import load_dotenv
from google.oauth2 import service_account
import json
# Load environment variables; this runs before get_google_client() reads
# GOOGLE_CREDENTIALS_JSON and GOOGLE_QUOTA_PROJECT_ID via os.getenv().
load_dotenv()
def get_google_client():
    """Build a Google Cloud Text-to-Speech client from environment settings.

    The service-account key is read as a JSON document from the
    ``GOOGLE_CREDENTIALS_JSON`` environment variable. The value of
    ``GOOGLE_QUOTA_PROJECT_ID`` is passed through as the client's
    ``quota_project_id`` option (``None`` when the variable is unset).

    Returns:
        A ``texttospeech.TextToSpeechClient`` created with the
        service-account credentials.

    Raises:
        ValueError: If ``GOOGLE_CREDENTIALS_JSON`` is unset or empty, is not
            valid JSON, or does not decode to a JSON object.
    """
    creds_json = os.getenv("GOOGLE_CREDENTIALS_JSON")
    if not creds_json:
        raise ValueError("Google Cloud credentials not configured")

    try:
        creds_info = json.loads(creds_json)
    except json.JSONDecodeError as err:
        # Keep the message generic: the payload is a credential and must not
        # be echoed into logs. The decoder error is chained for debugging.
        raise ValueError("GOOGLE_CREDENTIALS_JSON is not valid JSON") from err

    # from_service_account_info() expects a mapping; reject scalars and lists
    # here so the failure names the misconfigured variable.
    if not isinstance(creds_info, dict):
        raise ValueError("GOOGLE_CREDENTIALS_JSON must contain a JSON object")

    credentials = service_account.Credentials.from_service_account_info(
        creds_info
    )
    return texttospeech.TextToSpeechClient(
        credentials=credentials,
        client_options={
            "quota_project_id": os.getenv("GOOGLE_QUOTA_PROJECT_ID"),
        },
    )
# Module-level Text-to-Speech client, created once at import time and used by
# srt_to_speech(). Importing this module raises ValueError when the
# credentials environment variable is not configured.
client = get_google_client()
def srt_to_speech(srt_file, language_code, voice_name, gender):
    """Synthesize an SRT subtitle file into one MP3 that follows its timeline.

    Every subtitle's text is sent to Google Cloud Text-to-Speech. The
    returned clips are concatenated in file order, and silence is inserted
    before a clip whenever the audio produced so far ends earlier than that
    subtitle's start time. If the preceding speech runs past the next
    subtitle's start, the next clip is appended immediately.

    Args:
        srt_file: The uploaded subtitle file, either an object exposing its
            path as ``.name`` or a plain path string.
        language_code: Language code for the voice, e.g. ``"fi-FI"``.
        voice_name: Name of the voice, e.g. ``"fi-FI-Standard-A"``.
        gender: Member name of ``texttospeech.SsmlVoiceGender``
            (``"FEMALE"``, ``"MALE"`` or ``"NEUTRAL"``).

    Returns:
        Path of the exported MP3 file, ``"output_with_timeline.mp3"``.

    Raises:
        ValueError: If no subtitle file was supplied.
    """
    import io  # local import: used only to decode MP3 bytes in memory

    if srt_file is None:
        raise ValueError("No SRT file provided")
    # Accept both file-like upload objects (path in `.name`) and plain paths.
    srt_path = getattr(srt_file, "name", srt_file)
    subs = pysrt.open(srt_path)

    # Voice and encoding are identical for every subtitle: build them once.
    voice = texttospeech.VoiceSelectionParams(
        language_code=language_code,
        name=voice_name,
        ssml_gender=texttospeech.SsmlVoiceGender[gender],
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )

    final_audio = AudioSegment.empty()
    for sub in subs:
        # Nothing to speak for blank cues; skip the API call.
        if not sub.text.strip():
            continue

        response = client.synthesize_speech(
            input=texttospeech.SynthesisInput(text=sub.text),
            voice=voice,
            audio_config=audio_config,
        )

        # Pad up to this subtitle's start time. `ordinal`, len(AudioSegment)
        # and AudioSegment.silent(duration=...) are all in milliseconds, so
        # no unit conversion is needed. Measuring against the audio actually
        # produced (rather than the previous cue's end time) keeps clips on
        # the timeline even though spoken length differs from cue length,
        # and does not depend on cue indices being sequential.
        gap_ms = sub.start.ordinal - len(final_audio)
        if gap_ms > 0:
            final_audio += AudioSegment.silent(duration=gap_ms)

        # Decode straight from memory: no temp files to collide between
        # concurrent requests or to leak when an exception is raised.
        final_audio += AudioSegment.from_mp3(io.BytesIO(response.audio_content))

    # NOTE(review): the fixed output name is shared by all requests; confirm
    # whether concurrent use needs a per-request file name.
    output_file = "output_with_timeline.mp3"
    final_audio.export(output_file, format="mp3")
    return output_file
# Gradio interface: one SRT upload plus voice settings in, one audio file out.
# The input order must match the parameter order of srt_to_speech().
iface = gr.Interface(
    fn=srt_to_speech,
    inputs=[
        gr.File(label="Lataa SRT/tiedosto"),
        gr.Dropdown(["fi-FI", "en-US"], label="语言代码", value="fi-FI"),
        gr.Dropdown(
            ["fi-FI-Standard-A", "fi-FI-Wavenet-A", "en-US-Standard-A"],
            label="Äänen nimi",
            value="fi-FI-Standard-A",
        ),
        # Label spelling fixed: "Sukupuoli" (gender), previously "Sukupoli".
        gr.Dropdown(
            ["FEMALE", "MALE", "NEUTRAL"],
            label="Sukupuoli",
            value="FEMALE",
        ),
    ],
    outputs=gr.Audio(label="Luotu äänitetty tekstitys"),
    title="SRT-äänityökalu",
    description="Lataa SRT-tekstitystiedosto ja luo äänitetty versio ajoituksella",
)
# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()
    # NOTE(review): this assignment runs only after launch() returns and is
    # not read anywhere in the visible code. It was presumably meant as a
    # launch() option (recent Gradio releases spell it `ssr_mode`) — confirm
    # against the installed Gradio version before passing it to launch().
    # Its original nesting (inside or outside this guard) could not be
    # determined; verify.
    ssr = False