"""Gradio app that converts an SRT subtitle file into timed speech audio."""

import io
import json
import os

import gradio as gr
import pysrt
from dotenv import load_dotenv
from google.cloud import texttospeech
from google.oauth2 import service_account
from pydub import AudioSegment

# Load environment variables (e.g. from a local .env file).
load_dotenv()


def get_google_client():
    """Build a Text-to-Speech client from credentials held in the environment.

    The service-account JSON is read from ``GOOGLE_CREDENTIALS_JSON`` and the
    quota project id from ``GOOGLE_QUOTA_PROJECT_ID``.

    Returns:
        A ``texttospeech.TextToSpeechClient`` using those credentials.

    Raises:
        ValueError: If ``GOOGLE_CREDENTIALS_JSON`` is unset or empty.
    """
    creds_json = os.getenv("GOOGLE_CREDENTIALS_JSON")
    if not creds_json:
        raise ValueError("Google Cloud credentials not configured")

    credentials = service_account.Credentials.from_service_account_info(
        json.loads(creds_json)
    )
    return texttospeech.TextToSpeechClient(
        credentials=credentials,
        client_options={"quota_project_id": os.getenv("GOOGLE_QUOTA_PROJECT_ID")},
    )


client = get_google_client()


def srt_to_speech(srt_file, language_code, voice_name, gender):
    """Synthesize each subtitle of an SRT file and join the clips into one MP3.

    Every clip is placed at its cue's start time by padding with silence. If
    the speech produced so far already runs past that start time, the clip is
    appended immediately after it instead.

    Args:
        srt_file: The uploaded file as handed over by Gradio: either a path
            string or an object exposing the path as ``.name``.
        language_code: Language code passed to the voice selection.
        voice_name: Name of the Google TTS voice.
        gender: Name of a ``texttospeech.SsmlVoiceGender`` member.

    Returns:
        Path of the exported MP3 file.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if srt_file is None:
        raise gr.Error("No SRT file provided")

    # Accept both a plain path and a file wrapper carrying the path in .name.
    srt_path = getattr(srt_file, "name", srt_file)
    subs = pysrt.open(srt_path)

    # Voice and encoding are identical for every cue, so build them once.
    voice = texttospeech.VoiceSelectionParams(
        language_code=language_code,
        name=voice_name,
        ssml_gender=texttospeech.SsmlVoiceGender[gender],
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )

    final_audio = AudioSegment.empty()
    for sub in subs:
        response = client.synthesize_speech(
            input=texttospeech.SynthesisInput(text=sub.text),
            voice=voice,
            audio_config=audio_config,
        )

        # Both the cue start (SubRipTime.ordinal) and len(AudioSegment) are in
        # milliseconds, which is also the unit AudioSegment.silent() expects.
        gap_ms = sub.start.ordinal - len(final_audio)
        if gap_ms > 0:
            final_audio += AudioSegment.silent(duration=gap_ms)

        # Decode straight from memory: no temp file to clean up or collide on.
        final_audio += AudioSegment.from_file(
            io.BytesIO(response.audio_content), format="mp3"
        )

    # Save the final audio.
    output_file = "output_with_timeline.mp3"
    final_audio.export(output_file, format="mp3")
    return output_file


# Gradio interface
iface = gr.Interface(
    fn=srt_to_speech,
    inputs=[
        gr.File(label="Lataa SRT/tiedosto"),
        gr.Dropdown(["fi-FI", "en-US"], label="语言代码", value="fi-FI"),
        gr.Dropdown(
            ["fi-FI-Standard-A", "fi-FI-Wavenet-A", "en-US-Standard-A"],
            label="Äänen nimi",
            value="fi-FI-Standard-A",
        ),
        gr.Dropdown(["FEMALE", "MALE", "NEUTRAL"], label="Sukupoli", value="FEMALE"),
    ],
    outputs=gr.Audio(label="Luotu äänitetty tekstitys"),
    title="SRT-äänityökalu",
    description="Lataa SRT-tekstitystiedosto ja luo äänitetty versio ajoituksella",
)

if __name__ == "__main__":
    iface.launch()
    # NOTE(review): this assignment has no effect on the app. It may have been
    # meant as an argument to launch() (e.g. to disable server-side
    # rendering) - confirm against the installed Gradio version.
    ssr = False