Spaces:
Sleeping
Sleeping
import os | |
import tempfile | |
import zipfile | |
from docx import Document | |
from TTS.api import TTS | |
from pydub import AudioSegment | |
import gradio as gr | |
# Available TTS models with voice descriptions | |
VOICE_MODELS = { | |
"Jenny (Expressive Female)": "tts_models/en/jenny/jenny", | |
"LJSpeech (Standard Female)": "tts_models/en/ljspeech/vits", | |
"VCTK (Multiple Speakers)": "tts_models/en/vctk/vits" | |
} | |
# Function to update speaker choices based on the selected model | |
def update_speaker_choices(selected_voice): | |
if selected_voice == "VCTK (Multiple Speakers)": | |
return ["Speaker 1", "Speaker 2", "Speaker 3"] # Modify with actual speaker names or indices | |
return ["Default Speaker"] | |
def docx_to_wav_zip(doc_file, selected_voice, speaker_name): | |
# Load the selected TTS model | |
tts = TTS(model_name=VOICE_MODELS[selected_voice], progress_bar=False, gpu=False) | |
# Extract text from .docx | |
document = Document(doc_file.name) | |
full_text = "\n".join([para.text for para in document.paragraphs if para.text.strip()]) | |
# Generate temporary paths | |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav: | |
wav_path = tmp_wav.name | |
zip_path = wav_path.replace(".wav", ".zip") | |
# Get speaker index (this part assumes speaker names are like 'Speaker 1', 'Speaker 2', etc.) | |
speaker_idx = int(speaker_name.split()[-1]) - 1 if speaker_name.startswith("Speaker") else 0 | |
# Generate speech with the selected speaker index | |
tts.tts_to_file(text=full_text, speaker_idx=speaker_idx, file_path=wav_path) | |
# Convert wav to mp3 and zip the result | |
sound = AudioSegment.from_wav(wav_path) | |
sound.export(wav_path, format="wav") # keeping the wav format | |
# Zip the files | |
with zipfile.ZipFile(zip_path, 'w') as zipf: | |
zipf.write(wav_path, os.path.basename(wav_path)) | |
return zip_path | |
# Gradio interface | |
interface = gr.Interface( | |
fn=docx_to_wav_zip, | |
inputs=[ | |
gr.File(label="Upload .docx File"), | |
gr.Dropdown(choices=list(VOICE_MODELS.keys()), label="Choose Voice", value="Jenny (Expressive Female)"), | |
gr.Dropdown(choices=update_speaker_choices("VCTK (Multiple Speakers)"), label="Choose Speaker", value="Speaker 1") # Example | |
], | |
outputs=gr.File(label="Download Zip File"), | |
title="Realistic Voiceover from DOCX (Multiple Voices)", | |
description="Upload a .docx file, choose a realistic voice, and pick a speaker to generate a voiceover in WAV format." | |
) | |
if __name__ == "__main__": | |
interface.launch() | |