Spaces:
Sleeping
Sleeping
File size: 2,526 Bytes
a3e2313 b9bf9b2 152fe30 68f40ec b9bf9b2 68f40ec 152fe30 68f40ec 152fe30 b9bf9b2 68f40ec e929cde a3e2313 e929cde a3e2313 68f40ec b9bf9b2 68f40ec 152fe30 68f40ec 152fe30 68f40ec b9bf9b2 a3e2313 b9bf9b2 152fe30 68f40ec 152fe30 68f40ec b9bf9b2 a3e2313 68f40ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import os
import tempfile
import zipfile
from docx import Document
from TTS.api import TTS
from pydub import AudioSegment
import gradio as gr
# Maps the voice label shown in the UI dropdown to the Coqui TTS model
# identifier that is passed to ``TTS(model_name=...)``.
VOICE_MODELS = {
    "Jenny (Expressive Female)": "tts_models/en/jenny/jenny",
    "LJSpeech (Standard Female)": "tts_models/en/ljspeech/vits",
    "VCTK (Multiple Speakers)": "tts_models/en/vctk/vits",
}
def update_speaker_choices(selected_voice):
    """Return the speaker labels to offer for the selected voice.

    Args:
        selected_voice: A voice label as shown in the UI dropdown.

    Returns:
        A new list of speaker labels. The multi-speaker VCTK voice yields
        three "Speaker N" labels; every other voice yields a single
        "Default Speaker" entry.
    """
    if selected_voice == "VCTK (Multiple Speakers)":
        # Placeholder labels: replace with the model's real speaker names
        # or indices once they are known.
        return ["Speaker 1", "Speaker 2", "Speaker 3"]
    return ["Default Speaker"]
def _speaker_index(speaker_name):
    """Return the zero-based index encoded in a "Speaker N" label, else 0."""
    if not isinstance(speaker_name, str) or not speaker_name.startswith("Speaker"):
        return 0
    try:
        # "Speaker 1" -> 0; never go negative for labels such as "Speaker 0".
        return max(int(speaker_name.split()[-1]) - 1, 0)
    except ValueError:
        # Label without a numeric suffix: fall back to the first speaker.
        return 0


def docx_to_wav_zip(doc_file, selected_voice, speaker_name):
    """Read a .docx aloud with the selected voice and return a zip of the WAV.

    Args:
        doc_file: The uploaded document, either an object exposing the file
            path as ``.name`` or the path string itself.
        selected_voice: A key of ``VOICE_MODELS`` selecting the TTS model.
        speaker_name: A speaker label such as "Speaker 2". Only used when the
            loaded model is multi-speaker; the number selects an entry from
            the model's speaker list (clamped to the valid range).

    Returns:
        Path to a zip archive containing the generated WAV file.

    Raises:
        gr.Error: If no file was uploaded or the document has no text.
        KeyError: If ``selected_voice`` is not a key of ``VOICE_MODELS``.
    """
    if doc_file is None:
        raise gr.Error("Please upload a .docx file.")

    # Depending on the Gradio version the upload arrives as a temp-file
    # wrapper (path in ``.name``) or as a plain path string.
    docx_path = getattr(doc_file, "name", doc_file)

    # Extract the text first so an unusable document fails before the
    # (slow) model load.
    document = Document(docx_path)
    full_text = "\n".join(
        para.text for para in document.paragraphs if para.text.strip()
    )
    if not full_text:
        raise gr.Error("The uploaded document contains no text to read.")

    # Load the selected TTS model.
    tts = TTS(model_name=VOICE_MODELS[selected_voice], progress_bar=False, gpu=False)

    # ``tts_to_file`` selects a voice through ``speaker`` (a speaker name).
    # Multi-speaker models require it and single-speaker models reject it,
    # so it is only passed when the model actually has several speakers.
    synth_kwargs = {}
    if tts.is_multi_speaker:
        speakers = list(tts.speakers or [])
        if speakers:
            # NOTE(review): the UI labels are placeholders, so "Speaker N" is
            # mapped positionally onto the model's speaker list - confirm the
            # intended mapping.
            position = min(_speaker_index(speaker_name), len(speakers) - 1)
            synth_kwargs["speaker"] = speakers[position]

    # Reserve a temp path for the WAV; the zip sits next to it with the same
    # stem. splitext swaps only the extension, unlike str.replace, which
    # would also rewrite a ".wav" occurring elsewhere in the path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
        wav_path = tmp_wav.name
    zip_path = os.path.splitext(wav_path)[0] + ".zip"

    try:
        # The synthesizer already writes a WAV file, so no format conversion
        # step is needed before archiving.
        tts.tts_to_file(text=full_text, file_path=wav_path, **synth_kwargs)
        with zipfile.ZipFile(zip_path, "w") as zipf:
            zipf.write(wav_path, os.path.basename(wav_path))
    finally:
        # The WAV is only an intermediate artifact; drop it on success and on
        # failure. The zip must outlive this call so the caller can serve it.
        if os.path.exists(wav_path):
            os.remove(wav_path)

    return zip_path
# Gradio interface: one upload, a voice picker and a speaker picker feed
# docx_to_wav_zip, whose returned zip path is offered as a download.
interface = gr.Interface(
    fn=docx_to_wav_zip,
    inputs=[
        gr.File(label="Upload .docx File"),
        gr.Dropdown(
            choices=list(VOICE_MODELS.keys()),
            label="Choose Voice",
            value="Jenny (Expressive Female)",
        ),
        # The speaker list is built once, from the VCTK labels, when the
        # interface is constructed; it does not change with the chosen voice.
        gr.Dropdown(
            choices=update_speaker_choices("VCTK (Multiple Speakers)"),
            label="Choose Speaker",
            value="Speaker 1",
        ),
    ],
    outputs=gr.File(label="Download Zip File"),
    title="Realistic Voiceover from DOCX (Multiple Voices)",
    description="Upload a .docx file, choose a realistic voice, and pick a speaker to generate a voiceover in WAV format.",
)

# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()
|