# Origin: Hugging Face file view of app.py (1.81 kB); page links: raw, history, blame.
# Last commit: "Update app.py" by SohomToom, 152fe30 (verified).
import os
os.environ["NUMBA_DISABLE_CACHE"] = "1"
import gradio as gr
from docx import Document
from TTS.api import TTS
import tempfile
import zipfile
# Voice catalogue: maps the label offered in the voice dropdown to the model
# name handed to ``TTS(model_name=...)``. Insertion order is the order in which
# the choices are listed.
VOICE_MODELS = {
    "Jenny (Expressive Female)": "tts_models/en/jenny/jenny",
    "LJSpeech (Standard Female)": "tts_models/en/ljspeech/vits",
    "VCTK (Multiple Speakers)": "tts_models/en/vctk/vits",
    "Blizzard (Deep Male Voice)": "tts_models/en/blizzard2013/capacitron-t2-cv-v1",
}
def docx_to_wav_zip(doc_file, selected_voice):
    """Synthesize each non-empty paragraph of a .docx file and zip the WAVs.

    Args:
        doc_file: The document to read. Either an object that exposes the
            file path as ``.name`` or a plain path string.
        selected_voice: A key of ``VOICE_MODELS`` selecting the TTS model.

    Returns:
        Path to ``voiceover_chunks.zip``, containing one ``chunk_<n>.wav`` per
        non-empty paragraph, numbered from 1 in document order.

    Raises:
        gr.Error: If no file was supplied, ``selected_voice`` is not a known
            voice, or the document has no text to synthesize.
    """
    # Validate cheap inputs first so bad requests fail before any heavy work.
    if doc_file is None:
        raise gr.Error("Please upload a .docx file.")
    if selected_voice not in VOICE_MODELS:
        raise gr.Error(f"Unknown voice: {selected_voice!r}")

    # Accept both wrappers carrying the path in ``.name`` and bare path strings.
    doc_path = getattr(doc_file, "name", doc_file)
    document = Document(doc_path)

    # Strip once per paragraph and keep only those that still have text.
    stripped = (para.text.strip() for para in document.paragraphs)
    paragraphs = [text for text in stripped if text]
    if not paragraphs:
        raise gr.Error("The document contains no text to convert.")

    # NOTE: the model is constructed on every call; it is created only after
    # the input has been validated so rejected requests never pay for it.
    tts = TTS(model_name=VOICE_MODELS[selected_voice], progress_bar=False, gpu=False)

    # The WAVs are only needed until they are archived, so they live in a
    # directory that is removed as soon as the ZIP has been written.
    with tempfile.TemporaryDirectory() as wav_dir:
        wav_paths = []
        for index, chunk in enumerate(paragraphs, start=1):
            wav_path = os.path.join(wav_dir, f"chunk_{index}.wav")
            tts.tts_to_file(text=chunk, file_path=wav_path)
            wav_paths.append(wav_path)

        # The archive is returned by path, so it needs a directory that
        # survives this call. Creating it only after synthesis succeeded
        # avoids leaving an empty directory behind when synthesis fails.
        zip_path = os.path.join(tempfile.mkdtemp(), "voiceover_chunks.zip")
        with zipfile.ZipFile(zip_path, "w") as zipf:
            for wav_path in wav_paths:
                zipf.write(wav_path, arcname=os.path.basename(wav_path))

    return zip_path
# Gradio UI: a document upload and a voice picker go in, a ZIP file comes out.
interface = gr.Interface(
    fn=docx_to_wav_zip,
    inputs=[
        gr.File(label="Upload .docx File"),
        gr.Dropdown(
            choices=list(VOICE_MODELS),
            label="Choose Voice",
            value="Jenny (Expressive Female)",
        ),
    ],
    outputs=gr.File(label="Download ZIP of WAV Files"),
    title="Realistic Voiceover from DOCX (Multiple Voices)",
    description=(
        "Upload a .docx file and choose a realistic voice to generate WAV "
        "voiceover files chunked by paragraph, downloadable as a ZIP archive."
    ),
)

# Launch the app only when this file is executed directly, not on import.
if __name__ == "__main__":
    interface.launch()