SohomToom's picture
Update app.py
e929cde verified
raw
history blame
2.1 kB
import os
os.environ["NUMBA_DISABLE_CACHE"] = "1"
import gradio as gr
from docx import Document
from TTS.api import TTS
import tempfile
import zipfile
# Voices offered in the UI, all served by the multi-speaker VCTK VITS model.
# Each entry maps the label shown to the user onto a
# (Coqui TTS model id, speaker index) pair.
VOICE_MODELS = {
    label: ("tts_models/en/vctk/vits", speaker_index)
    for label, speaker_index in (
        ("VCTK Speaker 23 (British Male)", 23),
        ("VCTK Speaker 27 (Scottish Male)", 27),
        ("VCTK Speaker 36 (US Male)", 36),
        ("VCTK Speaker 42 (Irish Male)", 42),
    )
}
def docx_to_wav_zip(doc_file, selected_voice):
    """Synthesize every non-empty paragraph of a .docx file and zip the WAVs.

    Args:
        doc_file: The uploaded document as delivered by ``gr.File``: either
            an object exposing the file path as ``.name`` or a plain path
            string.
        selected_voice: A key of ``VOICE_MODELS`` (the dropdown label).

    Returns:
        Path to ``output_audio.zip`` holding ``part_1.wav``, ``part_2.wav``,
        ... in paragraph order. The archive is placed in its own temporary
        directory, which is deliberately not removed here so the file still
        exists when the caller reads it.

    Raises:
        gr.Error: If no document was uploaded or no known voice was chosen.
    """
    if doc_file is None:
        raise gr.Error("Please upload a .docx file.")
    if selected_voice not in VOICE_MODELS:
        raise gr.Error("Please choose a voice.")

    model_name, speaker_idx = VOICE_MODELS[selected_voice]
    tts = TTS(model_name=model_name, progress_bar=False, gpu=False)
    # The TTS Python API selects a voice by speaker *name* via `speaker=`;
    # translate the configured index into the model's speaker name.
    # NOTE(review): confirm these indices really correspond to the accents
    # advertised in the VOICE_MODELS labels.
    speaker_name = tts.speakers[speaker_idx]

    # Accept both a file-like upload (path in `.name`) and a bare path string.
    doc_path = getattr(doc_file, "name", doc_file)
    document = Document(doc_path)

    # One audio chunk per non-empty paragraph keeps each clip short.
    stripped = (para.text.strip() for para in document.paragraphs)
    paragraphs = [text for text in stripped if text]

    # The WAVs are only intermediates, so they live in a self-cleaning
    # directory; the ZIP must outlive this function and is written elsewhere.
    with tempfile.TemporaryDirectory() as wav_dir:
        audio_files = []
        for number, text in enumerate(paragraphs, start=1):
            wav_path = os.path.join(wav_dir, f"part_{number}.wav")
            tts.tts_to_file(text=text, speaker=speaker_name, file_path=wav_path)
            audio_files.append(wav_path)

        # Created only after synthesis succeeded so a failure above does not
        # leave an empty directory behind.
        out_dir = tempfile.mkdtemp(prefix="docx_tts_")
        zip_path = os.path.join(out_dir, "output_audio.zip")
        with zipfile.ZipFile(zip_path, 'w') as zipf:
            for wav_path in audio_files:
                # Store only the file name so the archive has a flat layout.
                zipf.write(wav_path, os.path.basename(wav_path))

    return zip_path
# Gradio UI: one .docx upload and one voice selector feed docx_to_wav_zip,
# whose returned path is offered back to the user as a downloadable file.
interface = gr.Interface(
    title="Convert DOCX to WAV with Male Voices and Accents",
    description="Upload a .docx file and choose a male voice (with accent) to generate paragraph-wise audio and download as ZIP.",
    fn=docx_to_wav_zip,
    inputs=[
        gr.File(label="Upload .docx File"),
        # Iterating the dict yields its keys, i.e. the voice labels.
        gr.Dropdown(label="Choose Male Voice with Accent", choices=list(VOICE_MODELS)),
    ],
    outputs=gr.File(label="Download ZIP of WAV Files"),
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()