Spaces:
Sleeping
Sleeping
File size: 2,104 Bytes
a3e2313 152fe30 a3e2313 b9bf9b2 152fe30 b9bf9b2 e929cde 152fe30 e929cde 152fe30 b9bf9b2 152fe30 e929cde a3e2313 e929cde a3e2313 e929cde b9bf9b2 e929cde 152fe30 e929cde 152fe30 e929cde b9bf9b2 a3e2313 b9bf9b2 152fe30 e929cde 152fe30 e929cde b9bf9b2 a3e2313 e929cde |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import os
os.environ["NUMBA_DISABLE_CACHE"] = "1"
import gradio as gr
from docx import Document
from TTS.api import TTS
import tempfile
import zipfile
# Voice presets offered in the UI. Every preset uses the same multi-speaker
# VCTK VITS model and differs only in the speaker index.
# NOTE(review): the accent/gender wording in the labels is display text only;
# it is not checked here against the VCTK speaker metadata -- confirm.
_VCTK_VITS_MODEL = "tts_models/en/vctk/vits"

# Maps the label shown in the dropdown to a (model name, speaker index) pair.
VOICE_MODELS = dict(
    [
        ("VCTK Speaker 23 (British Male)", (_VCTK_VITS_MODEL, 23)),
        ("VCTK Speaker 27 (Scottish Male)", (_VCTK_VITS_MODEL, 27)),
        ("VCTK Speaker 36 (US Male)", (_VCTK_VITS_MODEL, 36)),
        ("VCTK Speaker 42 (Irish Male)", (_VCTK_VITS_MODEL, 42)),
    ]
)
def docx_to_wav_zip(doc_file, selected_voice):
    """Synthesize each non-empty paragraph of a .docx file to WAV and zip them.

    Args:
        doc_file: The uploaded document, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object that exposes the path
            through a ``.name`` attribute.
        selected_voice: A key of ``VOICE_MODELS`` selecting the TTS model and
            the speaker index to use.

    Returns:
        Path to ``output_audio.zip`` holding ``part_1.wav``, ``part_2.wav``,
        ... in paragraph order. The archive is placed in its own temporary
        directory which is deliberately NOT removed here, because the caller
        reads the file only after this function has returned.

    Raises:
        ValueError: If ``doc_file`` is ``None`` (nothing was uploaded).
        KeyError: If ``selected_voice`` is not a key of ``VOICE_MODELS``.
        IndexError: If the configured speaker index is outside the model's
            speaker list.
    """
    if doc_file is None:
        raise ValueError("No .docx file was uploaded.")

    # Accept both a plain path and a file-like wrapper. Path objects are
    # checked first because ``Path.name`` is only the basename.
    if isinstance(doc_file, (str, os.PathLike)):
        doc_path = os.fspath(doc_file)
    else:
        doc_path = doc_file.name

    model_name, speaker_idx = VOICE_MODELS[selected_voice]

    # Read the document before loading the (expensive) model so that an
    # unreadable upload fails fast.
    document = Document(doc_path)
    paragraphs = [
        text
        for text in (para.text.strip() for para in document.paragraphs)
        if text
    ]

    tts = TTS(model_name=model_name, progress_bar=False, gpu=False)

    # The documented ``tts_to_file`` parameter is ``speaker`` (a speaker
    # name), so translate the configured index into a name from the model's
    # own speaker list. Single-speaker models expose no list; pass None then.
    speakers = tts.speakers
    speaker = speakers[speaker_idx] if speakers else None

    # The intermediate WAVs can be discarded as soon as they are archived,
    # but the ZIP must outlive this call, so it gets a separate directory
    # that is not cleaned up on return.
    with tempfile.TemporaryDirectory() as wav_dir:
        wav_paths = []
        for number, text in enumerate(paragraphs, start=1):
            wav_path = os.path.join(wav_dir, f"part_{number}.wav")
            tts.tts_to_file(text=text, speaker=speaker, file_path=wav_path)
            wav_paths.append(wav_path)

        # Create the output directory only after synthesis succeeded, so a
        # failed run leaves nothing behind.
        out_dir = tempfile.mkdtemp(prefix="docx_tts_")
        zip_path = os.path.join(out_dir, "output_audio.zip")
        with zipfile.ZipFile(zip_path, "w") as zipf:
            for wav_path in wav_paths:
                zipf.write(wav_path, os.path.basename(wav_path))

    return zip_path
# Gradio UI: one .docx upload plus a voice dropdown in, one ZIP file out.
# Components are created in the same order as they appear in the form.
_docx_upload = gr.File(label="Upload .docx File")
_voice_dropdown = gr.Dropdown(
    choices=list(VOICE_MODELS),
    label="Choose Male Voice with Accent",
)
_zip_download = gr.File(label="Download ZIP of WAV Files")

interface = gr.Interface(
    fn=docx_to_wav_zip,
    inputs=[_docx_upload, _voice_dropdown],
    outputs=_zip_download,
    title="Convert DOCX to WAV with Male Voices and Accents",
    description="Upload a .docx file and choose a male voice (with accent) to generate paragraph-wise audio and download as ZIP.",
)
if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script,
    # not when it is imported.
    interface.launch()