File size: 2,104 Bytes
a3e2313
152fe30
a3e2313
b9bf9b2
 
 
 
152fe30
b9bf9b2
e929cde
152fe30
e929cde
 
 
 
152fe30
b9bf9b2
152fe30
e929cde
 
 
 
a3e2313
e929cde
a3e2313
e929cde
 
b9bf9b2
e929cde
 
 
 
 
 
152fe30
e929cde
 
 
 
 
152fe30
e929cde
b9bf9b2
a3e2313
b9bf9b2
152fe30
 
 
e929cde
152fe30
 
e929cde
 
b9bf9b2
 
a3e2313
e929cde
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os
os.environ["NUMBA_DISABLE_CACHE"] = "1"

import gradio as gr
from docx import Document
from TTS.api import TTS
import tempfile
import zipfile

# Voice catalogue offered in the UI. Each display label maps to a
# (model name, speaker index) pair; every entry uses the same multi-speaker
# VCTK VITS model and differs only in the speaker index.
# NOTE(review): the accent/gender wording in the labels is taken as-is and is
# not verified against the model's speaker list -- confirm before relying on it.
_VCTK_VITS_MODEL = "tts_models/en/vctk/vits"
_VCTK_MALE_VOICES = (
    ("VCTK Speaker 23 (British Male)", 23),
    ("VCTK Speaker 27 (Scottish Male)", 27),
    ("VCTK Speaker 36 (US Male)", 36),
    ("VCTK Speaker 42 (Irish Male)", 42),
)
VOICE_MODELS = {
    label: (_VCTK_VITS_MODEL, speaker_idx)
    for label, speaker_idx in _VCTK_MALE_VOICES
}

def docx_to_wav_zip(doc_file, selected_voice):
    """Synthesize every non-empty paragraph of a .docx file and zip the WAVs.

    Args:
        doc_file: The uploaded document. Either a filesystem path
            (``str`` / ``os.PathLike``) or an object exposing the path as
            ``.name``; which one Gradio passes depends on its version and
            the ``gr.File`` configuration.
        selected_voice: A key of ``VOICE_MODELS``.

    Returns:
        Path to ``output_audio.zip`` containing ``part_1.wav``,
        ``part_2.wav``, ... in paragraph order. The archive is written to a
        directory that outlives this call so the caller can still read it.

    Raises:
        gr.Error: If no file was uploaded, the voice is not in
            ``VOICE_MODELS``, or the document has no non-blank paragraphs.
    """
    if doc_file is None:
        raise gr.Error("Please upload a .docx file.")
    if selected_voice not in VOICE_MODELS:
        raise gr.Error("Please choose a voice.")

    model_name, speaker_idx = VOICE_MODELS[selected_voice]

    # Accept both a plain path and a file wrapper with a ``.name`` attribute.
    if isinstance(doc_file, (str, os.PathLike)):
        doc_path = os.fspath(doc_file)
    else:
        doc_path = doc_file.name

    # One audio chunk per non-blank paragraph keeps each WAV short.
    document = Document(doc_path)
    paragraphs = [
        text for text in (para.text.strip() for para in document.paragraphs) if text
    ]
    if not paragraphs:
        raise gr.Error("The document contains no text to convert.")

    tts = TTS(model_name=model_name, progress_bar=False, gpu=False)
    # The TTS API selects a voice by speaker *name* through the ``speaker``
    # keyword, so translate the stored index into a name first.
    # NOTE(review): assumes the index in VOICE_MODELS is a position in
    # ``tts.speakers`` -- confirm it yields the intended voices.
    speaker = tts.speakers[speaker_idx]

    # The intermediate WAVs are disposable; only the archive must survive.
    with tempfile.TemporaryDirectory() as wav_dir:
        wav_paths = []
        for number, text in enumerate(paragraphs, start=1):
            wav_path = os.path.join(wav_dir, f"part_{number}.wav")
            tts.tts_to_file(text=text, speaker=speaker, file_path=wav_path)
            wav_paths.append(wav_path)

        # Write the zip OUTSIDE the auto-deleted directory: returning a path
        # inside ``wav_dir`` would hand back a file that is removed as soon
        # as the ``with`` block exits. This directory is intentionally not
        # cleaned up here because the caller reads the file after we return.
        out_dir = tempfile.mkdtemp(prefix="docx_tts_")
        zip_path = os.path.join(out_dir, "output_audio.zip")
        with zipfile.ZipFile(zip_path, "w") as zipf:
            for wav_path in wav_paths:
                zipf.write(wav_path, os.path.basename(wav_path))

    return zip_path

# Gradio interface: one .docx upload plus a voice picker in, one ZIP out.
interface = gr.Interface(
    fn=docx_to_wav_zip,
    inputs=[
        # Only .docx is supported by the handler, so restrict the picker.
        gr.File(label="Upload .docx File", file_types=[".docx"]),
        gr.Dropdown(
            choices=list(VOICE_MODELS),
            # Preselect the first voice so submitting without touching the
            # dropdown does not pass None to the handler.
            value=next(iter(VOICE_MODELS), None),
            label="Choose Male Voice with Accent",
        ),
    ],
    outputs=gr.File(label="Download ZIP of WAV Files"),
    title="Convert DOCX to WAV with Male Voices and Accents",
    description="Upload a .docx file and choose a male voice (with accent) to generate paragraph-wise audio and download as ZIP.",
)

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    interface.launch()