File size: 1,813 Bytes
a3e2313
152fe30
a3e2313
b9bf9b2
 
 
 
152fe30
b9bf9b2
152fe30
 
 
 
 
 
 
b9bf9b2
152fe30
 
a3e2313
152fe30
a3e2313
152fe30
 
b9bf9b2
152fe30
 
 
 
 
 
 
 
 
 
 
b9bf9b2
a3e2313
b9bf9b2
152fe30
 
 
 
 
 
 
 
b9bf9b2
 
a3e2313
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
os.environ["NUMBA_DISABLE_CACHE"] = "1"

import gradio as gr
from docx import Document
from TTS.api import TTS
import tempfile
import zipfile

# Voice catalogue: maps the human-readable label offered in the "Choose Voice"
# dropdown to the model name handed to ``TTS(model_name=...)``.
# Insertion order is the order in which the choices are listed in the UI.
VOICE_MODELS = dict(
    (
        ("Jenny (Expressive Female)", "tts_models/en/jenny/jenny"),
        ("LJSpeech (Standard Female)", "tts_models/en/ljspeech/vits"),
        ("VCTK (Multiple Speakers)", "tts_models/en/vctk/vits"),
        ("Blizzard (Deep Male Voice)", "tts_models/en/blizzard2013/capacitron-t2-cv-v1"),
    )
)

def docx_to_wav_zip(doc_file, selected_voice):
    """Synthesize every non-empty paragraph of a .docx file and zip the WAVs.

    Args:
        doc_file: The uploaded document. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path as ``.name``
            (for example a temporary-file wrapper).
        selected_voice: A key of ``VOICE_MODELS`` selecting the TTS model.

    Returns:
        Path to ``voiceover_chunks.zip`` inside a newly created temporary
        directory. The archive contains ``chunk_1.wav`` .. ``chunk_N.wav``,
        one per non-empty paragraph, in document order.

    Raises:
        ValueError: If ``doc_file`` is ``None`` (nothing was uploaded).
        KeyError: If ``selected_voice`` is not a key of ``VOICE_MODELS``.
    """
    # Check the cheap inputs first so a bad request never pays for a model load.
    if doc_file is None:
        raise ValueError("No .docx file was uploaded.")

    # Accept both a plain path and a file wrapper that carries it in ``.name``.
    if isinstance(doc_file, (str, os.PathLike)):
        doc_path = os.fspath(doc_file)
    else:
        doc_path = doc_file.name

    document = Document(doc_path)
    paragraphs = [para.text.strip() for para in document.paragraphs if para.text.strip()]

    tts = TTS(model_name=VOICE_MODELS[selected_voice], progress_bar=False, gpu=False)

    # Multi-speaker models refuse to synthesize without an explicit speaker;
    # fall back to the model's first listed speaker. Single-speaker models are
    # called exactly as before (no ``speaker`` argument).
    synth_kwargs = {}
    if getattr(tts, "is_multi_speaker", False):
        synth_kwargs["speaker"] = tts.speakers[0]

    # The directory is intentionally not removed here: the returned ZIP lives
    # in it and must still exist after this function returns.
    temp_dir = tempfile.mkdtemp()
    audio_files = []

    for index, chunk in enumerate(paragraphs, start=1):
        wav_path = os.path.join(temp_dir, f"chunk_{index}.wav")
        tts.tts_to_file(text=chunk, file_path=wav_path, **synth_kwargs)
        audio_files.append(wav_path)

    zip_path = os.path.join(temp_dir, "voiceover_chunks.zip")
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for wav_file in audio_files:
            # Store flat names so the archive has no temp-directory prefix.
            zipf.write(wav_file, arcname=os.path.basename(wav_file))

    return zip_path

# Gradio UI: a file upload plus a voice picker feed docx_to_wav_zip, and the
# resulting ZIP path is offered back as a downloadable file.
_voice_labels = list(VOICE_MODELS)

interface = gr.Interface(
    fn=docx_to_wav_zip,
    inputs=[
        gr.File(label="Upload .docx File"),
        gr.Dropdown(
            choices=_voice_labels,
            label="Choose Voice",
            value="Jenny (Expressive Female)",
        ),
    ],
    outputs=gr.File(label="Download ZIP of WAV Files"),
    title="Realistic Voiceover from DOCX (Multiple Voices)",
    description="Upload a .docx file and choose a realistic voice to generate WAV voiceover files chunked by paragraph, downloadable as a ZIP archive.",
)

# Start the web app only when this file is executed directly, not on import.
if __name__ == "__main__":
    interface.launch()