File size: 1,093 Bytes
a3e2313
 
 
b9bf9b2
 
 
 
 
a3e2313
b9bf9b2
 
a3e2313
 
 
 
 
 
 
 
b9bf9b2
a3e2313
 
 
b9bf9b2
a3e2313
b9bf9b2
a3e2313
 
 
 
 
b9bf9b2
 
a3e2313
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os
# Set before the third-party imports below, so the variable is already in the
# process environment when those packages are loaded.
# NOTE(review): presumably aimed at Numba pulled in through the TTS package;
# confirm that the installed Numba version actually honours this variable.
os.environ["NUMBA_DISABLE_CACHE"] = "1"  # Fix for Numba caching issue in cloud

import gradio as gr
from docx import Document
from TTS.api import TTS
import tempfile

# Coqui TTS synthesizer, built once when the module is imported and shared by
# every call to docx_to_audio. Configured for CPU (gpu=False) with the
# progress bar turned off.
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=False,
)

def docx_to_audio(doc_file):
    """Convert the text of an uploaded .docx file into a spoken .wav file.

    Args:
        doc_file: The upload handed over by the Gradio ``File`` input. Either
            a filesystem path (``str`` / ``os.PathLike``) or an object whose
            ``name`` attribute holds the path. Both forms are accepted so the
            function works regardless of how the component delivers uploads.

    Returns:
        Path of a newly created temporary ``.wav`` file holding the
        synthesized speech. The file is created with ``delete=False`` and is
        not removed by this function on success.

    Raises:
        ValueError: If no file was supplied, or if the document has no
            non-blank paragraph text to read aloud.
    """
    # Submitting the form without choosing a file passes None.
    if doc_file is None:
        raise ValueError("No file uploaded. Please upload a .docx file.")

    # Accept a plain path as well as a file wrapper exposing `.name`.
    if isinstance(doc_file, (str, os.PathLike)):
        docx_path = os.fspath(doc_file)
    else:
        docx_path = doc_file.name

    # Read text from .docx file, skipping blank / whitespace-only paragraphs.
    document = Document(docx_path)
    full_text = "\n".join(
        para.text for para in document.paragraphs if para.text.strip()
    )
    if not full_text:
        # Nothing to synthesize; fail with a clear message instead of handing
        # an empty string to the TTS engine.
        raise ValueError("The uploaded document contains no readable text.")

    # Reserve a temporary output .wav path; the handle is closed right away so
    # the TTS engine can open the path itself.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        audio_path = f.name

    # Generate audio. If synthesis fails, remove the placeholder file so that
    # failed requests do not accumulate empty temp files.
    try:
        tts.tts_to_file(text=full_text, file_path=audio_path)
    except Exception:
        if os.path.exists(audio_path):
            os.remove(audio_path)
        raise
    return audio_path

# Web UI definition: one file-upload input wired to docx_to_audio, whose
# returned .wav path is shown through an audio output component.
interface = gr.Interface(
    title="Docx to Realistic Voiceover",
    description="Upload a .docx file and get realistic speech audio.",
    fn=docx_to_audio,
    inputs=gr.File(label="Upload .docx File"),
    outputs=gr.Audio(type="filepath", label="Download Audio"),
)

# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch the UI.
if __name__ == "__main__":
    interface.launch()