"""Gradio app that turns an uploaded .docx file into a spoken-audio .wav file."""

import contextlib
import os
import tempfile

# Fix for Numba caching issue in cloud. Kept ahead of the third-party imports
# so the variable is already set by the time the TTS stack is imported.
os.environ["NUMBA_DISABLE_CACHE"] = "1"

import gradio as gr  # noqa: E402
from docx import Document  # noqa: E402
from TTS.api import TTS  # noqa: E402

# Load Coqui TTS model (offline + realistic). Loaded once at import time and
# shared by every request; runs on CPU (gpu=False).
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=False,
)


def docx_to_audio(doc_file):
    """Synthesize speech for the text of an uploaded .docx file.

    Args:
        doc_file: The upload handed over by the ``gr.File`` input. Accepted as
            either an object exposing the file path via ``.name`` or a plain
            path string, since the form depends on the Gradio
            version/configuration. ``None`` means nothing was uploaded.

    Returns:
        Path of a newly created temporary ``.wav`` file holding the audio.
        The file is not deleted by this function.

    Raises:
        gr.Error: If no file was uploaded or the document has no non-blank
            paragraphs.
    """
    if doc_file is None:
        raise gr.Error("Please upload a .docx file first.")

    # Works for both a path string and a file wrapper carrying ``.name``.
    docx_path = getattr(doc_file, "name", doc_file)

    # Read text from the .docx file, skipping whitespace-only paragraphs.
    document = Document(docx_path)
    full_text = "\n".join(
        para.text for para in document.paragraphs if para.text.strip()
    )
    if not full_text:
        raise gr.Error("The uploaded document contains no readable text.")

    # Reserve a temporary output path; delete=False keeps the file around
    # after the handle is closed so the synthesizer can write to it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        audio_path = f.name

    # Generate audio. If synthesis fails, remove the reserved file so failed
    # requests do not accumulate stray temp files, then re-raise unchanged.
    try:
        tts.tts_to_file(text=full_text, file_path=audio_path)
    except Exception:
        with contextlib.suppress(OSError):
            os.unlink(audio_path)
        raise

    return audio_path


# Gradio interface
interface = gr.Interface(
    fn=docx_to_audio,
    inputs=gr.File(label="Upload .docx File"),
    outputs=gr.Audio(type="filepath", label="Download Audio"),
    title="Docx to Realistic Voiceover",
    description="Upload a .docx file and get realistic speech audio.",
)

if __name__ == "__main__":
    interface.launch()