SohomToom committed on
Commit
152fe30
·
verified ·
1 Parent(s): f403fef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -17
app.py CHANGED
@@ -1,34 +1,50 @@
1
  import os
2
- os.environ["NUMBA_DISABLE_CACHE"] = "1" # Fix for Numba caching issue in cloud
3
 
4
  import gradio as gr
5
  from docx import Document
6
  from TTS.api import TTS
7
  import tempfile
 
8
 
9
- # Load Coqui TTS model (offline + realistic)
10
- tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
 
 
 
 
 
11
 
12
- def docx_to_audio(doc_file):
13
- # Read text from .docx file
14
  document = Document(doc_file.name)
15
- full_text = "\n".join([para.text for para in document.paragraphs if para.text.strip()])
16
 
17
- # Create temporary output .wav file
18
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
19
- audio_path = f.name
20
 
21
- # Generate audio
22
- tts.tts_to_file(text=full_text, file_path=audio_path)
23
- return audio_path
 
 
 
 
 
 
 
 
24
 
25
  # Gradio interface
26
  interface = gr.Interface(
27
- fn=docx_to_audio,
28
- inputs=gr.File(label="Upload .docx File"),
29
- outputs=gr.Audio(type="filepath", label="Download Audio"),
30
- title="Docx to Realistic Voiceover",
31
- description="Upload a .docx file and get realistic speech audio."
 
 
 
32
  )
33
 
34
  if __name__ == "__main__":
 
1
  import os
2
+ os.environ["NUMBA_DISABLE_CACHE"] = "1"
3
 
4
  import gradio as gr
5
  from docx import Document
6
  from TTS.api import TTS
7
  import tempfile
8
+ import zipfile
9
 
10
+ # Available TTS models with voice descriptions
11
+ VOICE_MODELS = {
12
+ "Jenny (Expressive Female)": "tts_models/en/jenny/jenny",
13
+ "LJSpeech (Standard Female)": "tts_models/en/ljspeech/vits",
14
+ "VCTK (Multiple Speakers)": "tts_models/en/vctk/vits",
15
+ "Blizzard (Deep Male Voice)": "tts_models/en/blizzard2013/capacitron-t2-cv-v1"
16
+ }
17
 
18
+ def docx_to_wav_zip(doc_file, selected_voice):
19
+ tts = TTS(model_name=VOICE_MODELS[selected_voice], progress_bar=False, gpu=False)
20
  document = Document(doc_file.name)
21
+ paragraphs = [para.text.strip() for para in document.paragraphs if para.text.strip()]
22
 
23
+ temp_dir = tempfile.mkdtemp()
24
+ audio_files = []
 
25
 
26
+ for i, chunk in enumerate(paragraphs):
27
+ wav_path = os.path.join(temp_dir, f"chunk_{i+1}.wav")
28
+ tts.tts_to_file(text=chunk, file_path=wav_path)
29
+ audio_files.append(wav_path)
30
+
31
+ zip_path = os.path.join(temp_dir, "voiceover_chunks.zip")
32
+ with zipfile.ZipFile(zip_path, 'w') as zipf:
33
+ for wav_file in audio_files:
34
+ zipf.write(wav_file, arcname=os.path.basename(wav_file))
35
+
36
+ return zip_path
37
 
38
  # Gradio interface
39
  interface = gr.Interface(
40
+ fn=docx_to_wav_zip,
41
+ inputs=[
42
+ gr.File(label="Upload .docx File"),
43
+ gr.Dropdown(choices=list(VOICE_MODELS.keys()), label="Choose Voice", value="Jenny (Expressive Female)")
44
+ ],
45
+ outputs=gr.File(label="Download ZIP of WAV Files"),
46
+ title="Realistic Voiceover from DOCX (Multiple Voices)",
47
+ description="Upload a .docx file and choose a realistic voice to generate WAV voiceover files chunked by paragraph, downloadable as a ZIP archive."
48
  )
49
 
50
  if __name__ == "__main__":