SohomToom committed on
Commit
68f40ec
·
verified ·
1 Parent(s): 44984cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -32
app.py CHANGED
@@ -1,57 +1,65 @@
1
  import os
2
- os.environ["NUMBA_DISABLE_CACHE"] = "1"
3
-
4
- import gradio as gr
5
- from docx import Document
6
- from TTS.api import TTS
7
  import tempfile
8
  import zipfile
 
 
 
 
9
 
# Voice labels offered in the UI (male English voices, per the labels), each
# mapped to a (model name, speaker index) pair. All entries share one model.
_VCTK_VITS_MODEL = "tts_models/en/vctk/vits"

VOICE_MODELS = {
    label: (_VCTK_VITS_MODEL, speaker_number)
    for label, speaker_number in (
        ("VCTK Speaker 23 (British Male)", 23),
        ("VCTK Speaker 27 (Scottish Male)", 27),
        ("VCTK Speaker 36 (US Male)", 36),
        ("VCTK Speaker 42 (Irish Male)", 42),
    )
}
17
 
def docx_to_wav_zip(doc_file, selected_voice):
    """Synthesize each non-empty paragraph of a .docx to WAV and zip the results.

    Args:
        doc_file: Uploaded file object; its ``name`` attribute is passed to
            ``Document`` as the path of the .docx to read.
        selected_voice: Key of ``VOICE_MODELS`` selecting the
            ``(model name, speaker index)`` pair to synthesize with.

    Returns:
        Path of a ZIP archive containing ``part_<n>.wav`` for every non-empty
        paragraph, numbered from 1 in document order.

    Raises:
        ValueError: If no file was uploaded or the document has no text.
        KeyError: If ``selected_voice`` is not a key of ``VOICE_MODELS``.
    """
    if doc_file is None:
        raise ValueError("Please upload a .docx file.")

    model_name, speaker_idx = VOICE_MODELS[selected_voice]

    # Read the document before loading the model so bad input fails fast.
    document = Document(doc_file.name)
    paragraphs = [para.text.strip() for para in document.paragraphs if para.text.strip()]
    if not paragraphs:
        raise ValueError("The document contains no text to synthesize.")

    tts = TTS(model_name=model_name, progress_bar=False, gpu=False)
    # tts_to_file() selects a voice by speaker *name*, so translate the
    # configured index through the model's own speaker list.
    speaker = tts.speakers[speaker_idx]

    # The archive has to outlive this function so the caller can serve it;
    # create it outside the temporary directory, which is removed on exit.
    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as zip_file:
        zip_path = zip_file.name

    with tempfile.TemporaryDirectory() as tmpdir, zipfile.ZipFile(zip_path, "w") as zipf:
        for i, para in enumerate(paragraphs, start=1):
            wav_path = os.path.join(tmpdir, f"part_{i}.wav")
            tts.tts_to_file(text=para, speaker=speaker, file_path=wav_path)
            zipf.write(wav_path, os.path.basename(wav_path))

    return zip_path
43
 
# Gradio interface: one .docx upload plus a voice picker in, one ZIP out.
_docx_input = gr.File(label="Upload .docx File")
_voice_input = gr.Dropdown(
    choices=list(VOICE_MODELS),
    label="Choose Male Voice with Accent",
)
_zip_output = gr.File(label="Download ZIP of WAV Files")

interface = gr.Interface(
    fn=docx_to_wav_zip,
    inputs=[_docx_input, _voice_input],
    outputs=_zip_output,
    title="Convert DOCX to WAV with Male Voices and Accents",
    description="Upload a .docx file and choose a male voice (with accent) to generate paragraph-wise audio and download as ZIP.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()
 
1
  import os
 
 
 
 
 
2
  import tempfile
3
  import zipfile
4
+ from docx import Document
5
+ from TTS.api import TTS
6
+ from pydub import AudioSegment
7
+ import gradio as gr
8
 
# UI label -> TTS model name for every voice the app offers.
VOICE_MODELS = dict(
    (
        ("Jenny (Expressive Female)", "tts_models/en/jenny/jenny"),
        ("LJSpeech (Standard Female)", "tts_models/en/ljspeech/vits"),
        ("VCTK (Multiple Speakers)", "tts_models/en/vctk/vits"),
    )
)
15
 
def update_speaker_choices(selected_voice):
    """Return the speaker labels to offer for the given voice label.

    Only the "VCTK (Multiple Speakers)" entry yields several labels; every
    other value yields a single default label.
    """
    offers_many_speakers = selected_voice == "VCTK (Multiple Speakers)"
    if not offers_many_speakers:
        return ["Default Speaker"]
    # Placeholder labels; replace with actual speaker names or indices.
    return ["Speaker 1", "Speaker 2", "Speaker 3"]
21
+
def docx_to_wav_zip(doc_file, selected_voice, speaker_name):
    """Synthesize the text of a .docx as a single WAV file and return it zipped.

    Args:
        doc_file: Uploaded file object; its ``name`` attribute is passed to
            ``Document`` as the path of the .docx to read.
        selected_voice: Key of ``VOICE_MODELS`` naming the TTS model to load.
        speaker_name: Speaker label such as ``"Speaker 2"`` (1-based). Any
            other value, including ``None``, selects the first speaker. Only
            consulted when the loaded model has multiple speakers.

    Returns:
        Path of a ZIP archive containing the generated WAV file.

    Raises:
        ValueError: If no file was uploaded or the document has no text.
        KeyError: If ``selected_voice`` is not a key of ``VOICE_MODELS``.
    """
    if doc_file is None:
        raise ValueError("Please upload a .docx file.")

    # Read the document before loading the model so bad input fails fast.
    document = Document(doc_file.name)
    full_text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
    if not full_text:
        raise ValueError("The document contains no text to synthesize.")

    tts = TTS(model_name=VOICE_MODELS[selected_voice], progress_bar=False, gpu=False)

    # tts_to_file() selects a voice by speaker *name*, and only multi-speaker
    # models accept one; single-speaker models are called with speaker=None.
    speaker = None
    if tts.is_multi_speaker:
        label = speaker_name or ""
        index = 0
        if label.startswith("Speaker"):
            try:
                index = int(label.split()[-1]) - 1
            except ValueError:
                index = 0  # label without a usable number: use the first speaker
        speaker = tts.speakers[max(index, 0)]

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
        wav_path = tmp_wav.name
    # Swap only the extension; str.replace would rewrite every ".wav" in the path.
    zip_path = os.path.splitext(wav_path)[0] + ".zip"

    try:
        tts.tts_to_file(text=full_text, speaker=speaker, file_path=wav_path)
        # The synthesized file is already WAV, so it is archived as-is.
        with zipfile.ZipFile(zip_path, "w") as zipf:
            zipf.write(wav_path, os.path.basename(wav_path))
    finally:
        # Only the archive is handed back; do not leave the raw WAV behind.
        os.remove(wav_path)

    return zip_path
50
 
# Gradio interface: .docx upload, voice picker and speaker picker in; ZIP out.
_docx_input = gr.File(label="Upload .docx File")
_voice_input = gr.Dropdown(
    choices=list(VOICE_MODELS),
    label="Choose Voice",
    value="Jenny (Expressive Female)",
)
# NOTE(review): the speaker choices are computed once here for the VCTK entry
# and are not refreshed when a different voice is picked.
_speaker_input = gr.Dropdown(
    choices=update_speaker_choices("VCTK (Multiple Speakers)"),
    label="Choose Speaker",
    value="Speaker 1",
)
_zip_output = gr.File(label="Download Zip File")

interface = gr.Interface(
    fn=docx_to_wav_zip,
    inputs=[_docx_input, _voice_input, _speaker_input],
    outputs=_zip_output,
    title="Realistic Voiceover from DOCX (Multiple Voices)",
    description="Upload a .docx file, choose a realistic voice, and pick a speaker to generate a voiceover in WAV format.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()