Spaces:

SohomToom
/

DocToAudioConverted

Sleeping

App Files Files Community

SohomToom committed on May 6

Commit

68f40ec

verified ·

1 Parent(s): 44984cf

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -32

app.py CHANGED Viewed

@@ -1,57 +1,65 @@
 import os
-os.environ["NUMBA_DISABLE_CACHE"] = "1"
-import gradio as gr
-from docx import Document
-from TTS.api import TTS
 import tempfile
 import zipfile
-# Available male English voices with accents (VCTK dataset)
 VOICE_MODELS = {
-    "VCTK Speaker 23 (British Male)": ("tts_models/en/vctk/vits", 23),
-    "VCTK Speaker 27 (Scottish Male)": ("tts_models/en/vctk/vits", 27),
-    "VCTK Speaker 36 (US Male)": ("tts_models/en/vctk/vits", 36),
-    "VCTK Speaker 42 (Irish Male)": ("tts_models/en/vctk/vits", 42)
 }
-def docx_to_wav_zip(doc_file, selected_voice):
-    model_name, speaker_idx = VOICE_MODELS[selected_voice]
-    tts = TTS(model_name=model_name, progress_bar=False, gpu=False)
     # Extract text from .docx
     document = Document(doc_file.name)
     full_text = "\n".join([para.text for para in document.paragraphs if para.text.strip()])
-    # Split by paragraphs for shorter audio chunks
-    paragraphs = [para.text.strip() for para in document.paragraphs if para.text.strip()]
-    with tempfile.TemporaryDirectory() as tmpdir:
-        audio_files = []
-        for i, para in enumerate(paragraphs):
-            wav_path = os.path.join(tmpdir, f"part_{i + 1}.wav")
-            tts.tts_to_file(text=para, speaker_idx=speaker_idx, file_path=wav_path)
-            audio_files.append(wav_path)
-        # Create a zip of all wav files
-        zip_path = os.path.join(tmpdir, "output_audio.zip")
-        with zipfile.ZipFile(zip_path, 'w') as zipf:
-            for audio in audio_files:
-                zipf.write(audio, os.path.basename(audio))
-        return zip_path
 # Gradio interface
 interface = gr.Interface(
     fn=docx_to_wav_zip,
     inputs=[
         gr.File(label="Upload .docx File"),
-        gr.Dropdown(choices=list(VOICE_MODELS.keys()), label="Choose Male Voice with Accent")
     ],
-    outputs=gr.File(label="Download ZIP of WAV Files"),
-    title="Convert DOCX to WAV with Male Voices and Accents",
-    description="Upload a .docx file and choose a male voice (with accent) to generate paragraph-wise audio and download as ZIP."
 )
 if __name__ == "__main__":
-    interface.launch()

 import os
 import tempfile
 import zipfile
+from docx import Document
+from TTS.api import TTS
+from pydub import AudioSegment
+import gradio as gr
+# Available TTS models with voice descriptions
 VOICE_MODELS = {
+    "Jenny (Expressive Female)": "tts_models/en/jenny/jenny",
+    "LJSpeech (Standard Female)": "tts_models/en/ljspeech/vits",
+    "VCTK (Multiple Speakers)": "tts_models/en/vctk/vits"
 }
+# Function to update speaker choices based on the selected model
+def update_speaker_choices(selected_voice):
+    if selected_voice == "VCTK (Multiple Speakers)":
+        return ["Speaker 1", "Speaker 2", "Speaker 3"]  # Modify with actual speaker names or indices
+    return ["Default Speaker"]
+def docx_to_wav_zip(doc_file, selected_voice, speaker_name):
+    # Load the selected TTS model
+    tts = TTS(model_name=VOICE_MODELS[selected_voice], progress_bar=False, gpu=False)
     # Extract text from .docx
     document = Document(doc_file.name)
     full_text = "\n".join([para.text for para in document.paragraphs if para.text.strip()])
+    # Generate temporary paths
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
+        wav_path = tmp_wav.name
+    zip_path = wav_path.replace(".wav", ".zip")
+    # Get speaker index (this part assumes speaker names are like 'Speaker 1', 'Speaker 2', etc.)
+    speaker_idx = int(speaker_name.split()[-1]) - 1 if speaker_name.startswith("Speaker") else 0
+    # Generate speech with the selected speaker index
+    tts.tts_to_file(text=full_text, speaker_idx=speaker_idx, file_path=wav_path)
+    # Convert wav to mp3 and zip the result
+    sound = AudioSegment.from_wav(wav_path)
+    sound.export(wav_path, format="wav")  # keeping the wav format
+    # Zip the files
+    with zipfile.ZipFile(zip_path, 'w') as zipf:
+        zipf.write(wav_path, os.path.basename(wav_path))
+    return zip_path
 # Gradio interface
 interface = gr.Interface(
     fn=docx_to_wav_zip,
     inputs=[
         gr.File(label="Upload .docx File"),
+        gr.Dropdown(choices=list(VOICE_MODELS.keys()), label="Choose Voice", value="Jenny (Expressive Female)"),
+        gr.Dropdown(choices=update_speaker_choices("VCTK (Multiple Speakers)"), label="Choose Speaker", value="Speaker 1")  # Example
     ],
+    outputs=gr.File(label="Download Zip File"),
+    title="Realistic Voiceover from DOCX (Multiple Voices)",
+    description="Upload a .docx file, choose a realistic voice, and pick a speaker to generate a voiceover in WAV format."
 )
 if __name__ == "__main__":
+    interface.launch()