husseinelsaadi committed
Commit b986477 · 1 Parent(s): ae625e2

added edge tts

Files changed (2):
  1. app.py (+14 -19)
  2. requirements.txt (+2 -1)
app.py CHANGED
@@ -1695,7 +1695,7 @@ import numpy as np
 import scipy.io.wavfile as wavfile
 import os
 import json
-from TTS.api import TTS
+import edge_tts
 import torch, gc
 from faster_whisper import WhisperModel
 import asyncio
@@ -1707,9 +1707,9 @@ torch.cuda.empty_cache()
 gc.collect()
 
 # Global variables for lazy loading
-tts_model = None
 faster_whisper_model = None
-bark_voice_preset = "v2/en_speaker_6"
+tts_voice = "en-US-AriaNeural"
+
 
 # Thread pool for async operations
 executor = ThreadPoolExecutor(max_workers=2)
@@ -1725,17 +1725,11 @@ else:
 
 def load_models_lazy():
     """Load models only when needed"""
-    global tts_model, faster_whisper_model
+    global faster_whisper_model
 
     device = "cuda" if torch.cuda.is_available() else "cpu"
     print(f"🔍 Using device: {device}")
 
-    if tts_model is None:
-        print("🔍 Loading Coqui TTS model...")
-        tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False,
-                        gpu=torch.cuda.is_available())
-        print("✅ Coqui TTS model loaded")
-
     if faster_whisper_model is None:
         print("🔍 Loading Faster-Whisper model...")
         compute_type = "float16" if device == "cuda" else "int8"
@@ -1743,15 +1737,16 @@ def load_models_lazy():
     print(f"✅ Faster-Whisper model loaded on {device}")
 
 
+async def edge_tts_to_file(text, output_path="tts.wav", voice=tts_voice):
+    communicate = edge_tts.Communicate(text, voice)
+    await communicate.save(output_path)
+    return output_path
+
 def tts_async(text):
-    def _generate():
-        load_models_lazy()
-        print(f"🎙️ Speaking: {text}")
-        temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
-        tts_model.tts_to_file(text=text, file_path=temp_wav.name)
-        return temp_wav.name
-
-    return executor.submit(_generate)
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    return executor.submit(loop.run_until_complete, edge_tts_to_file(text))
+
 
 
 
@@ -1901,8 +1896,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         cleaned_text = first_q.strip().replace("\n", " ")
         audio_future = tts_async(cleaned_text)
         audio_path = audio_future.result()
-        print("⏱️ Bark TTS took", round(time.perf_counter() - start, 2), "seconds")
-
+        print("⏱️ TTS (edge-tts) took", round(time.perf_counter() - start, 2), "seconds")
+
         # Log question
         state["log"].append({
             "type": "question",
requirements.txt CHANGED
@@ -39,4 +39,5 @@ huggingface_hub==0.20.3
 textract==1.6.3
 bitsandbytes
 faster-whisper==0.10.0
-TTS==0.22.0
+edge-tts==6.1.2
+
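
On the dependency side, the commit swaps the heavyweight Coqui TTS==0.22.0 package for the small edge-tts==6.1.2 client, which calls Microsoft's hosted voices instead of running a local model. If a voice other than "en-US-AriaNeural" is wanted, the library can enumerate what the service offers; the sketch below assumes edge_tts.list_voices() is available in the pinned release and that each entry exposes ShortName and Locale fields.

# Hypothetical smoke test after `pip install edge-tts==6.1.2`:
# print the en-US voices so tts_voice in app.py can be changed if desired.
import asyncio

import edge_tts


async def main():
    voices = await edge_tts.list_voices()  # metadata for every available voice
    for voice in voices:
        if voice["Locale"] == "en-US":
            print(voice["ShortName"])


asyncio.run(main())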