husseinelsaadi committed on
Commit
c5f793f
·
verified ·
1 Parent(s): 1751c99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -15
app.py CHANGED
@@ -1751,7 +1751,7 @@ gc.collect()
1751
  model_bark = None
1752
  processor_bark = None
1753
  whisper_model = None
1754
- bark_voice_preset = "v2/en_speaker_1"
1755
 
1756
  # Thread pool for async operations
1757
  executor = ThreadPoolExecutor(max_workers=2)
@@ -1788,31 +1788,31 @@ def load_models_lazy():
1788
  print(f"✅ Whisper model loaded on {device}")
1789
 
1790
  def bark_tts_async(text):
1791
- """Async TTS generation"""
1792
  def _generate():
1793
- load_models_lazy() # Load only when needed
1794
- print(f"πŸ” Synthesizing TTS for: {text}")
1795
-
1796
- # Ensure we're using the correct device
1797
  device = next(model_bark.parameters()).device
1798
- print(f"πŸ” Bark model is on device: {device}")
1799
-
 
 
1800
  inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
1801
- input_ids = inputs["input_ids"].to(device) # Move to same device as model
1802
-
1803
- start = time.time()
1804
  with torch.no_grad():
1805
- speech_values = model_bark.generate(input_ids=input_ids)
1806
- print(f"✅ Bark finished in {round(time.time() - start, 2)}s on {device}")
1807
-
1808
  speech = speech_values.cpu().numpy().squeeze()
1809
  speech = (speech * 32767).astype(np.int16)
1810
  temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
1811
  wavfile.write(temp_wav.name, 22050, speech)
1812
  return temp_wav.name
1813
-
1814
  return executor.submit(_generate)
1815
 
 
1816
  def whisper_stt(audio_path):
1817
  """Lazy loading whisper STT"""
1818
  if not audio_path or not os.path.exists(audio_path):
 
1751
  model_bark = None
1752
  processor_bark = None
1753
  whisper_model = None
1754
+ bark_voice_preset = "v2/en_speaker_6"
1755
 
1756
  # Thread pool for async operations
1757
  executor = ThreadPoolExecutor(max_workers=2)
 
1788
  print(f"✅ Whisper model loaded on {device}")
1789
 
1790
  def bark_tts_async(text):
1791
+ """Fully correct async TTS generation with Bark"""
1792
  def _generate():
1793
+ load_models_lazy()
 
 
 
1794
  device = next(model_bark.parameters()).device
1795
+ print(f"πŸ” Bark model on: {device}")
1796
+ print(f"🎙️ Speaking: {text}")
1797
+
1798
+ # 🧠 Prepare full input using processor (not just input_ids)
1799
  inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
1800
+ inputs = {k: v.to(device) for k, v in inputs.items()}
1801
+
1802
+ # ✅ Generate using unpacked args — this includes all required prompt tensors
1803
  with torch.no_grad():
1804
+ speech_values = model_bark.generate(**inputs)
1805
+
1806
+ # ✅ Convert to audio
1807
  speech = speech_values.cpu().numpy().squeeze()
1808
  speech = (speech * 32767).astype(np.int16)
1809
  temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
1810
  wavfile.write(temp_wav.name, 22050, speech)
1811
  return temp_wav.name
1812
+
1813
  return executor.submit(_generate)
1814
 
1815
+
1816
  def whisper_stt(audio_path):
1817
  """Lazy loading whisper STT"""
1818
  if not audio_path or not os.path.exists(audio_path):