husseinelsaadi committed on
Commit
ba4fd9a
·
verified ·
1 Parent(s): 146709e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -19
app.py CHANGED
@@ -1484,33 +1484,33 @@ bark_voice_preset = "v2/en_speaker_5"
1484
 
1485
  def bark_tts(text):
1486
  print(f"πŸ” Synthesizing TTS for: {text}")
 
 
1487
  inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
 
 
1488
  input_ids = inputs["input_ids"].to(model_bark.device)
1489
- attention_mask = inputs.get("attention_mask", None)
1490
- if attention_mask is not None:
1491
- attention_mask = attention_mask.to(model_bark.device)
1492
-
1493
  start = time.time()
1494
- # ✅ DO NOT use **inputs here to avoid duplicate keys
1495
- generate_args = {
1496
- "input_ids": input_ids,
1497
- "attention_mask": attention_mask,
1498
- "pad_token_id": model_bark.config.eos_token_id,
1499
- "max_new_tokens": 100
1500
- }
1501
- # Remove conflicting keys if already in processor output
1502
- for key in ["max_new_tokens", "pad_token_id"]:
1503
- if key in inputs:
1504
- del inputs[key]
1505
-
1506
- speech_values = model_bark.generate(**generate_args)
1507
-
1508
  print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
1509
-
 
1510
  speech = speech_values.cpu().numpy().squeeze()
1511
  speech = (speech * 32767).astype(np.int16)
 
1512
  temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
1513
  wavfile.write(temp_wav.name, 22050, speech)
 
1514
  return temp_wav.name
1515
 
1516
 
 
1484
 
1485
  def bark_tts(text):
1486
  print(f"πŸ” Synthesizing TTS for: {text}")
1487
+
1488
+ # Process the text
1489
  inputs = processor_bark(text, return_tensors="pt", voice_preset=bark_voice_preset)
1490
+
1491
+ # Move tensors to device
1492
  input_ids = inputs["input_ids"].to(model_bark.device)
1493
+
 
 
 
1494
  start = time.time()
1495
+
1496
+ # Generate speech with only the required parameters
1497
+ with torch.no_grad():
1498
+ speech_values = model_bark.generate(
1499
+ input_ids=input_ids,
1500
+ do_sample=True,
1501
+ fine_temperature=0.4,
1502
+ coarse_temperature=0.8
1503
+ )
1504
+
 
 
 
 
1505
  print(f"✅ Bark finished in {round(time.time() - start, 2)}s")
1506
+
1507
+ # Convert to audio
1508
  speech = speech_values.cpu().numpy().squeeze()
1509
  speech = (speech * 32767).astype(np.int16)
1510
+
1511
  temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
1512
  wavfile.write(temp_wav.name, 22050, speech)
1513
+
1514
  return temp_wav.name
1515
 
1516