yongyeol committed on
Commit
6ee0045
·
verified ·
1 Parent(s): 256226a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -31,14 +31,20 @@ tts_model = VitsModel.from_pretrained("facebook/mms-tts-kor")
31
  tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kor")
32
  tts_model.to("cuda" if torch.cuda.is_available() else "cpu")
33
 
 
 
34
  def synthesize_tts(text: str):
35
- inputs = tts_tokenizer(text, return_tensors="pt")
36
- input_ids = inputs["input_ids"].to(tts_model.device)
 
 
37
  with torch.no_grad():
38
  output = tts_model(input_ids=input_ids)
 
39
  waveform = output.waveform.squeeze().cpu().numpy()
40
  return (tts_model.config.sampling_rate, waveform)
41
 
 
42
  # ─────────────── 4. 이미지 → 캡션 + 번역 + 음성 출력 ───────────────
43
  def describe_and_speak(img: Image.Image):
44
  logging.info("[DEBUG] describe_and_speak ν•¨μˆ˜ 호좜됨")
 
31
  tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kor")
32
  tts_model.to("cuda" if torch.cuda.is_available() else "cpu")
33
 
34
+ from uroman import uromanize
35
+
36
  def synthesize_tts(text: str):
37
+ romanized = uromanize(text)
38
+ inputs = tts_tokenizer(romanized, return_tensors="pt")
39
+ input_ids = inputs["input_ids"].long().to(tts_model.device)
40
+
41
  with torch.no_grad():
42
  output = tts_model(input_ids=input_ids)
43
+
44
  waveform = output.waveform.squeeze().cpu().numpy()
45
  return (tts_model.config.sampling_rate, waveform)
46
 
47
+
48
  # ─────────────── 4. 이미지 → 캡션 + 번역 + 음성 출력 ───────────────
49
  def describe_and_speak(img: Image.Image):
50
  logging.info("[DEBUG] describe_and_speak ν•¨μˆ˜ 호좜됨")