Update app.py
app.py CHANGED
@@ -31,14 +31,20 @@ tts_model = VitsModel.from_pretrained("facebook/mms-tts-kor")
 tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kor")
 tts_model.to("cuda" if torch.cuda.is_available() else "cpu")
 
+from uroman import uromanize
+
 def synthesize_tts(text: str):
-
-
+    romanized = uromanize(text)
+    inputs = tts_tokenizer(romanized, return_tensors="pt")
+    input_ids = inputs["input_ids"].long().to(tts_model.device)
+
     with torch.no_grad():
         output = tts_model(input_ids=input_ids)
+
     waveform = output.waveform.squeeze().cpu().numpy()
     return (tts_model.config.sampling_rate, waveform)
 
+
 # ─────────────── 4. Image → Caption + Translation + Speech output ───────────────
 def describe_and_speak(img: Image.Image):
     logging.info("[DEBUG] describe_and_speak function called")
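
For context, the MMS-TTS checkpoints for non-Roman scripts expect romanized text at the tokenizer, which is what the added uromanize call provides before building input_ids. Below is a minimal usage sketch of the updated synthesize_tts outside the Space; the soundfile dependency, the sample sentence, and the output filename are assumptions for illustration only, not part of this commit.

# Usage sketch (assumes the model, tokenizer, and synthesize_tts defined above are already loaded).
import soundfile as sf  # assumed extra dependency, used only for this example

sample_text = "안녕하세요. 오늘 날씨가 좋네요."  # hypothetical Korean input ("Hello. The weather is nice today.")
sampling_rate, waveform = synthesize_tts(sample_text)

# Persist the waveform so it can be played back or handed to an audio widget.
sf.write("tts_output.wav", waveform, sampling_rate)
print(f"Generated {waveform.shape[0] / sampling_rate:.2f}s of audio at {sampling_rate} Hz")

Returning a (sampling_rate, waveform) tuple matches the value format a Gradio Audio output accepts, which is presumably why the function is shaped this way.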