Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -31,14 +31,20 @@ tts_model = VitsModel.from_pretrained("facebook/mms-tts-kor")
|
|
| 31 |
tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kor")
|
| 32 |
tts_model.to("cuda" if torch.cuda.is_available() else "cpu")
|
| 33 |
|
|
|
|
|
|
|
| 34 |
def synthesize_tts(text: str):
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
| 37 |
with torch.no_grad():
|
| 38 |
output = tts_model(input_ids=input_ids)
|
|
|
|
| 39 |
waveform = output.waveform.squeeze().cpu().numpy()
|
| 40 |
return (tts_model.config.sampling_rate, waveform)
|
| 41 |
|
|
|
|
| 42 |
# ─────────────── 4. 이미지 → 캡션 + 번역 + 음성 출력 ───────────────
|
| 43 |
def describe_and_speak(img: Image.Image):
|
| 44 |
logging.info("[DEBUG] describe_and_speak ν¨μ νΈμΆλ¨")
|
|
|
|
| 31 |
tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kor")
|
| 32 |
tts_model.to("cuda" if torch.cuda.is_available() else "cpu")
|
| 33 |
|
| 34 |
+
from uroman import uromanize
|
| 35 |
+
|
| 36 |
def synthesize_tts(text: str):
|
| 37 |
+
romanized = uromanize(text)
|
| 38 |
+
inputs = tts_tokenizer(romanized, return_tensors="pt")
|
| 39 |
+
input_ids = inputs["input_ids"].long().to(tts_model.device)
|
| 40 |
+
|
| 41 |
with torch.no_grad():
|
| 42 |
output = tts_model(input_ids=input_ids)
|
| 43 |
+
|
| 44 |
waveform = output.waveform.squeeze().cpu().numpy()
|
| 45 |
return (tts_model.config.sampling_rate, waveform)
|
| 46 |
|
| 47 |
+
|
| 48 |
# ─────────────── 4. 이미지 → 캡션 + 번역 + 음성 출력 ───────────────
|
| 49 |
def describe_and_speak(img: Image.Image):
|
| 50 |
logging.info("[DEBUG] describe_and_speak ν¨μ νΈμΆλ¨")
|