Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -63,9 +63,18 @@ def generate(img: Image.Image, lang: str):
|
|
63 |
|
64 |
pix = processor(images=img, return_tensors="pt").pixel_values.to(blip_model.device)
|
65 |
cap_en = processor.batch_decode(
|
66 |
-
blip_model.generate(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
)[0].strip()
|
68 |
|
|
|
69 |
if lang == "en":
|
70 |
sr, wav = tts(tts_en, tok_en, cap_en)
|
71 |
return cap_en, (sr, wav)
|
|
|
63 |
|
64 |
pix = processor(images=img, return_tensors="pt").pixel_values.to(blip_model.device)
|
65 |
cap_en = processor.batch_decode(
|
66 |
+
blip_model.generate(
|
67 |
+
pix,
|
68 |
+
max_length=128, # raise the maximum caption length
|
69 |
+
min_length=20, # keep generation from stopping too early
|
70 |
+
num_beams=5, # better beam-search quality (slightly slower)
|
71 |
+
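temperature=0.7, # diversity control. NOTE(review): temperature is a sampling parameter; no do_sample=True is passed here, so it is presumably ignored under beam search - confirm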
|
72 |
+
repetition_penalty=1.1,
|
73 |
+
),
|
74 |
+
skip_special_tokens=True
|
75 |
)[0].strip()
|
76 |
|
77 |
+
|
78 |
if lang == "en":
|
79 |
sr, wav = tts(tts_en, tok_en, cap_en)
|
80 |
return cap_en, (sr, wav)
|