Update app.py
app.py CHANGED
@@ -288,10 +288,13 @@ def respond(
     tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
     llama.encode(tokens)
     tokens = [llama.decoder_start_token()]
+    outputs = ""
     for token in llama.generate(tokens, top_k=40, top_p=0.95, temp=1, repeat_penalty=1.0):
-
+        outputs += llama.detokenize([token]).decode()
+        yield outputs
         if token == llama.token_eos():
             break
+    return outputs
 
     """
     Respond to a message using the Gemma3 model via Llama.cpp.