Update app.py
app.py CHANGED
@@ -58,25 +58,26 @@ def generate(req: UserInputRequest):
         {"role": "user", "content": req.user_input}
     ]

-    input_ids = tokenizer.apply_chat_template(
+    input_data = tokenizer.apply_chat_template(
         messages,
         add_generation_prompt=True,
-        return_tensors="pt"
+        return_tensors="pt",
+        padding=True
     ).to(model.device)

+    input_ids = input_data['input_ids']
+    attention_mask = input_data['attention_mask']
+
     terminators = [
         tokenizer.eos_token_id,
         tokenizer.convert_tokens_to_ids("<|eot_id|>")
     ]

     outputs = model.generate(
-        input_ids,
+        input_ids=input_ids,
+        attention_mask=attention_mask,
         max_new_tokens=200,
-        eos_token_id=terminators
-        do_sample=False,
-        temperature=0.0,
-        top_p=1.0,
-        repetition_penalty=1.0
+        eos_token_id=terminators
     )

     response = outputs[0][input_ids.shape[-1]:]
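For reference, a minimal standalone sketch of the tokenize-then-generate flow this hunk ends up with. The model name and the prompt are placeholders, not part of the commit, and the `return_dict=True` flag is an assumption added here: on many transformers versions, `apply_chat_template` only returns a mapping with both `input_ids` and `attention_mask` when that flag is set; otherwise it returns a bare tensor of input IDs.

# Sketch only: placeholder model, placeholder prompt, and return_dict=True are
# assumptions layered on top of the committed code.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # placeholder, not from the commit
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

messages = [
    {"role": "user", "content": "Hello!"}  # stands in for req.user_input
]

input_data = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    padding=True,
    return_dict=True,  # assumption: needed so attention_mask is returned too
).to(model.device)

input_ids = input_data["input_ids"]
attention_mask = input_data["attention_mask"]

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

with torch.no_grad():
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=200,
        eos_token_id=terminators,
    )

# Slice off the prompt tokens and decode only the newly generated part.
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True))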