Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -62,7 +62,7 @@ def chat(message, history, temperature, max_new_tokens):
|
|
62 |
def generate():
|
63 |
stop_tokens = ["<|endoftext|>", "<|im_end|>","|im_end|"]
|
64 |
nonlocal generated_tokens
|
65 |
-
|
66 |
prompt,
|
67 |
max_new_tokens=max_new_tokens,
|
68 |
temperature=temperature,
|
@@ -71,20 +71,12 @@ def chat(message, history, temperature, max_new_tokens):
|
|
71 |
pad_token_id=tokenizer.eos_token_id,
|
72 |
streamer=streamer # Use the TextStreamer here
|
73 |
):
|
74 |
-
|
75 |
-
|
76 |
-
if
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
elapsed_time = time.time() - start_time
|
81 |
-
tokens_per_second = generated_tokens / elapsed_time if elapsed_time > 0 else 0
|
82 |
-
|
83 |
-
yield bot_text, f"Generating... Tokens/s: {tokens_per_second:.2f}"
|
84 |
-
|
85 |
-
for partial_response, status in generate():
|
86 |
-
yield partial_response, status
|
87 |
-
|
88 |
def reload_model_button():
|
89 |
"""Reload the model manually via a button."""
|
90 |
global model_loaded
|
|
|
62 |
def generate():
|
63 |
stop_tokens = ["<|endoftext|>", "<|im_end|>","|im_end|"]
|
64 |
nonlocal generated_tokens
|
65 |
+
response = model_pipeline(
|
66 |
prompt,
|
67 |
max_new_tokens=max_new_tokens,
|
68 |
temperature=temperature,
|
|
|
71 |
pad_token_id=tokenizer.eos_token_id,
|
72 |
streamer=streamer # Use the TextStreamer here
|
73 |
):
|
74 |
+
for new_token in streamer:
|
75 |
+
outputs.append(new_token)
|
76 |
+
if new_token in stop_tokens:
|
77 |
+
|
78 |
+
break
|
79 |
+
yield "".join(outputs), "not implemented"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
def reload_model_button():
|
81 |
"""Reload the model manually via a button."""
|
82 |
global model_loaded
|