Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -53,6 +53,7 @@ def chat(message, history, temperature, max_new_tokens):
|
|
53 |
|
54 |
# Stream the response
|
55 |
start_time = time.time()
|
|
|
56 |
|
57 |
# Create a TextStreamer for token streaming
|
58 |
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
@@ -73,9 +74,18 @@ def chat(message, history, temperature, max_new_tokens):
|
|
73 |
outputs = []
|
74 |
for new_token in streamer:
|
75 |
outputs.append(new_token)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
if any(stop_token in new_token for stop_token in stop_tokens):
|
77 |
break
|
78 |
-
yield "".join(outputs)
|
79 |
|
80 |
def reload_model_button():
|
81 |
"""Reload the model manually via a button."""
|
@@ -114,8 +124,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
114 |
|
115 |
def respond(message, chat_history, temperature, max_new_tokens):
|
116 |
bot_message = ""
|
117 |
-
for partial_response in chat(message, chat_history, temperature, max_new_tokens):
|
118 |
bot_message = partial_response
|
|
|
119 |
yield "", chat_history + [(message, bot_message)]
|
120 |
|
121 |
send_button.click(respond, inputs=[textbox, chatbot, temperature_slider, max_tokens_slider], outputs=[textbox, chatbot])
|
|
|
53 |
|
54 |
# Stream the response
|
55 |
start_time = time.time()
|
56 |
+
token_count = 0
|
57 |
|
58 |
# Create a TextStreamer for token streaming
|
59 |
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
|
|
74 |
outputs = []
|
75 |
for new_token in streamer:
|
76 |
outputs.append(new_token)
|
77 |
+
token_count += 1
|
78 |
+
|
79 |
+
# Calculate tokens per second
|
80 |
+
elapsed_time = time.time() - start_time
|
81 |
+
tokens_per_second = token_count / elapsed_time if elapsed_time > 0 else 0
|
82 |
+
|
83 |
+
# Update the token status
|
84 |
+
token_status_value = f"Tokens Generated: {token_count}, Tokens/Second: {tokens_per_second:.2f}"
|
85 |
+
yield "".join(outputs), token_status_value
|
86 |
+
|
87 |
if any(stop_token in new_token for stop_token in stop_tokens):
|
88 |
break
|
|
|
89 |
|
90 |
def reload_model_button():
|
91 |
"""Reload the model manually via a button."""
|
|
|
124 |
|
125 |
def respond(message, chat_history, temperature, max_new_tokens):
|
126 |
bot_message = ""
|
127 |
+
for partial_response, token_status_value in chat(message, chat_history, temperature, max_new_tokens):
|
128 |
bot_message = partial_response
|
129 |
+
token_status.update(value=token_status_value) # Update token generation status
|
130 |
yield "", chat_history + [(message, bot_message)]
|
131 |
|
132 |
send_button.click(respond, inputs=[textbox, chatbot, temperature_slider, max_tokens_slider], outputs=[textbox, chatbot])
|