Update app.py

app.py CHANGED
@@ -41,34 +41,45 @@ def check_model_status():
         load_model()
     return model_loaded
 
-def chat(message, history):
+def chat(message, history, temperature, max_new_tokens):
     global model_pipeline
 
     # Ensure the model is loaded before proceeding
     if not check_model_status():
-        …
+        yield "Model is not ready. Please try again later."
+        return
 
     prompt = f"Human: {message}\n\nAssistant:"
 
-    # …
-    …
-    …
-        max_new_tokens=2048,
-        temperature=0.7,
-        do_sample=True,
-        truncation=True,
-        pad_token_id=50256
-    )
+    # Stream the response
+    start_time = time.time()
+    generated_tokens = 0
 
-    …
-    …
-    …
-    …
-    …
-    …
-    …
+    def generate():
+        nonlocal generated_tokens
+        for response in model_pipeline(
+            prompt,
+            max_new_tokens=max_new_tokens,
+            temperature=temperature,
+            do_sample=True,
+            truncation=True,
+            pad_token_id=50256,
+            return_full_text=False,
+            streamer=True
+        ):
+            bot_text = response[0]["generated_text"]
+            bot_text = bot_text.split("Assistant:")[-1].strip()
+            if "</think>" in bot_text:
+                bot_text = bot_text.split("</think>")[-1].strip()
+
+            generated_tokens += len(bot_text.split())
+            elapsed_time = time.time() - start_time
+            tokens_per_second = generated_tokens / elapsed_time if elapsed_time > 0 else 0
+
+            yield bot_text, f"Generating... Tokens/s: {tokens_per_second:.2f}"
 
-    …
+    for partial_response, status in generate():
+        yield partial_response, status
 
 def reload_model_button():
     """Reload the model manually via a button."""
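A note on this hunk: in the transformers library, a pipeline call returns finished generations rather than an iterable of partial outputs, and the streamer generation argument expects a streamer object, so streamer=True and the for-loop over model_pipeline(...) will not stream as written. The hard-coded pad_token_id=50256 is also GPT-2's end-of-text id rather than the Qwen tokenizer's. A minimal sketch of actual token streaming with TextIteratorStreamer, assuming the model and tokenizer are loaded directly instead of through pipeline() (the stream_chat name and the threading wrapper are illustrative, not part of this app):

# Sketch only: stream tokens with transformers' TextIteratorStreamer.
# Assumes the model/tokenizer are loaded directly; app.py would need to
# expose these instead of (or alongside) its pipeline object.
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

def stream_chat(message):
    prompt = f"Human: {message}\n\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt")
    # skip_prompt=True drops the echoed prompt, so no "Assistant:" splitting is needed
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=2048,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,  # safer than a hard-coded GPT-2 id
        streamer=streamer,
    )
    # generate() blocks, so run it in a background thread and read from the streamer
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    partial = ""
    for new_text in streamer:  # yields decoded text as tokens arrive
        partial += new_text
        yield partial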
@@ -82,10 +93,10 @@ def update_status_periodically(status_text):
     while True:
         time.sleep(5)  # Update every 5 seconds
         status = "Model is loaded and ready." if model_loaded else "Model is not loaded."
-        status_text.…
+        status_text.value = status  # Update the value directly
 
 # Gradio Interface
-with gr.Blocks() as demo:
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# DeepSeek-R1 Chatbot")
     gr.Markdown("A demo for testing conversation with the DeepSeek-R1-Distill-Qwen-1.5B model.")
 
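A note on this hunk: assigning status_text.value in a background thread changes the Python object, but Gradio does not push server-side attribute writes to connected browsers, so the replacement line is unlikely to update the UI either. One conventional alternative is to poll from the page, sketched here with the every= argument (available on Gradio events in the 3.x/4.x lines; newer releases use gr.Timer for the same purpose). The get_status helper is illustrative, and model_loaded stands in for the app's existing global flag:

import gradio as gr

model_loaded = False  # stands in for the app's existing global flag

def get_status():
    return "Model is loaded and ready." if model_loaded else "Model is not loaded."

with gr.Blocks() as demo:
    status_text = gr.Textbox(label="Model Status", interactive=False)
    # Re-run get_status every 5 seconds and write the result into the textbox
    demo.load(get_status, inputs=None, outputs=status_text, every=5)

demo.launch()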
@@ -98,14 +109,23 @@ with gr.Blocks() as demo:
     clear_button = gr.Button("Clear")
     reload_button = gr.Button("Reload Model")
 
+    with gr.Row():
+        temperature_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature")
+        max_tokens_slider = gr.Slider(minimum=32, maximum=2048, value=2048, step=32, label="Max New Tokens")
+
     status_text = gr.Textbox(label="Model Status", value="Model not loaded yet.", interactive=False)
+    token_status = gr.Textbox(label="Token Generation Status", value="", interactive=False)
 
-    def respond(message, chat_history):
-        bot_message = …
-        …
-        …
+    def respond(message, chat_history, temperature, max_new_tokens):
+        bot_message = ""
+        status = ""
+        for partial_response, partial_status in chat(message, chat_history, temperature, max_new_tokens):
+            bot_message = partial_response
+            status = partial_status
+            token_status.update(value=status)
+            yield "", chat_history + [(message, bot_message)]
 
-    send_button.click(respond, inputs=[textbox, chatbot], outputs=[textbox, chatbot])
+    send_button.click(respond, inputs=[textbox, chatbot, temperature_slider, max_tokens_slider], outputs=[textbox, chatbot])
     clear_button.click(lambda: [], None, chatbot)
     reload_button.click(reload_model_button, None, status_text)
 
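A note on this hunk: token_status.update(value=status) builds an update object and immediately discards it; inside a Gradio callback, a component only changes when it is listed in the event's outputs and a value is returned or yielded for it. A sketch that routes the status string through the event wiring instead (same component names as the app; the third output is the assumed change):

# Sketch only: deliver the token status via a third output instead of
# calling token_status.update() inside the callback.
def respond(message, chat_history, temperature, max_new_tokens):
    bot_message = ""
    for partial_response, partial_status in chat(message, chat_history, temperature, max_new_tokens):
        bot_message = partial_response
        # Stream the growing answer and its status straight to the UI
        yield "", chat_history + [(message, bot_message)], partial_status

send_button.click(
    respond,
    inputs=[textbox, chatbot, temperature_slider, max_tokens_slider],
    outputs=[textbox, chatbot, token_status],  # token_status now receives the status text
)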