sdafd committed on
Commit
8858101
·
verified ·
1 Parent(s): 881f4c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -26
app.py CHANGED
@@ -41,34 +41,45 @@ def check_model_status():
41
  load_model()
42
  return model_loaded
43
 
44
- def chat(message, history):
45
  global model_pipeline
46
 
47
  # Ensure the model is loaded before proceeding
48
  if not check_model_status():
49
- return "Model is not ready. Please try again later."
 
50
 
51
  prompt = f"Human: {message}\n\nAssistant:"
52
 
53
- # Generate response using the pre-loaded model
54
- response = model_pipeline(
55
- prompt,
56
- max_new_tokens=2048,
57
- temperature=0.7,
58
- do_sample=True,
59
- truncation=True,
60
- pad_token_id=50256
61
- )
62
 
63
- try:
64
- bot_text = response[0]["generated_text"]
65
- bot_text = bot_text.split("Assistant:")[-1].strip()
66
- if "</think>" in bot_text:
67
- bot_text = bot_text.split("</think>")[-1].strip()
68
- except Exception as e:
69
- bot_text = f"Sorry, there was a problem generating the response: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- return bot_text
 
72
 
73
  def reload_model_button():
74
  """Reload the model manually via a button."""
@@ -82,10 +93,10 @@ def update_status_periodically(status_text):
82
  while True:
83
  time.sleep(5) # Update every 5 seconds
84
  status = "Model is loaded and ready." if model_loaded else "Model is not loaded."
85
- status_text.update(value=status)
86
 
87
  # Gradio Interface
88
- with gr.Blocks() as demo:
89
  gr.Markdown("# DeepSeek-R1 Chatbot")
90
  gr.Markdown("DeepSeek-R1-Distill-Qwen-1.5B 모델을 사용한 대화 테스트용 데모입니다.")
91
 
@@ -98,14 +109,23 @@ with gr.Blocks() as demo:
98
  clear_button = gr.Button("Clear")
99
  reload_button = gr.Button("Reload Model")
100
 
 
 
 
 
101
  status_text = gr.Textbox(label="Model Status", value="Model not loaded yet.", interactive=False)
 
102
 
103
- def respond(message, chat_history):
104
- bot_message = chat(message, chat_history)
105
- chat_history.append((message, bot_message))
106
- return "", chat_history
 
 
 
 
107
 
108
- send_button.click(respond, inputs=[textbox, chatbot], outputs=[textbox, chatbot])
109
  clear_button.click(lambda: [], None, chatbot)
110
  reload_button.click(reload_model_button, None, status_text)
111
 
 
41
  load_model()
42
  return model_loaded
43
 
44
+ def chat(message, history, temperature, max_new_tokens):
45
  global model_pipeline
46
 
47
  # Ensure the model is loaded before proceeding
48
  if not check_model_status():
49
+ yield "Model is not ready. Please try again later."
50
+ return
51
 
52
  prompt = f"Human: {message}\n\nAssistant:"
53
 
54
+ # Stream the response
55
+ start_time = time.time()
56
+ generated_tokens = 0
 
 
 
 
 
 
57
 
58
+ def generate():
59
+ nonlocal generated_tokens
60
+ for response in model_pipeline(
61
+ prompt,
62
+ max_new_tokens=max_new_tokens,
63
+ temperature=temperature,
64
+ do_sample=True,
65
+ truncation=True,
66
+ pad_token_id=50256,
67
+ return_full_text=False,
68
+ streamer=True
69
+ ):
70
+ bot_text = response[0]["generated_text"]
71
+ bot_text = bot_text.split("Assistant:")[-1].strip()
72
+ if "</think>" in bot_text:
73
+ bot_text = bot_text.split("</think>")[-1].strip()
74
+
75
+ generated_tokens += len(bot_text.split())
76
+ elapsed_time = time.time() - start_time
77
+ tokens_per_second = generated_tokens / elapsed_time if elapsed_time > 0 else 0
78
+
79
+ yield bot_text, f"Generating... Tokens/s: {tokens_per_second:.2f}"
80
 
81
+ for partial_response, status in generate():
82
+ yield partial_response, status
83
 
84
  def reload_model_button():
85
  """Reload the model manually via a button."""
 
93
  while True:
94
  time.sleep(5) # Update every 5 seconds
95
  status = "Model is loaded and ready." if model_loaded else "Model is not loaded."
96
+ status_text.value = status # Update the value directly
97
 
98
  # Gradio Interface
99
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
100
  gr.Markdown("# DeepSeek-R1 Chatbot")
101
  gr.Markdown("DeepSeek-R1-Distill-Qwen-1.5B 모델을 사용한 대화 테스트용 데모입니다.")
102
 
 
109
  clear_button = gr.Button("Clear")
110
  reload_button = gr.Button("Reload Model")
111
 
112
+ with gr.Row():
113
+ temperature_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature")
114
+ max_tokens_slider = gr.Slider(minimum=32, maximum=2048, value=2048, step=32, label="Max New Tokens")
115
+
116
  status_text = gr.Textbox(label="Model Status", value="Model not loaded yet.", interactive=False)
117
+ token_status = gr.Textbox(label="Token Generation Status", value="", interactive=False)
118
 
119
+ def respond(message, chat_history, temperature, max_new_tokens):
120
+ bot_message = ""
121
+ status = ""
122
+ for partial_response, partial_status in chat(message, chat_history, temperature, max_new_tokens):
123
+ bot_message = partial_response
124
+ status = partial_status
125
+ token_status.update(value=status)
126
+ yield "", chat_history + [(message, bot_message)]
127
 
128
+ send_button.click(respond, inputs=[textbox, chatbot, temperature_slider, max_tokens_slider], outputs=[textbox, chatbot])
129
  clear_button.click(lambda: [], None, chatbot)
130
  reload_button.click(reload_model_button, None, status_text)
131