Axcel1 committed
Commit 67b8b63 · verified · 1 Parent(s): 867915a

Update app.py

Files changed (1)
  1. app.py +57 -19
app.py CHANGED

@@ -17,6 +17,16 @@ except ImportError:
 model = None
 model_loaded = False
 
+# Default system prompt
+DEFAULT_SYSTEM_PROMPT = """You are MMed-Llama-Alpaca, a helpful AI assistant specialized in medical and healthcare topics. You provide accurate, evidence-based information while being empathetic and understanding.
+
+Important guidelines:
+- Always remind users that your responses are for educational purposes only
+- Encourage users to consult healthcare professionals for medical advice
+- Be thorough but clear in your explanations
+- If unsure about medical information, acknowledge limitations
+- Maintain a professional yet caring tone"""
+
 # HuggingFace repository information
 HF_REPO_ID = "Axcel1/MMed-llama-alpaca-Q4_K_M-GGUF"
 HF_FILENAME = "mmed-llama-alpaca-q4_k_m.gguf"
@@ -148,7 +158,7 @@ def load_model_from_gguf(gguf_path=None, filename=None, n_ctx=2048, use_hf_downl
         print(error_msg)
         return False, f"❌ {error_msg}"
 
-def generate_response_stream(message, history, max_tokens=512, temperature=0.7, top_p=0.9, repeat_penalty=1.1):
+def generate_response_stream(message, history, system_prompt, max_tokens=512, temperature=0.7, top_p=0.9, repeat_penalty=1.1):
     """Generate response from the model with streaming"""
     global model, model_loaded
 
@@ -160,6 +170,10 @@ def generate_response_stream(message, history, max_tokens=512, temperature=0.7,
     # Format the conversation history for Llama-3
     conversation = []
 
+    # Add system prompt if provided
+    if system_prompt and system_prompt.strip():
+        conversation.append({"role": "system", "content": system_prompt.strip()})
+
     # Add conversation history
     for human, assistant in history:
         conversation.append({"role": "user", "content": human})
@@ -190,7 +204,7 @@ def generate_response_stream(message, history, max_tokens=512, temperature=0.7,
     except Exception as e:
         yield f"Error generating response: {str(e)}"
 
-def chat_interface(message, history, max_tokens, temperature, top_p, repeat_penalty):
+def chat_interface(message, history, system_prompt, max_tokens, temperature, top_p, repeat_penalty):
     """Main chat interface function"""
     if not message.strip():
         return history, ""
@@ -203,7 +217,7 @@ def chat_interface(message, history, max_tokens, temperature, top_p, repeat_pena
     history = history + [(message, "")]
 
     # Generate response
-    for response in generate_response_stream(message, history[:-1], max_tokens, temperature, top_p, repeat_penalty):
+    for response in generate_response_stream(message, history[:-1], system_prompt, max_tokens, temperature, top_p, repeat_penalty):
         history[-1] = (message, response)
         yield history, ""
 
@@ -211,6 +225,10 @@ def clear_chat():
     """Clear the chat history"""
     return [], ""
 
+def reset_system_prompt():
+    """Reset system prompt to default"""
+    return DEFAULT_SYSTEM_PROMPT
+
 def load_model_interface(context_size, selected_model):
     """Interface function to load model with configurable context size"""
     success, message = load_model_from_gguf(gguf_path=None, filename=selected_model, n_ctx=int(context_size), use_hf_download=True)
@@ -272,9 +290,25 @@ def create_interface():
 
         with gr.Row():
             with gr.Column(scale=4):
+                # System prompt configuration
+                gr.HTML("<h3>🎯 System Prompt Configuration</h3>")
+                with gr.Row():
+                    system_prompt = gr.Textbox(
+                        label="System Prompt",
+                        value=DEFAULT_SYSTEM_PROMPT,
+                        placeholder="Enter system prompt to define the AI's behavior and role...",
+                        lines=4,
+                        max_lines=8,
+                        scale=4,
+                        autoscroll=True,
+                    )
+                    # with gr.Column(scale=1):
+                    #     reset_prompt_btn = gr.Button("Reset to Default", variant="secondary", size="sm")
+                    #     gr.HTML("<p style='font-size: 0.8em; color: #666; margin-top: 10px;'>The system prompt defines how the AI should behave and respond. Changes apply to new conversations.</p>")
+
                 # Chat interface
                 chatbot = gr.Chatbot(
-                    height=500,
+                    height=400,
                     show_copy_button=True,
                     bubble_full_width=False,
                     show_label=False,
@@ -295,8 +329,6 @@ def create_interface():
                 # Model loading section
                 gr.HTML("<h3>🔧 Model Control</h3>")
 
-                # gr.HTML(f"<p style='font-size: 0.9em; color: #666;'><strong>Repository:</strong> {HF_REPO_ID}</p>")
-
                 # Model selection dropdown
                 model_dropdown = gr.Dropdown(
                     choices=initial_choices,
@@ -305,6 +337,16 @@ def create_interface():
                     info="Choose from available models in the repository",
                     interactive=True
                 )
+
+                # Context size (limited for Spaces)
+                context_size = gr.Slider(
+                    minimum=512,
+                    maximum=8192,
+                    value=2048,
+                    step=256,
+                    label="Context Size",
+                    info="Token context window (requires model reload)"
+                )
 
                 load_btn = gr.Button("Load Model", variant="primary", size="lg")
                 model_status = gr.Textbox(
@@ -316,16 +358,7 @@ def create_interface():
 
                 # Generation parameters
                 gr.HTML("<h3>⚙️ Generation Settings</h3>")
-
-                # Context size (limited for Spaces)
-                context_size = gr.Slider(
-                    minimum=512,
-                    maximum=4096,
-                    value=2048,
-                    step=256,
-                    label="Context Size",
-                    info="Token context window (requires model reload)"
-                )
+
 
                 max_tokens = gr.Slider(
                     minimum=50,
@@ -367,7 +400,7 @@ def create_interface():
                 <p><strong>Quantization:</strong> Q4_K_M</p>
                 <p><strong>Format:</strong> GGUF (optimized)</p>
                 <p><strong>Backend:</strong> llama-cpp-python</p>
-                <p><strong>Features:</strong> CPU/GPU support, streaming</p>
+                <p><strong>Features:</strong> CPU/GPU support, streaming, system prompts</p>
                 <p><strong>Specialty:</strong> Medical assistance</p>
                 <p><strong>Auto-Optimization:</strong> CPU threads & GPU layers detected automatically</p>
                 """)
@@ -392,13 +425,13 @@ def create_interface():
 
         submit_btn.click(
             chat_interface,
-            inputs=[msg, chatbot, max_tokens, temperature, top_p, repeat_penalty],
+            inputs=[msg, chatbot, system_prompt, max_tokens, temperature, top_p, repeat_penalty],
             outputs=[chatbot, msg]
         )
 
         msg.submit(
            chat_interface,
-            inputs=[msg, chatbot, max_tokens, temperature, top_p, repeat_penalty],
+            inputs=[msg, chatbot, system_prompt, max_tokens, temperature, top_p, repeat_penalty],
            outputs=[chatbot, msg]
        )
 
@@ -407,6 +440,11 @@ def create_interface():
             outputs=[chatbot, msg]
         )
 
+        # reset_prompt_btn.click(
+        #     reset_system_prompt,
+        #     outputs=system_prompt
+        # )
+
     return demo
 
 if __name__ == "__main__":
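
Note on how the new system_prompt flows through: chat_interface appends a placeholder (message, "") turn, then passes history[:-1] together with system_prompt into generate_response_stream, which prepends a {"role": "system", ...} message ahead of the Llama-3 chat turns. The diff does not show the streaming call itself, so the sketch below is only a minimal approximation of that path, assuming the app drives llama-cpp-python's Llama.create_chat_completion with stream=True; the stream_reply name and the Llama(...) constructor arguments are illustrative, not taken from app.py.

from llama_cpp import Llama

# Illustrative standalone setup; app.py instead downloads the GGUF from
# HF_REPO_ID via huggingface_hub and configures n_ctx from the UI slider.
model = Llama(model_path="mmed-llama-alpaca-q4_k_m.gguf", n_ctx=2048)

def stream_reply(message, history, system_prompt, max_tokens=512,
                 temperature=0.7, top_p=0.9, repeat_penalty=1.1):
    """Yield a growing reply string, mirroring how the commit threads the system prompt."""
    conversation = []
    # Same guard as the diff: only prepend a system turn when the box is non-empty.
    if system_prompt and system_prompt.strip():
        conversation.append({"role": "system", "content": system_prompt.strip()})
    # Replay prior (user, assistant) pairs, then the new user message.
    for human, assistant in history:
        conversation.append({"role": "user", "content": human})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    partial = ""
    # llama-cpp-python emits OpenAI-style chunks when stream=True.
    for chunk in model.create_chat_completion(
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        repeat_penalty=repeat_penalty,
        stream=True,
    ):
        delta = chunk["choices"][0]["delta"]
        if delta.get("content"):
            partial += delta["content"]
            yield partial

Passing history[:-1] keeps the empty placeholder turn out of the prompt, so the model never sees its own unfinished reply. The reset_prompt_btn.click(reset_system_prompt, outputs=system_prompt) wiring at the end of the diff is left commented out; re-enabling it would restore the textbox to DEFAULT_SYSTEM_PROMPT on click.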