Update app.py
app.py
CHANGED
@@ -17,6 +17,16 @@ except ImportError:
 model = None
 model_loaded = False
 
+# Default system prompt
+DEFAULT_SYSTEM_PROMPT = """You are MMed-Llama-Alpaca, a helpful AI assistant specialized in medical and healthcare topics. You provide accurate, evidence-based information while being empathetic and understanding.
+
+Important guidelines:
+- Always remind users that your responses are for educational purposes only
+- Encourage users to consult healthcare professionals for medical advice
+- Be thorough but clear in your explanations
+- If unsure about medical information, acknowledge limitations
+- Maintain a professional yet caring tone"""
+
 # HuggingFace repository information
 HF_REPO_ID = "Axcel1/MMed-llama-alpaca-Q4_K_M-GGUF"
 HF_FILENAME = "mmed-llama-alpaca-q4_k_m.gguf"
@@ -148,7 +158,7 @@ def load_model_from_gguf(gguf_path=None, filename=None, n_ctx=2048, use_hf_downl
         print(error_msg)
         return False, f"❌ {error_msg}"
 
-def generate_response_stream(message, history, max_tokens=512, temperature=0.7, top_p=0.9, repeat_penalty=1.1):
+def generate_response_stream(message, history, system_prompt, max_tokens=512, temperature=0.7, top_p=0.9, repeat_penalty=1.1):
     """Generate response from the model with streaming"""
     global model, model_loaded
 
@@ -160,6 +170,10 @@ def generate_response_stream(message, history, max_tokens=512, temperature=0.7,
     # Format the conversation history for Llama-3
     conversation = []
 
+    # Add system prompt if provided
+    if system_prompt and system_prompt.strip():
+        conversation.append({"role": "system", "content": system_prompt.strip()})
+
     # Add conversation history
     for human, assistant in history:
         conversation.append({"role": "user", "content": human})
@@ -190,7 +204,7 @@ def generate_response_stream(message, history, max_tokens=512, temperature=0.7,
     except Exception as e:
         yield f"Error generating response: {str(e)}"
 
-def chat_interface(message, history, max_tokens, temperature, top_p, repeat_penalty):
+def chat_interface(message, history, system_prompt, max_tokens, temperature, top_p, repeat_penalty):
     """Main chat interface function"""
     if not message.strip():
         return history, ""
@@ -203,7 +217,7 @@ def chat_interface(message, history, max_tokens, temperature, top_p, repeat_pena
     history = history + [(message, "")]
 
     # Generate response
-    for response in generate_response_stream(message, history[:-1], max_tokens, temperature, top_p, repeat_penalty):
+    for response in generate_response_stream(message, history[:-1], system_prompt, max_tokens, temperature, top_p, repeat_penalty):
         history[-1] = (message, response)
         yield history, ""
 
@@ -211,6 +225,10 @@ def clear_chat():
     """Clear the chat history"""
     return [], ""
 
+def reset_system_prompt():
+    """Reset system prompt to default"""
+    return DEFAULT_SYSTEM_PROMPT
+
 def load_model_interface(context_size, selected_model):
     """Interface function to load model with configurable context size"""
     success, message = load_model_from_gguf(gguf_path=None, filename=selected_model, n_ctx=int(context_size), use_hf_download=True)
@@ -272,9 +290,25 @@ def create_interface():
 
         with gr.Row():
             with gr.Column(scale=4):
+                # System prompt configuration
+                gr.HTML("<h3>🎯 System Prompt Configuration</h3>")
+                with gr.Row():
+                    system_prompt = gr.Textbox(
+                        label="System Prompt",
+                        value=DEFAULT_SYSTEM_PROMPT,
+                        placeholder="Enter system prompt to define the AI's behavior and role...",
+                        lines=4,
+                        max_lines=8,
+                        scale=4,
+                        autoscroll=True,
+                    )
+                    # with gr.Column(scale=1):
+                    #     reset_prompt_btn = gr.Button("Reset to Default", variant="secondary", size="sm")
+                    #     gr.HTML("<p style='font-size: 0.8em; color: #666; margin-top: 10px;'>The system prompt defines how the AI should behave and respond. Changes apply to new conversations.</p>")
+
                 # Chat interface
                 chatbot = gr.Chatbot(
-                    height=
+                    height=400,
                     show_copy_button=True,
                     bubble_full_width=False,
                     show_label=False,
@@ -295,8 +329,6 @@ def create_interface():
                 # Model loading section
                 gr.HTML("<h3>🔧 Model Control</h3>")
 
-                # gr.HTML(f"<p style='font-size: 0.9em; color: #666;'><strong>Repository:</strong> {HF_REPO_ID}</p>")
-
                 # Model selection dropdown
                 model_dropdown = gr.Dropdown(
                     choices=initial_choices,
@@ -305,6 +337,16 @@ def create_interface():
                     info="Choose from available models in the repository",
                     interactive=True
                 )
+
+                # Context size (limited for Spaces)
+                context_size = gr.Slider(
+                    minimum=512,
+                    maximum=8192,
+                    value=2048,
+                    step=256,
+                    label="Context Size",
+                    info="Token context window (requires model reload)"
+                )
 
                 load_btn = gr.Button("Load Model", variant="primary", size="lg")
                 model_status = gr.Textbox(
@@ -316,16 +358,7 @@ def create_interface():
 
                 # Generation parameters
                 gr.HTML("<h3>⚙️ Generation Settings</h3>")
-
-                # Context size (limited for Spaces)
-                context_size = gr.Slider(
-                    minimum=512,
-                    maximum=4096,
-                    value=2048,
-                    step=256,
-                    label="Context Size",
-                    info="Token context window (requires model reload)"
-                )
+
 
                 max_tokens = gr.Slider(
                     minimum=50,
@@ -367,7 +400,7 @@ def create_interface():
                 <p><strong>Quantization:</strong> Q4_K_M</p>
                 <p><strong>Format:</strong> GGUF (optimized)</p>
                 <p><strong>Backend:</strong> llama-cpp-python</p>
-                <p><strong>Features:</strong> CPU/GPU support, streaming</p>
+                <p><strong>Features:</strong> CPU/GPU support, streaming, system prompts</p>
                 <p><strong>Specialty:</strong> Medical assistance</p>
                 <p><strong>Auto-Optimization:</strong> CPU threads & GPU layers detected automatically</p>
                 """)
@@ -392,13 +425,13 @@ def create_interface():
 
         submit_btn.click(
            chat_interface,
-           inputs=[msg, chatbot, max_tokens, temperature, top_p, repeat_penalty],
+           inputs=[msg, chatbot, system_prompt, max_tokens, temperature, top_p, repeat_penalty],
           outputs=[chatbot, msg]
        )
 
        msg.submit(
           chat_interface,
-           inputs=[msg, chatbot, max_tokens, temperature, top_p, repeat_penalty],
+           inputs=[msg, chatbot, system_prompt, max_tokens, temperature, top_p, repeat_penalty],
           outputs=[chatbot, msg]
        )
 
@@ -407,6 +440,11 @@ def create_interface():
           outputs=[chatbot, msg]
       )
 
+        # reset_prompt_btn.click(
+        #     reset_system_prompt,
+        #     outputs=system_prompt
+        # )
+
    return demo
 
 if __name__ == "__main__":
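Note on the change: the new system_prompt value travels from the Gradio Textbox through chat_interface into generate_response_stream, where it is prepended to the message list as a "system" turn. The generation call itself sits in the collapsed region between the visible hunks, so the following is only a minimal sketch of that path, assuming the app uses llama-cpp-python's create_chat_completion with stream=True (llama-cpp-python is the backend named in the sidebar); the model guard and the chunk handling are assumptions, not code from this commit.

# Sketch only: how the streaming path presumably looks after this commit.
# `model` mirrors the module-level global from app.py (a llama_cpp.Llama
# instance once loaded); the create_chat_completion call and chunk parsing
# are assumptions about the collapsed generation code.
model = None          # set by load_model_from_gguf() at runtime
model_loaded = False

def generate_response_stream(message, history, system_prompt,
                             max_tokens=512, temperature=0.7,
                             top_p=0.9, repeat_penalty=1.1):
    global model, model_loaded
    if not model_loaded:          # assumed guard, as in the hidden lines
        yield "Please load the model first."
        return

    conversation = []

    # New in this commit: prepend the system prompt when one is provided
    if system_prompt and system_prompt.strip():
        conversation.append({"role": "system", "content": system_prompt.strip()})

    # Replay prior turns, then append the new user message
    for human, assistant in history:
        conversation.append({"role": "user", "content": human})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    # Assumed call: llama-cpp-python's OpenAI-style chat API with streaming
    stream = model.create_chat_completion(
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        repeat_penalty=repeat_penalty,
        stream=True,
    )
    partial = ""
    for chunk in stream:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            partial += delta["content"]
            yield partial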
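Similarly, the Context Size slider that moved into the Model Control section (and whose maximum grew from 4096 to 8192) still reaches the loader as n_ctx=int(context_size) via load_model_interface. The body of load_model_from_gguf is not part of this diff, so the snippet below is only a hypothetical illustration of why the slider's info text says a model reload is required: with llama-cpp-python, n_ctx is fixed when the Llama object is constructed. The hf_hub_download usage and the helper name are assumptions, not the app's actual code.

# Hypothetical stand-in for load_model_from_gguf(); the real implementation is outside this diff.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

HF_REPO_ID = "Axcel1/MMed-llama-alpaca-Q4_K_M-GGUF"   # constants from the top of app.py
HF_FILENAME = "mmed-llama-alpaca-q4_k_m.gguf"

def load_gguf_sketch(filename=HF_FILENAME, n_ctx=2048):
    # Fetch the quantized GGUF file from the Hub (cached locally after the first call)
    gguf_path = hf_hub_download(repo_id=HF_REPO_ID, filename=filename)

    # The context window is set at construction time, which is why changing
    # the "Context Size" slider requires reloading the model
    return Llama(model_path=gguf_path, n_ctx=n_ctx)

if __name__ == "__main__":
    # e.g. take advantage of the slider's new 8192-token maximum
    model = load_gguf_sketch(n_ctx=8192)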