import requests
import gradio as gr
import ollama

# Report which country this Space is running from (via ipinfo.io).
check_ipinfo = requests.get("https://ipinfo.io", timeout=5).json().get("country", "unknown")
print("Run-Location-As:", check_ipinfo)
# List of available models for selection.
# IMPORTANT: These display names must map to models that have already been
# pulled into the local Ollama store (e.g. by run.sh).
MODEL_ID_MAP = {
    "Tencent混元1.8B": 'hf.co/bartowski/tencent_Hunyuan-1.8B-Instruct-GGUF:Q4_K_M',
    "Qwen3-4B-Instruct-2507": 'hf.co/bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF:Q4_K_M',
    #"Qwen3-4B-Thinking-2507": 'hf.co/bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF:Q4_K_M',
    "SmolLM2-360M": 'smollm2:360m-instruct-q5_K_M',
    "Llama3.2-3B-Instruct": 'hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M',  # OK speed with CPU
    #"Gemma3n-e2b-it": 'gemma3n:e2b-it-q4_K_M',
    "Granite3.3-2B": 'granite3.3:2b',
    "Hunyuan-4B-Instruct": 'hf.co/bartowski/tencent_Hunyuan-4B-Instruct-GGUF:Q4_K_M'
}
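
# Optional startup step (a minimal sketch, assuming the models may not already
# be present in the local Ollama store): pre-pull every model so the first chat
# request does not block on a long download. ollama.pull() is part of the
# ollama Python client; failures are logged and skipped so one bad entry does
# not kill the app.
def ensure_models_pulled():
    for model_id in MODEL_ID_MAP.values():
        try:
            ollama.pull(model_id)
        except Exception as exc:
            print(f"Could not pull {model_id}: {exc}")
# ensure_models_pulled()  # Uncomment to pre-pull at startup.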
# Default System Prompt. The numbered Chinese rules mean: (1) if the query is
# in Chinese, answer in standard Traditional Chinese, following official
# document conventions; (2) cite the rules or basis relied on; (3) if the
# query is in English, answer in English.
DEFAULT_SYSTEM_PROMPT = """Answer everything in a simple, smart, relevant and accurate style. No chit-chat! Besides, please:
1. 如果查詢是以中文輸入,使用標準繁體中文回答,符合官方文書規範
2. 要提供引用規則依據
3. 如果查詢是以英文輸入,使用英文回答"""

# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
    gr.Markdown("## Small Language Model (SLM) run with CPU")
    gr.Markdown(f"(Run-Location-As: `{check_ipinfo}`)")
    gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")
    # Model Selection
    with gr.Row():
        selected_model_label = gr.Radio(
            choices=list(MODEL_ID_MAP.keys()),
            value=list(MODEL_ID_MAP.keys())[0],  # Default to first display name
            label="Select Model",
            info="Choose the LLM model to chat with.",
            interactive=True
        )
    chatbot = gr.Chatbot(
        label="Conversation",
        height=400,
        type='messages',
        layout="bubble"
    )
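    # With type='messages', the chatbot history is a list of role/content
    # dicts in the OpenAI-style format, e.g.:
    #   [{"role": "user", "content": "Hi"},
    #    {"role": "assistant", "content": "Hello!"}]
    # which is also the shape that ollama.chat() expects for `messages`.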
    with gr.Row():
        msg = gr.Textbox(
            show_label=False,
            placeholder="Type your message here and press Enter...",
            lines=1,
            scale=4,
            container=False
        )
    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            stream_checkbox = gr.Checkbox(
                label="Stream Output",
                value=True,
                info="Enable to see the response generate in real-time."
            )
            use_custom_prompt_checkbox = gr.Checkbox(
                label="Use Custom System Prompt",
                value=False,
                info="Check this box to provide your own system prompt below."
            )
        # --- New: System Prompt Options ---
        # (The Chinese option labels mean "Answer in Traditional Chinese" and
        # "Answer in Simplified Chinese"; the Chinese instructions in their
        # values say "No matter what, you must answer in standard
        # Traditional/Simplified Chinese".)
        SYSTEM_PROMPT_OPTIONS = {
            "Smart & Accurate (Auto TC/EN)": DEFAULT_SYSTEM_PROMPT,
            "繁體中文回答": "無論如何,必須使用標準繁體中文回答. Answer everything in a simple, smart, relevant and accurate style. No chit-chat!",
            "简体中文回答": "无论如何,必须使用标准简体中文回答. Answer everything in a simple, smart, relevant and accurate style. No chit-chat!",
            "English Chat": "You must reply in English. Answer everything in a simple, smart, relevant and accurate style. No chit-chat!",
            "Friendly & Conversational": "Respond in a warm, friendly, and engaging tone. Use natural language and offer helpful suggestions. Keep responses concise but personable.",
            "Professional & Formal": "Maintain a formal and professional tone. Use precise language, avoid slang, and ensure responses are suitable for business or academic contexts.",
            "Elon Musk style": "You must chat in Elon Musk style!",
            "Test": "Always detect the user's input language and respond in that same language. Do not translate unless explicitly requested. Answer everything in a simple, smart, relevant and accurate style. No chit-chat!"
        }
        system_prompt_selector = gr.Radio(
            label="Choose a System Prompt Style",
            choices=list(SYSTEM_PROMPT_OPTIONS.keys()),
            value="Smart & Accurate (Auto TC/EN)",  # Must match a key in SYSTEM_PROMPT_OPTIONS
            interactive=True
        )
        system_prompt_textbox = gr.Textbox(
            label="System Prompt",
            value=DEFAULT_SYSTEM_PROMPT,
            lines=3,
            placeholder="Enter a system prompt to guide the model's behavior...",
            interactive=False
        )
    # Function to toggle the interactivity of the system prompt textbox
    def toggle_system_prompt(use_custom):
        return gr.update(interactive=use_custom)

    use_custom_prompt_checkbox.change(
        fn=toggle_system_prompt,
        inputs=use_custom_prompt_checkbox,
        outputs=system_prompt_textbox,
        queue=False
    )
    # Function to update the textbox when the prompt style changes
    def update_prompt_text(selected_key, use_custom):
        if not use_custom:
            return gr.update(value=SYSTEM_PROMPT_OPTIONS[selected_key])
        else:
            return gr.update()

    system_prompt_selector.change(
        fn=update_prompt_text,
        inputs=[system_prompt_selector, use_custom_prompt_checkbox],
        outputs=system_prompt_textbox,
        queue=False
    )
    # --- Core Chat Logic ---
    # This function is the heart of the application.
    def respond(history, system_prompt, stream_output, selected_model_name, selected_prompt_key, use_custom_prompt):
        """
        Handles the entire chat process: resolves the system prompt, prepends
        it to the history, calls the Ollama API with the selected model, and
        streams (or returns in one piece) the response to the chatbot.
        """
        current_selected_model = MODEL_ID_MAP[selected_model_name]
        # Use the selected predefined prompt unless a custom one is enabled.
        # (Resolve this first so the Qwen3 suffix below is not overwritten.)
        if not use_custom_prompt:
            system_prompt = SYSTEM_PROMPT_OPTIONS[selected_prompt_key]
        # Disable Qwen3 thinking mode (lower-case the model id so the
        # substring check matches the mixed-case names in MODEL_ID_MAP).
        if "qwen3" in current_selected_model.lower():
            system_prompt = system_prompt + " /no_think"
        # The 'history' variable from Gradio contains the entire conversation.
        # We prepend the system prompt to this history to form the final payload.
        messages = [{"role": "system", "content": system_prompt}] + history
        # Add a placeholder for the assistant's response to the UI history.
        # This creates the space where the response will be displayed.
        history.append({"role": "assistant", "content": ""})
        if stream_output:
            # Stream the response from the Ollama API using the selected model.
            response_stream = ollama.chat(
                model=current_selected_model,
                messages=messages,
                stream=True
            )
            # Iterate through the stream, updating the placeholder with each new chunk.
            for chunk in response_stream:
                if chunk['message']['content']:
                    history[-1]['content'] += chunk['message']['content']
                # Yield the updated history to the chatbot for a real-time effect.
                yield history
        else:
            # Non-streaming: fetch the complete response in a single call.
            response = ollama.chat(model=current_selected_model, messages=messages)
            history[-1]['content'] = response['message']['content']
            yield history
    # This function handles the user's submission.
    def user_submit(history, user_message):
        """
        Adds the user's message to the chat history and clears the input box.
        This prepares the state for the main 'respond' function.
        """
        return history + [{"role": "user", "content": user_message}], ""
    # Gradio Event Wiring: add the user's message first, then generate the reply.
    msg.submit(
        user_submit,
        inputs=[chatbot, msg],
        outputs=[chatbot, msg],
        queue=False
    ).then(
        respond,
        inputs=[chatbot, system_prompt_textbox, stream_checkbox, selected_model_label, system_prompt_selector, use_custom_prompt_checkbox],
        outputs=[chatbot]
    )
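
# Note: generator handlers like `respond` rely on Gradio's request queue.
# Recent Gradio versions enable it by default; on older versions (an
# assumption about the installed version) enable it explicitly before launch:
# demo.queue()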

# Launch the Gradio interface
demo.launch(server_name="0.0.0.0", server_port=7860)