SkyNetWalker committed on
Commit 224325d · verified · 1 Parent(s): 6f4bea7

Create app.py

Files changed (1)
  1. app.py +184 -0
app.py ADDED
@@ -0,0 +1,184 @@
import requests

check_ipinfo = requests.get("https://ipinfo.io").json()['country']
print("Run-Location-As: ", check_ipinfo)

import gradio as gr
import ollama
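# A slightly more defensive variant of the lookup above (illustrative sketch only, not what
# this commit does): add a timeout and a fallback so a blocked or slow ipinfo.io call
# cannot hang the app at startup.
#
#   try:
#       check_ipinfo = requests.get("https://ipinfo.io", timeout=5).json().get('country', 'unknown')
#   except requests.RequestException:
#       check_ipinfo = "unknown"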
# List of available models for selection.
# IMPORTANT: These names must correspond to models that have either been pulled
# in advance (e.g. by run.sh) or are already present in the local Ollama instance:
#
#   ollama pull hf.co/bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF:Q4_K_M
#   #ollama pull hf.co/bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF:Q4_K_M
#   ollama pull smollm2:360m-instruct-q5_K_M
#   ollama pull hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
#   #ollama pull gemma3n:e2b-it-q4_K_M  # slow on Spaces CPU
#   ollama pull granite3.3:2b
#   ollama pull hf.co/bartowski/tencent_Hunyuan-4B-Instruct-GGUF:Q4_K_M

# Model from run.sh
AVAILABLE_MODELS = [
    'hf.co/bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF:Q4_K_M',
    #'hf.co/bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF:Q4_K_M',
    'smollm2:360m-instruct-q5_K_M',
    'hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M',  # OK speed with CPU
    #'gemma3n:e2b-it-q4_K_M',
    'granite3.3:2b',
    'hf.co/bartowski/tencent_Hunyuan-4B-Instruct-GGUF:Q4_K_M'
]
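# Optional startup sanity check (illustrative sketch only; the field holding the model name
# is 'model' in recent ollama-python releases and 'name' in older ones, adjust as needed):
#
#   pulled = {m['model'] for m in ollama.list()['models']}
#   missing = [m for m in AVAILABLE_MODELS if m not in pulled]
#   if missing:
#       print("Warning: these models have not been pulled into Ollama yet:", missing)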
# --- Models that failed to run:
#'hf.co/ggml-org/SmolLM3-3B-GGUF:Q4_K_M',
#'hf.co/bartowski/nvidia_OpenReasoning-Nemotron-1.5B-GGUF:Q5_K_M',

# Default System Prompt.
# The Traditional Chinese rules below say: (1) if the query is in Chinese, answer in
# standard Traditional Chinese following official document conventions; (2) cite the
# rules or basis relied upon; (3) if the query is in English, answer in English.
DEFAULT_SYSTEM_PROMPT = """
1. 如果查詢是以中文輸入,使用標準繁體中文回答,符合官方文書規範
2. 要提供引用規則依据
3. 如果查詢是以英文輸入,使用英文回答
Answer everything in simple, smart, relevant and accurate style, within 20 words. No chatty!
"""
# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
    gr.Markdown("## Small Language Model (SLM) run with CPU")  # Changed title to be more generic
    gr.Markdown(f"(Run-Location-As: `{check_ipinfo}`)")
    gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")
    # Model Selection
    with gr.Row():
        selected_model = gr.Radio(
            choices=AVAILABLE_MODELS,
            value=AVAILABLE_MODELS[0],  # Default to the first model in the list
            label="Select Model",
            info="Choose the LLM model to chat with.",
            interactive=True
        )

    chatbot = gr.Chatbot(
        label="Conversation",
        height=400,
        type='messages',
        layout="bubble"
    )
    with gr.Row():
        msg = gr.Textbox(
            show_label=False,
            placeholder="Type your message here and press Enter...",
            lines=1,
            scale=4,
            container=False
        )
    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            stream_checkbox = gr.Checkbox(
                label="Stream Output",
                value=True,
                info="Enable to see the response generate in real-time."
            )
            use_custom_prompt_checkbox = gr.Checkbox(
                label="Use Custom System Prompt",
                value=False,
                info="Check this box to provide your own system prompt below."
            )
        # --- New: System Prompt Options ---
        SYSTEM_PROMPT_OPTIONS = {
            "Smart & Accurate (Default)": DEFAULT_SYSTEM_PROMPT,
            "Friendly & Conversational": """Respond in a warm, friendly, and engaging tone. Use natural language and offer helpful suggestions. Keep responses concise but personable.""",
            "Professional & Formal": """Maintain a formal and professional tone. Use precise language, avoid slang, and ensure responses are suitable for business or academic contexts."""
        }
        system_prompt_selector = gr.Radio(
            label="Choose a System Prompt Style",
            choices=list(SYSTEM_PROMPT_OPTIONS.keys()),
            value="Smart & Accurate (Default)",
            interactive=True
        )

        system_prompt_textbox = gr.Textbox(
            label="System Prompt",
            value=DEFAULT_SYSTEM_PROMPT,
            lines=3,
            placeholder="Enter a system prompt to guide the model's behavior...",
            interactive=False
        )
    # Function to toggle the interactivity of the system prompt textbox
    def toggle_system_prompt(use_custom):
        return gr.update(interactive=use_custom)

    use_custom_prompt_checkbox.change(
        fn=toggle_system_prompt,
        inputs=use_custom_prompt_checkbox,
        outputs=system_prompt_textbox,
        queue=False
    )
    # --- Core Chat Logic ---
    # This function is the heart of the application.
    def respond(history, system_prompt, stream_output, current_selected_model, selected_prompt_key, use_custom_prompt):  # Added selected_prompt_key and use_custom_prompt
        """
        This is the single function that handles the entire chat process.
        It takes the history, prepends the system prompt, calls the Ollama API,
        and streams the response back to the chatbot.
        """
        # Use the selected predefined prompt unless a custom prompt is enabled.
        # (Resolve this first so the /no_think tag appended below is not overwritten.)
        if not use_custom_prompt:
            system_prompt = SYSTEM_PROMPT_OPTIONS[selected_prompt_key]

        # Disable Qwen3 thinking. Lowercase the model name so the check matches
        # names such as 'hf.co/bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF:Q4_K_M'.
        if "qwen3" in current_selected_model.lower():
            system_prompt = system_prompt + " /no_think"
        # The 'history' variable from Gradio contains the entire conversation.
        # We prepend the system prompt to this history to form the final payload.
        messages = [{"role": "system", "content": system_prompt}] + history

        # Add a placeholder for the assistant's response to the UI history.
        # This creates the space where the streamed response will be displayed.
        history.append({"role": "assistant", "content": ""})
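        # For illustration only: at this point `messages` (the payload sent to Ollama) looks like
        #   [{"role": "system", "content": "<system prompt>"}, {"role": "user", "content": "Hello"}]
        # while `history` (what the UI renders) additionally ends with the empty assistant placeholder.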
        if stream_output:
            # Stream the response from the Ollama API using the currently selected model.
            response_stream = ollama.chat(
                model=current_selected_model,  # Use the dynamically selected model
                messages=messages,
                stream=True
            )

            # Iterate through the stream, updating the placeholder with each new chunk.
            for chunk in response_stream:
                if chunk['message']['content']:
                    history[-1]['content'] += chunk['message']['content']
                    # Yield the updated history to the chatbot for a real-time effect.
                    yield history
        else:
            # Streaming disabled: make a single blocking call and show the full answer at once.
            response = ollama.chat(
                model=current_selected_model,
                messages=messages,
                stream=False
            )
            history[-1]['content'] = response['message']['content']
            yield history
    # This function handles the user's submission.
    def user_submit(history, user_message):
        """
        Adds the user's message to the chat history and clears the input box.
        This prepares the state for the main 'respond' function.
        """
        return history + [{"role": "user", "content": user_message}], ""
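    # Illustrative example: user_submit([], "Hi") returns ([{"role": "user", "content": "Hi"}], ""),
    # i.e. the updated chatbot history plus an empty string that clears the textbox.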
    # Gradio Event Wiring
    msg.submit(
        user_submit,
        inputs=[chatbot, msg],
        outputs=[chatbot, msg],
        queue=False
    ).then(
        respond,
        inputs=[chatbot, system_prompt_textbox, stream_checkbox, selected_model, system_prompt_selector, use_custom_prompt_checkbox],  # Pass new inputs
        outputs=[chatbot]
    )
# Launch the Gradio interface
demo.launch(server_name="0.0.0.0", server_port=7860)
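# Note: server_name="0.0.0.0" and port 7860 are the values a Hugging Face Space expects.
# The Ollama server itself is assumed to be started separately (e.g. `ollama serve` plus the
# `ollama pull` commands in run.sh) before this script runs.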