ProCreations committed on
Commit
da40d52
Β·
verified Β·
1 Parent(s): b2e1a95

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +439 -0
app.py ADDED
@@ -0,0 +1,439 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model and tokenizer once at import time so every request reuses them.
# NOTE(review): device_map="auto" lets accelerate place the weights on the
# available GPU(s); bfloat16 halves memory versus float32.
model_name = "HuggingFaceTB/SmolLM3-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
@spaces.GPU
def chat_with_smollm3(message, history, system_prompt="", enable_thinking=True, temperature=0.6, top_p=0.95, max_tokens=32768):
    """Generate one SmolLM3-3B reply for the current chat turn.

    Args:
        message: Current user message.
        history: List of (user, assistant) message pairs from earlier turns.
        system_prompt: Optional system instructions. SmolLM3 reads literal
            "/think" / "/no_think" flags from the system prompt to switch
            extended reasoning on or off.
        enable_thinking: Whether to request reasoning traces.
        temperature: Sampling temperature; 0 falls back to greedy decoding
            (``do_sample=True`` with temperature 0 would raise in generate()).
        top_p: Nucleus-sampling cutoff; only used when sampling.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The decoded assistant response with special tokens stripped.
    """
    messages = []

    if system_prompt.strip():
        # Make the explicit flag in the prompt agree with the checkbox,
        # without duplicating a flag the user already typed. If the prompt
        # already carries the opposite flag, it is left untouched.
        if enable_thinking and "/no_think" not in system_prompt and "/think" not in system_prompt:
            system_prompt += "/think"
        elif not enable_thinking and "/think" not in system_prompt and "/no_think" not in system_prompt:
            system_prompt += "/no_think"
        messages.append({"role": "system", "content": system_prompt})
    elif not enable_thinking:
        # No system prompt: a bare "/no_think" system message disables reasoning.
        messages.append({"role": "system", "content": "/no_think"})

    # Replay prior turns so the model sees the whole conversation.
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # When a system prompt is present the in-prompt flags take precedence, so
    # the template-level switch is deliberately left unset (None).
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=enable_thinking if not system_prompt.strip() else None,
    )

    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # temperature == 0 is invalid for sampling; use greedy decoding instead
    # and omit the sampling-only parameters to avoid generate() warnings.
    do_sample = temperature > 0
    gen_kwargs = {
        "max_new_tokens": max_tokens,
        "do_sample": do_sample,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if do_sample:
        gen_kwargs["temperature"] = temperature
        gen_kwargs["top_p"] = top_p

    with torch.no_grad():
        generated_ids = model.generate(**model_inputs, **gen_kwargs)

    # Keep only the newly generated continuation, not the echoed prompt.
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
    return tokenizer.decode(output_ids, skip_special_tokens=True)
@spaces.GPU
def chat_with_tools(message, history, tools_json="", system_prompt="", enable_thinking=False, temperature=0.6, top_p=0.95, max_tokens=32768):
    """Generate one SmolLM3-3B reply with XML tool-calling enabled.

    Args:
        message: Current user message.
        history: List of (user, assistant) message pairs from earlier turns.
        tools_json: JSON array of tool specs passed to the chat template's
            ``xml_tools`` slot; empty string disables tools.
        system_prompt: Optional system instructions.
        enable_thinking: Whether to request reasoning traces.
        temperature: Sampling temperature; 0 falls back to greedy decoding.
        top_p: Nucleus-sampling cutoff; only used when sampling.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The decoded assistant response, or an error string when the tools
        JSON cannot be parsed.
    """
    import json

    # Parse tools if provided. Only catch JSON errors so genuine bugs
    # elsewhere are not silently swallowed (the original bare ``except``
    # hid everything).
    tools = []
    if tools_json.strip():
        try:
            tools = json.loads(tools_json)
        except json.JSONDecodeError:
            return "Error: Invalid JSON format for tools"

    messages = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})

    # Replay prior turns so the model sees the whole conversation.
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # SmolLM3's template accepts tool specs via ``xml_tools``.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=enable_thinking,
        xml_tools=tools if tools else None,
    )

    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # temperature == 0 is invalid for sampling; use greedy decoding instead.
    do_sample = temperature > 0
    gen_kwargs = {
        "max_new_tokens": max_tokens,
        "do_sample": do_sample,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if do_sample:
        gen_kwargs["temperature"] = temperature
        gen_kwargs["top_p"] = top_p

    with torch.no_grad():
        generated_ids = model.generate(**model_inputs, **gen_kwargs)

    # Keep only the newly generated continuation, not the echoed prompt.
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
    return tokenizer.decode(output_ids, skip_special_tokens=True)
# Example tools for demonstration: a JSON array of tool specs in the
# JSON-schema-style format SmolLM3's xml_tools template slot expects.
# Shown as the default value of the "Tools JSON" textbox.
example_tools = """[
  {
    "name": "get_weather",
    "description": "Get the weather in a city",
    "parameters": {
      "type": "object",
      "properties": {
        "city": {
          "type": "string",
          "description": "The city to get the weather for"
        }
      }
    }
  },
  {
    "name": "calculate",
    "description": "Perform basic mathematical calculations",
    "parameters": {
      "type": "object",
      "properties": {
        "expression": {
          "type": "string",
          "description": "Mathematical expression to evaluate"
        }
      }
    }
  }
]"""
# Create Gradio interface with dark theme and mobile support.
with gr.Blocks(
    title="SmolLM3-3B Chat",
    theme=gr.themes.Base().set(
        background_fill_primary="#1a1a1a",
        background_fill_secondary="#2d2d2d",
        border_color_primary="#404040",
        button_primary_background_fill="#4a9eff",
        button_primary_background_fill_hover="#5aa3ff",
        button_primary_text_color="#ffffff",
        block_background_fill="#2d2d2d",
        block_border_color="#404040",
        input_background_fill="#3a3a3a",
        input_border_color="#404040",
        slider_color="#4a9eff",
        checkbox_background_color="#3a3a3a",
        checkbox_border_color="#404040",
        textbox_border_color="#404040",
    ),
    css="""
    /* Mobile-first responsive design */
    @media (max-width: 768px) {
        .gradio-container { padding: 8px !important; }
        .gr-row { flex-direction: column !important; }
        .gr-column { width: 100% !important; min-width: 0 !important; }
        .gr-tabs { font-size: 14px !important; }
        .gr-button { width: 100% !important; margin: 2px 0 !important; }
        .gr-textbox { font-size: 16px !important; }
        .gr-chatbot { height: 400px !important; }
        .gr-markdown { font-size: 14px !important; }
        .gr-slider { width: 100% !important; }
        .settings-panel { margin-top: 20px !important; }
    }

    /* Settings panel styling */
    .settings-panel {
        background-color: #2d2d2d !important;
        border: 1px solid #404040 !important;
        border-radius: 8px !important;
        padding: 16px !important;
        margin-top: 12px !important;
    }

    .settings-button {
        background-color: #3a3a3a !important;
        border: 1px solid #404040 !important;
        color: #ffffff !important;
        padding: 8px 16px !important;
        border-radius: 6px !important;
        cursor: pointer !important;
        font-size: 14px !important;
        margin-bottom: 8px !important;
    }

    .settings-button:hover { background-color: #4a4a4a !important; }

    /* Dark mode improvements */
    .gr-chatbot { background-color: #2d2d2d !important; }

    .gr-chatbot .message {
        background-color: #3a3a3a !important;
        border: 1px solid #404040 !important;
        border-radius: 8px !important;
        margin: 4px 0 !important;
        padding: 8px !important;
    }

    .gr-chatbot .message.user {
        background-color: #4a9eff !important;
        color: white !important;
    }

    .gr-chatbot .message.bot {
        background-color: #3a3a3a !important;
        color: #ffffff !important;
    }

    /* Better mobile touch targets */
    @media (max-width: 768px) {
        .gr-button { min-height: 44px !important; padding: 12px !important; }
        .gr-slider input { min-height: 44px !important; }
        .gr-checkbox { min-height: 44px !important; }
    }

    /* Improve readability */
    .gr-markdown h1, .gr-markdown h2, .gr-markdown h3 { color: #ffffff !important; }

    .gr-markdown p, .gr-markdown li { color: #e0e0e0 !important; }

    /* Tab styling */
    .gr-tabs .gr-tab {
        background-color: #3a3a3a !important;
        color: #ffffff !important;
        border-color: #404040 !important;
    }

    .gr-tabs .gr-tab.selected {
        background-color: #4a9eff !important;
        color: #ffffff !important;
    }
    """
) as demo:
    gr.Markdown("# πŸ€– SmolLM3-3B Chat Interface")
    gr.Markdown("Chat with SmolLM3-3B, a 3B parameter model with advanced reasoning, long context support, and tool calling capabilities.")

    with gr.Tabs():
        with gr.TabItem("πŸ’¬ Standard Chat"):
            chatbot = gr.Chatbot(height=500, label="Chat with SmolLM3-3B")
            msg = gr.Textbox(label="Your message", placeholder="Type your message here...")

            with gr.Row():
                submit = gr.Button("Send", variant="primary")
                clear = gr.Button("Clear")
                settings_btn = gr.Button("βš™οΈ Settings", size="sm")

            # Server-side flag tracking whether the panel is open.
            # BUG FIX: the original read ``settings_panel.visible`` inside the
            # callback, but component attributes on the Python object never
            # reflect the live frontend state, so the panel could never be
            # toggled closed. gr.State carries the real current value.
            settings_visible = gr.State(False)

            with gr.Column(visible=False, elem_classes="settings-panel") as settings_panel:
                gr.Markdown("### βš™οΈ Advanced Settings")
                system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system instructions (optional)",
                    lines=3,
                    value="You are an AI assistant trained by HuggingFace. You are helpful, harmless, and honest."
                )
                enable_thinking = gr.Checkbox(
                    label="Enable Extended Thinking",
                    value=True,
                    info="Enable reasoning traces for better responses"
                )
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.6,
                    step=0.1,
                    label="Temperature"
                )
                top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p"
                )
                max_tokens = gr.Slider(
                    minimum=1,
                    maximum=32768,
                    value=32768,
                    step=1,
                    label="Max Tokens"
                )

            def respond(message, history, sys_prompt, thinking, temp, top_p_val, max_tok):
                # Run one model turn, append it to the chat history, and
                # clear the input box.
                response = chat_with_smollm3(message, history, sys_prompt, thinking, temp, top_p_val, max_tok)
                history.append((message, response))
                return "", history

            def toggle_settings(visible):
                # Flip panel visibility and persist the new value in state.
                return gr.update(visible=not visible), not visible

            submit.click(respond, [msg, chatbot, system_prompt, enable_thinking, temperature, top_p, max_tokens], [msg, chatbot])
            msg.submit(respond, [msg, chatbot, system_prompt, enable_thinking, temperature, top_p, max_tokens], [msg, chatbot])
            clear.click(lambda: ([], ""), outputs=[chatbot, msg])
            settings_btn.click(toggle_settings, inputs=[settings_visible], outputs=[settings_panel, settings_visible])

        with gr.TabItem("πŸ› οΈ Tool Calling"):
            tool_chatbot = gr.Chatbot(height=500, label="Chat with Tools")
            tool_msg = gr.Textbox(label="Your message", placeholder="Ask me to use tools...")

            with gr.Row():
                tool_submit = gr.Button("Send", variant="primary")
                tool_clear = gr.Button("Clear")
                tool_settings_btn = gr.Button("βš™οΈ Settings", size="sm")

            # Same visibility-state fix as the standard-chat settings panel.
            tool_settings_visible = gr.State(False)

            with gr.Column(visible=False, elem_classes="settings-panel") as tool_settings_panel:
                gr.Markdown("### πŸ› οΈ Tool Settings")
                tools_json = gr.Textbox(
                    label="Tools JSON",
                    placeholder="Enter tools as JSON array",
                    lines=10,
                    value=example_tools
                )
                tool_system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system instructions (optional)",
                    lines=2,
                    value="You are an AI assistant trained by HuggingFace. You are helpful, harmless, and honest."
                )
                tool_thinking = gr.Checkbox(
                    label="Enable Extended Thinking",
                    value=False,
                    info="Enable reasoning traces for tool usage"
                )
                tool_temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.6,
                    step=0.1,
                    label="Temperature"
                )
                tool_top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p"
                )
                tool_max_tokens = gr.Slider(
                    minimum=1,
                    maximum=32768,
                    value=32768,
                    step=1,
                    label="Max Tokens"
                )

            def tool_respond(message, history, tools, sys_prompt, thinking, temp, top_p_val, max_tok):
                # Run one tool-enabled model turn and append it to history.
                response = chat_with_tools(message, history, tools, sys_prompt, thinking, temp, top_p_val, max_tok)
                history.append((message, response))
                return "", history

            def toggle_tool_settings(visible):
                # Flip panel visibility and persist the new value in state.
                return gr.update(visible=not visible), not visible

            tool_submit.click(tool_respond, [tool_msg, tool_chatbot, tools_json, tool_system_prompt, tool_thinking, tool_temperature, tool_top_p, tool_max_tokens], [tool_msg, tool_chatbot])
            tool_msg.submit(tool_respond, [tool_msg, tool_chatbot, tools_json, tool_system_prompt, tool_thinking, tool_temperature, tool_top_p, tool_max_tokens], [tool_msg, tool_chatbot])
            tool_clear.click(lambda: ([], ""), outputs=[tool_chatbot, tool_msg])
            tool_settings_btn.click(toggle_tool_settings, inputs=[tool_settings_visible], outputs=[tool_settings_panel, tool_settings_visible])

    gr.Markdown("""
    ### πŸ“š Model Information
    - **Model**: HuggingFaceTB/SmolLM3-3B
    - **Features**: Advanced reasoning, long context (up to 128k tokens), multilingual support
    - **Languages**: English, French, Spanish, German, Italian, Portuguese (+ Arabic, Chinese, Russian)
    - **Extended Thinking**: Provides reasoning traces for better responses
    - **Tool Calling**: Supports XML-based tool calling for agentic workflows

    ### πŸ’‘ Usage Tips
    - Use Extended Thinking for complex reasoning tasks
    - Adjust temperature (0.6 recommended) for response creativity
    - Try different system prompts for specialized behaviors
    - Use tool calling for function-based interactions
    """)
# Launch the Gradio app only when run as a script (Spaces imports app.py).
if __name__ == "__main__":
    demo.launch()