Update app.py
app.py CHANGED
@@ -100,6 +100,10 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
     and streams the response back to the chatbot.
     """
 
+    #Disable Qwen3 thinking
+    #if "Qwen3".lower() in current_selected_model:
+    #    system_prompt = system_prompt+" /no_think"
+
     # The 'history' variable from Gradio contains the entire conversation.
     # We prepend the system prompt to this history to form the final payload.
     messages = [{"role": "system", "content": system_prompt}] + history
@@ -112,7 +116,8 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
     response_stream = ollama.chat(
         model=current_selected_model,  # Use the dynamically selected model
         messages=messages,
-        stream=True
+        stream=True,
+        think=False
     )
 
     # Iterate through the stream, updating the placeholder with each new chunk.
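
For context, here is a minimal standalone sketch of the updated call, run outside the Gradio app. It assumes a local Ollama server with a Qwen3 model already pulled and an ollama Python client recent enough to support the think parameter (0.5.0 or later); the model tag, prompt, and history values are placeholders for illustration.

# Minimal sketch of the updated call, run outside the Gradio app.
# Assumptions: a local Ollama server, a Qwen3 model already pulled,
# and ollama-python >= 0.5.0 (the release that added the think flag).
import ollama

current_selected_model = "qwen3:8b"  # placeholder tag for illustration
system_prompt = "You are a helpful assistant."
history = [{"role": "user", "content": "Hello!"}]  # Gradio-style message history

# Prepend the system prompt to the running conversation, as in the app.
messages = [{"role": "system", "content": system_prompt}] + history

response_stream = ollama.chat(
    model=current_selected_model,
    messages=messages,
    stream=True,   # yields partial responses as they are generated
    think=False,   # suppress the thinking trace on reasoning models such as Qwen3
)

# Each chunk carries the next piece of the assistant's reply.
reply = ""
for chunk in response_stream:
    piece = chunk["message"]["content"]
    reply += piece
    print(piece, end="", flush=True)

The commit keeps the earlier prompt-level workaround (appending " /no_think" to the system prompt) commented out rather than deleting it. Note that, as written, that check compares the lowercased literal "qwen3" against the raw model name, so it would only match tags that are already lowercase; checking against current_selected_model.lower() would match regardless of casing.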