Spaces:
Running
on
Zero
Running
on
Zero
user can define search timeout
Browse files
app.py
CHANGED
@@ -116,7 +116,7 @@ def format_conversation(history, system_prompt, tokenizer):
|
|
116 |
def chat_response(user_msg, chat_history, system_prompt,
|
117 |
enable_search, max_results, max_chars,
|
118 |
model_name, max_tokens, temperature,
|
119 |
-
top_k, top_p, repeat_penalty):
|
120 |
"""
|
121 |
Generates streaming chat responses, optionally with background web search.
|
122 |
"""
|
@@ -149,7 +149,7 @@ def chat_response(user_msg, chat_history, system_prompt,
|
|
149 |
|
150 |
# wait up to 1s for snippets, then replace debug with them
|
151 |
if enable_search:
|
152 |
-
thread_search.join(timeout=1.0)
|
153 |
if search_results:
|
154 |
debug = "### Search results merged into prompt\n\n" + "\n".join(
|
155 |
f"- {r}" for r in search_results
|
@@ -280,6 +280,7 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
|
|
280 |
gr.Markdown("### Web Search Settings")
|
281 |
mr = gr.Number(value=6, precision=0, label="Max Results")
|
282 |
mc = gr.Number(value=600, precision=0, label="Max Chars/Result")
|
|
|
283 |
clr = gr.Button("Clear Chat")
|
284 |
cnl = gr.Button("Cancel Generation")
|
285 |
with gr.Column(scale=7):
|
@@ -292,6 +293,6 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
|
|
292 |
cnl.click(fn=cancel_generation, outputs=dbg)
|
293 |
txt.submit(fn=chat_response,
|
294 |
inputs=[txt, chat, sys_prompt, search_chk, mr, mc,
|
295 |
-
model_dd, max_tok, temp, k, p, rp],
|
296 |
outputs=[chat, dbg])
|
297 |
demo.launch()
|
|
|
116 |
def chat_response(user_msg, chat_history, system_prompt,
|
117 |
enable_search, max_results, max_chars,
|
118 |
model_name, max_tokens, temperature,
|
119 |
+
top_k, top_p, repeat_penalty, search_timeout):
|
120 |
"""
|
121 |
Generates streaming chat responses, optionally with background web search.
|
122 |
"""
|
|
|
149 |
|
150 |
# wait up to `search_timeout` seconds for snippets, then replace debug with them
|
151 |
if enable_search:
|
152 |
+
thread_search.join(timeout=float(search_timeout))
|
153 |
if search_results:
|
154 |
debug = "### Search results merged into prompt\n\n" + "\n".join(
|
155 |
f"- {r}" for r in search_results
|
|
|
280 |
gr.Markdown("### Web Search Settings")
|
281 |
mr = gr.Number(value=6, precision=0, label="Max Results")
|
282 |
mc = gr.Number(value=600, precision=0, label="Max Chars/Result")
|
283 |
+
st = gr.Slider(minimum=0.0, maximum=30.0, step=0.5, value=5.0, label="Search Timeout (s)")
|
284 |
clr = gr.Button("Clear Chat")
|
285 |
cnl = gr.Button("Cancel Generation")
|
286 |
with gr.Column(scale=7):
|
|
|
293 |
cnl.click(fn=cancel_generation, outputs=dbg)
|
294 |
txt.submit(fn=chat_response,
|
295 |
inputs=[txt, chat, sys_prompt, search_chk, mr, mc,
|
296 |
+
model_dd, max_tok, temp, k, p, rp, st],
|
297 |
outputs=[chat, dbg])
|
298 |
demo.launch()
|