Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	user can define search timeout
Browse files
    	
        app.py
    CHANGED
    
    | @@ -116,7 +116,7 @@ def format_conversation(history, system_prompt, tokenizer): | |
| 116 | 
             
            def chat_response(user_msg, chat_history, system_prompt,
         | 
| 117 | 
             
                              enable_search, max_results, max_chars,
         | 
| 118 | 
             
                              model_name, max_tokens, temperature,
         | 
| 119 | 
            -
                              top_k, top_p, repeat_penalty):
         | 
| 120 | 
             
                """
         | 
| 121 | 
             
                Generates streaming chat responses, optionally with background web search.
         | 
| 122 | 
             
                """
         | 
| @@ -149,7 +149,7 @@ def chat_response(user_msg, chat_history, system_prompt, | |
| 149 |  | 
| 150 | 
             
                    # wait up to 1s for snippets, then replace debug with them
         | 
| 151 | 
             
                    if enable_search:
         | 
| 152 | 
            -
                        thread_search.join(timeout=1.0) | 
| 153 | 
             
                        if search_results:
         | 
| 154 | 
             
                            debug = "### Search results merged into prompt\n\n" + "\n".join(
         | 
| 155 | 
             
                                f"- {r}" for r in search_results
         | 
| @@ -280,6 +280,7 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo: | |
| 280 | 
             
                        gr.Markdown("### Web Search Settings")
         | 
| 281 | 
             
                        mr = gr.Number(value=6, precision=0, label="Max Results")
         | 
| 282 | 
             
                        mc = gr.Number(value=600, precision=0, label="Max Chars/Result")
         | 
|  | |
| 283 | 
             
                        clr = gr.Button("Clear Chat")
         | 
| 284 | 
             
                        cnl = gr.Button("Cancel Generation")
         | 
| 285 | 
             
                    with gr.Column(scale=7):
         | 
| @@ -292,6 +293,6 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo: | |
| 292 | 
             
                cnl.click(fn=cancel_generation, outputs=dbg)
         | 
| 293 | 
             
                txt.submit(fn=chat_response,
         | 
| 294 | 
             
                           inputs=[txt, chat, sys_prompt, search_chk, mr, mc,
         | 
| 295 | 
            -
                                   model_dd, max_tok, temp, k, p, rp],
         | 
| 296 | 
             
                           outputs=[chat, dbg])
         | 
| 297 | 
             
                demo.launch()
         | 
|  | |
| 116 | 
             
            def chat_response(user_msg, chat_history, system_prompt,
         | 
| 117 | 
             
                              enable_search, max_results, max_chars,
         | 
| 118 | 
             
                              model_name, max_tokens, temperature,
         | 
| 119 | 
            +
                              top_k, top_p, repeat_penalty, search_timeout):
         | 
| 120 | 
             
                """
         | 
| 121 | 
             
                Generates streaming chat responses, optionally with background web search.
         | 
| 122 | 
             
                """
         | 
|  | |
| 149 |  | 
| 150 | 
             
                    # wait up to 1s for snippets, then replace debug with them
         | 
| 151 | 
             
                    if enable_search:
         | 
| 152 | 
            +
                        thread_search.join(timeout=float(search_timeout))
         | 
| 153 | 
             
                        if search_results:
         | 
| 154 | 
             
                            debug = "### Search results merged into prompt\n\n" + "\n".join(
         | 
| 155 | 
             
                                f"- {r}" for r in search_results
         | 
|  | |
| 280 | 
             
                        gr.Markdown("### Web Search Settings")
         | 
| 281 | 
             
                        mr = gr.Number(value=6, precision=0, label="Max Results")
         | 
| 282 | 
             
                        mc = gr.Number(value=600, precision=0, label="Max Chars/Result")
         | 
| 283 | 
            +
                        st = gr.Slider(minimum=0.0, maximum=30.0, step=0.5, value=5.0, label="Search Timeout (s)")
         | 
| 284 | 
             
                        clr = gr.Button("Clear Chat")
         | 
| 285 | 
             
                        cnl = gr.Button("Cancel Generation")
         | 
| 286 | 
             
                    with gr.Column(scale=7):
         | 
|  | |
| 293 | 
             
                cnl.click(fn=cancel_generation, outputs=dbg)
         | 
| 294 | 
             
                txt.submit(fn=chat_response,
         | 
| 295 | 
             
                           inputs=[txt, chat, sys_prompt, search_chk, mr, mc,
         | 
| 296 | 
            +
                                   model_dd, max_tok, temp, k, p, rp, st],
         | 
| 297 | 
             
                           outputs=[chat, dbg])
         | 
| 298 | 
             
                demo.launch()
         | 
