from gpt4all import GPT4All
import gradio as gr

# Load the local Falcon model weights via GPT4All.
model = GPT4All("ggml-model-gpt4all-falcon-q4_0.bin")

def run_falcon(input_text):
    # Stream the response back to Gradio: yield the prompt first,
    # then append each generated token as it arrives.
    total_text = input_text
    yield total_text
    for token in model.generate(input_text, max_tokens=2048, streaming=True):
        print(f"Sending {token}")
        total_text += token
        yield total_text

# Queueing lets Gradio stream the partial outputs yielded by the generator.
app = gr.Interface(fn=run_falcon, inputs="text", outputs="text")
app.queue()
app.launch()