from gpt4all import GPT4All
import gradio as gr

# Load the local Falcon model weights via GPT4All.
model = GPT4All("ggml-model-gpt4all-falcon-q4_0.bin")

def run_falcon(input_text):
    # Stream the response back to Gradio: yield the prompt first,
    # then append each generated token as it arrives.
    total_text = input_text
    yield total_text
    for token in model.generate(input_text, max_tokens=2048, streaming=True):
        print(f"Sending {token}")
        total_text += token
        yield total_text

# Queueing lets Gradio stream the partial outputs yielded by the generator.
app = gr.Interface(fn=run_falcon, inputs="text", outputs="text")
app.queue()
app.launch()