# Source: Hugging Face Space by dondoesstuff — app.py, commit bc6741e.
# (Scraped page chrome — "raw / history / blame / 2.12 kB" — removed; it was
# not part of the program and made the file unparseable as Python.)
import gradio as gr
from gpt4all import GPT4All
# Load the local quantized model via gpt4all (orca-mini-3b, GGML v3, q4_0).
# NOTE(review): despite the "GPT-4" labels elsewhere in this file, this is
# NOT GPT-4 — it is a small local model. The .bin file must be present or
# downloadable by gpt4all at startup; verify against deployment environment.
model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
# Single-turn prompt template: {0} is replaced with the raw user message.
# No multi-turn history is actually kept, despite the description text below.
prompt_template = 'USER: {0}\nASSISTANT: '
# Function to generate responses using the local model with custom settings.
def generate_response(prompt, max_tokens=200, temp=0.7, top_k=40, top_p=0.4,
                      repeat_penalty=1.18, repeat_last_n=64, n_batch=8,
                      n_predict="Auto", streaming=False):
    """Generate a model response for *prompt* with the given sampling settings.

    Gradio invokes ``fn`` with ONE positional argument per input component.
    The interface below declares 10 components, so this function must accept
    10 parameters — the previous ``(prompt, settings)`` signature raised
    ``TypeError`` on every call.

    Args:
        prompt: Raw user message; inserted into ``prompt_template``.
        max_tokens, top_k, repeat_last_n, n_batch: Integer sampling settings
            (Gradio Number components deliver floats; coerced below).
        temp, top_p, repeat_penalty: Float sampling settings.
        n_predict: Free-text box — "Auto" (or blank) means "let the library
            decide" (passed as None); otherwise parsed as an int.
        streaming: Whether gpt4all should return a token generator instead
            of a completed string.

    Returns:
        The model's completion (str, or a generator when streaming=True).

    Raises:
        ValueError: if n_predict is a string that is neither "Auto" nor an int.
    """
    # Gradio Number components yield floats; the backend expects ints here.
    max_tokens = int(max_tokens)
    top_k = int(top_k)
    repeat_last_n = int(repeat_last_n)
    n_batch = int(n_batch)
    # Map the "Auto" sentinel from the Textbox component to None.
    if isinstance(n_predict, str):
        stripped = n_predict.strip()
        n_predict = None if stripped.lower() in ("", "auto") else int(stripped)
    # Wrap the user message in the USER/ASSISTANT template.
    chat_history_with_prompt = prompt_template.format(prompt)
    # Generate response with custom settings.
    response = model.generate(
        chat_history_with_prompt,
        max_tokens=max_tokens,
        temp=temp,
        top_k=top_k,
        top_p=top_p,
        repeat_penalty=repeat_penalty,
        repeat_last_n=repeat_last_n,
        n_batch=n_batch,
        n_predict=n_predict,
        streaming=streaming,
    )
    return response
# Initialize Gradio Interface — one input component per generate_response
# parameter (Gradio passes each component's value as a separate positional
# argument to fn).
# NOTE(review): gr.inputs / gr.outputs is the deprecated pre-3.x Gradio
# namespace; confirm the pinned gradio version still ships it.
# NOTE(review): the title/description advertise "GPT-4" and per-session chat
# history, but the model is a local orca-mini-3b and no history is kept —
# these are runtime strings, flagged here rather than changed.
interface = gr.Interface(
fn=generate_response,
inputs=[
gr.inputs.Textbox(label="Chat Input", placeholder="Start the conversation..."),
gr.inputs.Number(default=200, label="Max Tokens"),
gr.inputs.Number(default=0.7, label="Temperature"),
gr.inputs.Number(default=40, label="Top-k"),
gr.inputs.Number(default=0.4, label="Top-p"),
gr.inputs.Number(default=1.18, label="Repeat Penalty"),
gr.inputs.Number(default=64, label="Repeat Last n"),
gr.inputs.Number(default=8, label="Batch Size"),
# "Auto" is a sentinel string; generate_response is expected to map it
# to a library default for n_predict.
gr.inputs.Textbox(default="Auto", label="Number of Predictions"),
gr.inputs.Checkbox(default=False, label="Streaming"),
],
outputs=gr.outputs.Textbox(),
title="GPT-4 Chatbot",
description="Chat with the GPT-4 based chatbot. Configure generation settings and see the chat history for this session.",
)
# Launch the Gradio web app only when run as a script (not on import).
# debug=True surfaces tracebacks in the UI and console during development.
if __name__ == "__main__":
    interface.launch(debug=True)