File size: 2,886 Bytes
b1cf6ad
 
 
 
 
 
 
 
 
b2fdc53
 
 
 
 
 
 
 
 
 
 
 
0239c04
b2fdc53
 
0239c04
b2fdc53
 
 
 
 
 
 
 
 
 
 
 
 
0239c04
b2fdc53
 
 
 
 
 
 
 
b1e246f
 
 
 
 
 
 
 
 
 
b2fdc53
14e5286
b2fdc53
 
 
b1cf6ad
b2fdc53
 
 
b1cf6ad
b2fdc53
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import gradio as gr
from gpt4all import GPT4All

# Load a local GPT4All model (quantized orca-mini 3B weights).
# NOTE(review): despite "GPT-4" naming elsewhere in this file, this is an
# open-source 3B-parameter model served locally via gpt4all — not OpenAI GPT-4.
model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')

# Template that wraps the raw user text in a single USER/ASSISTANT chat turn.
prompt_template = 'USER: {0}\nASSISTANT: '

# Generate a response from the local model with user-tunable sampling settings.
def generate_response(prompt, max_tokens=200, temp=0.7, top_k=40, top_p=0.4,
                      repeat_penalty=1.18, repeat_last_n=64, n_batch=8,
                      n_predict=None, streaming=False):
    """Generate a model reply for *prompt* using the given sampling settings.

    ``gr.Interface`` calls its ``fn`` with ONE positional argument per input
    component, so each setting must be a separate parameter. The previous
    ``(prompt, settings)`` dict-based signature raised a TypeError as soon as
    the UI invoked it with ten positional arguments.

    Parameters mirror ``GPT4All.generate``. ``n_predict`` may arrive as a
    string from the "Number of Predictions" textbox; non-numeric text such as
    "Auto" (or empty) is treated as None, i.e. let the model decide.

    Returns the generated text, or a token iterator when ``streaming`` is True.
    """
    # Coerce the textbox value: numeric string -> int, anything else -> None.
    if isinstance(n_predict, str):
        n_predict = int(n_predict) if n_predict.strip().isdigit() else None

    # Wrap the raw user text in the USER/ASSISTANT chat template.
    chat_history_with_prompt = prompt_template.format(prompt)

    # Sliders can deliver floats; the count-like knobs are cast to int to be
    # safe for the underlying C API.
    return model.generate(
        chat_history_with_prompt,
        max_tokens=int(max_tokens),
        temp=temp,
        top_k=int(top_k),
        top_p=top_p,
        repeat_penalty=repeat_penalty,
        repeat_last_n=int(repeat_last_n),
        n_batch=int(n_batch),
        n_predict=n_predict,
        streaming=streaming,
    )

# Build the Gradio UI: a "Chat" tab exposing generate_response with one input
# component per generation setting, and a placeholder "Settings" tab.
# FIX: gr.component(type=...) and gr.OutputComponent do not exist in the
# Gradio API — concrete component classes must be used, and default values
# are passed via `value=`, not `default=`.
with gr.Blocks() as chatbot_demo:
    with gr.Tab("Chat"):
        gr.Interface(
            fn=generate_response,
            # Order matters: these map positionally onto generate_response's
            # parameters (prompt, max_tokens, temp, top_k, top_p,
            # repeat_penalty, repeat_last_n, n_batch, n_predict, streaming).
            inputs=[
                gr.Textbox(label="Chat Input", placeholder="Start the conversation..."),
                gr.Slider(label="Max Tokens", value=200, minimum=1, maximum=512, step=1),
                gr.Slider(label="Temperature", value=0.7, minimum=0.1, maximum=2.0, step=0.01),
                gr.Slider(label="Top-k", value=40, minimum=1, maximum=512, step=1),
                gr.Slider(label="Top-p", value=0.4, minimum=0.01, maximum=1.0, step=0.01),
                gr.Slider(label="Repeat Penalty", value=1.18, minimum=1.0, maximum=2.0, step=0.01),
                gr.Slider(label="Repeat Last n", value=64, minimum=1, maximum=512, step=1),
                gr.Slider(label="Batch Size", value=8, minimum=1, maximum=128, step=1),
                # Free-text so the user can leave it as "Auto"; the handler
                # coerces numeric strings and treats anything else as None.
                gr.Textbox(label="Number of Predictions", placeholder="Auto"),
                gr.Checkbox(label="Streaming", value=False),
            ],
            outputs=gr.Textbox(label="Response"),
            title="GPT-4 Chatbot",
            description="Chat with the GPT-4 based chatbot. Configure generation settings and see the chat history for this session.",
        )

    with gr.Tab("Settings"):
        # Settings UI to be defined here
        gr.Text("Settings tab content")

# Launch with request queueing enabled; debug=True blocks and prints errors
# to the console. NOTE(review): queue(concurrency_count=...) is Gradio 3.x
# API — confirm the installed Gradio version before upgrading.
chatbot_demo.queue(concurrency_count=75).launch(debug=True)