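"""Gradio chat demo for the explorewithai/Loxa-4B model.

Builds a chat prompt from the conversation history plus a system prompt read
from the MEO environment variable, then generates replies with a transformers
text-generation pipeline. (Descriptive docstring added; summarizes the code below.)
"""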
import gradio as gr
import os
from transformers import pipeline, AutoTokenizer
# Load the tokenizer and model using the pipeline
pipe = pipeline("text-generation", model="explorewithai/Loxa-4B", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("explorewithai/Loxa-4B")
# Get the system prompt from environment variables
meo_system = os.environ.get("MEO")
def respond(
    message,
    history,
    max_tokens,
    temperature,
    top_p,
):
    # Format the messages for the pipeline: system prompt, then prior turns, then the new message
    messages = [{"role": "system", "content": meo_system}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Build the prompt with the tokenizer's chat template;
    # add_generation_prompt=True appends the assistant turn marker so the model replies
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Generate the response using the pipeline
    outputs = pipe(
        prompt,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        return_full_text=False,  # We only want the generated part
    )

    # Extract the generated text
    response = outputs[0]["generated_text"]
    return response
# Create the Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    demo.launch()