Mixtral-8x7B-Instruct-v0.1

Sleeping

File size: 4,252 Bytes

9c9ed59
 
 
 
d858a86
9c9ed59
 
 
 
 
 
7464c03
d858a86
9c9ed59
 
 
 
e223c51
9c9ed59
 
d1912d1
9c9ed59
 
ca677a9
9c9ed59
 
 
 
 
 
 
 
d858a86
 
 
 
 
 
9c9ed59
 
ca677a9
 
 
 
d1912d1
 
 
 
 
 
 
 
 
9c9ed59
 
 
 
 
 
 
 
 
 
d1912d1
 
 
 
 
9c9ed59
d1912d1
9c9ed59
 
 
 
 
 
 
 
 
 
 
d1912d1
3fd3013
 
 
 
9c9ed59
d1912d1
 
 
 
 
 
 
d858a86
 
 
 
 
 
9c9ed59
 
 
d858a86
 
 
 
 
 
1afe06d
9c9ed59
e95e8e1
 
 
2891dae
d858a86
96c2246
1afe06d
 
e95e8e1

from huggingface_hub import InferenceClient
import gradio as gr

def generate(
    prompt, system_prompt, history, max_new_tokens, repetition_penalty, temperature, top_p, top_k, seed, model_name
):
    """Stream a chat completion for ``prompt`` from the Hugging Face Inference API.

    The system prompt (when non-empty) is prepended to the user message, the
    conversation is rendered with ``format_prompt``, and the accumulated reply
    is yielded every time a new token arrives.

    NOTE(review): ``gr.ChatInterface`` invokes its callback positionally as
    ``(message, history, *additional_inputs)``. This signature lists
    ``system_prompt`` before ``history`` and ``repetition_penalty`` before
    ``temperature``, so call it with keyword arguments (or through an adapter)
    rather than relying on positional order.

    Args:
        prompt: The new user message.
        system_prompt: Optional instruction text placed before the message.
        history: Iterable of ``(user_message, bot_reply)`` pairs.
        max_new_tokens: Upper bound on generated tokens (coerced to ``int``).
        repetition_penalty: Penalty applied to repeated tokens.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        top_p: Nucleus-sampling probability mass.
        top_k: Number of highest-probability tokens considered (coerced to ``int``).
        seed: Random seed for sampling (coerced to ``int``).
        model_name: Model id handed to ``InferenceClient``.

    Yields:
        The reply text accumulated so far, once per streamed token.

    Returns:
        The full reply; only observable as ``StopIteration.value`` or through
        ``yield from``.
    """
    # Keep the temperature strictly positive (presumably the backend rejects
    # a temperature of 0 - confirm against the serving API).
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    client = InferenceClient(model_name)

    # UI widgets may hand back floats (e.g. 42.0); the integer-valued
    # generation parameters are normalised before being sent.
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=int(max_new_tokens),
        top_p=top_p,
        top_k=int(top_k),
        repetition_penalty=float(repetition_penalty),
        do_sample=True,
        seed=int(seed),
    )

    # Only prepend the system prompt when there is one; otherwise the model
    # would see a message starting with a stray ", ".
    user_message = f"{system_prompt}, {prompt}" if system_prompt else prompt
    formatted_prompt = format_prompt(user_message, history)

    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""

    for response in stream:
        output += response.token.text
        yield output
    return output

def format_prompt(message, history):
    """Render a conversation in the Mistral/Mixtral instruct prompt format.

    Layout: ``<s>[INST] user [/INST] reply</s> ... [INST] message [/INST]``.
    Every completed turn from ``history`` is emitted first, and the new
    ``message`` is appended exactly once at the end, so it is present even
    when there is no history yet.

    Args:
        message: The new user message to be answered.
        history: Iterable of ``(user_prompt, bot_response)`` pairs; ``None``
            or an empty iterable means the conversation is just starting.

    Returns:
        The prompt string to send to the text-generation endpoint.
    """
    past_turns = [
        # Each finished assistant reply is closed with the end-of-sequence marker.
        f"[INST] {user_prompt} [/INST] {bot_response}</s> "
        for user_prompt, bot_response in (history or [])
    ]
    # "<s>" is the beginning-of-sequence marker of the instruct template.
    return "<s>" + "".join(past_turns) + f"[INST] {message} [/INST]"

# Extra controls shown next to the chat box. Gradio passes their values to the
# chat callback positionally, in this order, after the message and the history.
additional_inputs = [
    gr.Textbox(
        label="System Prompt",
        interactive=True,
    ),
    gr.Slider(
        label="Max new tokens",
        value=1000,
        minimum=0,
        maximum=32768,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens, controls how long is the output",
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens, making the AI repeat less itself",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Top-k",
        value=1,
        minimum=0,
        maximum=100,
        step=1,
        interactive=True,
        info="Higher k means more diverse outputs by considering a range of tokens",
    ),
    gr.Number(
        label="Seed",
        value=42,
        minimum=1,
        info="A starting point to initiate the generation process",
    ),
    gr.Dropdown(
        label="Model",
        # Instruct-tuned model first (default), then the base model. The base
        # checkpoint's id is "Mixtral-8x7B-v0.1"; the previous entry
        # "Mixtral-8x7B-Mixtral-v0.1" does not name a published model.
        choices=["mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mixtral-8x7B-v0.1"],
        value="mistralai/Mixtral-8x7B-Instruct-v0.1",
        info="Choose the model to use",
    ),
]

# Clickable sample prompts. Each row is the message followed by six ``None``
# placeholders, i.e. no example value is supplied for the extra controls.
examples = [
    [question, *([None] * 6)]
    for question in (
        "I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?",
        "Can you write a short story about a time-traveling detective who solves historical mysteries?",
        "I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?",
        "I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?",
        "Can you explain how the QuickSort algorithm works and provide a Python implementation?",
        "What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?",
    )
]

def _chat_fn(
    message, history, system_prompt, max_new_tokens, temperature, repetition_penalty, top_p, top_k, seed, model_name
):
    """Bridge Gradio's positional callback arguments to ``generate``.

    ``gr.ChatInterface`` calls its callback as
    ``(message, history, *additional_inputs)``, with the additional inputs in
    the order they are declared in ``additional_inputs``. ``generate`` declares
    its parameters in a different order, so the values are forwarded by keyword
    to make sure each control reaches the parameter it is labelled for.

    This is a generator function (``yield from``) so that Gradio still treats
    the callback as streaming.
    """
    yield from generate(
        prompt=message,
        system_prompt=system_prompt,
        history=history,
        max_new_tokens=max_new_tokens,
        repetition_penalty=repetition_penalty,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        seed=seed,
        model_name=model_name,
    )


# Build the chat UI and start serving it; the auto-generated API page is hidden.
gr.ChatInterface(
    fn=_chat_fn,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    title="Mixtral 8x7b Versions Chatbot",
    description="If you get an erorr, you putted a too much high Max_New_Tokens or your system prompt+prompt is too long, shorten up one of these",
    examples=examples,
    concurrency_limit=20,
).launch(show_api=False)