Mixtral-8x7B-Instruct-v0.1

Sleeping

File size: 4,521 Bytes

9c9ed59
 
c3a6303
 
 
9c9ed59
 
c3a6303
9c9ed59
 
 
 
 
 
c3a6303
 
 
 
d858a86
9c9ed59
 
 
 
e223c51
9c9ed59
 
d1912d1
9c9ed59
 
ca677a9
9c9ed59
 
 
 
 
 
 
 
d858a86
 
 
 
 
 
9c9ed59
 
ca677a9
 
 
 
d1912d1
 
 
 
 
 
 
 
 
9c9ed59
 
 
 
 
 
 
 
 
 
d1912d1
 
 
 
 
9c9ed59
d1912d1
9c9ed59
 
 
 
 
 
 
 
 
 
 
d1912d1
3fd3013
 
 
 
9c9ed59
d1912d1
 
c3a6303
 
 
 
 
d1912d1
c3a6303
d1912d1
 
c3a6303
d858a86
9c9ed59
 
d858a86
 
 
 
 
 
1afe06d
9c9ed59
e95e8e1
 
 
2891dae
c3a6303
 
1afe06d
 
e95e8e1

from huggingface_hub import InferenceClient
import gradio as gr
import random

client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

def generate(
    prompt, system_prompt, history, max_new_tokens, repetition_penalty, temperature, top_p, top_k, random_seed, manual_seed,
):
    """Stream a completion for ``prompt`` from the module-level inference client.

    Args:
        prompt: The latest user message.
        system_prompt: Optional text prepended to the message as
            ``"{system_prompt}, {prompt}"``; skipped when empty.
        history: Earlier conversation turns, forwarded unchanged to
            ``format_prompt``.
        max_new_tokens: Forwarded to ``client.text_generation``.
        repetition_penalty: Forwarded to ``client.text_generation``.
        temperature: Sampling temperature; clamped to at least ``1e-2``.
        top_p: Forwarded to ``client.text_generation`` as a float.
        top_k: Forwarded to ``client.text_generation``.
        random_seed: When truthy, a seed is drawn from ``[1, 100000]`` and
            ``manual_seed`` is ignored.
        manual_seed: Seed used when ``random_seed`` is falsy; converted to
            ``int``. ``None`` is passed through as "no seed".

    Yields:
        The response text accumulated so far, once per streamed token.
    """
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    if random_seed:
        # Module-qualified call: only ``random`` itself is imported, so a bare
        # ``randint`` would raise NameError.
        seed = random.randint(1, 100000)
    elif manual_seed is None:
        seed = None
    else:
        # A numeric UI field may deliver a float (e.g. 42.0); the seed must be an int.
        seed = int(manual_seed)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=seed,
    )

    # Avoid a dangling ", " prefix when no system prompt was supplied.
    message = f"{system_prompt}, {prompt}" if system_prompt else prompt
    formatted_prompt = format_prompt(message, history)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""

    for response in stream:
        output += response.token.text
        # Yield the running text so the caller can render partial output.
        yield output
    return output

def format_prompt(message, history):
    """Build the ``[INST]``-tagged prompt from past turns plus the new message.

    Args:
        message: The new user message; appended exactly once, after the history.
        history: Iterable of ``(user_prompt, bot_response)`` pairs, oldest
            first. ``None`` or an empty iterable means no previous turns.

    Returns:
        The fixed prefix, then each past turn rendered as
        ``[INST] user [/INST] bot `` and finally ``[INST] message [/INST]``.
    """
    # NOTE(review): the "ydney" prefix is preserved byte-for-byte; it looks like
    # a corrupted start-of-prompt marker — confirm the intended value.
    parts = ["ydney"]
    for user_prompt, bot_response in history or ():
        parts.append(f"[INST] {user_prompt} [/INST] {bot_response} ")
    # The new message goes after the loop: inside the loop it would be dropped
    # for an empty history and duplicated once per past turn otherwise.
    parts.append(f"[INST] {message} [/INST]")
    return "".join(parts)

# Extra controls rendered with the chat box. Their values are handed to the
# chat function positionally, so the order here must follow generate()'s
# parameters after the history argument: max_new_tokens, repetition_penalty,
# temperature, top_p, top_k, random_seed, manual_seed (with the system prompt
# first). Repetition penalty therefore comes before temperature.
additional_inputs = [
    gr.Textbox(
        label="System Prompt",
        interactive=True,
    ),
    gr.Slider(
        label="Max new tokens",
        value=1000,
        minimum=0,
        maximum=32768,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens, controls how long is the output",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens, making the AI repeat less itself",
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Top-k",
        value=1,
        minimum=0,
        maximum=100,
        step=1,
        interactive=True,
        info="Higher k means more diverse outputs by considering a range of tokens",
    ),
    gr.Checkbox(
        label="Use Random Seed",
        value=False,
        info="Use a random starting point to initiate the generation process instead of the manual one",
    ),
    gr.Number(
        label="Manual Seed",
        value=42,
        minimum=1,
        # Round to a whole number so the seed reaches the backend as an int.
        precision=0,
        info="Use a manual starting point to initiate the generation process",
    ),
]

# Starter prompts offered in the UI. Each row is the prompt text followed by
# six None placeholders for the remaining example columns.
examples = [
    [text] + [None] * 6
    for text in (
        "I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?",
        "Can you write a short story about a time-traveling detective who solves historical mysteries?",
        "I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?",
        "I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?",
        "Can you explain how the QuickSort algorithm works and provide a Python implementation?",
        "What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?",
    )
]

def _chat_fn(message, history, system_prompt, *sampling_inputs):
    """Adapt ChatInterface's call order to ``generate``'s parameter order.

    ChatInterface invokes its function as ``fn(message, history,
    *additional_inputs)``, whereas ``generate`` takes the system prompt before
    the history. Passing ``generate`` directly would swap the two. This stays a
    generator function so partial responses keep streaming to the UI.
    """
    yield from generate(message, system_prompt, history, *sampling_inputs)


# Build the chat UI around the adapter and start serving it.
gr.ChatInterface(
    fn=_chat_fn,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    title="Mixtral 8x7b Instruct v0.1 Chatbot",
    description="Chatbot space with costumizable options for model: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1 \nSpace made by [Nick088](https://linktr.ee/Nick088) \nIf you get an erorr, you putted a too much high Max_New_Tokens or your system prompt+prompt is too long, shorten up one of these",
    examples=examples,
    concurrency_limit=20,
).launch(show_api=False)