File size: 4,182 Bytes
9c9ed59
 
c3a6303
1822503
 
 
9c9ed59
1822503
 
 
 
 
 
 
508d7db
9c9ed59
c709684
9c9ed59
 
 
 
 
 
fd47081
9c9ed59
 
 
 
e093f93
9c9ed59
 
fd47081
9c9ed59
 
ca677a9
9c9ed59
 
 
 
 
 
 
 
 
 
ca677a9
 
 
 
4b01506
 
 
45761fb
4b01506
 
 
 
 
9c9ed59
 
7119a57
9c9ed59
 
 
 
 
 
 
4b01506
 
 
 
 
9c9ed59
4b01506
9c9ed59
 
 
 
 
 
 
 
 
 
3cbb361
 
 
 
 
 
 
 
 
fd47081
822039c
fd47081
 
822039c
fd47081
9c9ed59
 
4b01506
 
 
 
 
 
1afe06d
9c9ed59
e95e8e1
 
 
2891dae
4b01506
fd47081
1afe06d
 
e95e8e1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from huggingface_hub import InferenceClient
import gradio as gr

client = InferenceClient(
    "mistralai/Mixtral-8x7B-Instruct-v0.1"
)

def format_prompt(message, history):
  prompt = "<s>"
  for user_prompt, bot_response in history:
    prompt += f"[INST] {user_prompt} [/INST]"
    prompt += f" {bot_response}</s> "
  prompt += f"[INST] {message} [/INST]"
  return prompt

def generate(
    system_prompt, prompt, history, max_new_tokens, temperature, repetition_penalty, top_p, top_k, seed,
):
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)


    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=seed,
    )

    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""

    for response in stream:
        output += response.token.text
        yield output
    return output


additional_inputs=[
    gr.Textbox(
        label="System Prompt",
        interactive=True,
    ),
    gr.Slider(
        label="Max new tokens",
        value=1000,
        minimum=100,
        maximum=32768,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens, controls how long is the output",
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens, making the AI repeat less itself",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Top-k",
        value=1,
        minimum=0,
        maximum=100,
        step=1,
        interactive=True,
        info="Higher k means more diverse outputs by considering a range of tokens",
    ),
    gr.Number(
        label="Seed",
        value=42,
        minimum=1,
        info="Use an integer starting point to initiate the generation process",
    ),
]

examples=[["I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?", None, None, None, None, None, None],
          ["Can you write a short story about a time-traveling detective who solves historical mysteries?", None, None, None, None, None, None],
          ["I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?", None, None, None, None, None, None],
          ["I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?", None, None, None, None, None, None],
          ["Can you explain how the QuickSort algorithm works and provide a Python implementation?", None, None, None, None, None, None],
          ["What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?", None, None, None, None, None, None],
         ]

gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    title="Mixtral 8x7b Instruct v0.1 Chatbot",
    description="Chatbot space with costumizable options for model: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1\nIf you get an erorr, you putted a too much high Max_New_Tokens or your system prompt+prompt is too long, shorten up one of these",
    examples=examples,
    concurrency_limit=20,
).launch(show_api=False)