File size: 4,078 Bytes
9c9ed59
 
31e0a12
c3a6303
8329725
9c9ed59
1822503
 
 
 
 
 
 
508d7db
9c9ed59
0287f0d
9c9ed59
fd47081
61c5d7e
 
9e30ec0
61c5d7e
9e30ec0
61c5d7e
9c9ed59
 
 
 
e093f93
9c9ed59
 
 
 
0287f0d
9c9ed59
 
 
 
 
 
 
 
 
 
4b01506
 
 
45761fb
4b01506
 
 
 
 
9c9ed59
 
7119a57
9c9ed59
 
 
 
 
 
 
4b01506
 
 
 
 
9c9ed59
4b01506
9c9ed59
 
 
 
 
 
 
 
 
 
3cbb361
 
 
 
 
 
 
 
 
fd47081
822039c
fd47081
 
61c5d7e
fd47081
9c9ed59
 
0287f0d
 
 
 
 
 
1afe06d
9c9ed59
e95e8e1
 
 
2891dae
e21f915
75d3abc
1afe06d
37eeddb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import random

import gradio as gr
import torch
from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

def format_prompt(message, history):
  """Build a Mixtral-instruct prompt from the chat history plus the new message.

  Each past (user, bot) turn is rendered as "[INST] user [/INST] bot</s> ",
  and the new message is appended as a final open "[INST] ... [/INST]" block
  after the leading "<s>" token.
  """
  turns = [f"[INST] {user} [/INST] {bot}</s> " for user, bot in history]
  return "<s>" + "".join(turns) + f"[INST] {message} [/INST]"

def generate(
    prompt, history, max_new_tokens, temperature, repetition_penalty, top_p, top_k, seed,
):
    """Stream a Mixtral response for *prompt* given the chat *history*.

    Yields the accumulated output text after each streamed token, so the UI
    can render the reply incrementally.

    A ``seed`` of 0 means "pick a random seed"; any other value is used as-is.
    BUG FIX: the original seeded the *local* torch RNG, which has no effect on
    the remote inference endpoint, and used ``random`` without importing it.
    The seed is now passed to the API call itself (assumes the endpoint honors
    the ``seed`` parameter of ``text_generation`` — TODO confirm).
    """
    if seed == 0:
        seed = random.randint(1, 100000)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=seed,
    )

    formatted_prompt = format_prompt(prompt, history)
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )

    # Accumulate streamed tokens and re-yield the growing string each time.
    output = ""
    for response in stream:
        output += response.token.text
        yield output


# Extra UI controls shown under the chat box; their values are passed to
# generate() positionally, in this order: max_new_tokens, temperature,
# repetition_penalty, top_p, top_k, seed.
additional_inputs=[
    gr.Slider(
        label="Max new tokens",
        value=1000,
        minimum=100,
        maximum=32768,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens, controls how long is the output",
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens, making the AI repeat less itself",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Top-k",
        value=1,
        minimum=0,
        maximum=100,
        step=1,
        interactive=True,
        info="Higher k means more diverse outputs by considering a range of tokens",
    ),
    # 0 is the sentinel for "use a random seed" inside generate().
    gr.Number(
        label="Seed",
        value=42,
        minimum=1,
        info="Use an integer starting point to initiate the generation process, put 0 for a random",
    ),
]

# Sample prompts for the ChatInterface. Each row is [message, *additional_inputs];
# the five trailing Nones leave the extra controls at their defaults (Gradio
# supplies the sixth from the Number component's own default).
examples=[["I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?", None, None, None, None, None],
          ["Can you write a short story about a time-traveling detective who solves historical mysteries?", None, None, None, None, None],
          ["I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?", None, None, None, None, None],
          ["I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?", None, None, None, None, None],
          ["Can you explain how the QuickSort algorithm works and provide a Python implementation?", None, None, None, None, None],
          ["What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?", None, None, None, None, None],
         ]

# Build and launch the chat UI. The six additional inputs map onto generate()'s
# extra parameters in order. FIX: corrected typos and broken grammar in the
# user-facing description ("costumizable", "erorr", "you putted a too much high").
gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    title="Mixtral 8x7b Instruct v0.1",
    description="Chatbot Hugging Face space made by [Nick088](https://linktr.ee/Nick088) with customizable options for model: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1<br>If you get an error, either Max New Tokens is set too high or your prompt is too long; reduce one of them",
    examples=examples,
).launch(show_api=False, share=True)