jonathanmutal commited on
Commit
7de9c57
·
verified ·
1 Parent(s): 85db968

Update app.py

Browse files

Changing the interface

Files changed (1) hide show
  1. app.py +92 -1
app.py CHANGED
@@ -1,3 +1,94 @@
 
1
  import gradio as gr
2
 
3
- gr.load("models/mistralai/Mistral-7B-Instruct-v0.3").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import InferenceClient
2
  import gradio as gr
3
 
4
# Serverless Inference API client for the hosted Mistral-7B-Instruct-v0.3 model.
# NOTE(review): gated models usually need an HF token from the environment — confirm
# the Space/host has one configured, otherwise requests will fail with 401/403.
client = InferenceClient(
    "mistralai/Mistral-7B-Instruct-v0.3"
)
7
+
8
def format_prompt(message, history):
    """Build a Mistral-instruct prompt from the chat history.

    Every past (user, assistant) pair is rendered as
    ``[INST] user [/INST] assistant</s> `` and the new ``message`` is
    appended as a final, unanswered ``[INST]`` block.
    """
    pieces = ["<s>"]
    for user_turn, assistant_turn in history:
        pieces.append(f"[INST] {user_turn} [/INST]")
        pieces.append(f" {assistant_turn}</s> ")
    pieces.append(f"[INST] {message} [/INST]")
    return "".join(pieces)
15
+
16
def generate(
    prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
):
    """Stream a completion for ``prompt`` given the chat ``history``.

    Yields the accumulated response text after every streamed token, so the
    UI can render the reply incrementally.
    """
    # Clamp the temperature away from zero — the inference API rejects 0.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    sampling_args = {
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "top_p": top_p,
        "repetition_penalty": repetition_penalty,
        "do_sample": True,
        # Fixed seed — NOTE(review): this makes sampling reproducible per
        # request; confirm that determinism is intended.
        "seed": 42,
    }

    full_prompt = format_prompt(prompt, history)

    stream = client.text_generation(
        full_prompt, **sampling_args, stream=True, details=True, return_full_text=False
    )

    accumulated = ""
    for event in stream:
        accumulated += event.token.text
        yield accumulated
    return accumulated
42
+
43
+
44
# Extra sampling controls exposed in the ChatInterface side panel; the order
# matches the extra positional parameters of ``generate``.
additional_inputs=[
    # Passed as ``temperature`` to generate().
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    # Passed as ``max_new_tokens`` to generate().
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=1048,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    # Passed as ``top_p`` to generate().
    # NOTE(review): slider default is 0.90 while generate() defaults to 0.95 —
    # confirm which default is intended.
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    # Passed as ``repetition_penalty`` to generate().
    # NOTE(review): slider default 1.2 differs from generate()'s default 1.0 —
    # confirm which default is intended.
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    )
]
82
+
83
+
84
# Build and launch the streaming chat UI backed by ``generate``.
gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    title="""Mistral 7B v0.3"""
).launch(show_api=False)

# NOTE(review): ``launch()`` normally blocks outside a notebook, so the two
# gr.load(...).launch() calls below are likely never reached — and even if they
# ran, each would try to start a separate app. Confirm whether these should be
# removed or merged into the interface above (e.g. via gr.TabbedInterface).
gr.load("models/ehristoforu/dalle-3-xl-v2").launch()

gr.load("models/microsoft/Phi-3-mini-4k-instruct").launch()