Nick088 committed
Commit 7119a57 · verified · 1 Parent(s): 33cd43e

Update app.py

Files changed (1):
  app.py +17 -33
app.py CHANGED
@@ -15,7 +15,7 @@ def format_prompt(message, history):
     return prompt
 
 def generate(
-    prompt, system_prompt, history, max_new_tokens, repetition_penalty, temperature, top_p, top_k, seed
+    prompt, history, system_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
 ):
     temperature = float(temperature)
     if temperature < 1e-2:
@@ -26,10 +26,9 @@ def generate(
         temperature=temperature,
         max_new_tokens=max_new_tokens,
         top_p=top_p,
-        top_k=top_k,
         repetition_penalty=repetition_penalty,
         do_sample=True,
-        seed=seed,
+        seed=42,
     )
 
     formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
@@ -45,20 +44,12 @@ def generate(
 additional_inputs=[
     gr.Textbox(
         label="System Prompt",
+        max_lines=1,
         interactive=True,
     ),
-    gr.Slider(
-        label="Max new tokens",
-        value=1000,
-        minimum=100,
-        maximum=32768,
-        step=64,
-        interactive=True,
-        info="The maximum numbers of new tokens, controls how long is the output",
-    ),
     gr.Slider(
         label="Temperature",
-        value=0.7,
+        value=0.9,
         minimum=0.0,
         maximum=1.0,
         step=0.05,
@@ -66,13 +57,13 @@ additional_inputs=[
         info="Higher values produce more diverse outputs",
     ),
     gr.Slider(
-        label="Repetition penalty",
-        value=1.2,
-        minimum=1.0,
-        maximum=2.0,
-        step=0.05,
+        label="Max new tokens",
+        value=256,
+        minimum=0,
+        maximum=1048,
+        step=64,
         interactive=True,
-        info="Penalize repeated tokens, making the AI repeat less itself",
+        info="The maximum numbers of new tokens",
     ),
     gr.Slider(
         label="Top-p (nucleus sampling)",
@@ -84,19 +75,13 @@ additional_inputs=[
         info="Higher values sample more low-probability tokens",
     ),
     gr.Slider(
-        label="Top-k",
-        value=1,
-        minimum=0,
-        maximum=100,
-        step=1,
+        label="Repetition penalty",
+        value=1.2,
+        minimum=1.0,
+        maximum=2.0,
+        step=0.05,
         interactive=True,
-        info="Higher k means more diverse outputs by considering a range of tokens",
-    ),
-    gr.Number(
-        label="Seed",
-        value=42,
-        minimum=1,
-        info="A starting point to initiate the generation process",
+        info="Penalize repeated tokens",
     )
 ]
 
@@ -112,8 +97,7 @@ gr.ChatInterface(
     fn=generate,
     chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
     additional_inputs=additional_inputs,
-    title="Mixtral-8x7B-Instruct-v0.1",
-    description="If you get an erorr, you putted a too much high Max_New_Tokens or your system prompt+prompt is too long, shorten up one of these",
+    title="Mixtral 46.7B",
     examples=examples,
     concurrency_limit=20,
 ).launch(show_api=False)
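
For reference, here is a minimal sketch of how the changed parts of app.py would read after this commit. The diff does not show the client setup, the body of the temperature clamp, the format_prompt implementation, or the streaming loop, so those parts are assumptions modeled on the stock Mixtral chat Space this app follows; in particular the model id mistralai/Mixtral-8x7B-Instruct-v0.1 is assumed, not taken from this diff.

from huggingface_hub import InferenceClient

# Assumed: the model id is not visible in this diff.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

def format_prompt(message, history):
    # Assumed: the standard Mixtral instruct template used by similar Spaces.
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST] {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt

def generate(
    prompt, history, system_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
):
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2  # assumed clamp; the inference API rejects non-positive temperatures

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,  # pinned by this commit; no longer a user-facing input
    )

    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)

    # Assumed streaming loop: yield partial output so gr.ChatInterface updates live.
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""
    for response in stream:
        output += response.token.text
        yield output

Net effect of the commit: the top_k and user-facing seed inputs are removed, the seed is pinned to 42, the max-new-tokens slider is capped at 1048 instead of 32768, and the title and description are simplified.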