Commit d7ce43f · Update app.py
Parent: 56ad9c6
app.py CHANGED

@@ -208,6 +208,34 @@ def format_prompt_zephyr(message, history, system_message=system_message):
     print(prompt)
     return prompt
 
+def generate(
+    prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
+):
+    temperature = float(temperature)
+    if temperature < 1e-2:
+        temperature = 1e-2
+    top_p = float(top_p)
+
+    generate_kwargs = dict(
+        temperature=temperature,
+        max_tokens=max_new_tokens,
+        max_new_tokens=max_new_tokens,
+        top_p=top_p,
+        stop=LLM_STOP_WORDS
+    )
+    formatted_prompt = format_prompt(prompt, history)
+
+    del generate_kwargs["max_tokens"]
+    del generate_kwargs["stop"]
+
+    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    output = ""
+
+    for response in stream:
+        output += response.token.text
+        yield output
+    return output
+
 def generate_local(
     prompt,
     history,