Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
model_name = "berkeley-nest/Starling-LM-7B-alpha"
|
2 |
|
3 |
-
title = "👋🏻Welcome to Tonic's 💫🌠Starling 7B"
|
4 |
-
description = "You can use [💫🌠Starling 7B](https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha) or duplicate it for local use or on Hugging Face! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."
|
5 |
examples = [
|
6 |
[
|
7 |
"The following dialogue is a conversation between Emmanuel Macron and Elon Musk:", # user_message
|
@@ -26,6 +26,8 @@ import accelerate
|
|
26 |
import bitsandbytes
|
27 |
|
28 |
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
|
29 |
temperature=0.4
|
30 |
max_new_tokens=240
|
31 |
top_p=0.92
|
@@ -45,7 +47,7 @@ class StarlingBot:
|
|
45 |
def predict(self, user_message, assistant_message, system_prompt, do_sample, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
|
46 |
try:
|
47 |
conversation = f" <s> [INST] {self.system_prompt} [INST] {assistant_message if assistant_message else ''} </s> [/INST] {user_message} </s> "
|
48 |
-
input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=
|
49 |
input_ids = input_ids.to(device)
|
50 |
response = model.generate(
|
51 |
input_ids=input_ids,
|
|
|
1 |
model_name = "berkeley-nest/Starling-LM-7B-alpha"
|
2 |
|
3 |
+
title = """👋🏻Welcome to Tonic's 💫🌠Starling 7B"""
|
4 |
+
description = """You can use [💫🌠Starling 7B](https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha) or duplicate it for local use or on Hugging Face! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."""
|
5 |
examples = [
|
6 |
[
|
7 |
"The following dialogue is a conversation between Emmanuel Macron and Elon Musk:", # user_message
|
|
|
26 |
import bitsandbytes
|
27 |
|
28 |
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
29 |
+
"bos_token_id": 1,
|
30 |
+
"eos_token_id": 32000,
|
31 |
temperature=0.4
|
32 |
max_new_tokens=240
|
33 |
top_p=0.92
|
|
|
47 |
def predict(self, user_message, assistant_message, system_prompt, do_sample, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
|
48 |
try:
|
49 |
conversation = f" <s> [INST] {self.system_prompt} [INST] {assistant_message if assistant_message else ''} </s> [/INST] {user_message} </s> "
|
50 |
+
input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=True)
|
51 |
input_ids = input_ids.to(device)
|
52 |
response = model.generate(
|
53 |
input_ids=input_ids,
|