Update app.py
Browse files
app.py
CHANGED
@@ -2,17 +2,6 @@ model_name = "berkeley-nest/Starling-LM-7B-alpha"
|
|
2 |
|
3 |
title = """👋🏻Welcome to Tonic's 💫🌠Starling 7B"""
|
4 |
description = """You can use [💫🌠Starling 7B](https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha) or duplicate it for local use or on Hugging Face! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."""
|
5 |
-
examples = [
|
6 |
-
[
|
7 |
-
"The following dialogue is a conversation between Emmanuel Macron and Elon Musk:", # user_message
|
8 |
-
"[Emmanuel Macron]: Hello Mr. Musk. Thank you for receiving me today.", # assistant_message
|
9 |
-
0.9, # temperature
|
10 |
-
450, # max_new_tokens
|
11 |
-
0.90, # top_p
|
12 |
-
1.9, # repetition_penalty
|
13 |
-
]
|
14 |
-
]
|
15 |
-
|
16 |
|
17 |
import transformers
|
18 |
from transformers import AutoConfig, AutoTokenizer, AutoModel, AutoModelForCausalLM
|
@@ -35,11 +24,9 @@ top_p=0.92
|
|
35 |
repetition_penalty=1.7
|
36 |
|
37 |
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
|
38 |
-
model =
|
39 |
-
|
40 |
-
|
41 |
-
)
|
42 |
-
# model.eval()
|
43 |
|
44 |
class StarlingBot:
|
45 |
def __init__(self, system_prompt="I am Starling-7B by Tonic-AI, I ready to do anything to help my user."):
|
@@ -47,7 +34,7 @@ class StarlingBot:
|
|
47 |
|
48 |
def predict(self, user_message, assistant_message, system_prompt, do_sample, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
|
49 |
try:
|
50 |
-
conversation = f" <s> [INST] {self.system_prompt} [INST]
|
51 |
input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=True)
|
52 |
input_ids = input_ids.to(device)
|
53 |
response = model.generate(
|
@@ -72,6 +59,16 @@ class StarlingBot:
|
|
72 |
torch.cuda.empty_cache()
|
73 |
|
74 |
starling_bot = StarlingBot()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
iface = gr.Interface(
|
77 |
fn=starling_bot.predict,
|
|
|
2 |
|
3 |
title = """👋🏻Welcome to Tonic's 💫🌠Starling 7B"""
|
4 |
description = """You can use [💫🌠Starling 7B](https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha) or duplicate it for local use or on Hugging Face! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
import transformers
|
7 |
from transformers import AutoConfig, AutoTokenizer, AutoModel, AutoModelForCausalLM
|
|
|
24 |
repetition_penalty=1.7
|
25 |
|
26 |
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
|
27 |
+
# Configure the CUDA caching allocator *before* the model is loaded.
# from_pretrained(..., device_map="auto") may place weights on the GPU, and
# PyTorch picks up PYTORCH_CUDA_ALLOC_CONF when the allocator is first set up,
# so assigning it after the load (as this file did before) is too late for
# the setting to take effect.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'
# Load the weights in bfloat16 with automatic device placement.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
# Put the module in evaluation mode for inference.
model.eval()
|
|
|
|
|
30 |
|
31 |
class StarlingBot:
|
32 |
def __init__(self, system_prompt="I am Starling-7B by Tonic-AI, I ready to do anything to help my user."):
|
|
|
34 |
|
35 |
def predict(self, user_message, assistant_message, system_prompt, do_sample, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
|
36 |
try:
|
37 |
+
conversation = f" <s> [INST] {self.system_prompt} [INST] {assistant_message if assistant_message else ''} </s> [/INST] {user_message} </s> "
|
38 |
input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=True)
|
39 |
input_ids = input_ids.to(device)
|
40 |
response = model.generate(
|
|
|
59 |
torch.cuda.empty_cache()
|
60 |
|
61 |
starling_bot = StarlingBot()
|
62 |
+
# Pre-filled example rows for the interface; each inner list is one example.
# NOTE(review): this row carries six values, but predict() is declared as
# (user_message, assistant_message, system_prompt, do_sample, temperature,
# max_new_tokens, top_p, repetition_penalty). If the interface inputs follow
# that parameter order, 0.9 and 450 would land in system_prompt and do_sample
# rather than temperature and max_new_tokens -- confirm against the `inputs`
# list passed to gr.Interface and add the two missing values if needed.
examples = [
|
63 |
+
[
|
64 |
+
"The following dialogue is a conversation between Emmanuel Macron and Elon Musk:", # user_message
|
65 |
+
"[Emmanuel Macron]: Hello Mr. Musk. Thank you for receiving me today.", # assistant_message
|
66 |
+
0.9, # temperature
|
67 |
+
450, # max_new_tokens
|
68 |
+
0.90, # top_p
|
69 |
+
1.9, # repetition_penalty
|
70 |
+
]
|
71 |
+
]
|
72 |
|
73 |
iface = gr.Interface(
|
74 |
fn=starling_bot.predict,
|