Tonic committed on
Commit
46b7e93
·
1 Parent(s): 1f03a85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -17
app.py CHANGED
@@ -2,17 +2,6 @@ model_name = "berkeley-nest/Starling-LM-7B-alpha"
2
 
3
  title = """👋🏻Welcome to Tonic's 💫🌠Starling 7B"""
4
  description = """You can use [💫🌠Starling 7B](https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha) or duplicate it for local use or on Hugging Face! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."""
5
- examples = [
6
- [
7
- "The following dialogue is a conversation between Emmanuel Macron and Elon Musk:", # user_message
8
- "[Emmanuel Macron]: Hello Mr. Musk. Thank you for receiving me today.", # assistant_message
9
- 0.9, # temperature
10
- 450, # max_new_tokens
11
- 0.90, # top_p
12
- 1.9, # repetition_penalty
13
- ]
14
- ]
15
-
16
 
17
  import transformers
18
  from transformers import AutoConfig, AutoTokenizer, AutoModel, AutoModelForCausalLM
@@ -35,11 +24,9 @@ top_p=0.92
35
  repetition_penalty=1.7
36
 
37
  tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
38
- model = transformers.AutoModelForCausalLM.from_pretrained(model_name,
39
- device_map="auto",
40
- torch_dtype="auto"
41
- )
42
- # model.eval()
43
 
44
  class StarlingBot:
45
  def __init__(self, system_prompt="I am Starling-7B by Tonic-AI, I ready to do anything to help my user."):
@@ -47,7 +34,7 @@ class StarlingBot:
47
 
48
  def predict(self, user_message, assistant_message, system_prompt, do_sample, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
49
  try:
50
- conversation = f" <s> [INST] {self.system_prompt} [INST] {assistant_message if assistant_message else ''} </s> [/INST] {user_message} </s> "
51
  input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=True)
52
  input_ids = input_ids.to(device)
53
  response = model.generate(
@@ -72,6 +59,16 @@ class StarlingBot:
72
  torch.cuda.empty_cache()
73
 
74
  starling_bot = StarlingBot()
 
 
 
 
 
 
 
 
 
 
75
 
76
  iface = gr.Interface(
77
  fn=starling_bot.predict,
 
2
 
3
  title = """👋🏻Welcome to Tonic's 💫🌠Starling 7B"""
4
  description = """You can use [💫🌠Starling 7B](https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha) or duplicate it for local use or on Hugging Face! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."""
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  import transformers
7
  from transformers import AutoConfig, AutoTokenizer, AutoModel, AutoModelForCausalLM
 
24
  repetition_penalty=1.7
25
 
26
  tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
27
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
28
+ model.eval()
29
+ os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'
 
 
30
 
31
  class StarlingBot:
32
  def __init__(self, system_prompt="I am Starling-7B by Tonic-AI, I ready to do anything to help my user."):
 
34
 
35
  def predict(self, user_message, assistant_message, system_prompt, do_sample, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
36
  try:
37
+ conversation = f" <s> [INST] {self.system_prompt} [INST] {assistant_message if assistant_message else ''} </s> [/INST] {user_message} </s> "
38
  input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=True)
39
  input_ids = input_ids.to(device)
40
  response = model.generate(
 
59
  torch.cuda.empty_cache()
60
 
61
  starling_bot = StarlingBot()
62
+ examples = [
63
+ [
64
+ "The following dialogue is a conversation between Emmanuel Macron and Elon Musk:", # user_message
65
+ "[Emmanuel Macron]: Hello Mr. Musk. Thank you for receiving me today.", # assistant_message
66
+ 0.9, # temperature
67
+ 450, # max_new_tokens
68
+ 0.90, # top_p
69
+ 1.9, # repetition_penalty
70
+ ]
71
+ ]
72
 
73
  iface = gr.Interface(
74
  fn=starling_bot.predict,