Tonic committed on
Commit
46b7e93
·
1 Parent(s): 1f03a85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -17
app.py CHANGED
@@ -2,17 +2,6 @@ model_name = "berkeley-nest/Starling-LM-7B-alpha"
2
 
3
  title = """👋🏻Welcome to Tonic's 💫🌠Starling 7B"""
4
  description = """You can use [💫🌠Starling 7B](https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha) or duplicate it for local use or on Hugging Face! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."""
5
- examples = [
6
- [
7
- "The following dialogue is a conversation between Emmanuel Macron and Elon Musk:", # user_message
8
- "[Emmanuel Macron]: Hello Mr. Musk. Thank you for receiving me today.", # assistant_message
9
- 0.9, # temperature
10
- 450, # max_new_tokens
11
- 0.90, # top_p
12
- 1.9, # repetition_penalty
13
- ]
14
- ]
15
-
16
 
17
  import transformers
18
  from transformers import AutoConfig, AutoTokenizer, AutoModel, AutoModelForCausalLM
@@ -35,11 +24,9 @@ top_p=0.92
35
  repetition_penalty=1.7
36
 
37
  tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
38
- model = transformers.AutoModelForCausalLM.from_pretrained(model_name,
39
- device_map="auto",
40
- torch_dtype="auto"
41
- )
42
- # model.eval()
43
 
44
  class StarlingBot:
45
  def __init__(self, system_prompt="I am Starling-7B by Tonic-AI, I ready to do anything to help my user."):
@@ -47,7 +34,7 @@ class StarlingBot:
47
 
48
  def predict(self, user_message, assistant_message, system_prompt, do_sample, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
49
  try:
50
- conversation = f" <s> [INST] {self.system_prompt} [INST] {assistant_message if assistant_message else ''} </s> [/INST] {user_message} </s> "
51
  input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=True)
52
  input_ids = input_ids.to(device)
53
  response = model.generate(
@@ -72,6 +59,16 @@ class StarlingBot:
72
  torch.cuda.empty_cache()
73
 
74
  starling_bot = StarlingBot()
 
 
 
 
 
 
 
 
 
 
75
 
76
  iface = gr.Interface(
77
  fn=starling_bot.predict,
 
2
 
3
  title = """👋🏻Welcome to Tonic's 💫🌠Starling 7B"""
4
  description = """You can use [💫🌠Starling 7B](https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha) or duplicate it for local use or on Hugging Face! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."""
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  import transformers
7
  from transformers import AutoConfig, AutoTokenizer, AutoModel, AutoModelForCausalLM
 
24
  repetition_penalty=1.7
25
 
26
  tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
27
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
28
+ model.eval()
29
+ os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'
 
 
30
 
31
  class StarlingBot:
32
  def __init__(self, system_prompt="I am Starling-7B by Tonic-AI, I ready to do anything to help my user."):
 
34
 
35
  def predict(self, user_message, assistant_message, system_prompt, do_sample, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
36
  try:
37
+ conversation = f" <s> [INST] {self.system_prompt} [INST] {assistant_message if assistant_message else ''} </s> [/INST] {user_message} </s> "
38
  input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=True)
39
  input_ids = input_ids.to(device)
40
  response = model.generate(
 
59
  torch.cuda.empty_cache()
60
 
61
  starling_bot = StarlingBot()
62
+ examples = [
63
+ [
64
+ "The following dialogue is a conversation between Emmanuel Macron and Elon Musk:", # user_message
65
+ "[Emmanuel Macron]: Hello Mr. Musk. Thank you for receiving me today.", # assistant_message
66
+ 0.9, # temperature
67
+ 450, # max_new_tokens
68
+ 0.90, # top_p
69
+ 1.9, # repetition_penalty
70
+ ]
71
+ ]
72
 
73
  iface = gr.Interface(
74
  fn=starling_bot.predict,