Update app.py
app.py
CHANGED
@@ -24,7 +24,7 @@ examples = [
 ]
 
 model_name = "berkeley-nest/Starling-RM-7B-alpha"
-base_model = "
+base_model = "michaelfeil/ct2fast-Llama-2-7b-chat-hf"
 
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -46,9 +46,8 @@ class StarlingBot:
     def __init__(self, system_prompt="The following dialogue is a conversation"):
         self.system_prompt = system_prompt
 
-    def predict(self, user_message, assistant_message, system_prompt,
+    def predict(self, user_message, assistant_message, system_prompt, do_sample, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
         conversation = f" <s> [INST] {self.system_prompt} [INST] {assistant_message if assistant_message else ''} </s> [/INST] {user_message} </s> "
-        # Encode the conversation using the tokenizer
         input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=False)
         input_ids = input_ids.to(device)
         response = model.generate(
@@ -67,21 +66,18 @@ class StarlingBot:
         response_text = tokenizer.decode(response[0], skip_special_tokens=True)
         return response_text
 
-
-StarlingBot_bot = StarlingBot()
-
-starling_bot = StarlingBot()  # Renamed for consistency
+starling_bot = StarlingBot()
 
 iface = gr.Interface(
-    fn=starling_bot.predict,
+    fn=starling_bot.predict,
     title=title,
     description=description,
-
+    # examples=examples,
    inputs=[
-        gr.Textbox(label="User Message", type="text", lines=5),
+        gr.Textbox(label="🌟🤩User Message", type="text", lines=5),
        gr.Textbox(label="💫🌠Starling Assistant Message or Instructions ", lines=2),
        gr.Textbox(label="💫🌠Starling System Prompt or Instruction", lines=2),
-        gr.Checkbox(label="Advanced", value=False),
+        gr.Checkbox(label="Advanced", value=False),
        gr.Slider(label="Temperature", value=0.7, minimum=0.05, maximum=1.0, step=0.05),
        gr.Slider(label="Max new tokens", value=100, minimum=25, maximum=256, step=1),
        gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.01, maximum=0.99, step=0.05),
@@ -89,5 +85,4 @@ iface = gr.Interface(
     ],
     outputs="text",
     theme="ParityError/Anime"
-)
-
+)
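The hunk above cuts off at `response = model.generate(`, so the arguments this commit actually passes to `generate()` are not visible. Below is a minimal sketch of how the new `predict` signature could thread its sampling parameters through, assuming a plain `transformers` causal-LM setup; the `AutoModelForCausalLM`/`AutoTokenizer` loading and everything inside the `generate()` call are illustrative assumptions, not what the Space necessarily does:

```python
# Sketch only: the real app.py builds its model/tokenizer earlier in the
# file, and the actual generate() arguments are cut off in this diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "berkeley-nest/Starling-RM-7B-alpha"
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

def predict(user_message, assistant_message, system_prompt, do_sample,
            temperature=0.4, max_new_tokens=700, top_p=0.99,
            repetition_penalty=1.9):
    # Same prompt template as in the diff (note the unusual [INST]/</s> placement).
    conversation = (
        f" <s> [INST] {system_prompt} [INST] "
        f"{assistant_message if assistant_message else ''} </s> [/INST] "
        f"{user_message} </s> "
    )
    input_ids = tokenizer.encode(
        conversation, return_tensors="pt", add_special_tokens=False
    ).to(device)
    output = model.generate(
        input_ids,
        do_sample=do_sample,                  # assumed: "Advanced" checkbox gates sampling
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        pad_token_id=tokenizer.eos_token_id,  # assumed: silences open-ended-generation warning
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)
```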
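Separately, the conversation template opens two `[INST]` blocks and closes `</s>` before `[/INST]`, which does not match the Llama-2-chat convention that the new `base_model` name (`michaelfeil/ct2fast-Llama-2-7b-chat-hf`) points at. For comparison only, the standard Llama-2 chat layout looks like this; whether the Space intends to follow it is not stated in the commit:

```python
# Standard Llama-2-chat prompt layout, shown for comparison with the
# template in the diff; this helper is hypothetical, not from the commit.
def llama2_prompt(system_prompt: str, user_message: str) -> str:
    return (
        f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
        f"{user_message} [/INST]"
    )
```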
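Two wiring details worth noting. `gr.Interface` binds the `inputs` list positionally onto `fn`'s parameters, so the components visible in the diff map, in order, to `user_message`, `assistant_message`, `system_prompt`, `do_sample`, `temperature`, `max_new_tokens`, and `top_p`; `repetition_penalty` keeps its signature default of 1.9 unless the input component on the elided line between the last slider and `],` supplies it, and the slider defaults (0.7, 100, 0.90) override the signature defaults (0.4, 700, 0.99) whenever the UI is used. Also, `predict` accepts a `system_prompt` argument but interpolates `self.system_prompt` into the conversation string, so the value typed into the system-prompt textbox never reaches the model.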