Update app.py
app.py CHANGED
@@ -9,8 +9,7 @@ import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, AutoModelForImageTextToText
 
-DESCRIPTION = """#
-開頭有<think>"""
+DESCRIPTION = """# 測試"""
 
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
@@ -34,7 +33,7 @@ CHAT_TEMPLATE="""{%- set default_system_message = "A user will ask you to solve
 
 {%- for message in loop_messages %}
     {%- if message['role'] == 'user' %}
-        {{- '[INST]' + message['content'] + '[/INST]
+        {{- '[INST]' + message['content'] + '[/INST]' }}
     {%- elif message['role'] == 'system' %}
         {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
     {%- elif message['role'] == 'assistant' %}
@@ -47,7 +46,7 @@ CHAT_TEMPLATE="""{%- set default_system_message = "A user will ask you to solve
 if torch.cuda.is_available():
     model_id = "mistralai/Mistral-Small-24B-Instruct-2501"
     model = AutoModelForImageTextToText.from_pretrained("AlexHung29629/add_vision_3", torch_dtype=torch.bfloat16, device_map="auto")
-    tokenizer = AutoTokenizer.from_pretrained("
+    tokenizer = AutoTokenizer.from_pretrained("AlexHung29629/add_vision_3")
 
 
 @spaces.GPU
@@ -62,7 +61,7 @@ def generate(
 ) -> Iterator[str]:
     conversation = [*chat_history, {"role": "user", "content": message}]
 
-    input_ids = tokenizer.apply_chat_template(conversation,
+    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
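For reference, a minimal sketch of how the updated tokenizer and apply_chat_template lines behave, assuming the standard transformers API. The sample conversation and the MAX_INPUT_TOKEN_LENGTH value are placeholders for illustration; app.py defines its own constant and builds the conversation from the Gradio chat history.

# Sketch only: mirrors the edited lines above under assumed values.
from transformers import AutoTokenizer

MAX_INPUT_TOKEN_LENGTH = 4096  # placeholder; app.py defines the real constant

# Same repository that the model weights are loaded from in app.py.
tokenizer = AutoTokenizer.from_pretrained("AlexHung29629/add_vision_3")

# Stand-in for [*chat_history, {"role": "user", "content": message}].
conversation = [{"role": "user", "content": "Hello"}]

# With return_tensors="pt", apply_chat_template renders the conversation
# through the chat template and returns a tensor of token ids (shape [1, n]).
input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")

# Keep only the most recent MAX_INPUT_TOKEN_LENGTH tokens, as the Space does.
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
    input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]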