Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -45,8 +45,8 @@ CHAT_TEMPLATE="""{%- set default_system_message = "A user will ask you to solve
 
 if torch.cuda.is_available():
     model_id = "mistralai/Mistral-Small-24B-Instruct-2501"
-    model = AutoModelForCausalLM.from_pretrained(
-    tokenizer = AutoTokenizer.from_pretrained(
+    model = AutoModelForCausalLM.from_pretrained("AlexHung29629/fix_magistra4b", torch_dtype=torch.bfloat16, device_map="auto")
+    tokenizer = AutoTokenizer.from_pretrained("AlexHung29629/fix_magistra4")
 
 
 @spaces.GPU
@@ -61,7 +61,7 @@ def generate(
 ) -> Iterator[str]:
     conversation = [*chat_history, {"role": "user", "content": message}]
 
-    input_ids = tokenizer.apply_chat_template(conversation,
+    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
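Taken together, the two hunks point the Space at the AlexHung29629/fix_magistra4b checkpoint (with the AlexHung29629/fix_magistra4 tokenizer) instead of the hard-coded Mistral-Small-24B model, and pass return_tensors="pt" to apply_chat_template so it returns a token-id tensor that the length check can slice. Below is a minimal sketch of how the changed lines fit together, not the full app; it assumes the standard transformers API, and MAX_INPUT_TOKEN_LENGTH plus the sample conversation are placeholders standing in for values defined elsewhere in app.py.

    # Hypothetical sketch of the post-change flow, not the full app.py.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    MAX_INPUT_TOKEN_LENGTH = 4096  # assumed value; the real one lives elsewhere in app.py

    if torch.cuda.is_available():
        model = AutoModelForCausalLM.from_pretrained(
            "AlexHung29629/fix_magistra4b", torch_dtype=torch.bfloat16, device_map="auto"
        )
        tokenizer = AutoTokenizer.from_pretrained("AlexHung29629/fix_magistra4")

        # Placeholder conversation; in the Space this comes from the chat history.
        conversation = [{"role": "user", "content": "Solve 2 + 2."}]

        # return_tensors="pt" makes apply_chat_template return a token-id tensor,
        # which is what the shape check below relies on.
        input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")

        # Keep only the most recent tokens when the prompt exceeds the budget.
        if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
            input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]

        output_ids = model.generate(input_ids.to(model.device), max_new_tokens=128)
        print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Note that the trim slices from the left, keeping the most recent MAX_INPUT_TOKEN_LENGTH tokens, so the latest turns of the conversation survive when older context is dropped.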