Spaces:

blitz1809
/

sre-arena

Sleeping

blitz1809 committed 29 days ago

Commit

e43130f

1 Parent(s): c616409

fix: switch to bf16 for HF Jobs (newer transformers + bnb prefers bf16, no GradScaler needed)

Files changed (4) hide show

configs/colab_demo.yaml CHANGED Viewed

@@ -9,7 +9,7 @@ per_generation:
 model:
   name: "Qwen/Qwen2.5-3B-Instruct"
   load_in_4bit: true
-  bnb_4bit_compute_dtype: "float16"
 lora:
   r: 16

 model:
   name: "Qwen/Qwen2.5-3B-Instruct"
   load_in_4bit: true
+  bnb_4bit_compute_dtype: "bfloat16"
 lora:
   r: 16

configs/l4_training.yaml CHANGED Viewed

@@ -9,7 +9,7 @@ per_generation:
 model:
   name: "Qwen/Qwen2.5-3B-Instruct"
   load_in_4bit: true
-  bnb_4bit_compute_dtype: "float16"
 lora:
   r: 16

 model:
   name: "Qwen/Qwen2.5-3B-Instruct"
   load_in_4bit: true
+  bnb_4bit_compute_dtype: "bfloat16"
 lora:
   r: 16

training/train_attacker.py CHANGED Viewed

@@ -160,7 +160,7 @@ def train_attacker(
         model_name,
         quantization_config=bnb_config,
         device_map="auto",
-        dtype=torch.float16,
     )
     model = prepare_model_for_kbit_training(model)
@@ -198,7 +198,7 @@ def train_attacker(
         num_generations=tr["rollouts_per_episode"],
         temperature=tr["temperature"],
         top_p=tr["top_p"],
-        fp16=True,
     )
     if opponent is not None:

         model_name,
         quantization_config=bnb_config,
         device_map="auto",
+        dtype=torch.bfloat16,
     )
     model = prepare_model_for_kbit_training(model)
         num_generations=tr["rollouts_per_episode"],
         temperature=tr["temperature"],
         top_p=tr["top_p"],
+        bf16=True,
     )
     if opponent is not None:

training/train_defender.py CHANGED Viewed

@@ -179,7 +179,7 @@ def train_defender(
         model_name,
         quantization_config=bnb_config,
         device_map="auto",
-        dtype=torch.float16,
     )
     model = prepare_model_for_kbit_training(model)
@@ -217,7 +217,7 @@ def train_defender(
         num_generations=tr["rollouts_per_episode"],
         temperature=tr["temperature"],
         top_p=tr["top_p"],
-        fp16=True,
     )
     reward_fn = make_reward_function(task_id=cfg["env"]["task_id"])

         model_name,
         quantization_config=bnb_config,
         device_map="auto",
+        dtype=torch.bfloat16,
     )
     model = prepare_model_for_kbit_training(model)
         num_generations=tr["rollouts_per_episode"],
         temperature=tr["temperature"],
         top_p=tr["top_p"],
+        bf16=True,
     )
     reward_fn = make_reward_function(task_id=cfg["env"]["task_id"])