bird-of-paradise committed on
Commit cfa2a65 · verified · 1 Parent(s): 2fc6f4d

first commit --curriculum callback

Files changed (1)
  1. src/utils/callbacks.py +195 -0
src/utils/callbacks.py ADDED
@@ -0,0 +1,195 @@
+ from transformers import TrainerCallback
+ from transformers.trainer_callback import TrainerControl, TrainerState
+ from transformers.training_args import TrainingArguments
+
+
+ class CurriculumLearningCallback(TrainerCallback):
+     """A callback to implement curriculum learning stages during training."""
+     def __init__(self, debug=False):
+         self.debug = debug
+         self.current_stage = "format_stage"
+         self.stages = {
+             "format_stage": {
+                 "reward_weights": {"format": 1.0, "accuracy": 0.0, "code_execution": 0.0,
+                                    "length": 0.0, "code_ratio": 0.0, "code_timing": 0.0},
+                 "beta": 0.1,   # Higher KL - stay close to base model format
+                 "steps": 1000
+             },
+             "code_execution_stage": {
+                 "reward_weights": {"format": 0.3, "accuracy": 0.0, "code_execution": 0.7,
+                                    "length": 0.0, "code_ratio": 0.0, "code_timing": 0.0},
+                 "beta": 0.05,  # Medium KL
+                 "steps": 2000
+             },
+             "accuracy_stage": {
+                 "reward_weights": {"format": 0.2, "accuracy": 0.8, "code_execution": 0.0,
+                                    "length": 0.0, "code_ratio": 0.0, "code_timing": 0.0},
+                 "beta": 0.01,  # Very low KL - allow exploration
+                 "steps": 3000
+             },
+             "refinement_stage": {
+                 "reward_weights": {"format": 0.1, "accuracy": 0.6, "code_execution": 0.1,
+                                    "length": 0.1, "code_ratio": 0.05, "code_timing": 0.05},
+                 "beta": 0.03,  # Medium-low KL - stabilize learning
+                 "steps": 5000
+             }
+         }
+         self.total_steps = sum(stage_config["steps"] for stage_config in self.stages.values())
+         self.stage_transitions = self._calculate_stage_transitions()
+
+         print(f"Curriculum learning initialized with {len(self.stages)} stages:")
+         for stage, end_step in self.stage_transitions.items():
+             print(f" {stage}: ends at step {end_step}")
+
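+     # With the step counts above, _calculate_stage_transitions yields the
+     # cumulative boundaries: format_stage ends at step 1000,
+     # code_execution_stage at 3000, accuracy_stage at 6000, and
+     # refinement_stage at 11000 (= total_steps).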
+     def _calculate_stage_transitions(self):
+         """Calculate at which step each stage transition occurs."""
+         transitions = {}
+         current_step = 0
+         for stage, config in self.stages.items():
+             current_step += config["steps"]
+             transitions[stage] = current_step
+         return transitions
+
+     def on_train_begin(self, args: TrainingArguments, state: TrainerState,
+                        control: TrainerControl, **kwargs):
+         """Initialize reward weights and beta at the start of training."""
+         # NOTE: the stock Trainer does not pass itself into callback kwargs,
+         # so a trainer reference must be forwarded by the caller (see the
+         # wiring sketch below); otherwise this callback is a no-op.
+         trainer = kwargs.get("trainer")
+         if trainer is None:
+             return
+
+         # Set initial weights and beta from the first stage
+         first_stage = next(iter(self.stages))
+         stage_config = self.stages[first_stage]
+
+         # Update reward weights
+         if hasattr(trainer, "reward_weights") and hasattr(trainer, "reward_func_names"):
+             for i, func_name in enumerate(trainer.reward_func_names):
+                 if func_name in stage_config["reward_weights"]:
+                     trainer.reward_weights[i] = stage_config["reward_weights"][func_name]
+                     if self.debug:
+                         print(f"Setting initial weight for {func_name}: {trainer.reward_weights[i]}")
+         else:
+             print("Warning: Trainer doesn't have reward_weights or reward_func_names attributes")
+
+         # Update beta (KL coefficient)
+         if hasattr(trainer, "beta"):
+             trainer.beta = stage_config.get("beta", 0.1)
+             if self.debug:
+                 print(f"Setting initial beta: {trainer.beta}")
+         else:
+             print("Warning: Trainer doesn't have a beta attribute")
+
+     def on_step_end(self, args: TrainingArguments, state: TrainerState,
+                     control: TrainerControl, **kwargs):
+         """Update reward weights and beta based on the current training stage."""
+         trainer = kwargs.get("trainer")
+         if trainer is None:
+             return
+
+         # Check whether it's time to transition to the next stage
+         current_step = state.global_step
+
+         # Determine the current stage. Sort by transition step, not by the
+         # dict items themselves: a bare sorted() over the items would order
+         # the stages alphabetically and break the curriculum order.
+         previous_stage = self.current_stage
+         for stage, transition_step in sorted(self.stage_transitions.items(),
+                                              key=lambda item: item[1]):
+             if current_step <= transition_step:
+                 self.current_stage = stage
+                 break
+
+         # If the stage changed, log the transition
+         if previous_stage != self.current_stage:
+             print(f"Transitioning from {previous_stage} to {self.current_stage} at step {current_step}")
+
+         # Get the config for the current stage
+         stage_config = self.stages[self.current_stage]
+
+         # Update reward weights
+         if hasattr(trainer, "reward_weights") and hasattr(trainer, "reward_func_names"):
+             for i, func_name in enumerate(trainer.reward_func_names):
+                 if func_name in stage_config["reward_weights"]:
+                     new_weight = stage_config["reward_weights"][func_name]
+                     if trainer.reward_weights[i] != new_weight:
+                         trainer.reward_weights[i] = new_weight
+                         if self.debug:
+                             print(f"Updated weight for {func_name}: {new_weight}")
+
+         # Update beta (KL coefficient)
+         if hasattr(trainer, "beta"):
+             new_beta = stage_config.get("beta", 0.1)
+             if trainer.beta != new_beta:
+                 trainer.beta = new_beta
+                 if self.debug:
+                     print(f"Updated beta: {new_beta}")
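The callback only does anything if a trainer object shows up in the callback kwargs, which the stock Hugging Face Trainer does not provide, and it assumes the trainer exposes reward_func_names, reward_weights, and beta, which matches TRL's GRPOTrainer in recent versions. Below is a minimal wiring sketch under those assumptions, not part of the commit: TrainerAwareCurriculum and the stub reward functions and dataset are hypothetical names introduced here for illustration.

    # Minimal wiring sketch (assumes TRL's GRPOTrainer; everything named
    # here besides CurriculumLearningCallback is a hypothetical placeholder).
    from datasets import Dataset
    from trl import GRPOConfig, GRPOTrainer

    # GRPOTrainer derives reward_func_names from each function's __name__,
    # so these stubs are deliberately named after the stage keys.
    def format(completions, **kwargs):
        return [0.0 for _ in completions]

    def accuracy(completions, **kwargs):
        return [0.0 for _ in completions]

    class TrainerAwareCurriculum(CurriculumLearningCallback):
        """Forwards an injected trainer reference into the callback kwargs."""
        def __init__(self, debug=False):
            super().__init__(debug=debug)
            self.trainer = None  # injected after the trainer is constructed

        def on_train_begin(self, args, state, control, **kwargs):
            super().on_train_begin(args, state, control, trainer=self.trainer, **kwargs)

        def on_step_end(self, args, state, control, **kwargs):
            super().on_step_end(args, state, control, trainer=self.trainer, **kwargs)

    dataset = Dataset.from_dict({"prompt": ["Solve 2 + 2.", "Solve 3 * 7."]})
    curriculum = TrainerAwareCurriculum(debug=True)
    trainer = GRPOTrainer(
        model="Qwen/Qwen2.5-0.5B-Instruct",  # example model id
        reward_funcs=[format, accuracy],
        args=GRPOConfig(output_dir="outputs", beta=0.1),
        train_dataset=dataset,
        callbacks=[curriculum],
    )
    curriculum.trainer = trainer  # inject the reference the callback expects
    trainer.train()

Note that for the stage weights to apply, the reward function names must match the keys used in self.stages ("format", "accuracy", "code_execution", and so on); a function named format_reward, for example, would be silently skipped by the weight-update loop.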