Victarry committed
Commit d3e7e66 · Parent: 65e77cc

Add support for 1F1B-interleave-overlap.

README.md CHANGED
@@ -57,6 +57,12 @@ uv run python main.py strategy=1f1b_overlap num_devices=4 num_stages=4 num_batch
 ```
 ![1f1b_overlap](assets/1f1b_overlap.png)
 
+Running for 1F1B-interleave-overlap strategy:
+```bash
+uv run python main.py strategy=1f1b_interleave_overlap num_devices=4 num_stages=4 num_batches=8
+```
+![1f1b_interleave_overlap](assets/1f1b_interleave_overlap.png)
+
 ## Configuration
 
 The default configuration is in `conf/config.yaml`. You can override any parameter on the command line or create configuration groups for different scenarios.
assets/1f1b_interleave_overlap.png ADDED

Git LFS Details

  • SHA256: 4447a83144505b4e82b231aca58411c4a7f3e85b06f1e40cd864179e08f94514
  • Pointer size: 130 Bytes
  • Size of remote file: 84.3 kB
main.py CHANGED
@@ -1,5 +1,6 @@
 from src.execution_model import ScheduleConfig
 from src.strategies import (
+    generate_1f1b_interleave_overlap_schedule,
     generate_1f1b_interleave_schedule,
     generate_1f1b_overlap_schedule,
     generate_1f1b_schedule,
@@ -23,6 +24,8 @@ def main(cfg: DictConfig) -> None:
         run_zero_bubble_1p(cfg)
     elif cfg.strategy == "1f1b_overlap":
         run_1f1b_overlap(cfg)
+    elif cfg.strategy == "1f1b_interleave_overlap":
+        run_1f1b_interleave_overlap(cfg)
     else:
         raise ValueError(f"Unknown strategy: {cfg.strategy}")
 
@@ -107,6 +110,24 @@ def run_1f1b_overlap(cfg: DictConfig) -> None:
     schedule.execute()
     visualize_pipeline_parallelism_dash(schedule, port=cfg.visualization_port)
 
+def run_1f1b_interleave_overlap(cfg: DictConfig) -> None:
+    """Run 1F1B interleave overlapped pipeline parallelism simulation."""
+    # Convert OmegaConf to dict for op_times if it exists
+    op_times = (
+        OmegaConf.to_container(cfg.op_times) if hasattr(cfg, "op_times") else None
+    )
+
+    schedule_config = ScheduleConfig(
+        num_devices=cfg.num_devices,
+        num_stages=cfg.num_stages,
+        num_batches=cfg.num_batches,
+        p2p_latency=cfg.p2p_latency,
+        placement_strategy="interleave",
+        op_times=op_times,
+    )
+    schedule = generate_1f1b_interleave_overlap_schedule(schedule_config)
+    schedule.execute()
+    visualize_pipeline_parallelism_dash(schedule, port=cfg.visualization_port)
 
 if __name__ == "__main__":
     main()
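
The new strategy plugs into the existing Hydra entry point, so the fields read in `run_1f1b_interleave_overlap` can be overridden on the command line. A hypothetical invocation (assuming `p2p_latency` is exposed in `conf/config.yaml`, as the `cfg.p2p_latency` lookup above suggests):

```bash
uv run python main.py strategy=1f1b_interleave_overlap \
    num_devices=4 num_stages=4 num_batches=8 p2p_latency=0.1
```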
src/execution_model.py CHANGED
@@ -158,8 +158,8 @@ class ScheduleConfig:
             # Check if we have a specific time for this combination
             if (op_type1, op_type2) in self.overlapped_op_times:
                 return self.overlapped_op_times[(op_type1, op_type2)]
-            # Otherwise, use the max of individual times plus a small overhead
-            return max(self.get_op_time(op_type1, stage_id), self.get_op_time(op_type2, stage_id)) + 0.2
+            # Otherwise, use the max of individual times
+            return max(self.get_op_time(op_type1, stage_id), self.get_op_time(op_type2, stage_id))
 
         if op_type not in self.op_times:
             raise ValueError(f"Invalid operation type: {op_type}")
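
This change only affects the fallback path: an explicit entry in `overlapped_op_times` still takes precedence, but when none is given, the estimate for a fused pair of operations drops the fixed 0.2 overhead. A minimal sketch with made-up per-op times:

```python
# Purely illustrative per-stage times (not from the repo's config).
forward_time, backward_time = 1.0, 2.0

old_fallback = max(forward_time, backward_time) + 0.2  # before this commit: 2.2
new_fallback = max(forward_time, backward_time)        # after this commit: 2.0
```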
src/strategies.py CHANGED
@@ -130,116 +130,104 @@ def generate_1f1b_overlap_schedule(config: ScheduleConfig):
     return schedule
 
 
-# Some codes are copied from Megatron-LM
-def generate_1f1b_interleave_schedule(config: ScheduleConfig):
-    schedule = Schedule(config)
-
-    def get_pp_rank_microbatches(
-        num_microbatches,
-        num_devices,
-        device_id,
-        num_stages_per_device,
-        microbatch_group_size_per_vp_stage,
-    ):
-        """Get the number of total, warmup, and remaining microbatches in PP scheduling."""
-        total_num_microbatches = num_microbatches * num_stages_per_device
-        are_all_microbatches_in_warmup = False
-
-        if num_devices > 1:
-            if num_stages_per_device is None:
-                # forward_backward_pipelining_without_interleaving
-                num_warmup_microbatches = num_devices - device_id - 1
-            else:
-                # forward_backward_pipelining_with_interleaving
-                # Run (num_model_chunks-1)*microbatch_group_size_per_vp_stage on
-                # all workers, followed by more microbatches after depending on
-                # stage ID (more forward passes for earlier stages, later stages can
-                # immediately start with 1F1B).
-                num_warmup_microbatches = (num_devices - device_id - 1) * 2
-                num_warmup_microbatches += (num_stages_per_device - 1) * microbatch_group_size_per_vp_stage
-        else:
-            # forward_backward_no_pipelining
-            num_warmup_microbatches = 1
-
-        if num_warmup_microbatches >= total_num_microbatches:
-            num_warmup_microbatches = total_num_microbatches
-            are_all_microbatches_in_warmup = True
-        num_microbatches_remaining = total_num_microbatches - num_warmup_microbatches
-
-        return (
-            total_num_microbatches,
-            are_all_microbatches_in_warmup,
-            num_warmup_microbatches,
-            num_microbatches_remaining,
-        )
-
-
-    def get_schedule_table(num_microbatches, num_model_chunks, microbatch_group_size_per_vp_stage):
-        """Get the schedule table for PP scheduling.
-
-        Create a tunable schedule lookup table.
-        The schedule lookup table uses the virtual_microbatch_id to find the corresponding microbatch_id and model_chunk_id.
-        For example, the tunable schedule table for PP2 N3M5 with VP2 is constructed as below:
-        virtual_microbatch_id | 0 1 2 3 4 5 6 7 8 9
-        microbatch_id         | 0 1 2 0 1 2 3 4 3 4
-        model_chunk_id        | 0 0 0 1 1 1 0 0 1 1
-        """
-        schedule_table = []
-        for min_microbatch_id_in_group in range(
-            0, num_microbatches, microbatch_group_size_per_vp_stage
-        ):
-            if min_microbatch_id_in_group + microbatch_group_size_per_vp_stage >= num_microbatches:
-                # Construct schedule for the last microbatch group
-                schedule_table.extend(
-                    [
-                        (microbatch_id, model_chunk_id)
-                        for model_chunk_id in range(num_model_chunks)
-                        for microbatch_id in range(min_microbatch_id_in_group, num_microbatches)
-                    ]
-                )
-            else:
-                # Construct schedule for other microbatch groups
-                schedule_table.extend(
-                    [
-                        (microbatch_id, model_chunk_id)
-                        for model_chunk_id in range(num_model_chunks)
-                        for microbatch_id in range(
-                            min_microbatch_id_in_group,
-                            min_microbatch_id_in_group + microbatch_group_size_per_vp_stage,
-                        )
-                    ]
-                )
-        return schedule_table
-
-
-    def convert_schedule_table_to_order(num_warmup_microbatches, num_model_chunks, schedule_table):
-        """Convert a tunable schedule lookup table to the te.make_graphed_callables() accepted
-        order format. For example, the tunable schedule table for PP2 N3M5 with VP2 is as below:
-        virtual_microbatch_id | 0 1 2 3 4 5 6 7 8 9
-        microbatch_id         | 0 1 2 0 1 2 3 4 3 4
-        model_chunk_id        | 0 0 0 1 1 1 0 0 1 1
-
-        Then the forward backward separated order is:
-        forward  | 1 1 1 2 2 2 1 1 2 2
-        backward | -2 -2 -2 -1 -1 -1 -2 -2 -1 -1
-
-        If num_warmup_microbatches is 5, the output order is:
-        1 1 1 2 2 2 -2 1 -2 1 -2 2 -1 2 -1 -1 -2 -2 -1 -1
-        """
-        _, model_chunk_id_table = zip(*schedule_table)
-        forward_order = [chunk_id + 1 for chunk_id in model_chunk_id_table]
-        backward_order = [chunk_id - num_model_chunks for chunk_id in model_chunk_id_table]
-        order = forward_order[:num_warmup_microbatches]
-        for i in range(num_warmup_microbatches, len(forward_order)):
-            order.append(forward_order[i])
-            order.append(backward_order[i - num_warmup_microbatches])
-        if num_warmup_microbatches > 0:
-            order.extend(backward_order[-num_warmup_microbatches:])
-        return order
+def _get_pp_rank_microbatches(
+    num_microbatches,
+    num_devices,
+    device_id,
+    num_stages_per_device,
+    microbatch_group_size_per_vp_stage,
+):
+    """Get the number of total, warmup, and remaining microbatches in PP scheduling."""
+    total_num_microbatches = num_microbatches * num_stages_per_device
+
+    if num_devices > 1:
+        # Run (num_model_chunks-1)*microbatch_group_size_per_vp_stage on
+        # all workers, followed by more microbatches after depending on
+        # stage ID (more forward passes for earlier stages, later stages can
+        # immediately start with 1F1B).
+        num_warmup_microbatches = (num_devices - device_id - 1) * 2
+        num_warmup_microbatches += (num_stages_per_device - 1) * microbatch_group_size_per_vp_stage
+    else:
+        # forward_backward_no_pipelining
+        num_warmup_microbatches = 1
+
+    if num_warmup_microbatches >= total_num_microbatches:
+        num_warmup_microbatches = total_num_microbatches
+
+    return num_warmup_microbatches
+
+
+def _get_schedule_table(num_microbatches, num_model_chunks, microbatch_group_size_per_vp_stage):
+    """Get the schedule table for PP scheduling.
+
+    Create a tunable schedule lookup table.
+    The schedule lookup table uses the virtual_microbatch_id to find the corresponding microbatch_id and model_chunk_id.
+    For example, the tunable schedule table for PP2 N3M5 with VP2 is constructed as below:
+    virtual_microbatch_id | 0 1 2 3 4 5 6 7 8 9
+    microbatch_id         | 0 1 2 0 1 2 3 4 3 4
+    model_chunk_id        | 0 0 0 1 1 1 0 0 1 1
+    """
+    schedule_table = []
+    for min_microbatch_id_in_group in range(
+        0, num_microbatches, microbatch_group_size_per_vp_stage
+    ):
+        if min_microbatch_id_in_group + microbatch_group_size_per_vp_stage >= num_microbatches:
+            # Construct schedule for the last microbatch group
+            schedule_table.extend(
+                [
+                    (microbatch_id, model_chunk_id)
+                    for model_chunk_id in range(num_model_chunks)
+                    for microbatch_id in range(min_microbatch_id_in_group, num_microbatches)
+                ]
+            )
+        else:
+            # Construct schedule for other microbatch groups
+            schedule_table.extend(
+                [
+                    (microbatch_id, model_chunk_id)
+                    for model_chunk_id in range(num_model_chunks)
+                    for microbatch_id in range(
+                        min_microbatch_id_in_group,
+                        min_microbatch_id_in_group + microbatch_group_size_per_vp_stage,
+                    )
+                ]
+            )
+    return schedule_table
+
+
+def _convert_schedule_table_to_order(num_warmup_microbatches, num_model_chunks, schedule_table):
+    """Convert a tunable schedule lookup table to the te.make_graphed_callables() accepted
+    order format. For example, the tunable schedule table for PP2 N3M5 with VP2 is as below:
+    virtual_microbatch_id | 0 1 2 3 4 5 6 7 8 9
+    microbatch_id         | 0 1 2 0 1 2 3 4 3 4
+    model_chunk_id        | 0 0 0 1 1 1 0 0 1 1
+
+    Then the forward backward separated order is:
+    forward  | 1 1 1 2 2 2 1 1 2 2
+    backward | -2 -2 -2 -1 -1 -1 -2 -2 -1 -1
+
+    If num_warmup_microbatches is 5, the output order is:
+    1 1 1 2 2 2 -2 1 -2 1 -2 2 -1 2 -1 -1 -2 -2 -1 -1
+    """
+    _, model_chunk_id_table = zip(*schedule_table)
+    forward_order = [chunk_id + 1 for chunk_id in model_chunk_id_table]
+    backward_order = [chunk_id - num_model_chunks for chunk_id in model_chunk_id_table]
+    order = forward_order[:num_warmup_microbatches]
+    for i in range(num_warmup_microbatches, len(forward_order)):
+        order.append(forward_order[i])
+        order.append(backward_order[i - num_warmup_microbatches])
+    if num_warmup_microbatches > 0:
+        order.extend(backward_order[-num_warmup_microbatches:])
+    return order
+
+
+# Some codes are copied from Megatron-LM
+def generate_1f1b_interleave_schedule(config: ScheduleConfig):
+    schedule = Schedule(config)
 
     for device_id in range(config.num_devices):
         microbatch_group_size_per_vp_stage = config.num_devices
-        total_num_microbatches, are_all_microbatches_in_warmup, num_warmup_microbatches, num_microbatches_remaining = get_pp_rank_microbatches(
+        num_warmup_microbatches = _get_pp_rank_microbatches(
            config.num_batches,
            config.num_devices,
            device_id,
@@ -247,13 +235,13 @@ def generate_1f1b_interleave_schedule(config: ScheduleConfig):
             microbatch_group_size_per_vp_stage,
         )
 
-        schedule_table = get_schedule_table(
+        schedule_table = _get_schedule_table(
             config.num_batches,
             config.num_stages_per_device,
             microbatch_group_size_per_vp_stage,
         )
 
-        order = convert_schedule_table_to_order(
+        order = _convert_schedule_table_to_order(
             num_warmup_microbatches,
             num_model_chunks=config.num_stages_per_device,
             schedule_table=schedule_table,
@@ -280,3 +268,89 @@ def generate_1f1b_interleave_schedule(config: ScheduleConfig):
             schedule.get_op(micro_batch_id, stage_id, op_type)
         )
     return schedule
+
+def generate_1f1b_interleave_overlap_schedule(config: ScheduleConfig):
+    schedule = Schedule(config)
+
+    for device_id in range(config.num_devices):
+        microbatch_group_size_per_vp_stage = config.num_devices
+        num_warmup_microbatches = _get_pp_rank_microbatches(
+            config.num_batches,
+            config.num_devices,
+            device_id,
+            config.num_stages_per_device,
+            microbatch_group_size_per_vp_stage,
+        )
+
+        schedule_table = _get_schedule_table(
+            config.num_batches,
+            config.num_stages_per_device,
+            microbatch_group_size_per_vp_stage,
+        )
+
+        # NOTE: Add one more warmup microbatch for overlapped operations!
+        num_warmup_microbatches += 1
+        order = _convert_schedule_table_to_order(
+            num_warmup_microbatches,
+            num_model_chunks=config.num_stages_per_device,
+            schedule_table=schedule_table,
+        )
+
+        cur_stage_microbatch_id = {}
+        for i in range(1, config.num_stages_per_device+1):
+            cur_stage_microbatch_id[i] = 0
+            cur_stage_microbatch_id[-i] = 0
+        i = 0
+
+        num_overlapped_batches = len(order) - num_warmup_microbatches * 2
+        while i < len(order):
+            if i < num_warmup_microbatches:
+                order_item = order[i]
+                assert order_item > 0
+                op_type = "forward"
+                micro_batch_id = cur_stage_microbatch_id[order_item]
+                cur_stage_microbatch_id[order_item] = cur_stage_microbatch_id[order_item] + 1
+
+                stage_id = schedule.device_queues[device_id].stages[abs(order_item)-1]
+                schedule.device_queues[device_id].add_operation(
+                    schedule.get_op(micro_batch_id, stage_id, op_type)
+                )
+                i += 1
+            elif i >= num_warmup_microbatches and i < num_warmup_microbatches + num_overlapped_batches - 1:
+                order_item_a = order[i]
+                order_item_b = order[i+1]
+
+                op_type_a = "forward" if order_item_a > 0 else "backward"
+                micro_batch_id_a = cur_stage_microbatch_id[order_item_a]
+                cur_stage_microbatch_id[order_item_a] = cur_stage_microbatch_id[order_item_a] + 1
+
+                op_type_b = "forward" if order_item_b > 0 else "backward"
+                micro_batch_id_b = cur_stage_microbatch_id[order_item_b]
+                cur_stage_microbatch_id[order_item_b] = cur_stage_microbatch_id[order_item_b] + 1
+
+                stage_id_a = schedule.device_queues[device_id].stages[abs(order_item_a)-1]
+                stage_id_b = schedule.device_queues[device_id].stages[abs(order_item_b)-1]
+
+                op_a = schedule.get_op(micro_batch_id_a, stage_id_a, op_type_a)
+                op_b = schedule.get_op(micro_batch_id_b, stage_id_b, op_type_b)
+                overlapped_op = OverlappedOperation([op_a, op_b])
+                schedule.register_overlapped_operation(overlapped_op)
+                schedule.device_queues[device_id].add_operation(overlapped_op)
+
+                i += 2
+            else:
+                assert i >= num_warmup_microbatches + num_overlapped_batches
+                order_item = order[i]
+                assert order_item < 0
+                op_type = "backward"
+                micro_batch_id = cur_stage_microbatch_id[order_item]
+                cur_stage_microbatch_id[order_item] = cur_stage_microbatch_id[order_item] + 1
+
+                stage_id = schedule.device_queues[device_id].stages[abs(order_item)-1]
+                schedule.device_queues[device_id].add_operation(
+                    schedule.get_op(micro_batch_id, stage_id, op_type)
+                )
+                i += 1
+
+
+    return schedule
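
The helper docstrings above spell out the PP2 N3M5 VP2 example. A small sketch (calling the module-level helpers as refactored in this commit) reproduces that table and order; `generate_1f1b_interleave_overlap_schedule` consumes the same order after bumping the warmup count by one so that each steady-state forward/backward pair can be fused into an `OverlappedOperation`:

```python
from src.strategies import _convert_schedule_table_to_order, _get_schedule_table

# PP2 N3M5 with VP2: 5 microbatches, 2 model chunks, microbatch group size 3.
table = _get_schedule_table(
    num_microbatches=5, num_model_chunks=2, microbatch_group_size_per_vp_stage=3
)
# [(0, 0), (1, 0), (2, 0), (0, 1), (1, 1), (2, 1), (3, 0), (4, 0), (3, 1), (4, 1)]

order = _convert_schedule_table_to_order(
    num_warmup_microbatches=5, num_model_chunks=2, schedule_table=table
)
# [1, 1, 1, 2, 2, 2, -2, 1, -2, 1, -2, 2, -1, 2, -1, -1, -2, -2, -1, -1]
```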