[misc] feat: enable grad ckpt as default and enable chunk prefill as default (#147)
volcengine · Jan 27, 2025 · 9fca71d · 9fca71d
1 parent 54603cb
commit 9fca71d
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 5 deletions.
diff --git a/examples/ppo_trainer/run_qwen2-7b_rm.sh b/examples/ppo_trainer/run_qwen2-7b_rm.sh
@@ -24,6 +24,7 @@ python3 -m verl.trainer.main_ppo \
     actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.1 \
     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \
+    actor_rollout_ref.model.enable_gradient_checkpointing=True \
     actor_rollout_ref.actor.fsdp_config.param_offload=False \
     actor_rollout_ref.actor.fsdp_config.grad_offload=False \
     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
@@ -37,8 +38,8 @@ python3 -m verl.trainer.main_ppo \
     critic.model.use_remove_padding=True \
     critic.optim.lr_warmup_steps_ratio=0.05 \
     critic.model.path=Qwen/Qwen2-7B-Instruct \
-    critic.model.enable_gradient_checkpointing=False \
-    critic.ppo_micro_batch_size_per_gpu=16 \
+    critic.model.enable_gradient_checkpointing=True \
+    critic.ppo_micro_batch_size_per_gpu=32 \
     critic.model.fsdp_config.param_offload=False \
     critic.model.fsdp_config.grad_offload=False \
     critic.model.fsdp_config.optimizer_offload=False \

diff --git a/verl/trainer/config/ppo_trainer.yaml b/verl/trainer/config/ppo_trainer.yaml
@@ -16,7 +16,7 @@ actor_rollout_ref:
     path: ~/models/deepseek-llm-7b-chat
     external_lib: null
     override_config: { }
-    enable_gradient_checkpointing: False
+    enable_gradient_checkpointing: True
     use_remove_padding: False
   actor:
     strategy: fsdp  # This is for backward-compatibility
@@ -81,7 +81,7 @@ actor_rollout_ref:
     log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
     log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
     disable_log_stats: True
-    enable_chunked_prefill: False # could get higher throughput
+    enable_chunked_prefill: True # could get higher throughput
     # for hf rollout
     do_sample: True
     # number of responses (i.e. num sample times)
@@ -100,7 +100,7 @@ critic:
     tokenizer_path: ${actor_rollout_ref.model.path}
     override_config: { }
     external_lib: ${actor_rollout_ref.model.external_lib}
-    enable_gradient_checkpointing: False
+    enable_gradient_checkpointing: True
     use_remove_padding: False
     fsdp_config:
       param_offload: False