diff --git a/verl/workers/actor/dp_actor.py b/verl/workers/actor/dp_actor.py
index 6f384b0..c3344cc 100644
--- a/verl/workers/actor/dp_actor.py
+++ b/verl/workers/actor/dp_actor.py
@@ -142,7 +142,7 @@ def _make_minibatch_iterator(self, data: DataProto) -> Iterable[DataProto]:
         data = data.select(batch_keys=select_keys)
         return data.make_iterator(mini_batch_size=self.config.ppo_mini_batch_size,
                                   epochs=self.config.ppo_epochs,
-                                  dataloader_kwargs={'shuffle': False}) # TODO: hardcode to False
+                                  dataloader_kwargs={'shuffle': False})  # TODO: hardcode to False
 
     def _optimizer_step(self):
         assert self.config.grad_clip is not None
diff --git a/verl/workers/critic/dp_critic.py b/verl/workers/critic/dp_critic.py
index d9e7ffa..1ceb08c 100644
--- a/verl/workers/critic/dp_critic.py
+++ b/verl/workers/critic/dp_critic.py
@@ -104,7 +104,7 @@ def _make_minibatch_iterator(self, data: DataProto) -> Iterable[DataProto]:
         data = data.select(batch_keys=select_keys)
         return data.make_iterator(mini_batch_size=self.config.ppo_mini_batch_size,
                                   epochs=self.config.ppo_epochs,
-                                  dataloader_kwargs={'shuffle': False}) # TODO: hardcode to False
+                                  dataloader_kwargs={'shuffle': False})  # TODO: hardcode to False
 
     def _optimizer_step(self):
         assert self.config.grad_clip is not None
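
Both hunks carry the same TODO: `shuffle` is hardcoded to `False` in the minibatch iterator of the actor and the critic. A minimal sketch of how the flag could be lifted into config instead, assuming a hypothetical `shuffle` field on the worker config (read with a `False` default so the current no-shuffle behavior is preserved when the field is absent):

    # Sketch only, not part of this diff. `self.config.shuffle` is a
    # hypothetical config field; getattr falls back to False so the
    # iterator behaves exactly as today when the field is not set.
    return data.make_iterator(mini_batch_size=self.config.ppo_mini_batch_size,
                              epochs=self.config.ppo_epochs,
                              dataloader_kwargs={'shuffle': getattr(self.config, 'shuffle', False)})

Since `_make_minibatch_iterator` is duplicated between `dp_actor.py` and `dp_critic.py`, any such change would need to be applied to both files, as this diff already does for the comment fix.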