diff --git a/verl/workers/actor/dp_actor.py b/verl/workers/actor/dp_actor.py index ff762b6..6f384b0 100644 --- a/verl/workers/actor/dp_actor.py +++ b/verl/workers/actor/dp_actor.py @@ -142,7 +142,7 @@ def _make_minibatch_iterator(self, data: DataProto) -> Iterable[DataProto]: data = data.select(batch_keys=select_keys) return data.make_iterator(mini_batch_size=self.config.ppo_mini_batch_size, epochs=self.config.ppo_epochs, - dataloader_kwargs={'shuffle': self.config.shuffle}) + dataloader_kwargs={'shuffle': False}) # TODO: shuffle is temporarily hardcoded to False; restore self.config.shuffle def _optimizer_step(self): assert self.config.grad_clip is not None diff --git a/verl/workers/critic/dp_critic.py b/verl/workers/critic/dp_critic.py index 3d17b0a..d9e7ffa 100644 --- a/verl/workers/critic/dp_critic.py +++ b/verl/workers/critic/dp_critic.py @@ -104,7 +104,7 @@ def _make_minibatch_iterator(self, data: DataProto) -> Iterable[DataProto]: data = data.select(batch_keys=select_keys) return data.make_iterator(mini_batch_size=self.config.ppo_mini_batch_size, epochs=self.config.ppo_epochs, - dataloader_kwargs={'shuffle': False}) # TODO: shuffle is temporarily hardcoded to False; restore self.config.shuffle def _optimizer_step(self): assert self.config.grad_clip is not None