From 69e940de69258d970e3cae791b5f308891d1d8c4 Mon Sep 17 00:00:00 2001
From: Etienne Pot
Date: Thu, 21 Nov 2024 10:11:00 -0800
Subject: [PATCH] Set create=False for CheckpointManager if used in an eval job.

PiperOrigin-RevId: 698826017
---
 kauldron/main.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/kauldron/main.py b/kauldron/main.py
index 4445d59b..20dba664 100644
--- a/kauldron/main.py
+++ b/kauldron/main.py
@@ -54,17 +54,10 @@ def main(_):
   with _wu_error_handling(_POST_MORTEM.value):
     eval_names = _EVAL_NAMES.value
     cfg = _CONFIG.value
+    trainer: kd.train.Trainer = kd.konfig.resolve(cfg)
     if eval_names is None:
-      trainer: kd.train.Trainer = kd.konfig.resolve(cfg)
       trainer.train()
     else:
-      # Orbax does not support CheckpointManagers creating the same root
-      # directory. By setting `create=False`, we ensure that the checkpoint
-      # manager does not create a new root directory.
-      if hasattr(cfg, "checkpointer"):
-        if hasattr(cfg.checkpointer, "create"):
-          cfg.checkpointer.create = False
-      trainer: kd.train.Trainer = kd.konfig.resolve(cfg)
       trainer.continuous_eval(eval_names)
 
 