diff --git a/src/olmo_core/internal/experiment.py b/src/olmo_core/internal/experiment.py
index 9870d457..8de3bd75 100644
--- a/src/olmo_core/internal/experiment.py
+++ b/src/olmo_core/internal/experiment.py
@@ -108,10 +108,16 @@ def run(self, config: ExperimentConfig):
                 teardown_training_environment()
         elif self == SubCmd.train_single:
             if config.model.dp_config is not None:
-                log.warning("dp_config is set to %s, but you can't use data parallelism when running on a single node. Disabling.", config.model.dp_config)
+                log.warning(
+                    "dp_config is set to %s, but you can't use data parallelism when running on a single node. Disabling.",
+                    config.model.dp_config,
+                )
                 config.model.dp_config = None
             if config.model.tp_config is not None:
-                log.warning("tp_config is set to %s, but you can't use tensor parallelism when running on a single node. Disabling.", config.model.dp_config)
+                log.warning(
+                    "tp_config is set to %s, but you can't use tensor parallelism when running on a single node. Disabling.",
+                    config.model.tp_config,
+                )
                 config.model.tp_config = None
             try:
                 train(config)
diff --git a/src/olmo_core/nn/transformer/config.py b/src/olmo_core/nn/transformer/config.py
index 749aa06a..4e4735e7 100644
--- a/src/olmo_core/nn/transformer/config.py
+++ b/src/olmo_core/nn/transformer/config.py
@@ -237,7 +237,9 @@ def build(
             if torch.cuda.is_available():
                 model.apply_compile()
             else:
-                log.warning("model.compile was set to True, but CUDA is not available. Compiling only works with CUDA. Ignoring.")
+                log.warning(
+                    "model.compile was set to True, but CUDA is not available. Compiling only works with CUDA. Ignoring."
+                )
 
         # Maybe wrap for data parallel.
         if dp_mesh is None and mesh is not None:
diff --git a/src/olmo_core/train/__init__.py b/src/olmo_core/train/__init__.py
index f99ed872..2527a35b 100644
--- a/src/olmo_core/train/__init__.py
+++ b/src/olmo_core/train/__init__.py
@@ -49,7 +49,7 @@
 from ..distributed.utils import init_distributed, is_distributed
 from ..io import add_cached_path_clients
-from ..utils import LogFilterType, prepare_cli_environment, seed_all, get_default_device
+from ..utils import LogFilterType, get_default_device, prepare_cli_environment, seed_all
 from .checkpoint import Checkpointer, CheckpointerConfig
 from .common import Duration, DurationUnit, LoadStrategy, ReduceType
 from .config import TrainerConfig
diff --git a/src/olmo_core/train/trainer.py b/src/olmo_core/train/trainer.py
index cc9bf81b..2ca07ff5 100644
--- a/src/olmo_core/train/trainer.py
+++ b/src/olmo_core/train/trainer.py
@@ -385,7 +385,9 @@ def __post_init__(self):
             if torch.cuda.is_available():
                 self._loss_fn = torch.compile(self._loss_fn)
             else:
-                log.warning("compile_loss was set to True, but CUDA is not available. Compiling only works with CUDA. Ignoring.")
+                log.warning(
+                    "compile_loss was set to True, but CUDA is not available. Compiling only works with CUDA. Ignoring."
+                )
 
     @property
     def global_batch_size(self) -> int:
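
For context, the hunks above share one guard pattern: call `torch.compile` (or `model.apply_compile()`) only when `torch.cuda.is_available()`, and otherwise emit a multi-line `log.warning(...)` and keep the uncompiled object. Below is a minimal standalone sketch of that pattern, not code from this diff; the `maybe_compile` helper and its `enabled` flag are illustrative names, not part of the repository.

```python
import logging

import torch

log = logging.getLogger(__name__)


def maybe_compile(fn, enabled: bool = True):
    """Return a compiled version of ``fn`` when possible, else ``fn`` unchanged."""
    if not enabled:
        return fn
    if torch.cuda.is_available():
        # Same happy path as the diff: hand the callable to torch.compile.
        return torch.compile(fn)
    # Same fallback as the diff: warn and keep the original callable.
    log.warning(
        "compile was requested, but CUDA is not available. Compiling only works with CUDA. Ignoring."
    )
    return fn
```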