Skip to content

Commit

Permalink
Resolve run_opts before passing it to the workspace
Browse files Browse the repository at this point in the history
Summary:
We were resolving the default values for runopts during the dryrun call on the scheduler. This made the ops passed to the workspace not have the defaults correctly populated for workspace opts. This change also resolves the runopts in the runner dryrun and scheduler submit apis.

Haven't removed the runopts resolution in scheduler dry run here as a lot of other tests broke with it and it seems reasonable to also have runopts resolved for just the scheduler dryrun. The double resolving of runopts for the runner dryrun and scheduler submit cases shouldnt cause any meaningful differences.

There is a separate question on whether workspace should also be built during scheduler dryrun but that can be a follow up change.

Differential Revision: D48395915

fbshipit-source-id: dad30eb6b31d7c7f475b92b5a7047abac59a1177
  • Loading branch information
manav-a authored and facebook-github-bot committed Aug 16, 2023
1 parent 744706b commit c037dba
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 8 deletions.
6 changes: 3 additions & 3 deletions torchx/runner/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ def dryrun(
cfg = cfg or dict()
with log_event("dryrun", scheduler, runcfg=json.dumps(cfg) if cfg else None):
sched = self._scheduler(scheduler)

resolved_cfg = sched.run_opts().resolve(cfg)
if workspace and isinstance(sched, WorkspaceMixin):
role = app.roles[0]
old_img = role.image
Expand All @@ -366,7 +366,7 @@ def dryrun(
logger.info(
'To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.'
)
sched.build_workspace_and_update_role(role, workspace, cfg)
sched.build_workspace_and_update_role(role, workspace, resolved_cfg)

if old_img != role.image:
logger.info(
Expand All @@ -380,7 +380,7 @@ def dryrun(
)

sched._validate(app, scheduler)
dryrun_info = sched.submit_dryrun(app, cfg)
dryrun_info = sched.submit_dryrun(app, resolved_cfg)
dryrun_info._scheduler = scheduler
return dryrun_info

Expand Down
8 changes: 7 additions & 1 deletion torchx/runner/test/api_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ def test_run(self, _) -> None:

def test_dryrun(self, _) -> None:
scheduler_mock = MagicMock()
scheduler_mock.run_opts.return_value.resolve.return_value = {
**self.cfg,
"foo": "bar",
}
with Runner(
name=SESSION_NAME,
scheduler_factories={"local_dir": lambda name: scheduler_mock},
Expand All @@ -127,7 +131,9 @@ def test_dryrun(self, _) -> None:
)
app = AppDef("name", roles=[role])
runner.dryrun(app, "local_dir", cfg=self.cfg)
scheduler_mock.submit_dryrun.assert_called_once_with(app, self.cfg)
scheduler_mock.submit_dryrun.assert_called_once_with(
app, {**self.cfg, "foo": "bar"}
)
scheduler_mock._validate.assert_called_once()

def test_dryrun_env_variables(self, _) -> None:
Expand Down
6 changes: 4 additions & 2 deletions torchx/schedulers/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,14 @@ def submit(
Returns:
The application id that uniquely identifies the submitted app.
"""
# pyre-fixme: Generic cfg type passed to resolve
resolved_cfg = self.run_opts().resolve(cfg)
if workspace:
sched = self
assert isinstance(sched, WorkspaceMixin)
role = app.roles[0]
sched.build_workspace_and_update_role(role, workspace, cfg)
dryrun_info = self.submit_dryrun(app, cfg)
sched.build_workspace_and_update_role(role, workspace, resolved_cfg)
dryrun_info = self.submit_dryrun(app, resolved_cfg)
return self.schedule(dryrun_info)

@abc.abstractmethod
Expand Down
4 changes: 2 additions & 2 deletions torchx/schedulers/test/api_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ def test_submit_workspace(self) -> None:

scheduler_mock = SchedulerTest.MockScheduler("test_session")

bad_type_cfg = {"foo": "asdf"}
scheduler_mock.submit(app, bad_type_cfg, workspace="some_workspace")
cfg = {"foo": "asdf"}
scheduler_mock.submit(app, cfg, workspace="some_workspace")
self.assertEqual(app.roles[0].image, "some_workspace")

def test_invalid_dryrun_cfg(self) -> None:
Expand Down

0 comments on commit c037dba

Please sign in to comment.