-
Notifications
You must be signed in to change notification settings - Fork 387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feature(zc): add MetaDiffuser and prompt-dt #771
Open
Super1ce
wants to merge
17
commits into
opendilab:main
Choose a base branch
from
Super1ce:metadiffuser
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
b05c856
add action
Super1ce a459fd0
change entry
Super1ce 1c08ede
Merge branch 'opendilab:main' into main
Super1ce e97725c
add meta diffusion and prompt dt
Super1ce 32ccf3f
add metadiffuser
Super1ce 94648d1
change
Super1ce 16e8144
change
Super1ce 3524c72
add init
Super1ce b0e7274
add init
Super1ce 6be5920
add
Super1ce 7519400
debug
Super1ce 3bafbf1
change pdt
Super1ce 2b1bdaa
add comman
Super1ce c8d9c7f
metadiffuser
Super1ce fd2896c
debug
Super1ce 35e8e77
change
Super1ce 9b611db
add notion
Super1ce File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
from typing import Union, Optional, List, Any, Tuple | ||
import os | ||
import torch | ||
from functools import partial | ||
from tensorboardX import SummaryWriter | ||
from copy import deepcopy | ||
from torch.utils.data import DataLoader | ||
from torch.utils.data.distributed import DistributedSampler | ||
|
||
from ding.envs import get_vec_env_setting, create_env_manager | ||
from ding.worker import BaseLearner, InteractionSerialMetaEvaluator | ||
from ding.config import read_config, compile_config | ||
from ding.policy import create_policy | ||
from ding.utils import set_pkg_seed, get_world_size, get_rank | ||
from ding.utils.data import create_dataset | ||
|
||
def serial_pipeline_meta_offline(
        input_cfg: Union[str, Tuple[dict, dict]],
        seed: int = 0,
        env_setting: Optional[List[Any]] = None,
        model: Optional[torch.nn.Module] = None,
        max_train_iter: Optional[int] = int(1e10),
) -> Tuple['Policy', bool]:  # noqa
    """
    Overview:
        Serial pipeline entry for meta offline training. The policy is trained on the datasets of \
        multiple training tasks and evaluated with ``InteractionSerialMetaEvaluator``.
    Arguments:
        - input_cfg (:obj:`Union[str, Tuple[dict, dict]]`): Config in dict type. \
            ``str`` type means config file path. \
            ``Tuple[dict, dict]`` type means [user_config, create_cfg].
        - seed (:obj:`int`): Random seed.
        - env_setting (:obj:`Optional[List[Any]]`): A list with 3 elements: \
            ``BaseEnv`` subclass, collector env config, and evaluator env config. \
            NOTE(review): this argument is not read anywhere in this function.
        - model (:obj:`Optional[torch.nn.Module]`): Instance of torch.nn.Module.
        - max_train_iter (:obj:`Optional[int]`): Maximum policy update iterations in training. \
            ``None`` means no iteration limit.
    Returns:
        - policy (:obj:`Policy`): The trained policy.
        - stop (:obj:`bool`): Whether training ended because the evaluator signalled stop or \
            ``max_train_iter`` was reached (``False`` if all epochs simply ran out).
    """
    if isinstance(input_cfg, str):
        cfg, create_cfg = read_config(input_cfg)
    else:
        cfg, create_cfg = deepcopy(input_cfg)
    # The policy type name gets a '_command' suffix before the config is compiled.
    create_cfg.policy.type = create_cfg.policy.type + '_command'
    cfg = compile_config(cfg, seed=seed, auto=True, create_cfg=create_cfg)

    cfg.env['seed'] = seed

    # Dataset
    dataset = create_dataset(cfg)

    sampler, shuffle = None, True
    if get_world_size() > 1:
        # DataLoader does not accept shuffle=True together with a custom sampler.
        sampler, shuffle = DistributedSampler(dataset), False
    dataloader = DataLoader(
        dataset,
        # Dividing by get_world_size() here simply to make multigpu
        # settings mathematically equivalent to the singlegpu setting.
        # If the training efficiency is the bottleneck, feel free to
        # use the original batch size per gpu and increase learning rate
        # correspondingly.
        cfg.policy.learn.batch_size // get_world_size(),
        shuffle=shuffle,
        sampler=sampler,
        # Identity collate: each batch is handed to the learner as a plain list of samples.
        collate_fn=lambda x: x,
        pin_memory=cfg.policy.cuda,
    )

    # Env, policy
    env_fn, _, evaluator_env_cfg = get_vec_env_setting(cfg.env, collect=False)
    evaluator_env = create_env_manager(cfg.env.manager, [partial(env_fn, cfg=c) for c in evaluator_env_cfg])

    set_pkg_seed(cfg.seed, use_cuda=cfg.policy.cuda)
    policy = create_policy(cfg.policy, model=model, enable_field=['learn', 'eval'])

    if hasattr(policy, 'set_statistic'):
        # useful for setting action bounds for ibc
        policy.set_statistic(dataset.statistics)

    if cfg.policy.need_init_dataprocess:
        policy.init_dataprocess_func(dataset)

    # Only rank 0 owns a tensorboard writer; other ranks pass None.
    if get_rank() == 0:
        tb_logger = SummaryWriter(os.path.join('./{}/log/'.format(cfg.exp_name), 'serial'))
    else:
        tb_logger = None
    learner = BaseLearner(cfg.policy.learn.learner, policy.learn_mode, tb_logger, exp_name=cfg.exp_name)
    evaluator = InteractionSerialMetaEvaluator(
        cfg.policy.eval.evaluator, evaluator_env, policy.eval_mode, tb_logger, exp_name=cfg.exp_name
    )
    evaluator.init_params(dataset.params)

    learner.call_hook('before_run')
    stop = False
    # Stays None if no evaluation ever runs, so the final report below cannot hit an unbound name.
    reward = None

    for epoch in range(cfg.policy.learn.train_epoch):
        if get_world_size() > 1:
            dataloader.sampler.set_epoch(epoch)
        # for every train task, train policy with its dataset
        for i in range(cfg.policy.train_num):
            dataset.set_task_id(i)
            for train_data in dataloader:
                learner.train(train_data)

        # Evaluate policy at most once per epoch.
        if evaluator.should_eval(learner.train_iter):
            eval_kwargs = {'need_reward': cfg.policy.need_reward}
            if hasattr(policy, 'warm_train'):
                # if algorithm need warm train
                eval_kwargs['policy_warm_func'] = policy.warm_train
            stop, reward = evaluator.eval(learner.save_checkpoint, learner.train_iter, **eval_kwargs)

        reached_iter_limit = max_train_iter is not None and learner.train_iter >= max_train_iter
        if stop or reached_iter_limit:
            stop = True
            break

    learner.call_hook('after_run')
    print('final reward is: {}'.format(reward))
    return policy, stop
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from .base_env_manager import BaseEnvManager, BaseEnvManagerV2, create_env_manager, get_env_manager_cls | ||
from .subprocess_env_manager import AsyncSubprocessEnvManager, SyncSubprocessEnvManager, SubprocessEnvManagerV2 | ||
from .subprocess_env_manager import AsyncSubprocessEnvManager, SyncSubprocessEnvManager, SubprocessEnvManagerV2,\ | ||
MetaSyncSubprocessEnvManager | ||
from .gym_vector_env_manager import GymVectorEnvManager | ||
# Do not import PoolEnvManager here, because it depends on installation of `envpool` | ||
from .env_supervisor import EnvSupervisor |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"train_num"->"batch_size"?