Skip to content

Commit

Permalink
Move optimizers.py and lr_schedulers.py to d3rlpy.optimizers directory
Browse files Browse the repository at this point in the history
  • Loading branch information
takuseno committed Oct 19, 2024
1 parent 7a17efe commit 3b01da3
Show file tree
Hide file tree
Showing 54 changed files with 123 additions and 125 deletions.
2 changes: 2 additions & 0 deletions d3rlpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
models,
notebook_utils,
ope,
optimizers,
preprocessing,
tokenizers,
types,
Expand All @@ -34,6 +35,7 @@
"logging",
"metrics",
"models",
"optimizers",
"notebook_utils",
"ope",
"preprocessing",
Expand Down
6 changes: 3 additions & 3 deletions d3rlpy/algos/qlearning/awac.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
create_normal_policy,
)
from ...models.encoders import EncoderFactory, make_encoder_field
from ...models.optimizers import OptimizerFactory, make_optimizer_field
from ...models.q_functions import QFunctionFactory, make_q_func_field
from ...models.torch import Parameter
from ...optimizers import OptimizerFactory, make_optimizer_field
from ...types import Shape
from .base import QLearningAlgoBase
from .torch.awac_impl import AWACImpl
Expand Down Expand Up @@ -53,9 +53,9 @@ class AWACConfig(LearnableConfig):
reward_scaler (d3rlpy.preprocessing.RewardScaler): Reward preprocessor.
actor_learning_rate (float): Learning rate for policy function.
critic_learning_rate (float): Learning rate for Q functions.
actor_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
actor_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the actor.
critic_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
critic_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the critic.
actor_encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory for the actor.
Expand Down
6 changes: 3 additions & 3 deletions d3rlpy/algos/qlearning/bc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
create_normal_policy,
)
from ...models.encoders import EncoderFactory, make_encoder_field
from ...models.optimizers import OptimizerFactory, make_optimizer_field
from ...optimizers.optimizers import OptimizerFactory, make_optimizer_field
from ...types import Shape
from .base import QLearningAlgoBase
from .torch.bc_impl import (
Expand Down Expand Up @@ -39,7 +39,7 @@ class BCConfig(LearnableConfig):
Args:
        learning_rate (float): Learning rate.
optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory.
encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory.
Expand Down Expand Up @@ -129,7 +129,7 @@ class DiscreteBCConfig(LearnableConfig):
Args:
        learning_rate (float): Learning rate.
optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory.
encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory.
Expand Down
10 changes: 5 additions & 5 deletions d3rlpy/algos/qlearning/bcq.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
create_vae_encoder,
)
from ...models.encoders import EncoderFactory, make_encoder_field
from ...models.optimizers import OptimizerFactory, make_optimizer_field
from ...models.q_functions import QFunctionFactory, make_q_func_field
from ...models.torch import CategoricalPolicy, compute_output_size
from ...optimizers.optimizers import OptimizerFactory, make_optimizer_field
from ...types import Shape
from .base import QLearningAlgoBase
from .torch.bcq_impl import (
Expand Down Expand Up @@ -110,11 +110,11 @@ class BCQConfig(LearnableConfig):
actor_learning_rate (float): Learning rate for policy function.
critic_learning_rate (float): Learning rate for Q functions.
imitator_learning_rate (float): Learning rate for Conditional VAE.
actor_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
actor_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the actor.
critic_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
critic_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the critic.
imitator_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
imitator_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the conditional VAE.
actor_encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory for the actor.
Expand Down Expand Up @@ -316,7 +316,7 @@ class DiscreteBCQConfig(LearnableConfig):
Observation preprocessor.
reward_scaler (d3rlpy.preprocessing.RewardScaler): Reward preprocessor.
learning_rate (float): Learning rate.
optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory.
encoder_factory (d3rlpy.models.encoders.EncoderFactory or str):
Encoder factory.
Expand Down
12 changes: 6 additions & 6 deletions d3rlpy/algos/qlearning/bear.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
create_vae_encoder,
)
from ...models.encoders import EncoderFactory, make_encoder_field
from ...models.optimizers import OptimizerFactory, make_optimizer_field
from ...models.q_functions import QFunctionFactory, make_q_func_field
from ...optimizers.optimizers import OptimizerFactory, make_optimizer_field
from ...types import Shape
from .base import QLearningAlgoBase
from .torch.bear_impl import BEARImpl, BEARModules
Expand Down Expand Up @@ -74,15 +74,15 @@ class BEARConfig(LearnableConfig):
function.
temp_learning_rate (float): Learning rate for temperature parameter.
alpha_learning_rate (float): Learning rate for :math:`\alpha`.
actor_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
actor_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the actor.
critic_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
critic_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the critic.
imitator_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
imitator_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the behavior policy.
temp_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
temp_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the temperature.
alpha_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
alpha_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for :math:`\alpha`.
actor_encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory for the actor.
Expand Down
8 changes: 4 additions & 4 deletions d3rlpy/algos/qlearning/cal_ql.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ class CalQLConfig(CQLConfig):
temp_learning_rate (float):
Learning rate for temperature parameter of SAC.
alpha_learning_rate (float): Learning rate for :math:`\alpha`.
actor_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
actor_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the actor.
critic_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
critic_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the critic.
temp_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
temp_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the temperature.
alpha_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
alpha_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for :math:`\alpha`.
actor_encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory for the actor.
Expand Down
12 changes: 6 additions & 6 deletions d3rlpy/algos/qlearning/cql.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
create_parameter,
)
from ...models.encoders import EncoderFactory, make_encoder_field
from ...models.optimizers import OptimizerFactory, make_optimizer_field
from ...models.q_functions import QFunctionFactory, make_q_func_field
from ...optimizers.optimizers import OptimizerFactory, make_optimizer_field
from ...types import Shape
from .base import QLearningAlgoBase
from .torch.cql_impl import CQLImpl, CQLModules, DiscreteCQLImpl
Expand Down Expand Up @@ -74,13 +74,13 @@ class CQLConfig(LearnableConfig):
temp_learning_rate (float):
Learning rate for temperature parameter of SAC.
alpha_learning_rate (float): Learning rate for :math:`\alpha`.
actor_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
actor_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the actor.
critic_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
critic_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the critic.
temp_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
temp_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the temperature.
alpha_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
alpha_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for :math:`\alpha`.
actor_encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory for the actor.
Expand Down Expand Up @@ -260,7 +260,7 @@ class DiscreteCQLConfig(LearnableConfig):
Observation preprocessor.
reward_scaler (d3rlpy.preprocessing.RewardScaler): Reward preprocessor.
learning_rate (float): Learning rate.
optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory.
encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory.
Expand Down
6 changes: 3 additions & 3 deletions d3rlpy/algos/qlearning/crr.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
create_normal_policy,
)
from ...models.encoders import EncoderFactory, make_encoder_field
from ...models.optimizers import OptimizerFactory, make_optimizer_field
from ...models.q_functions import QFunctionFactory, make_q_func_field
from ...optimizers.optimizers import OptimizerFactory, make_optimizer_field
from ...types import Shape
from .base import QLearningAlgoBase
from .torch.crr_impl import CRRImpl, CRRModules
Expand Down Expand Up @@ -72,9 +72,9 @@ class CRRConfig(LearnableConfig):
reward_scaler (d3rlpy.preprocessing.RewardScaler): Reward preprocessor.
actor_learning_rate (float): Learning rate for policy function.
critic_learning_rate (float): Learning rate for Q functions.
actor_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
actor_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the actor.
critic_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
critic_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the critic.
actor_encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory for the actor.
Expand Down
6 changes: 3 additions & 3 deletions d3rlpy/algos/qlearning/ddpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
create_deterministic_policy,
)
from ...models.encoders import EncoderFactory, make_encoder_field
from ...models.optimizers import OptimizerFactory, make_optimizer_field
from ...models.q_functions import QFunctionFactory, make_q_func_field
from ...optimizers.optimizers import OptimizerFactory, make_optimizer_field
from ...types import Shape
from .base import QLearningAlgoBase
from .torch.ddpg_impl import DDPGImpl, DDPGModules
Expand Down Expand Up @@ -55,9 +55,9 @@ class DDPGConfig(LearnableConfig):
reward_scaler (d3rlpy.preprocessing.RewardScaler): Reward preprocessor.
actor_learning_rate (float): Learning rate for policy function.
critic_learning_rate (float): Learning rate for Q function.
actor_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
actor_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the actor.
critic_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
critic_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the critic.
actor_encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory for the actor.
Expand Down
6 changes: 3 additions & 3 deletions d3rlpy/algos/qlearning/dqn.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from ...constants import ActionSpace
from ...models.builders import create_discrete_q_function
from ...models.encoders import EncoderFactory, make_encoder_field
from ...models.optimizers import OptimizerFactory, make_optimizer_field
from ...models.q_functions import QFunctionFactory, make_q_func_field
from ...optimizers.optimizers import OptimizerFactory, make_optimizer_field
from ...types import Shape
from .base import QLearningAlgoBase
from .torch.dqn_impl import DoubleDQNImpl, DQNImpl, DQNModules
Expand Down Expand Up @@ -34,7 +34,7 @@ class DQNConfig(LearnableConfig):
Observation preprocessor.
reward_scaler (d3rlpy.preprocessing.RewardScaler): Reward preprocessor.
learning_rate (float): Learning rate.
optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory.
encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory.
Expand Down Expand Up @@ -140,7 +140,7 @@ class DoubleDQNConfig(DQNConfig):
Observation preprocessor.
reward_scaler (d3rlpy.preprocessing.RewardScaler): Reward preprocessor.
learning_rate (float): Learning rate.
optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory.
encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory.
Expand Down
6 changes: 3 additions & 3 deletions d3rlpy/algos/qlearning/iql.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
create_value_function,
)
from ...models.encoders import EncoderFactory, make_encoder_field
from ...models.optimizers import OptimizerFactory, make_optimizer_field
from ...models.q_functions import MeanQFunctionFactory
from ...optimizers.optimizers import OptimizerFactory, make_optimizer_field
from ...types import Shape
from .base import QLearningAlgoBase
from .torch.iql_impl import IQLImpl, IQLModules
Expand Down Expand Up @@ -62,9 +62,9 @@ class IQLConfig(LearnableConfig):
reward_scaler (d3rlpy.preprocessing.RewardScaler): Reward preprocessor.
actor_learning_rate (float): Learning rate for policy function.
critic_learning_rate (float): Learning rate for Q functions.
actor_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
actor_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the actor.
critic_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
critic_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the critic.
actor_encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory for the actor.
Expand Down
4 changes: 2 additions & 2 deletions d3rlpy/algos/qlearning/nfq.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from ...constants import ActionSpace
from ...models.builders import create_discrete_q_function
from ...models.encoders import EncoderFactory, make_encoder_field
from ...models.optimizers import OptimizerFactory, make_optimizer_field
from ...models.q_functions import QFunctionFactory, make_q_func_field
from ...optimizers.optimizers import OptimizerFactory, make_optimizer_field
from ...types import Shape
from .base import QLearningAlgoBase
from .torch.dqn_impl import DQNImpl, DQNModules
Expand Down Expand Up @@ -38,7 +38,7 @@ class NFQConfig(LearnableConfig):
Observation preprocessor.
reward_scaler (d3rlpy.preprocessing.RewardScaler): Reward preprocessor.
learning_rate (float): Learning rate.
optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory.
encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory.
Expand Down
14 changes: 7 additions & 7 deletions d3rlpy/algos/qlearning/plas.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
create_vae_encoder,
)
from ...models.encoders import EncoderFactory, make_encoder_field
from ...models.optimizers import OptimizerFactory, make_optimizer_field
from ...models.q_functions import QFunctionFactory, make_q_func_field
from ...optimizers.optimizers import OptimizerFactory, make_optimizer_field
from ...types import Shape
from .base import QLearningAlgoBase
from .torch.plas_impl import (
Expand Down Expand Up @@ -56,11 +56,11 @@ class PLASConfig(LearnableConfig):
actor_learning_rate (float): Learning rate for policy function.
critic_learning_rate (float): Learning rate for Q functions.
imitator_learning_rate (float): Learning rate for Conditional VAE.
actor_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
actor_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the actor.
critic_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
critic_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the critic.
imitator_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
imitator_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the conditional VAE.
actor_encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory for the actor.
Expand Down Expand Up @@ -224,11 +224,11 @@ class PLASWithPerturbationConfig(PLASConfig):
actor_learning_rate (float): Learning rate for policy function.
critic_learning_rate (float): Learning rate for Q functions.
imitator_learning_rate (float): Learning rate for Conditional VAE.
actor_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
actor_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the actor.
critic_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
critic_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the critic.
imitator_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
imitator_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the conditional VAE.
actor_encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory for the actor.
Expand Down
6 changes: 3 additions & 3 deletions d3rlpy/algos/qlearning/rebrac.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
create_deterministic_policy,
)
from ...models.encoders import EncoderFactory, make_encoder_field
from ...models.optimizers import OptimizerFactory, make_optimizer_field
from ...models.q_functions import QFunctionFactory, make_q_func_field
from ...optimizers.optimizers import OptimizerFactory, make_optimizer_field
from ...types import Shape
from .base import QLearningAlgoBase
from .torch.ddpg_impl import DDPGModules
Expand Down Expand Up @@ -51,9 +51,9 @@ class ReBRACConfig(LearnableConfig):
reward_scaler (d3rlpy.preprocessing.RewardScaler): Reward preprocessor.
actor_learning_rate (float): Learning rate for a policy function.
critic_learning_rate (float): Learning rate for Q functions.
actor_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
actor_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the actor.
critic_optim_factory (d3rlpy.models.optimizers.OptimizerFactory):
critic_optim_factory (d3rlpy.optimizers.OptimizerFactory):
Optimizer factory for the critic.
actor_encoder_factory (d3rlpy.models.encoders.EncoderFactory):
Encoder factory for the actor.
Expand Down
Loading

0 comments on commit 3b01da3

Please sign in to comment.