Fix decision transformers initialization
takuseno committed Oct 8, 2023
1 parent 154f3c0 commit a78d734
Showing 1 changed file with 20 additions and 7 deletions.
d3rlpy/models/torch/transformers.py (27 changes: 20 additions & 7 deletions)
@@ -237,6 +237,16 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return h
 
 
+def _init_weights(module: nn.Module) -> None:
+    if isinstance(module, (nn.Linear, nn.Embedding)):
+        module.weight.data.normal_(mean=0.0, std=0.02)
+        if isinstance(module, nn.Linear) and module.bias is not None:
+            module.bias.data.zero_()
+    elif isinstance(module, nn.LayerNorm):
+        module.bias.data.zero_()
+        module.weight.data.fill_(1.0)
+
+
 class ContinuousDecisionTransformer(nn.Module):  # type: ignore
     _encoder: Encoder
     _position_encoding: PositionEncoding
@@ -260,10 +270,7 @@ def __init__(
         activation: nn.Module,
     ):
         super().__init__()
-        self._encoder = encoder
         self._position_encoding = position_encoding
-        self._action_embed = nn.Linear(action_size, feature_size)
-        self._rtg_embed = nn.Linear(1, feature_size)
         self._embed_ln = nn.LayerNorm(feature_size)
         self._gpt2 = GPT2(
             hidden_size=feature_size,
@@ -275,6 +282,11 @@ def __init__(
             embed_dropout=embed_dropout,
             activation=activation,
         )
+        self.apply(_init_weights)
+
+        self._encoder = encoder
+        self._rtg_embed = nn.Linear(1, feature_size)
+        self._action_embed = nn.Linear(action_size, feature_size)
         self._output = nn.Linear(feature_size, action_size)
 
     def forward(
@@ -337,11 +349,7 @@ def __init__(
         embed_activation: nn.Module,
     ):
         super().__init__()
-        self._encoder = encoder
         self._position_encoding = position_encoding
-        self._action_embed = nn.Embedding(action_size, feature_size)
-        nn.init.normal_(self._action_embed.weight, mean=0.0, std=0.02)
-        self._rtg_embed = nn.Linear(1, feature_size)
         self._gpt2 = GPT2(
             hidden_size=feature_size,
             num_heads=num_heads,
@@ -353,6 +361,11 @@ def __init__(
             activation=activation,
         )
         self._output = nn.Linear(feature_size, action_size, bias=False)
+        self._action_embed = nn.Embedding(action_size, feature_size)
+        self.apply(_init_weights)
+
+        self._encoder = encoder
+        self._rtg_embed = nn.Linear(1, feature_size)
         self._embed_activation = embed_activation
 
     def forward(
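Note: the module-creation order in both constructors is what makes this fix work. nn.Module.apply visits only the submodules that are registered at the moment it is called, so layers constructed after self.apply(_init_weights) keep PyTorch's default initialization while everything constructed before it receives the GPT-style init. A minimal sketch of that behavior (the Demo class is hypothetical, not part of the repository; _init_weights mirrors the function added in this commit):

from torch import nn

def _init_weights(module: nn.Module) -> None:
    # Same GPT-style initialization as in the commit: N(0, 0.02) weights,
    # zero biases, and identity-scale LayerNorm.
    if isinstance(module, (nn.Linear, nn.Embedding)):
        module.weight.data.normal_(mean=0.0, std=0.02)
        if isinstance(module, nn.Linear) and module.bias is not None:
            module.bias.data.zero_()
    elif isinstance(module, nn.LayerNorm):
        module.bias.data.zero_()
        module.weight.data.fill_(1.0)

class Demo(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.inner = nn.Linear(64, 64)   # registered before apply: re-initialized
        self.apply(_init_weights)
        self.outer = nn.Linear(64, 64)   # registered after apply: keeps default init

demo = Demo()
print(demo.inner.weight.std())  # close to 0.02
print(demo.outer.weight.std())  # PyTorch's default Kaiming-uniform scale, noticeably larger

This is why each constructor above creates the modules meant to receive the GPT-style initialization (the GPT2 block, and in the discrete case the action embedding and output head) before the apply call, and the encoder and remaining embeddings after it.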