Skip to content

Commit

Permalink
dierkes-j: removed hpo search space from algorithms
Browse files Browse the repository at this point in the history
  • Loading branch information
Github Actions committed Jun 5, 2024
1 parent 0900872 commit a44005c
Show file tree
Hide file tree
Showing 30 changed files with 3 additions and 625 deletions.
Binary file modified main/.doctrees/api/arlbench.core.algorithms.doctree
Binary file not shown.
Binary file modified main/.doctrees/api/arlbench.core.algorithms.dqn.doctree
Binary file not shown.
Binary file modified main/.doctrees/api/arlbench.core.algorithms.dqn.dqn.doctree
Binary file not shown.
Binary file modified main/.doctrees/api/arlbench.core.algorithms.ppo.doctree
Binary file not shown.
Binary file modified main/.doctrees/api/arlbench.core.algorithms.ppo.ppo.doctree
Binary file not shown.
Binary file modified main/.doctrees/api/arlbench.core.algorithms.sac.doctree
Binary file not shown.
Binary file modified main/.doctrees/api/arlbench.core.algorithms.sac.sac.doctree
Binary file not shown.
Binary file modified main/.doctrees/arlbench.core.algorithms.doctree
Binary file not shown.
Binary file modified main/.doctrees/arlbench.core.algorithms.dqn.doctree
Binary file not shown.
Binary file modified main/.doctrees/arlbench.core.algorithms.ppo.doctree
Binary file not shown.
Binary file modified main/.doctrees/arlbench.core.algorithms.sac.doctree
Binary file not shown.
Binary file modified main/.doctrees/environment.pickle
Binary file not shown.
50 changes: 0 additions & 50 deletions main/_modules/arlbench/core/algorithms/dqn/dqn.html
Original file line number Diff line number Diff line change
Expand Up @@ -609,56 +609,6 @@ <h1>Source code for arlbench.core.algorithms.dqn.dqn</h1><div class="highlight">
<span class="k">return</span> <span class="n">cs</span></div>


<div class="viewcode-block" id="DQN.get_hpo_search_space">
<a class="viewcode-back" href="../../../../../arlbench.core.algorithms.dqn.html#arlbench.core.algorithms.DQN.get_hpo_search_space">[docs]</a>
<span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">get_hpo_search_space</span><span class="p">(</span><span class="n">seed</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">ConfigurationSpace</span><span class="p">:</span>
<span class="n">cs</span> <span class="o">=</span> <span class="n">ConfigurationSpace</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="s2">&quot;DQNConfigSpace&quot;</span><span class="p">,</span>
<span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">,</span>
<span class="n">space</span><span class="o">=</span><span class="p">{</span>
<span class="s2">&quot;buffer_size&quot;</span><span class="p">:</span> <span class="n">Integer</span><span class="p">(</span>
<span class="s2">&quot;buffer_size&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mi">1024</span><span class="p">,</span> <span class="nb">int</span><span class="p">(</span><span class="mf">1e7</span><span class="p">)),</span> <span class="n">default</span><span class="o">=</span><span class="mi">1000000</span>
<span class="p">),</span>
<span class="s2">&quot;buffer_batch_size&quot;</span><span class="p">:</span> <span class="n">Categorical</span><span class="p">(</span>
<span class="s2">&quot;buffer_batch_size&quot;</span><span class="p">,</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">16</span><span class="p">,</span> <span class="mi">32</span><span class="p">,</span> <span class="mi">64</span><span class="p">],</span> <span class="n">default</span><span class="o">=</span><span class="mi">16</span>
<span class="p">),</span>
<span class="s2">&quot;buffer_prio_sampling&quot;</span><span class="p">:</span> <span class="n">Categorical</span><span class="p">(</span>
<span class="s2">&quot;buffer_prio_sampling&quot;</span><span class="p">,</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">],</span> <span class="n">default</span><span class="o">=</span><span class="kc">False</span>
<span class="p">),</span>
<span class="s2">&quot;buffer_alpha&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span><span class="s2">&quot;buffer_alpha&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">0.01</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">0.9</span><span class="p">),</span>
<span class="s2">&quot;buffer_beta&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span><span class="s2">&quot;buffer_beta&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">0.01</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">0.9</span><span class="p">),</span>
<span class="s2">&quot;buffer_epsilon&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span><span class="s2">&quot;buffer_epsilon&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">1e-7</span><span class="p">,</span> <span class="mf">1e-3</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">1e-6</span><span class="p">),</span>
<span class="s2">&quot;learning_rate&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span>
<span class="s2">&quot;learning_rate&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">1e-6</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">3e-4</span><span class="p">,</span> <span class="n">log</span><span class="o">=</span><span class="kc">True</span>
<span class="p">),</span>
<span class="s2">&quot;tau&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span><span class="s2">&quot;tau&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">0.01</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">1.0</span><span class="p">),</span>
<span class="s2">&quot;initial_epsilon&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span><span class="s2">&quot;initial_epsilon&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">0.5</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">1.0</span><span class="p">),</span>
<span class="s2">&quot;target_epsilon&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span><span class="s2">&quot;target_epsilon&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">0.001</span><span class="p">,</span> <span class="mf">0.2</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">0.05</span><span class="p">),</span>
<span class="s2">&quot;exploration_fraction&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span><span class="s2">&quot;initial_epsilon&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">0.005</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">0.1</span><span class="p">),</span>
<span class="s2">&quot;use_target_network&quot;</span><span class="p">:</span> <span class="n">Categorical</span><span class="p">(</span>
<span class="s2">&quot;use_target_network&quot;</span><span class="p">,</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">],</span> <span class="n">default</span><span class="o">=</span><span class="kc">True</span>
<span class="p">),</span>
<span class="s2">&quot;train_freq&quot;</span><span class="p">:</span> <span class="n">Integer</span><span class="p">(</span><span class="s2">&quot;train_freq&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">256</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mi">4</span><span class="p">),</span>
<span class="s2">&quot;gradient steps&quot;</span><span class="p">:</span> <span class="n">Integer</span><span class="p">(</span><span class="s2">&quot;gradient_steps&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">256</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mi">1</span><span class="p">),</span>
<span class="s2">&quot;learning_starts&quot;</span><span class="p">:</span> <span class="n">Integer</span><span class="p">(</span><span class="s2">&quot;learning_starts&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">32768</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mi">1024</span><span class="p">),</span>
<span class="s2">&quot;target_update_interval&quot;</span><span class="p">:</span> <span class="n">Integer</span><span class="p">(</span>
<span class="s2">&quot;target_update_interval&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2000</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mi">1000</span>
<span class="p">),</span>
<span class="p">},</span>
<span class="p">)</span>
<span class="n">cs</span><span class="o">.</span><span class="n">add_conditions</span><span class="p">(</span>
<span class="p">[</span>
<span class="n">EqualsCondition</span><span class="p">(</span>
<span class="n">cs</span><span class="p">[</span><span class="s2">&quot;target_update_interval&quot;</span><span class="p">],</span> <span class="n">cs</span><span class="p">[</span><span class="s2">&quot;use_target_network&quot;</span><span class="p">],</span> <span class="kc">True</span>
<span class="p">),</span>
<span class="n">EqualsCondition</span><span class="p">(</span><span class="n">cs</span><span class="p">[</span><span class="s2">&quot;tau&quot;</span><span class="p">],</span> <span class="n">cs</span><span class="p">[</span><span class="s2">&quot;use_target_network&quot;</span><span class="p">],</span> <span class="kc">True</span><span class="p">),</span>
<span class="p">]</span>
<span class="p">)</span>

<span class="k">return</span> <span class="n">cs</span></div>


<div class="viewcode-block" id="DQN.get_default_hpo_config">
<a class="viewcode-back" href="../../../../../arlbench.core.algorithms.dqn.html#arlbench.core.algorithms.DQN.get_default_hpo_config">[docs]</a>
<span class="nd">@staticmethod</span>
Expand Down
29 changes: 0 additions & 29 deletions main/_modules/arlbench/core/algorithms/ppo/ppo.html
Original file line number Diff line number Diff line change
Expand Up @@ -554,35 +554,6 @@ <h1>Source code for arlbench.core.algorithms.ppo.ppo</h1><div class="highlight">
<span class="p">)</span></div>


<div class="viewcode-block" id="PPO.get_hpo_search_space">
<a class="viewcode-back" href="../../../../../arlbench.core.algorithms.ppo.html#arlbench.core.algorithms.PPO.get_hpo_search_space">[docs]</a>
<span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">get_hpo_search_space</span><span class="p">(</span><span class="n">seed</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">ConfigurationSpace</span><span class="p">:</span>
<span class="k">return</span> <span class="n">ConfigurationSpace</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="s2">&quot;PPOConfigSpace&quot;</span><span class="p">,</span>
<span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">,</span>
<span class="n">space</span><span class="o">=</span><span class="p">{</span>
<span class="s2">&quot;minibatch_size&quot;</span><span class="p">:</span> <span class="n">Categorical</span><span class="p">(</span>
<span class="s2">&quot;minibatch_size&quot;</span><span class="p">,</span> <span class="p">[</span><span class="mi">16</span><span class="p">,</span> <span class="mi">32</span><span class="p">,</span> <span class="mi">64</span><span class="p">,</span> <span class="mi">128</span><span class="p">],</span> <span class="n">default</span><span class="o">=</span><span class="mi">64</span>
<span class="p">),</span>
<span class="s2">&quot;learning_rate&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span>
<span class="s2">&quot;learning_rate&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">1e-6</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">3e-4</span><span class="p">,</span> <span class="n">log</span><span class="o">=</span><span class="kc">True</span>
<span class="p">),</span>
<span class="s2">&quot;n_steps&quot;</span><span class="p">:</span> <span class="n">Categorical</span><span class="p">(</span><span class="s2">&quot;n_steps&quot;</span><span class="p">,</span> <span class="p">[</span><span class="mi">32</span><span class="p">,</span> <span class="mi">64</span><span class="p">,</span> <span class="mi">128</span><span class="p">,</span> <span class="mi">256</span><span class="p">,</span> <span class="mi">512</span><span class="p">],</span> <span class="n">default</span><span class="o">=</span><span class="mi">128</span><span class="p">),</span>
<span class="s2">&quot;update_epochs&quot;</span><span class="p">:</span> <span class="n">Integer</span><span class="p">(</span><span class="s2">&quot;update_epochs&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">20</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mi">10</span><span class="p">),</span>
<span class="s2">&quot;gae_lambda&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span><span class="s2">&quot;gae_lambda&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">0.8</span><span class="p">,</span> <span class="mf">0.9999</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">0.95</span><span class="p">),</span>
<span class="s2">&quot;clip_eps&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span><span class="s2">&quot;clip_eps&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">0.2</span><span class="p">),</span>
<span class="s2">&quot;vf_clip_eps&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span><span class="s2">&quot;vf_clip_eps&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">0.2</span><span class="p">),</span>
<span class="s2">&quot;normalize_advantage&quot;</span><span class="p">:</span> <span class="n">Categorical</span><span class="p">(</span>
<span class="s2">&quot;normalize_advantage&quot;</span><span class="p">,</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">],</span> <span class="n">default</span><span class="o">=</span><span class="kc">True</span>
<span class="p">),</span>
<span class="s2">&quot;ent_coef&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span><span class="s2">&quot;ent_coef&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">0.0</span><span class="p">),</span>
<span class="s2">&quot;vf_coef&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span><span class="s2">&quot;vf_coef&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">0.5</span><span class="p">),</span>
<span class="s2">&quot;max_grad_norm&quot;</span><span class="p">:</span> <span class="n">Float</span><span class="p">(</span><span class="s2">&quot;max_grad_norm&quot;</span><span class="p">,</span> <span class="p">(</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">),</span> <span class="n">default</span><span class="o">=</span><span class="mf">0.5</span><span class="p">),</span>
<span class="p">},</span>
<span class="p">)</span></div>


<div class="viewcode-block" id="PPO.get_default_hpo_config">
<a class="viewcode-back" href="../../../../../arlbench.core.algorithms.ppo.html#arlbench.core.algorithms.PPO.get_default_hpo_config">[docs]</a>
<span class="nd">@staticmethod</span>
Expand Down
Loading

0 comments on commit a44005c

Please sign in to comment.