Updates to the exercises for summer term 2024 (#19)

* ex08: transitioned from tf to torch and clean-up
* ex09: transitioned from tf to torch and clean-up
* ex10: transitioned from tf to torch and clean-up for tasks 1+2
* ex11: transition from tf to torch, updating WIP
* moved some cells in the notebook
* Updated requirements and removed 'faster' code in exercise 3
* first version for ex04 with updated function structure
* updated ex05
* drafts for ex4-7
* Finalised exercise 4
* Small fix in template ex. 4
* Final version ex. 5
* Cleared output
* updated ex06, reworked task 2 and 3
* Polished and finalised ex. 6
* Small fixes in ex. 7
* cleaned up ex10 task 3
* finished update for DDPG
* worked through ppo
* Exercise 7 finalisation
* Exercise 7 template remove output
* Finalised exercise 8
* Finalised ex 09
* Finalised ex 10
* Removed output from ex 10
* Finalised ex. 11
* Finalised ex. 12
* Removed strict gym version requirement
* Removed setuptools downgrade from required installation steps because it is (hopefully) not needed anymore
* Removed highly complicated task from ex 6

---------

Co-authored-by: XyDrKRulof <[email protected]>
upb-lea · Apr 15, 2024 · 3d71a59 · 3d71a59
1 parent 7a62090
commit 3d71a59
Show file tree

Hide file tree

Showing 28 changed files with 7,604 additions and 12,022 deletions.
diff --git a/README.md b/README.md
@@ -67,7 +67,6 @@ Lecture notes, tutorial tasks including solutions as well as online videos for t
 # Exercise Content
 All exercises are based on Python 3.9 and site-packages according to the requirements.txt:
 ```
->>> pip install setuptools==65.5.0
 >>> pip install -r requirements.txt
 ```
 

diff --git a/exercises/solutions/ex03/Ex3.ipynb b/exercises/solutions/ex03/Ex3.ipynb
@@ -611,24 +611,7 @@
     "            down_value = expected_rewards[1, state_idx] + values[state_idx + (state_idx % 2 + 2)]\n",
     "        values[state_idx] = np.max([up_value, down_value])\n",
     "        \n",
-    "        error = np.max([error, np.sum(np.abs(v_tilde-values[state_idx]))])\n",
-    "\n",
-    "# Alternative, faster solution\n",
-    "values2 = np.zeros(8)\n",
-    "iteration_idx2 = 0\n",
-    "error = 100\n",
-    "transition_indices = np.arange(1, 8)\n",
-    "transition_indices += 1 - transition_indices % 2  # results in [1 3 3 5 5 7 7]\n",
-    "transition_indices = np.column_stack([transition_indices, transition_indices + 1]).clip(max=7).T\n",
-    "while error > delta:\n",
-    "    iteration_idx2 += 1\n",
-    "    updated_values2 = np.max(expected_rewards + values2[transition_indices], axis=0)\n",
-    "    error = np.abs(values2[:-1] - updated_values2).max()  # last state in values2 is never updated\n",
-    "    values2[:-1] = updated_values2  # terminal state will never be updated\n",
-    "assert np.allclose(values, values2)\n",
-    "assert iteration_idx == iteration_idx2\n",
-    "    \n",
-    "        \n",
+    "        error = np.max([error, np.sum(np.abs(v_tilde-values[state_idx]))])        \n",
     "### END SOLUTION\n",
     "print(values)\n",
     "print(iteration_idx)"
@@ -652,14 +635,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "['Down', 'Down', 'Down', 'Down', 'Down', 'Graduated! Go home!', 'Graduated! Go home!']\n",
-      "When in Start: Home - Go Down\n",
-      "When in Auld Triangle - Go Down\n",
-      "When in Lötlampe - Go Down\n",
-      "When in Globetrotter - Go Down\n",
-      "When in Black Sheep - Go Down\n",
-      "When in Limericks - Graduated! Go home!\n",
-      "When in Fat Louis - Graduated! Go home!\n"
+      "['Down', 'Down', 'Down', 'Down', 'Down', 'Graduated! Go home!', 'Graduated! Go home!']\n"
      ]
     }
    ],
@@ -677,16 +653,7 @@
     "        policy.append(\"Down\")\n",
     "    else:\n",
     "        policy.append(\"Graduated! Go home!\")        \n",
-    "print(policy)\n",
-    "\n",
-    "# Alternative, faster solution\n",
-    "actions = ['Up', 'Down']\n",
-    "states = ['Start: Home', 'Auld Triangle', 'Lötlampe', 'Globetrotter', 'Black Sheep', 'Limericks', 'Fat Louis']\n",
-    "greedy_policy = np.argmax(expected_rewards + values2[transition_indices], axis=0)\n",
-    "for i, (s, p) in enumerate(zip(states, greedy_policy.tolist())):\n",
-    "    # From last two states there is only one direction\n",
-    "    direction = f'Go {actions[p]}' if i < len(states) - 2 else 'Graduated! Go home!' \n",
-    "    print(f'When in {s} - {direction}')"
+    "print(policy)"
    ]
   },
   {
@@ -710,7 +677,7 @@
  "metadata": {
   "celltoolbar": "Create Assignment",
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -724,9 +691,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.7"
+   "version": "3.9.19"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }