diff --git a/docs/source/_tutorials/tutorial16rdmd.html b/docs/source/_tutorials/tutorial16rdmd.html new file mode 100644 index 000000000..bc8c100a8 --- /dev/null +++ b/docs/source/_tutorials/tutorial16rdmd.html @@ -0,0 +1,8104 @@ + + + + + +tutorial-16-rdmd + + + + + + + + + + + + +
+
+ +
+
+ +
+ +
+
+ +
+ +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+ + +
+
+ +
+ +
+
+ +
+ + +
+
+ +
+ +
+
+ +
+ + +
+
+ +
+
+ +
+ + +
+
+ +
+ +
+
+ + diff --git a/pydmd/bopdmd.py b/pydmd/bopdmd.py index 87971f216..0a22570e5 100644 --- a/pydmd/bopdmd.py +++ b/pydmd/bopdmd.py @@ -477,6 +477,10 @@ def compute_residual(alpha): B, residual, error = compute_residual(alpha) U, S, Vh = self._compute_irank_svd(Phi(alpha, t), tolrank) + # Initialize termination flags. + converged = False + stalled = False + # Initialize storage. all_error = np.zeros(maxiter) djac_matrix = np.zeros((M * IS, IA), dtype="complex") @@ -548,7 +552,7 @@ def step(_lambda, rhs, scales_pvt, ij_pvt): 0.5 * np.linalg.multi_dot( [delta_0.conj().T, djac_matrix.conj().T, rhs_temp] - ).real + )[0].real ) improvement_ratio = actual_improvement / pred_improvement @@ -564,20 +568,21 @@ def step(_lambda, rhs, scales_pvt, ij_pvt): B_0, residual_0, error_0 = compute_residual(alpha_0) if error_0 < error: - alpha, B = alpha_0, B_0 - residual, error = residual_0, error_0 break - # Terminate if no appropriate step length was found. + # Terminate if no appropriate step length was found... if error_0 >= error: if verbose: msg = ( "Failed to find appropriate step length at " "iteration {}. Current error {}." ) - warnings.warn(msg.format(itr, error)) + print(msg.format(itr, error)) return B, alpha + # ...otherwise, update and proceed. + alpha, B, residual, error = alpha_0, B_0, residual_0, error_0 + # Record the current error. all_error[itr] = error @@ -586,23 +591,26 @@ def step(_lambda, rhs, scales_pvt, ij_pvt): update_msg = "Step {} Error {} Lambda {}" print(update_msg.format(itr, error, _lambda)) - # Terminate if the tolerance is met. - if error < tol: + # Update termination status and terminate if converged or stalled. + converged = error < tol + error_reduction = all_error[itr - 1] - all_error[itr] + stalled = (itr > 0) and ( + error_reduction < eps_stall * all_error[itr - 1] + ) + + if converged: + if verbose: + print("Convergence reached!") return B, alpha - # Terminate if a stall is detected. 
- if ( - itr > 0 - and all_error[itr - 1] - all_error[itr] - < eps_stall * all_error[itr - 1] - ): + if stalled: if verbose: msg = ( "Stall detected: error reduced by less than {} " "times the error at the previous step. " "Iteration {}. Current error {}." ) - warnings.warn(msg.format(eps_stall, itr, error)) + print(msg.format(eps_stall, itr, error)) return B, alpha U, S, Vh = self._compute_irank_svd(Phi(alpha, t), tolrank) @@ -613,7 +621,7 @@ def step(_lambda, rhs, scales_pvt, ij_pvt): "Failed to reach tolerance after maxiter = {} iterations. " "Current error {}." ) - warnings.warn(msg.format(maxiter, error)) + print(msg.format(maxiter, error)) return B, alpha diff --git a/pydmd/rdmd.py b/pydmd/rdmd.py index ebf23f256..ec5cca31c 100644 --- a/pydmd/rdmd.py +++ b/pydmd/rdmd.py @@ -17,10 +17,10 @@ class RDMD(CDMD): """ Randomized Dynamic Mode Decomposition - :param rand_mat: The random test matrix that will be used when executing + :param test_matrix: The random test matrix that will be used when executing the Randomized QB Decomposition. If not provided, the `svd_rank` and `oversampling` parameters will be used to compute the random matrix. - :type rand_mat: numpy.ndarray + :type test_matrix: numpy.ndarray :param oversampling: Number of additional samples (beyond the desired rank) to use when computing the random test matrix. Note that values {5,10} tend to be sufficient. @@ -33,7 +33,7 @@ class RDMD(CDMD): def __init__( self, - rand_mat=None, + test_matrix=None, oversampling=10, power_iters=2, svd_rank=0, @@ -57,7 +57,7 @@ def __init__( self._svd_rank = svd_rank self._oversampling = oversampling self._power_iters = power_iters - self._rand_mat = rand_mat + self._test_matrix = test_matrix def _compress_snapshots(self): """ @@ -69,13 +69,13 @@ def _compress_snapshots(self): :rtype: numpy.ndarray """ # Define the random test matrix if not provided. 
- if self._rand_mat is None: + if self._test_matrix is None: m = self.snapshots.shape[-1] r = compute_rank(self.snapshots, self._svd_rank) - self._rand_mat = np.random.randn(m, r + self._oversampling) + self._test_matrix = np.random.randn(m, r + self._oversampling) # Compute sampling matrix. - Y = self.snapshots.dot(self._rand_mat) + Y = self.snapshots.dot(self._test_matrix) # Perform power iterations. for _ in range(self._power_iters): diff --git a/tutorials/README.md b/tutorials/README.md index 267ed1831..efdd305ce 100644 --- a/tutorials/README.md +++ b/tutorials/README.md @@ -19,8 +19,9 @@ An additional PDF tutorial ([DSWeb contest winner](https://dsweb.siam.org/The-Ma | Tutorial11 [[.ipynb](tutorial10/tutorial-11-regularization.ipynb), [.py](tutorial11/tutorial-11-regularization.py), [.html](http://pydmd.github.io/PyDMD/tutorial11regularization.html)] | Tikhonov regularization) | `pydmd.DMDBase` | | Tutorial12 [[.ipynb](tutorial12/tutorial-12-cdmd.ipynb), [.py](tutorial12/tutorial-12-cdmd.py)] | cDMD for background modeling | `pydmd.CDMD` | | Tutorial13 [[.ipynb](tutorial13/tutorial-13-subspacedmd.ipynb), [.py](tutorial13/tutorial-13-subspacedmd.py)] | SubspaceDMD for locating eigenvalues of stochastic systems | `pydmd.SubspaceDMD` | -| Tutorial14 [[.ipynb](tutorial14/tutorial-14-bop-dmd.ipynb), [.py](tutorial14/tutorial-14-bop-dmd.py), [.html](http://pydmd.github.io/PyDMD/tutorial14-bop-dmd.html)] | Comparison between Bagging-/ Optimized DMD and exact DMD | `pydmd.bopdmd` | -| Tutorial15 [[.ipynb](tutorial15/tutorial-15-pidmd.ipynb), [.py](tutorial15/tutorial-15-pidmd.py), [.html](http://pydmd.github.io/PyDMD/tutorial15-pidmd.html)] | Physics-informed DMD for manifold enforcement | `pydmd.pidmd` | +| Tutorial14 [[.ipynb](tutorial14/tutorial-14-bop-dmd.ipynb), [.py](tutorial14/tutorial-14-bop-dmd.py), [.html](http://pydmd.github.io/PyDMD/tutorial14-bop-dmd.html)] | Comparison between Bagging-/ Optimized DMD and exact DMD | `pydmd.BOPDMD` | +| Tutorial15 
[[.ipynb](tutorial15/tutorial-15-pidmd.ipynb), [.py](tutorial15/tutorial-15-pidmd.py), [.html](http://pydmd.github.io/PyDMD/tutorial15-pidmd.html)] | Physics-informed DMD for manifold enforcement | `pydmd.PiDMD` | +| Tutorial16 [[.ipynb](tutorial16/tutorial-16-rdmd.ipynb), [.py](tutorial16/tutorial-16-rdmd.py), [.html](http://pydmd.github.io/PyDMD/tutorial16-rdmd.html)] | Randomized DMD for greater computation speedup | `pydmd.RDMD` | diff --git a/tutorials/tutorial16/tutorial-16-rdmd.ipynb b/tutorials/tutorial16/tutorial-16-rdmd.ipynb new file mode 100644 index 000000000..7be09197d --- /dev/null +++ b/tutorials/tutorial16/tutorial-16-rdmd.ipynb @@ -0,0 +1,593 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9344ec59", + "metadata": {}, + "source": [ + "# Tutorial 16: Randomized DMD\n", + "\n", + "In this tutorial, we re-examine the system explored in [Tutorial 4](https://pydmd.github.io/PyDMD/tutorial4cdmd.html) and compare the performance of Compressed DMD (CDMD) and Randomized DMD (RDMD) [1] as means of improving the efficiency of the exact DMD algorithm. We highlight RDMD as an effective alternative to its predecessor CDMD, while also highlighting how one might tune the parameters of RDMD in order to balance accuracy and efficiency.\n", + "\n", + "[1] N. B. Erichson, L. Mathelin, J. N. Kutz, and S. L. Brunton, *Randomized dynamic mode decomposition*, SIAM J. Appl. Dyn. Syst., 18 (2019), pp. 1867-1891. https://doi.org/10.1137/18M1215013" + ] + }, + { + "cell_type": "markdown", + "id": "7e1282b7", + "metadata": {}, + "source": [ + "We begin by importing the `RDMD` class from the PyDMD package, along with the `DMD` and `CDMD` classes for performance comparison. We also import the `time` module for calculating runtime, `numpy` for mathematical computations, and `matplotlib.pyplot` for plotting." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b494d4e5", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from pydmd import DMD, CDMD, RDMD" + ] + }, + { + "cell_type": "markdown", + "id": "c87ae347", + "metadata": {}, + "source": [ + "We then define a function for calculating relative error, along with a function for computing the CDMD compression matrix used in Tutorial 4." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9e94831f", + "metadata": {}, + "outputs": [], + "source": [ + "def compute_error(true, est):\n", + " \"\"\"\n", + " Computes and returns relative error.\n", + " \"\"\"\n", + " return np.linalg.norm(true - est) / np.linalg.norm(true)\n", + "\n", + "\n", + "def build_compression_matrix(snapshots_matrix):\n", + " \"\"\"\n", + " Computes and returns the CDMD compression matrix used in Tutorial 4.\n", + " \"\"\"\n", + " random_matrix = np.random.permutation(\n", + " snapshots_matrix.shape[0] * snapshots_matrix.shape[1]\n", + " )\n", + " random_matrix = random_matrix.reshape(\n", + " snapshots_matrix.shape[1], snapshots_matrix.shape[0]\n", + " )\n", + " compression_matrix = random_matrix / np.linalg.norm(random_matrix)\n", + "\n", + " return compression_matrix" + ] + }, + { + "cell_type": "markdown", + "id": "2773bb37", + "metadata": {}, + "source": [ + "## The Toy Data Set\n", + "\n", + "Now, we re-create the helper function from Tutorial 4 that returns toy data snapshots for a given spatial and temporal resolution. 
Each data snapshot is the sum of the following three components, with $x \\in [-5, 5]$ and $t \\in [0, 4\\pi]$.\n", + "\n", + "- $f_1(x, t) = e^{\\frac{-x^2}{5}}\\,\\cos(4x)\\,e^{(2.3i)t}$\n", + "- $f_2(x, t) = \\bigg(1-e^{1-\\frac{x^2}{6}}\\bigg)e^{(1.3i)t}$\n", + "- $f_3(x, t) = \\bigg(-\\frac{x^2}{50} + 1\\bigg)(1.1i)^{-2t}$\n", + "\n", + "Here we produce our toy data set for 256 spatial collocation points across 128 time points. We then add Gaussian noise to our data so that we may compare method performance in the presence of measurement noise. The clean data and the noisy data sets are then plotted." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "5e51d5c4", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def create_dataset(x_dim, t_dim):\n", + " \"\"\"\n", + " Args:\n", + " x_dim = resolution along the x range [-5, 5]\n", + " t_dim = resolution along the t range [0, 4*pi]\n", + "\n", + " Returns:\n", + " x_grid = x collocation points\n", + " t_grid = t collocation points\n", + " X = (t_dim, x_dim) np.ndarray of snapshot data\n", + " \"\"\"\n", + " # Define the x and t collocation points.\n", + " x = np.linspace(-5, 5, x_dim)\n", + " t = np.linspace(0, 4 * np.pi, t_dim)\n", + " xgrid, tgrid = np.meshgrid(x, t)\n", + "\n", + " # Define the modes that make up each snapshot.\n", + " def f1(x, t):\n", + " return np.exp(-(x**2) / 5) * np.cos(4 * x) * np.exp(2.3j * t)\n", + "\n", + " def f2(x, t):\n", + " return (1 - np.exp(1 - (x**2) / 6)) * np.exp(1.3j * t)\n", + "\n", + " def f3(x, t):\n", + " return (1 - ((x**2) / 50)) * (1.1j ** (-2 * t))\n", + "\n", + " # Evaluate modes at each collocation point.\n", + " X1 = f1(xgrid, tgrid)\n", + " X2 = f2(xgrid, tgrid)\n", + " X3 = f3(xgrid, tgrid)\n", + "\n", + " return xgrid, tgrid, (X1 + X2 + X3)\n", + "\n", + "\n", + "# Generate and visualize the toy dataset.\n", + "xgrid, tgrid, X = create_dataset(x_dim=256, t_dim=128)\n", + "\n", + "# Generate noisy data for a given noise magnitude. 
Seed is used for reproducibility.\n", + "noise_mag = 0.1\n", + "rng = np.random.default_rng(seed=42)\n", + "X_noisy = X + (noise_mag * rng.standard_normal(X.shape))\n", + "\n", + "# Plot both the clean and the noisy data sets.\n", + "plt.figure(figsize=(8, 3))\n", + "for i, (mat, name) in enumerate(zip([X, X_noisy], [\"Data\", \"Noisy Data\"])):\n", + " plt.subplot(1, 2, i + 1)\n", + " plt.pcolor(xgrid, tgrid, mat.real)\n", + " plt.colorbar()\n", + " plt.title(name)\n", + " plt.xlabel(\"x\")\n", + " plt.ylabel(\"t\", rotation=0)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "a17001ce", + "metadata": {}, + "source": [ + "## Exact DMD\n", + "\n", + "We begin by applying exact DMD to our data so that its results may serve as a benchmark for CDMD and RDMD. Note that throughout this tutorial, we will fit our models to the noisy data set and use our models' ability to reconstruct the clean signal as a proxy for model accuracy. Furthermore, we record fitting time for all methods in order to compare method efficiency. Here, we replicate the DMD approach used in Tutorial 4." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "68a02436", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DMD Reconstruction Error: 0.017055067185643223\n", + "DMD Training Time: 0.16346025466918945\n" + ] + } + ], + "source": [ + "# Define the data matrices to be used for model fitting.\n", + "snapshots_matrix = X.T\n", + "snapshots_matrix_noisy = X_noisy.T\n", + "\n", + "# Fit a DMD model.\n", + "t0 = time.time()\n", + "dmd = DMD(svd_rank=3, exact=True)\n", + "dmd.fit(snapshots_matrix_noisy)\n", + "t1 = time.time()\n", + "\n", + "# Record and print model error and training time.\n", + "dmd_error = compute_error(snapshots_matrix, dmd.reconstructed_data)\n", + "dmd_time = t1 - t0\n", + "print(f\"DMD Reconstruction Error: {dmd_error}\")\n", + "print(f\"DMD Training Time: {dmd_time}\")" + ] + }, + { + "cell_type": "markdown", + "id": "053f91a2", + "metadata": {}, + "source": [ + "## Compressed DMD\n", + "\n", + "Now we apply CDMD to our data, where we again compute error and training time when given noisy data. Here, we compute these metrics across multiple trials in order to account for variations that result from randomness. We additionally utilize the compression matrix used in Tutorial 4 for all CDMD models as to replicate the Tutorial 4 approach. We also begin by including the time needed to compute the compression matrix in our CDMD training time." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2de0b129", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CDMD (Average) Reconstruction Error: 0.02876651792689702\n", + "CDMD (Average) Training Time: 0.08456253528594967\n" + ] + } + ], + "source": [ + "# Define the number of random trials to perform.\n", + "num_trials = 100\n", + "\n", + "# Initialize the error and runtime metrics.\n", + "cdmd_error = 0.0\n", + "cdmd_time = 0.0\n", + "\n", + "for _ in range(num_trials): # Perform multiple trials...\n", + " # Fit a CDMD model.\n", + " t0 = time.time()\n", + " compression_matrix = build_compression_matrix(snapshots_matrix)\n", + " cdmd = CDMD(svd_rank=3, compression_matrix=compression_matrix)\n", + " cdmd.fit(snapshots_matrix_noisy)\n", + " t1 = time.time()\n", + " # Incorporate this trial's results into the running averages.\n", + " cdmd_error += (\n", + " compute_error(snapshots_matrix, cdmd.reconstructed_data) / num_trials\n", + " )\n", + " cdmd_time += (t1 - t0) / num_trials\n", + "\n", + "# Print average model error and training runtime.\n", + "print(f\"CDMD (Average) Reconstruction Error: {cdmd_error}\")\n", + "print(f\"CDMD (Average) Training Time: {cdmd_time}\")" + ] + }, + { + "cell_type": "markdown", + "id": "fed87c06", + "metadata": {}, + "source": [ + "## Randomized DMD: Varying Oversampling\n", + "\n", + "We now examine the performance of RDMD, which is derived in [1] and implemented in the `RDMD` class of PyDMD.\n", + "\n", + "The performance of the RDMD algorithm is manually toggled by 2 major parameters, one of which is the **oversampling** parameter, which controls the number of additional random samples (beyond the predicted rank of the data) that are used to compute a basis for the range of the input data. 
In short, increasing the oversampling increases the probability that one is able to construct a good basis used in RDMD, yet it simultaneously increases runtime due to the usage of a larger random test matrix. It should be noted that in general, a small oversampling value approximately within the range of $[5, 10]$ often suffices [1].\n", + "\n", + "Here, we demonstrate how the performance of the `RDMD` module is impacted by the `oversampling` parameter, which is `10` by default and can be toggled upon the initialization of an `RDMD` model. Here, we examine oversampling values within the range $[0, 50]$ and we again fit our RDMD models to noisy data across multiple random trials. We then compare the average error and training time to that of exact DMD and CDMD." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1e195809", + "metadata": {}, + "outputs": [], + "source": [ + "# Define the default PyDMD oversampling value.\n", + "oversampling_default = 10\n", + "\n", + "# Define the oversampling values to investigate.\n", + "oversampling_values = np.arange(0, 51, 5)\n", + "\n", + "# Initialize the error and runtime metrics.\n", + "oversampling_error = np.zeros(len(oversampling_values))\n", + "oversampling_times = np.zeros(len(oversampling_values))\n", + "\n", + "for i, oversampling in enumerate(oversampling_values):\n", + " for _ in range(num_trials): # Perform multiple trials...\n", + " # Fit an RDMD model.\n", + " t0 = time.time()\n", + " rdmd = RDMD(svd_rank=3, oversampling=oversampling).fit(\n", + " snapshots_matrix_noisy\n", + " )\n", + " t1 = time.time()\n", + " # Incorporate this trial's results into the running averages.\n", + " oversampling_error[i] += (\n", + " compute_error(snapshots_matrix, rdmd.reconstructed_data)\n", + " / num_trials\n", + " )\n", + " oversampling_times[i] += (t1 - t0) / num_trials" + ] + }, + { + "cell_type": "markdown", + "id": "13c03dc7", + "metadata": {}, + "source": [ + "We now plot the results of our 
experiments.\n", + "\n", + "Notice that exact DMD and RDMD tend to be more accurate than CDMD, with RDMD performing considerably well with very little oversampling. Also notice that as expected, the time required to train an RDMD model on average increases as one increases the oversampling parameter. However, as long as the oversampling isn't too large, an RDMD model can be trained in about the same amount of time as a CDMD model for this particular data set." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "af74369d", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot experiment results!\n", + "plt.figure(figsize=(8, 3))\n", + "\n", + "# Plot error vs. oversampling.\n", + "plt.subplot(1, 2, 1)\n", + "plt.plot(oversampling_values, oversampling_error, \"-o\", c=\"g\", label=\"RDMD\")\n", + "plt.axhline(y=cdmd_error, c=\"b\", label=\"CDMD\")\n", + "plt.axhline(y=dmd_error, c=\"r\", label=\"Exact DMD\")\n", + "plt.axvline(x=oversampling_default, ls=\"--\", c=\"k\", label=\"default\")\n", + "plt.title(\"Reconstruction Error\")\n", + "plt.xlabel(\"Oversampling\")\n", + "plt.ylabel(\"Relative Error\")\n", + "plt.legend()\n", + "\n", + "# Plot runtime vs. oversampling.\n", + "plt.subplot(1, 2, 2)\n", + "plt.plot(oversampling_values, oversampling_times, \"-o\", c=\"g\", label=\"RDMD\")\n", + "plt.axhline(y=cdmd_time, c=\"b\", label=\"CDMD\")\n", + "plt.axhline(y=dmd_time, c=\"r\", label=\"Exact DMD\")\n", + "plt.axvline(x=oversampling_default, ls=\"--\", c=\"k\", label=\"default\")\n", + "plt.title(\"Training Time\")\n", + "plt.xlabel(\"Oversampling\")\n", + "plt.ylabel(\"Runtime\")\n", + "plt.legend()\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "93117ef8", + "metadata": {}, + "source": [ + "## Randomized DMD: Varying Power Iterations\n", + "\n", + "Another major RDMD parameter is the number of **power iterations** used during the randomized QB decomposition process. The use of power iterations is a data preprocessing step that promotes faster singular value decay and hence promotes better basis approximations. Thus similar to the oversampling parameter, increasing the number of power iterations tends to lead to increased accuracy with the drawback of increased runtime due to the need to pass through the data at each power iteration. 
In general, as little as $1$ or $2$ power iterations often suffice [1].\n", + "\n", + "The number of power iterations used may also be toggled upon the initialization of an `RDMD` model via the `power_iters` argument, which is `2` by default. Here, we run through the same RDMD experiments as before, only this time we examine power iteration values within the range $[0, 20]$." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "b5b4652b", + "metadata": {}, + "outputs": [], + "source": [ + "# Define the default PyDMD power_iter value.\n", + "power_iter_default = 2\n", + "\n", + "# Define the power iteration values to investigate.\n", + "power_iter_values = np.arange(0, 21, 2)\n", + "\n", + "# Initialize the error and runtime metrics.\n", + "power_iter_error = np.zeros(len(power_iter_values))\n", + "power_iter_times = np.zeros(len(power_iter_values))\n", + "\n", + "for i, power_iters in enumerate(power_iter_values):\n", + " for _ in range(num_trials): # Perform multiple trials...\n", + " # Fit an RDMD model.\n", + " t0 = time.time()\n", + " rdmd = RDMD(svd_rank=3, power_iters=power_iters).fit(\n", + " snapshots_matrix_noisy\n", + " )\n", + " t1 = time.time()\n", + " # Incorporate this trial's results into the running averages.\n", + " power_iter_error[i] += (\n", + " compute_error(snapshots_matrix, rdmd.reconstructed_data)\n", + " / num_trials\n", + " )\n", + " power_iter_times[i] += (t1 - t0) / num_trials" + ] + }, + { + "cell_type": "markdown", + "id": "c380edcd", + "metadata": {}, + "source": [ + "As expected, we observe that the time required to train an RDMD model tends to increase as one increases the power iterations. Yet again, as long as this parameter isn't too large, an RDMD model can on average be trained in less time than an exact DMD model, and in about the same amount of time as a CDMD model for this particular data set. 
However, this time, notice that on average, introducing as few as 2 power iterations results in a noticeable improvement in RDMD accuracy. Here, we omit the CDMD error so that we can better observe this phenomenon." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "13c5243a", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot experiment results!\n", + "plt.figure(figsize=(8, 3))\n", + "\n", + "# Plot error vs. power iterations.\n", + "plt.subplot(1, 2, 1)\n", + "plt.plot(power_iter_values, power_iter_error, \"-o\", c=\"g\", label=\"RDMD\")\n", + "# plt.axhline(y=cdmd_error, c=\"b\", label=\"CDMD\")\n", + "plt.axhline(y=dmd_error, c=\"r\", label=\"exact DMD\")\n", + "plt.axvline(x=power_iter_default, ls=\"--\", c=\"k\", label=\"default value\")\n", + "plt.title(\"Reconstruction Error\")\n", + "plt.xlabel(\"Power Iterations\")\n", + "plt.ylabel(\"Relative Error\")\n", + "plt.legend()\n", + "\n", + "# Plot runtime vs. oversampling.\n", + "plt.subplot(1, 2, 2)\n", + "plt.plot(power_iter_values, power_iter_times, \"-o\", c=\"g\", label=\"RDMD\")\n", + "plt.axhline(y=cdmd_time, c=\"b\", label=\"CDMD\")\n", + "plt.axhline(y=dmd_time, c=\"r\", label=\"exact DMD\")\n", + "plt.axvline(x=power_iter_default, ls=\"--\", c=\"k\", label=\"default value\")\n", + "plt.title(\"Training Time\")\n", + "plt.xlabel(\"Power Iterations\")\n", + "plt.ylabel(\"Runtime\")\n", + "plt.legend()\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "8256be8d", + "metadata": {}, + "source": [ + "## Runtime Comparison" + ] + }, + { + "cell_type": "markdown", + "id": "c0d9aac3", + "metadata": {}, + "source": [ + "So far, we've seen that RDMD tends to be computationally efficient like CDMD, and that the method tends to be more accurate than CDMD in the presence of noise. However, we have yet to observe another major advantage of RDMD over CDMD, which is that when performing data compression, RDMD relies more upon the intrinsic rank of the data, whereas CDMD relies more upon the dimension of the provided snapshots [1]. 
As a result, RDMD is able to achieve high-accuracy results with much smaller compression matrices than CDMD, hence leading to faster runtimes for very high-dimensional data sets.\n", + "\n", + "We demonstrate this by replicating the final runtime experiment performed in Tutorial 4, where we compare the runtime of exact DMD, CDMD, and RDMD as one increases the dimension of the input data snapshots. This time, we do not count the time required to build compression matrices as a part of the training runtime in accordance with Tutorial 4. Notice that our compression DMD methods are more computationally efficient than exact DMD, with RDMD surpassing CDMD in terms of efficiency for larger data sets.\n", + "\n", + "Here, we also demonstrate the usage of the `test_matrix` parameter of the `RDMD` module, which allows users to pass a custom random test matrix to the `RDMD` model. By default, `RDMD` uses a random test matrix $\\Omega \\in \\mathbb{R}^{m \\times l}$ drawn from a standard normal (Gaussian) distribution, where $m$ denotes the number of data snapshots and $l$ denotes the target rank + oversampling. However, one may seek to pre-compute their test matrix as demonstrated below, or use alternative test matrices such as the subsampled randomized Hadamard transform for improved efficiency [1]." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "05a7718a", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Runtime storage.\n", + "time_dmd = []\n", + "time_cdmd = []\n", + "time_rdmd = []\n", + "\n", + "# Define the data parameters to investigate.\n", + "niter = 4\n", + "t_dim = 100\n", + "xdims = 10 ** np.arange(2, 2 + niter)\n", + "\n", + "for x_dim in xdims:\n", + " # Build a data matrix using the current x resolution.\n", + " snapshots_matrix = create_dataset(x_dim, t_dim)[-1].T\n", + "\n", + " # Build compression matrix for CDMD.\n", + " compression_matrix = build_compression_matrix(snapshots_matrix)\n", + "\n", + " # Build random matrix for RDMD.\n", + " test_matrix = np.random.randn(snapshots_matrix.shape[1], 5)\n", + "\n", + " t0 = time.time()\n", + " DMD(svd_rank=-1, exact=True).fit(snapshots_matrix)\n", + " t1 = time.time()\n", + " time_dmd.append(t1 - t0)\n", + "\n", + " t0 = time.time()\n", + " CDMD(svd_rank=-1, compression_matrix=compression_matrix).fit(\n", + " snapshots_matrix\n", + " )\n", + " t1 = time.time()\n", + " time_cdmd.append(t1 - t0)\n", + "\n", + " t0 = time.time()\n", + " RDMD(svd_rank=-1, test_matrix=test_matrix).fit(snapshots_matrix)\n", + " t1 = time.time()\n", + " time_rdmd.append(t1 - t0)\n", + "\n", + "# Plot runtime results!\n", + "plt.figure(figsize=(10, 5))\n", + "plt.plot(xdims, time_dmd, \"ro--\", label=\"exact dmd\")\n", + "plt.plot(xdims, time_cdmd, \"bo--\", label=\"compressed dmd\")\n", + "plt.plot(xdims, time_rdmd, \"go--\", label=\"randomized dmd\")\n", + "plt.legend()\n", + "plt.ylabel(\"Seconds\")\n", + "plt.xlabel(\"Snapshots dimension\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "24c6d70e", + "metadata": {}, + "source": [ + "## In summary...\n", + "- `RDMD` tends to be faster and more accurate than `CDMD`.\n", + "- The previous statement is especially true in the presence of measurement noise and very high-dimensional snapshots.\n", + "- By default, `oversampling = 10` and `power_iters = 2`, as these are 
generally effective and appropriate parameter choices.\n", + "- Increasing either oversampling or power iterations often increases accuracy at the expense of slower runtimes.\n", + "- Use the `test_matrix` parameter to input custom or pre-computed random test matrices.\n", + "- See the original RDMD paper [1] for reference and for further details!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b44fdd7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/tutorial16/tutorial-16-rdmd.py b/tutorials/tutorial16/tutorial-16-rdmd.py new file mode 100644 index 000000000..fcadb0877 --- /dev/null +++ b/tutorials/tutorial16/tutorial-16-rdmd.py @@ -0,0 +1,380 @@ +#!/usr/bin/env python +# coding: utf-8 + +# # Tutorial 16: Randomized DMD +# +# In this tutorial, we re-examine the system explored in [Tutorial 4](https://pydmd.github.io/PyDMD/tutorial4cdmd.html) and compare the performance of Compressed DMD (CDMD) and Randomized DMD (RDMD) [1] as means of improving the efficiency of the exact DMD algorithm. We highlight RDMD as an effective alternative to its predecessor CDMD, while also highlighting how one might tune the parameters of RDMD in order to balance accuracy and efficiency. +# +# [1] N. B. Erichson, L. Mathelin, J. N. Kutz, and S. L. Brunton, *Randomized dynamic mode decomposition*, SIAM J. Appl. Dyn. Syst., 18 (2019), pp. 1867-1891. 
https://doi.org/10.1137/18M1215013 + +# We begin by importing the `RDMD` class from the PyDMD package, along with the `DMD` and `CDMD` classes for performance comparison. We also import the `time` module for calculating runtime, `numpy` for mathematical computations, and `matplotlib.pyplot` for plotting. + +# In[1]: + + +import time +import numpy as np +import matplotlib.pyplot as plt + +from pydmd import DMD, CDMD, RDMD + + +# We then define a function for calculating relative error, along with a function for computing the CDMD compression matrix used in Tutorial 4. + +# In[2]: + + +def compute_error(true, est): + """ + Computes and returns relative error. + """ + return np.linalg.norm(true - est) / np.linalg.norm(true) + + +def build_compression_matrix(snapshots_matrix): + """ + Computes and returns the CDMD compression matrix used in Tutorial 4. + """ + random_matrix = np.random.permutation( + snapshots_matrix.shape[0] * snapshots_matrix.shape[1] + ) + random_matrix = random_matrix.reshape( + snapshots_matrix.shape[1], snapshots_matrix.shape[0] + ) + compression_matrix = random_matrix / np.linalg.norm(random_matrix) + + return compression_matrix + + +# ## The Toy Data Set +# +# Now, we re-create the helper function from Tutorial 4 that returns toy data snapshots for a given spatial and temporal resolution. Each data snapshot is the sum of the following three components, with $x \in [-5, 5]$ and $t \in [0, 4\pi]$. +# +# - $f_1(x, t) = e^{\frac{-x^2}{5}}\,\cos(4x)\,e^{(2.3i)t}$ +# - $f_2(x, t) = \bigg(1-e^{1-\frac{x^2}{6}}\bigg)e^{(1.3i)t}$ +# - $f_3(x, t) = \bigg(-\frac{x^2}{50} + 1\bigg)(1.1i)^{-2t}$ +# +# Here we produce our toy data set for 256 spatial collocation points across 128 time points. We then add Gaussian noise to our data so that we may compare method performance in the presence of measurement noise. The clean data and the noisy data sets are then plotted. 
+ +# In[3]: + + +def create_dataset(x_dim, t_dim): + """ + Args: + x_dim = resolution along the x range [-5, 5] + t_dim = resolution along the t range [0, 4*pi] + + Returns: + x_grid = x collocation points + t_grid = t collocation points + X = (t_dim, x_dim) np.ndarray of snapshot data + """ + # Define the x and t collocation points. + x = np.linspace(-5, 5, x_dim) + t = np.linspace(0, 4 * np.pi, t_dim) + xgrid, tgrid = np.meshgrid(x, t) + + # Define the modes that make up each snapshot. + def f1(x, t): + return np.exp(-(x**2) / 5) * np.cos(4 * x) * np.exp(2.3j * t) + + def f2(x, t): + return (1 - np.exp(1 - (x**2) / 6)) * np.exp(1.3j * t) + + def f3(x, t): + return (1 - ((x**2) / 50)) * (1.1j ** (-2 * t)) + + # Evaluate modes at each collocation point. + X1 = f1(xgrid, tgrid) + X2 = f2(xgrid, tgrid) + X3 = f3(xgrid, tgrid) + + return xgrid, tgrid, (X1 + X2 + X3) + + +# Generate and visualize the toy dataset. +xgrid, tgrid, X = create_dataset(x_dim=256, t_dim=128) + +# Generate noisy data for a given noise magnitude. Seed is used for reproducibility. +noise_mag = 0.1 +rng = np.random.default_rng(seed=42) +X_noisy = X + (noise_mag * rng.standard_normal(X.shape)) + +# Plot both the clean and the noisy data sets. +plt.figure(figsize=(8, 3)) +for i, (mat, name) in enumerate(zip([X, X_noisy], ["Data", "Noisy Data"])): + plt.subplot(1, 2, i + 1) + plt.pcolor(xgrid, tgrid, mat.real) + plt.colorbar() + plt.title(name) + plt.xlabel("x") + plt.ylabel("t", rotation=0) +plt.tight_layout() +plt.show() + + +# ## Exact DMD +# +# We begin by applying exact DMD to our data so that its results may serve as a benchmark for CDMD and RDMD. Note that throughout this tutorial, we will fit our models to the noisy data set and use our models' ability to reconstruct the clean signal as a proxy for model accuracy. Furthermore, we record fitting time for all methods in order to compare method efficiency. Here, we replicate the DMD approach used in Tutorial 4. 
# In[4]:


# Define the data matrices to be used for model fitting.
# Models are fit to the noisy snapshots and scored against the clean ones.
snapshots_matrix = X.T
snapshots_matrix_noisy = X_noisy.T

# Fit a DMD model.
# time.perf_counter() is used instead of time.time() because it is the
# clock intended for measuring short durations; the fits timed here are brief.
t0 = time.perf_counter()
dmd = DMD(svd_rank=3, exact=True)
dmd.fit(snapshots_matrix_noisy)
t1 = time.perf_counter()

# Record and print model error and training time.
dmd_error = compute_error(snapshots_matrix, dmd.reconstructed_data)
dmd_time = t1 - t0
print(f"DMD Reconstruction Error: {dmd_error}")
print(f"DMD Training Time: {dmd_time}")


# ## Compressed DMD
#
# Now we apply CDMD to our data, where we again compute error and training time when given noisy data. Here, we compute these metrics across multiple trials in order to account for variations that result from randomness. We additionally utilize the compression matrix used in Tutorial 4 for all CDMD models so as to replicate the Tutorial 4 approach. We also begin by including the time needed to compute the compression matrix in our CDMD training time.

# In[5]:


# Define the number of random trials to perform.
num_trials = 100

# Initialize the error and runtime metrics.
cdmd_error = 0.0
cdmd_time = 0.0

for _ in range(num_trials):  # Perform multiple trials...
    # Fit a CDMD model. Building the compression matrix is deliberately
    # inside the timed region for this experiment.
    t0 = time.perf_counter()
    compression_matrix = build_compression_matrix(snapshots_matrix)
    cdmd = CDMD(svd_rank=3, compression_matrix=compression_matrix)
    cdmd.fit(snapshots_matrix_noisy)
    t1 = time.perf_counter()
    # Incorporate this trial's results into the running averages.
    cdmd_error += (
        compute_error(snapshots_matrix, cdmd.reconstructed_data) / num_trials
    )
    cdmd_time += (t1 - t0) / num_trials

# Print average model error and training runtime.
print(f"CDMD (Average) Reconstruction Error: {cdmd_error}")
print(f"CDMD (Average) Training Time: {cdmd_time}")


# ## Randomized DMD: Varying Oversampling
#
# We now examine the performance of RDMD, which is derived in [1] and implemented in the `RDMD` class of PyDMD.
#
# The performance of the RDMD algorithm is controlled by two major parameters, one of which is the **oversampling** parameter, which controls the number of additional random samples (beyond the predicted rank of the data) that are used to compute a basis for the range of the input data. In short, increasing the oversampling increases the probability that one is able to construct a good basis used in RDMD, yet it simultaneously increases runtime due to the usage of a larger random test matrix. It should be noted that in general, a small oversampling value approximately within the range of $[5, 10]$ often suffices [1].
#
# Here, we demonstrate how the performance of the `RDMD` module is impacted by the `oversampling` parameter, which is `10` by default and can be toggled upon the initialization of an `RDMD` model. Here, we examine oversampling values within the range $[0, 50]$ and we again fit our RDMD models to noisy data across multiple random trials. We then compare the average error and training time to that of exact DMD and CDMD.

# In[6]:


# Define the default PyDMD oversampling value.
oversampling_default = 10

# Define the oversampling values to investigate.
oversampling_values = np.arange(0, 51, 5)

# Initialize the error and runtime metrics (one slot per oversampling value).
oversampling_error = np.zeros(len(oversampling_values))
oversampling_times = np.zeros(len(oversampling_values))

for i, oversampling in enumerate(oversampling_values):
    for _ in range(num_trials):  # Perform multiple trials...
        # Fit an RDMD model.
        # time.perf_counter() is used instead of time.time() because it is
        # the clock intended for measuring short durations.
        t0 = time.perf_counter()
        rdmd = RDMD(svd_rank=3, oversampling=oversampling).fit(
            snapshots_matrix_noisy
        )
        t1 = time.perf_counter()
        # Incorporate this trial's results into the running averages.
        oversampling_error[i] += (
            compute_error(snapshots_matrix, rdmd.reconstructed_data)
            / num_trials
        )
        oversampling_times[i] += (t1 - t0) / num_trials


# We now plot the results of our experiments.
#
# Notice that exact DMD and RDMD tend to be more accurate than CDMD, with RDMD performing quite well with very little oversampling. Also notice that, as expected, the time required to train an RDMD model on average increases as one increases the oversampling parameter. However, as long as the oversampling isn't too large, an RDMD model can be trained in about the same amount of time as a CDMD model for this particular data set.

# In[7]:


# Plot experiment results!
plt.figure(figsize=(8, 3))

# Each panel is described by: RDMD curve, CDMD level, exact DMD level,
# panel title, and y-axis label. The left panel shows error vs. oversampling
# and the right panel shows runtime vs. oversampling.
oversampling_panels = [
    (
        oversampling_error,
        cdmd_error,
        dmd_error,
        "Reconstruction Error",
        "Relative Error",
    ),
    (oversampling_times, cdmd_time, dmd_time, "Training Time", "Runtime"),
]

for panel, (rdmd_curve, cdmd_level, dmd_level, title, ylabel) in enumerate(
    oversampling_panels, start=1
):
    plt.subplot(1, 2, panel)
    plt.plot(oversampling_values, rdmd_curve, "-o", c="g", label="RDMD")
    plt.axhline(y=cdmd_level, c="b", label="CDMD")
    plt.axhline(y=dmd_level, c="r", label="Exact DMD")
    plt.axvline(x=oversampling_default, ls="--", c="k", label="default")
    plt.title(title)
    plt.xlabel("Oversampling")
    plt.ylabel(ylabel)
    plt.legend()

plt.tight_layout()
plt.show()


# ## Randomized DMD: Varying Power Iterations
#
# Another major RDMD parameter is the number of **power iterations** used during the randomized QB decomposition process. The use of power iterations is a data preprocessing step that promotes faster singular value decay and hence promotes better basis approximations. Thus, similar to the oversampling parameter, increasing the number of power iterations tends to lead to increased accuracy with the drawback of increased runtime due to the need to pass through the data at each power iteration. In general, as few as $1$ or $2$ power iterations often suffice [1].
#
# The number of power iterations used may also be toggled upon the initialization of an `RDMD` model via the `power_iters` argument, which is `2` by default. Here, we run through the same RDMD experiments as before, only this time we examine power iteration values within the range $[0, 20]$.

# In[8]:


# Define the default PyDMD `power_iters` value.
power_iter_default = 2

# Define the power iteration values to investigate.
power_iter_values = np.arange(0, 21, 2)

# Initialize the error and runtime metrics (one slot per power_iters value).
power_iter_error = np.zeros(len(power_iter_values))
power_iter_times = np.zeros(len(power_iter_values))

for i, power_iters in enumerate(power_iter_values):
    for _ in range(num_trials):  # Perform multiple trials...
        # Fit an RDMD model.
        # time.perf_counter() is used instead of time.time() because it is
        # the clock intended for measuring short durations.
        t0 = time.perf_counter()
        rdmd = RDMD(svd_rank=3, power_iters=power_iters).fit(
            snapshots_matrix_noisy
        )
        t1 = time.perf_counter()
        # Incorporate this trial's results into the running averages.
        power_iter_error[i] += (
            compute_error(snapshots_matrix, rdmd.reconstructed_data)
            / num_trials
        )
        power_iter_times[i] += (t1 - t0) / num_trials


# As expected, we observe that the time required to train an RDMD model tends to increase as one increases the power iterations. Yet again, as long as this parameter isn't too large, an RDMD model can on average be trained in less time than an exact DMD model, and in about the same amount of time as a CDMD model for this particular data set. However this time, notice that on average, introducing as few as 2 power iterations results in a noticeable improvement in RDMD accuracy. Here, we omit the CDMD error so that we can better observe this phenomenon.

# In[9]:


# Plot experiment results!
plt.figure(figsize=(8, 3))

# Plot error vs. power iterations.
plt.subplot(1, 2, 1)
plt.plot(power_iter_values, power_iter_error, "-o", c="g", label="RDMD")
# The CDMD error line is intentionally left out of this panel so that the
# RDMD improvement stays visible; uncomment the next line to draw it.
# plt.axhline(y=cdmd_error, c="b", label="CDMD")
plt.axhline(y=dmd_error, c="r", label="exact DMD")
plt.axvline(x=power_iter_default, ls="--", c="k", label="default value")
plt.title("Reconstruction Error")
plt.xlabel("Power Iterations")
plt.ylabel("Relative Error")
plt.legend()

# Plot runtime vs. power iterations.
plt.subplot(1, 2, 2)
plt.plot(power_iter_values, power_iter_times, "-o", c="g", label="RDMD")
plt.axhline(y=cdmd_time, c="b", label="CDMD")
plt.axhline(y=dmd_time, c="r", label="exact DMD")
plt.axvline(x=power_iter_default, ls="--", c="k", label="default value")
plt.title("Training Time")
plt.xlabel("Power Iterations")
plt.ylabel("Runtime")
plt.legend()
plt.tight_layout()
plt.show()


# ## Runtime Comparison

# So far, we've seen that RDMD tends to be computationally efficient like CDMD, and that the method tends to be more accurate than CDMD in the presence of noise. However, we have yet to observe another major advantage of RDMD over CDMD, which is that when performing data compression, RDMD relies more upon the intrinsic rank of the data, whereas CDMD relies more upon the dimension of the provided snapshots [1]. As a result, RDMD is able to achieve high-accuracy results with much smaller compression matrices than CDMD, hence leading to faster runtimes for very high-dimensional data sets.
#
# We demonstrate this by replicating the final runtime experiment performed in Tutorial 4, where we compare the runtime of exact DMD, CDMD, and RDMD as one increases the dimension of the input data snapshots. This time, we do not count the time required to build compression matrices as a part of the training runtime in accordance with Tutorial 4. Notice that our compression DMD methods are more computationally efficient than exact DMD, with RDMD surpassing CDMD in terms of efficiency for larger data sets.
#
# Here, we also demonstrate the usage of the `test_matrix` parameter of the `RDMD` module, which allows users to pass a custom random test matrix to the `RDMD` model. By default, `RDMD` uses a random test matrix $\Omega \in \mathbb{R}^{m \times l}$ drawn from a standard normal (Gaussian) distribution, where $m$ denotes the number of data snapshots and $l$ denotes the target rank + oversampling. However, one may seek to pre-compute their test matrix as demonstrated below, or use alternative test matrices such as the subsampled randomized Hadamard transform for improved efficiency [1].

# In[10]:


# Runtime storage (one entry per snapshot dimension, per method).
time_dmd = []
time_cdmd = []
time_rdmd = []

# Define the data parameters to investigate.
niter = 4
t_dim = 100
xdims = 10 ** np.arange(2, 2 + niter)

for x_dim in xdims:
    # Build a data matrix using the current x resolution.
    snapshots_matrix = create_dataset(x_dim, t_dim)[-1].T

    # Build compression matrix for CDMD (outside the timed region).
    compression_matrix = build_compression_matrix(snapshots_matrix)

    # Build random matrix for RDMD (outside the timed region): one row per
    # snapshot and 5 columns.
    test_matrix = np.random.randn(snapshots_matrix.shape[1], 5)

    # time.perf_counter() is used instead of time.time() because it is the
    # clock intended for measuring short durations.
    t0 = time.perf_counter()
    DMD(svd_rank=-1, exact=True).fit(snapshots_matrix)
    t1 = time.perf_counter()
    time_dmd.append(t1 - t0)

    t0 = time.perf_counter()
    CDMD(svd_rank=-1, compression_matrix=compression_matrix).fit(
        snapshots_matrix
    )
    t1 = time.perf_counter()
    time_cdmd.append(t1 - t0)

    t0 = time.perf_counter()
    RDMD(svd_rank=-1, test_matrix=test_matrix).fit(snapshots_matrix)
    t1 = time.perf_counter()
    time_rdmd.append(t1 - t0)

# Plot runtime results!
plt.figure(figsize=(10, 5))
plt.plot(xdims, time_dmd, "ro--", label="exact dmd")
plt.plot(xdims, time_cdmd, "bo--", label="compressed dmd")
plt.plot(xdims, time_rdmd, "go--", label="randomized dmd")
plt.legend()
plt.ylabel("Seconds")
plt.xlabel("Snapshots dimension")
plt.show()


# ## In summary...
# - `RDMD` tends to be faster and more accurate than `CDMD`.
+# - The previous statement is especially true in the presence of measurement noise and very high-dimensional snapshots. +# - By default, `oversampling = 10` and `power_iters = 2`, as these are generally effective and appropriate parameter choices. +# - Increasing either oversampling or power iterations often increases accuracy at the expense of slower runtimes. +# - Use the `test_matrix` parameter to input custom or pre-computed random test matrices. +# - See the original RDMD paper [1] for reference and for further details! + +# In[ ]: