diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 2336a6c..af9ea82 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -15,16 +15,15 @@ jobs: id-token: write steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v3 - with: - python-version: "3.x" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install build - - name: Build package - run: python -m build - - name: Publish package - uses: pypa/gh-action-pypi-publish@v1.8.5 + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip build + - name: Build package + run: python -m build + - name: Publish package to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/.github/workflows/python-pytest.yml b/.github/workflows/python-pytest.yml index 9bc2232..63296c8 100644 --- a/.github/workflows/python-pytest.yml +++ b/.github/workflows/python-pytest.yml @@ -12,24 +12,49 @@ on: workflow_dispatch: jobs: - build: + test: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] - + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + cache: "pip" - name: Install dependencies run: | - python -m pip install --upgrade pip - python -m pip install pytest build - python -m pip install . - - name: Build package - run: python -m build + python -m pip install --upgrade pip pytest pytest-cov + python -m pip install -e . 
- name: Test with pytest - run: pytest + run: | + pytest -k test_optim + NUMBA_DISABLE_JIT=1 pytest --cov-report term --cov=slise/ --cov-fail-under=9 + + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + - run: python -m pip install --upgrade pip build + - name: Build package + run: | + python -m build + python -c "import os, glob; assert os.path.getsize(sorted(glob.glob('dist/slise-*.whl'))[-1]) > 10_000" + + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + - run: python -m pip install --upgrade pip ruff + - name: Lint with Ruff + run: | + ruff check --output-format=github + ruff format --check \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 63c1912..b751d8f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "slise" -version = "2.2.3" +version = "2.2.4" authors = [{ name = "Anton Björklund", email = "anton.bjorklund@helsinki.fi" }] description = "The SLISE algorithm for robust regression and explanations of black box models" readme = "README.md" @@ -28,7 +28,7 @@ dependencies = [ ] [project.optional-dependencies] -dev = ["pytest", "black[jupyter]", "pylint", "IPython"] +dev = ["pytest", "pytest-cov", "black[jupyter]", "pylint", "IPython", "ruff"] tbb = ["tbb"] [project.urls] diff --git a/slise/__init__.py b/slise/__init__.py index 9b816f8..d723bc6 100644 --- a/slise/__init__.py +++ b/slise/__init__.py @@ -1,49 +1,53 @@ """ - SLISE - Sparse Linear Subset Explanations - ----------------------------------------- - - The SLISE algorithm can be used for both robust regression and to explain outcomes from black box models. - See [slise.slise.regression][] and [slise.slise.explain][] for referense. - - - In robust regression we fit regression models that can handle data that - contains outliers. SLISE accomplishes this by fitting a model such that - the largest possible subset of the data items have an error less than a - given value. All items with an error larger than that are considered - potential outliers and do not affect the resulting model. - - SLISE can also be used to provide local model-agnostic explanations for - outcomes from black box models. To do this we replace the ground truth - response vector with the predictions from the complex model. Furthermore, we - force the model to fit a selected item (making the explanation local). This - gives us a local approximation of the complex model with a simpler linear - model. In contrast to other methods SLISE creates explanations using real - data (not some discretised and randomly sampled data) so we can be sure that - all inputs are valid (i.e. in the correct data manifold, and follows the - constraints used to generate the data, e.g., the laws of physics). - - - More in-depth details about the algorithm can be found in the papers: - - Björklund A., Henelius A., Oikarinen E., Kallonen K., Puolamäki K. - Sparse Robust Regression for Explaining Classifiers. - Discovery Science (DS 2019). - Lecture Notes in Computer Science, vol 11828, Springer. - https://doi.org/10.1007/978-3-030-33778-0_27 - - Björklund A., Henelius A., Oikarinen E., Kallonen K., Puolamäki K. - Robust regression via error tolerance. - Data Mining and Knowledge Discovery (2022). 
-    https://doi.org/10.1007/s10618-022-00819-2
-
+SLISE - Sparse Linear Subset Explanations
+-----------------------------------------
+
+The SLISE algorithm can be used for both robust regression and to explain outcomes from black box models.
+See [slise.slise.regression][] and [slise.slise.explain][] for reference.
+
+
+In robust regression we fit regression models that can handle data that
+contains outliers. SLISE accomplishes this by fitting a model such that
+the largest possible subset of the data items have an error less than a
+given value. All items with an error larger than that are considered
+potential outliers and do not affect the resulting model.
+
+SLISE can also be used to provide local model-agnostic explanations for
+outcomes from black box models. To do this we replace the ground truth
+response vector with the predictions from the complex model. Furthermore, we
+force the model to fit a selected item (making the explanation local). This
+gives us a local approximation of the complex model with a simpler linear
+model. In contrast to other methods, SLISE creates explanations using real
+data (not some discretised and randomly sampled data) so we can be sure that
+all inputs are valid (i.e. in the correct data manifold, and follow the
+constraints used to generate the data, e.g., the laws of physics).
+
+
+More in-depth details about the algorithm can be found in the papers:
+
+Björklund A., Henelius A., Oikarinen E., Kallonen K., Puolamäki K.
+Sparse Robust Regression for Explaining Classifiers.
+Discovery Science (DS 2019).
+Lecture Notes in Computer Science, vol 11828, Springer.
+https://doi.org/10.1007/978-3-030-33778-0_27
+
+Björklund A., Henelius A., Oikarinen E., Kallonen K., Puolamäki K.
+Robust regression via error tolerance.
+Data Mining and Knowledge Discovery (2022).
+https://doi.org/10.1007/s10618-022-00819-2
+
+Björklund A., Henelius A., Oikarinen E., Kallonen K., Puolamäki K.
+Explaining any black box model using real data.
+Frontiers in Computer Science 5:1143904 (2023).
+https://doi.org/10.3389/fcomp.2023.1143904
 """
-from slise.slise import (
+from slise.slise import (  # noqa: F401
     SliseRegression,
     regression,
     SliseExplainer,
     explain,
     SliseWarning,
 )
-from slise.utils import limited_logit as logit
-from slise.data import normalise_robust
+from slise.utils import limited_logit as logit  # noqa: F401
+from slise.data import normalise_robust  # noqa: F401
diff --git a/slise/data.py b/slise/data.py
index 389a158..1d41c72 100644
--- a/slise/data.py
+++ b/slise/data.py
@@ -1,5 +1,5 @@
 """
-    This script contains functions for modifying data, mainly normalisation and PCA.
+This script contains functions for modifying data, mainly normalisation and PCA.
 """
 
 from typing import NamedTuple, Tuple, Union, Optional
diff --git a/slise/initialisation.py b/slise/initialisation.py
index 04be5d5..2a5827f 100644
--- a/slise/initialisation.py
+++ b/slise/initialisation.py
@@ -1,5 +1,5 @@
 """
-    This script contains functions for initialising alpha and beta in SLISE.
+This script contains functions for initialising alpha and beta in SLISE.
""" from math import log @@ -122,9 +122,7 @@ def initialise_zeros( """ epsilon = epsilon**2 beta_max = min(beta_max, beta_max_init) / epsilon - beta = next_beta( - Y**2, epsilon, 0, weight, beta_max, log(max_approx), min_beta_step - ) + beta = next_beta(Y**2, epsilon, 0, weight, beta_max, log(max_approx), min_beta_step) return np.zeros(X.shape[1]), beta diff --git a/slise/optimisation.py b/slise/optimisation.py index 99c595a..395af03 100644 --- a/slise/optimisation.py +++ b/slise/optimisation.py @@ -1,5 +1,5 @@ """ - This script contains the loss functions and optimisation functions for SLISE. +This script contains the loss functions and optimisation functions for SLISE. """ from math import log @@ -502,11 +502,11 @@ def regularised_regression( lambda2 = float(lambda2) assert X.shape[0] == len(Y), f"Different lengths {X.shape[0]} != {len(Y)}" if weight is None: - lf = lambda alpha: _ridge_numba(alpha, X, Y, lambda2) + lf = lambda alpha: _ridge_numba(alpha, X, Y, lambda2) # noqa: E731 else: weight = np.ascontiguousarray(weight, dtype=np.float64) assert Y.shape == weight.shape, f"Different shapes {Y.shape} != {weight.shape}" - lf = lambda alpha: _ridge_numbaw(alpha, X, Y, lambda2, weight) + lf = lambda alpha: _ridge_numbaw(alpha, X, Y, lambda2, weight) # noqa: E731 return owlqn(lf, np.zeros(X.shape[1], dtype=np.float64), lambda1, max_iterations) @@ -547,11 +547,11 @@ def optimise_loss( epsilon = float(epsilon) beta = float(beta) if weight is None: - lf = lambda alpha: _loss_grad(alpha, X, Y, epsilon, beta, lambda2) + lf = lambda alpha: _loss_grad(alpha, X, Y, epsilon, beta, lambda2) # noqa: E731 else: weight = np.ascontiguousarray(weight, dtype=np.float64) assert Y.shape == weight.shape, f"Different shapes {Y.shape} != {weight.shape}" - lf = lambda alpha: _loss_gradw(alpha, X, Y, epsilon, beta, lambda2, weight) + lf = lambda alpha: _loss_gradw(alpha, X, Y, epsilon, beta, lambda2, weight) # noqa: E731 return owlqn(lf, alpha, lambda1, max_iterations) @@ -576,8 +576,8 @@ def log_approximation_ratio( """ if beta1 >= beta2: return 0 - log_f = lambda r, beta: log_sigmoid(beta * (epsilon2 - r)) - dlog_g = lambda r: -beta1 * dlog_sigmoid( + log_f = lambda r, beta: log_sigmoid(beta * (epsilon2 - r)) # noqa: E731 + dlog_g = lambda r: -beta1 * dlog_sigmoid( # noqa: E731 beta1 * (epsilon2 - r) ) + beta2 * dlog_sigmoid(beta2 * (epsilon2 - r)) if dlog_g(0) < 0: @@ -628,7 +628,7 @@ def next_beta( if log_approx <= log_max_approx: return beta_max else: - f = ( + f = ( # noqa: E731 lambda b: log_approximation_ratio(residuals2, epsilon2, beta, b, weight) - log_max_approx ) @@ -681,9 +681,7 @@ def _debug_log( """ residuals = (X @ alpha - Y) ** 2 loss = loss_sharp(alpha, X, Y, epsilon, lambda1, lambda2, weight) - bloss = loss_residuals( - alpha, residuals, epsilon**2, beta, lambda1, lambda2, weight - ) + bloss = loss_residuals(alpha, residuals, epsilon**2, beta, lambda1, lambda2, weight) epss = matching_epsilon(residuals, epsilon**2, beta, weight) beta = beta * epsilon**2 print( diff --git a/slise/plot.py b/slise/plot.py index 25a6906..01cac57 100644 --- a/slise/plot.py +++ b/slise/plot.py @@ -1,9 +1,9 @@ """ - This script contains functions for plotting SLISE solutions. +This script contains functions for plotting SLISE solutions. 
""" from collections import OrderedDict -from typing import List, Tuple, Union, Optional +from typing import List, Sequence, Tuple, Union, Optional from warnings import warn import numpy as np @@ -121,7 +121,11 @@ def extended_limits( def get_explanation_order( - alpha: np.ndarray, intercept: bool = True, min: int = 5, th=1e-6 + alpha: np.ndarray, + intercept: bool = True, + min: int = 5, + max: int = -1, + th: float = 1e-6, ) -> Tuple[np.ndarray, np.ndarray]: """Get the order in which to show the variables in the plots. @@ -129,6 +133,7 @@ def get_explanation_order( alpha (np.ndarray): Linear model. intercept (bool, optional): Does the model include an intercept. Defaults to True. min (int, optional): If the number of variables is larger than this, hide the zeroes. Defaults to 5. + max (int, optional): If `max > 0`, select the top variables. Defaults to -1. th ([type], optional): Threshold for zero. Defaults to 1e-6. Returns: @@ -136,17 +141,17 @@ def get_explanation_order( """ if intercept: order = np.argsort(alpha[1:]) + 1 - if len(order) > min: - order = order[np.nonzero(alpha[order])] - if len(order) > min: - order = order[np.abs(alpha[order]) > np.max(np.abs(alpha)) * th] - order = np.concatenate((order, np.zeros(1, order.dtype))) else: order = np.argsort(alpha) + if len(order) > min: + order = order[np.nonzero(alpha[order])] if len(order) > min: - order = order[np.nonzero(alpha[order])] - if len(order) > min: - order = order[np.abs(alpha[order]) > np.max(np.abs(alpha)) * th] + order = order[np.abs(alpha[order]) > np.max(np.abs(alpha)) * th] + if max > 0 and len(order) > max: + nth = -np.partition(-np.abs(alpha), max - 1)[max - 1] + order = order[np.abs(alpha[order]) >= nth] + if intercept: + order = np.concatenate((order, np.zeros(1, order.dtype))) return np.flip(order) @@ -214,8 +219,8 @@ def print_slise( for vs in zip(*(tuple(len(v) for v in vs) for vs in rows.values())) ] if len(coefficients) > num_var: - col_len = [l if c != 0 else 0 for l, c in zip(col_len, coefficients)] - lab_len = max(len(l) for l in rows) + col_len = [cl if c != 0 else 0 for cl, c in zip(col_len, coefficients)] + lab_len = max(len(r) for r in rows) if title: print(title) if unscaled_y is not None: @@ -342,6 +347,7 @@ def plot_dist( norm_terms: Optional[np.ndarray] = None, title: str = "SLISE Explanation", variables: Optional[List[str]] = None, + order: Union[None, int, Sequence[int]] = None, decimals: int = 3, fig: Optional[Figure] = None, ): @@ -358,6 +364,7 @@ def plot_dist( terms (Optional[np.ndarray], optional): Term vector (unscaled x*alpha), if available. Defaults to None. norm_terms (Optional[np.ndarray], optional): Term vector (scaled x*alpha), if available. Defaults to None. title (str, optional): Title of the plot. Defaults to "SLISE Explanation". + order (Union[None, int, Sequence[int]], optional): Select variables (None: all, int: largest, selected). Defaults to all. variables (Optional[List[str]], optional): Names for the (columns/) variables. Defaults to None. decimals (int, optional): Number of decimals when writing numbers. Defaults to 3. fig (Optional[Figure], optional): Pyplot figure to plot on, if None then a new plot is created and shown. Defaults to None. 
@@ -369,11 +376,18 @@ def plot_dist(
         alpha = model
     else:
         noalpha = False
+    order_offset = 0
     if len(model) == X.shape[1]:
         model = np.concatenate((np.zeros(1, model.dtype), model))
         alpha = np.concatenate((np.zeros(1, model.dtype), alpha))
+        order_offset = 1
         variables[0] = ""
-    order = get_explanation_order(np.abs(alpha), True)
+    if order is None:
+        order = get_explanation_order(np.abs(alpha), True)
+    elif isinstance(order, int):
+        order = get_explanation_order(np.abs(alpha), True, max=order)
+    else:
+        order = [0] + [i + order_offset for i in order if i + order_offset != 0]
     model = model[order]
     alpha = alpha[order]
     if terms is not None:
@@ -401,7 +415,7 @@ def fill_density(ax, X, x, n):
         if np.sum(subset) > 1:
             kde2 = gaussian_kde(X[subset], 0.2)
         else:
-            kde2 = lambda x: x * 0
+            kde2 = lambda x: x * 0  # noqa: E731
         lim = extended_limits(X, 0.1, 100)
         ax.plot(lim, kde1(lim), color="black", label="Dataset")
         ax.plot(
@@ -424,8 +438,8 @@ def fill_density(ax, X, x, n):
         fill_density(axs[0, 0], Y, y, "Prediction")
         axs[0, 0].legend()
         axs[0, 0].set_title("Dataset Distribution")
-        for i, k, n in zip(range(1, len(order)), order[1:] - 1, variables[1:]):
-            fill_density(axs[i, 0], X[:, k], x[k] if x is not None else None, n)
+        for i, k, n in zip(range(1, len(order)), order[1:], variables[1:]):
+            fill_density(axs[i, 0], X[:, k - 1], x[k - 1] if x is not None else None, n)
 
     # Bar plots
     def text(x, y, v):
diff --git a/slise/slise.py b/slise/slise.py
index 229eddc..00a0e28 100644
--- a/slise/slise.py
+++ b/slise/slise.py
@@ -1,14 +1,14 @@
 """
-    This script contains the main SLISE functions, and classes.
+This script contains the main SLISE functions, and classes.
 
-    The library can both be used "sk-learn" style with `SliseRegression(...).fit(X, y)`
-    and `SliseExplanation(...).explain(index)`, or in a more functional style with
-    `regression(...)` and `explain(...)`.
+The library can both be used "sk-learn" style with `SliseRegression(...).fit(X, y)`
+and `SliseExplainer(...).explain(index)`, or in a more functional style with
+`regression(...)` and `explain(...)`.
 """
 from __future__ import annotations
 
-from typing import Callable, List, Optional, Tuple, Union
+from typing import Callable, List, Optional, Sequence, Tuple, Union
 from warnings import warn
 
 import numpy as np
@@ -511,6 +511,7 @@ def plot_dist(
         self,
         title: str = "SLISE Regression",
         variables: list = None,
+        order: Union[None, int, Sequence[int]] = None,
         decimals: int = 3,
         fig: Union[Figure, None] = None,
     ) -> SliseExplainer:
@@ -519,23 +520,25 @@ def plot_dist(
         Args:
             title (str, optional): Title of the plot. Defaults to "SLISE Explanation".
             variables (list, optional): Names for the variables. Defaults to None.
+            order (Union[None, int, Sequence[int]], optional): Select the variables to show (None: all, int: the largest by absolute weight, sequence: the given indices). Defaults to None (all).
             decimals (int, optional): Number of decimals to write. Defaults to 3.
             fig (Union[Figure, None], optional): Pyplot figure to plot on, if None then a new plot is created and shown. Defaults to None.
""" plot_dist( - self._X, - self._Y, - self.coefficients, - self.subset(), - self.normalised(), - None, - None, - None, - None, - title, - variables, - decimals, - fig, + X=self._X, + Y=self._Y, + model=self.coefficients, + subset=self.subset(), + alpha=self.normalised(), + x=None, + y=None, + terms=None, + norm_terms=None, + title=title, + variables=variables, + order=order, + decimals=decimals, + fig=fig, ) def plot_subset( @@ -1012,6 +1015,7 @@ def plot_dist( self, title: str = "SLISE Explanation", variables: list = None, + order: Union[None, int, Sequence[int]] = None, decimals: int = 3, fig: Union[Figure, None] = None, ) -> SliseExplainer: @@ -1023,23 +1027,25 @@ def plot_dist( Args: title (str, optional): Title of the plot. Defaults to "SLISE Explanation". variables (list, optional): Names for the variables. Defaults to None. + order (Union[None, int, Sequence[int]], optional): Select variables (None: all, int: largest, selected). Defaults to all. decimals (int, optional): Number of decimals to write. Defaults to 3. fig (Union[Figure, None], optional): Pyplot figure to plot on, if None then a new plot is created and shown. Defaults to None. """ plot_dist( - self._X, - self._Y, - self.coefficients, - self.subset(), - self.normalised(), - self._x, - self._y, - self.get_terms(False), - self.get_terms(True) if self._normalise else None, - title, - variables, - decimals, - fig, + X=self._X, + Y=self._Y, + model=self.coefficients, + subset=self.subset(), + alpha=self.normalised(), + x=self._x, + y=self._y, + terms=self.get_terms(False), + norm_terms=self.get_terms(True) if self._normalise else None, + title=title, + variables=variables, + order=order, + decimals=decimals, + fig=fig, ) def plot_subset( diff --git a/slise/utils.py b/slise/utils.py index 72fb764..f4b691a 100644 --- a/slise/utils.py +++ b/slise/utils.py @@ -1,5 +1,5 @@ """ - This script contains some utility functions. +This script contains some utility functions. """ from typing import Union diff --git a/tests/__init__.py b/tests/__init__.py index 5c51e79..a605fb2 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,5 +1,5 @@ """ - This directory contains unit tests. - Some of them are quite rudimentary (and the R version contains more). - use `pytest` to run all tests. +This directory contains unit tests. +Some of them are quite rudimentary (and the R version contains more). +use `pytest` to run all tests. 
""" diff --git a/tests/test_data.py b/tests/test_data.py index 9353569..c0a16a9 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -15,7 +15,7 @@ ) from slise.utils import mat_mul_inter -from .utils import * +from .utils import data_create2, data_create def test_scaling(): @@ -68,4 +68,3 @@ def test_pca(): assert np.allclose(X @ mod, X2 @ pca_rotate_model(mod, v)) assert np.allclose(X @ pca_invert_model(mod, v), X2 @ mod) X4, v = pca_simple(X.T, 4) - diff --git a/tests/test_optim.py b/tests/test_optim.py index a889ba8..a4d49f2 100644 --- a/tests/test_optim.py +++ b/tests/test_optim.py @@ -1,5 +1,4 @@ import numpy as np -import pytest from pytest import approx from slise.optimisation import ( check_threading_layer, @@ -15,7 +14,7 @@ ) from slise.utils import log_sigmoid, log_sum_exp, log_sum_special, sigmoid, sparsity -from .utils import * +from .utils import data_create, data_create2, numeric_grad def test_utils(): @@ -139,7 +138,7 @@ def test_gradopt(): print("Testing graduated optimisation") X, Y = data_create(20, 5) alpha = np.random.normal(size=5) - alpha2 = graduated_optimisation(alpha, X, Y, 0.1, beta=100) + alpha2 = graduated_optimisation(alpha, X, Y, 0.1, beta=100, debug=True) assert loss_smooth(alpha, X, Y, 0.1, beta=100) >= loss_smooth( alpha2, X, Y, 0.1, beta=100 ) @@ -252,7 +251,7 @@ def test_weights(): assert np.allclose( regularised_regression(X2, Y2, 1e-4, 1e-4, weight=w1), regularised_regression(X, Y, 1e-4, 1e-4, weight=w3), - atol=1e-5 + atol=1e-5, ) diff --git a/tests/test_plot.py b/tests/test_plot.py index 4c67e41..41bf340 100644 --- a/tests/test_plot.py +++ b/tests/test_plot.py @@ -1,9 +1,11 @@ # These tests check if the plotting functions run +import numpy as np from matplotlib import pyplot as plt from slise import regression, explain +from slise.plot import get_explanation_order -from .utils import * +from .utils import data_create2 def test_plot2d(): @@ -25,25 +27,55 @@ def test_plot2d(): plt.close("all") -def test_dist(): - print("Testing dist plots") +def test_print(): X, Y, mod = data_create2(40, 5) + reg = regression(X, Y, 0.1, lambda1=1e-4, lambda2=1e-4, intercept=True) + reg.print() + reg.print(variables=[str(i) for i in range(5)], decimals=2, num_var=4) reg = regression(X, Y, 0.1, lambda1=1e-4, lambda2=1e-4, intercept=False) - reg.plot_dist(fig=plt.figure()) - reg = regression( - X, Y, 0.1, lambda1=1e-4, lambda2=1e-4, intercept=True, normalise=True - ) - reg.plot_dist(fig=plt.figure()) + reg.print() + reg.print(variables=[str(i) for i in range(5)], decimals=2, num_var=4) exp = explain(X, Y, 0.1, 5, lambda1=1e-4, lambda2=1e-4) - exp.plot_dist(fig=plt.figure()) - Y = Y - Y.min() - 0.01 + exp.print() + exp.print(classes=["asd", "bds"], variables=[str(i) for i in range(5)], num_var=4) + exp.print(classes="bds", decimals=2, num_var=4) + Y = Y - Y.min() + 0.01 Y /= Y.max() + 0.01 exp = explain(X, Y, 1.0, 5, lambda1=1e-4, lambda2=1e-4, normalise=True, logit=True) - exp.plot_dist(fig=plt.figure()) - reg.plot_subset(fig=plt.figure()) - exp.plot_subset(fig=plt.figure()) - # plt.show() - plt.close("all") + exp.print() + exp.print(classes=["asd", "bds"], variables=[str(i) for i in range(5)], num_var=4) + exp.print(classes="bds", decimals=2, num_var=4) + + +def test_dist(): + try: + X, Y, mod = data_create2(40, 5) + reg = regression(X, Y, 0.1, lambda1=1e-4, lambda2=1e-4, intercept=False) + reg.plot_dist(fig=plt.figure()) + reg.plot_dist(variables=[str(i) for i in range(5)], order=3, fig=plt.figure()) + reg.plot_dist(title="asd", order=range(1, 4), decimals=5, 
fig=plt.figure()) + reg = regression( + X, Y, 0.1, lambda1=1e-4, lambda2=1e-4, intercept=True, normalise=True + ) + reg.plot_dist(fig=plt.figure()) + reg.plot_dist(variables=[str(i) for i in range(5)], order=3, fig=plt.figure()) + reg.plot_dist(title="asd", order=range(1, 4), decimals=5, fig=plt.figure()) + exp = explain(X, Y, 0.1, 5, lambda1=1e-4, lambda2=1e-4) + exp.plot_dist(fig=plt.figure()) + exp.plot_dist(variables=[str(i) for i in range(5)], order=3, fig=plt.figure()) + exp.plot_dist(title="asd", order=range(1, 4), decimals=5, fig=plt.figure()) + Y = Y - Y.min() - 0.01 + Y /= Y.max() + 0.01 + exp = explain( + X, Y, 1.0, 5, lambda1=1e-4, lambda2=1e-4, normalise=True, logit=True + ) + exp.plot_dist(fig=plt.figure()) + exp.plot_dist(variables=[str(i) for i in range(5)], order=3, fig=plt.figure()) + exp.plot_dist(title="asd", order=range(1, 4), decimals=5, fig=plt.figure()) + reg.plot_subset(fig=plt.figure()) + exp.plot_subset(fig=plt.figure()) + finally: + plt.close("all") def test_img(): @@ -54,3 +86,19 @@ def test_img(): exp.plot_image(4, 4, fig=plt.figure()) # plt.show() plt.close("all") + + +def test_order(): + def check(a, b, **kwargs): + assert np.allclose(a[get_explanation_order(a, **kwargs)], b) + + a = np.arange(5) + check(a, [0, 4, 3, 2, 1], intercept=True, min=4) + check(a, [4, 3, 2, 1], intercept=False, min=4) + check(a, [4, 3, 2], intercept=False, max=3) + check(a, [0, 4, 3, 2], intercept=True, max=3) + a = np.arange(-3, 4) + check(a, [-3, 3, 2, 1, -1, -2], intercept=True, min=4) + check(a, [3, 2, 1, -1, -2, -3], intercept=False, min=4) + check(a, [-3, 3, 2, -2], intercept=True, max=3) + check(a, [3, 2, -2, -3], intercept=False, max=3) diff --git a/tests/test_slise.py b/tests/test_slise.py index b92f37a..3831c1c 100644 --- a/tests/test_slise.py +++ b/tests/test_slise.py @@ -17,7 +17,7 @@ from slise.optimisation import loss_smooth from slise.utils import mat_mul_inter -from .utils import * +from .utils import data_create, data_create2 def test_initialise_simple(): @@ -123,9 +123,8 @@ def test_slise_reg(): reg1 = regression( X, Y, epsilon=0.1, lambda1=1e-4, lambda2=1e-4, intercept=True, normalise=True ) - reg1.print() Yp = mat_mul_inter(X, reg1.coefficients) - Yn = reg1._scale.scale_y(Y) + # Yn = reg1._scale.scale_y(Y) Ynp = mat_mul_inter(reg1._scale.scale_x(X), reg1._alpha) Ypn = reg1._scale.scale_y(Yp) # S = (Y - Yp) ** 2 < reg1.epsilon ** 2 @@ -144,7 +143,6 @@ def test_slise_reg(): intercept=True, normalise=False, ) - reg2.print() assert reg2.score() <= 0, f"SLISE loss should be negative ({reg2.score()})" assert 1.0 >= reg2.subset().mean() > 0.5 reg3 = regression( @@ -156,7 +154,6 @@ def test_slise_reg(): intercept=True, normalise=False, ) - reg3.print() assert reg3.score() <= 0, f"SLISE loss should be negative ({reg3.score()})" assert 1.0 >= reg3.subset().mean() > 0.5 reg4 = regression( @@ -169,7 +166,6 @@ def test_slise_reg(): normalise=False, weight=w, ) - reg4.print() assert reg4.score() <= 0, f"SLISE loss should be negative ({reg4.score()})" assert 1.0 >= reg4.subset().mean() > 0.4 @@ -183,12 +179,10 @@ def test_slise_exp(): x = np.random.normal(size=5) y = np.random.normal() reg = explain(X, Y, 0.1, x, y, lambda1=1e-4, lambda2=1e-4, normalise=True) - reg.print() assert reg.score() <= 0, f"Slise loss should usually be <=0 ({reg.score():.2f})" assert y == approx(reg.predict(x)) assert 1.0 >= reg.subset().mean() > 0.0 reg = explain(X, Y, 0.1, 17, lambda1=0.01, lambda2=0.01, normalise=True) - reg.print() assert reg.score() <= 0, f"Slise loss should usually be <=0 
({reg.score():.2f})" assert Y[17] == approx(reg.predict(X[17])) assert 1.0 >= reg.subset().mean() > 0.0 @@ -197,27 +191,22 @@ def test_slise_exp(): assert y == approx(reg.predict(x)) assert 1.0 >= reg.subset().mean() > 0.0 reg = explain(X, Y, 0.1, x, y, lambda1=0, lambda2=0, normalise=False) - reg.print() assert reg.score() <= 0, f"Slise loss should usually be <=0 ({reg.score():.2f})" assert y == approx(reg.predict(x)) assert 1.0 >= reg.subset().mean() > 0.0 reg = explain(X, Y, 0.1, 18, lambda1=0.01, lambda2=0.01, normalise=False) - reg.print() assert reg.score() <= 0, f"Slise loss should usually be <=0 ({reg.score():.2f})" assert Y[18] == approx(reg.predict(X[18])) assert 1.0 >= reg.subset().mean() > 0.0 reg = explain(X, Y, 0.1, 19, lambda1=0, lambda2=0, normalise=False) - reg.print() assert reg.score() <= 0, f"Slise loss should usually be <=0 ({reg.score():.2f})" assert Y[19] == approx(reg.predict(X[19])) assert 1.0 >= reg.subset().mean() > 0.0 reg = explain(X, Y, 0.1, 19, lambda1=0.01, lambda2=0.01, weight=w, normalise=False) - reg.print() assert reg.score() <= 0, f"Slise loss should usually be <=0 ({reg.score():.2f})" assert Y[19] == approx(reg.predict(X[19])) assert 1.0 >= reg.subset().mean() > 0.0 reg = explain(X, Y2, 0.5, 20, weight=w, normalise=True, logit=True) - reg.print() assert reg.score() <= 0, f"Slise loss should usually be <=0 ({reg.score():.2f})" assert Y2[20] == approx(reg.predict(X[20])) assert 1.0 >= reg.subset().mean() > 0.0 @@ -235,7 +224,7 @@ def test_normalised(): reg.coefficients, reg._scale.unscale_model(reg.normalised(False)) ) threads = numba.get_num_threads() - reg1 = regression( + regression( X, Y, epsilon=0.1, diff --git a/tests/utils.py b/tests/utils.py index a8e88b0..83b8851 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -25,7 +25,6 @@ def data_create2( def numeric_grad(x: np.ndarray, fn, eps=1e-6) -> np.ndarray: grad = x * 0 - val0 = fn(x) for i, _ in enumerate(grad): x1 = x.copy() x1[i] += eps
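The user-facing addition in this patch is the `order` argument on `plot_dist`, threaded through `slise/plot.py` and `slise/slise.py` down to `get_explanation_order`. A minimal usage sketch, mirroring the cases exercised in `tests/test_plot.py`; the synthetic data below is an illustrative assumption, not part of the patch:

import numpy as np

from slise import regression

# Made-up data: 100 items, 5 variables, two of which are irrelevant.
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 5))
Y = X @ np.array([2.0, -1.0, 0.0, 0.5, 0.0]) + rng.normal(size=100)

reg = regression(X, Y, epsilon=0.1, lambda1=1e-4, lambda2=1e-4)

reg.plot_dist()                   # order=None (default): plot every variable
reg.plot_dist(order=3)            # int: keep the 3 variables with the largest absolute weights
reg.plot_dist(order=range(1, 4))  # sequence: keep exactly the selected variables

With `fig=None` each call creates and shows its own figure (per the docstring); passing `fig=plt.figure()` instead, as the tests do, leaves figure handling to the caller.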