Skip to content

Commit

Permalink
Artifacts (#4)
Browse files Browse the repository at this point in the history
* Revise Dockerfile to execute experiments and environment as non-root user
* Add 'Running Your Own Experiments' to README.md
* Add tox.ini to execute unit tests using tox
* Add requirements_dev.in and requirements_dev.txt to prepare environment for unit tests
* Add section on how to execute tests to README.md.
* Fix wrong file name in Dockerfile
* Fix typos in README.md and add code documentation.
* Emphasize that automation scripts need to be executed from the project root directory
* Remove obsolete code in amides/tests and amides.evaluation

---------

Co-authored-by: Rafael Uetz <[email protected]>
  • Loading branch information
clumsy9 and ru37z authored Oct 23, 2023
1 parent c7ba636 commit 7fe2608
Show file tree
Hide file tree
Showing 82 changed files with 1,392 additions and 1,491 deletions.
20 changes: 13 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,21 @@

FROM python:3.11-slim-bullseye AS base

RUN apt-get update && apt-get upgrade -y && apt-get install -y jq
RUN addgroup --gid 1000 docker-user && \
adduser --uid 1000 --gid 1000 --disabled-password --gecos "" docker-user && \
echo "docker-user ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers && \
apt-get update && apt-get upgrade -y && apt-get install -y jq

ADD ./amides /amides
WORKDIR /amides
ADD ./amides /home/docker-user/amides

RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
RUN python -m pip install --upgrade pip && pip install -r requirements.txt && pip install .
RUN chmod +x bin/results.sh
RUN python -m venv /home/docker-user/amides/venv
ENV PATH="/home/docker-user/amides/venv/bin:$PATH"
RUN chown -R docker-user:docker-user /home/docker-user/amides

WORKDIR /home/docker-user/amides
USER docker-user
RUN pip install --upgrade pip && pip install -r requirements_dev.txt && pip install tox && pip install -e .
RUN chmod +x experiments.sh classification.sh rule_attribution.sh tainted_training.sh classification_other_types.sh



Expand Down
289 changes: 251 additions & 38 deletions README.md

Large diffs are not rendered by default.

116 changes: 57 additions & 59 deletions amides/amides/data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""This module provides classes and functions to hold and prepare data for the classification process."""
"""This module provides classes and functions to hold and prepare datasets for the training and validation process.
"""
from abc import ABC, abstractmethod
import numpy as np

from abc import ABC, abstractmethod
from scipy import sparse

from amides.utils import get_current_timestamp
Expand Down Expand Up @@ -69,6 +70,7 @@ def __init__(self, samples, labels, label_names=None, feature_info=None):

@property
def samples(self):
"""Return samples."""
return self._samples

@samples.setter
Expand All @@ -82,6 +84,7 @@ def samples(self, samples):

@property
def labels(self):
"""Return labels."""
return self._labels

@labels.setter
Expand All @@ -98,6 +101,7 @@ def labels(self, labels):

@property
def label_names(self):
"""Return label names."""
return self._label_names

@label_names.setter
Expand All @@ -109,14 +113,17 @@ def label_names(self, label_names):

@property
def feature_info(self):
"""Return feature info string."""
return self._feature_info

@property
def size(self):
"""Return the number of samples in the bunch."""
return self._samples.shape[0]

@property
def shape(self):
"""Return the shape of the samples array."""
return self._samples.shape

def add_feature_info(self, info):
Expand Down Expand Up @@ -390,18 +397,15 @@ def __init__(self):
def file_name(self):
"""Returns file name which is mainly used when data splits should
be pickled."""
pass

@abstractmethod
def stack_horizontally(self, data_split):
"""Stack split data horizontally."""
pass

@abstractmethod
def create_info_dict(self):
"""Return basic information on data split. Mainly used for integration
when objects are being pickled."""
pass


class TrainTestSplit(DataSplit):
Expand Down Expand Up @@ -431,6 +435,7 @@ def __init__(self, train_data=None, test_data=None, name=None):

@property
def name(self):
"""(Sets) and returns the name of the split."""
if self._name is None:
self._build_name_from_data_info()

Expand All @@ -442,6 +447,7 @@ def name(self, name):

@property
def train_data(self):
"""Return the training data."""
return self._data["train"]

@train_data.setter
Expand All @@ -450,6 +456,7 @@ def train_data(self, data):

@property
def test_data(self):
"""Return the test data."""
return self._data["test"]

@test_data.setter
Expand Down Expand Up @@ -548,7 +555,7 @@ def _build_name_from_data_info(self):


class TrainTestValidSplit(TrainTestSplit):
"""TrainTestValibSplit-class to create objects containing data splits
"""TrainTestValidSplit-class to create objects containing data splits
for training, testing, and validation. Testing or validation data could also be used
for other purposes.
"""
Expand All @@ -572,6 +579,7 @@ def __init__(self, train_data=None, test_data=None, valid_data=None, name=None):

@property
def validation_data(self):
"""Returns the validation data."""
return self._data["valid"]

@validation_data.setter
Expand Down Expand Up @@ -644,58 +652,6 @@ def _build_name_from_data_info(self):
self._name = f"{self._name}_{info}"


class PrecisionRecallData:
    """Read-only container for already calculated precision and recall data."""

    def __init__(self, precision, recall, thresholds=None, name=None):
        """Store precision and recall values together with optional metadata.

        Parameters
        ----------
        precision: List or np.array
            Precision data.
        recall: List or np.array
            Recall data.
        thresholds: List or np.array
            Threshold values that were used to calculate precision and
            recall data. Defaults to None.
        name: Optional[str]
            Name of the instance (usually used for visualization).
        """
        # The values are stored as given; no copying or validation is done.
        self._precision, self._recall = precision, recall
        self._thresholds = thresholds
        self._name = name

    @property
    def name(self):
        """Return the name given at construction (may be None)."""
        return self._name

    @property
    def precision(self):
        """Return the stored precision data."""
        return self._precision

    @property
    def recall(self):
        """Return the stored recall data."""
        return self._recall

    @property
    def thresholds(self):
        """Return the stored threshold values (may be None)."""
        return self._thresholds


class PlotData:
    """Simple holder that pairs a data object with a name."""

    def __init__(self, data, name):
        """Keep the given data and name as public attributes.

        Parameters
        ----------
        data:
            The data object to hold (stored as given).
        name:
            The name associated with the data.
        """
        self.data, self.name = data, name


class ReliabilityEvaluationData:
    """Simple holder that pairs probabilities with their labels."""

    def __init__(self, probabilities, labels):
        """Keep the given probabilities and labels as public attributes.

        Parameters
        ----------
        probabilities:
            The probability values to hold (stored as given).
        labels:
            The labels to hold (stored as given).
        """
        self.probabilities, self.labels = probabilities, labels


class TrainingResult:
"""Holds trained estimator instance and the used training data."""

Expand Down Expand Up @@ -745,6 +701,7 @@ def __init__(

@property
def estimator(self):
"""Returns the trained model."""
return self._estimator

@estimator.setter
Expand All @@ -753,6 +710,7 @@ def estimator(self, estimator):

@property
def data(self):
"""Returns the training data."""
return self._data

@data.setter
Expand All @@ -761,6 +719,7 @@ def data(self, data):

@property
def tainted_share(self):
"""Returns the fraction of tainting."""
return self._tainted_share

@tainted_share.setter
Expand All @@ -769,6 +728,7 @@ def tainted_share(self, share):

@property
def tainted_seed(self):
"""Returns the seeding used for tainting."""
return self._tainted_seed

@tainted_seed.setter
Expand All @@ -777,10 +737,12 @@ def tainted_seed(self, seed):

@property
def feature_extractors(self):
"""Returns the feature extractor."""
return self._feature_extractors

@property
def scaler(self):
"""Returns the symmetric min-max scaler."""
return self._scaler

@scaler.setter
Expand All @@ -789,10 +751,12 @@ def scaler(self, scaler):

@property
def timestamp(self):
"""Returns the timestamp value."""
return self._timestamp

@property
def name(self):
"""Returns the name of the result."""
if self._name is None:
self._build_name_from_result_info()

Expand All @@ -803,6 +767,13 @@ def name(self, name):
self._name = name

def add_feature_extractor(self, feat_extractor):
"""Add feature extractor to the result.
Parameters
----------
feat_extractor: Vectorizer
The feature extractor to be added.
"""
self._feature_extractors.append(feat_extractor)

def file_name(self):
Expand All @@ -828,6 +799,13 @@ def file_name(self):
return file_name

def create_info_dict(self):
"""Creates an info dict containing meta information in human-readable format.
Returns
-------
:dict
Dictionary containing meta information.
"""
info = {
"estimator": self._estimator.__class__.__name__,
"estimator_params": self._estimator.get_params(),
Expand Down Expand Up @@ -911,6 +889,7 @@ def __init__(

@property
def predict(self):
"""Returns the decision function values."""
return self._predict

def file_name(self):
Expand Down Expand Up @@ -955,6 +934,7 @@ def __init__(self, name=None, timestamp=None, benign_training_data=None):

@property
def name(self):
"""Return the name of the result."""
if self._name is None:
self._name = "multi_train_rslt"

Expand All @@ -966,6 +946,7 @@ def name(self, name):

@property
def timestamp(self):
"""Return the timestamp value."""
return self._timestamp

@timestamp.setter
Expand All @@ -974,10 +955,12 @@ def timestamp(self, timestamp):

@property
def results(self):
"""Return the results dictionary."""
return self._results

@property
def benign_train_data(self):
"""Return the common benign training data."""
return self._benign_train_data

@benign_train_data.setter
Expand Down Expand Up @@ -1007,7 +990,14 @@ def get_result(self, result_name):
return result

def file_name(self):
if self.name.startswith("multi_train_result"):
"""Build a file name starting with 'multi_train_rslt'
Returns
-------
:str
The file name starting with 'multi_train_rslt'
"""
if self.name.startswith("multi_train_rslt"):
file_name = self.name
else:
file_name = f"multi_train_rslt_{self.name}"
Expand All @@ -1018,6 +1008,13 @@ def file_name(self):
return file_name

def create_info_dict(self):
"""Creates an info dict containing meta information in human-readable format.
Returns
-------
:dict
Dictionary containing meta information.
"""
results_info = {}

for key, result in self._results.items():
Expand Down Expand Up @@ -1072,6 +1069,7 @@ def name(self):

@property
def benign_valid_data(self):
"""Returns common benign validation data."""
return self._benign_valid_data

@benign_valid_data.setter
Expand Down
Loading

0 comments on commit 7fe2608

Please sign in to comment.