diff --git a/.travis.yml b/.travis.yml
index 784e721..dde68c7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -30,8 +30,8 @@ install:
   - pip install .[r]
 
 script:
-  - pytest --cov ./ test/test_*.py
+  - pytest --disable-warnings --show-capture=no --cov ./ --cov-report term --cov-report xml test/test_*.py
 
 after_success:
-  - codecov
+  - codecov -f coverage.xml
   - python-codacy-coverage -r coverage.xml
diff --git a/Makefile b/Makefile
index 1582ad2..691a320 100644
--- a/Makefile
+++ b/Makefile
@@ -1,15 +1,15 @@
 .DEFAULT_GOAL := pypitest
 
 test3:
-	python3 -m pytest -n 3 --disable-warnings --show-capture=no --cov ./ test/test_*.py
+	python3 -m pytest -n 3 --disable-warnings --show-capture=no --cov=ngs_toolkit --cov-report xml test/test_*.py --lf
 
 test2:
-	python2 -m pytest -n 3 --disable-warnings --show-capture=no --cov ./ test/test_*.py
+	python2 -m pytest -n 3 --disable-warnings --show-capture=no --cov=ngs_toolkit --cov-report xml test/test_*.py --lf
 
 test: test3
 
 coverage: test
-	codecov
+	codecov -f coverage.xml
 	python-codacy-coverage -r coverage.xml
 
 build: test
diff --git a/ngs_toolkit/analysis.py b/ngs_toolkit/analysis.py
new file mode 100644
index 0000000..2845980
--- /dev/null
+++ b/ngs_toolkit/analysis.py
@@ -0,0 +1,636 @@
+#!/usr/bin/env python
+
+import os
+
+import numpy as np
+import pandas as pd
+
+from ngs_toolkit import _LOGGER
+from ngs_toolkit import _CONFIG
+from ngs_toolkit.decorators import check_organism_genome
+
+
+class Analysis(object):
+    """
+    Generic class holding functions and data from a typical NGS analysis.
+
+    Other modules implement classes inheriting from this that in general contain
+    data type-specific functions (e.g. ``ngs_toolkit.atacseq.ATACSeqAnalysis``
+    has a ``get_consensus_sites`` function to generate a peak consensus map).
+
+    Objects of this type can be used to store data (e.g. dataframes), variables
+    (e.g. paths to files or configurations) and are easily serializable (saved
+    to a file as an object) for rapid loading and cross-environment portability.
+    See the ``ngs_toolkit.analysis.Analysis.to_pickle``,
+    ``ngs_toolkit.analysis.Analysis.from_pickle`` and
+    ``ngs_toolkit.analysis.Analysis.update`` functions for this.
+
+    :param name: Name of the analysis. Defaults to ``analysis``.
+    :type name: str, optional
+    :param samples: List of ``peppy.Sample`` objects that this analysis is tied to.
+        Defaults to ``None``.
+    :type samples: list, optional
+    :param prj: A ``peppy.Project`` object that this analysis is tied to.
+        Defaults to ``None``.
+    :type prj: peppy.Project, optional
+    :param root_dir: Base directory of the analysis. Relative ``data_dir`` and
+        ``results_dir`` paths are interpreted relative to this.
+        Defaults to the current working directory.
+    :type root_dir: str, optional
+    :param data_dir: Directory containing processed data (e.g. by looper) that will
+        be input to the analysis. This is in principle not required.
+        Defaults to ``data``.
+    :type data_dir: str, optional
+    :param results_dir: Directory to contain outputs produced by the analysis.
+        Defaults to ``results``.
+    :type results_dir: str, optional
+    :param pickle_file: A pickle file to serialize the object.
+        Defaults to "`name`.pickle".
+    :type pickle_file: str, optional
+    :param from_pickle: Whether the analysis should be loaded from an existing
+        serialized analysis object in ``pickle_file``.
+        Defaults to False.
+    :type from_pickle: bool, optional
+
+    Additional keyword arguments will simply be stored as object attributes.
+ """ + def __init__( + self, + name="analysis", + samples=None, + prj=None, + root_dir=None, + data_dir="data", + results_dir="results", + pickle_file=None, + from_pickle=False, + **kwargs): + # parse kwargs with default + self.name = name + if root_dir is None: + self.root_dir = os.curdir + self.root_dir = os.path.abspath(self.root_dir) + + # # if given absolute paths, keep them, otherwise append to root directory + for dir_, attr in [(data_dir, 'data_dir'), (results_dir, 'results_dir')]: + if not os.path.isabs(dir_): + setattr(self, attr, os.path.join(self.root_dir, dir_)) + else: + setattr(self, attr, dir_) + + self.samples = samples + self.prj = prj + self.pickle_file = pickle_file + + # parse remaining kwargs + _LOGGER.debug("Adding additional kwargs to analysis object.") + self.__dict__.update(kwargs) + + # Set default location for the pickle + if self.pickle_file is None: + self.pickle_file = os.path.join(results_dir, "{}.pickle".format(self.name)) + # reload itself if required + if from_pickle: + _LOGGER.info("Updating analysis object from pickle file: '{}'.".format(self.pickle_file)) + self.update(pickle_file=self.pickle_file) + + for directory in [self.data_dir, self.results_dir]: + if not os.path.exists(directory): + os.makedirs(directory) + + # Store projects attributes in self + _LOGGER.debug("Trying to set analysis attributes.") + self.set_project_attributes(overwrite=False) + + # Try to set genome if not set + self.organism, self.genome = (None, None) + _LOGGER.debug("Trying to get analysis genome.") + self.set_organism_genome() + # TODO: if genome is set, get required static files for that genome assembly + + # Add sample input file locations + _LOGGER.debug("Trying to set sample input file attributes.") + self.set_samples_input_files() + + _LOGGER.debug("Setting data type-specific attributes to None.") + attrs = [ + "data_type", "var_names", + "quantity", "norm_units", "raw_matrix_name", + "norm_matrix_name", "annot_matrix_name"] + for attr in attrs: + if not hasattr(self, attr): + setattr(self, attr, None) + + def __repr__(self): + t = "'{}' analysis".format(self.data_type) if self.data_type is not None else "Analysis" + samples = " with {} samples".format(len(self.samples)) if self.samples is not None else "" + organism = " of organism '{}'".format(self.organism) if self.organism is not None else "" + genome = " ({})".format(self.genome) if self.genome is not None else "" + suffix = "." + return t + " object '{}'".format(self.name) + samples + organism + genome + suffix + + @staticmethod + def _overwride_sample_representation(): + from peppy import Sample + + def r(self): return self.name + Sample.__repr__ = r + + @staticmethod + def _format_string_with_attributes(obj, string): + """ + Detect whether a string should be formatted with attributes from obj. + """ + # TODO: test + to_format = pd.Series(string).str.extractall(r"{(.*?)}")[0].values + attrs = obj.__dict__.keys() + if not all([x in attrs for x in to_format]): + msg = "Not all required patterns were found as attributes of object '{}'.".format(obj) + _LOGGER.error(msg) + raise ValueError(msg) + return string.format(**obj.__dict__) + + @staticmethod + def _check_data_type_is_supported(data_type): + # TODO: test + supported = _CONFIG['supported_data_types'] + return data_type in supported + + def _get_data_type(self, data_type=None): + # TODO: test + if data_type is None: + msg = "Data type not defined and Analysis object does not have" + msg += " a `data_type` attribute." 
+            try:
+                data_type = self.data_type
+            except AttributeError as e:
+                _LOGGER.error(msg)
+                raise e
+            if data_type is None:
+                _LOGGER.error(msg)
+                raise ValueError(msg)
+        else:
+            msg = "Data type is not supported."
+            hint = " Check which data types are supported in the 'supported_data_types'"
+            hint += " section of the configuration file."
+            if not self._check_data_type_is_supported(data_type):
+                raise ValueError(msg + hint)
+        return data_type
+
+    def _check_samples_have_file(self, attr, f=all):
+        return f([os.path.exists(str(getattr(sample, attr))) for sample in self.samples])
+
+    def _get_samples_have_file(self, attr):
+        return [sample for sample in self.samples if os.path.exists(str(getattr(sample, attr)))]
+
+    def _get_samples_missing_file(self, attr):
+        return [sample for sample in self.samples if not os.path.exists(str(getattr(sample, attr)))]
+
+    def update(self, pickle_file=None):
+        """
+        Update all of the object's attributes with the attributes from a serialized
+        object (i.e. an object stored in a file).
+
+        :param pickle_file: Pickle file to load. By default this is the object's attribute
+                            `pickle_file`.
+        :type pickle_file: str, optional
+        """
+        self.__dict__.update(self.from_pickle(pickle_file=pickle_file).__dict__)
+
+    def set_organism_genome(self):
+        if self.samples is None:
+            _LOGGER.warning("Genome assembly for analysis was not set and cannot be derived from samples.")
+        else:
+            organisms = list(set([s.organism for s in self.samples]))
+            if len(organisms) == 1:
+                _LOGGER.info("Setting analysis organism as '{}'.".format(organisms[0]))
+                self.organism = organisms[0]
+            else:
+                _LOGGER.warning("Found several organisms across the analysis samples. " +
+                                "Will not set an organism for analysis.")
+            genomes = list(set([s.genome for s in self.samples]))
+            if len(genomes) == 1:
+                _LOGGER.info("Setting analysis genome as '{}'.".format(genomes[0]))
+                self.genome = genomes[0]
+            else:
+                _LOGGER.warning("Found several genome assemblies across the analysis samples. " +
+                                "Will not set a genome for analysis.")
+
+    def set_project_attributes(self, overwrite=True):
+        """
+        Set Analysis object attributes ``samples``, ``sample_attributes`` and ``group_attributes``
+        to the values in the associated Project object if existing.
+
+        :param overwrite: Whether to overwrite attribute values if existing. Defaults to True.
+        :type overwrite: bool, optional
+        """
+        hint = " Adding a '{}' section to your project configuration file allows the analysis"
+        hint += " object to use those attributes during the analysis."
+        if self.prj is not None:
+            for attr, parent in [
+                    ("samples", self.prj), ("sample_attributes", self.prj),
+                    ("group_attributes", self.prj), ("comparison_table", self.prj.metadata)]:
+                if not hasattr(parent, attr):
+                    _LOGGER.warning("Associated project does not have any '{}'.".format(attr) +
+                                    (hint.format(attr) if attr != "samples" else ""))
+                else:
+                    msg = "Setting project's '{0}' as the analysis '{0}'.".format(attr)
+                    if overwrite:
+                        _LOGGER.info(msg)
+                        setattr(self, attr, getattr(parent, attr))
+                    else:
+                        if not hasattr(self, attr):
+                            _LOGGER.info(msg)
+                            setattr(self, attr, getattr(parent, attr))
+                        else:
+                            if getattr(self, attr) is None:
+                                setattr(self, attr, getattr(parent, attr))
+                            else:
+                                _LOGGER.debug("{} already exists for analysis, not overwriting."
+                                              .format(attr.replace("_", " ").capitalize()))
+            if hasattr(self, "comparison_table"):
+                if isinstance(self.comparison_table, str):
+                    _LOGGER.debug("Reading in comparison table.")
+                    self.comparison_table = pd.read_csv(self.comparison_table)
+        else:
+            _LOGGER.warning("Analysis object does not have an attached Project. " +
+                            "Will not add special attributes to analysis such as " +
+                            "samples, their attributes and comparison table.")
+
+    def set_samples_input_files(self, overwrite=True):
+        """
+        Add input file values to sample objects dependent on data type.
+        These are specified in the `ngs_toolkit` configuration file
+        under 'sample_input_files:<data_type>:<attribute>'.
+
+        :param overwrite: Whether to overwrite attribute values if existing. Defaults to True.
+        :type overwrite: bool, optional
+        """
+        if self.samples is None:
+            _LOGGER.error("Analysis object does not have attached Samples. " +
+                          "Will not add special attributes to samples such as " +
+                          "input file locations.")
+            return
+
+        msg = "Setting '{}' in sample {} as '{}'."
+        for data_type in _CONFIG['sample_input_files']:
+            for attr, value in _CONFIG['sample_input_files'][data_type].items():
+                for s in [s for s in self.samples if s.protocol == data_type]:
+                    # format a per-sample copy so one sample's formatted value
+                    # does not leak into the next iteration
+                    v = value
+                    if v is None:
+                        pass
+                    elif ("{data_dir}" in v) and ("{sample_name}" in v):
+                        v = v.format(data_dir=self.data_dir, sample_name=s.name)
+                    elif "{data_dir}" in v:
+                        v = v.format(data_dir=self.data_dir)
+                    elif "{sample_name}" in v:
+                        v = v.format(sample_name=s.name)
+                    if overwrite:
+                        _LOGGER.info(msg.format(attr, s.name, v))
+                        setattr(s, attr, v)
+                    else:
+                        if not hasattr(s, attr):
+                            _LOGGER.info(msg.format(attr, s.name, v))
+                            setattr(s, attr, v)
+                        else:
+                            if getattr(s, attr) is None:
+                                _LOGGER.info(msg.format(attr, s.name, v))
+                                setattr(s, attr, v)
+                            else:
+                                _LOGGER.debug("{} already exists in sample, not overwriting."
+                                              .format(attr.replace("_", " ").capitalize()))
+
+    def to_pickle(self, timestamp=False):
+        """
+        Serialize object (i.e. save to disk) in pickle format.
+        """
+        import pickle
+        if timestamp:
+            import time
+            import datetime
+            ts = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
+            p = self.pickle_file.replace(".pickle", ".{}.pickle".format(ts))
+        else:
+            p = self.pickle_file
+        pickle.dump(self, open(p, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
+
+    def from_pickle(self, pickle_file=None):
+        """
+        Load object from pickle file.
+
+        :param pickle_file: Pickle file to load. By default this is the object's
+                            attribute `pickle_file`.
+        :type pickle_file: str, optional
+        """
+        import pickle
+        if pickle_file is None:
+            pickle_file = self.pickle_file
+        return pickle.load(open(pickle_file, 'rb'))
+
+    @check_organism_genome
+    def get_annotations(
+            self,
+            organism=None, genome_assembly=None, output_dir=None,
+            steps=['blacklist', 'tss', 'genomic_context']):
+        """
+        Get genome annotations and other resources for several ngs_toolkit analyses.
+
+        :param organism: Organism to get annotations for.
+                         Currently supported are 'human' and 'mouse'.
+                         Defaults to the analysis' own organism.
+        :type organism: str, optional
+        :param genome_assembly: Genome assembly to get annotations for.
+                                Currently supported are 'hg19', 'hg38' and 'mm10'.
+                                Defaults to the analysis' own genome assembly.
+        :type genome_assembly: str, optional
+        :param output_dir: Directory to save results to.
+                           Defaults to 'reference' in the analysis root directory.
+        :type output_dir: str, optional
+        :param steps: Which steps to get.
+            Options are:
+                - 'fasta': Genome sequence in FASTA format
+                - 'blacklist': Locations of blacklisted regions for the genome
+                - 'tss': Locations of genes' TSSs
+                - 'genomic_context': Genomic context of the genome
+            Defaults to ['blacklist', 'tss', 'genomic_context'].
+        :type steps: list, optional
+        :returns: Dictionary with the requested steps as keys, containing
+                  paths to the respective files.
+        :rtype: dict
+        """
+        from ngs_toolkit.general import (
+            get_genome_reference,
+            get_blacklist_annotations,
+            get_tss_annotations,
+            get_genomic_context)
+
+        if organism is None:
+            organism = self.organism
+        if genome_assembly is None:
+            genome_assembly = self.genome
+        if output_dir is None:
+            output_dir = os.path.join(self.root_dir, "reference")
+
+        args = {'organism': organism,
+                'genome_assembly': genome_assembly,
+                'output_dir': output_dir}
+        mapping = {"hg19": "grch37", "hg38": "grch38", "mm10": "grcm38"}
+        output = dict()
+
+        if 'fasta' in steps:
+            output['fasta_file'] = get_genome_reference(**args)
+        if 'blacklist' in steps:
+            output['blacklist_file'] = get_blacklist_annotations(**args)
+        if 'tss' in steps:
+            get_tss_annotations(**args)
+            output['tss_file'] = os.path.join(
+                output_dir, "{}.{}.gene_annotation.protein_coding.tss.bed"
+                .format(organism, mapping[genome_assembly]))
+        if 'genomic_context' in steps:
+            get_genomic_context(**args)
+            output['genomic_context_file'] = os.path.join(
+                output_dir, "{}.{}.genomic_context.bed"
+                .format(organism, mapping[genome_assembly]))
+
+        return output
+
+    def get_matrix(self, matrix=None, matrix_name=None, samples=None):
+        """
+        Return a matrix that is an attribute of self, subsetted to the requested samples.
+
+        :param pandas.DataFrame matrix: Pandas DataFrame.
+        :param list samples: Iterable of peppy.Sample objects to restrict matrix to.
+            If not provided (`None` is passed) the matrix will not be subsetted.
+        :param str matrix_name: Name of the matrix that is an attribute of the object
+            with values for samples in `samples`.
+        :returns pandas.DataFrame: Requested DataFrame.
+        """
+        if (matrix is None) & (matrix_name is None):
+            msg = "Either arguments `matrix` or `matrix_name` must be provided."
+            _LOGGER.error(msg)
+            raise ValueError(msg)
+        # default to matrix to be normalized
+        if matrix is None:
+            r_matrix = getattr(self, matrix_name)
+        else:
+            r_matrix = matrix
+        # default to all samples in self with matching names in matrix
+        if samples is None:
+            r_matrix = r_matrix.loc[:, [s.name for s in self.samples]]
+        else:
+            r_matrix = r_matrix.loc[:, [s.name for s in samples]]
+
+        return r_matrix
+
+    def annotate_with_sample_metadata(
+            self,
+            quant_matrix=None,
+            attributes=None,
+            numerical_attributes=None,
+            save=True,
+            assign=True):
+        """
+        Annotate matrix ``(n_regions, n_samples)`` with sample metadata
+        (creates MultiIndex on columns). Numerical attributes can be passed as an iterable
+        to ``numerical_attributes`` to be converted.
+
+        :param str quant_matrix: Attribute name of matrix to annotate. By default this will
+                                 be inferred from the analysis data_type in the following way:
+                                 ATAC-seq or ChIP-seq: ``coverage_annotated``;
+                                 RNA-seq: ``expression_annotated``.
+        :param list attributes: Desired attributes to be annotated. This defaults
+                                to all attributes in the original sample annotation sheet
+                                of the analysis Project.
+        :param list numerical_attributes: Attributes which are numeric even though they
+                                          might not be so in the samples' attributes. Will attempt
+                                          to convert values to numeric.
+        :param bool save: Whether to write the annotated DataFrame to disk.
+        :param bool assign: Whether to assign the annotated DataFrame to an attribute:
+                            ``accessibility`` if ``data_type`` is "ATAC-seq",
+                            ``binding`` if ``data_type`` is "ChIP-seq", or
+                            ``expression`` if ``data_type`` is "RNA-seq".
+        :var pd.DataFrame {accessibility,binding,expression}: A pandas DataFrame with
+                                                              MultiIndex column index
+                                                              containing the samples'
+                                                              attributes specified.
+        :returns pd.DataFrame: Annotated dataframe with requested sample attributes.
+        """
+        if (attributes is None) and hasattr(self, "sample_attributes"):
+            _LOGGER.info("Using 'sample_attributes' from analysis to annotate matrix: {}"
+                         .format(",".join(self.sample_attributes)))
+            attributes = self.sample_attributes
+        if (attributes is None) and hasattr(self, "prj"):
+            _LOGGER.warning(
+                "Analysis has no 'sample_attributes' set. " +
+                "Will use all columns from project annotation sheet: {}"
+                .format(",".join(self.prj.sheet.columns)))
+            attributes = self.prj.sheet.columns
+        if attributes is None:
+            msg = "Attributes not given and could not be set from Analysis or Project."
+            raise ValueError(msg)
+
+        if self.data_type == "ATAC-seq":
+            output_matrix = "accessibility"
+            if quant_matrix is None:
+                quant_matrix = "coverage_annotated"
+        elif self.data_type == "ChIP-seq":
+            output_matrix = "binding"
+            if quant_matrix is None:
+                quant_matrix = "coverage_annotated"
+        elif self.data_type == "CNV":
+            output_matrix = "cnv"
+            if quant_matrix is None:
+                quant_matrix = "coverage_norm"
+            if not isinstance(getattr(self, quant_matrix), pd.DataFrame):
+                msg = ("For the CNV data type, the matrix to be annotated must be "
+                       "directly passed to the function through the `quant_matrix` argument!")
+                _LOGGER.error(msg)
+                raise ValueError(msg)
+        elif self.data_type == "RNA-seq":
+            output_matrix = "expression"
+            if quant_matrix is None:
+                quant_matrix = "expression_annotated"
+        else:
+            _LOGGER.warning("Data type of object not known, will not set as attribute.")
+            assign = False
+            output_matrix = ""
+            if quant_matrix is None:
+                msg = "Data type of object not known, must specify `quant_matrix` to annotate!"
+                raise ValueError(msg)
+
+        matrix = getattr(self, quant_matrix)
+
+        if isinstance(matrix.columns, pd.core.indexes.multi.MultiIndex):
+            matrix.columns = matrix.columns.get_level_values("sample_name")
+
+        samples = [s for s in self.samples if s.name in matrix.columns.tolist()]
+
+        attrs = list()
+        for attr in attributes:
+            _LOGGER.debug("Attribute: '{}'".format(attr))
+            ll = list()
+            for sample in samples:  # keep order of samples in matrix
+                try:
+                    ll.append(getattr(sample, attr))
+                except AttributeError:
+                    ll.append(np.nan)
+            if numerical_attributes is not None:
+                if attr in numerical_attributes:
+                    ll = pd.Series(ll).replace("", np.nan).astype(float).tolist()
+            attrs.append(ll)
+
+        # Generate multiindex columns
+        index = pd.MultiIndex.from_arrays(attrs, names=attributes)
+        df = matrix[[s.name for s in samples]]
+        df.columns = index
+
+        # Save
+        if save:
+            df.to_csv(
+                os.path.join(
+                    self.results_dir,
+                    self.name + "{}.annotated_metadata.csv"
+                    .format("." +
+                            output_matrix if output_matrix != "" else output_matrix)),
+                index=True)
+        if assign:
+            setattr(self, output_matrix, df)
+        return df
+
+    def get_level_colors(
+            self, index=None, matrix=None, levels=None,
+            pallete="tab20", cmap="RdBu_r", nan_color=(0.662745, 0.662745, 0.662745, 1.0),
+            # TODO: implement dataframe return
+            as_dataframe=False):
+        """
+        Get tuples of floats representing a colour for a sample in a given variable in a
+        dataframe's index (particularly useful with MultiIndex dataframes).
+
+        If given, will use the provided ``index`` argument, otherwise the columns
+        and their levels of an attribute of self named ``matrix``.
+        ``levels`` can be passed to subset the levels of the index.
+
+        Will try to guess if each variable is categorical or numerical and return either colours
+        from a colour ``pallete`` or a ``cmap``, respectively, with null values set to ``nan_color``
+        (a 4-value tuple of floats).
+
+        :param index: Pandas Index to use. If not provided (default == None), this will be
+                      the column Index of the provided ``matrix``.
+        :type index: pandas.Index, optional
+        :param matrix: Name of analysis attribute containing a dataframe with pandas.MultiIndex
+                       columns to use.
+        :type matrix: str, optional
+        :param levels: Levels of multiindex to restrict to. Defaults to all in index.
+        :type levels: list, optional
+        :param pallete: Name of matplotlib color palette to use with categorical levels.
+                        See matplotlib.org/examples/color/colormaps_reference.html.
+                        Defaults to ``tab20``.
+        :type pallete: str, optional
+        :param cmap: Name of matplotlib colormap to use with numerical levels.
+                     See matplotlib.org/examples/color/colormaps_reference.html.
+                     Defaults to ``RdBu_r``.
+        :type cmap: str, optional
+        :param nan_color: Color for missing (i.e. NA) values.
+                          Defaults to ``(0.662745, 0.662745, 0.662745, 1.0)`` == ``grey``.
+        :type nan_color: tuple, optional
+        :param as_dataframe: Whether a dataframe should be returned. Defaults to False.
+                             Not implemented yet.
+        :type as_dataframe: bool, optional
+        :returns: List of lists of tuples (matrix) of shape (level, sample) with rgb values
+                  for each variable.
+        :rtype: {list}
+        """
+        import matplotlib
+        import matplotlib.pyplot as plt
+        from collections import Counter
+
+        if index is None:
+            if matrix is None:
+                msg = "One of `index` or `matrix` must be provided."
+                _LOGGER.error(msg)
+                raise ValueError(msg)
+            index = getattr(self, matrix).columns
+
+        if levels is not None:
+            drop = [l.name for l in index.levels if l.name not in levels]
+            index = index.droplevel(drop)
+
+        # Handle special case of single level
+        if not isinstance(index, pd.core.indexes.multi.MultiIndex):
+            index = pd.MultiIndex.from_arrays([index.values], names=[index.name])
+
+        _cmap = plt.get_cmap(cmap)
+        _pallete = plt.get_cmap(pallete)
+
+        colors = list()
+        for level in index.levels:
+            # For empty levels (all values nan), return the nan colour
+            if len(level) == 0:
+                colors.append([nan_color] * len(index))
+                _LOGGER.warning("Level {} has only NaN values.".format(level.name))
+                continue
+            # determine the type of data in each level
+            # TODO: check this works in all cases
+            most_common = Counter([type(x) for x in level]).most_common()[0][0]
+            _LOGGER.debug("Getting colours for level '{}', which has type '{}'."
+ .format(level.name, most_common)) + + # Add either colors based on categories or numerical scale + if most_common in [int, float, np.float32, np.float64, np.int32, np.int64]: + values = index.get_level_values(level.name) + # Create a range of either 0-100 if only positive values are found + # or symmetrically from the maximum absolute value found + if not any(values.dropna() < 0): + norm = matplotlib.colors.Normalize(vmin=values.min(), vmax=values.max()) + else: + r = max(abs(values.min()), abs(values.max())) + norm = matplotlib.colors.Normalize(vmin=-r, vmax=r) + + col = _cmap(norm(values)) + # replace color for nan cases + col[np.where(index.get_level_values(level.name).to_series().isnull().tolist())] = nan_color + colors.append(col.tolist()) + else: + n = len(set(index.get_level_values(level.name))) + # get n equidistant colors + p = [_pallete(1. * i / n) for i in range(n)] + color_dict = dict(zip(list(set(index.get_level_values(level.name))), p)) + # color for nan cases + color_dict[np.nan] = nan_color + col = [color_dict[x] for x in index.get_level_values(level.name)] + colors.append(col) + + return colors diff --git a/ngs_toolkit/atacseq.py b/ngs_toolkit/atacseq.py index 0027fcb..d853836 100644 --- a/ngs_toolkit/atacseq.py +++ b/ngs_toolkit/atacseq.py @@ -654,12 +654,10 @@ def get_peak_gccontent_length( sites = pybedtools.BedTool(bed_file) if fasta_file is None: - from ngs_toolkit.general import get_genome_reference _LOGGER.info("Reference genome FASTA file was not given, will try to get it.") - self._check_organism_genome(self) _LOGGER.info("Getting genome FASTA file for organism '{}', genome '{}'. " .format(self.organism, self.genome)) - fasta_file = get_genome_reference(organism=self.organism, genome_assembly=self.genome) + fasta_file = self.get_annotations(steps=['fasta'])['fasta_file'] nuc = sites.nucleotide_content(fi=fasta_file).to_dataframe(comment="#")[["score", "blockStarts"]] nuc.columns = ["gc_content", "length"] @@ -828,19 +826,15 @@ def get_peak_gene_annotation(self, tss_file=None, max_dist=100000): cols = [6, 8, 9] if tss_file is None: - from ngs_toolkit.general import get_tss_annotations _LOGGER.info("Reference TSS file was not given, will try to get TSS annotations.") - self._check_organism_genome(self) _LOGGER.info("Getting TSS annotations for organism '{}', genome '{}'. " .format(self.organism, self.genome)) - tss = pybedtools.BedTool.from_dataframe( - get_tss_annotations(organism=self.organism, genome_assembly=self.genome) - .iloc[:, list(range(6))]) - else: - # extract only relevant columns - tss = pd.read_csv(tss_file, header=None, sep="\t") - tss = tss.iloc[:, list(range(6))] - tss = pybedtools.BedTool.from_dataframe(tss) + tss_file = self.get_annotations(steps=['tss'])['tss_file'] + + # extract only relevant columns + tss = pd.read_csv(tss_file, header=None, sep="\t") + tss = tss.iloc[:, list(range(6))] + tss = pybedtools.BedTool.from_dataframe(tss) if isinstance(self.sites, str): self.sites = pybedtools.BedTool(self.sites) @@ -916,15 +910,14 @@ def get_peak_genomic_location( :returns: A dataframe with genomic context annotation for the peak set. """ from ngs_toolkit.general import bed_to_index + if genomic_context_file is None: - from ngs_toolkit.general import get_genomic_context _LOGGER.info("Reference genomic context file was not given, will try to get it.") _LOGGER.info("Getting genomic context annotations for organism '{}', genome '{}'. 
" .format(self.organism, self.genome)) - context = pybedtools.BedTool.from_dataframe( - get_genomic_context(organism=self.organism, genome_assembly=self.genome)) - else: - context = pybedtools.BedTool(genomic_context_file) + genomic_context_file = self.get_annotations(steps=['genomic_context'])['genomic_context_file'] + + context = pybedtools.BedTool(genomic_context_file) if isinstance(self.sites, str): self.sites = pybedtools.BedTool(self.sites) @@ -1692,9 +1685,9 @@ def characterize_regions_function( df[['chrom', 'start', 'end']].reset_index().to_csv(tsv_file, sep="\t", header=False, index=False) # export gene names - clean = df['gene_name'].str.split(",").apply(pd.Series, 1).stack().drop_duplicates() - clean = clean[~clean.isin(['.', 'nan'])] - clean.to_csv( + clean_gene = df['gene_name'].str.split(",").apply(pd.Series, 1).stack().drop_duplicates() + clean_gene = clean_gene[~clean_gene.isin(['.', 'nan', ''])] + clean_gene.to_csv( os.path.join(output_dir, "{}_genes.symbols.txt".format(prefix)), index=False) if "ensembl_gene_id" in df.columns: @@ -1749,7 +1742,7 @@ def characterize_regions_function( # Enrichr if 'enrichr' in steps: _LOGGER.info("Running Enrichr for '{}'".format(prefix)) - results = enrichr(df[['chrom', 'start', 'end', "gene_name"]]) + results = enrichr(clean_gene.to_frame(name="gene_name")) results.to_csv( os.path.join(output_dir, "{}_regions.enrichr.csv".format(prefix)), index=False, encoding='utf-8') diff --git a/ngs_toolkit/decorators.py b/ngs_toolkit/decorators.py index 518dd67..da39eff 100644 --- a/ngs_toolkit/decorators.py +++ b/ngs_toolkit/decorators.py @@ -1,23 +1,34 @@ #!/usr/bin/env python from functools import wraps +from ngs_toolkit import _LOGGER def check_organism_genome(f): - from ngs_toolkit.general import Analysis - @wraps(f) def wrapper(*args, **kwds): - Analysis._check_organism_genome(args[0]) + attrs = ['organism', 'genome'] + msg = "Analysis does not have 'organism' or 'genome' attributes set." + hint = " You can set them with analysis.set_organism_genome, for example." + r1 = all([hasattr(args[0], attr) for attr in attrs]) + r2 = all([getattr(args[0], attr) is not None for attr in attrs]) + if not all([r1, r2]): + _LOGGER.error(msg + hint) + raise AttributeError(msg) return f(*args, **kwds) return wrapper def check_has_sites(f): - from ngs_toolkit.general import Analysis - @wraps(f) def wrapper(*args, **kwds): - Analysis._check_has_sites(args[0]) + attrs = ['sites'] + msg = "Analysis object does not have a `sites` attribute." + hint = " Produce one with analysis.get_consensus_sites for example." + r1 = all([hasattr(args[0], attr) for attr in attrs]) + r2 = all([getattr(args[0], attr) is not None for attr in attrs]) + if not all([r1, r2]): + _LOGGER.error(msg + hint) + raise AttributeError(msg) return f(*args, **kwds) return wrapper diff --git a/ngs_toolkit/general.py b/ngs_toolkit/general.py index af9a581..271c403 100644 --- a/ngs_toolkit/general.py +++ b/ngs_toolkit/general.py @@ -7,584 +7,10 @@ from ngs_toolkit import _LOGGER from ngs_toolkit import _CONFIG +from ngs_toolkit.analysis import Analysis -class Analysis(object): - """ - Generic class holding functions and data from a typical NGS analysis. - - Other modules implement classes inheriting from this that in general contain - data type-specific functions (e.g. ``ngs_toolkit.atacseq.ATACSeqAnalysis`` - has a ``get_consensus_sites`` function to generate a peak consensus map). - - Objects of this type can be used to store data (e.g. dataframes), variables - (e.g. 
paths to files or configurations) and are easily serializable (saved - to a file as an object) for rapid loading and cross-environment portability. - See the ``ngs_toolkit.general.Analysis.to_pickle``, - ``ngs_toolkit.general.Analysis.from_pickle`` and - ``ngs_toolkit.general.Analysis.update`` functions for this. - - :param name: Name of the analysis. Defaults to ``analysis``. - :type name: str, optional - :param samples: List of ``peppy.Sample`` objects that this analysis is tied to. - Defaults to ``None``. - :type samples: list, optional - :param prj: A ``peppy.Project`` object that this analysis is tied to. - Defaults to ``None``. - :type prj: peppy.Project, optional - :param data_dir: Directory containing processed data (e.g. by looper) that will - be input to the analysis. This is in principle not required. - Defaults to ``data``. - :type data_dir: str, optional - :param results_dir: Directory to contain outputs produced by the analysis. - Defaults to ``results``. - :type results_dir: str, optional - :param pickle_file: A pickle file to serialize the object. - Defaults to "`name`.pickle". - :type pickle_file: str, optional - :param from_pickle: Whether the analysis should be loaded from an existing - serialized analysis object in ``pickle_file``. - Defaults to False. - :type from_pickle: bool, optional - - Additional keyword arguments will simply be stored as object attributes. - """ - def __init__( - self, - name="analysis", - samples=None, - prj=None, - data_dir="data", - results_dir="results", - pickle_file=None, - from_pickle=False, - **kwargs): - # parse kwargs with default - self.name = name - self.data_dir = data_dir - self.results_dir = results_dir - self.samples = samples - self.prj = prj - self.pickle_file = pickle_file - - # parse remaining kwargs - _LOGGER.debug("Adding additional kwargs to analysis object.") - self.__dict__.update(kwargs) - - # Set default location for the pickle - if self.pickle_file is None: - self.pickle_file = os.path.join(results_dir, "{}.pickle".format(self.name)) - # reload itself if required - if from_pickle: - _LOGGER.info("Updating analysis object from pickle file: '{}'.".format(self.pickle_file)) - self.update(pickle_file=self.pickle_file) - - for directory in [self.data_dir, self.results_dir]: - if not os.path.exists(directory): - os.makedirs(directory) - - # Store projects attributes in self - _LOGGER.debug("Trying to set analysis attributes.") - self.set_project_attributes(overwrite=False) - - # Try to set genome if not set - self.organism, self.genome = (None, None) - _LOGGER.debug("Trying to get analysis genome.") - self.set_organism_genome() - # TODO: if genome is set, get required static files for that genome assembly - - # Add sample input file locations - _LOGGER.debug("Trying to set sample input file attributes.") - self.set_samples_input_files() - - _LOGGER.debug("Setting data type-specific attributes to None.") - attrs = [ - "data_type", "var_names", - "quantity", "norm_units", "raw_matrix_name", - "norm_matrix_name", "annot_matrix_name"] - for attr in attrs: - if not hasattr(self, attr): - setattr(self, attr, None) - - def __repr__(self): - t = "'{}' analysis".format(self.data_type) if self.data_type is not None else "Analysis" - samples = " with {} samples".format(len(self.samples)) if self.samples is not None else "" - organism = " of organism '{}'".format(self.organism) if self.organism is not None else "" - genome = " ({})".format(self.genome) if self.genome is not None else "" - suffix = "." 
- return t + " object '{}'".format(self.name) + samples + organism + genome + suffix - - @staticmethod - def _overwride_sample_representation(): - from peppy import Sample - - def r(self): return self.name - Sample.__repr__ = r - - @staticmethod - def _format_string_with_attributes(obj, string): - """ - Detect whether a string should be formatted with attributes from obj. - - :param obj: [description] - :type obj: [type] - :param string: [description] - :type string: [type] - :returns: [description] - :rtype: {[type]} - :raises: NotImplementedError - """ - # TODO: implement - # Detect if string contains formattable fields - # if all vaiables are present, format accordingly - raise NotImplementedError - return string.format() - - @staticmethod - def _check_data_type_is_supported(data_type): - # TODO: test - supported = _CONFIG['supported_data_types'] - return data_type in supported - - @staticmethod - def _check_organism_genome(analysis): - # TODO: test - attrs = ['organism', 'genome'] - msg = "Analysis does not have 'organism' or 'genome' attributes set." - hint = " You can set them with analysis.set_organism_genome, for example." - r1 = all([hasattr(analysis, attr) for attr in attrs]) - r2 = all([getattr(analysis, attr) is not None for attr in attrs]) - if not all([r1, r2]): - _LOGGER.error(msg + hint) - raise AttributeError(msg) - return True - - @staticmethod - def _check_has_sites(analysis): - if not hasattr(analysis, "sites"): - msg = "Analysis object does not have a `sites` attribute." - hint = " Produce one with analysis.get_consensus_sites for example." - _LOGGER.error(msg + hint) - raise AttributeError(msg) - return True - - def _get_data_type(self, data_type=None): - # TODO: test - if data_type is None: - msg = "Data type not defined and Analysis object does not have" - msg += " a `data_type` attribute." - try: - data_type = self.data_type - except AttributeError as e: - _LOGGER.error(msg) - raise e - if data_type is None: - _LOGGER.error(msg) - raise ValueError - else: - msg = "Data type is not supported." - hint = " Check which data types are supported in the 'supported_data_types'" - hint += " section of the configuration file." - if not self._check_data_type_is_supported(data_type): - raise ValueError(msg + hint) - return data_type - - def _check_samples_have_file(self, attr, f=all): - return f([os.path.exists(str(getattr(sample, attr))) for sample in self.samples]) - - def _get_samples_have_file(self, attr): - return [sample for sample in self.samples if os.path.exists(str(getattr(sample, attr)))] - - def _get_samples_missing_file(self, attr): - return [sample for sample in self.samples if not os.path.exists(str(getattr(sample, attr)))] - - def update(self, pickle_file=None): - """ - Update all of the object's attributes with the attributes from a serialized - object (i.e. object stored in a file) object. - - :param pickle_file: Pickle file to load. By default this is the object's attribute - `pickle_file`. - :type pickle_file: str, optional - """ - self.__dict__.update(self.from_pickle(pickle_file=pickle_file).__dict__) - - def set_organism_genome(self): - if self.samples is None: - _LOGGER.warning("Genome assembly for analysis was not set and cannot be derived from samples.") - else: - organisms = list(set([s.organism for s in self.samples])) - if len(organisms) == 1: - _LOGGER.info("Setting analysis organism as '{}'.".format(organisms[0])) - self.organism = organisms[0] - else: - _LOGGER.warning("Found several organism for the various analysis samples. 
" + - "Will not set a organism for analysis.") - genomes = list(set([s.genome for s in self.samples])) - if len(genomes) == 1: - _LOGGER.info("Setting analysis genome as '{}'.".format(genomes[0])) - self.genome = genomes[0] - else: - _LOGGER.warning("Found several genome assemblies for the various analysis samples. " + - "Will not set a genome for analysis.") - - def set_project_attributes(self, overwrite=True): - """ - Set Analysis object attributes ``samples``, ``sample_attributes`` and ``group_atrributes`` - to the values in the associated Project object if existing. - - :param overwrite: Whether to overwrite attribute values if existing. Defaults to True - :type overwrite: bool, optional - """ - hint = " Adding a '{}' section to your project configuration file allows the analysis" - hint += " object to use those attributes during the analysis." - if self.prj is not None: - for attr, parent in [ - ("samples", self.prj), ("sample_attributes", self.prj), - ("group_attributes", self.prj), ("comparison_table", self.prj.metadata)]: - if not hasattr(parent, attr): - _LOGGER.warning("Associated project does not have any '{}'.".format(attr) + - hint.format(attr) if attr != "samples" else "") - else: - msg = "Setting project's '{0}' as the analysis '{0}'.".format(attr) - if overwrite: - _LOGGER.info(msg) - setattr(self, attr, getattr(parent, attr)) - else: - if not hasattr(self, attr): - _LOGGER.info(msg) - setattr(self, attr, getattr(parent, attr)) - else: - if getattr(self, attr) is None: - setattr(self, attr, getattr(parent, attr)) - else: - _LOGGER.debug("{} already exist for analysis, not overwriting." - .format(attr.replace("_", " ").capitalize())) - if hasattr(self, "comparison_table"): - if isinstance(self.comparison_table, str): - _LOGGER.debug("Reading up comparison table.") - self.comparison_table = pd.read_csv(self.comparison_table) - else: - _LOGGER.warning("Analysis object does not have an attached Project. " + - "Will not add special attributes to analysis such as " + - "samples, their attributes and comparison table.") - - def set_samples_input_files(self, overwrite=True): - """ - Add input file values to sample objects dependent on data type. - These are specified in the `ngs_toolkit` configuration file - under 'sample_input_files::'. - - :param overwrite: Whether to overwrite attribute values if existing. Defaults to True - :type overwrite: bool, optional - """ - if self.samples is None: - _LOGGER.error("Analysis object does not have attached Samples. " + - "Will not add special attributes to samples such as " + - "input file locations.") - return - - msg = "Setting '{}' in sample {} as '{}'." - for data_type in _CONFIG['sample_input_files']: - for attr, value in _CONFIG['sample_input_files'][data_type].items(): - for s in [s for s in self.samples if s.protocol == data_type]: - if value is None: - pass - elif ("{data_dir}" in value) and ("{sample_name}" in value): - value = value.format(data_dir=self.data_dir, sample_name=s.name) - elif "{data_dir}" in value: - value = value.format(data_dir=self.data_dir) - elif "{sample_name}" in value: - value = value.format(sample_name=s.name) - if overwrite: - _LOGGER.info(msg.format(attr, s.name, value)) - setattr(s, attr, value) - else: - if not hasattr(s, attr): - _LOGGER.info(msg.format(attr, s.name, value)) - setattr(s, attr, value) - else: - if getattr(s, attr) is None: - _LOGGER.info(msg.format(attr, s.name, value)) - setattr(s, attr, value) - else: - _LOGGER.debug("{} already exists in sample, not overwriting." 
- .format(attr.replace("_", " ").capitalize())) - - def to_pickle(self, timestamp=False): - """ - Serialize object (i.e. save to disk) to hickle format. - """ - import pickle - if timestamp: - import time - import datetime - ts = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S') - p = self.pickle_file.replace(".pickle", ".{}.pickle".format(ts)) - else: - p = self.pickle_file - pickle.dump(self, open(p, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) - - def from_pickle(self, pickle_file=None): - """ - Load object from pickle file. - - :param pickle_file: Pickle file to load. By default this is the object's - attribute `pickle_file`. - :type pickle_file: str, optional - """ - import pickle - if pickle_file is None: - pickle_file = self.pickle_file - return pickle.load(open(pickle_file, 'rb')) - - def get_matrix(self, matrix=None, matrix_name=None, samples=None): - """ - Return a matrix that is an attribute of self subsetted for the requested samples. - - :param pandas.DataFrame matrix: Pandas DataFrame. - :param list samples: Iterable of peppy.Sample objects to restrict matrix to. - If not provided (`None` is passed) the matrix will not be subsetted. - :param str matrix_name: Name of the matrix that is an attribute of the object - with values for samples in `samples`. - :returns pandas.DataFrame: Requested DataFrame. - """ - if (matrix is None) & (matrix_name is None): - msg = "Either arguments `matrix` or `matrix_name` must be provided." - _LOGGER.error(msg) - raise ValueError(msg) - # default to matrix to be normalized - if matrix is None: - r_matrix = getattr(self, matrix_name) - else: - r_matrix = matrix - # default to all samples in self with matching names in matrix - if samples is None: - r_matrix = r_matrix.loc[:, [s.name for s in self.samples]] - else: - r_matrix = r_matrix.loc[:, [s.name for s in samples]] - - return r_matrix - - def annotate_with_sample_metadata( - self, - quant_matrix=None, - attributes=None, - numerical_attributes=None, - save=True, - assign=True): - """ - Annotate matrix ``(n_regions, n_samples)`` with sample metadata - (creates MultiIndex on columns). Numerical attributes can be pass as a iterable - to ``numerical_attributes`` to be converted. - - :param str quant_matrix: Attribute name of matrix to annotate. By default this will - be infered from the analysis data_type in the following way: - ATAC-seq or ChIP-seq: ``coverage_annotated``; - RNA-seq: ``expression_annotated``. - :param list attributes: Desired attributes to be annotated. This defaults - to all attributes in the original sample annotation sheet - of the analysis Project. - :param list numerical_attributes: Attributes which are numeric even though they - might be so in the samples' attributes. Will attempt - to convert values to numeric. - :param bool save: Whether to write normalized DataFrame to disk. - :param bool assign: Whether to assign the normalized DataFrame to an attribute - ``accessibility`` if ``data_type`` is "ATAC-seq, - ``binding`` if ``data_type`` is "ChIP-seq, or - ``expression`` if ``data_type`` is "RNA-seq. - :var pd.DataFrame {accessibility,binding,expression}: A pandas DataFrame with - MultiIndex column index - containing the sample's - attributes specified. - :returns pd.DataFrame: Annotated dataframe with requested sample attributes. 
- """ - if (attributes is None) and hasattr(self, "sample_attributes"): - _LOGGER.info("Using 'sample_attributes' from analysis to annotate matrix: {}" - .format(",".join(self.sample_attributes))) - attributes = self.sample_attributes - if (attributes is None) and hasattr(self, "prj"): - _LOGGER.warn( - "Analysis has no 'sample_attributes' set. " + - "Will use all columns from project annotation sheet: {}" - .format(",".join(self.prj.sheet.columns))) - attributes = self.prj.sheet.columns - if attributes is None: - msg = "Attributes not given and could not be set from Analysis or Project." - raise ValueError(msg) - - if self.data_type == "ATAC-seq": - output_matrix = "accessibility" - if quant_matrix is None: - quant_matrix = "coverage_annotated" - elif self.data_type == "ChIP-seq": - output_matrix = "binding" - if quant_matrix is None: - quant_matrix = "coverage_annotated" - elif self.data_type == "CNV": - output_matrix = "cnv" - if quant_matrix is None: - if type(getattr(self, quant_matrix)) is not pd.DataFrame: - _LOGGER.error("For CNV data type, the matrix to be annotated must be" + - " directly passed to the function throught the `quant_matrix` argument!") - raise ValueError - if quant_matrix is None: - quant_matrix = "coverage_norm" - elif self.data_type == "RNA-seq": - output_matrix = "expression" - if quant_matrix is None: - quant_matrix = "expression_annotated" - else: - _LOGGER.warning("Data type of object not known, will not set as attribute.") - assign = False - output_matrix = "" - if quant_matrix is None: - msg = "Data type of object not known, must specify `quant_matrix` to annotate!" - raise ValueError(msg) - - matrix = getattr(self, quant_matrix) - - if type(matrix.columns) is pd.core.indexes.multi.MultiIndex: - matrix.columns = matrix.columns.get_level_values("sample_name") - - samples = [s for s in self.samples if s.name in matrix.columns.tolist()] - - attrs = list() - for attr in attributes: - _LOGGER.debug("Attribute: '{}'".format(attr)) - ll = list() - for sample in samples: # keep order of samples in matrix - try: - ll.append(getattr(sample, attr)) - except AttributeError: - ll.append(np.nan) - if numerical_attributes is not None: - if attr in numerical_attributes: - ll = [float(x) for x in ll] - attrs.append(ll) - - # Generate multiindex columns - index = pd.MultiIndex.from_arrays(attrs, names=attributes) - df = matrix[[s.name for s in samples]] - df.columns = index - - # Save - if save: - df.to_csv( - os.path.join( - self.results_dir, - self.name + "{}.annotated_metadata.csv" - .format("." + output_matrix if output_matrix != "" else output_matrix)), - index=True) - if assign: - setattr(self, output_matrix, df) - return df - - def get_level_colors( - self, index=None, matrix=None, levels=None, - pallete="tab20", cmap="RdBu_r", nan_color=(0.662745, 0.662745, 0.662745, 1.0), - # TODO: implement dataframe return - as_dataframe=False): - """ - Get tuples of floats representing a colour for a sample in a given variable in a - dataframe's index (particularly useful with MultiIndex dataframes). - - If given, will use the provieded ``index`` argument, otherwise, the the columns - and its levels of an attribute of self named ``matrix``. - ``levels`` can be passed to subset the levels of the index. - - Will try to guess if each variable is categorical or numerical and return either colours - from a colour ``pallete`` or a ``cmap``, respectively with null values set to ``nan_color`` - (a 4-value tuple of floats). - - :param index: Pandas Index to use. 
If not provided (default == None), this will be - the column Index of the provided ``matrix``. - :type index: pandas.Index, optional - :param matrix: Name of analysis attribute containing a dataframe with pandas.MultiIndex - columns to use. - :type matrix: str, optional - :param levels: Levels of multiindex to restrict to. Defaults to all in index. - :type levels: list, optional - :param pallete: Name of matplotlib color palete to use with categorical levels. - See matplotlib.org/examples/color/colormaps_reference.html. - Defaults to ``Paired``. - :type pallete: str, optional - :param cmap: Name of matplotlib color palete to use with numerical levels. - See matplotlib.org/examples/color/colormaps_reference.html. - Defaults to ``RdBu_r``. - :type cmap: str, optional - :param nan_color: Color for missing (i.e. NA) values. - Defaults to ``(0.662745, 0.662745, 0.662745, 0.5)`` == ``grey``. - :type nan_color: tuple, optional - :param as_dataframe: Whether a dataframe should be return. Defaults to False. - Not implemented yet. - :type as_dataframe: bool, optional - :returns: List of list tuples (matrix) of shape (level, sample) with rgb values of - each of the variable. - :rtype: {list} - """ - import numpy as np - import matplotlib - import matplotlib.pyplot as plt - from collections import Counter - - if index is None: - if matrix is None: - msg = "One of `index` or `matrix` must be provided." - _LOGGER.error(msg) - raise ValueError(msg) - index = getattr(self, matrix).columns - - if levels is not None: - drop = [l.name for l in index.levels if l.name not in levels] - index = index.droplevel(drop) - - # Handle special case of single level - if type(index) is pd.core.indexes.base.Index: - index = pd.MultiIndex.from_arrays([index.values], names=[index.name]) - - _cmap = plt.get_cmap(cmap) - _pallete = plt.get_cmap(pallete) - - colors = list() - for level in index.levels: - # For empty levels (all values nan), return nan colour - if len(level) == 0: - colors.append([nan_color] * len(index)) - _LOGGER.warning("Level {} has only NaN values.".format(level.name)) - continue - # determine the type of data in each level - # TODO: check this works in all cases - most_common = Counter([type(x) for x in level]).most_common()[0][0] - _LOGGER.debug("Getting colours for level '{}', which has type '{}'." - .format(level.name, most_common)) - - # Add either colors based on categories or numerical scale - if most_common in [int, float, np.float32, np.float64, np.int32, np.int64]: - values = index.get_level_values(level.name) - # Create a range of either 0-100 if only positive values are found - # or symmetrically from the maximum absolute value found - if not any(values.dropna() < 0): - norm = matplotlib.colors.Normalize(vmin=values.min(), vmax=values.max()) - else: - r = max(abs(values.min()), abs(values.max())) - norm = matplotlib.colors.Normalize(vmin=-r, vmax=r) - - col = _cmap(norm(values)) - # replace color for nan cases - col[np.where(index.get_level_values(level.name).to_series().isnull().tolist())] = nan_color - colors.append(col.tolist()) - else: - n = len(set(index.get_level_values(level.name))) - # get n equidistant colors - p = [_pallete(1. 
* i / n) for i in range(n)] - color_dict = dict(zip(list(set(index.get_level_values(level.name))), p)) - # color for nan cases - color_dict[np.nan] = nan_color - col = [color_dict[x] for x in index.get_level_values(level.name)] - colors.append(col) - - return colors +Analysis def get_genome_reference( @@ -1010,31 +436,6 @@ def get_genomic_context( return annot -def get_annotations( - organism, genome_assembly=None, output_dir=None, - steps=['blacklist', 'tss', 'genomic']): - """ - Get genome annotations required for several ngs_toolkit analysis. - This is a simple approach using Biomart's API querying the Ensembl database.. - Saves results to disk and returns a dataframe. - """ - # TODO: test - from ngs_toolkit.general import ( - get_genome_reference, - get_blacklist_annotations, - get_tss_annotations, - get_genomic_context) - - if 'fasta' in steps: - get_genome_reference(organism=organism, genome_assembly=genome_assembly) - if 'blacklist' in steps: - get_blacklist_annotations(organism=organism, genome_assembly=genome_assembly) - if 'tss' in steps: - get_tss_annotations(organism=organism, genome_assembly=genome_assembly) - if 'genomic' in steps: - get_genomic_context(organism=organism, genome_assembly=genome_assembly) - - def count_reads_in_intervals(bam, intervals): """ Count total number of reads in a iterable holding strings @@ -1117,6 +518,7 @@ def normalize_quantiles_p(df_input): def unsupervised_analysis( analysis, + steps=['correlation', 'manifold', 'pca', 'pca_association'], data_type=None, quant_matrix=None, samples=None, @@ -1238,6 +640,7 @@ def unsupervised_analysis( from scipy.stats import pearsonr import matplotlib.pyplot as plt import seaborn as sns + from statsmodels.sandbox.stats.multicomp import multipletests if data_type is None: msg = "Data type not defined and Analysis object does not have a `data_type` attribute." 
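
[Example, not part of the patch] The next hunk gates each block of `unsupervised_analysis` behind the new `steps` argument added above. A minimal sketch of how a caller might now run only the PCA-related steps and skip the expensive manifold learning; the project setup (an existing default pickle under results/) is an assumption for illustration:

    from ngs_toolkit.analysis import Analysis
    from ngs_toolkit.general import unsupervised_analysis

    # Reload a previously serialized analysis from its default pickle location.
    analysis = Analysis(name="analysis", from_pickle=True)

    # Only 'pca' and 'pca_association' run; 'correlation' and 'manifold' are skipped.
    unsupervised_analysis(analysis, steps=['pca', 'pca_association'])
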
@@ -1349,300 +752,322 @@ def unsupervised_analysis( # .str.replace("RNA-seq_", "") # .str.replace("ChIP-seq_", "")) - # Pairwise correlations - for method in ['pearson', 'spearman']: - _LOGGER.info("Plotting pairwise correlation with '{}' metric.".format(method)) - g = sns.clustermap( - X.astype(float).corr(method), - xticklabels=False, yticklabels=sample_display_names, annot=display_corr_values, - cmap="Spectral_r", figsize=(0.2 * X.shape[1], 0.2 * X.shape[1]), - cbar_kws={"label": "{} correlation".format(method.capitalize())}, - row_colors=color_dataframe.T, col_colors=color_dataframe.T) - g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_yticklabels(), rotation=0, fontsize='xx-small') - g.ax_heatmap.set_xlabel(None, visible=False) - g.ax_heatmap.set_ylabel(None, visible=False) - g.fig.savefig(os.path.join( - output_dir, "{}.{}.{}_correlation.clustermap.svg" - .format(analysis.name, plot_prefix, method)), bbox_inches='tight', dpi=dpi) - - # Manifolds - params = { - "MDS": {'n_jobs': -1}, - "Isomap": {'n_jobs': -1}, - "LocallyLinearEmbedding": {}, - "SpectralEmbedding": {'n_jobs': -1}, - "TSNE": {'init': 'pca'}, - } - for algo in manifold_algorithms: - msg = "Learning manifold with '{}' algorithm".format(algo) - _LOGGER.info(msg + ".") + if 'correlation' in steps: + # Pairwise correlations + for method in ['pearson', 'spearman']: + _LOGGER.info("Plotting pairwise correlation with '{}' metric.".format(method)) + g = sns.clustermap( + X.astype(float).corr(method), + xticklabels=False, yticklabels=sample_display_names, annot=display_corr_values, + cmap="Spectral_r", figsize=(0.2 * X.shape[1], 0.2 * X.shape[1]), + cbar_kws={"label": "{} correlation".format(method.capitalize())}, + row_colors=color_dataframe.T, col_colors=color_dataframe.T) + g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_yticklabels(), rotation=0, fontsize='xx-small') + g.ax_heatmap.set_xlabel(None, visible=False) + g.ax_heatmap.set_ylabel(None, visible=False) + g.fig.savefig(os.path.join( + output_dir, "{}.{}.{}_correlation.clustermap.svg" + .format(analysis.name, plot_prefix, method)), bbox_inches='tight', dpi=dpi) + + if 'manifold' in steps: + # Manifolds + params = { + "MDS": {'n_jobs': -1}, + "Isomap": {'n_jobs': -1}, + "LocallyLinearEmbedding": {}, + "SpectralEmbedding": {'n_jobs': -1}, + "TSNE": {'init': 'pca'}, + } + for algo in manifold_algorithms: + msg = "Learning manifold with '{}' algorithm".format(algo) + _LOGGER.info(msg + ".") - manif = getattr(manifold, algo)(**params[algo]) - try: - x_new = manif.fit_transform(X.T) - except (TypeError, ValueError): - hint = " Number of samples might be too small to perform '{}'".format(algo) - _LOGGER.error(msg + " failed!" + hint) - continue + manif = getattr(manifold, algo)(**params[algo]) + try: + x_new = manif.fit_transform(X.T) + except (TypeError, ValueError): + hint = " Number of samples might be too small to perform '{}'".format(algo) + _LOGGER.error(msg + " failed!" 
+ hint) + continue - xx = pd.DataFrame(x_new, index=X.columns, columns=list(range(x_new.shape[1]))) + xx = pd.DataFrame(x_new, index=X.columns, columns=list(range(x_new.shape[1]))) - _LOGGER.info("Plotting projection of manifold with '{}' algorithm.".format(algo)) - fig, axis = plt.subplots(1, len(attributes_to_plot), figsize=(4 * len(attributes_to_plot), 4 * 1)) - if len(attributes_to_plot) != 1: - axis = axis.flatten() - else: - axis = np.array([axis]) - for i, attr in enumerate(attributes_to_plot): - for j, sample in enumerate(xx.index): - sample = pd.Series(sample, index=X.columns.names) - try: - label = getattr(sample, attributes_to_plot[i]) - except AttributeError: - label = np.nan - axis[i].scatter( - xx.loc[sample['sample_name'], 0], - xx.loc[sample['sample_name'], 1], - s=50, color=color_dataframe.loc[attr, sample['sample_name']], - alpha=0.75, label=label, rasterized=rasterized) - - # Plot groups - if plot_group_centroids: - xx2 = xx.groupby(attr).mean() - # get the color of each attribute group - cd = color_dataframe.loc[attr] - cd.name = None - cd.index = X.columns.get_level_values(attr) - cd = cd.reset_index().drop_duplicates().set_index(attr) - for j, group in enumerate(xx2.index): + _LOGGER.info("Plotting projection of manifold with '{}' algorithm.".format(algo)) + fig, axis = plt.subplots(1, len(attributes_to_plot), figsize=(4 * len(attributes_to_plot), 4 * 1)) + if len(attributes_to_plot) != 1: + axis = axis.flatten() + else: + axis = np.array([axis]) + for i, attr in enumerate(attributes_to_plot): + for j, sample in enumerate(xx.index): + sample = pd.Series(sample, index=X.columns.names) + try: + label = getattr(sample, attributes_to_plot[i]) + except AttributeError: + label = np.nan axis[i].scatter( - xx2.loc[group, 0], - xx2.loc[group, 1], - marker="s", s=50, color=cd.loc[group].squeeze(), - alpha=0.95, label=group, rasterized=rasterized) - axis[i].text( - xx2.loc[group, 0], - xx2.loc[group, 1], group, - color=cd.loc[group].squeeze(), alpha=0.95) - - # Graphics - axis[i].set_title(attributes_to_plot[i]) - axis[i].set_xlabel("{} 1".format(algo)) - axis[i].set_ylabel("{} 2".format(algo)) - if not axis_ticklabels: - axis[i].set_xticklabels(axis[i].get_xticklabels(), visible=False) - axis[i].set_yticklabels(axis[i].get_yticklabels(), visible=False) - if axis_lines: - axis[i].axhline(0, linestyle="--", color="black", alpha=0.3) - axis[i].axvline(0, linestyle="--", color="black", alpha=0.3) - - if legends: - # Unique legend labels - handles, labels = axis[i].get_legend_handles_labels() - by_label = OrderedDict(zip(labels, handles)) - if any([type(c) in [str, unicode] for c in by_label.keys()]) and len(by_label) <= 20: - # if not any([re.match("^\d", c) for c in by_label.keys()]): - axis[i].legend(by_label.values(), by_label.keys()) - fig.savefig(os.path.join( - output_dir, "{}.{}.{}.svg" - .format(analysis.name, plot_prefix, algo.lower())), bbox_inches='tight', dpi=dpi) + xx.loc[sample['sample_name'], 0], + xx.loc[sample['sample_name'], 1], + s=50, color=color_dataframe.loc[attr, sample['sample_name']], + alpha=0.75, label=label, rasterized=rasterized) + + # Plot groups + if plot_group_centroids: + xx2 = xx.groupby(attr).mean() + # get the color of each attribute group + cd = color_dataframe.loc[attr, :] + cd.name = None + cd.index = X.columns.get_level_values(attr) + cd = cd.apply(lambda x: tuple(x) if isinstance(x, list) else x) # fix for deduplicating lists + cd = cd.reset_index().drop_duplicates().set_index(attr) + for j, group in enumerate(xx2.index): + axis[i].scatter( 
+ xx2.loc[group, 0], + xx2.loc[group, 1], + marker="s", s=50, color=cd.loc[group].squeeze(), + alpha=0.95, label=group, rasterized=rasterized) + axis[i].text( + xx2.loc[group, 0], + xx2.loc[group, 1], group, + color=cd.loc[group].squeeze(), alpha=0.95) + + # Graphics + axis[i].set_title(attributes_to_plot[i]) + axis[i].set_xlabel("{} 1".format(algo)) + axis[i].set_ylabel("{} 2".format(algo)) + if not axis_ticklabels: + axis[i].set_xticklabels(axis[i].get_xticklabels(), visible=False) + axis[i].set_yticklabels(axis[i].get_yticklabels(), visible=False) + if axis_lines: + axis[i].axhline(0, linestyle="--", color="black", alpha=0.3) + axis[i].axvline(0, linestyle="--", color="black", alpha=0.3) + + if legends: + # Unique legend labels + handles, labels = axis[i].get_legend_handles_labels() + by_label = OrderedDict(zip(labels, handles)) + if any([type(c) in [str, unicode] for c in by_label.keys()]) and len(by_label) <= 20: + # if not any([re.match("^\d", c) for c in by_label.keys()]): + axis[i].legend(by_label.values(), by_label.keys()) + fig.savefig(os.path.join( + output_dir, "{}.{}.{}.svg" + .format(analysis.name, plot_prefix, algo.lower())), bbox_inches='tight', dpi=dpi) - # PCA - pcs = min(*X.shape) - 1 - _LOGGER.info("Decomposing data with 'PCA' algorithm for {} dimensions.".format(pcs)) - pca = PCA(n_components=pcs, svd_solver="arpack") - x_new = pca.fit_transform(X.T) - # transform again - xx = pd.DataFrame(x_new, index=X.columns, columns=list(range(x_new.shape[1]))) - - # plot % explained variance per PC - _LOGGER.info("Plotting variance explained with PCA.") - fig, axis = plt.subplots(1, 2, figsize=(4 * 2, 4)) - axis[0].plot( - range(1, len(pca.explained_variance_) + 1), # all PCs - (pca.explained_variance_ / pca.explained_variance_.sum()) * 100, 'o-') - axis[1].plot( - range(1, len(pca.explained_variance_) + 1), # all PCs - np.log10(pca.explained_variance_), 'o-') - for ax in axis: - ax.axvline(len(attributes_to_plot), linestyle='--') - ax.set_xlabel("PC") - axis[0].set_ylabel("% variance") - axis[1].set_ylabel("log variance") - sns.despine(fig) - fig.savefig(os.path.join( - output_dir, "{}.{}.pca.explained_variance.svg" - .format(analysis.name, plot_prefix)), bbox_inches='tight', dpi=dpi) - - # Write % variance expained to disk - pd.Series((pca.explained_variance_ / pca.explained_variance_.sum()) * 100, name="PC").to_csv( - os.path.join(output_dir, "{}.{}.pca.explained_variance.csv".format( - analysis.name, plot_prefix))) - - # plot pca - pcs = min(xx.shape[1] - 1, plot_max_pcs) - _LOGGER.info("Plotting PCA up to {} dimensions.".format(pcs)) - fig, axis = plt.subplots(pcs, len(attributes_to_plot), figsize=( - 4 * len(attributes_to_plot), 4 * pcs)) - if len(attributes_to_plot) == 1: - axis = axis.reshape((pcs, 1)) - if pcs == 1: - axis = axis.reshape((1, len(attributes_to_plot))) - for pc in range(pcs): - for i, attr in enumerate(attributes_to_plot): - for j, sample in enumerate(xx.index): - sample = pd.Series(sample, index=X.columns.names) - try: - label = getattr(samples[j], attr) - except AttributeError: - label = np.nan - axis[pc, i].scatter( - xx.loc[sample['sample_name'], pc], - xx.loc[sample['sample_name'], pc + 1], - s=30, color=color_dataframe.loc[attr, sample['sample_name']], - alpha=0.75, label=label, rasterized=rasterized) - - # Plot groups - if plot_group_centroids: - xx2 = xx.groupby(attr).mean() - # get the color of each attribute group - cd = color_dataframe.loc[attr] - cd.name = None - cd.index = X.columns.get_level_values(attr) - cd = 
cd.reset_index().drop_duplicates().set_index(attr) - for j, group in enumerate(xx2.index): + if 'pca' in steps: + # PCA + pcs = min(*X.shape) - 1 + _LOGGER.info("Decomposing data with 'PCA' algorithm for {} dimensions.".format(pcs)) + pca = PCA(n_components=pcs, svd_solver="arpack") + x_new = pca.fit_transform(X.T) + + pcs_order = range(pca.n_components_) + xx = pd.DataFrame(x_new, index=X.columns, columns=pcs_order) + xx.to_csv( + os.path.join(output_dir, "{}.{}.pca.fit.csv".format( + analysis.name, plot_prefix))) + comps = pd.DataFrame(pca.components_.T, index=X.index, columns=pcs_order) + comps.to_csv( + os.path.join(output_dir, "{}.{}.pca.loadings.csv".format( + analysis.name, plot_prefix))) + + # Write % variance explained to disk + variance = pd.Series( + pca.explained_variance_ratio_ * 100, + name="percent_variance", index=pcs_order + ).to_frame() + variance['log_variance'] = np.log10(pca.explained_variance_) + variance.index.name = "PC" + variance.to_csv( + os.path.join(output_dir, "{}.{}.pca.explained_variance.csv".format( + analysis.name, plot_prefix))) + + # plot % explained variance per PC + _LOGGER.info("Plotting variance explained with PCA.") + fig, axis = plt.subplots(1, 3, figsize=(4 * 3, 4)) + axis[0].plot(variance.index, variance['percent_variance'], 'o-') + axis[0].set_ylim((0, variance['percent_variance'].max() + variance['percent_variance'].max() * 0.1)) + axis[1].plot(variance.index, variance['log_variance'], 'o-') + axis[2].plot(variance.index, variance['percent_variance'].cumsum(), 'o-') + axis[2].set_ylim((0, 100)) + for ax in axis: + ax.axvline(len(attributes_to_plot), linestyle='--') + ax.set_xlabel("PC") + axis[0].set_ylabel("% variance") + axis[1].set_ylabel("log variance") + axis[2].set_ylabel("Cumulative % variance") + sns.despine(fig) + fig.savefig(os.path.join( + output_dir, "{}.{}.pca.explained_variance.svg" + .format(analysis.name, plot_prefix)), bbox_inches='tight', dpi=dpi) + + # plot pca + pcs = min(xx.shape[1] - 1, plot_max_pcs) + _LOGGER.info("Plotting PCA up to {} dimensions.".format(pcs)) + fig, axis = plt.subplots(pcs, len(attributes_to_plot), figsize=( + 4 * len(attributes_to_plot), 4 * pcs)) + if len(attributes_to_plot) == 1: + axis = axis.reshape((pcs, 1)) + if pcs == 1: + axis = axis.reshape((1, len(attributes_to_plot))) + for pc in range(pcs): + for i, attr in enumerate(attributes_to_plot): + for j, sample in enumerate(xx.index): + sample = pd.Series(sample, index=X.columns.names) + try: + label = getattr(samples[j], attr) + except AttributeError: + label = np.nan axis[pc, i].scatter( - xx2.loc[group, pc], - xx2.loc[group, pc + 1], - marker="s", s=50, color=cd.loc[group].squeeze(), - alpha=0.95, label=group, rasterized=rasterized) - axis[pc, i].text( - xx2.loc[group, pc], - xx2.loc[group, pc + 1], group, - color=cd.loc[group].squeeze(), alpha=0.95) - - # Graphics - axis[pc, i].set_title(attr) - axis[pc, i].set_xlabel("PC {}".format(pc + 1)) - axis[pc, i].set_ylabel("PC {}".format(pc + 2)) - if not axis_ticklabels: - axis[pc, i].set_xticklabels(axis[pc, i].get_xticklabels(), visible=False) - axis[pc, i].set_yticklabels(axis[pc, i].get_yticklabels(), visible=False) - if axis_lines: - axis[pc, i].axhline(0, linestyle="--", color="black", alpha=0.3) - axis[pc, i].axvline(0, linestyle="--", color="black", alpha=0.3) - - if legends: - # Unique legend labels - handles, labels = axis[pc, i].get_legend_handles_labels() - by_label = OrderedDict(zip(labels, handles)) - if any([type(c) in [str, unicode] for c in by_label.keys()]) and len(by_label) <=
plot_max_attr: - # if not any([re.match("^\d", c) for c in by_label.keys()]): - if always_legend: - axis[pc, i].legend(by_label.values(), by_label.keys()) - else: - if pc == (pcs - 1): - axis[pc, i].legend( - by_label.values(), by_label.keys()) - fig.savefig(os.path.join(output_dir, "{}.{}.pca.svg".format( - analysis.name, plot_prefix)), bbox_inches="tight") - - # Test association of PCs with attributes - if not test_pc_association: - _LOGGER.info("Not testing association of attributes with principal components.") - return + xx.loc[sample['sample_name'], pc], + xx.loc[sample['sample_name'], pc + 1], + s=30, color=color_dataframe.loc[attr, sample['sample_name']], + alpha=0.75, label=label, rasterized=rasterized) + + # Plot groups + if plot_group_centroids: + xx2 = xx.groupby(attr).mean() + # get the color of each attribute group + cd = color_dataframe.loc[attr, :] + cd.name = None + cd.index = X.columns.get_level_values(attr) + cd = cd.apply(lambda x: tuple(x) if isinstance(x, list) else x) # fix for deduplicating lists + cd = cd.reset_index().drop_duplicates().set_index(attr) + for j, group in enumerate(xx2.index): + axis[pc, i].scatter( + xx2.loc[group, pc], + xx2.loc[group, pc + 1], + marker="s", s=50, color=cd.loc[group].squeeze(), + alpha=0.95, label=group, rasterized=rasterized) + axis[pc, i].text( + xx2.loc[group, pc], + xx2.loc[group, pc + 1], group, + color=cd.loc[group].squeeze(), alpha=0.95) + + # Graphics + axis[pc, i].set_title(attr) + axis[pc, i].set_xlabel("PC {}".format(pc + 1)) + axis[pc, i].set_ylabel("PC {}".format(pc + 2)) + if not axis_ticklabels: + axis[pc, i].set_xticklabels(axis[pc, i].get_xticklabels(), visible=False) + axis[pc, i].set_yticklabels(axis[pc, i].get_yticklabels(), visible=False) + if axis_lines: + axis[pc, i].axhline(0, linestyle="--", color="black", alpha=0.3) + axis[pc, i].axvline(0, linestyle="--", color="black", alpha=0.3) + + if legends: + # Unique legend labels + handles, labels = axis[pc, i].get_legend_handles_labels() + by_label = OrderedDict(zip(labels, handles)) + if any([type(c) in [str, unicode] for c in by_label.keys()]) and len(by_label) <= plot_max_attr: + # if not any([re.match("^\d", c) for c in by_label.keys()]): + if always_legend: + axis[pc, i].legend(by_label.values(), by_label.keys()) + else: + if pc == (pcs - 1): + axis[pc, i].legend( + by_label.values(), by_label.keys()) + fig.savefig(os.path.join(output_dir, "{}.{}.pca.svg".format( + analysis.name, plot_prefix)), bbox_inches="tight") + + if ('pca' in steps) and ('pca_association' in steps): + # Test association of PCs with attributes + if not test_pc_association: + _LOGGER.info("Not testing association of attributes with principal components.") + return - _LOGGER.info("Computing association of given attributes with principal components.") - associations = list() - for pc in range(pcs): - for attr in attributes_to_plot: - _LOGGER.info("PC {}; Attribute {}.".format(pc + 1, attr)) + _LOGGER.info("Computing association of given attributes with principal components.") + associations = list() + for pc in range(pcs): + for attr in attributes_to_plot: + _LOGGER.info("PC {}; Attribute {}.".format(pc + 1, attr)) - # Get all values of samples for this attr - groups = xx.index.get_level_values(attr) + # Get all values of samples for this attr + groups = xx.index.get_level_values(attr) - # Determine if attr is categorical or continuous - if all([type(i) in [str, bool] for i in groups]) or len(groups) == 2: - variable_type = "categorical" - elif all([type(i) in [int, float, np.int64, 
np.float64] for i in groups]): - variable_type = "numerical" - else: - _LOGGER.warning("attr %s cannot be tested." % attr) - associations.append([pc + 1, attr, variable_type, np.nan, np.nan, np.nan]) - continue + # Determine if attr is categorical or continuous + if all([type(i) in [str, bool] for i in groups]) or len(groups) == 2: + variable_type = "categorical" + elif all([type(i) in [int, float, np.int64, np.float64] for i in groups]): + variable_type = "numerical" + else: + _LOGGER.warning("attr %s cannot be tested." % attr) + associations.append([pc + 1, attr, variable_type, np.nan, np.nan, np.nan]) + continue - if variable_type == "categorical": - # It categorical, test pairwise combinations of attributes - for group1, group2 in itertools.combinations(groups, 2): - g1_mask = xx.index.get_level_values(attr) == group1 - g2_mask = xx.index.get_level_values(attr) == group2 + if variable_type == "categorical": + # If categorical, test pairwise combinations of attributes + for group1, group2 in itertools.combinations(groups, 2): + g1_mask = xx.index.get_level_values(attr) == group1 + g2_mask = xx.index.get_level_values(attr) == group2 - g1_values = xx.loc[g1_mask, pc] - g2_values = xx.loc[g2_mask, pc] + g1_values = xx.loc[g1_mask, pc] + g2_values = xx.loc[g2_mask, pc] - # Test ANOVA (or Kruskal-Wallis H-test) - p = kruskal(g1_values, g2_values)[1] + # Test ANOVA (or Kruskal-Wallis H-test) + p = kruskal(g1_values, g2_values)[1] - # Append - associations.append([pc + 1, attr, variable_type, group1, group2, p]) + # Append + associations.append([pc + 1, attr, variable_type, group1, group2, p]) - elif variable_type == "numerical": - # It numerical, calculate pearson correlation - pc_values = xx.loc[:, pc] - trait_values = xx.index.get_level_values(attr) - p = pearsonr(pc_values, trait_values)[1] + elif variable_type == "numerical": + # If numerical, calculate Pearson correlation + pc_values = xx.loc[:, pc] + trait_values = xx.index.get_level_values(attr) + p = pearsonr(pc_values, trait_values)[1] - associations.append([pc + 1, attr, variable_type, np.nan, np.nan, p]) + associations.append([pc + 1, attr, variable_type, np.nan, np.nan, p]) - associations = pd.DataFrame( - associations, columns=["pc", "attribute", "variable_type", "group_1", "group_2", "p_value"]) + associations = pd.DataFrame( + associations, columns=["pc", "attribute", "variable_type", "group_1", "group_2", "p_value"]) - if associations.empty: - msg = "Couldn't test any associations between PCs and factors." - hint = " Perhaps PCA produced only 1 PC?" + if associations.empty: + msg = "Couldn't test any associations between PCs and factors." + hint = " Perhaps PCA produced only 1 PC?"
+ _LOGGER.warning(msg + hint) + return - # write - _LOGGER.info("Saving associations.") - associations.to_csv(os.path.join( - output_dir, "{}.{}.pca.variable_principle_components_association.csv" - .format(analysis.name, plot_prefix)), index=False) + # correct p-values + associations.loc[:, 'adj_pvalue'] = multipletests(associations['p_value'], method="fdr_bh")[1] - if len(attributes_to_plot) < 2: - _LOGGER.info("Only one attribute given, can't plot associations.") - return + # write + _LOGGER.info("Saving associations.") + associations.to_csv(os.path.join( + output_dir, "{}.{}.pca.variable_principle_components_association.csv" + .format(analysis.name, plot_prefix)), index=False) + + if len(attributes_to_plot) < 2: + _LOGGER.info("Only one attribute given, can't plot associations.") + return - # Plot - # associations[associations['p_value'] < 0.05].drop(['group_1', 'group_2'], axis=1).drop_duplicates() - # associations.drop(['group_1', 'group_2'], axis=1).drop_duplicates().pivot(index="pc", columns="attribute", values="p_value") - pivot = ( - associations - .groupby(["pc", "attribute"]) - .min()['p_value'] - .reset_index() - .pivot(index="pc", columns="attribute", values="p_value") - .dropna(axis=1)) - - # heatmap of -log p-values - g = sns.clustermap( - -np.log10(pivot), row_cluster=False, - annot=True, cbar_kws={"label": "-log10(p_value) of association"}, - square=True, rasterized=rasterized) - g.ax_heatmap.set_xticklabels(g.ax_heatmap.get_xticklabels(), rotation=45, ha="right") - g.fig.savefig(os.path.join( - output_dir, "{}.{}.pca.variable_principle_components_association.svg" - .format(analysis.name, plot_prefix)), bbox_inches="tight", dpi=dpi) - - # heatmap of masked significant - g = sns.clustermap( - (pivot < 0.05).astype(int), - row_cluster=False, cbar_kws={"label": "significant association"}, - square=True, rasterized=rasterized, vmin=0, vmax=1) - g.ax_heatmap.set_xticklabels(g.ax_heatmap.get_xticklabels(), rotation=45, ha="right") - g.fig.savefig(os.path.join( - output_dir, "{}.{}.pca.variable_principle_components_association.masked.svg" - .format(analysis.name, plot_prefix)), bbox_inches="tight", dpi=dpi) + # Plot + for var in ['p_value', 'adj_pvalue']: + pivot = ( + associations + .groupby(["pc", "attribute"]) + .min()[var] + .reset_index() + .pivot(index="pc", columns="attribute", values=var) + .dropna(axis=1)) + + # heatmap of -log p-values + g = sns.clustermap( + -np.log10(pivot), row_cluster=False, + annot=True, cbar_kws={"label": "-log10({}) of association".format(var)}, + square=True, rasterized=rasterized, vmin=0) + g.ax_heatmap.set_xticklabels(g.ax_heatmap.get_xticklabels(), rotation=45, ha="right") + g.fig.savefig(os.path.join( + output_dir, "{}.{}.pca.variable_principle_components_association.{}.svg" + .format(analysis.name, plot_prefix, var)), bbox_inches="tight", dpi=dpi) + + # heatmap of masked significant + g = sns.clustermap( + (pivot < 0.05).astype(int), + row_cluster=False, cbar_kws={"label": "significant association"}, + square=True, rasterized=rasterized, vmin=0, vmax=1, cmap="Paired") + g.ax_heatmap.set_xticklabels(g.ax_heatmap.get_xticklabels(), rotation=45, ha="right") + g.fig.savefig(os.path.join( + output_dir, "{}.{}.pca.variable_principle_components_association.{}.masked.svg" + .format(analysis.name, plot_prefix, var)), bbox_inches="tight", dpi=dpi) def deseq_analysis( @@ -2919,16 +2344,16 @@ def plot_differential( split_diff = results.groupby( [comparison_column, "direction"])['diff'].sum().sort_values(ascending=False).reset_index()
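For reference, the PC-attribute association step and the new `adj_pvalue` column above reduce to the following standalone sketch; the helper name and toy data are illustrative only, and `multipletests` is assumed to be statsmodels' `statsmodels.stats.multitest.multipletests`:

    import itertools
    from scipy.stats import kruskal, pearsonr
    from statsmodels.stats.multitest import multipletests

    def pc_association_pvalues(pc_values, attribute_values):
        # Categorical attribute: Kruskal-Wallis H-test per pair of groups;
        # numerical attribute: p-value of the Pearson correlation.
        if all(isinstance(v, (str, bool)) for v in attribute_values):
            pvals = []
            for g1, g2 in itertools.combinations(sorted(set(attribute_values)), 2):
                a = [p for p, v in zip(pc_values, attribute_values) if v == g1]
                b = [p for p, v in zip(pc_values, attribute_values) if v == g2]
                pvals.append(kruskal(a, b)[1])
            return pvals
        return [pearsonr(pc_values, attribute_values)[1]]

    pvals = pc_association_pvalues([1.0, 1.2, -0.9, -1.1], ["KO", "KO", "WT", "WT"])
    adjusted = multipletests(pvals, method="fdr_bh")[1]  # Benjamini-Hochberg, as above

Note that the real implementation records one row per group pair and keeps NaN placeholders for numerical attributes, so the saved associations CSV has a fixed schema.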
split_diff.loc[split_diff['direction'] == 'down', "diff"] *= -1 - split_diff['label'] = split_diff['comparison_name'] + ", " + split_diff['direction'] + split_diff['label'] = split_diff[comparison_column].astype(str) + ", " + split_diff['direction'] total_diff['diff_perc'] = (total_diff['diff'] / n_vars) * 100 split_diff['diff_perc'] = (split_diff['diff'] / n_vars) * 100 _LOGGER.info("Plotting number of differential {}s per comparison.".format(var_name)) fig, axis = plt.subplots(2, 2, figsize=(4 * 2, 4 * 2)) - sns.barplot(data=total_diff, x="diff", y="comparison_name", orient="h", ax=axis[0, 0]) - sns.barplot(data=total_diff, x="diff_perc", y="comparison_name", orient="h", ax=axis[0, 1]) - sns.barplot(data=split_diff, x="diff", y="comparison_name", hue="direction", dodge=False, orient="h", ax=axis[1, 0]) - sns.barplot(data=split_diff, x="diff_perc", y="comparison_name", hue="direction", dodge=False, orient="h", ax=axis[1, 1]) + sns.barplot(data=total_diff, x="diff", y=comparison_column, orient="h", ax=axis[0, 0]) + sns.barplot(data=total_diff, x="diff_perc", y=comparison_column, orient="h", ax=axis[0, 1]) + sns.barplot(data=split_diff, x="diff", y=comparison_column, hue="direction", dodge=False, orient="h", ax=axis[1, 0]) + sns.barplot(data=split_diff, x="diff_perc", y=comparison_column, hue="direction", dodge=False, orient="h", ax=axis[1, 1]) for ax in axis[0, :]: ax.set_xlabel("", visible=False) for ax in axis[:, 1]: @@ -3060,8 +2485,8 @@ def plot_differential( ax.set_xlabel("log2(fold-change)") ax.set_ylabel("-log10(p-value)") ax.axvline(0, linestyle='--', alpha=0.5, zorder=0, color="black") - l = np.max([abs(i) for i in ax.get_xlim()]) - ax.set_xlim(-l, l) + ll = np.max([abs(ii) for ii in ax.get_xlim()]) + ax.set_xlim(-ll, ll) # Add lines of significance ax.axhline( @@ -3108,8 +2533,8 @@ def plot_differential( ax.set_xlabel("Mean {}".format(quantity.lower())) ax.set_ylabel("log2(fold-change)") ax.axhline(0, linestyle='--', alpha=0.5, zorder=0, color="black") - l = np.max([abs(i) for i in ax.get_ylim()]) - ax.set_ylim(-l, l) + ll = np.max([abs(ii) for ii in ax.get_ylim()]) + ax.set_ylim(-ll, ll) # Add lines of significance if fold_change is not None: @@ -3934,7 +3359,7 @@ def differential_enrichment( :type max_diff: number, optional :param sort_var: Variable to sort for when setting `max_diff`. Defaults to "pvalue". :type sort_var: str, optional - :param as_jobs: One of "serial" or "job". Defaults to True. + :param as_jobs: Whether work should be submitted as jobs. Defaults to True. :type as_jobs: bool, optional """ import pandas as pd @@ -4181,6 +3606,17 @@ def collect_differential_enrichment( if "{data_type}" in output_dir: output_dir = output_dir.format(data_type=data_type) + data_type_steps = { + "ATAC-seq": ['lola', 'motif', 'homer', 'homer_consensus', 'enrichr'], + "ChIP-seq": ['lola', 'motif', 'homer', 'homer_consensus', 'enrichr'], + "RNA-seq": ['enrichr']} + steps = [s for s in steps if s in data_type_steps[data_type]] + + if len(steps) == 0: + msg = "No valid steps for the respective data type selected." + _LOGGER.error(msg) + raise ValueError(msg) + error_msg = "{} results for comparison '{}', direction '{}' were not found!" 
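The step filtering just added to `collect_differential_enrichment` is a small validate-early pattern. Isolated, with the same mapping as the patch (the wrapper function itself is illustrative, not part of the library):

    data_type_steps = {
        "ATAC-seq": ['lola', 'motif', 'homer', 'homer_consensus', 'enrichr'],
        "ChIP-seq": ['lola', 'motif', 'homer', 'homer_consensus', 'enrichr'],
        "RNA-seq": ['enrichr']}

    def filter_steps(steps, data_type):
        # Keep only the steps meaningful for this data type; fail fast otherwise.
        valid = [s for s in steps if s in data_type_steps[data_type]]
        if len(valid) == 0:
            raise ValueError("No valid steps for the respective data type selected.")
        return valid

    filter_steps(['lola', 'enrichr'], "RNA-seq")  # -> ['enrichr']
    filter_steps(['lola'], "RNA-seq")             # -> raises ValueError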
lola_enr = pd.DataFrame() @@ -4206,109 +3642,92 @@ for direction in params: comparison_dir = os.path.join(output_dir, "{}.{}".format(comp, direction)) - if data_type == "RNA-seq": - _LOGGER.debug("Collecting enrichments of comparison '{}', direction '{}'.".format(comp, direction)) - if 'enrichr' in steps: - try: - enr = pd.read_csv(os.path.join(comparison_dir, input_prefix + ".enrichr.csv")) - except IOError as e: - if permissive: - _LOGGER.error(error_msg.format("Enrichr", comp, direction)) - else: - raise e - except pd.errors.EmptyDataError: - continue - else: - enr.loc[:, "comparison_name"] = comp - enr.loc[:, "direction"] = direction - pathway_enr = pathway_enr.append(enr, ignore_index=True) - elif data_type == "ATAC-seq": - _LOGGER.debug("Collecting enrichments of comparison '{}', direction '{}'.".format(comp, direction)) + _LOGGER.debug("Collecting enrichments of comparison '{}', direction '{}'.".format(comp, direction)) - # MEME/AME - if 'motif' in steps: - try: - ame_motifs = parse_ame(comparison_dir).reset_index() - ame_motifs.columns = ["TF", "p_value"] - except IOError as e: - if permissive: - _LOGGER.error(error_msg.format("MEME/AME motif", comp, direction)) - else: - raise e + # MEME/AME + if 'motif' in steps: + try: + ame_motifs = parse_ame(comparison_dir).reset_index() + ame_motifs.columns = ["TF", "p_value"] + except IOError as e: + if permissive: + _LOGGER.warning(error_msg.format("MEME/AME motif", comp, direction)) else: - if not ame_motifs.empty: # work around no motifs enriched - ame_motifs.loc[:, "comparison_name"] = comp - ame_motifs.loc[:, "direction"] = direction - meme_enr = meme_enr.append(ame_motifs, ignore_index=True) - - # fix mouse TF names - if (meme_enr['TF'].str.startswith("UP").sum() / float(meme_enr.shape[0])) >= 0.1: - annot = pd.read_table( - "~/resources/motifs/motif_databases/MOUSE/uniprobe_mouse.id_mapping.tsv", - header=None, names=["TF", "TF_name"]) - annot.loc[:, "TF"] = annot['TF'].str.replace(r"_.*", "") - meme_enr = pd.merge(meme_enr, annot).drop("TF", axis=1).rename(columns={"TF_name": "TF"}) - else: - _LOGGER.warning("Comparison '{}' has no MEME enriched motifs.".format(comp)) + raise e + else: + if not ame_motifs.empty: # work around no motifs enriched + ame_motifs.loc[:, "comparison_name"] = comp + ame_motifs.loc[:, "direction"] = direction + meme_enr = meme_enr.append(ame_motifs, ignore_index=True) + + # fix mouse TF names + if (meme_enr['TF'].str.startswith("UP").sum() / float(meme_enr.shape[0])) >= 0.1: + annot = pd.read_table( + "~/resources/motifs/motif_databases/MOUSE/uniprobe_mouse.id_mapping.tsv", + header=None, names=["TF", "TF_name"]) + annot.loc[:, "TF"] = annot['TF'].str.replace(r"_.*", "") + meme_enr = pd.merge(meme_enr, annot).drop("TF", axis=1).rename(columns={"TF_name": "TF"}) + else: + _LOGGER.warning("Comparison '{}' has no MEME enriched motifs.".format(comp)) - # HOMER DE NOVO - de novo motif enrichment independent for each sample - if 'homer' in steps: - try: - homer_motifs = parse_homer(os.path.join(comparison_dir, "homerResults")) - except IOError as e: - if permissive: - _LOGGER.error(error_msg.format("HOMER motif", comp, direction)) - else: - raise e + # HOMER DE NOVO - de novo motif enrichment independent for each sample + if 'homer' in steps: + try: + homer_motifs = parse_homer(os.path.join(comparison_dir, "homerResults")) + except IOError as e: + if permissive: + _LOGGER.warning(error_msg.format("HOMER motif", comp, direction)) else: - homer_motifs.loc[:, "comparison_name"] = comp -
homer_motifs.loc[:, "direction"] = direction - homer_enr = homer_enr.append(homer_motifs, ignore_index=True) + raise e + else: + homer_motifs.loc[:, "comparison_name"] = comp + homer_motifs.loc[:, "direction"] = direction + homer_enr = homer_enr.append(homer_motifs, ignore_index=True) - # HOMER CONSENSUS - motif enrichment on a consensus set of motifs - if 'homer_consensus' in steps: - try: - homer_cons = pd.read_table(os.path.join(comparison_dir, "knownResults.txt")) - except IOError as e: - # Homer consensus is always permissive - _LOGGER.error(error_msg.format("HOMER consensus", comp, direction)) + # HOMER CONSENSUS - motif enrichment on a consensus set of motifs + if 'homer_consensus' in steps: + try: + homer_cons = pd.read_table(os.path.join(comparison_dir, "knownResults.txt")) + except IOError: + # Homer consensus is always permissive + _LOGGER.warning(error_msg.format("HOMER consensus", comp, direction)) + else: + homer_cons.columns = homer_cons.columns.str.replace(r"\(of .*", "") + homer_cons["# of Background Sequences with Motif"] + for col in homer_cons.columns[homer_cons.columns.str.contains("%")]: + homer_cons[col] = homer_cons[col].str.replace("%", "").astype(float) + homer_cons.loc[:, "comparison_name"] = comp + homer_cons.loc[:, "direction"] = direction + homer_consensus = homer_consensus.append(homer_cons, ignore_index=True) + + # LOLA + if 'lola' in steps: + try: + lola = pd.read_csv(os.path.join(comparison_dir, "allEnrichments.tsv"), sep="\t") + except IOError as e: + if permissive: + _LOGGER.warning(error_msg.format("LOLA", comp, direction)) else: - homer_cons.columns = homer_cons.columns.str.replace(r"\(of .*", "") - homer_cons["# of Background Sequences with Motif"] - for col in homer_cons.columns[homer_cons.columns.str.contains("%")]: - homer_cons[col] = homer_cons[col].str.replace("%", "").astype(float) - homer_cons.loc[:, "comparison_name"] = comp - homer_cons.loc[:, "direction"] = direction - homer_consensus = homer_consensus.append(homer_cons, ignore_index=True) - - # LOLA - if 'lola' in steps: - try: - lola = pd.read_csv(os.path.join(comparison_dir, "allEnrichments.tsv"), sep="\t") - except IOError as e: - if permissive: - _LOGGER.error(error_msg.format("LOLA", comp, direction)) - else: - raise e - else: - lola.loc[:, "comparison_name"] = comp - lola.loc[:, "direction"] = direction - lola_enr = lola_enr.append(lola, ignore_index=True) + raise e + else: + lola.loc[:, "comparison_name"] = comp + lola.loc[:, "direction"] = direction + lola_enr = lola_enr.append(lola, ignore_index=True) - # ENRICHR - if 'enrichr' in steps: - try: - enr = pd.read_csv(os.path.join(comparison_dir, input_prefix + "_genes.enrichr.csv")) - except IOError as e: - if permissive: - _LOGGER.error(error_msg.format("Enrichr", comp, direction)) - else: - raise e + # ENRICHR + if 'enrichr' in steps: + try: + enr = pd.read_csv(os.path.join(comparison_dir, input_prefix + "_genes.enrichr.csv")) + except IOError as e: + if permissive: + _LOGGER.warning(error_msg.format("Enrichr", comp, direction)) else: - enr.loc[:, "comparison_name"] = comp - enr.loc[:, "direction"] = direction - pathway_enr = pathway_enr.append(enr, ignore_index=True) + raise e + else: + enr.loc[:, "comparison_name"] = comp + enr.loc[:, "direction"] = direction + pathway_enr = pathway_enr.append(enr, ignore_index=True) # write combined enrichments if 'enrichr' in steps: @@ -4641,7 +4060,7 @@ def plot_differential_enrichment( enr.loc[:, 'Motif Name'] = enr['Motif Name'].str.replace(".*BestGuess:", "").str.replace(r"-ChIP-Seq.*", "")
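Every parser in the flattened collection loop above shares the same permissive try/except/else shape. A generic sketch of that pattern follows; the `collect_one` helper and its arguments are ours, not part of the library:

    import logging
    import pandas as pd

    _LOG = logging.getLogger(__name__)

    def collect_one(parse, results, comp, direction, label, permissive=True):
        # Parse one comparison/direction; in permissive mode a missing input
        # file is only logged as a warning, otherwise the IOError propagates.
        try:
            df = parse()
        except IOError as e:
            if permissive:
                _LOG.warning("%s results for comparison '%s', direction '%s' were not found!",
                             label, comp, direction)
            else:
                raise e
        else:
            df.loc[:, "comparison_name"] = comp
            df.loc[:, "direction"] = direction
            results = results.append(df, ignore_index=True)
        return results

(`DataFrame.append` matches the pandas idiom used throughout this patch; on pandas >= 2.0 one would collect frames in a list and `pd.concat` them instead.)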
enr.loc[:, 'combined'] = enr[['enrichment_over_background', 'log_p_value']].apply(zscore).mean(axis=1) - cs = axis[i].scatter( + axis[i].scatter( enr['enrichment_over_background'], enr['log_p_value'], c=enr['combined'], @@ -5077,10 +4496,10 @@ def signed_max(x, f=0.66, axis=0): else: if types[0] not in [np.float_, float, np.int_, int]: return np.nan - l = float(len(x)) + ll = float(len(x)) neg = sum(x < 0) pos = sum(x > 0) - obs_f = max(neg / l, pos / l) + obs_f = max(neg / ll, pos / ll) if obs_f >= f: if neg > pos: return min(x) @@ -5556,7 +4975,7 @@ def project_to_geo( os.makedirs(output_dir) cmds = list() - annot = pd.DataFrame() + annot = pd.DataFrame(index=pd.Index([], name="sample_name")) for sample in samples: various = len(sample.data_path.split(" ")) > 1 cmd = "" @@ -5599,7 +5018,6 @@ def project_to_geo( _LOGGER.warning("'{}' sample '{}' does not have a 'bigwig'" .format(sample.library, sample.name) + " attribute set. Skipping bigWig file.") - # Copy peaks if sample.library == "ATAC-seq": if hasattr(sample, "peaks"): @@ -5621,8 +5039,7 @@ def project_to_geo( _LOGGER.warning("'{}' sample '{}' does not have a 'peaks' attribute set." .format(sample.library, sample.name) + " Skipping peaks file.") - - if distributed and not dry_run: + if distributed: from pypiper.ngstk import NGSTk import textwrap tk = NGSTk() @@ -5639,7 +5056,8 @@ def project_to_geo( with open(job_file, "w") as handle: handle.write(textwrap.dedent(job)) - tk.slurm_submit_job(job_file) + if not dry_run: + tk.slurm_submit_job(job_file) else: cmds.append(cmd) @@ -5651,6 +5069,29 @@ def project_to_geo( return annot +def collect_md5_sums(df): + """ + Given a dataframe with columns ending in '_md5sum' that contain paths to md5sum files, + replace those paths with the actual checksums. + + Useful in combination with ``project_to_geo``. + + :param df: A dataframe with columns ending in '_md5sum'. + :type df: pandas.DataFrame + :returns: pandas.DataFrame with md5sum columns replaced with the actual md5sums. + :rtype: pandas.DataFrame + """ + cols = df.columns[df.columns.str.endswith("_md5sum")] + for col in cols: + for i, path in df.loc[:, col].iteritems(): + if not pd.isnull(path): + cont = open(path, 'r').read().strip() + if any([x.isspace() for x in cont]): + cont = cont.split(" ")[0] + df.loc[i, col] = cont + return df + + def rename_sample_files( annotation_mapping, old_sample_name_column="old_sample_name", @@ -5872,7 +5313,7 @@ def subtract_principal_component( return X2 -def subtract_principal_component_by_attribute(df, pc=1, attributes=["CLL"]): +def subtract_principal_component_by_attribute(df, attributes, pc=1): """ Given a matrix (n_samples, n_variables), remove `pc` (1-based) from matrix. """ diff --git a/ngs_toolkit/track_manager.py b/ngs_toolkit/track_manager.py index 3c4b53e..4ccee50 100755 --- a/ngs_toolkit/track_manager.py +++ b/ngs_toolkit/track_manager.py @@ -10,9 +10,6 @@ import ngs_toolkit -_LOGGER = ngs_toolkit._LOGGER - - def parse_arguments(): """ Argument Parsing.
@@ -76,7 +73,7 @@ def make_ucsc_trackhub(args, prj, track_hub, bigwig_dir, genomes_hub, proj_name, """.format(proj=proj_name, description=proj_desc, email=user_email) with open(track_hub, 'w') as handle: handle.write(text) - os.chmod(track_hub, 0755) + os.chmod(track_hub, 0o755) # Templates for various levels track_parent = """track {proj} @@ -145,7 +142,7 @@ def make_ucsc_trackhub(args, prj, track_hub, bigwig_dir, genomes_hub, proj_name, for genome in df['genome'].unique(): if not os.path.exists(os.path.join(bigwig_dir, genome)): os.makedirs(os.path.join(bigwig_dir, genome)) - os.chmod(os.path.join(bigwig_dir, genome), 0755) + os.chmod(os.path.join(bigwig_dir, genome), 0o755) df_g = df[(df['genome'] == genome)] @@ -155,7 +152,7 @@ def make_ucsc_trackhub(args, prj, track_hub, bigwig_dir, genomes_hub, proj_name, """.format(g=genome) with open(genomes_hub, 'a') as handle: handle.write(text) - os.chmod(genomes_hub, 0755) + os.chmod(genomes_hub, 0o755) # TrackDB track_db = os.path.join(os.path.join(bigwig_dir, genome, "trackDb.txt")) @@ -197,7 +194,7 @@ def make_ucsc_trackhub(args, prj, track_hub, bigwig_dir, genomes_hub, proj_name, if not os.path.exists(dest) and args.link: try: os.symlink(sample_attrs['bigwig'], dest) - os.chmod(dest, 0755) + os.chmod(dest, 0o755) except OSError: print("Sample {} track file does not exist!".format(sample_attrs["sample_name"])) continue @@ -226,13 +223,13 @@ def make_ucsc_trackhub(args, prj, track_hub, bigwig_dir, genomes_hub, proj_name, if not os.path.exists(dest) and args.link: try: os.symlink(sample_attrs['bigwig'], dest) - os.chmod(dest, 0755) + os.chmod(dest, 0o755) except OSError: print("Sample {} track file does not exist!".format(sample_attrs["sample_name"])) continue # Make directories readable and executable - os.chmod(os.path.join(bigwig_dir, genome), 0755) - os.chmod(bigwig_dir, 0755) + os.chmod(os.path.join(bigwig_dir, genome), 0o755) + os.chmod(bigwig_dir, 0o755) track = re.sub("_none", "", track) @@ -295,8 +292,8 @@ def make_igv_tracklink(prj, track_file, track_url): # write to file with open(track_file, 'w') as handle: handle.write(text + "\n") - os.chmod(track_file, 0655) - os.chmod(os.path.dirname(track_file), 0755) + os.chmod(track_file, 0o655) + os.chmod(os.path.dirname(track_file), 0o755) msg = "\n".join([ "Finished producing IGV track file!", "'{}'".format(track_file), diff --git a/requirements/requirements.test.txt b/requirements/requirements.test.txt index daa187e..b61158b 100644 --- a/requirements/requirements.test.txt +++ b/requirements/requirements.test.txt @@ -1,4 +1,5 @@ pytest>=4.0.2 coverage>=4.5.2 pytest-cov +codecov codacy-coverage diff --git a/test/test_atacseq_analysis.py b/test/test_atacseq_analysis.py index f5f7888..8f8060e 100644 --- a/test/test_atacseq_analysis.py +++ b/test/test_atacseq_analysis.py @@ -171,11 +171,13 @@ def test_rpm_normalization(various_analysis): for analysis in various_analysis: qnorm = analysis.normalize_coverage_rpm(save=False) assert qnorm.dtypes.all() == np.float + assert hasattr(analysis, "coverage_rpm") rpm_file = os.path.join(analysis.results_dir, analysis.name + "_peaks.coverage_rpm.csv") assert not os.path.exists(rpm_file) qnorm = analysis.normalize_coverage_rpm(save=True) assert os.path.exists(rpm_file) assert os.stat(rpm_file).st_size > 0 + assert hasattr(analysis, "coverage_rpm") def test_quantile_normalization(various_analysis): @@ -203,6 +205,7 @@ def test_quantile_normalization(various_analysis): # assert all(np.array(cors) > 0.99) +@pytest.mark.skipif(travis, reason="This is anyway 
tested after") def test_cqn_normalization(analysis): # At some point, downloading a genome reference in Travis # caused memory error. @@ -225,10 +228,15 @@ def test_normalize(analysis): qnorm = analysis.normalize_coverage_rpm(save=False) assert isinstance(qnorm, pd.DataFrame) assert hasattr(analysis, "coverage_rpm") + del analysis.coverage_rpm qnorm_d = analysis.normalize(method="total", save=False) + assert isinstance(qnorm_d, pd.DataFrame) + assert hasattr(analysis, "coverage_rpm") assert np.array_equal(qnorm_d, qnorm) + qnorm = analysis.normalize_coverage_quantiles(save=False) - assert hasattr(analysis, "coverage_rpm") + assert hasattr(analysis, "coverage_qnorm") + del analysis.coverage_rpm qnorm_d = analysis.normalize(method="quantile", save=False) assert isinstance(qnorm_d, pd.DataFrame) assert hasattr(analysis, "coverage_qnorm") @@ -238,16 +246,10 @@ def test_normalize(analysis): # caused memory error. # This should now be fixed by implementing download/decompressing # functions working in chunks - try: - qnorm = analysis.normalize_gc_content(save=False) - except OSError: - if travis: - return - else: - raise - - assert isinstance(qnorm, pd.DataFrame) - assert hasattr(analysis, "coverage_gc_corrected") + if not travis: + qnorm = analysis.normalize(method="gc_content", save=False) + assert isinstance(qnorm, pd.DataFrame) + assert hasattr(analysis, "coverage_gc_corrected") def test_get_matrix_stats(various_analysis): @@ -267,7 +269,7 @@ def test_get_peak_gene_annotation(analysis): os.chdir(os.path.join(analysis.results_dir, os.pardir)) annot = analysis.get_peak_gene_annotation(max_dist=1e10) - tss = os.path.join("reference", + tss = os.path.join(analysis.root_dir, 'reference', "{}.{}.gene_annotation.protein_coding.tss.bed" .format(analysis.organism, mapping[analysis.genome])) assert os.path.exists(tss) @@ -278,7 +280,7 @@ def test_get_peak_gene_annotation(analysis): def test_get_peak_genomic_location(analysis): prefix = os.path.join( - analysis.results_dir, "..", "reference", "{}.{}.genomic_context") + analysis.root_dir, "reference", "{}.{}.genomic_context") fs = [prefix + a for a in [ ".bed", ".exon.bed", ".genebody.bed", ".intergenic.bed", ".intron.bed", ".promoter.bed", ".utr3.bed", ".utr5.bed"]] diff --git a/test/test_decorators.py b/test/test_decorators.py new file mode 100644 index 0000000..3da91d2 --- /dev/null +++ b/test/test_decorators.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python + + +from ngs_toolkit.atacseq import ATACSeqAnalysis +import pytest +import os + + +travis = 'TRAVIS' in os.environ + + +@pytest.fixture +def empty_analysis(): + return ATACSeqAnalysis() + + +@pytest.fixture +def null_analysis(): + analysis = ATACSeqAnalysis() + analysis.organism = None + analysis.genome = None + analysis.sites = None + return analysis + + +@pytest.fixture +def full_analysis(): + analysis = ATACSeqAnalysis() + analysis.organism = "human" + analysis.genome = "hg38" + analysis.sites = "hg38" + analysis.samples = [] + return analysis + + +class Test_check_organism_genome: + # here we use 'get_annotations' as en example + # decorated function that won't fail for some other + # reason on a fairly empty analysis object + def test_empty_analysis(self, empty_analysis): + # Make sure it raises AttributeError + with pytest.raises(AttributeError): + empty_analysis.get_annotations(steps=[]) + + def test_null_analysis(self, null_analysis): + # Make sure it raises AttributeError + with pytest.raises(AttributeError): + null_analysis.get_annotations(steps=[]) + + def test_full_analysis(self, 
full_analysis): + full_analysis.get_annotations(steps=[]) + + +class Test_check_has_sites: + # here we use 'calculate_peak_support' as an example + # decorated function. It will, however, fail for another + # reason due to the fairly empty analysis object (last test) + def test_empty_analysis(self, empty_analysis): + # Make sure it raises AttributeError + with pytest.raises(AttributeError): + empty_analysis.calculate_peak_support() + + def test_null_analysis(self, null_analysis): + # Make sure it raises AttributeError + with pytest.raises(AttributeError): + null_analysis.calculate_peak_support() + + def test_full_analysis(self, full_analysis): + # This passes on the decorator + with pytest.raises(IOError): + full_analysis.calculate_peak_support() diff --git a/test/test_unsupervised_analysis.py b/test/test_unsupervised_analysis.py index d09af9a..1651286 100644 --- a/test/test_unsupervised_analysis.py +++ b/test/test_unsupervised_analysis.py @@ -75,8 +75,10 @@ def outputs(analysis): prefix + "pca.explained_variance.svg", prefix + "pca.svg", prefix + "pca.variable_principle_components_association.csv", - prefix + "pca.variable_principle_components_association.masked.svg", - prefix + "pca.variable_principle_components_association.svg", + prefix + "pca.variable_principle_components_association.p_value.masked.svg", + prefix + "pca.variable_principle_components_association.p_value.svg", + prefix + "pca.variable_principle_components_association.adj_pvalue.masked.svg", + prefix + "pca.variable_principle_components_association.adj_pvalue.svg", prefix + "pearson_correlation.clustermap.svg", prefix + "spearman_correlation.clustermap.svg", prefix + "spectralembedding.svg", @@ -130,8 +132,10 @@ def test_low_samples_no_manifolds(self, analysis): prefix + "locallylinearembedding.svg", prefix + "spectralembedding.svg", prefix + "pca.variable_principle_components_association.csv", - prefix + "pca.variable_principle_components_association.masked.svg", - prefix + "pca.variable_principle_components_association.svg"] + prefix + "pca.variable_principle_components_association.p_value.masked.svg", + prefix + "pca.variable_principle_components_association.adj_pvalue.masked.svg", + prefix + "pca.variable_principle_components_association.p_value.svg", + prefix + "pca.variable_principle_components_association.adj_pvalue.svg"] unsupervised_analysis(analysis, samples=analysis.samples[:2]) for output in outputs2: assert os.path.exists(output) @@ -157,8 +161,10 @@ def test_various_plotting_attributes(self, analysis, outputs): prefix = os.path.join( analysis.results_dir, "unsupervised_analysis_ATAC-seq", analysis.name + ".all_sites.") not_outputs = [ - prefix + "pca.variable_principle_components_association.masked.svg", - prefix + "pca.variable_principle_components_association.svg"] + prefix + "pca.variable_principle_components_association.p_value.masked.svg", + prefix + "pca.variable_principle_components_association.p_value.svg", + prefix + "pca.variable_principle_components_association.adj_pvalue.masked.svg", + prefix + "pca.variable_principle_components_association.adj_pvalue.svg"] for i in range(1, len(analysis.group_attributes)): unsupervised_analysis(analysis, attributes_to_plot=analysis.group_attributes[:i]) for output in outputs:
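One last note, on the 0755 -> 0o755 changes in `track_manager.py` further up: bare octal literals are Python 2-only syntax and do not even parse under Python 3, while the `0o` prefix works in both. A quick self-contained check (the temporary path here is for illustration only):

    import os
    import stat
    import tempfile

    path = tempfile.mkdtemp()
    os.chmod(path, 0o755)  # writing 0755 would be a SyntaxError on Python 3
    assert stat.S_IMODE(os.stat(path).st_mode) == 0o755  # rwxr-xr-x
    assert 0o755 == 493  # the same integer either way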