From 5e534836315dd78de1120a04afcfa64018d5383b Mon Sep 17 00:00:00 2001
From: Liz Gehret <54517601+lizgehret@users.noreply.github.com>
Date: Wed, 4 May 2022 10:34:43 -0700
Subject: [PATCH] LINT: linting cleanup (#17)

---
 LICENSE                          |   1 -
 Makefile                         |   4 +-
 ci/recipe/meta.yaml              |   6 +-
 q2_fmt/__init__.py               |   5 +-
 q2_fmt/_engraftment.py           | 142 +++++++++++-------
 q2_fmt/_examples.py              |  15 +-
 q2_fmt/_stats.py                 |  36 ++---
 q2_fmt/_transformer.py           |   4 +-
 q2_fmt/_visualizer.py            |   4 +-
 q2_fmt/plugin_setup.py           | 156 ++++++++++++--------
 q2_fmt/tests/test_engraftment.py | 237 +++++++++++++++++++------------
 11 files changed, 390 insertions(+), 220 deletions(-)

diff --git a/LICENSE b/LICENSE
index 9700189..bffa534 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,3 @@
-
 BSD 3-Clause License
 
 Copyright (c) 2022, QIIME 2 development team.
diff --git a/Makefile b/Makefile
index 4b8cf7c..1b9c4d2 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 
 PYTHON ?= python
 
-all:
+all: ;
 
 lint:
 	q2lint
@@ -22,4 +22,4 @@ dev: all
 
 clean: distclean
 
-distclean:
+distclean: ;
diff --git a/ci/recipe/meta.yaml b/ci/recipe/meta.yaml
index ffbb184..362393a 100644
--- a/ci/recipe/meta.yaml
+++ b/ci/recipe/meta.yaml
@@ -24,14 +24,16 @@ requirements:
 
   run:
     - python {{ python }}
+    - pandas
+    - scipy
+    - scikit-bio
+    - jinja2
     - qiime2 {{ qiime2_epoch }}.*
-    - q2templates {{ qiime2_epoch }}.*
     - q2-types {{ qiime2_epoch }}.*
 
 test:
   requires:
     - qiime2 >={{ qiime2 }}
-    - q2templates >={{ q2templates }}
     - q2-types >={{ q2_types }}
     - pytest
 
diff --git a/q2_fmt/__init__.py b/q2_fmt/__init__.py
index f58c592..8b1589e 100644
--- a/q2_fmt/__init__.py
+++ b/q2_fmt/__init__.py
@@ -15,5 +15,6 @@
 __version__ = get_versions()['version']
 del get_versions
 
-__all__ = ['RecordTSVFileFormat', 'AnnotatedTSVDirFmt', 'StatsTable', 'Pairwise',
-           'GroupDist', 'Ordered', 'Unordered', 'Matched', 'Independent', 'engraftment']
+__all__ = ['RecordTSVFileFormat', 'AnnotatedTSVDirFmt', 'StatsTable',
+           'Pairwise', 'GroupDist', 'Ordered', 'Unordered', 'Matched',
+           'Independent', 'engraftment']
diff --git a/q2_fmt/_engraftment.py b/q2_fmt/_engraftment.py
index d941eea..3882274 100644
--- a/q2_fmt/_engraftment.py
+++ b/q2_fmt/_engraftment.py
@@ -16,7 +16,8 @@ def engraftment(
     ctx, diversity_measure, metadata, hypothesis, time_column,
     reference_column, subject_column, control_column=None,
     filter_missing_references=False, where=None, against_group=None,
-    p_val_approx='auto'):
+    p_val_approx='auto'
+):
 
     raincloud_plot = ctx.get_action('fmt', 'plot_rainclouds')
     group_timepoints = ctx.get_action('fmt', 'group_timepoints')
@@ -24,25 +25,29 @@ def engraftment(
     results = []
 
     time_dist, ref_dist = group_timepoints(diversity_measure, metadata,
-    time_column, reference_column, subject_column, control_column,
-    filter_missing_references, where)
+                                           time_column, reference_column,
+                                           subject_column, control_column,
+                                           filter_missing_references, where)
 
     if hypothesis == 'reference' or hypothesis == 'all-pairwise':
         mann_whitney_u = ctx.get_action('fmt', 'mann_whitney_u')
         stats = mann_whitney_u(distribution=ref_dist, hypothesis=hypothesis,
                                reference_group=against_group,
-                               against_each=ref_dist, p_val_approx=p_val_approx)
+                               against_each=ref_dist,
+                               p_val_approx=p_val_approx)
 
     else:
         wilcoxon_srt = ctx.get_action('fmt', 'wilcoxon_srt')
         stats = wilcoxon_srt(distribution=time_dist, hypothesis=hypothesis,
-                             baseline_group=against_group, p_val_approx=p_val_approx)
+                             baseline_group=against_group,
+                             p_val_approx=p_val_approx)
 
     results += stats
     results += raincloud_plot(data=time_dist, stats=stats[0])
 
     return tuple(results)
 
+
 def group_timepoints(
         diversity_measure: pd.Series, metadata: qiime2.Metadata,
         time_column: str, reference_column: str, subject_column: str = False,
@@ -53,9 +58,9 @@ def group_timepoints(
         diversity_measure.index = _sort_multi_index(diversity_measure.index)
 
     is_beta, used_references, time_col, subject_col, used_controls = \
-        _data_filtering(diversity_measure, metadata, time_column, reference_column,
-                        subject_column, control_column, filter_missing_references,
-                        where)
+        _data_filtering(diversity_measure, metadata, time_column,
+                        reference_column, subject_column, control_column,
+                        filter_missing_references, where)
 
     original_measure_name = diversity_measure.name
     diversity_measure.name = 'measure'
@@ -72,7 +77,7 @@ def group_timepoints(
     ordered_df['id'].attrs.update(id_annotation)
     ordered_df['measure'].attrs.update({
         'unit': ('Distance to %s' % used_references.name)
-                if is_beta else original_measure_name,
+        if is_beta else original_measure_name,
         'description': '...'
     })
     ordered_df['group'].attrs.update({
@@ -85,9 +90,9 @@ def group_timepoints(
             'description': '...'
         })
 
-
     independent_df = _independent_dists(diversity_measure, metadata,
-                                        used_references, is_beta, used_controls)
+                                        used_references, is_beta,
+                                        used_controls)
 
     # id, measure, group, [A, B]
     if is_beta:
@@ -107,7 +112,7 @@ def group_timepoints(
     })
     independent_df['group'].attrs.update({
         'unit': used_references.name if used_controls is None else
-                '%s or %s' % (used_references.name, used_controls.name),
+        '%s or %s' % (used_references.name, used_controls.name),
         'description': '...'
     })
     if is_beta:
@@ -116,15 +121,18 @@ def group_timepoints(
 
     return ordered_df, independent_df
 
+
 # HELPER FUNCTION FOR DATA FILTERING
 def _data_filtering(diversity_measure: pd.Series, metadata: qiime2.Metadata,
-        time_column: str, reference_column: str, subject_column: str = False,
-        control_column: str = None, filter_missing_references: bool = False,
-        where: str = None):
+                    time_column: str, reference_column: str,
+                    subject_column: str = False, control_column: str = None,
+                    filter_missing_references: bool = False,
+                    where: str = None):
 
     if diversity_measure.empty:
         raise ValueError('Empty diversity measure detected.'
-                         ' Please make sure your diversity measure contains data.')
+                         ' Please make sure your diversity measure'
+                         ' contains data.')
 
     if isinstance(diversity_measure.index, pd.MultiIndex):
         is_beta = True
@@ -137,7 +145,10 @@ def _data_filtering(diversity_measure: pd.Series, metadata: qiime2.Metadata,
     metadata = metadata.filter_ids(ids_to_keep=ids_with_data)
 
     if where is not None:
-        metadata = metadata.filter_ids(ids_to_keep=metadata.get_ids(where=where))
+        metadata = (metadata
+                    .filter_ids(ids_to_keep=metadata
+                                .get_ids(where=where))
+                    )
 
     def _get_series_from_col(md, col_name, param_name, expected_type=None,
                              drop_missing_values=False):
@@ -145,7 +156,7 @@ def _get_series_from_col(md, col_name, param_name, expected_type=None,
             column = md.get_column(col_name)
         except ValueError as e:
             raise ValueError("There was an issue with the argument for %r. %s"
-                            % (param_name, e)) from e
+                             % (param_name, e)) from e
 
         if expected_type is not None and not isinstance(column, expected_type):
             if type(expected_type) is tuple:
@@ -154,18 +165,22 @@ def _get_series_from_col(md, col_name, param_name, expected_type=None,
                 exp = expected_type.type
 
             raise ValueError("Provided column for %r is %r, not %r."
-                            % (param_name, column.type, exp))
+                             % (param_name, column.type, exp))
 
         if drop_missing_values:
             column = column.drop_missing_values()
 
         return column.to_series()
 
-    time_col = _get_series_from_col(md=metadata, col_name=time_column, param_name='time_column',
-                                    expected_type=qiime2.NumericMetadataColumn)
+    time_col = _get_series_from_col(
+        md=metadata, col_name=time_column,
+        param_name='time_column',
+        expected_type=qiime2.NumericMetadataColumn)
 
-    reference_col = _get_series_from_col(md=metadata, col_name=reference_column, param_name='reference_column',
-                                         expected_type=qiime2.CategoricalMetadataColumn)
+    reference_col = _get_series_from_col(
+        md=metadata, col_name=reference_column,
+        param_name='reference_column',
+        expected_type=qiime2.CategoricalMetadataColumn)
 
     used_references = reference_col[~time_col.isna()]
 
@@ -174,44 +189,55 @@ def _get_series_from_col(md, col_name, param_name, expected_type=None,
             used_references = used_references.dropna()
         else:
             nan_references = used_references.index[used_references.isna()]
-            raise KeyError('Missing references for the associated sample data. Please make sure'
-                        ' that all samples with a timepoint value have an associated reference.'
-                        ' IDs where missing references were found: %s' % (tuple(nan_references),))
+            raise KeyError('Missing references for the associated sample data.'
+                           ' Please make sure that all samples with a'
+                           ' timepoint value have an associated reference.'
+                           ' IDs where missing references were found:'
+                           ' %s' % (tuple(nan_references),))
 
     available_references = (used_references.isin(ids_with_data))
     if not available_references.all():
         if filter_missing_references:
             used_references = used_references[available_references]
         else:
-            raise KeyError('References included in the metadata are missing from the diversity measure.'
-                        ' Please make sure all references included in the metadata are also present'
-                        ' in the diversity measure. Missing references: %s'
-                        % list(used_references[~available_references].unique())
-            )
+            raise KeyError('References included in the metadata are missing'
+                           ' from the diversity measure. Please make sure all'
+                           ' references included in the metadata are also'
+                           ' present in the diversity measure.'
+                           ' Missing references: %s'
+                           % list(used_references[~available_references]
+                                  .unique()))
 
     if used_references.empty:
         raise KeyError('No references were found within the diversity metric.')
 
     subject_col = None
     if subject_column:
-            subject_col = _get_series_from_col(md=metadata, col_name=subject_column, param_name='subject_column',
-                                               expected_type=qiime2.CategoricalMetadataColumn)
+        subject_col = _get_series_from_col(
+            md=metadata, col_name=subject_column,
+            param_name='subject_column',
+            expected_type=qiime2.CategoricalMetadataColumn)
 
     used_controls = None
     if control_column is not None:
-        control_col = _get_series_from_col(md=metadata, col_name=control_column, param_name='control_column')
+        control_col = _get_series_from_col(md=metadata,
+                                           col_name=control_column,
+                                           param_name='control_column')
         used_controls = control_col[~control_col.isna()]
 
     return is_beta, used_references, time_col, subject_col, used_controls
 
+
 # HELPER FUNCTION FOR sorting a multi-index (for dist matrix and metadata)
 def _sort_multi_index(index):
     sorted_levels = list(map(sorted, index))
     sorted_multi = pd.MultiIndex.from_tuples(sorted_levels)
     return sorted_multi
 
+
 # HELPER FUNCTION FOR GroupDists[Ordered, Matched | Independent]
-def _ordered_dists(diversity_measure: pd.Series, is_beta, used_references, time_col, subject_col):
+def _ordered_dists(diversity_measure: pd.Series, is_beta,
+                   used_references, time_col, subject_col):
     if is_beta:
         idx = pd.MultiIndex.from_frame(
             used_references.to_frame().reset_index())
@@ -222,11 +248,16 @@ def _ordered_dists(diversity_measure: pd.Series, is_beta, used_references, time_
         idx.name = 'id'
 
     try:
-        sliced_df = diversity_measure[idx].to_frame().reset_index().set_index('id')
+        sliced_df = (diversity_measure[idx]
+                     .to_frame()
+                     .reset_index()
+                     .set_index('id')
+                     )
     except KeyError:
-        raise KeyError('Pairwise comparisons were unsuccessful. Please double check that your'
-        ' chosen reference column contains values that are also present in the ID column for'
-        ' the associated metadata.')
+        raise KeyError('Pairwise comparisons were unsuccessful. Please double'
+                       ' check that your chosen reference column contains'
+                       ' values that are also present in the ID column for'
+                       ' the associated metadata.')
 
     if is_beta:
         sliced_df.index = used_references.index
@@ -239,8 +270,10 @@ def _ordered_dists(diversity_measure: pd.Series, is_beta, used_references, time_
 
     return ordinal_df.reset_index()
 
+
 # HELPER FUNCTION FOR GroupDists[Unordered, Independent]
-def _independent_dists(diversity_measure, metadata, used_references, is_beta, used_controls):
+def _independent_dists(diversity_measure, metadata,
+                       used_references, is_beta, used_controls):
     unique_references = sorted(used_references.unique())
 
     if is_beta:
@@ -248,18 +281,26 @@ def _independent_dists(diversity_measure, metadata, used_references, is_beta, us
             ref_idx = pd.MultiIndex.from_tuples(
                 itertools.combinations(unique_references, 2))
         except TypeError:
-            raise TypeError('Single reference value detected. More than one unique reference must be'
-                            ' provided for successful grouping.')
+            raise TypeError('Single reference value detected. More than one'
+                            ' unique reference must be provided for'
+                            ' successful grouping.')
 
         ref_idx.names = ['A', 'B']
 
         if used_controls is not None:
-            grouped_md = metadata.to_dataframe().loc[used_controls.index].groupby(used_controls)
+            grouped_md = (metadata
+                          .to_dataframe()
+                          .loc[used_controls.index]
+                          .groupby(used_controls)
+                          )
             ctrl_list = list()
             for group_id, grouped_ctrls in grouped_md:
                 if len(grouped_ctrls.index) < 2:
                     continue
-                ctrl_combos = list(itertools.combinations(grouped_ctrls.index, 2))
+                ctrl_combos = list(
+                    itertools.combinations(
+                        grouped_ctrls.index, 2)
+                )
                 ctrl_idx = pd.MultiIndex.from_tuples(ctrl_combos)
                 ctrl_series = pd.Series(group_id, index=ctrl_idx)
                 ctrl_list.append(ctrl_series)
@@ -267,8 +308,10 @@ def _independent_dists(diversity_measure, metadata, used_references, is_beta, us
             try:
                 ctrl_series = pd.concat(ctrl_list)
             except ValueError:
-                raise ValueError('One or less controls detected. When including controls in your data,'
-                                 ' please include more than one for successful grouping.')
+                raise ValueError('One or less controls detected.'
+                                 ' When including controls in your data,'
+                                 ' please include more than one for'
+                                 ' successful grouping.')
 
             ctrl_series.name = 'group'
             ctrl_series.index.names = ['A', 'B']
@@ -282,9 +325,10 @@ def _independent_dists(diversity_measure, metadata, used_references, is_beta, us
     try:
         nominal_df = diversity_measure[ref_idx].to_frame().reset_index()
     except KeyError:
-        raise KeyError('Pairwise comparisons were unsuccessful. Please double check that your'
-        ' chosen reference column contains values that are also present in the ID column for'
-        ' the associated metadata.')
+        raise KeyError('Pairwise comparisons were unsuccessful. Please double'
+                       ' check that your chosen reference column contains'
+                       ' values that are also present in the ID column for'
+                       ' the associated metadata.')
 
     nominal_df['group'] = 'reference'
 
diff --git a/q2_fmt/_examples.py b/q2_fmt/_examples.py
index 65bc395..5ab06c0 100644
--- a/q2_fmt/_examples.py
+++ b/q2_fmt/_examples.py
@@ -11,6 +11,7 @@
 
 import qiime2
 
+
 def _get_data_from_tests(path):
     return pkg_resources.resource_filename('q2_fmt.tests',
                                            os.path.join('data', path))
@@ -20,38 +21,47 @@ def alpha_md_factory():
     return qiime2.Metadata.load(
         _get_data_from_tests('sample_metadata_alpha_div.tsv'))
 
+
 def beta_md_factory():
     return qiime2.Metadata.load(
         _get_data_from_tests('sample_metadata_donors.tsv'))
 
+
 def alpha_div_factory():
     return qiime2.Artifact.import_data(
         'SampleData[AlphaDiversity]', _get_data_from_tests('alpha_div.tsv'))
 
+
 def beta_div_factory():
     return qiime2.Artifact.import_data(
         'DistanceMatrix', _get_data_from_tests('dist_matrix_donors.tsv'))
 
+
 def faithpd_timedist_factory():
     return qiime2.Artifact.import_data(
         'GroupDist[Ordered, Matched]', _get_data_from_tests('faithpd_timedist')
     )
 
+
 def faithpd_refdist_factory():
     return qiime2.Artifact.import_data(
-        'GroupDist[Unordered, Independent]', _get_data_from_tests('faithpd_refdist')
+        'GroupDist[Unordered, Independent]',
+        _get_data_from_tests('faithpd_refdist')
     )
 
+
 def faithpd_md_factory():
     return qiime2.Metadata.load(
         _get_data_from_tests('metadata-faithpd.tsv')
     )
 
+
 def faithpd_div_factory():
     return qiime2.Artifact.import_data(
         'SampleData[AlphaDiversity]', _get_data_from_tests('faithpd.tsv')
     )
 
+
 def group_timepoints_alpha_independent(use):
     alpha = use.init_artifact('alpha', alpha_div_factory)
     metadata = use.init_metadata('metadata', alpha_md_factory)
@@ -97,6 +107,7 @@ def group_timepoints_beta(use):
     timepoints.assert_output_type('GroupDist[Ordered, Matched]')
     references.assert_output_type('GroupDist[Unordered, Independent]')
 
+
 def wilcoxon_baseline0(use):
     timedist = use.init_artifact('timedist', faithpd_timedist_factory)
 
@@ -115,6 +126,7 @@ def wilcoxon_baseline0(use):
 
     stats_table.assert_output_type('StatsTable[Pairwise]')
 
+
 def mann_whitney_pairwise(use):
     timedist = use.init_artifact('timedist', faithpd_timedist_factory)
     refdist = use.init_artifact('refdist', faithpd_refdist_factory)
@@ -134,6 +146,7 @@ def mann_whitney_pairwise(use):
 
     stats_table.assert_output_type('StatsTable[Pairwise]')
 
+
 # Engraftment example using faith PD, baseline0 hypothesis
 def engraftment_baseline(use):
     md = use.init_metadata('md', faithpd_md_factory)
diff --git a/q2_fmt/_stats.py b/q2_fmt/_stats.py
index 388e69c..acdbdbc 100644
--- a/q2_fmt/_stats.py
+++ b/q2_fmt/_stats.py
@@ -12,9 +12,9 @@
 
 
 def mann_whitney_u(distribution: pd.DataFrame, hypothesis: str,
-                   reference_group: str=None,
-                   against_each: pd.DataFrame=None,
-                   p_val_approx: str='auto') -> pd.DataFrame:
+                   reference_group: str = None,
+                   against_each: pd.DataFrame = None,
+                   p_val_approx: str = 'auto') -> pd.DataFrame:
 
     dists = [distribution]
 
@@ -27,14 +27,15 @@ def mann_whitney_u(distribution: pd.DataFrame, hypothesis: str,
     elif hypothesis == 'all-pairwise':
         if reference_group is not None:
             raise ValueError("`all-pairwise` was selected as the hypothesis,"
-                             " but a `reference_group` was added. Please either"
-                             " select `reference` as the hypothesis, or remove"
-                             " the `reference_group` parameter from your command.")
+                             " but a `reference_group` was added."
+                             " Please either select `reference` as the"
+                             " hypothesis, or remove the `reference_group`"
+                             " parameter from your command.")
         comparisons = _comp_all_pairwise(distribution,
                                          against_each=against_each)
     else:
-        raise ValueError("Invalid hypothesis. Please either choose `reference` or"
-                         " `all-pairwise` as your hypothesis.")
+        raise ValueError("Invalid hypothesis. Please either choose `reference`"
+                         " or `all-pairwise` as your hypothesis.")
 
     table = []
     for (idx_a, comp_a), (idx_b, comp_b) in comparisons:
@@ -80,7 +81,6 @@ def mann_whitney_u(distribution: pd.DataFrame, hypothesis: str,
     df['q-value'].attrs.update(
         dict(unit='Benjamini–Hochberg', description='...'))
 
-
     return df
 
 
@@ -125,20 +125,22 @@ def _compare_mannwhitneyu(group_a, group_b, p_val_approx):
 
 
 def wilcoxon_srt(distribution: pd.DataFrame, hypothesis: str,
-                 baseline_group: str=None, p_val_approx: str='auto') -> pd.DataFrame:
+                 baseline_group: str = None,
+                 p_val_approx: str = 'auto') -> pd.DataFrame:
 
     if hypothesis == 'baseline':
         comparisons = _comp_baseline(distribution, baseline_group)
     elif hypothesis == 'consecutive':
         if baseline_group is not None:
             raise ValueError("`consecutive` was selected as the hypothesis,"
-                             " but a `baseline_group` was added. Please either"
-                             " select `baseline` as the hypothesis, or remove"
-                             " the `baseline_group` parameter from your command.")
+                             " but a `baseline_group` was added. Please"
+                             " either select `baseline` as the hypothesis,"
+                             " or remove the `baseline_group` parameter"
+                             " from your command.")
         comparisons = _comp_consecutive(distribution)
     else:
-        raise ValueError("Invalid hypothesis. Please either choose `baseline` or"
-                         " `consecutive` as your hypothesis.")
+        raise ValueError("Invalid hypothesis. Please either choose `baseline`"
+                         " or `consecutive` as your hypothesis.")
 
     table = []
     for comp_a, comp_b in comparisons:
@@ -230,6 +232,7 @@ def _compare_wilcoxon(group_a, group_b, p_val_approx) -> dict:
 
     return results
 
+
 def _fdr_correction(p_vals):
     ranked_p_values = scipy.stats.rankdata(p_vals)
     fdr = p_vals * len(p_vals) / ranked_p_values
@@ -237,6 +240,7 @@ def _fdr_correction(p_vals):
 
     return fdr
 
+
 def _get_reference_from_column(series, reference_value, param_name):
     if reference_value is None:
         raise ValueError("%s must be provided." % param_name)
@@ -253,4 +257,4 @@ def _get_reference_from_column(series, reference_value, param_name):
             return reference_value
 
     raise ValueError("%r was not found as a group within the distribution."
-                        % reference_value)
+                     % reference_value)
diff --git a/q2_fmt/_transformer.py b/q2_fmt/_transformer.py
index 0e9a8e5..25227d0 100644
--- a/q2_fmt/_transformer.py
+++ b/q2_fmt/_transformer.py
@@ -1,5 +1,5 @@
 # ----------------------------------------------------------------------------
-# Copyright (c) 2021, QIIME 2 development team.
+# Copyright (c) 2022, QIIME 2 development team.
 #
 # Distributed under the terms of the Modified BSD License.
 #
@@ -32,6 +32,7 @@ def _3(ff: LSMatFormat) -> pd.Series:
     dm = skbio.DistanceMatrix.read(str(ff), format='lsmat', verify=False)
     return dm.to_series()
 
+
 @plugin.register_transformer
 def _4(df: AnnotatedTSVDirFmt) -> pd.DataFrame:
     data = df.data.view(pd.DataFrame)
@@ -44,6 +45,7 @@ def _4(df: AnnotatedTSVDirFmt) -> pd.DataFrame:
 
     return data
 
+
 @plugin.register_transformer
 def _5(obj: pd.DataFrame) -> AnnotatedTSVDirFmt:
     metadata = []
diff --git a/q2_fmt/_visualizer.py b/q2_fmt/_visualizer.py
index c1c33df..a87b2d9 100644
--- a/q2_fmt/_visualizer.py
+++ b/q2_fmt/_visualizer.py
@@ -14,7 +14,7 @@
 
 
 def plot_rainclouds(output_dir: str, data: pd.DataFrame,
-                    stats: pd.DataFrame=None):
+                    stats: pd.DataFrame = None):
     table1 = None
     if stats is not None:
         table1, stats = _make_stats(stats)
@@ -119,5 +119,5 @@ def _make_group_col(prefix, df):
     group_n = " (n=" + group_n.apply(str) + ")"
 
     series = group_series + group_n
-    series.name = f"Group " + prefix
+    series.name = f'{"Group "}' + prefix
     return series
diff --git a/q2_fmt/plugin_setup.py b/q2_fmt/plugin_setup.py
index 02cd658..f4d75f7 100644
--- a/q2_fmt/plugin_setup.py
+++ b/q2_fmt/plugin_setup.py
@@ -8,7 +8,8 @@
 
 import importlib
 
-from qiime2.plugin import Str, Plugin, Metadata, TypeMap, Bool, Choices, Visualization
+from qiime2.plugin import (Str, Plugin, Metadata, TypeMap,
+                           Bool, Choices, Visualization)
 from q2_types.sample_data import SampleData, AlphaDiversity
 from q2_types.distance_matrix import DistanceMatrix
 
@@ -18,7 +19,8 @@
 from q2_fmt._stats import mann_whitney_u, wilcoxon_srt
 from q2_fmt._format import AnnotatedTSVDirFmt
 from q2_fmt._visualizer import plot_rainclouds
-from q2_fmt._type import GroupDist, Matched, Independent, Ordered, Unordered, StatsTable, Pairwise
+from q2_fmt._type import (GroupDist, Matched, Independent, Ordered,
+                          Unordered, StatsTable, Pairwise)
 import q2_fmt._examples as ex
 
 plugin = Plugin(name='fmt',
@@ -29,10 +31,12 @@
                 short_description='Plugin for analyzing FMT data.')
 
 plugin.register_formats(RecordTSVFileFormat, AnnotatedTSVDirFmt)
-plugin.register_semantic_types(StatsTable, Pairwise, GroupDist, Matched, Independent,
-                               Ordered, Unordered)
+plugin.register_semantic_types(StatsTable, Pairwise, GroupDist, Matched,
+                               Independent, Ordered, Unordered)
 plugin.register_semantic_type_to_format(
-    GroupDist[Ordered | Unordered, Matched | Independent] | StatsTable[Pairwise], AnnotatedTSVDirFmt)
+    GroupDist[Ordered | Unordered,
+              Matched | Independent] | StatsTable[Pairwise], AnnotatedTSVDirFmt
+    )
 
 T_subject, T_dependence = TypeMap({
     Bool % Choices(False): Independent,
@@ -47,40 +51,54 @@
                                             'baseline', 'consecutive'),
                 'time_column': Str, 'reference_column': Str,
                 'subject_column': T_subject, 'control_column': Str,
-                'filter_missing_references': Bool, 'where': Str, 'against_group': Str,
+                'filter_missing_references': Bool, 'where': Str,
+                'against_group': Str,
                 'p_val_approx': Str % Choices('auto', 'exact', 'asymptotic')},
     outputs=[
         ('stats', StatsTable[Pairwise]),
         ('raincloud_plot', Visualization)
     ],
-    input_descriptions= {
+    input_descriptions={
         'diversity_measure': '',
     },
     parameter_descriptions={
         'metadata': 'The sample metadata.',
-        'hypothesis': 'The hypothesis that will be used to analyze the input `distribution`.'
-                      ' Either `reference`, `all-pairwise`, `baseline` or `consecutive` must be selected.',
-        'time_column': 'The column within the `metadata` that the `diversity_measure` should be grouped by.'
+        'hypothesis': 'The hypothesis that will be used to analyze the input'
+                      ' `distribution`. Either `reference`, `all-pairwise`,'
+                      ' `baseline` or `consecutive` must be selected.',
+        'time_column': 'The column within the `metadata` that the'
+                       ' `diversity_measure` should be grouped by.'
                        ' This column should contain simple integer values.',
-        'control_column': 'The column within the `metadata` that contains any relevant control group IDs.'
-                          ' Actual treatment samples should not contain any value within this column.',
-        'reference_column': 'The column within the `metadata` that contains the sample to use as a reference'
-                            ' for a given beta `diversity_measure`.'
-                            ' For example, this may be the relevant donor sample to compare against.',
-        'subject_column': 'The column within the `metadata` that contains the subject ID to be tracked against timepoints.',
-        'filter_missing_references': 'Filter out references contained within the metadata that are not present'
-                                     ' in the diversity measure. Default behavior is to raise an error.',
+        'control_column': 'The column within the `metadata` that contains any'
+                          ' relevant control group IDs.'
+                          ' Actual treatment samples should not contain any'
+                          ' value within this column.',
+        'reference_column': 'The column within the `metadata` that contains'
+                            ' the sample to use as a reference for a given'
+                            ' beta `diversity_measure`. For example, this'
+                            ' may be the relevant donor sample to compare'
+                            ' against.',
+        'subject_column': 'The column within the `metadata` that contains the'
+                          ' subject ID to be tracked against timepoints.',
+        'filter_missing_references': 'Filter out references contained within'
+                                     ' the metadata that are not present in'
+                                     ' the diversity measure.'
+                                     ' Default behavior is to raise an error.',
         'where': '..',
-        'against_group': 'Based on the selected hypothesis, this is the column that will be used'
-                          ' to compare all samples against.',
-        'p_val_approx': '"exact" will calculate an exact p-value for distributions,'
-                        ' "asymptotic" will use a normal distribution, and "auto" will use either "exact"'
-                        ' when one of the groups has less than 8 observations and there are no ties, otherwise "asymptotic".'
+        'against_group': 'Based on the selected hypothesis, this is the column'
+                         ' that will be used to compare all samples against.',
+        'p_val_approx': '"exact" will calculate an exact p-value'
+                        ' for distributions, "asymptotic" will use a normal'
+                        ' distribution, and "auto" will use either "exact"'
+                        ' when one of the groups has less than 8 observations'
+                        ' and there are no ties, otherwise "asymptotic".'
     },
     output_descriptions={
-        'stats': 'Either the Mann-Whitney U or Wilcoxon SRT distribution for the chosen hypothesis.',
-        'raincloud_plot': 'Raincloud plot for the computed significance test (either Mann-Whitney U or Wilxocon SRT)'
-                          ' from the grouped diversity data and selected hypothesis.',
+        'stats': 'Either the Mann-Whitney U or Wilcoxon SRT distribution'
+                 ' for the chosen hypothesis.',
+        'raincloud_plot': 'Raincloud plot for the computed significance test'
+                          ' (either Mann-Whitney U or Wilcoxon SRT) from the'
+                          ' grouped diversity data and selected hypothesis.',
     },
     name='Engraftment Pipeline for FMT Analysis',
     description='',
@@ -93,31 +111,45 @@
     function=group_timepoints,
     inputs={'diversity_measure': DistanceMatrix | SampleData[AlphaDiversity]},
     parameters={'metadata': Metadata, 'time_column': Str,
-                'reference_column': Str, 'subject_column': T_subject, 'control_column': Str,
-                'filter_missing_references': Bool, 'where': Str},
+                'reference_column': Str, 'subject_column': T_subject,
+                'control_column': Str, 'filter_missing_references': Bool,
+                'where': Str},
     outputs=[('timepoint_dists', GroupDist[Ordered, T_dependence]),
              ('reference_dists', GroupDist[Unordered, Independent])],
     parameter_descriptions={
         'metadata': 'The sample metadata.',
-        'time_column': 'The column within the `metadata` that the `diversity_measure` should be grouped by.'
+        'time_column': 'The column within the `metadata` that the'
+                       ' `diversity_measure` should be grouped by.'
                        ' This column should contain simple integer values.',
-        'control_column': 'The column within the `metadata` that contains any relevant control group IDs.'
-                          ' Actual treatment samples should not contain any value within this column.',
-        'reference_column': 'The column within the `metadata` that contains the sample to use as a reference'
+        'control_column': 'The column within the `metadata` that contains any'
+                          ' relevant control group IDs.'
+                          ' Actual treatment samples should not contain any'
+                          ' value within this column.',
+        'reference_column': 'The column within the `metadata` that contains'
+                            ' the sample to use as a reference'
                             ' for a given beta `diversity_measure`.'
-                            ' For example, this may be the relevant donor sample to compare against.',
-        'subject_column': 'The column within the `metadata` that contains the subject ID to be tracked against timepoints.',
-        'filter_missing_references': 'Filter out references contained within the metadata that are not present'
-                                     ' in the diversity measure. Default behavior is to raise an error.',
+                            ' For example, this may be the relevant donor'
+                            ' sample to compare against.',
+        'subject_column': 'The column within the `metadata` that contains the'
+                          ' subject ID to be tracked against timepoints.',
+        'filter_missing_references': 'Filter out references contained within'
+                                     ' the metadata that are not present'
+                                     ' in the diversity measure.'
+                                     ' Default behavior is to raise an error.',
         'where': '..',
     },
     output_descriptions={
-        'timepoint_dists': 'The distributions for the `diversity_measure`, grouped by the selected `time_column`.'
-                           ' May also contain subject IDs, if `subject_column` is provided in the `metadata`.',
-        'reference_dists': 'The inter-group reference and inter-group control (when provided) distributions.'
-                           ' When `diversity_measure` is DistanceMatrix, the inter-group calculations'
-                           ' will be all pairwise comparisons within a group.'
-                           ' Otherwise, these are just the per-sample measurements of alpha-diversity.'
+        'timepoint_dists': 'The distributions for the `diversity_measure`,'
+                           ' grouped by the selected `time_column`.'
+                           ' May also contain subject IDs, if `subject_column`'
+                           ' is provided in the `metadata`.',
+        'reference_dists': 'The inter-group reference and inter-group control'
+                           ' (when provided) distributions.'
+                           ' When `diversity_measure` is DistanceMatrix, the'
+                           ' inter-group calculations will be all pairwise'
+                           ' comparisons within a group.'
+                           ' Otherwise, these are just the per-sample'
+                           ' measurements of alpha-diversity.'
     },
     name='',
     description='',
@@ -130,22 +162,28 @@
 plugin.methods.register_function(
     function=mann_whitney_u,
     inputs={'distribution': GroupDist[Unordered | Ordered, Independent],
-            'against_each': GroupDist[Unordered | Ordered, Matched | Independent]},
+            'against_each': GroupDist[Unordered | Ordered,
+                                      Matched | Independent]},
     parameters={'hypothesis': Str % Choices('reference', 'all-pairwise'),
                 'reference_group': Str,
                 'p_val_approx': Str % Choices('auto', 'exact', 'asymptotic')},
     outputs=[('stats', StatsTable[Pairwise])],
     parameter_descriptions={
-        'hypothesis': 'The hypothesis that will be used to analyze the input `distribution`.'
-                      ' Either `reference` or `all-pairwise` must be selected.',
-        'reference_group': 'If `reference` is the selected hypothesis, this is the column that will be used'
+        'hypothesis': 'The hypothesis that will be used to analyze the input'
+                      ' `distribution`. Either `reference` or `all-pairwise`'
+                      ' must be selected.',
+        'reference_group': 'If `reference` is the selected hypothesis, this'
+                           ' is the column that will be used'
                            ' to compare all samples against.',
-        'p_val_approx': '"exact" will calculate an exact p-value for distributions,'
-                        ' "asymptotic" will use a normal distribution, and "auto" will use either "exact"'
-                        ' when one of the groups has less than 8 observations and there are no ties, otherwise "asymptotic".'
+        'p_val_approx': '"exact" will calculate an exact p-value for'
+                        ' distributions, "asymptotic" will use a normal'
+                        ' distribution, and "auto" will use either "exact"'
+                        ' when one of the groups has less than 8 observations'
+                        ' and there are no ties, otherwise "asymptotic".'
     },
     output_descriptions={
-        'stats': 'The Mann-Whitney U distribution for either the `reference` or `all-pairwise` hypothesis.',
+        'stats': 'The Mann-Whitney U distribution for either the `reference`'
+                 ' or `all-pairwise` hypothesis.',
     },
     name='Mann-Whitney U Test',
     description='',
@@ -162,15 +200,21 @@
                 'p_val_approx': Str % Choices('auto', 'exact', 'asymptotic')},
     outputs=[('stats', StatsTable[Pairwise])],
     parameter_descriptions={
-        'hypothesis': 'The hypothesis that will be used to analyze the input `distribution`.'
-                      ' Either `baseline` or `consecutive` must be selected.',
-        'baseline_group': 'If `baseline` is the selected hypothesis, this is the column that will be used'
+        'hypothesis': 'The hypothesis that will be used to analyze the input'
+                      ' `distribution`. Either `baseline` or `consecutive`'
+                      ' must be selected.',
+        'baseline_group': 'If `baseline` is the selected hypothesis, this is'
+                          ' the column that will be used'
                           ' to compare all samples against.',
-        'p_val_approx': '"exact" will calculate an exact p-value for distributions of up to 25 (inclusive) measurements,'
-                        ' "asymptotic" will use a normal distribution, and "auto" will use either "exact" or "approx" depending on size.'
+        'p_val_approx': '"exact" will calculate an exact p-value for'
+                        ' distributions of up to 25 (inclusive) measurements,'
+                        ' "asymptotic" will use a normal distribution,'
+                        ' and "auto" will use either "exact" or "asymptotic"'
+                        ' depending on size.'
     },
     output_descriptions={
-        'stats': 'The Wilcoxon SRT distribution for either the `baseline` or `consecutive` hypothesis.',
+        'stats': 'The Wilcoxon SRT distribution for either the `baseline`'
+                 ' or `consecutive` hypothesis.',
     },
     name='Wilcoxon Signed Rank Test',
     description='',
diff --git a/q2_fmt/tests/test_engraftment.py b/q2_fmt/tests/test_engraftment.py
index 4f7d628..c5727d8 100644
--- a/q2_fmt/tests/test_engraftment.py
+++ b/q2_fmt/tests/test_engraftment.py
@@ -18,16 +18,20 @@
 
 
 class TestBase(TestPluginBase):
-    package='q2_fmt.tests'
+    package = 'q2_fmt.tests'
 
     def setUp(self):
         super().setUp()
 
-        self.md_beta = Metadata.load(self.get_data_path('sample_metadata_donors.tsv'))
-        self.md_alpha = Metadata.load(self.get_data_path('sample_metadata_alpha_div.tsv'))
+        self.md_beta = Metadata.load(self.get_data_path(
+                           'sample_metadata_donors.tsv'))
+        self.md_alpha = Metadata.load(self.get_data_path(
+                            'sample_metadata_alpha_div.tsv'))
 
-        self.dm = DistanceMatrix.read(self.get_data_path('dist_matrix_donors.tsv')).to_series()
-        self.alpha = pd.read_csv(self.get_data_path('alpha_div.tsv'), sep='\t', index_col=0, squeeze=True)
+        self.dm = DistanceMatrix.read(self.get_data_path(
+                      'dist_matrix_donors.tsv')).to_series()
+        self.alpha = pd.read_csv(self.get_data_path('alpha_div.tsv'),
+                                 sep='\t', index_col=0, squeeze=True)
 
         self.faithpd_timedist = faithpd_timedist_factory().view(pd.DataFrame)
         self.faithpd_refdist = faithpd_refdist_factory().view(pd.DataFrame)
@@ -35,7 +39,8 @@ def setUp(self):
 
 class ErrorMixins:
     def test_with_time_column_input_not_in_metadata(self):
-        with self.assertRaisesRegex(ValueError, 'time_column.*foo.*metadata'):
+        with self.assertRaisesRegex(ValueError,
+                                    'time_column.*foo.*metadata'):
             group_timepoints(diversity_measure=self.div,
                              metadata=self.md,
                              time_column='foo',
@@ -43,7 +48,8 @@ def test_with_time_column_input_not_in_metadata(self):
                              control_column='control')
 
     def test_with_reference_column_input_not_in_metadata(self):
-        with self.assertRaisesRegex(ValueError, 'reference_column.*foo.*metadata'):
+        with self.assertRaisesRegex(ValueError,
+                                    'reference_column.*foo.*metadata'):
             group_timepoints(diversity_measure=self.div,
                              metadata=self.md,
                              time_column='days_post_transplant',
@@ -51,7 +57,8 @@ def test_with_reference_column_input_not_in_metadata(self):
                              control_column='control')
 
     def test_with_control_column_input_not_in_metadata(self):
-        with self.assertRaisesRegex(ValueError, 'control_column.*foo.*metadata'):
+        with self.assertRaisesRegex(ValueError,
+                                    'control_column.*foo.*metadata'):
             group_timepoints(diversity_measure=self.div,
                              metadata=self.md,
                              time_column='days_post_transplant',
@@ -59,13 +66,15 @@ def test_with_control_column_input_not_in_metadata(self):
                              control_column='foo')
 
     def test_with_non_numeric_time_column(self):
-        with self.assertRaisesRegex(ValueError, 'time_column.*categorical.*numeric'):
+        with self.assertRaisesRegex(ValueError,
+                                    'time_column.*categorical.*numeric'):
             group_timepoints(diversity_measure=self.div,
                              metadata=self.md,
                              time_column='non_numeric_time_column',
                              reference_column='relevant_donor',
                              control_column='control')
 
+
 class TestAlphaErrors(TestBase, ErrorMixins):
     def setUp(self):
         super().setUp()
@@ -73,6 +82,7 @@ def setUp(self):
         self.div = self.alpha
         self.md = self.md_alpha
 
+
 class TestBetaErrors(TestBase, ErrorMixins):
     def setUp(self):
         super().setUp()
@@ -80,6 +90,7 @@ def setUp(self):
         self.div = self.dm
         self.md = self.md_beta
 
+
 class TestGroupTimepoints(TestBase):
     # Beta Diversity (Distance Matrix) Test Cases
     def test_beta_dists_with_donors_and_controls(self):
@@ -93,9 +104,12 @@ def test_beta_dists_with_donors_and_controls(self):
             'id': ['donor1..donor2', 'donor1..donor3', 'donor2..donor3',
                    'sampleB..sampleC', 'sampleB..sampleD', 'sampleC..sampleD'],
             'measure': [0.24, 0.41, 0.74, 0.37, 0.44, 0.31],
-            'group': ['reference', 'reference', 'reference', 'control1', 'control1', 'control1'],
-            'A': ['donor1', 'donor1', 'donor2', 'sampleB', 'sampleB', 'sampleC'],
-            'B': ['donor2', 'donor3', 'donor3', 'sampleC', 'sampleD', 'sampleD']
+            'group': ['reference', 'reference', 'reference',
+                      'control1', 'control1', 'control1'],
+            'A': ['donor1', 'donor1', 'donor2',
+                  'sampleB', 'sampleB', 'sampleC'],
+            'B': ['donor2', 'donor3', 'donor3',
+                  'sampleC', 'sampleD', 'sampleD']
         })
 
         time_df, ref_df = group_timepoints(diversity_measure=self.dm,
@@ -112,16 +126,20 @@ def test_beta_dists_with_donors_controls_and_subjects(self):
             'id': ['sampleA', 'sampleB', 'sampleC', 'sampleD', 'sampleE'],
             'measure': [0.45, 0.40, 0.28, 0.78, 0.66],
             'group': [7.0, 7.0, 9.0, 11.0, 11.0],
-            'subject': ['subject1', 'subject1', 'subject1', 'subject2', 'subject2']
+            'subject': ['subject1', 'subject1',
+                        'subject1', 'subject2', 'subject2']
         })
 
         exp_ref_df = pd.DataFrame({
             'id': ['donor1..donor2', 'donor1..donor3', 'donor2..donor3',
                    'sampleB..sampleC', 'sampleB..sampleD', 'sampleC..sampleD'],
             'measure': [0.24, 0.41, 0.74, 0.37, 0.44, 0.31],
-            'group': ['reference', 'reference', 'reference', 'control1', 'control1', 'control1'],
-            'A': ['donor1', 'donor1', 'donor2', 'sampleB', 'sampleB', 'sampleC'],
-            'B': ['donor2', 'donor3', 'donor3', 'sampleC', 'sampleD', 'sampleD']
+            'group': ['reference', 'reference', 'reference',
+                      'control1', 'control1', 'control1'],
+            'A': ['donor1', 'donor1', 'donor2',
+                  'sampleB', 'sampleB', 'sampleC'],
+            'B': ['donor2', 'donor3', 'donor3',
+                  'sampleC', 'sampleD', 'sampleD']
         })
 
         time_df, ref_df = group_timepoints(diversity_measure=self.dm,
@@ -135,7 +153,8 @@ def test_beta_dists_with_donors_controls_and_subjects(self):
         pd.testing.assert_frame_equal(ref_df, exp_ref_df)
 
     def test_beta_dists_with_same_donor_for_all_samples(self):
-        with self.assertRaisesRegex(TypeError, 'Single reference value detected'):
+        with self.assertRaisesRegex(TypeError,
+                                    'Single reference value detected'):
             group_timepoints(diversity_measure=self.dm,
                              metadata=self.md_beta,
                              time_column='days_post_transplant',
@@ -143,20 +162,23 @@ def test_beta_dists_with_same_donor_for_all_samples(self):
                              control_column='control')
 
     def test_beta_dists_with_one_donor_and_controls(self):
-        with self.assertRaisesRegex(KeyError, 'Missing references for the associated sample data'):
+        with self.assertRaisesRegex(KeyError,
+                                    'Missing references for the associated'
+                                    ' sample data'):
             group_timepoints(diversity_measure=self.dm,
-                           metadata=self.md_beta,
-                           time_column='days_post_transplant',
-                           reference_column='single_donor',
-                           control_column='control')
+                             metadata=self.md_beta,
+                             time_column='days_post_transplant',
+                             reference_column='single_donor',
+                             control_column='control')
 
     def test_beta_dists_with_donors_and_one_control(self):
-        with self.assertRaisesRegex(ValueError, 'One or less controls detected'):
+        with self.assertRaisesRegex(ValueError,
+                                    'One or less controls detected'):
             group_timepoints(diversity_measure=self.dm,
-                           metadata=self.md_beta,
-                           time_column='days_post_transplant',
-                           reference_column='relevant_donor',
-                           control_column='single_control')
+                             metadata=self.md_beta,
+                             time_column='days_post_transplant',
+                             reference_column='relevant_donor',
+                             control_column='single_control')
 
     def test_beta_dists_with_donors_no_controls(self):
         exp_time_df = pd.DataFrame({
@@ -182,16 +204,19 @@ def test_beta_dists_with_donors_no_controls(self):
         pd.testing.assert_frame_equal(ref_df, exp_ref_df)
 
     def test_beta_dists_no_donors_with_controls(self):
-        with self.assertRaisesRegex(TypeError, r"group_timepoints\(\) missing 1 required positional argument: "
-                                    "'reference_column'"):
+        with self.assertRaisesRegex(
+            TypeError,
+            r"group_timepoints\(\) missing 1 required positional argument: "
+                "'reference_column'"):
             group_timepoints(diversity_measure=self.dm,
                              metadata=self.md_beta,
                              time_column='days_post_transplant',
                              control_column='control')
 
     def test_beta_dists_with_invalid_ref_column(self):
-        with self.assertRaisesRegex(KeyError, 'References included in the metadata are missing'
-                                    ' from the diversity measure.*foo.*bar.*baz'):
+        with self.assertRaisesRegex(KeyError, 'References included in the'
+                                    ' metadata are missing from the diversity'
+                                    ' measure.*foo.*bar.*baz'):
             group_timepoints(diversity_measure=self.dm,
                              metadata=self.md_beta,
                              time_column='days_post_transplant',
@@ -201,7 +226,8 @@ def test_beta_dists_with_invalid_ref_column(self):
     def test_beta_dists_with_empty_diversity_series(self):
         empty_beta_series = pd.Series()
 
-        with self.assertRaisesRegex(ValueError, 'Empty diversity measure detected'):
+        with self.assertRaisesRegex(ValueError,
+                                    'Empty diversity measure detected'):
             group_timepoints(diversity_measure=empty_beta_series,
                              metadata=self.md_beta,
                              time_column='days_post_transplant',
@@ -209,7 +235,8 @@ def test_beta_dists_with_empty_diversity_series(self):
                              control_column='control')
 
     def test_beta_dists_with_extra_samples_in_metadata_not_in_diversity(self):
-        extra_md = Metadata.load(self.get_data_path('sample_metadata_donors_missing.tsv'))
+        extra_md = Metadata.load(self.get_data_path(
+                       'sample_metadata_donors_missing.tsv'))
 
         exp_time_df = pd.DataFrame({
             'id': ['sampleA', 'sampleB', 'sampleC', 'sampleD', 'sampleE'],
@@ -221,9 +248,12 @@ def test_beta_dists_with_extra_samples_in_metadata_not_in_diversity(self):
             'id': ['donor1..donor2', 'donor1..donor3', 'donor2..donor3',
                    'sampleB..sampleC', 'sampleB..sampleD', 'sampleC..sampleD'],
             'measure': [0.24, 0.41, 0.74, 0.37, 0.44, 0.31],
-            'group': ['reference', 'reference', 'reference', 'control1', 'control1', 'control1'],
-            'A': ['donor1', 'donor1', 'donor2', 'sampleB', 'sampleB', 'sampleC'],
-            'B': ['donor2', 'donor3', 'donor3', 'sampleC', 'sampleD', 'sampleD']
+            'group': ['reference', 'reference', 'reference',
+                      'control1', 'control1', 'control1'],
+            'A': ['donor1', 'donor1', 'donor2',
+                  'sampleB', 'sampleB', 'sampleC'],
+            'B': ['donor2', 'donor3', 'donor3',
+                  'sampleC', 'sampleD', 'sampleD']
         })
 
         time_df, ref_df = group_timepoints(diversity_measure=self.dm,
@@ -236,9 +266,12 @@ def test_beta_dists_with_extra_samples_in_metadata_not_in_diversity(self):
         pd.testing.assert_frame_equal(ref_df, exp_ref_df)
 
     def test_beta_dists_with_extra_samples_in_diversity_not_in_metadata(self):
-        extra_dm = DistanceMatrix.read(self.get_data_path('dist_matrix_donors_missing.tsv')).to_series()
+        extra_dm = DistanceMatrix.read(self.get_data_path(
+                       'dist_matrix_donors_missing.tsv')).to_series()
 
-        with self.assertRaisesRegex(ValueError, 'The following IDs are not present in the metadata'):
+        with self.assertRaisesRegex(ValueError,
+                                    'The following IDs are not present'
+                                    ' in the metadata'):
             group_timepoints(diversity_measure=extra_dm,
                              metadata=self.md_beta,
                              time_column='days_post_transplant',
@@ -310,20 +343,23 @@ def test_alpha_dists_with_same_donor_for_all_samples(self):
         exp_ref_df = pd.DataFrame({
             'id': ['donor1', 'sampleC', 'sampleD', 'sampleE', 'sampleF'],
             'measure': [32, 15, 6, 44, 17],
-            'group': ['reference', 'control1', 'control1', 'control2', 'control2']
+            'group': ['reference', 'control1',
+                      'control1', 'control2', 'control2']
         })
 
-        time_df, ref_df = group_timepoints(diversity_measure=self.alpha,
-                                           metadata=self.md_alpha,
-                                           time_column='days_post_transplant',
-                                           reference_column='relevant_donor_all',
-                                           control_column='control')
+        time_df, ref_df = group_timepoints(
+            diversity_measure=self.alpha, metadata=self.md_alpha,
+            time_column='days_post_transplant',
+            reference_column='relevant_donor_all',
+            control_column='control')
 
         pd.testing.assert_frame_equal(time_df, exp_time_df)
         pd.testing.assert_frame_equal(ref_df, exp_ref_df)
 
     def test_alpha_dists_with_one_donor_and_controls(self):
-        with self.assertRaisesRegex(KeyError, 'Missing references for the associated sample data'):
+        with self.assertRaisesRegex(KeyError,
+                                    'Missing references for the associated'
+                                    ' sample data'):
             group_timepoints(diversity_measure=self.alpha,
                              metadata=self.md_alpha,
                              time_column='days_post_transplant',
@@ -341,14 +377,15 @@ def test_alpha_dists_with_donors_and_one_control(self):
         exp_ref_df = pd.DataFrame({
             'id': ['donor1', 'donor2', 'donor3', 'donor4', 'sampleB'],
             'measure': [32, 51, 3, 19, 37],
-            'group': ['reference', 'reference', 'reference', 'reference', 'control1']
+            'group': ['reference', 'reference', 'reference',
+                      'reference', 'control1']
         })
 
         time_df, ref_df = group_timepoints(diversity_measure=self.alpha,
-                             metadata=self.md_alpha,
-                             time_column='days_post_transplant',
-                             reference_column='relevant_donor',
-                             control_column='single_control')
+                                           metadata=self.md_alpha,
+                                           time_column='days_post_transplant',
+                                           reference_column='relevant_donor',
+                                           control_column='single_control')
 
         pd.testing.assert_frame_equal(time_df, exp_time_df)
         pd.testing.assert_frame_equal(ref_df, exp_ref_df)
@@ -376,16 +413,19 @@ def test_alpha_dists_with_donors_no_controls(self):
         pd.testing.assert_frame_equal(ref_df, exp_ref_df)
 
     def test_alpha_dists_no_donors_with_controls(self):
-        with self.assertRaisesRegex(TypeError, r"group_timepoints\(\) missing 1 required positional argument: "
-                                    "'reference_column'"):
+        with self.assertRaisesRegex(
+            TypeError,
+            r"group_timepoints\(\) missing 1 required positional argument: "
+                "'reference_column'"):
             group_timepoints(diversity_measure=self.alpha,
                              metadata=self.md_alpha,
                              time_column='days_post_transplant',
                              control_column='control')
 
     def test_alpha_dists_with_invalid_ref_column(self):
-        with self.assertRaisesRegex(KeyError, 'References included in the metadata are missing'
-                                    ' from the diversity measure.*foo.*bar.*baz'):
+        with self.assertRaisesRegex(KeyError, 'References included in the'
+                                    ' metadata are missing from the diversity'
+                                    ' measure.*foo.*bar.*baz'):
             group_timepoints(diversity_measure=self.alpha,
                              metadata=self.md_alpha,
                              time_column='days_post_transplant',
@@ -395,7 +435,8 @@ def test_alpha_dists_with_invalid_ref_column(self):
     def test_alpha_dists_with_empty_diversity_series(self):
         empty_alpha_series = pd.Series()
 
-        with self.assertRaisesRegex(ValueError, 'Empty diversity measure detected'):
+        with self.assertRaisesRegex(ValueError,
+                                    'Empty diversity measure detected'):
             group_timepoints(diversity_measure=empty_alpha_series,
                              metadata=self.md_alpha,
                              time_column='days_post_transplant',
@@ -403,7 +444,8 @@ def test_alpha_dists_with_empty_diversity_series(self):
                              control_column='control')
 
     def test_alpha_dists_with_extra_samples_in_metadata_not_in_diversity(self):
-        extra_md = Metadata.load(self.get_data_path('sample_metadata_alpha_div_missing.tsv'))
+        extra_md = Metadata.load(self.get_data_path(
+                       'sample_metadata_alpha_div_missing.tsv'))
 
         exp_time_df = pd.DataFrame({
             'id': ['sampleA', 'sampleB', 'sampleC', 'sampleD',
@@ -430,9 +472,11 @@ def test_alpha_dists_with_extra_samples_in_metadata_not_in_diversity(self):
         pd.testing.assert_frame_equal(ref_df, exp_ref_df)
 
     def test_alpha_dists_with_extra_samples_in_diversity_not_in_metadata(self):
-        extra_alpha = pd.read_csv(self.get_data_path('alpha_div_missing.tsv'), sep='\t', index_col=0, squeeze=True)
+        extra_alpha = pd.read_csv(self.get_data_path('alpha_div_missing.tsv'),
+                                  sep='\t', index_col=0, squeeze=True)
 
-        with self.assertRaisesRegex(ValueError, 'The following IDs are not present in the metadata'):
+        with self.assertRaisesRegex(ValueError, 'The following IDs are not'
+                                    ' present in the metadata'):
             group_timepoints(diversity_measure=extra_alpha,
                              metadata=self.md_alpha,
                              time_column='days_post_transplant',
@@ -442,12 +486,14 @@ def test_alpha_dists_with_extra_samples_in_diversity_not_in_metadata(self):
     def test_examples(self):
         self.execute_examples()
 
+
 class TestStats(TestBase):
     # Wilcoxon SRT test cases
 
     # Data in the exp_stats_data dataframes were pulled from Greg Caporaso's
     # Autism study repo on github, which can be found here:
-    # https://github.com/caporaso-lab/autism-fmt1/blob/18-month-followup/16S/engraftment.ipynb
+    # https://github.com/caporaso-lab/autism-fmt1/ (branch 18-month-followup,
+    # notebook 16S/engraftment.ipynb)
     def test_wilcoxon_with_faith_pd_baseline0_asymptotic(self):
         exp_stats_data = pd.DataFrame({
             'A:group': [0.0, 0.0, 0.0, 0.0],
@@ -462,8 +508,10 @@ def test_wilcoxon_with_faith_pd_baseline0_asymptotic(self):
             'q-value': [0.758312374, 0.005782696, 0.00154471, 0.002246758]
         })
 
-        stats_data = wilcoxon_srt(distribution=self.faithpd_timedist, hypothesis='baseline',
-                                  baseline_group='0', p_val_approx='asymptotic')
+        stats_data = wilcoxon_srt(distribution=self.faithpd_timedist,
+                                  hypothesis='baseline',
+                                  baseline_group='0',
+                                  p_val_approx='asymptotic')
 
         pd.testing.assert_frame_equal(stats_data, exp_stats_data)
 
@@ -482,54 +530,65 @@ def test_wilcoxon_with_faith_pd_consecutive_asymptotic(self):
         })
 
         stats_data = wilcoxon_srt(distribution=self.faithpd_timedist,
-                                  hypothesis='consecutive', p_val_approx='asymptotic')
+                                  hypothesis='consecutive',
+                                  p_val_approx='asymptotic')
 
         pd.testing.assert_frame_equal(stats_data, exp_stats_data)
 
     def test_wilcoxon_consecutive_hypothesis_with_baseline_group(self):
-        with self.assertRaisesRegex(ValueError, "`consecutive` was selected as the hypothesis,"
-                                    " but a `baseline_group` was added."):
+        with self.assertRaisesRegex(ValueError, "`consecutive` was selected as"
+                                    " the hypothesis, but a `baseline_group`"
+                                    " was added."):
             wilcoxon_srt(distribution=self.faithpd_timedist,
                          hypothesis='consecutive', baseline_group='reference')
 
     def test_wilcoxon_invalid_hypothesis(self):
-        with self.assertRaisesRegex(ValueError, "Invalid hypothesis. Please either choose"
-                                    " `baseline` or `consecutive` as your hypothesis."):
-            wilcoxon_srt(distribution=self.faithpd_timedist, hypothesis='foo')
+        with self.assertRaisesRegex(ValueError, "Invalid hypothesis. Please"
+                                    " either choose `baseline` or"
+                                    " `consecutive` as your hypothesis."):
+            wilcoxon_srt(distribution=self.faithpd_timedist,
+                         hypothesis='foo')
 
     def test_wilcoxon_invalid_baseline_group(self):
-        with self.assertRaisesRegex(ValueError, "'foo' was not found as a group"
-                                    " within the distribution."):
+        with self.assertRaisesRegex(ValueError, "'foo' was not found as a"
+                                    " group within the distribution."):
             wilcoxon_srt(distribution=self.faithpd_timedist,
                          hypothesis='baseline', baseline_group='foo')
 
     # Mann-Whitney U test cases
 
-    # Data in the exp_stats_data dataframes were calculated 'by hand' in a jupyter notebook
-    # using the same data, manually organized into groups and subsequently compared using
-    # scipy.stats.mannwhitneyu to calculate the test-statistic and p-values
-    # Notebook can be found here, for reference:
+    # Data in the exp_stats_data dataframes were calculated 'by hand' in a
+    # jupyter notebook using the same data, manually organized into groups
+    # and subsequently compared using scipy.stats.mannwhitneyu to calculate
+    # the test-statistic and p-values. Notebook can be found here:
     # https://gist.github.com/lizgehret/c9add7b451e5e91b1017a2a963276bff
     def test_mann_whitney_pairwise_against_each(self):
         exp_stats_data = pd.DataFrame({
             'A:group': ['control', 'control', 'control', 'control', 'control',
-                        'reference', 'reference', 'reference', 'reference', 'reference'],
+                        'reference', 'reference', 'reference',
+                        'reference', 'reference'],
             'A:n': [23, 23, 23, 23, 23, 5, 5, 5, 5, 5],
-            'A:measure': [11.64962736, 11.64962736, 11.64962736, 11.64962736, 11.64962736,
-                          10.24883918, 10.24883918, 10.24883918, 10.24883918, 10.24883918],
+            'A:measure': [11.64962736, 11.64962736, 11.64962736,
+                          11.64962736, 11.64962736,
+                          10.24883918, 10.24883918, 10.24883918,
+                          10.24883918, 10.24883918],
             'B:group': [0, 3, 10, 18, 100, 0, 3, 10, 18, 100],
             'B:n': [18, 17, 18, 18, 16, 18, 17, 18, 18, 16],
-            'B:measure': [9.54973486, 9.592979726, 10.9817719, 11.39392352, 12.97286672,
-                          9.54973486, 9.592979726, 10.9817719, 11.39392352, 12.97286672],
+            'B:measure': [9.54973486, 9.592979726, 10.9817719,
+                          11.39392352, 12.97286672,
+                          9.54973486, 9.592979726, 10.9817719,
+                          11.39392352, 12.97286672],
             'n': [41, 40, 41, 41, 39, 23, 22, 23, 23, 21],
             'test-statistic': [282.0, 260.0, 194.0, 190.0, 104.0,
                                49.0, 43.0, 20.0, 14.0, 6.0],
-            'p-value': [0.050330911733538534, 0.07994303215567311, 0.7426248650660427,
-                        0.6646800940267454, 0.02321456407322841, 0.7941892150565809,
+            'p-value': [0.050330911733538534, 0.07994303215567311,
+                        0.7426248650660427, 0.6646800940267454,
+                        0.02321456407322841, 0.7941892150565809,
                         1.0, 0.06783185968744732, 0.023005953105134484,
                         0.0056718704407604376],
-            'q-value': [0.12582728, 0.13323839, 0.92828108, 0.94954299, 0.07738188,
-                        0.88243246, 1.0, 0.13566372, 0.11502977, 0.0567187],
+            'q-value': [0.12582728, 0.13323839, 0.92828108, 0.94954299,
+                        0.07738188, 0.88243246, 1.0, 0.13566372,
+                        0.11502977, 0.0567187],
         })
 
         stats_data = mann_whitney_u(distribution=self.faithpd_refdist,
@@ -561,20 +620,22 @@ def test_mann_whitney_reference(self):
         pd.testing.assert_frame_equal(stats_data, exp_stats_data)
 
     def test_mann_whitney_all_pairwise_hypothesis_with_reference_group(self):
-        with self.assertRaisesRegex(ValueError, "`all-pairwise` was selected as the"
-                                    " hypothesis, but a `reference_group` was added."):
+        with self.assertRaisesRegex(ValueError, "`all-pairwise` was selected"
+                                    " as the hypothesis, but a"
+                                    " `reference_group` was added."):
             mann_whitney_u(distribution=self.faithpd_refdist,
                            hypothesis='all-pairwise',
                            reference_group='reference')
 
     def test_mann_whitney_invalid_hypothesis(self):
-        with self.assertRaisesRegex(ValueError, "Invalid hypothesis. Please either"
-                         " choose `reference` or `all-pairwise` as your hypothesis."):
+        with self.assertRaisesRegex(ValueError, "Invalid hypothesis. Please"
+                                    " either choose `reference` or"
+                                    " `all-pairwise` as your hypothesis."):
             mann_whitney_u(distribution=self.faithpd_refdist,
                            hypothesis='foo')
 
     def test_mann_whitney_invalid_reference_group(self):
-        with self.assertRaisesRegex(ValueError, "'foo' was not found as a group"
-                                    " within the distribution."):
+        with self.assertRaisesRegex(ValueError, "'foo' was not found as a"
+                                    " group within the distribution."):
             mann_whitney_u(distribution=self.faithpd_refdist,
                            hypothesis='reference', reference_group='foo')