diff --git a/README.md b/README.md
index f5f8250..2f53c98 100644
--- a/README.md
+++ b/README.md
@@ -134,6 +134,8 @@ Options:
   -k, --deconv-config, --dec YAML
                                   Configuration of parameters for kernel
                                   deconvolution  [required]
+  --filters YAML                  List of filters for removing problematic
+                                  mutations from tally
   -l, --loc, --location, --wwtp, --catchment NAME
                                   Name(s) of location/wastewater treatment
                                   plant/catchment area to process
@@ -288,6 +290,49 @@ var_dates:
 ```
 see [variants_dates_example.yaml](variants_dates_example.yaml).
 
+#### Filters (optional)
+
+Some mutations might be problematic and need to be taken out --- e.g. due to
+drop-outs in the multiplex PCR amplification they do not show up in the data,
+and LolliPop could misinterpret their absence as proof that a variant is not present.
+This optional file contains a collection of filters; each filter is a list of statements
+selecting entries based on the values found in columns.
+The general syntax of a statement is:
+```text
+- <column> <op> <value>
+```
+Valid _op_ values are:
+- `==` on that line, the value in column _<column>_ is exactly _<value>_
+  - for simple strings this can be omitted: `- proto v3` is synonymous with `- proto == v3`
+- `<=` the value is less than or equal to _<value>_
+- `>=` the value is greater than or equal to _<value>_
+- `<` the value is less than _<value>_
+- `>` the value is greater than _<value>_
+- `!=` the value is **not** _<value>_
+- `in` the value is found in the list specified in _<value>_
+- `~` the value matches the regular expression in _<value>_
+  - the regex can be quoted using `/` or `@`
+- `!~` the value does **not** match the regular expression in _<value>_
+
+Any column found in the input file can be used.
+
+All statements within a filter are combined with a logical `and`, and the matching lines are removed from the tally table.
+
+For example:
+```yaml
+# filter to remove test samples
+remove_test:
+  - sample ~ /^Test/
+
+# filter to remove an amplicon that has drop-outs
+amplicon75:
+  - proto v3
+  - date > 2021-11-20
+  - pos >= 22428
+  - pos <= 22785
+```
+see the [example in filters_preprint.yaml](filters_preprint.yaml).
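+
+As a further illustrative sketch (the filter name and the `^Control` pattern below are
+made up and not part of the preprint filters), a single filter can combine the `in` and
+`~` operators, here removing two specific mutations only from control samples:
+```yaml
+# hypothetical filter: drop two specific mutations, but only in control samples
+control_artefacts:
+  - sample ~ /^Control/
+  - mutations in [ 28461G, 11201G ]
+```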
+
 #### Running it
 
 ```bash
diff --git a/filters_preprint.yaml b/filters_preprint.yaml
new file mode 100644
index 0000000..743abdc
--- /dev/null
+++ b/filters_preprint.yaml
@@ -0,0 +1,53 @@
+bad_mutations:
+  - proto v3
+  - mutations in [ 28461G, 11201G, 26801C, -28461G, -11201G, -26801C ]
+
+amplicon75:
+  - proto v3
+  - date > 2021-11-20
+  - pos >= 22428
+  - pos <= 22785
+
+amplicon76:
+  - proto v3
+  - date > 2021-11-20
+  - pos >= 22677
+  - pos <= 23028
+
+amplicon77:
+  - proto v3
+  - date > 2021-11-20
+  - pos >= 22974
+  - pos <= 23327
+
+amplicon88:
+  - proto v3
+  - date > 2021-11-20
+  - pos >= 26277
+  - pos <= 26635
+
+amplicon90:
+  - proto v3
+  - date > 2021-11-20
+  - pos >= 26895
+  - pos <= 27256
+
+other_0:
+  - proto v3
+  - date > 2021-11-20
+  - pos == 26709
+
+other_1:
+  - proto v3
+  - date > 2021-11-20
+  - pos == 27807
+
+other_2:
+  - proto v3
+  - date > 2021-11-20
+  - pos == 2832
+
+other_3:
+  - proto v3
+  - date > 2021-11-20
+  - pos == 10449
diff --git a/lollipop/cli/deconvolute.py b/lollipop/cli/deconvolute.py
index 93c0ac2..47bce76 100755
--- a/lollipop/cli/deconvolute.py
+++ b/lollipop/cli/deconvolute.py
@@ -94,6 +94,15 @@
     default=None,
     help="Name(s) of location/wastewater treatment plant/catchment area to process",
 )
+@click.option(
+    "--filters",
+    "-fl",
+    metavar="YAML",
+    required=False,
+    default=None,
+    type=str,
+    help="List of filters for removing problematic mutations from tally",
+)
 @click.option(
     "--seed",
     "-s",
@@ -109,6 +118,7 @@ def deconvolute(
     variants_dates,
     deconv_config,
     loc,
+    filters,
     seed,
     output,
     fmt_columns,
@@ -135,6 +145,13 @@ def deconvolute(
     with open(deconv_config, "r") as file:
         deconv = yaml.load(file)
 
+    # problematic mutation filters
+    if filters:
+        with open(filters, "r") as file:
+            filters = yaml.load(file)
+
+        print(f"{len(filters)} filter{ '' if len(filters) == 1 else 's' } loaded")
+
     # data
     try:
         df_tally = pd.read_csv(
@@ -287,7 +304,7 @@ def deconvolute(
         no_date=no_date,
         remove_deletions=remove_deletions,
     )
-    preproc = preproc.filter_mutations()
+    preproc = preproc.filter_mutations(filters=filters)
 
     print("deconvolve all")
     np.random.seed(seed)
diff --git a/lollipop/preprocessors.py b/lollipop/preprocessors.py
index 46710f2..3702140 100644
--- a/lollipop/preprocessors.py
+++ b/lollipop/preprocessors.py
@@ -1,6 +1,9 @@
 import pandas as pd
 import numpy as np
+from functools import reduce
+import re
 import sys
+from pandas.api.types import is_numeric_dtype
 
 
 class DataPreprocesser:
@@ -118,6 +121,93 @@ def general_preprocess(
 
     def filter_mutations(self, filters=None):
         """filter out hardcoded problematic mutations"""
+        if filters is None:
+            return self
+
+        types = self.df_tally.dtypes
+
+        rxprser = re.compile(
+            r"^ *(?:(?P<col>"
+            + r"|".join(self.df_tally.columns)
+            + r")|(?P<bad>\w+)) *(?P<op>in|[<>=~!]*) *(?P<qv>['\"]?)(?P<val>.+)(?P=qv) *$"
+        )
+
+        def apply_filter_statement(name, fs):
+            """parse a single statement from a filter and apply it, returning a boolean series"""
+            m = rxprser.search(fs)
+            assert m, f"Cannot parse statement <{fs}> in filter {name}"
+            m = m.groupdict()
+
+            assert m[
+                "col"
+            ], f"bad column name {m['bad']}, not in list: {self.df_tally.columns}, while parsing statement <{fs}> in filter {name}"
+
+            # HACK handle 'date' column differently, to force datatypes
+            col = (
+                pd.to_datetime(self.df_tally["date"])
+                if "date" == m["col"]
+                else self.df_tally[m["col"]]
+            )
+            val = (
+                np.datetime64(m["val"])
+                if "date" == m["col"]
+                else (
+                    pd.to_numeric(m["val"])
+                    if is_numeric_dtype(types[m["col"]])
+                    else m["val"]
+                )
+            )
+
+            # apply operator
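+            # the dispatch below accepts several spellings of each operator as
+            # aliases (e.g. "=<" for "<=", "=~" for "~"); an empty operator is
+            # treated as an implicit equality test on a single-word value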
+            match m["op"]:
+                case "=" | "==" | "" as e:
+                    if e == "":
+                        assert (
+                            " " not in val
+                        ), f"Do not use values with space <{val}> when using no operator (implicit 'equals'). (while parsing statement <{fs}> in filter {name})"
+                    return col == val
+                case "!=" | "!":
+                    return col != val
+                case "<":
+                    return col < val
+                case "<=" | "=<":
+                    return col <= val
+                case ">=" | "=>":
+                    return col >= val
+                case ">":
+                    return col > val
+                case "in":
+                    # unpack list
+                    return col.isin(
+                        [
+                            v.strip("\"' ")
+                            for v in val.lstrip("[ ").rstrip(" ]").split(",")
+                        ]
+                    )
+                case "~" | "=~" | "~=":
+                    return col.str.contains(
+                        val[1:-1] if val[0] == val[-1] in "/@" else val
+                    )
+                case "!~" | "~!":
+                    return ~(
+                        col.str.contains(
+                            val[1:-1] if val[0] == val[-1] in "/@" else val
+                        )
+                    )
+                case _ as o:
+                    raise ValueError(
+                        f"unknown operator {o}, while parsing statement <{fs}> in filter {name}"
+                    )
+
+        for name, fl in filters.items():
+            print(f"filter {name}")
+
+            self.df_tally = self.df_tally[
+                ~reduce(
+                    (lambda x, y: x & y),
+                    [apply_filter_statement(name, fstatmt) for fstatmt in fl],
+                )
+            ]
 
         # HACK completely disable filters
         return self
diff --git a/tests/test_filters.py b/tests/test_filters.py
new file mode 100644
index 0000000..f5c6980
--- /dev/null
+++ b/tests/test_filters.py
@@ -0,0 +1,150 @@
+import pandas as pd
+import numpy as np
+import lollipop as ll
+import ruamel.yaml
+from pandas.testing import assert_frame_equal
+
+yaml = ruamel.yaml.YAML(typ="rt")
+
+
+def assert_frame_NOT_equal(*args, **kwargs):
+    try:
+        assert_frame_equal(*args, **kwargs)
+    except AssertionError:
+        # frames are not equal
+        pass
+    else:
+        # frames are equal
+        raise AssertionError
+
+
+def hardcoded_filter(df_tally):
+    """this is the old hard-coded filter"""
+    df_tally = df_tally[
+        ~df_tally["mutations"].isin(
+            ["28461G", "11201G", "26801C"] + ["-28461G", "-11201G", "-26801C"]
+        )
+    ]
+
+    df_tally = df_tally[
+        ~(
+            (pd.to_datetime(df_tally.date) > np.datetime64("2021-11-20"))
+            & (df_tally.pos >= 22428)
+            & (df_tally.pos <= 22785)
+        )
+    ]  # amplicon75
+    df_tally = df_tally[
+        ~(
+            (pd.to_datetime(df_tally.date) > np.datetime64("2021-11-20"))
+            & (df_tally.pos >= 22677)
+            & (df_tally.pos <= 23028)
+        )
+    ]  # amplicon76
+    df_tally = df_tally[
+        ~(
+            (pd.to_datetime(df_tally.date) > np.datetime64("2021-11-20"))
+            & (df_tally.pos >= 22974)
+            & (df_tally.pos <= 23327)
+        )
+    ]  # amplicon77
+    df_tally = df_tally[
+        ~(
+            (pd.to_datetime(df_tally.date) > np.datetime64("2021-11-20"))
+            & (df_tally.pos >= 26277)
+            & (df_tally.pos <= 26635)
+        )
+    ]  # amplicon88
+    df_tally = df_tally[
+        ~(
+            (pd.to_datetime(df_tally.date) > np.datetime64("2021-11-20"))
+            & (df_tally.pos >= 26895)
+            & (df_tally.pos <= 27256)
+        )
+    ]  # amplicon90
+    df_tally = df_tally[
+        ~(
+            (pd.to_datetime(df_tally.date) > np.datetime64("2021-11-20"))
+            & (df_tally.pos == 26709)
+        )
+    ]  # other
+    df_tally = df_tally[
+        ~(
+            (pd.to_datetime(df_tally.date) > np.datetime64("2021-11-20"))
+            & (df_tally.pos == 27807)
+        )
+    ]  # other
+    df_tally = df_tally[
+        ~(
+            (pd.to_datetime(df_tally.date) > np.datetime64("2021-11-20"))
+            & (df_tally.pos == 2832)
+        )
+    ]  # other
+    df_tally = df_tally[
+        ~(
+            (pd.to_datetime(df_tally.date) > np.datetime64("2021-11-20"))
+            & (df_tally.pos == 10449)
+        )
+    ]  # other
+
+    return df_tally
+
+
+def test_filters():
+    tally_file = "preprint/data/tallymut_line_full.tsv.zst"
+    varconf_file = "config_preprint.yaml"
+    filter_file = "filters_preprint.yaml"
+
+    # filter to test
+    with open(filter_file, "r") as f:
+        filters = yaml.load(f)
+
+    # load config
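+    # (these are the same pre-processing options that the deconvolute CLI
+    #  reads from the variants configuration and passes to general_preprocess)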
+    with open(varconf_file, "r") as f:
+        conf_yaml = yaml.load(f)
+    variants_list = conf_yaml["variants_list"]
+    variants_pangolin = conf_yaml["variants_pangolin"]
+    variants_not_reported = conf_yaml.get("variants_not_reported", [])
+    to_drop = conf_yaml.get("to_drop", [])
+    start_date = conf_yaml.get("start_date", None)
+    end_date = conf_yaml.get("end_date", None)
+    no_date = conf_yaml.get("no_date", False)
+    remove_deletions = conf_yaml.get("remove_deletions", True)
+
+    # load data
+    df_tally = pd.read_csv(
+        tally_file, sep="\t", parse_dates=["date"], dtype={"location_code": "str"}
+    )
+
+    # pre-process data
+    preproc = ll.DataPreprocesser(df_tally)
+    preproc = preproc.general_preprocess(
+        variants_list=variants_list,
+        variants_pangolin=variants_pangolin,
+        variants_not_reported=variants_not_reported,
+        to_drop=to_drop,
+        start_date=start_date,
+        end_date=end_date,
+        no_date=no_date,
+        remove_deletions=remove_deletions,
+    )
+
+    # keep copy of original
+    df_unfil = preproc.df_tally
+
+    # old filtering
+    df_hc = hardcoded_filter(df_unfil)
+
+    assert_frame_NOT_equal(df_unfil, df_hc)
+
+    # modern filtering
+    preproc = preproc.filter_mutations(filters=filters)
+
+    df_filt = preproc.df_tally
+
+    assert_frame_NOT_equal(df_unfil, df_filt)
+
+    # compare
+    assert_frame_equal(df_filt[df_filt["proto"] == "v3"], df_hc[df_hc["proto"] == "v3"])
+    assert_frame_equal(
+        df_filt[df_filt["proto"] != "v3"], df_unfil[df_unfil["proto"] != "v3"]
+    )
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 99a7efc..9bbdd99 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -17,6 +17,7 @@ def test_workflow():
             "--fmt-columns",
             "--variants-config=config_preprint.yaml",
             "--deconv-config=presets/deconv_linear.yaml",
+            "--filters=filters_preprint.yaml",
             "--location=Zürich (ZH)",
             "--seed=42",
             "preprint/data/tallymut_line_full.tsv.zst",