diff --git a/.coveragerc b/.coveragerc
index 21704d6..2b755b1 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,7 +1,8 @@
[run]
omit =
- *tests*
- *migrations*
+ */tests/*
+ */migrations/*
*settings*
*asgi.py
- *wsgi.py
\ No newline at end of file
+ *wsgi.py
+ *generate_dev_data.py
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 1af8a97..ec007a5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,7 @@ __pycache__
.coverage
.pytest_cache
test-results
+screenshot_website*
# Elastic Beanstalk Files
.elasticbeanstalk/*
diff --git a/docs/img/website/mag-annotations.png b/docs/img/website/mag-annotations.png
new file mode 100644
index 0000000..62310ef
Binary files /dev/null and b/docs/img/website/mag-annotations.png differ
diff --git a/docs/img/website/mag-catalogue.png b/docs/img/website/mag-catalogue.png
index 7b7a980..510eab2 100644
Binary files a/docs/img/website/mag-catalogue.png and b/docs/img/website/mag-catalogue.png differ
diff --git a/docs/img/website/mag-containment.png b/docs/img/website/mag-containment.png
new file mode 100644
index 0000000..fdb738a
Binary files /dev/null and b/docs/img/website/mag-containment.png differ
diff --git a/docs/website.qmd b/docs/website.qmd
index 88b7d96..84005d3 100644
--- a/docs/website.qmd
+++ b/docs/website.qmd
@@ -125,6 +125,37 @@ MAGs can be found by searching on accession or taxonomy, or for the accession of
The MAGs in a catalogue can be downloaded as a TSV file, using the "Download all as TSV" button.
+#### MAG Annotations
+MGnify Genomes catalogues use a standardised pipeline ([Gurbich et al. 2023](https://europepmc.org/article/MED/36806692)) to annotate
+the assembled genomes with various tools.
+These annotations are performed on the species-level cluster representative genomes.
+These annotations can all be accessed via the data portal’s links to MGnify.
+
+Given the HoloFood project's aims, [CAZy](http://www.cazy.org) (Carbohydrate-Active enZymes) annotations are particularly relevant to
+the HoloFood MAG catalogues.
+
+A summary of the CAZy annotations, in the form of counts per CAZy category, is therefore shown on the detail view of each MAG.
+(Note that, like all MAG annotations, these CAZy annotations refer to the MAG's cluster representative genome – not necessarily the HoloFood-data-derived MAG itself.)
+
+![Screenshot of a MAG’s detail page, including CAZy annotations](/img/website/mag-annotations.png)
+
+These annotations are also available via each genome's [API](api.ipynb) endpoint.
+
+#### MAG containment within samples
+To facilitate the linking of MAGs to other samples within the HoloFood dataset, the data portal also includes a list of "containments" for each MAG within all of the project’s metagenomic samples.
+
+For each MAG, a sample list was found using Mastiff (a tool based on [sourmash](https://sourmash.bio), [Irber et al. 2022](https://www.biorxiv.org/content/10.1101/2022.11.02.514947v1)).
+Each sample in this list contains the MAG at some level, equivalent to the fraction of the MAG’s kmers that are present in the sample’s sequencing reads.
+The list can be filtered to find only the samples that contain the MAG above some minimum containment threshold.
+
+![Screenshot of a MAG’s sample containment list](/img/website/mag-containment.png)
+
+These sample containment lists are also available via each genome's [API](api.ipynb) endpoint, as well as via the TSV export option above the table.
+
+Together with the [MAG’s CAZy annotations](website.qmd#mag-annotations), this feature means the prevalence of
+carbohydrate-active enzymes can be compared at the genome level between samples originating from animals
+under different experimental conditions.
+
### Viral Catalogues
![Screenshot of a viral catalogue](/img/website/viral-catalogue.png)
Viral catalogues are lists of the unique (at species-level) viruses found in HoloFood samples.
diff --git a/holofood/api.py b/holofood/api.py
index 9299f53..38b99f2 100644
--- a/holofood/api.py
+++ b/holofood/api.py
@@ -192,7 +192,13 @@ def resolve_representative_url(obj: Genome):
class Config:
model = Genome
- model_fields = ["accession", "cluster_representative", "taxonomy", "metadata"]
+ model_fields = [
+ "accession",
+ "cluster_representative",
+ "taxonomy",
+ "metadata",
+ "annotations",
+ ]
class GenomeSampleContainmentSchema(ModelSchema):
diff --git a/holofood/filters.py b/holofood/filters.py
index 4ed07d2..2cabdf4 100644
--- a/holofood/filters.py
+++ b/holofood/filters.py
@@ -16,6 +16,7 @@
from django.forms import NumberInput
from django.utils.safestring import mark_safe
+from holofood.forms import CazyAnnotationsFilterForm
from holofood.models import (
Sample,
Genome,
@@ -123,6 +124,7 @@ class Meta:
class GenomeFilter(django_filters.FilterSet):
class Meta:
model = Genome
+ form = CazyAnnotationsFilterForm
fields = {
"accession": ["icontains"],
@@ -130,6 +132,19 @@ class Meta:
"taxonomy": ["icontains"],
}
+ def filter_queryset(self, queryset):
+ """
+ Apply the declared filters, then restrict by requested CAZy annotations.
+
+ Every entry of the form's cleaned_data whose name matches a declared
+ filter is applied to the queryset. Afterwards, for each value submitted
+ under "cazy_annotations", a condition requiring the value stored at
+ annotations -> cazy -> <value> to be greater than zero is AND-ed into a
+ Q object, and the queryset is filtered by that Q object.
+ """
+ qs = queryset
+ # Only cleaned values whose name is a declared filter are applied here;
+ # any other form field is ignored by this loop.
+ for name, value in self.form.cleaned_data.items():
+ if name in self.filters:
+ qs = self.filters[name].filter(qs, value)
+
+ # Collects one "greater than zero" condition per requested CAZy key,
+ # combined with AND.
+ filters = Q()
+ if self.data:
+ # NOTE(review): getlist() presumes self.data is QueryDict-like
+ # (e.g. request.GET) - confirm for callers that pass a plain dict.
+ cazy_annotations = self.data.getlist("cazy_annotations")
+ for key in cazy_annotations:
+ # NOTE(review): key comes from the raw submitted data rather than
+ # cleaned_data and is interpolated into the lookup path - confirm it
+ # is validated against the known CAZy categories elsewhere.
+ filters &= Q(**{f"annotations__cazy__{key}__gt": 0})
+ return qs.filter(filters)
+
class GenomeSampleContainmentFilter(django_filters.FilterSet):
minimum_containment = django_filters.NumberFilter(
diff --git a/holofood/forms.py b/holofood/forms.py
new file mode 100644
index 0000000..b2294a6
--- /dev/null
+++ b/holofood/forms.py
@@ -0,0 +1,75 @@
+from django import forms
+from django.forms.widgets import SelectMultiple
+from django.utils.html import format_html
+from django.utils.encoding import force_str
+
+
+class CazyCheckboxesWidget(SelectMultiple):
+ def __init__(self, *args, **kwargs):
+ """
+ Set up the widget with the fixed set of CAZy category choices.
+
+ Positional and keyword arguments are forwarded unchanged to
+ SelectMultiple; the widget's choices are then replaced with the
+ (code, label) pairs listed below.
+ """
+ super().__init__(*args, **kwargs)
+ # Must be a flat list of (value, label) pairs: render() unpacks each
+ # element of self.choices into exactly two names, so wrapping the list
+ # in an enclosing tuple would make that unpacking fail.
+ self.choices = [
+ ("GH", "Glycoside Hydrolase"),
+ ("CB", "Carbohydrate-Binding"),
+ ("PL", "Polysaccharide Lyases"),
+ ("CE", "Carbohydrate Esterases"),
+ ("AA", "Auxiliary Activities"),
+ ("GT", "GlycosylTransferases"),
+ ]
+
+ def render(self, name, value, attrs=None, renderer=None):
+ output = []
+ if not isinstance(value, list):
+ value = [value]
+ for option_value, option_label in self.choices:
+ final_attrs = self.build_attrs(self.attrs, attrs)
+ final_attrs["type"] = "checkbox"
+ final_attrs["name"] = name
+ final_attrs["value"] = option_value
+ final_attrs["id"] = "id_%s_%s" % (name, option_value)
+
+ if value and force_str(option_value) in value:
+ final_attrs["checked"] = "checked"
+ else:
+ final_attrs.pop("checked", None)
+
+ output.append(
+ format_html(
+ '
+ {{ genome.accession }}’s cluster representative genome {{ genome.cluster_representative }} has been annotated by MGnify’s Genomes pipeline.
+ {# djlint:off #}CAZy (Carbohydrate-Active enZymes){# djlint:on #} annotations are particularly relevant to HoloFood’s experimental goals,
+ and this graphic shows the number of CAZy annotations on {{ genome.cluster_representative }} from each CAZy category.
+
+ {% include "holofood/scripts/cazy_chart.html" with cazy_annotations=cazy_annotations only %}
@@ -95,7 +106,7 @@
{{ genome.accession }}
{{ genome.accession }}’s cluster representative ({{ genome.cluster_representative }})
- has been searched for in all HoloFood samples, using a sourmash-based tool.
+ has been searched for in all HoloFood samples, using a sourmash-based tool.
These samples contain some or all of the kmers in {{ genome.cluster_representative }}’s sequence.
Because {{ genome.accession }} has been clustered with {{ genome.cluster_representative }}
at 95% sequence similarity, this indicates that these samples are likely to contain {{ genome.accession }}.
diff --git a/templates/holofood/scripts/cazy_chart.html b/templates/holofood/scripts/cazy_chart.html
new file mode 100644
index 0000000..56a025d
--- /dev/null
+++ b/templates/holofood/scripts/cazy_chart.html
@@ -0,0 +1,33 @@
+
+
+{{ cazy_annotations | json_script:"cazy_annotations_data" }}
+
\ No newline at end of file