Skip to content

Commit

Permalink
Merge branch 'template_524' into 'dev'
Browse files Browse the repository at this point in the history
Reconcile wf-template v5.2.5, update charts to bokeh API

See merge request epi2melabs/workflows/wf-human-variation!333
  • Loading branch information
RenzoTale88 committed Sep 12, 2024
2 parents 60d5741 + 886b5ac commit 7e28289
Show file tree
Hide file tree
Showing 15 changed files with 162 additions and 126 deletions.
4 changes: 2 additions & 2 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ docker-run:
parallel:
matrix:
- MATRIX_NAME: [
"str-usersex",
"str-infersex",
"missing-ref",
"two-models",
"all_phased",
Expand All @@ -73,8 +75,6 @@ docker-run:
"wf-human-snp_svrefine",
"wf-human-sv",
"wf-human-sv-phase",
"str-usersex",
"str-infersex",
"wf-human-cnv-spectre",
"wf-human-phase_all",
"wf-human-phase_all_lp",
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ repos:
always_run: true
pass_filenames: false
additional_dependencies:
- epi2melabs==0.0.56
- epi2melabs==0.0.57
- id: build_models
name: build_models
entry: datamodel-codegen --strict-nullable --base-class workflow_glue.results_schema_helpers.BaseModel --use-schema-description --disable-timestamp --input results_schema.yml --input-file-type openapi --output bin/workflow_glue/results_schema.py
Expand Down
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [unreleased]
## [v2.4.0]
### Added
- IGV configuration for the EPI2ME App includes the output VCF files.
### Changed
- Emit indexes for the input reference, when generated by the workflow.
- Links to the reference genomes in the README now point to `bgzip`-compressed fasta files.
- Updated `modkit` to v0.3.3.
- Reconciled workflow with wf-template v5.2.5.
### Fixed
- `ERROR ~ No such variable: colors` when the workflow cannot find the reference file.
- Incorrect bin size unit in QDNAseq wrapper script help text (@HudoGriz, #209).
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ therefore Nextflow will need to be
installed before attempting to run the workflow.

The workflow can currently be run using either
[Docker](https://www.docker.com/products/docker-desktop
[Docker](https://www.docker.com/products/docker-desktop)
or [Singularity](https://docs.sylabs.io/guides/3.0/user-guide/index.html)
to provide isolation of the required software.
Both methods are automated out-of-the-box provided
Expand Down
2 changes: 1 addition & 1 deletion base.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ params {
str_tag = "shadd2f2963fe39351d4e0d6fa3ca54e1064c6ec057"
spectre_tag = "sha49a9fe474da9860f84f08f17f137b47a010b1834"
snpeff_tag = "shadcc812849019640d4d2939703fbb8777256e41ad"
common_sha = "shad399cf22079b5b153920ac39ee40095a677933f1"
common_sha = "shae58638742cf84dbeeec683ba24bcdee67f64b986"
}
}

Expand Down
8 changes: 7 additions & 1 deletion bin/workflow_glue/check_bam_headers_in_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,13 @@ def main(args):
for xam_file in target_files:
# get the `@SQ` and `@HD` lines in the header
with pysam.AlignmentFile(xam_file, check_sq=False) as f:
sq_lines = f.header.get("SQ")
# compare only the SN/LN/M5 elements of SQ to avoid labelling XAM with
# same reference but different SQ.UR as mixed_header (see CW-4842)
sq_lines = [{
"SN": sq["SN"],
"LN": sq["LN"],
"M5": sq.get("M5"),
} for sq in f.header.get("SQ", [])]
hd_lines = f.header.get("HD")
# Check if it is sorted.
# When there is more than one BAM, merging/sorting
Expand Down
12 changes: 6 additions & 6 deletions bin/workflow_glue/check_xam_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ def validate_xam_index(xam_file):
Invalid indexes will fail the call with a ValueError:
ValueError: fetch called on bamfile without index
"""
alignments = pysam.AlignmentFile(xam_file, check_sq=False)
try:
alignments.fetch()
has_valid_index = True
except ValueError:
has_valid_index = False
with pysam.AlignmentFile(xam_file, check_sq=False) as alignments:
try:
alignments.fetch()
has_valid_index = True
except ValueError:
has_valid_index = False
return has_valid_index


Expand Down
4 changes: 4 additions & 0 deletions bin/workflow_glue/report_snp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
import sys

from bokeh.models import HoverTool
from dominate.tags import a, h6, p
from ezcharts.components.bcfstats import load_bcfstats
from ezcharts.components.clinvar import load_clinvar_vcf
Expand Down Expand Up @@ -227,6 +228,9 @@ def main(args):
else:
sizes = indel_sizes(bcfstats['IDD'])
plt = barplot(data=sizes, x="nlength", y="count", color=Colors.cerulean)
# Add tooltips
hover = plt._fig.select(dict(type=HoverTool))
hover.tooltips = [("Number of variants", "@top")]
EZChart(plt, 'epi2melabs')

# write report
Expand Down
148 changes: 69 additions & 79 deletions bin/workflow_glue/report_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""Plot STRs."""

from bokeh.models import BoxZoomTool, ColumnDataSource, HoverTool
from bokeh.models import PanTool, Range1d, ResetTool, WheelZoomTool
from bokeh.models import PanTool, Range1d, ResetTool, Title, WheelZoomTool
from dominate.tags import b, code, h3, p, span, table, tbody, td, th, thead, tr
from ezcharts.components import fastcat
from ezcharts.components.ezchart import EZChart
Expand All @@ -11,7 +11,6 @@
from ezcharts.layout.snippets import Grid, Tabs
from ezcharts.plots import BokehPlot
from ezcharts.plots.distribution import histplot
from ezcharts.plots.distribution import MakeRectangles
from natsort import natsorted
import pandas as pd
from .util import wf_parser # noqa: ABS101
Expand Down Expand Up @@ -91,50 +90,37 @@ def argparser():
return parser


def add_triangle(plt, x_pos, idx):
"""Draw triangle."""
plt.add_series(dict(
type='line',
datasetIndex=idx,
markPoint={
'data': [{
'symbol': 'triangle',
'coord': [x_pos, 0],
'symbolSize': [20, 20],
'symbolOffset': [0, 15],
'itemStyle': {
'color': 'rgb(255, 255, 255)',
'borderColor': 'rgb(0, 0, 0)',
'borderWidth': 1.5
}
}]
}
))


def add_rectangle(plt, x_start, x_end, height, idx, rectangle):
def add_line(plt, x_pos, heigth):
"""Draw a dashed vertical line on the plot's figure.

The line is drawn through ``plt._fig.line`` from ``(x_pos, 0)`` up to
``(x_pos, heigth)``, two units wide, dashed, in black at 40% opacity.

:param plt: plot object exposing a ``_fig`` attribute with a ``line`` method.
:param x_pos: x coordinate at which the line is drawn.
:param heigth: y coordinate of the top of the line (the parameter name is
spelled this way in the signature).
"""
# Both points share the same x value, so the segment is vertical.
plt._fig.line(
[x_pos, x_pos],
[0, heigth],
line_width=2,
line_dash='dashed',
color='rgba(0, 0, 0, 0.4)'
)


def add_rectangle(plt, x_start, x_end, height, rectangle):
"""Draw rectangle."""
if rectangle == 'normal':
colour = 'rgba(144, 198, 231, 0.4)'
elif rectangle == 'pathogenic':
colour = 'rgba(239, 65, 53, 0.4)'
plt.add_dataset(dict(
source=[[x_start, x_end, height]],
dimensions=['x_starts', 'ends', 'heights']
))
plt.add_series(dict(
type='custom',
name=rectangle+" range",
renderItem=MakeRectangles(),
datasetIndex=idx,
encode={
'x': ['x_starts', 'ends'],
'y': ['heights']
},
itemStyle={
'color': colour},
clip=True
))
# X/Y values in bokeh rect point to the central position on the
# axis of the figure.
plt._fig.rect(
x=(x_end+x_start)/2,
y=0 + height/2,
width=x_end-x_start,
height=height,
legend_label=rectangle,
fill_color=colour,
line_color=colour
)

plt._fig.legend.location = "top"
plt._fig.legend.orientation = "horizontal"


def parse_vcf(fname, info_cols=None, nrows=None):
Expand Down Expand Up @@ -204,17 +190,22 @@ def histogram_with_mean_and_median(
raise ValueError("`series` must be `pd.Series`.")

plt = histplot(data=series, bins=bins)
plt.title = dict(
text=title,
subtext=(
f"Mean: {series.mean().round(round_digits)}. "
f"Median: {series.median().round(round_digits)}"
),
subtext = (
f"Mean: {series.mean().round(round_digits)}. " +
f"Median: {series.median().round(round_digits)}"
)
plt._fig.add_layout(
Title(text=subtext, text_font_size="0.8em"),
'above'
)
plt._fig.add_layout(
Title(text=title, text_font_size="1.5em"),
'above'
)
if x_axis_name is not None:
plt.xAxis.name = x_axis_name
plt._fig.xaxis.axis_label = x_axis_name
if y_axis_name is not None:
plt.yAxis.name = y_axis_name
plt._fig.yaxis.axis_label = y_axis_name
return plt


Expand All @@ -223,38 +214,25 @@ def create_str_histogram(
cn1, cn2, disease):
"""Create a histogram of STR results for a given repeat."""
h3(disease + ' (' + repeat + ')')
df = hist_data[hist_data['VARID'] == repeat]['copy_number']
plt = histplot(
data=df,
x='copy_number',
binwidth=1
)
histogram_data = plt._fig.renderers[0].data_source.to_df()

plt = histplot(data=hist_data[hist_data['VARID'] == repeat]
['copy_number'].values, binwidth=1)
histogram_data = plt.dataset[0].source
max_cols = histogram_data.max(axis=0)
max_rectangle_height = max_cols[2]

xaxis = {
'name': "Repeat number",
'nameGap': '30',
'nameLocation': 'middle',
'nameTextStyle': {'fontSize': '14', 'fontStyle': 'bold'},
'min': '0',
'max': pathologic_max
}

yaxis = {
'name': "Number of supporting reads",
'nameGap': '30',
'nameLocation': 'middle',
'nameTextStyle': {'fontSize': '14', 'fontStyle': 'bold'},
'max': max_rectangle_height
}

plt.xAxis = xaxis
plt.yAxis = yaxis
max_rectangle_height = max_cols.top

plt._fig.xaxis.axis_label = "Repeat number"
plt._fig.yaxis.axis_label = "Number of supporting reads"

add_rectangle(
plt, 0, normal_max, max_rectangle_height, 1, 'normal'
plt, 0, normal_max, max_rectangle_height, 'normal'
)
add_rectangle(
plt, pathologic_min, pathologic_max, max_rectangle_height, 2,
plt, pathologic_min, pathologic_max, max_rectangle_height,
'pathogenic'
)

Expand All @@ -265,8 +243,14 @@ def create_str_histogram(
{"name": "pathogenic range"},
]}

add_triangle(plt, cn1, 3)
add_triangle(plt, cn2, 4)
# add_triangle(plt, cn1)
# add_triangle(plt, cn2)
add_line(plt, cn1, max_rectangle_height)
add_line(plt, cn2, max_rectangle_height)

# Remove hover
hover = plt._fig.select(dict(type=HoverTool))
hover.tooltips = None

EZChart(plt, theme='epi2melabs')

Expand Down Expand Up @@ -456,7 +440,7 @@ def make_report(
"""
The tabs below contain short tandem repeat (STR) expansion plots for each
repeat genotyped in the sample. The coloured boxes denote the normal and
pathogenic ranges of repeat numbers, and the triangles denote the median
pathogenic ranges of repeat numbers, and the dashed lines denote the median
number of repeats in each allele.
"""
)
Expand Down Expand Up @@ -693,7 +677,10 @@ def make_report(
)

plt = fastcat.read_length_plot(read_lengths)
plt.xAxis.max = max_read_length_to_show
plt._fig.x_range.end = max_read_length_to_show
# Add tooltips
hover = plt._fig.select(dict(type=HoverTool))
hover.tooltips = [("Number of reads", "@top")]
EZChart(plt, theme='epi2melabs')

plt = histogram_with_mean_and_median(
Expand All @@ -702,6 +689,9 @@ def make_report(
x_axis_name="Quality",
y_axis_name="Number of reads"
)
# Add tooltips
hover = plt._fig.select(dict(type=HoverTool))
hover.tooltips = [("Number of reads", "@top")]
EZChart(plt, theme='epi2melabs')

return report
Expand Down
21 changes: 11 additions & 10 deletions bin/workflow_glue/report_sv.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import sys

from bokeh.models import HoverTool, Title
from dominate.tags import a, h4, p
from ezcharts.components.ezchart import EZChart
from ezcharts.components.reports.labs import LabsReport
Expand Down Expand Up @@ -194,16 +195,16 @@ def sv_size_plots(vcf_data, max_size=5000):
# Add deletion plot
plt = histplot(data=indels, bins=bins, stat='count')
# override excharts axisLabel interval
plt.xAxis = dict(
name='Length',
axisLabel=dict(
interval="auto",
rotate=30
),
max=max_size,
min=-max_size
)
plt.title = {"text": "Indels size distribution"}
plt._fig.add_layout(
Title(text="Indels size distribution", text_font_size="1.5em"),
'above'
)
plt._fig.xaxis.axis_label = 'Length'
plt._fig.x_range.start = -max_size
plt._fig.x_range.end = max_size
# Add tooltips
hover = plt._fig.select(dict(type=HoverTool))
hover.tooltips = [("Number of variants", "@top")]
EZChart(plt, 'epi2melabs')
p("The plot shows Indels with |length| < 5Kb.")
else:
Expand Down
Loading

0 comments on commit 7e28289

Please sign in to comment.