Skip to content

Commit

Permalink
DOC: Automatic flowchart
Browse files Browse the repository at this point in the history
  • Loading branch information
larsoner committed Feb 23, 2024
1 parent 4e6733d commit f09b68b
Show file tree
Hide file tree
Showing 10 changed files with 152 additions and 75 deletions.
1 change: 1 addition & 0 deletions docs/source/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
features/steps.md
features/overview.md
145 changes: 139 additions & 6 deletions docs/source/features/gen_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,92 @@

from mne_bids_pipeline._config_utils import _get_step_modules

pre = """\
# Detailed lis of processing steps
autogen_header = f"""\
[//]: # (AUTO-GENERATED, TO CHANGE EDIT {'/'.join(Path(__file__).parts[-4:])})
"""

steps_pre = f"""\
{autogen_header}
# Detailed list of processing steps
The following table provides a concise summary of each processing step. The
step names can be used to run individual steps or entire groups of steps by
passing their name(s) to `mne_bids_pipeline` via the `steps=...` argument.
""" # noqa: E501

overview_pre = f"""\
{autogen_header}
MNE-BIDS-Pipeline processes your data in a sequential manner, i.e., one step
at a time. The next step is only run after the previous steps have been
successfully completed. There are, of course, exceptions; for example, if you
chose not to apply ICA, the respective steps will simply be omitted and we'll
directly move to the subsequent steps. The following flow chart aims to give
you a brief overview of which steps are included in the pipeline, in which
order they are run, and how we group them together.
!!! info
All intermediate results are saved to disk for later
inspection, and an **extensive report** is generated.
!!! info
Analyses are conducted on individual (per-subject) as well as group level.
"""

icon_map = {
"Filesystem initialization and dataset inspection": ":open_file_folder:",
"Preprocessing": ":broom:",
"Sensor-space analysis": ":satellite:",
"Source-space analysis": ":brain:",
"FreeSurfer-related processing": ":person_surfing:",
}
out_dir = Path(__file__).parent

print("Generating steps …")
step_modules = _get_step_modules()
char_start = ord("A")

# In principle we could try to sort this out based on naming, but for now let's just
# set our hierarchy manually and update it when we move files around since that's easy
# (and rare) enough to do.
manual_order = {
"Preprocessing": (
("01", "02"),
("02", "03"),
("03", "04"),
("04", "05"),
("05", "06a"),
("05", "06b"),
("05", "07"),
# technically we could have the raw data flow here, but it doesn't really help
# ("05", "08a"),
# ("05", "08b"),
("06a", "08a"),
("07", "08a"),
# Force the artifact-fitting and epoching steps on the same level, in this order
"""\
subgraph Z[" "]
direction LR
B06a
B07
B06b
end
style Z fill:#0000,stroke-width:0px
""",
("06b", "08b"),
("07", "08b"),
("08a", "09"),
("08b", "09"),
),
}

# Construct the lines of steps.md
lines = [pre]
lines = [steps_pre]
overview_lines = [overview_pre]
used_titles = set()
for di, (dir_, modules) in enumerate(step_modules.items(), 1):
# Steps
if dir_ == "all":
continue # this is an alias
dir_module = importlib.import_module(f"mne_bids_pipeline.steps.{dir_}")
Expand All @@ -29,7 +101,9 @@
dir_body = dir_body[1].strip()
else:
dir_body = ""
lines.append(f"## {di}. {dir_header}\n")
icon = icon_map[dir_header]
module_header = f"{di}. {icon} {dir_header}"
lines.append(f"## {module_header}\n")
if dir_body:
lines.append(f"{dir_body}\n")
lines.append("| Step name | Description |")
Expand All @@ -42,5 +116,64 @@
step_title = module.__doc__.split("\n")[0]
lines.append(f"`{step_name}` | {step_title} |")
lines.append("")
with open(Path(__file__).parent / "steps.md", "w") as fid:
fid.write("\n".join(lines))

# Overview
overview_lines.append(
f"""\
## {module_header}
```mermaid
flowchart TD"""
)
chr_pre = chr(char_start + di - 1) # A, B, C, ...
start = None
prev_idx = None
title_map = {}
for mi, module in enumerate(modules, 1):
step_title = module.__doc__.split("\n")[0].rstrip(".")
idx = module.__name__.split(".")[-1].split("_")[1] # 01, 05a, etc.
# Need to quote the title to deal with parens, and sanitize quotes
step_title = step_title.replace('"', "'")
assert step_title not in used_titles, f"Redundant title: {step_title}"
used_titles.add(step_title)
this_block = f'{chr_pre}{idx}["{step_title}"]'
# special case: manual order
title_map[idx] = step_title
if dir_header in manual_order:
continue
if mi == 1:
start = this_block
assert prev_idx is None
continue
if start is not None:
assert mi == 2, mi
overview_lines.append(f" {start} --> {this_block}")
start = None
else:
overview_lines.append(f" {chr_pre}{prev_idx} --> {this_block}")
prev_idx = idx
if dir_header in manual_order:
mapped = set()
for a_b in manual_order[dir_header]:
if isinstance(a_b, str): # insert directly
overview_lines.append(a_b)
continue
assert isinstance(a_b, tuple), type(a_b)
a_b = list(a_b) # allow modification
for ii, idx in enumerate(a_b):
assert idx in title_map, (dir_header, sorted(title_map))
if idx not in mapped:
mapped.add(idx)
a_b[ii] = f'{idx}["{title_map[idx]}"]'
overview_lines.append(f" {chr_pre}{a_b[0]} --> {chr_pre}{a_b[1]}")
all_steps = set(
sum(
[a_b for a_b in manual_order[dir_header] if not isinstance(a_b, str)],
(),
)
)
assert mapped == all_steps, all_steps.symmetric_difference(mapped)
overview_lines.append("```\n")

(out_dir / "steps.md").write_text("\n".join(lines), encoding="utf8")
(out_dir / "overview.md").write_text("\n".join(overview_lines), encoding="utf8")
53 changes: 0 additions & 53 deletions docs/source/features/overview.md

This file was deleted.

1 change: 1 addition & 0 deletions docs/source/v1.6.md.inc
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,4 @@
- The package build backend has been switched from `setuptools` to `hatchling`. (#825 by @hoechenberger)
- Code formatting now uses `ruff format` instead of `black` (#834, #838 by @larsoner)
- Code caching is now tested using GitHub Actions (#836 by @larsoner)
- Steps in the documentation are now automatically parsed into flowcharts (#859 by @larsoner)
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
"""Run Signal Subspace Projections (SSP) for artifact correction.
These are often also referred to as PCA vectors.
"""
"""Temporal regression for artifact removal."""

from types import SimpleNamespace
from typing import Optional
Expand Down
9 changes: 4 additions & 5 deletions mne_bids_pipeline/steps/preprocessing/_06a_run_ica.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
"""Run Independent Component Analysis (ICA) for artifact correction.
"""Fit ICA.
This fits ICA on epoched data filtered with 1 Hz highpass,
for this purpose only using fastICA. Separate ICAs are fitted and stored for
MEG and EEG data.
This fits Independent Component Analysis (ICA) on raw data filtered with 1 Hz highpass,
temporarily creating task-related epochs.
Before performing ICA, we reject epochs based on peak-to-peak amplitude above
the 'ica_reject' to filter massive non-biological artifacts.
To actually remove designated ICA components from your data, you will have to
run 05a-apply_ica.py.
run the apply_ica step.
"""

from collections.abc import Iterable
Expand Down
3 changes: 2 additions & 1 deletion mne_bids_pipeline/steps/preprocessing/_06b_run_ssp.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Run Signal Subspace Projections (SSP) for artifact correction.
"""Compute SSP.
Signal subspace projections (SSP) vectors are computed from EOG and ECG signals.
These are often also referred to as PCA vectors.
"""

Expand Down
3 changes: 1 addition & 2 deletions mne_bids_pipeline/steps/preprocessing/_08a_apply_ica.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Apply ICA and obtain the cleaned epochs and raw data.
"""Apply ICA.
Blinks and ECG artifacts are automatically detected and the corresponding ICA
components are removed from the data.
Expand All @@ -8,7 +8,6 @@
make sure you did not re-run the ICA in the meantime. Otherwise (especially if
the random state was not set, or you used a different machine), the component
order might differ.
"""

from types import SimpleNamespace
Expand Down
3 changes: 1 addition & 2 deletions mne_bids_pipeline/steps/preprocessing/_08b_apply_ssp.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""Apply SSP projections and obtain the cleaned epochs and raw data.
"""Apply SSP.
Blinks and ECG artifacts are automatically detected and the corresponding SSP
projections components are removed from the data.
"""

from types import SimpleNamespace
Expand Down
4 changes: 2 additions & 2 deletions mne_bids_pipeline/steps/preprocessing/_09_ptp_reject.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Remove epochs based on peak-to-peak (PTP) amplitudes.
"""Remove epochs based on PTP amplitudes.
Epochs containing peak-to-peak above the thresholds defined
Epochs containing peak-to-peak (PTP) above the thresholds defined
in the 'reject' parameter are removed from the data.
This step will drop epochs containing non-biological artifacts
Expand Down

0 comments on commit f09b68b

Please sign in to comment.