Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support EDF.jl 0.8 #104

Merged
merged 11 commits into from
Jan 31, 2025
2 changes: 1 addition & 1 deletion OndaEDFSchemas.jl/Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "OndaEDFSchemas"
uuid = "9c87d999-769b-4741-85b2-6f554d09e731"
authors = ["Beacon Biosignals, Inc."]
version = "0.2.2"
version = "0.3.0"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is breaking since it's just adding new schemas that use the widened field type rather than changing any existing schemas.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now this only supports EDF 0.8 (not 0.7 and earlier). This means that the returned value of a number of functions is FilePlanV3 / PlanV3 instead of V2.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see, I somehow missed that this is for OndaEDFSchemas.


[deps]
Legolas = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd"
Expand Down
47 changes: 42 additions & 5 deletions OndaEDFSchemas.jl/src/OndaEDFSchemas.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
AnnotationV1
using UUIDs

export PlanV1, PlanV2, FilePlanV1, FilePlanV2, EDFAnnotationV1
export PlanV1, PlanV2, PlanV3, FilePlanV1, FilePlanV2, FilePlanV3, EDFAnnotationV1

@schema "ondaedf.plan" Plan

Expand Down Expand Up @@ -65,7 +65,33 @@
error::Union{Nothing,String} = coalesce(error, nothing)
end


@version PlanV3 begin
# EDF.SignalHeader fields
label::String
transducer_type::String
physical_dimension::String
physical_minimum::Float32
physical_maximum::Float32
digital_minimum::Float32
digital_maximum::Float32
prefilter::String
samples_per_record::Int32
# EDF.FileHeader field
seconds_per_record::Float64
# Onda.SignalV2 fields (channels -> channel), may be missing
recording::Union{UUID,Missing} = lift(UUID, recording)
sensor_type::Union{Missing,AbstractString} = lift(_validate_signal_sensor_type, sensor_type)
sensor_label::Union{Missing,AbstractString} = lift(_validate_signal_sensor_label,
coalesce(sensor_label, sensor_type))
channel::Union{Missing,AbstractString} = lift(_validate_signal_channel, channel)
sample_unit::Union{Missing,AbstractString} = lift(String, sample_unit)
sample_resolution_in_unit::Union{Missing,Float64}
sample_offset_in_unit::Union{Missing,Float64}
sample_type::Union{Missing,AbstractString} = lift(onda_sample_type_from_julia_type, sample_type)
sample_rate::Union{Missing,Float64}
# errors, use `nothing` to indicate no error
error::Union{Nothing,String} = coalesce(error, nothing)
end

const PLAN_DOC_TEMPLATE = """
@version PlanV{{ VERSION }} begin
Expand All @@ -78,7 +104,7 @@
digital_minimum::Float32
digital_maximum::Float32
prefilter::String
samples_per_record::Int16
samples_per_record::{{ SAMPLES_PER_RECORD_TYPE }}
# EDF.FileHeader field
seconds_per_record::Float64
# Onda.SignalV{{ VERSION }} fields (channels -> channel), may be missing
Expand Down Expand Up @@ -108,19 +134,22 @@
function _plan_doc(v)
uniques = if v == 1
["kind::Union{Missing,AbstractString}"]
elseif v == 2
elseif v == 2 || v == 3

Check warning on line 137 in OndaEDFSchemas.jl/src/OndaEDFSchemas.jl

View check run for this annotation

Codecov / codecov/patch

OndaEDFSchemas.jl/src/OndaEDFSchemas.jl#L137

Added line #L137 was not covered by tests
["sensor_type::Union{Missing,AbstractString}",
"sensor_label::Union{Missing,AbstractString}"]
else
throw(ArgumentError("Invalid version"))
end
samples_per_record_type = v in (1,2) ? "Int16" : "Int32"

Check warning on line 143 in OndaEDFSchemas.jl/src/OndaEDFSchemas.jl

View check run for this annotation

Codecov / codecov/patch

OndaEDFSchemas.jl/src/OndaEDFSchemas.jl#L143

Added line #L143 was not covered by tests
unique_lines = join(map(s -> " $s", uniques), "\n")
s = replace(PLAN_DOC_TEMPLATE, "{{ VERSION }}" => v)
s = replace(s, "{{ SAMPLES_PER_RECORD_TYPE }}" => samples_per_record_type)

Check warning on line 146 in OndaEDFSchemas.jl/src/OndaEDFSchemas.jl

View check run for this annotation

Codecov / codecov/patch

OndaEDFSchemas.jl/src/OndaEDFSchemas.jl#L146

Added line #L146 was not covered by tests
return replace(s, "{{ SAMPLES_INFO_UNIQUE_FIELDS }}" => unique_lines)
end

@doc _plan_doc(1) PlanV1
@doc _plan_doc(2) PlanV2
@doc _plan_doc(3) PlanV3

@schema "ondaedf.file-plan" FilePlan

Expand All @@ -134,6 +163,11 @@
onda_signal_index::Int
end

@version FilePlanV3 > PlanV3 begin
edf_signal_index::Int
onda_signal_index::Int
end

const FILE_PLAN_DOC_TEMPLATE = """
@version FilePlanV{{ VERSION }} > PlanV{{ VERSION }} begin
edf_signal_index::Int
Expand All @@ -158,8 +192,11 @@

@doc _file_plan_doc(1) FilePlanV1
@doc _file_plan_doc(2) FilePlanV2
@doc _file_plan_doc(3) FilePlanV3

const OndaEDFSchemaVersions = Union{PlanV1SchemaVersion,PlanV2SchemaVersion,FilePlanV1SchemaVersion,FilePlanV2SchemaVersion}
const OndaEDFSchemaVersions = Union{PlanV1SchemaVersion,FilePlanV1SchemaVersion,
PlanV2SchemaVersion,FilePlanV2SchemaVersion,
PlanV3SchemaVersion,FilePlanV3SchemaVersion}
Legolas.accepted_field_type(::OndaEDFSchemaVersions, ::Type{String}) = AbstractString
# we need this because Arrow write can introduce a Missing for the error column
# (I think because of how missing/nothing sentinels are handled?)
Expand Down
4 changes: 2 additions & 2 deletions OndaEDFSchemas.jl/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ function mock_plan(; v, rng=GLOBAL_RNG)
ingested = rand(rng, Bool)
specific_kwargs = if v == 1
(; kind=ingested ? "eeg" : missing)
elseif v == 2
elseif v in (2, 3)
(; sensor_type=ingested ? "eeg" : missing,
sensor_label=ingested ? "eeg" : missing)
else
Expand Down Expand Up @@ -63,7 +63,7 @@ end

@testset "Schema version $v" for v in (1, 2)
SamplesInfo = v == 1 ? Onda.SamplesInfoV1 : SamplesInfoV2

@testset "ondaedf.plan@$v" begin
rng = StableRNG(10)
plans = mock_plan(30; v, rng)
Expand Down
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "OndaEDF"
uuid = "e3ed2cd1-99bf-415e-bb8f-38f4b42a544e"
authors = ["Beacon Biosignals, Inc."]
version = "0.12.4"
version = "0.13.0"

[deps]
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
Expand All @@ -18,11 +18,11 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[compat]
Compat = "3.32, 4"
EDF = "0.7"
EDF = "0.8"
FilePathsBase = "0.9"
Legolas = "0.5"
Onda = "0.15"
OndaEDFSchemas = "0.2.1"
OndaEDFSchemas = "0.3.0"
PrettyTables = "1.3, 2"
StableRNGs = "1"
StatsBase = "0.33, 0.34"
Expand Down
2 changes: 1 addition & 1 deletion src/OndaEDF.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Write a plan table to `io_or_path` using `Legolas.write`, using the
"""
function write_plan(io_or_path, plan_table; kwargs...)
return Legolas.write(io_or_path, plan_table,
Legolas.SchemaVersion("ondaedf.file-plan", 2);
Legolas.SchemaVersion("ondaedf.file-plan", 3);
kwargs...)
end

Expand Down
46 changes: 23 additions & 23 deletions src/import_edf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ function match_edf_label(label, signal_names, channel_name, canonical_names)
label = _safe_lowercase(label)

# ideally, we'd do the original behavior:
#
#
# match exact STANDARD (or custom) signal types at beginning of label,
# ignoring case possibly bracketed by or prepended with `[`, `]`, `,` or
# whitespace everything after is included in the spec a.k.a. label
Expand All @@ -139,7 +139,7 @@ function match_edf_label(label, signal_names, channel_name, canonical_names)
#
# This is not equivalent to the original behavior in only a handful of
# cases
#
#
# - if one of the `signal_names` is a suffix of the signal, like `"pap"`
# matching against `"xpap cpap"`. the fix for this is to add the full
# signal name to the (end) of `signal_names` in the label set.
Expand Down Expand Up @@ -231,7 +231,7 @@ function promote_encodings(encodings; pick_offset=(_ -> 0.0), pick_resolution=mi
sample_resolution_in_unit=missing,
sample_rate=missing)
end

sample_type = mapreduce(Onda.sample_type, promote_type, encodings)

sample_rates = [e.sample_rate for e in encodings]
Expand Down Expand Up @@ -339,7 +339,7 @@ As an example, here is (a subset of) the default labels for ECG signals:

```julia
["ecg", "ekg"] => ["i" => ["1"], "ii" => ["2"], "iii" => ["3"],
"avl"=> ["ecgl", "ekgl", "ecg", "ekg", "l"],
"avl"=> ["ecgl", "ekgl", "ecg", "ekg", "l"],
"avr"=> ["ekgr", "ecgr", "r"], ...]
```

Expand All @@ -363,7 +363,7 @@ function plan_edf_to_onda_samples(header,
"Instead, preprocess signal header rows to before calling " *
"`plan_edf_to_onda_samples`"))
end

row = (; header..., seconds_per_record, error=nothing)

try
Expand All @@ -384,23 +384,23 @@ function plan_edf_to_onda_samples(header,
channel_name = canonical_channel_name(canonical)

matched = match_edf_label(edf_label, signal_names, channel_name, channel_names)

if matched !== nothing
# create SamplesInfo and return
row = rowmerge(row;
row = rowmerge(row;
channel=matched,
sensor_type=first(signal_names),
sensor_label=first(signal_names))
return PlanV2(row)
return PlanV3(row)
end
end
end
catch e
return PlanV2(_errored_row(row, e))
return PlanV3(_errored_row(row, e))
end

# nothing matched, return the original signal header (as a namedtuple)
return PlanV2(row)
return PlanV3(row)
end

# create a table with a plan for converting this EDF file to onda: one row per
Expand All @@ -418,7 +418,7 @@ end

Formulate a plan for converting an `EDF.File` to Onda Samples. This applies
`plan_edf_to_onda_samples` to each individual signal contained in the file,
storing `edf_signal_index` as an additional column.
storing `edf_signal_index` as an additional column.

The resulting rows are then passed to [`plan_edf_to_onda_samples_groups`](@ref)
and grouped according to `onda_signal_groupby` (by default, the `:sensor_type`,
Expand All @@ -444,7 +444,7 @@ function plan_edf_to_onda_samples(edf::EDF.File;
"`plan_edf_to_onda_samples`. See the OndaEDF README."))
end


true_signals = filter(x -> isa(x, EDF.Signal), edf.signals)
plan_rows = map(true_signals) do s
return plan_edf_to_onda_samples(s.header, edf.header.seconds_per_record;
Expand All @@ -455,7 +455,7 @@ function plan_edf_to_onda_samples(edf::EDF.File;
# write index of destination signal into plan to capture grouping
plan_rows = plan_edf_to_onda_samples_groups(plan_rows; onda_signal_groupby)

return FilePlanV2.(plan_rows)
return FilePlanV3.(plan_rows)
end

"""
Expand All @@ -479,7 +479,7 @@ function plan_edf_to_onda_samples_groups(plan_rows;
edf_signal_index = coalesce(_get(row, :edf_signal_index), i)
return rowmerge(row; edf_signal_index)
end

grouped_rows = groupby(grouper(onda_signal_groupby), plan_rows)
sorted_keys = sort!(collect(keys(grouped_rows)))
plan_rows = mapreduce(vcat, enumerate(sorted_keys)) do (onda_signal_index, key)
Expand Down Expand Up @@ -517,20 +517,20 @@ Samples are returned in the order of `:onda_signal_index`. Signals that could
not be matched or otherwise caused an error during execution are not returned.

If `validate=true` (the default), the plan is validated against the
[`FilePlanV2`](@ref) schema, and the signal headers in the `EDF.File`.
[`FilePlanV3`](@ref) schema, and the signal headers in the `EDF.File`.

If `dither_storage=missing` (the default), dither storage is allocated automatically
as specified in the docstring for `Onda.encode`. `dither_storage=nothing` disables dithering.

$SAMPLES_ENCODED_WARNING
"""
function edf_to_onda_samples(edf::EDF.File, plan_table; validate=true, dither_storage=missing)

true_signals = filter(x -> isa(x, EDF.Signal), edf.signals)

if validate
Legolas.validate(Tables.schema(Tables.columns(plan_table)),
Legolas.SchemaVersion("ondaedf.file-plan", 2))
Legolas.SchemaVersion("ondaedf.file-plan", 3))
for row in Tables.rows(plan_table)
signal = true_signals[row.edf_signal_index]
signal.header.label == row.label ||
Expand Down Expand Up @@ -628,14 +628,14 @@ the `Onda.SamplesInfo` in `target`. This checks for matching sample rates in
the source signals. If the encoding of `target` is the same as the encoding in
a signal, its encoded (usually `Int16`) data is copied directly into the
`Samples` data matrix; otherwise it is re-encoded.

If `dither_storage=missing` (the default), dither storage is allocated automatically
as specified in the docstring for `Onda.encode`. `dither_storage=nothing` disables dithering.
as specified in the docstring for `Onda.encode`. `dither_storage=nothing` disables dithering.
See `Onda.encode`'s docstring for more details.

!!! note

This function is not meant to be called directly, but through
This function is not meant to be called directly, but through
[`edf_to_onda_samples`](@ref)

$SAMPLES_ENCODED_WARNING
Expand Down Expand Up @@ -737,7 +737,7 @@ function store_edf_as_onda(edf::EDF.File, onda_dir, recording_uuid::UUID=uuid4()

signals = Onda.SignalV2[]
edf_samples, plan = edf_to_onda_samples(edf; kwargs...)

errors = _get(Tables.columns(plan), :error)
if !ismissing(errors)
# why unique? because errors that occur during execution get inserted
Expand All @@ -749,7 +749,7 @@ function store_edf_as_onda(edf::EDF.File, onda_dir, recording_uuid::UUID=uuid4()
end
end
end

edf_samples = postprocess_samples(edf_samples)
for samples in edf_samples
sample_filename = string(recording_uuid, "_", samples.info.sensor_type, ".", file_format)
Expand Down
14 changes: 7 additions & 7 deletions test/import.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ using StableRNGs

validate_extracted_signals(s.info for s in returned_samples)
end

@testset "custom grouping" begin
signal_plans = [rowmerge(plan; grp=string(plan.sensor_type, plan.sample_unit, plan.sample_rate))
for plan in signal_plans]
Expand Down Expand Up @@ -74,10 +74,10 @@ using StableRNGs
grouped_plans_rev_bad = plan_edf_to_onda_samples_groups(plans_rev_bad)
@test_throws(ArgumentError("Plan's label EcG EKGL does not match EDF label EEG C3-M2!"),
edf_to_onda_samples(edf, grouped_plans_rev_bad))

end
end

@testset "store_edf_as_onda" begin
n_records = 100
edf, edf_channel_indices = make_test_data(StableRNG(42), 256, 512, n_records)
Expand Down Expand Up @@ -105,7 +105,7 @@ using StableRNGs

signals = Dict(s.sensor_type => s for s in nt.signals)

@testset "Signal roundtrip" begin
@testset "Signal roundtrip" begin
for (signal_name, edf_indices) in edf_channel_indices
@testset "$signal_name" begin
onda_samples = load(signals[string(signal_name)]).data
Expand Down Expand Up @@ -243,7 +243,7 @@ using StableRNGs
unitless_plan = plan_edf_to_onda_samples(one_signal, 1.0; units=["millivolt" => ["mV"]])
@test unitless_plan.error === nothing
@test ismissing(unitless_plan.sample_unit)

# error on execution
plans = plan_edf_to_onda_samples(edf)
# intentionally combine signals of different sensor_types
Expand Down Expand Up @@ -293,11 +293,11 @@ using StableRNGs
edf, _ = make_test_data(StableRNG(42), 256, 512, 100, Int16)
plan = plan_edf_to_onda_samples(edf)
@test validate(Tables.schema(plan),
SchemaVersion("ondaedf.file-plan", 2)) === nothing
SchemaVersion("ondaedf.file-plan", 3)) === nothing

samples, plan_exec = edf_to_onda_samples(edf, plan)
@test validate(Tables.schema(plan_exec),
SchemaVersion("ondaedf.file-plan", 2)) === nothing
SchemaVersion("ondaedf.file-plan", 3)) === nothing

plan_rt = let io=IOBuffer()
OndaEDF.write_plan(io, plan)
Expand Down
Loading