beacon-biosignals · haberdashPI · Jan 31, 2025 · Jan 28, 2025 · Jan 28, 2025 · Jan 28, 2025
diff --git a/OndaEDFSchemas.jl/Project.toml b/OndaEDFSchemas.jl/Project.toml
@@ -1,7 +1,7 @@
 name = "OndaEDFSchemas"
 uuid = "9c87d999-769b-4741-85b2-6f554d09e731"
 authors = ["Beacon Biosignals, Inc."]
-version = "0.2.2"
+version = "0.3.0"
 
 [deps]
 Legolas = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd"

diff --git a/OndaEDFSchemas.jl/src/OndaEDFSchemas.jl b/OndaEDFSchemas.jl/src/OndaEDFSchemas.jl
@@ -7,7 +7,7 @@
             AnnotationV1
 using UUIDs
 
-export PlanV1, PlanV2, FilePlanV1, FilePlanV2, EDFAnnotationV1
+export PlanV1, PlanV2, PlanV3, FilePlanV1, FilePlanV2, FilePlanV3, EDFAnnotationV1
 
 @schema "ondaedf.plan" Plan
 
@@ -65,7 +65,33 @@
     error::Union{Nothing,String} = coalesce(error, nothing)
 end
 
-
+@version PlanV3 begin
+    # EDF.SignalHeader fields (one plan row per EDF signal header)
+    label::String
+    transducer_type::String
+    physical_dimension::String
+    physical_minimum::Float32
+    physical_maximum::Float32
+    digital_minimum::Float32
+    digital_maximum::Float32
+    prefilter::String
+    samples_per_record::Int32  # Int32 in V3; the PlanV1/PlanV2 docs list this field as Int16
+    # EDF.FileHeader field (the same file-level value is passed for every signal)
+    seconds_per_record::Float64
+    # Onda.SignalV2 fields (channels -> channel), may be missing
+    recording::Union{UUID,Missing} = lift(UUID, recording)
+    sensor_type::Union{Missing,AbstractString} = lift(_validate_signal_sensor_type, sensor_type)
+    sensor_label::Union{Missing,AbstractString} = lift(_validate_signal_sensor_label,
+                                                       coalesce(sensor_label, sensor_type))  # falls back to sensor_type when sensor_label is missing
+    channel::Union{Missing,AbstractString} = lift(_validate_signal_channel, channel)
+    sample_unit::Union{Missing,AbstractString} = lift(String, sample_unit)
+    sample_resolution_in_unit::Union{Missing,Float64}
+    sample_offset_in_unit::Union{Missing,Float64}
+    sample_type::Union{Missing,AbstractString} = lift(onda_sample_type_from_julia_type, sample_type)
+    sample_rate::Union{Missing,Float64}
+    # errors, use `nothing` to indicate no error (a `missing` error is coalesced to `nothing`)
+    error::Union{Nothing,String} = coalesce(error, nothing)
+end
 
 const PLAN_DOC_TEMPLATE = """
     @version PlanV{{ VERSION }} begin
@@ -78,7 +104,7 @@
         digital_minimum::Float32
         digital_maximum::Float32
         prefilter::String
-        samples_per_record::Int16
+        samples_per_record::{{ SAMPLES_PER_RECORD_TYPE }}
         # EDF.FileHeader field
         seconds_per_record::Float64
         # Onda.SignalV{{ VERSION }} fields (channels -> channel), may be missing
@@ -108,19 +134,30 @@
 function _plan_doc(v)
+    # Render the docstring for plan schema version `v` by filling in the
+    # placeholders of `PLAN_DOC_TEMPLATE`; unknown versions throw `ArgumentError`.
     uniques = if v == 1
         ["kind::Union{Missing,AbstractString}"]
-    elseif v == 2
+    elseif v == 2 || v == 3
         ["sensor_type::Union{Missing,AbstractString}",
          "sensor_label::Union{Missing,AbstractString}"]
     else
         throw(ArgumentError("Invalid version"))
     end
+    # V1 and V2 document `samples_per_record` as Int16, V3 as Int32.
+    samples_per_record_type = v in (1, 2) ? "Int16" : "Int32"
+    # PlanV3 still mirrors `Onda.SignalV2`, so the Onda version named in the
+    # docstring is capped at 2 rather than following the plan version.
+    onda_signal_version = min(v, 2)
     unique_lines = join(map(s -> "        $s", uniques), "\n")
-    s = replace(PLAN_DOC_TEMPLATE, "{{ VERSION }}" => v)
+    s = replace(PLAN_DOC_TEMPLATE,
+                "Onda.SignalV{{ VERSION }}" => "Onda.SignalV$(onda_signal_version)")
+    s = replace(s, "{{ VERSION }}" => v)
+    s = replace(s, "{{ SAMPLES_PER_RECORD_TYPE }}" => samples_per_record_type)
     return replace(s, "{{ SAMPLES_INFO_UNIQUE_FIELDS }}" => unique_lines)
 end
 
 @doc _plan_doc(1) PlanV1
 @doc _plan_doc(2) PlanV2
+@doc _plan_doc(3) PlanV3
 
 @schema "ondaedf.file-plan" FilePlan
 
@@ -134,6 +163,11 @@
     onda_signal_index::Int
 end
 
+@version FilePlanV3 > PlanV3 begin
+    edf_signal_index::Int  # position of the source signal among the file's `EDF.Signal` entries
+    onda_signal_index::Int  # index of the destination Onda signal (group) this row belongs to
+end
+
 const FILE_PLAN_DOC_TEMPLATE = """
     @version FilePlanV{{ VERSION }} > PlanV{{ VERSION }} begin
         edf_signal_index::Int
@@ -158,8 +192,11 @@
 
 @doc _file_plan_doc(1) FilePlanV1
 @doc _file_plan_doc(2) FilePlanV2
+@doc _file_plan_doc(3) FilePlanV3
 
-const OndaEDFSchemaVersions = Union{PlanV1SchemaVersion,PlanV2SchemaVersion,FilePlanV1SchemaVersion,FilePlanV2SchemaVersion}
+const OndaEDFSchemaVersions = Union{PlanV1SchemaVersion,FilePlanV1SchemaVersion,
+                                    PlanV2SchemaVersion,FilePlanV2SchemaVersion,
+                                    PlanV3SchemaVersion,FilePlanV3SchemaVersion}  # dispatch target for the `Legolas.accepted_field_type` method below
 Legolas.accepted_field_type(::OndaEDFSchemaVersions, ::Type{String}) = AbstractString
 # we need this because Arrow write can introduce a Missing for the error column
 # (I think because of how missing/nothing sentinels are handled?)

diff --git a/OndaEDFSchemas.jl/test/runtests.jl b/OndaEDFSchemas.jl/test/runtests.jl
@@ -19,7 +19,7 @@ function mock_plan(; v, rng=GLOBAL_RNG)
     ingested = rand(rng, Bool)
     specific_kwargs = if v == 1
         (; kind=ingested ? "eeg" : missing)
-    elseif v == 2
+    elseif v in (2, 3)
         (; sensor_type=ingested ? "eeg" : missing,
          sensor_label=ingested ? "eeg" : missing)
     else
@@ -63,7 +63,7 @@ end
 
 @testset "Schema version $v" for v in (1, 2)
     SamplesInfo = v == 1 ? Onda.SamplesInfoV1 : SamplesInfoV2
-    
+
     @testset "ondaedf.plan@$v" begin
         rng = StableRNG(10)
         plans = mock_plan(30; v, rng)

diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "OndaEDF"
 uuid = "e3ed2cd1-99bf-415e-bb8f-38f4b42a544e"
 authors = ["Beacon Biosignals, Inc."]
-version = "0.12.4"
+version = "0.13.0"
 
 [deps]
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
@@ -18,11 +18,11 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [compat]
 Compat = "3.32, 4"
-EDF = "0.7"
+EDF = "0.8"
 FilePathsBase = "0.9"
 Legolas = "0.5"
 Onda = "0.15"
-OndaEDFSchemas = "0.2.1"
+OndaEDFSchemas = "0.3.0"
 PrettyTables = "1.3, 2"
 StableRNGs = "1"
 StatsBase = "0.33, 0.34"

diff --git a/src/OndaEDF.jl b/src/OndaEDF.jl
@@ -29,7 +29,7 @@ Write a plan table to `io_or_path` using `Legolas.write`, using the
 """
 function write_plan(io_or_path, plan_table; kwargs...)
     return Legolas.write(io_or_path, plan_table,
-                         Legolas.SchemaVersion("ondaedf.file-plan", 2);
+                         Legolas.SchemaVersion("ondaedf.file-plan", 3);  # plans are written against the FilePlanV3 schema
                          kwargs...)
 end
 

diff --git a/src/import_edf.jl b/src/import_edf.jl
@@ -117,7 +117,7 @@ function match_edf_label(label, signal_names, channel_name, canonical_names)
     label = _safe_lowercase(label)
 
     # ideally, we'd do the original behavior:
-    # 
+    #
     # match exact STANDARD (or custom) signal types at beginning of label,
     # ignoring case possibly bracketed by or prepended with `[`, `]`, `,` or
     # whitespace everything after is included in the spec a.k.a. label
@@ -139,7 +139,7 @@ function match_edf_label(label, signal_names, channel_name, canonical_names)
     #
     # This is not equivalent to the original behavior in only a handful of
     # cases
-    # 
+    #
     # - if one of the `signal_names` is a suffix of the signal, like `"pap"`
     #   matching against `"xpap cpap"`.  the fix for this is to add the full
     #   signal name to the (end) of `signal_names` in the label set.
@@ -231,7 +231,7 @@ function promote_encodings(encodings; pick_offset=(_ -> 0.0), pick_resolution=mi
                 sample_resolution_in_unit=missing,
                 sample_rate=missing)
     end
-    
+
     sample_type = mapreduce(Onda.sample_type, promote_type, encodings)
 
     sample_rates = [e.sample_rate for e in encodings]
@@ -339,7 +339,7 @@ As an example, here is (a subset of) the default labels for ECG signals:
 
 ```julia
 ["ecg", "ekg"] => ["i" => ["1"], "ii" => ["2"], "iii" => ["3"],
-                   "avl"=> ["ecgl", "ekgl", "ecg", "ekg", "l"], 
+                   "avl"=> ["ecgl", "ekgl", "ecg", "ekg", "l"],
                    "avr"=> ["ekgr", "ecgr", "r"], ...]
 ```
 
@@ -363,7 +363,7 @@ function plan_edf_to_onda_samples(header,
                             "Instead, preprocess signal header rows to before calling " *
                             "`plan_edf_to_onda_samples`"))
     end
-    
+
     row = (; header..., seconds_per_record, error=nothing)
 
     try
@@ -384,23 +384,23 @@ function plan_edf_to_onda_samples(header,
                 channel_name = canonical_channel_name(canonical)
 
                 matched = match_edf_label(edf_label, signal_names, channel_name, channel_names)
-                
+
                 if matched !== nothing
                     # create SamplesInfo and return
-                    row = rowmerge(row; 
+                    row = rowmerge(row;
                                    channel=matched,
                                    sensor_type=first(signal_names),
                                    sensor_label=first(signal_names))
-                    return PlanV2(row)
+                    return PlanV3(row)
                 end
             end
         end
     catch e
-        return PlanV2(_errored_row(row, e))
+        return PlanV3(_errored_row(row, e))
     end
 
     # nothing matched, return the original signal header (as a namedtuple)
-    return PlanV2(row)
+    return PlanV3(row)
 end
 
 # create a table with a plan for converting this EDF file to onda: one row per
@@ -418,7 +418,7 @@ end
 
 Formulate a plan for converting an `EDF.File` to Onda Samples.  This applies
 `plan_edf_to_onda_samples` to each individual signal contained in the file,
-storing `edf_signal_index` as an additional column.  
+storing `edf_signal_index` as an additional column.
 
 The resulting rows are then passed to [`plan_edf_to_onda_samples_groups`](@ref)
 and grouped according to `onda_signal_groupby` (by default, the `:sensor_type`,
@@ -444,7 +444,7 @@ function plan_edf_to_onda_samples(edf::EDF.File;
                             "`plan_edf_to_onda_samples`.  See the OndaEDF README."))
     end
 
-    
+
     true_signals = filter(x -> isa(x, EDF.Signal), edf.signals)
     plan_rows = map(true_signals) do s
         return plan_edf_to_onda_samples(s.header, edf.header.seconds_per_record;
@@ -455,7 +455,7 @@ function plan_edf_to_onda_samples(edf::EDF.File;
     # write index of destination signal into plan to capture grouping
     plan_rows = plan_edf_to_onda_samples_groups(plan_rows; onda_signal_groupby)
 
-    return FilePlanV2.(plan_rows)
+    return FilePlanV3.(plan_rows)
 end
 
 """
@@ -479,7 +479,7 @@ function plan_edf_to_onda_samples_groups(plan_rows;
         edf_signal_index = coalesce(_get(row, :edf_signal_index), i)
         return rowmerge(row; edf_signal_index)
     end
-    
+
     grouped_rows = groupby(grouper(onda_signal_groupby), plan_rows)
     sorted_keys = sort!(collect(keys(grouped_rows)))
     plan_rows = mapreduce(vcat, enumerate(sorted_keys)) do (onda_signal_index, key)
@@ -517,20 +517,20 @@ Samples are returned in the order of `:onda_signal_index`.  Signals that could
 not be matched or otherwise caused an error during execution are not returned.
 
 If `validate=true` (the default), the plan is validated against the
-[`FilePlanV2`](@ref) schema, and the signal headers in the `EDF.File`.
+[`FilePlanV3`](@ref) schema, and the signal headers in the `EDF.File`.
 
 If `dither_storage=missing` (the default), dither storage is allocated automatically
 as specified in the docstring for `Onda.encode`. `dither_storage=nothing` disables dithering.
 
 $SAMPLES_ENCODED_WARNING
 """
 function edf_to_onda_samples(edf::EDF.File, plan_table; validate=true, dither_storage=missing)
-                             
+
     true_signals = filter(x -> isa(x, EDF.Signal), edf.signals)
-    
+
     if validate
         Legolas.validate(Tables.schema(Tables.columns(plan_table)),
-                         Legolas.SchemaVersion("ondaedf.file-plan", 2))
+                         Legolas.SchemaVersion("ondaedf.file-plan", 3))
         for row in Tables.rows(plan_table)
             signal = true_signals[row.edf_signal_index]
             signal.header.label == row.label ||
@@ -628,14 +628,14 @@ the `Onda.SamplesInfo` in `target`.  This checks for matching sample rates in
 the source signals.  If the encoding of `target` is the same as the encoding in
 a signal, its encoded (usually `Int16`) data is copied directly into the
 `Samples` data matrix; otherwise it is re-encoded.
-                                                                                        
+
 If `dither_storage=missing` (the default), dither storage is allocated automatically
-as specified in the docstring for `Onda.encode`. `dither_storage=nothing` disables dithering. 
+as specified in the docstring for `Onda.encode`. `dither_storage=nothing` disables dithering.
 See `Onda.encode`'s docstring for more details.
 
 !!! note
 
-    This function is not meant to be called directly, but through 
+    This function is not meant to be called directly, but through
     [`edf_to_onda_samples`](@ref)
 
 $SAMPLES_ENCODED_WARNING
@@ -737,7 +737,7 @@ function store_edf_as_onda(edf::EDF.File, onda_dir, recording_uuid::UUID=uuid4()
 
     signals = Onda.SignalV2[]
     edf_samples, plan = edf_to_onda_samples(edf; kwargs...)
-    
+
     errors = _get(Tables.columns(plan), :error)
     if !ismissing(errors)
         # why unique?  because errors that occur during execution get inserted
@@ -749,7 +749,7 @@ function store_edf_as_onda(edf::EDF.File, onda_dir, recording_uuid::UUID=uuid4()
             end
         end
     end
-    
+
     edf_samples = postprocess_samples(edf_samples)
     for samples in edf_samples
         sample_filename = string(recording_uuid, "_", samples.info.sensor_type, ".", file_format)

diff --git a/test/import.jl b/test/import.jl
@@ -34,7 +34,7 @@ using StableRNGs
 
             validate_extracted_signals(s.info for s in returned_samples)
         end
-        
+
         @testset "custom grouping" begin
             signal_plans = [rowmerge(plan; grp=string(plan.sensor_type, plan.sample_unit, plan.sample_rate))
                             for plan in signal_plans]
@@ -74,10 +74,10 @@ using StableRNGs
             grouped_plans_rev_bad = plan_edf_to_onda_samples_groups(plans_rev_bad)
             @test_throws(ArgumentError("Plan's label EcG EKGL does not match EDF label EEG C3-M2!"),
                          edf_to_onda_samples(edf, grouped_plans_rev_bad))
-            
+
         end
     end
-    
+
     @testset "store_edf_as_onda" begin
         n_records = 100
         edf, edf_channel_indices = make_test_data(StableRNG(42), 256, 512, n_records)
@@ -105,7 +105,7 @@ using StableRNGs
 
         signals = Dict(s.sensor_type => s for s in nt.signals)
 
-        @testset "Signal roundtrip" begin 
+        @testset "Signal roundtrip" begin
             for (signal_name, edf_indices) in edf_channel_indices
                 @testset "$signal_name" begin
                     onda_samples = load(signals[string(signal_name)]).data
@@ -243,7 +243,7 @@ using StableRNGs
         unitless_plan = plan_edf_to_onda_samples(one_signal, 1.0; units=["millivolt" => ["mV"]])
         @test unitless_plan.error === nothing
         @test ismissing(unitless_plan.sample_unit)
-        
+
         # error on execution
         plans = plan_edf_to_onda_samples(edf)
         # intentionally combine signals of different sensor_types
@@ -293,11 +293,11 @@ using StableRNGs
         edf, _ = make_test_data(StableRNG(42), 256, 512, 100, Int16)
         plan = plan_edf_to_onda_samples(edf)
         @test validate(Tables.schema(plan),
-                       SchemaVersion("ondaedf.file-plan", 2)) === nothing
+                       SchemaVersion("ondaedf.file-plan", 3)) === nothing
 
         samples, plan_exec = edf_to_onda_samples(edf, plan)
         @test validate(Tables.schema(plan_exec),
-                       SchemaVersion("ondaedf.file-plan", 2)) === nothing
+                       SchemaVersion("ondaedf.file-plan", 3)) === nothing
 
         plan_rt = let io=IOBuffer()
             OndaEDF.write_plan(io, plan)