Support EDF.jl 0.8 #104

Merged: 11 commits, Jan 31, 2025
2 changes: 1 addition & 1 deletion OndaEDFSchemas.jl/Project.toml
@@ -1,7 +1,7 @@
name = "OndaEDFSchemas"
uuid = "9c87d999-769b-4741-85b2-6f554d09e731"
authors = ["Beacon Biosignals, Inc."]
version = "0.2.2"
version = "0.2.3"

[deps]
Legolas = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd"
47 changes: 42 additions & 5 deletions OndaEDFSchemas.jl/src/OndaEDFSchemas.jl
@@ -7,7 +7,7 @@
AnnotationV1
using UUIDs

export PlanV1, PlanV2, FilePlanV1, FilePlanV2, EDFAnnotationV1
export PlanV1, PlanV2, PlanV3, FilePlanV1, FilePlanV2, FilePlanV3, EDFAnnotationV1

@schema "ondaedf.plan" Plan

@@ -65,7 +65,33 @@
error::Union{Nothing,String} = coalesce(error, nothing)
end


@version PlanV3 begin
# EDF.SignalHeader fields
label::String
transducer_type::String
physical_dimension::String
physical_minimum::Float32
physical_maximum::Float32
digital_minimum::Float32
digital_maximum::Float32
prefilter::String
samples_per_record::Int32
# EDF.FileHeader field
seconds_per_record::Float64
# Onda.SignalV2 fields (channels -> channel), may be missing
recording::Union{UUID,Missing} = lift(UUID, recording)
sensor_type::Union{Missing,AbstractString} = lift(_validate_signal_sensor_type, sensor_type)
sensor_label::Union{Missing,AbstractString} = lift(_validate_signal_sensor_label,
coalesce(sensor_label, sensor_type))
channel::Union{Missing,AbstractString} = lift(_validate_signal_channel, channel)
sample_unit::Union{Missing,AbstractString} = lift(String, sample_unit)
sample_resolution_in_unit::Union{Missing,Float64}
sample_offset_in_unit::Union{Missing,Float64}
sample_type::Union{Missing,AbstractString} = lift(onda_sample_type_from_julia_type, sample_type)
sample_rate::Union{Missing,Float64}
# errors, use `nothing` to indicate no error
error::Union{Nothing,String} = coalesce(error, nothing)
end

const PLAN_DOC_TEMPLATE = """
@version PlanV{{ VERSION }} begin
@@ -78,7 +104,7 @@
digital_minimum::Float32
digital_maximum::Float32
prefilter::String
samples_per_record::Int16
samples_per_record::{{ SAMPLES_PER_RECORD_TYPE }}
# EDF.FileHeader field
seconds_per_record::Float64
# Onda.SignalV{{ VERSION }} fields (channels -> channel), may be missing
@@ -108,19 +134,22 @@
function _plan_doc(v)
uniques = if v == 1
["kind::Union{Missing,AbstractString}"]
elseif v == 2
elseif v == 2 || v == 3
["sensor_type::Union{Missing,AbstractString}",
"sensor_label::Union{Missing,AbstractString}"]
else
throw(ArgumentError("Invalid version"))
end
samples_per_record_type = v in (1,2) ? "Int16" : "Int32"
unique_lines = join(map(s -> " $s", uniques), "\n")
s = replace(PLAN_DOC_TEMPLATE, "{{ VERSION }}" => v)
s = replace(s, "{{ SAMPLES_PER_RECORD_TYPE }}" => samples_per_record_type)
return replace(s, "{{ SAMPLES_INFO_UNIQUE_FIELDS }}" => unique_lines)
end

@doc _plan_doc(1) PlanV1
@doc _plan_doc(2) PlanV2
@doc _plan_doc(3) PlanV3

@schema "ondaedf.file-plan" FilePlan

@@ -134,6 +163,11 @@
onda_signal_index::Int
end

@version FilePlanV3 > PlanV3 begin
edf_signal_index::Int
onda_signal_index::Int
end

const FILE_PLAN_DOC_TEMPLATE = """
@version FilePlanV{{ VERSION }} > PlanV{{ VERSION }} begin
edf_signal_index::Int
@@ -158,8 +192,11 @@

@doc _file_plan_doc(1) FilePlanV1
@doc _file_plan_doc(2) FilePlanV2
@doc _file_plan_doc(3) FilePlanV3

const OndaEDFSchemaVersions = Union{PlanV1SchemaVersion,PlanV2SchemaVersion,FilePlanV1SchemaVersion,FilePlanV2SchemaVersion}
const OndaEDFSchemaVersions = Union{PlanV1SchemaVersion,FilePlanV1SchemaVersion,
PlanV2SchemaVersion,FilePlanV2SchemaVersion,
PlanV3SchemaVersion,FilePlanV3SchemaVersion}
Legolas.accepted_field_type(::OndaEDFSchemaVersions, ::Type{String}) = AbstractString
# we need this because Arrow write can introduce a Missing for the error column
# (I think because of how missing/nothing sentinels are handled?)
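For a quick sense of the new schema version, here is a sketch of constructing a v3 plan row and its file-plan counterpart by hand. The field values are illustrative only (loosely following the mock rows in the test changes below, which this PR also updates); structurally, the notable difference from v2 is `samples_per_record::Int32`:

```julia
using OndaEDFSchemas, Tables, UUIDs

# illustrative values, not from the PR
plan = PlanV3(; label="EEG CZ-M1",
              transducer_type="Ag-Cl electrode",
              physical_dimension="uV",
              physical_minimum=0.0, physical_maximum=2.0,
              digital_minimum=-2048.0, digital_maximum=2048.0,
              prefilter="HP 0.1Hz; LP 80Hz",          # hypothetical prefilter string
              samples_per_record=Int32(128),           # widened from Int16 in v1/v2
              seconds_per_record=1.0,
              recording=uuid4(),
              sensor_type="eeg", sensor_label="eeg", channel="cz-m1",
              sample_unit="microvolt",
              sample_resolution_in_unit=0.25, sample_offset_in_unit=0.0,
              sample_type="int16", sample_rate=128.0,
              error=nothing)

# FilePlanV3 extends PlanV3 with the two index columns:
file_plan = FilePlanV3(Tables.rowmerge(plan; edf_signal_index=1, onda_signal_index=1))
```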
22 changes: 17 additions & 5 deletions OndaEDFSchemas.jl/test/runtests.jl
@@ -19,14 +19,20 @@ function mock_plan(; v, rng=GLOBAL_RNG)
ingested = rand(rng, Bool)
specific_kwargs = if v == 1
(; kind=ingested ? "eeg" : missing)
elseif v == 2
elseif v in (2, 3)
(; sensor_type=ingested ? "eeg" : missing,
sensor_label=ingested ? "eeg" : missing)
else
error("Invalid version")
end
errored = !ingested && rand(rng, Bool)
PlanVersion = v == 1 ? PlanV1 : PlanV2
PlanVersion = if v == 1
PlanV1
elseif v == 2
PlanV2
else
PlanV3
end
return PlanVersion(; label="EEG CZ-M1",
transducer_type="Ag-Cl electrode",
physical_dimension="uV",
@@ -55,15 +61,21 @@

function mock_file_plan(; v, rng=GLOBAL_RNG)
plan = mock_plan(; v, rng)
PlanVersion = v == 1 ? FilePlanV1 : FilePlanV2
PlanVersion = if v == 1
FilePlanV1
elseif v == 2
FilePlanV2
else
FilePlanV3
end
return PlanVersion(Tables.rowmerge(plan;
edf_signal_index=rand(rng, Int),
onda_signal_index=rand(rng, Int)))
end

@testset "Schema version $v" for v in (1, 2)
@testset "Schema version $v" for v in (1, 2, 3)
SamplesInfo = v == 1 ? Onda.SamplesInfoV1 : SamplesInfoV2

@testset "ondaedf.plan@$v" begin
rng = StableRNG(10)
plans = mock_plan(30; v, rng)
10 changes: 6 additions & 4 deletions Project.toml
@@ -1,7 +1,7 @@
name = "OndaEDF"
uuid = "e3ed2cd1-99bf-415e-bb8f-38f4b42a544e"
authors = ["Beacon Biosignals, Inc."]
version = "0.12.4"
version = "0.13.0"

[deps]
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
@@ -18,12 +18,13 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[compat]
Compat = "3.32, 4"
EDF = "0.7"
EDF = "0.8"
FilePathsBase = "0.9"
Legolas = "0.5"
Onda = "0.15"
OndaEDFSchemas = "0.2.1"
OndaEDFSchemas = "0.2.3"
PrettyTables = "1.3, 2"
SparseArrays = "1"
StableRNGs = "1"
StatsBase = "0.33, 0.34"
Tables = "1.4"
@@ -33,9 +34,10 @@ julia = "1.6"
[extras]
FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["FilePathsBase", "Test", "Random", "StableRNGs", "Statistics"]
test = ["FilePathsBase", "Test", "Random", "SparseArrays", "StableRNGs", "Statistics"]
2 changes: 1 addition & 1 deletion src/OndaEDF.jl
@@ -29,7 +29,7 @@ Write a plan table to `io_or_path` using `Legolas.write`, using the
"""
function write_plan(io_or_path, plan_table; kwargs...)
return Legolas.write(io_or_path, plan_table,
Legolas.SchemaVersion("ondaedf.file-plan", 2);
Legolas.SchemaVersion("ondaedf.file-plan", 3);
kwargs...)
end
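A small usage sketch (the `plan` variable and path are hypothetical): plans produced by this version are written as `ondaedf.file-plan@3`, and the same schema check that `edf_to_onda_samples` performs (see the import_edf.jl changes below) can be run by hand:

```julia
using OndaEDF, Legolas, Tables

write_plan("my_recording.plan.arrow", plan)   # hypothetical path; writes ondaedf.file-plan@3

# the same validation edf_to_onda_samples runs when validate=true:
Legolas.validate(Tables.schema(Tables.columns(plan)),
                 Legolas.SchemaVersion("ondaedf.file-plan", 3))
```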

2 changes: 1 addition & 1 deletion src/export_edf.jl
@@ -23,7 +23,7 @@ end
const DATA_RECORD_SIZE_LIMIT = 30720
const EDF_BYTE_LIMIT = 8

edf_sample_count_per_record(samples::Samples, seconds_per_record::Float64) = Int16(samples.info.sample_rate * seconds_per_record)
edf_sample_count_per_record(samples::Samples, seconds_per_record::Float64) = Int32(samples.info.sample_rate * seconds_per_record)

_rationalize(x) = rationalize(x)
_rationalize(x::Int) = x // 1
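For context on the `Int16` to `Int32` change above (not part of the diff): EDF.jl 0.8 carries the header's per-record sample count as `Int32`, matching the `samples_per_record::Int32` field in PlanV3, and the old truncating conversion would throw once a record holds more than `typemax(Int16)` samples. A toy illustration with made-up numbers:

```julia
# Toy numbers for illustration only: any per-record count above
# typemax(Int16) == 32767 would make the old Int16(...) conversion throw.
sample_rate = 2048.0          # Hz (hypothetical)
seconds_per_record = 20.0     # s (hypothetical)
count = sample_rate * seconds_per_record   # 40960.0 samples per record

Int32(count)   # 40960
Int16(count)   # throws InexactError
```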
46 changes: 23 additions & 23 deletions src/import_edf.jl
@@ -117,7 +117,7 @@ function match_edf_label(label, signal_names, channel_name, canonical_names)
label = _safe_lowercase(label)

# ideally, we'd do the original behavior:
#
# match exact STANDARD (or custom) signal types at beginning of label,
# ignoring case possibly bracketed by or prepended with `[`, `]`, `,` or
# whitespace everything after is included in the spec a.k.a. label
@@ -139,7 +139,7 @@
#
# This is not equivalent to the original behavior in only a handful of
# cases
#
# - if one of the `signal_names` is a suffix of the signal, like `"pap"`
# matching against `"xpap cpap"`. the fix for this is to add the full
# signal name to the (end) of `signal_names` in the label set.
@@ -231,7 +231,7 @@ function promote_encodings(encodings; pick_offset=(_ -> 0.0), pick_resolution=mi
sample_resolution_in_unit=missing,
sample_rate=missing)
end

sample_type = mapreduce(Onda.sample_type, promote_type, encodings)

sample_rates = [e.sample_rate for e in encodings]
@@ -339,7 +339,7 @@ As an example, here is (a subset of) the default labels for ECG signals:

```julia
["ecg", "ekg"] => ["i" => ["1"], "ii" => ["2"], "iii" => ["3"],
"avl"=> ["ecgl", "ekgl", "ecg", "ekg", "l"],
"avl"=> ["ecgl", "ekgl", "ecg", "ekg", "l"],
"avr"=> ["ekgr", "ecgr", "r"], ...]
```

@@ -363,7 +363,7 @@ function plan_edf_to_onda_samples(header,
"Instead, preprocess signal header rows to before calling " *
"`plan_edf_to_onda_samples`"))
end

row = (; header..., seconds_per_record, error=nothing)

try
@@ -384,23 +384,23 @@
channel_name = canonical_channel_name(canonical)

matched = match_edf_label(edf_label, signal_names, channel_name, channel_names)

if matched !== nothing
# create SamplesInfo and return
row = rowmerge(row;
channel=matched,
sensor_type=first(signal_names),
sensor_label=first(signal_names))
return PlanV2(row)
return PlanV3(row)
end
end
end
catch e
return PlanV2(_errored_row(row, e))
return PlanV3(_errored_row(row, e))
end

# nothing matched, return the original signal header (as a namedtuple)
return PlanV2(row)
return PlanV3(row)
end

# create a table with a plan for converting this EDF file to onda: one row per
@@ -418,7 +418,7 @@ end

Formulate a plan for converting an `EDF.File` to Onda Samples. This applies
`plan_edf_to_onda_samples` to each individual signal contained in the file,
storing `edf_signal_index` as an additional column.

The resulting rows are then passed to [`plan_edf_to_onda_samples_groups`](@ref)
and grouped according to `onda_signal_groupby` (by default, the `:sensor_type`,
@@ -444,7 +444,7 @@ function plan_edf_to_onda_samples(edf::EDF.File;
"`plan_edf_to_onda_samples`. See the OndaEDF README."))
end


true_signals = filter(x -> isa(x, EDF.Signal), edf.signals)
plan_rows = map(true_signals) do s
return plan_edf_to_onda_samples(s.header, edf.header.seconds_per_record;
@@ -455,7 +455,7 @@
# write index of destination signal into plan to capture grouping
plan_rows = plan_edf_to_onda_samples_groups(plan_rows; onda_signal_groupby)

return FilePlanV2.(plan_rows)
return FilePlanV3.(plan_rows)
end

"""
@@ -479,7 +479,7 @@ function plan_edf_to_onda_samples_groups(plan_rows;
edf_signal_index = coalesce(_get(row, :edf_signal_index), i)
return rowmerge(row; edf_signal_index)
end

grouped_rows = groupby(grouper(onda_signal_groupby), plan_rows)
sorted_keys = sort!(collect(keys(grouped_rows)))
plan_rows = mapreduce(vcat, enumerate(sorted_keys)) do (onda_signal_index, key)
@@ -517,20 +517,20 @@ Samples are returned in the order of `:onda_signal_index`. Signals that could
not be matched or otherwise caused an error during execution are not returned.

If `validate=true` (the default), the plan is validated against the
[`FilePlanV2`](@ref) schema, and the signal headers in the `EDF.File`.
[`FilePlanV3`](@ref) schema, and the signal headers in the `EDF.File`.

If `dither_storage=missing` (the default), dither storage is allocated automatically
as specified in the docstring for `Onda.encode`. `dither_storage=nothing` disables dithering.

$SAMPLES_ENCODED_WARNING
"""
function edf_to_onda_samples(edf::EDF.File, plan_table; validate=true, dither_storage=missing)

true_signals = filter(x -> isa(x, EDF.Signal), edf.signals)

if validate
Legolas.validate(Tables.schema(Tables.columns(plan_table)),
Legolas.SchemaVersion("ondaedf.file-plan", 2))
Legolas.SchemaVersion("ondaedf.file-plan", 3))
for row in Tables.rows(plan_table)
signal = true_signals[row.edf_signal_index]
signal.header.label == row.label ||
@@ -628,14 +628,14 @@ the `Onda.SamplesInfo` in `target`. This checks for matching sample rates in
the source signals. If the encoding of `target` is the same as the encoding in
a signal, its encoded (usually `Int16`) data is copied directly into the
`Samples` data matrix; otherwise it is re-encoded.

If `dither_storage=missing` (the default), dither storage is allocated automatically
as specified in the docstring for `Onda.encode`. `dither_storage=nothing` disables dithering.
See `Onda.encode`'s docstring for more details.

!!! note

This function is not meant to be called directly, but through
This function is not meant to be called directly, but through
[`edf_to_onda_samples`](@ref)

$SAMPLES_ENCODED_WARNING
@@ -737,7 +737,7 @@ function store_edf_as_onda(edf::EDF.File, onda_dir, recording_uuid::UUID=uuid4()

signals = Onda.SignalV2[]
edf_samples, plan = edf_to_onda_samples(edf; kwargs...)

errors = _get(Tables.columns(plan), :error)
if !ismissing(errors)
# why unique? because errors that occur during execution get inserted
Expand All @@ -749,7 +749,7 @@ function store_edf_as_onda(edf::EDF.File, onda_dir, recording_uuid::UUID=uuid4()
end
end
end

edf_samples = postprocess_samples(edf_samples)
for samples in edf_samples
sample_filename = string(recording_uuid, "_", samples.info.sensor_type, ".", file_format)
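Stepping back from the individual hunks, a minimal end-to-end sketch of the workflow these docstrings describe (paths are hypothetical); after this PR, the plan rows come back as `PlanV3`/`FilePlanV3` and `write_plan` emits `ondaedf.file-plan@3`:

```julia
using EDF, OndaEDF

edf = EDF.read("/path/to/recording.edf")          # hypothetical path

# plan only: one FilePlanV3 row per EDF signal, grouped into Onda signals
plan = plan_edf_to_onda_samples(edf)
write_plan("recording.plan.arrow", plan)

# plan and execute in one shot, returning Onda.Samples and the executed plan
samples, plan_executed = edf_to_onda_samples(edf)

# or convert and write out an Onda dataset directly:
# store_edf_as_onda(edf, "/path/to/onda_dir")
```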
4 changes: 3 additions & 1 deletion test/export.jl
@@ -31,7 +31,9 @@
@testset "Record metadata" begin
function change_sample_rate(samples; sample_rate)
info = SamplesInfoV2(Tables.rowmerge(samples.info; sample_rate=sample_rate))
new_data = similar(samples.data, 0, Onda.index_from_time(sample_rate, Onda.duration(samples)) - 1)
# sparse CSC but for wide data we have a huge column pointer
# so transposing to get sparse CSR
new_data = spzeros(eltype(samples.data), Onda.index_from_time(sample_rate, Onda.duration(samples)) - 1, channel_count(samples))'
Comment on lines -34 to +36
Member: I don't understand this change; AFAICT it's not really related to the EDF.jl changes, but maybe I'm missing something?

Member Author: @palday did you mean to push this change here?

Member: tests were failing on 1.6 because whatever Onda version gets resolved there ignored validate=false when using the old 0d array. Sparse matrices avoid that and the allocation of a massive dense array.

Member: we can drop 1.6 support IMO, it's no longer LTS

Member: I'm also fine with that, whether or not you leave the sparse change in.

Member Author: To keep this PR focused I'm inclined to drop the change as @kleinschmidt suggests and bump support to 1.10.

Member: strangely that does not seem to have done it... still getting the same validation error on 1.10. AFAICT the same version of Onda is used in both CI runs (1 and min):

https://github.com/beacon-biosignals/OndaEDF.jl/actions/runs/13054426316/job/36421883057?pr=104#step:7:54

https://github.com/beacon-biosignals/OndaEDF.jl/actions/runs/13054426316/job/36421883520?pr=104#step:7:54

Member Author: Turns out this is still a problem in 1.10, so I'm going to keep @palday's change.

Member: I'm so sorry. I really didn't mean to bring this upon you!

Member Author: 😆

return Samples(new_data, info, samples.encoded; validate=false)
end

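To make the memory argument in the thread above concrete, a rough sketch with made-up sizes: the zero-row placeholder (`similar(samples.data, 0, n)`) trips Onda's validation on some versions even with `validate=false`, a correctly shaped dense zero matrix would be large, and a transposed all-zero sparse matrix gets the wide channels-by-samples shape essentially for free:

```julia
using SparseArrays

# Hypothetical sizes: 10 channels, 24 hours at 128 Hz
n_channels, n_samples = 10, 128 * 86_400

# a dense zero matrix of the right shape would cost roughly 0.88 GB:
dense_bytes = n_channels * n_samples * sizeof(Float64)    # 884_736_000

# an all-zero sparse CSC matrix built tall (samples x channels) keeps the
# column-pointer array tiny; the lazy transpose presents it in the wide
# channels x samples orientation that `Samples` expects:
placeholder = spzeros(Float64, n_samples, n_channels)'
size(placeholder)    # (10, 11059200)
```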