Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Coerce(SciType) #271

Merged
merged 1 commit into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions src/transforms/coerce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@

Return a copy of the table, ensuring that the scientific types of the columns match the new specification.

Coerce(S)

Coerce all columns of the table to the scientific type `S`.

This transform uses the `DataScienceTraits.coerce` function. Please see its docstring for more details.

# Examples
Expand All @@ -18,23 +22,28 @@ Coerce(:a => DST.Continuous, :b => DST.Continuous)
Coerce("a" => DST.Continuous, "b" => DST.Continuous)
```
"""
struct Coerce{S<:ColumnSelector,T} <: StatelessFeatureTransform
  # selector that picks the columns to be coerced
  selector::S
  # target scientific type(s): either a single scientific type shared by
  # every selected column, or a vector with one scientific type per
  # selected column (both forms are handled by `_typedict`)
  scitypes::T
end

# Reject construction without arguments; without this method the
# varargs `Pair` constructor would accept an empty argument list.
function Coerce()
  throw(ArgumentError("cannot create Coerce transform without arguments"))
end

# Single scientific type: select every column and coerce it to `scitype`.
function Coerce(scitype::Type{<:SciType})
  Coerce(AllSelector(), scitype)
end

# Pairs `column => scitype`: the left-hand sides build the column selector,
# the right-hand sides become the vector of target scientific types.
function Coerce(pairs::Pair{C,DataType}...) where {C<:Column}
  cols = first.(pairs)
  scitypes = collect(last.(pairs))
  Coerce(selector(cols), scitypes)
end

# Coerce is declared revertible for every parametrization of the type.
function isrevertible(::Type{<:Coerce})
  true
end

# Single scientific type: every selected name maps to that same type.
function _typedict(scitype::Type{<:SciType}, snames)
  Dict(name => scitype for name in snames)
end
# Vector of scientific types: pair each selected name with the type
# found at the same position in `scitypes`.
function _typedict(scitypes::AbstractVector, snames)
  namedtypes = zip(snames, scitypes)
  Dict(namedtypes)
end

function applyfeat(transform::Coerce, feat, prep)
cols = Tables.columns(feat)
names = Tables.columnnames(cols)
types = Tables.schema(feat).types
snames = transform.selector(names)
typedict = Dict(zip(snames, transform.scitypes))
typedict = _typedict(transform.scitypes, snames)

columns = map(names) do name
x = Tables.getcolumn(cols, name)
Expand Down
2 changes: 2 additions & 0 deletions src/transforms/rename.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ struct Rename{S<:ColumnSelector,N} <: StatelessFeatureTransform
end
end

# Reject construction without arguments; without this method the
# varargs `Pair` constructor would accept an empty argument list.
function Rename()
  throw(ArgumentError("cannot create Rename transform without arguments"))
end

# Single argument: pair `fun` with a selector covering all columns.
# NOTE(review): presumably `fun` maps old column names to new ones — confirm.
function Rename(fun)
  Rename(AllSelector(), fun)
end

# Pairs `column => newname`: the left-hand sides build the column selector,
# the right-hand sides become the vector of new names.
function Rename(pairs::Pair{C,Symbol}...) where {C<:Column}
  cols = first.(pairs)
  newnames = collect(last.(pairs))
  Rename(selector(cols), newnames)
end
Expand Down
21 changes: 21 additions & 0 deletions test/transforms/coerce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,32 @@
@test eltype(tₒ.a) == eltype(t.a)
@test eltype(tₒ.b) == eltype(t.b)

T = Coerce(DST.Continuous)
n, c = apply(T, t)
@test eltype(n.a) <: Float64
@test eltype(n.b) <: Float64
n, c = apply(T, t)
tₒ = revert(T, n, c)
@test eltype(tₒ.a) == eltype(t.a)
@test eltype(tₒ.b) == eltype(t.b)

T = Coerce(DST.Categorical)
n, c = apply(T, t)
@test eltype(n.a) <: Int
@test eltype(n.b) <: Int
n, c = apply(T, t)
tₒ = revert(T, n, c)
@test eltype(tₒ.a) == eltype(t.a)
@test eltype(tₒ.b) == eltype(t.b)

# row table
rt = Tables.rowtable(t)
T = Coerce(:a => DST.Continuous, :b => DST.Categorical)
n, c = apply(T, rt)
@test Tables.isrowtable(n)
rtₒ = revert(T, n, c)
@test rt == rtₒ

# error: cannot create Coerce transform without arguments
@test_throws ArgumentError Coerce()
end
4 changes: 3 additions & 1 deletion test/transforms/rename.jl
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,9 @@
tₒ = revert(T, n, c)
@test t == tₒ

# throws
# error: cannot create Rename transform without arguments
@test_throws ArgumentError Rename()
# error: new names must be unique
@test_throws AssertionError Rename(:a => :x, :b => :x)
@test_throws AssertionError apply(Rename(:a => :c, :b => :d), t)
end
Loading