Skip to content

Commit

Permalink
Add 'DropNaN' transform (#274)
Browse files Browse the repository at this point in the history
  • Loading branch information
eliascarv authored Apr 10, 2024
1 parent 5e31881 commit 5a8ec6d
Show file tree
Hide file tree
Showing 7 changed files with 189 additions and 0 deletions.
6 changes: 6 additions & 0 deletions docs/src/transforms.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ Filter
DropMissing
```

## DropNaN

```@docs
DropNaN
```

## DropExtrema

```@docs
Expand Down
1 change: 1 addition & 0 deletions src/TableTransforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ export
Sample,
Filter,
DropMissing,
DropNaN,
DropExtrema,
DropUnits,
AbsoluteUnits,
Expand Down
1 change: 1 addition & 0 deletions src/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ include("transforms/sort.jl")
include("transforms/sample.jl")
include("transforms/filter.jl")
include("transforms/dropmissing.jl")
include("transforms/dropnan.jl")
include("transforms/dropextrema.jl")
include("transforms/dropunits.jl")
include("transforms/dropconstant.jl")
Expand Down
55 changes: 55 additions & 0 deletions src/transforms/dropnan.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# ------------------------------------------------------------------
# Licensed under the MIT License. See LICENSE in the project root.
# ------------------------------------------------------------------

"""
    DropNaN()
    DropNaN(:)

Drop all rows with NaN values in the table.

    DropNaN(col₁, col₂, ..., colₙ)
    DropNaN([col₁, col₂, ..., colₙ])
    DropNaN((col₁, col₂, ..., colₙ))

Drop all rows with NaN values in the selected columns `col₁`, `col₂`, ..., `colₙ`.

    DropNaN(regex)

Drop all rows with NaN values in the columns whose names match `regex`.

# Examples

```julia
DropNaN(2, 3, 4)
DropNaN([:b, :c, :d])
DropNaN(("b", "c", "d"))
DropNaN(r"[bcd]")
```
"""
struct DropNaN{S<:ColumnSelector} <: StatelessFeatureTransform
# column selector; `preprocess` calls it with the table's column names
# to obtain the columns that are checked for NaN values
selector::S
end

# No arguments: fall back to `AllSelector()`.
function DropNaN()
    return DropNaN(AllSelector())
end

# One column specification: pass it through `selector` before storing it.
function DropNaN(cols)
    return DropNaN(selector(cols))
end

# Several columns of one common `Column` type: the varargs tuple is
# passed through `selector` as a whole.
function DropNaN(cols::T...) where {T<:Column}
    return DropNaN(selector(cols))
end

# Declare `DropNaN` (for any selector type) as a non-revertible transform.
function isrevertible(::Type{<:DropNaN})
    return false
end

# NaN check that accepts values of any type: numbers are forwarded to
# `isnan`, every other value reports `false`.
_isnan(::Any) = false
function _isnan(x::Number)
    return isnan(x)
end

# Build the row filter that implements `DropNaN` for the table `feat`.
# Returns the `Filter` transform together with the result of
# preprocessing that filter on `feat`.
function preprocess(transform::DropNaN, feat)
    columns = Tables.columns(feat)
    allnames = Tables.columnnames(columns)
    selected = transform.selector(allnames)
    # a row is kept only when none of the selected columns holds a NaN
    keeprow = row -> !any(nm -> _isnan(row[nm]), selected)
    ftrans = Filter(keeprow)
    return ftrans, preprocess(ftrans, feat)
end

# Apply `DropNaN` to `feat` by delegating to the filter transform carried in `prep`.
# `prep` is unpacked as a (transform, preprocessing) pair, the shape that
# `preprocess(::DropNaN, feat)` returns.
function applyfeat(::DropNaN, feat, prep)
    filtertrans, filterprep = prep
    return applyfeat(filtertrans, feat, filterprep)
end
14 changes: 14 additions & 0 deletions test/shows.jl
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,20 @@
└─ selector = [:a, :b, :c]"""
end

@testset "DropNaN" begin
    transform = DropNaN(:a, :b, :c)

    # single-line representation
    compact = sprint(show, transform)
    @test compact == "DropNaN([:a, :b, :c])"

    # multi-line representation for the text/plain MIME type
    detailed = sprint(show, MIME("text/plain"), transform)
    @test detailed == """
    DropNaN transform
    └─ selector = [:a, :b, :c]"""
end

@testset "DropExtrema" begin
T = DropExtrema("a", low=0.25, high=0.75)

Expand Down
1 change: 1 addition & 0 deletions test/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ transformfiles = [
"sample.jl",
"filter.jl",
"dropmissing.jl",
"dropnan.jl",
"dropextrema.jl",
"dropunits.jl",
"dropconstant.jl",
Expand Down
111 changes: 111 additions & 0 deletions test/transforms/dropnan.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
@testset "DropNaN" begin
    @test !isrevertible(DropNaN())

    # every numeric column holds exactly one NaN, each in a different row;
    # the string column `e` never does
    a = [1.8, 0.5, 1.2, 3.7, 5.0, NaN]
    b = [6.0f0, 5.4f0, 5.4f0, NaN32, 5.5f0, 2.6f0]
    c = [4.9, 5.1, NaN, 5.1, 8.6, 4.4] * u"m"
    d = [NaN32, 1.0f0, 8.8f0, 0.1f0, 1.5f0, 9.5f0] * u"m"
    e = ["yes", "no", "no", "yes", "yes", "no"]
    t = Table(; a, b, c, d, e)

    # no selector: all columns are checked
    kept, _ = apply(DropNaN(), t)
    @test kept.a == [0.5, 5.0]
    @test kept.b == [5.4f0, 5.5f0]
    @test kept.c == [5.1, 8.6] * u"m"
    @test kept.d == [1.0f0, 1.5f0] * u"m"
    @test kept.e == ["no", "yes"]

    # args...: columns `a` and `c` given as integers, symbols and strings
    for T in (DropNaN(1, 3), DropNaN(:a, :c), DropNaN("a", "c"))
        kept, _ = apply(T, t)
        @test isequal(kept.a, [1.8, 0.5, 3.7, 5.0])
        @test isequal(kept.b, [6.0f0, 5.4f0, NaN32, 5.5f0])
        @test isequal(kept.c, [4.9, 5.1, 5.1, 8.6] * u"m")
        @test isequal(kept.d, [NaN32, 1.0f0, 0.1f0, 1.5f0] * u"m")
        @test isequal(kept.e, ["yes", "no", "yes", "yes"])
    end

    # vector: columns `b` and `d` given as integers, symbols and strings
    for T in (DropNaN([2, 4]), DropNaN([:b, :d]), DropNaN(["b", "d"]))
        kept, _ = apply(T, t)
        @test isequal(kept.a, [0.5, 1.2, 5.0, NaN])
        @test isequal(kept.b, [5.4f0, 5.4f0, 5.5f0, 2.6f0])
        @test isequal(kept.c, [5.1, NaN, 8.6, 4.4] * u"m")
        @test isequal(kept.d, [1.0f0, 8.8f0, 1.5f0, 9.5f0] * u"m")
        @test isequal(kept.e, ["no", "no", "yes", "no"])
    end

    # tuple: columns `a`, `b` and `c` given as integers, symbols and strings
    for T in (DropNaN((1, 2, 3)), DropNaN((:a, :b, :c)), DropNaN(("a", "b", "c")))
        kept, _ = apply(T, t)
        @test isequal(kept.a, [1.8, 0.5, 5.0])
        @test isequal(kept.b, [6.0f0, 5.4f0, 5.5f0])
        @test isequal(kept.c, [4.9, 5.1, 8.6] * u"m")
        @test isequal(kept.d, [NaN32, 1.0f0, 1.5f0] * u"m")
        @test isequal(kept.e, ["yes", "no", "yes"])
    end

    # regex: columns `b`, `c` and `d` selected by name pattern
    kept, _ = apply(DropNaN(r"[bcd]"), t)
    @test isequal(kept.a, [0.5, 5.0, NaN])
    @test isequal(kept.b, [5.4f0, 5.5f0, 2.6f0])
    @test isequal(kept.c, [5.1, 8.6, 4.4] * u"m")
    @test isequal(kept.d, [1.0f0, 1.5f0, 9.5f0] * u"m")
    @test isequal(kept.e, ["no", "yes", "no"])
end

0 comments on commit 5a8ec6d

Please sign in to comment.