diff --git a/src/transforms/sample.jl b/src/transforms/sample.jl index 324d2ef5..39956eb4 100644 --- a/src/transforms/sample.jl +++ b/src/transforms/sample.jl @@ -12,14 +12,14 @@ the same order of the original table. # Examples ```julia -Sample(1_000) -Sample(1_000, replace=false) -Sample(1_000, replace=false, ordered=true) +Sample(1000) +Sample(1000, replace=false) +Sample(1000, replace=false, ordered=true) # with rng using Random rng = MersenneTwister(2) -Sample(1_000, rng=rng) +Sample(1000, rng=rng) # with weights Sample(10, rand(100)) @@ -68,8 +68,8 @@ function applyfeat(::Sample, feat, prep) sinds, rinds = prep # selected and removed rows - srows = Tables.subset(feat, sinds) - rrows = Tables.subset(feat, rinds) + srows = Tables.subset(feat, sinds, viewhint=true) + rrows = Tables.subset(feat, rinds, viewhint=true) newfeat = srows |> Tables.materializer(feat) newfeat, (sinds, rinds, rrows) diff --git a/src/transforms/satisfies.jl b/src/transforms/satisfies.jl index 3586058a..ba7b8791 100644 --- a/src/transforms/satisfies.jl +++ b/src/transforms/satisfies.jl @@ -62,7 +62,7 @@ Selects the columns that don't have scientific type `S`. ```julia import DataScienceTraits as DST -Except(DST.Continuous) +Except(DST.Categorical) ``` """ Except(S::Type{<:SciType}) = Satisfies(x -> !(elscitype(x) <: S)) diff --git a/test/Project.toml b/test/Project.toml index fd271a85..7bf4cfa6 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,6 +1,7 @@ [deps] CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" +CoDa = "5900dafe-f573-5c72-b367-76665857777b" ColumnSelectors = "9cc86067-7e36-4c61-b350-1ac9833d277f" DataScienceTraits = "6cb2f572-2d2b-4ba6-bdb3-e710fa044d6c" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" diff --git a/test/runtests.jl b/test/runtests.jl index 5f2850f3..bf30b941 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,5 @@ using TableTransforms +using CoDa using Tables using Unitful using TypedTables diff --git a/test/transforms/sample.jl b/test/transforms/sample.jl index dbc5e27e..895c03fe 100644 --- a/test/transforms/sample.jl +++ b/test/transforms/sample.jl @@ -60,4 +60,13 @@ @test isapprox(count(==(trows[4]), nrows) / 10_000, 4 / 21, atol=0.01) @test isapprox(count(==(trows[5]), nrows) / 10_000, 5 / 21, atol=0.01) @test isapprox(count(==(trows[6]), nrows) / 10_000, 6 / 21, atol=0.01) + + # performance tests + x = rand(100_000) + y = rand(100_000) + c = CoDaArray((a = rand(100_000), b = rand(100_000), c = rand(100_000))) + t = Table(; x, y, c) + + T = Sample(10_000) + @test @elapsed(apply(T, t)) < 0.5 end