diff --git a/Project.toml b/Project.toml index 652fed6..5a6ee37 100644 --- a/Project.toml +++ b/Project.toml @@ -1,23 +1,23 @@ name = "SASLib" uuid = "df8f2f22-cfef-5733-af3f-96770d497d85" authors = ["Tom Kwong "] -version = "1.2.0" +version = "1.2.1" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d" StringEncodings = "69024149-9ee7-55f6-a4c4-859efe599b68" +TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" TabularDisplay = "3eeacb1d-13c2-54cc-9b18-30c86af3cadb" -TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" -IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d" [compat] +IteratorInterfaceExtensions = "0.1.1, 1" StringEncodings = "0.3" +TableTraits = "0.4.1, 1" +Tables = "1.0" TabularDisplay = "1" -Tables = "0.2.3" julia = "1" -IteratorInterfaceExtensions = "0.1.1, 1" -TableTraits = "0.4.1, 1" [extras] InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" diff --git a/src/tables.jl b/src/tables.jl index 74590a6..6c3be12 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -1,12 +1,25 @@ +# This file implements Tables interface and provide compatibility +# to the Queryverse ecosystem. + +# ----------------------------------------------------------------------------- +# Tables.jl implementation + Tables.istable(::Type{<:ResultSet}) = true -Tables.rowaccess(::Type{<:ResultSet}) = true +# AbstractColumns interface Tables.columnaccess(::Type{<:ResultSet}) = true +Tables.columns(rs::ResultSet) = rs +Tables.getcolumn(rs::ResultSet, i::Int) = rs[names(rs)[i]] +# AbstractRow interface +Tables.rowaccess(::Type{<:ResultSet}) = true Tables.rows(rs::ResultSet) = rs -Tables.columns(rs::ResultSet) = rs +# Schema definition Tables.schema(rs::ResultSet) = Tables.Schema(names(rs), eltype.(columns(rs))) +# ----------------------------------------------------------------------------- +# Queryverse compatibility + IteratorInterfaceExtensions.getiterator(rs::ResultSet) = Tables.datavaluerows(rs) TableTraits.isiterabletable(x::ResultSet) = true diff --git a/test/runtests.jl b/test/runtests.jl index 300b41a..945b3e2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,7 +7,7 @@ using SharedArrays: SharedArray using Tables import IteratorInterfaceExtensions, TableTraits -function getpath(dir, file) +function getpath(dir, file) path = joinpath(dir, file) #println("================ $path ================") path @@ -59,7 +59,7 @@ Base.convert(::Type{YearStr}, v::Float64) = YearStr(string(round(Int, v))) end @testset "case insensitive dict" begin - function testdict(lowercase_key, mixedcase_key, second_lowercase_key) + function testdict(lowercase_key, mixedcase_key, second_lowercase_key) T = typeof(lowercase_key) d = SASLib.CIDict{T,Int}() @@ -108,7 +108,7 @@ Base.convert(::Type{YearStr}, v::Float64) = YearStr(string(round(Int, v))) @testset "read basic test files (test*.sas7bdat)" begin dir = "data_pandas" - files = filter(x -> endswith(x, "sas7bdat") && startswith(x, "test"), + files = filter(x -> endswith(x, "sas7bdat") && startswith(x, "test"), Base.Filesystem.readdir("$dir")) for f in files result = readfile(dir, f) @@ -150,7 +150,7 @@ Base.convert(::Type{YearStr}, v::Float64) = YearStr(string(round(Int, v))) rs = readsas(fname, include_columns=[:MONTH, :YEAR]) @test size(rs, 2) == 2 @test sort(names(rs)) == sort([:MONTH, :YEAR]) - + rs = readsas(fname, include_columns=[1, 2, 7]) @test size(rs, 2) == 3 @test sort(names(rs)) == sort([:ACTUAL, :PREDICT, :PRODUCT]) @@ -171,12 +171,12 @@ Base.convert(::Type{YearStr}, v::Float64) = YearStr(string(round(Int, v))) # test bad include/exclude param # see https://discourse.julialang.org/t/test-warn-doesnt-work-with-warn-in-0-7/9001 - @test_logs (:warn, "Unknown include column blah") (:warn, + @test_logs (:warn, "Unknown include column blah") (:warn, "Unknown include column Year") readsas(fname, include_columns=[:blah, :Year]) - @test_logs (:warn, "Unknown exclude column blah") (:warn, + @test_logs (:warn, "Unknown exclude column blah") (:warn, "Unknown exclude column Year") readsas(fname, exclude_columns=[:blah, :Year]) # error handling - @test_throws SASLib.ConfigError readsas(fname, + @test_throws SASLib.ConfigError readsas(fname, include_columns=[1], exclude_columns=[1]) end @@ -187,8 +187,8 @@ Base.convert(::Type{YearStr}, v::Float64) = YearStr(string(round(Int, v))) @test size(rs) == (1440, 10) @test size(rs,1) == 1440 @test size(rs,2) == 10 - @test length(columns(rs)) == 10 - @test length(names(rs)) == 10 + @test length(columns(rs)) == 10 + @test length(names(rs)) == 10 # cell indexing @test rs[1][1] ≈ 925.0 @@ -204,7 +204,7 @@ Base.convert(::Type{YearStr}, v::Float64) = YearStr(string(round(Int, v))) # iteration @test sum(r[1] for r in rs) ≈ 730337.0 - + # portion of result set @test typeof(rs[1:2]) == SASLib.ResultSet @test typeof(rs[:ACTUAL, :PREDICT]) == SASLib.ResultSet @@ -212,32 +212,53 @@ Base.convert(::Type{YearStr}, v::Float64) = YearStr(string(round(Int, v))) @test rs[:ACTUAL, :PREDICT][1][1] ≈ 925.0 # setindex! - rs[1,1] = 100.0 - @test rs[1,1] ≈ 100.0 - rs[1,:ACTUAL] = 200.0 - @test rs[1,:ACTUAL] ≈ 200.0 + let rscopy = deepcopy(rs) + rs[1,1] = 100.0 + @test rs[1,1] ≈ 100.0 + rs[1,:ACTUAL] = 200.0 + @test rs[1,:ACTUAL] ≈ 200.0 + end # display related @test show(rs) == nothing @test SASLib.sizestr(rs) == "1440 rows x 10 columns" - # Tables.jl interface - getproperty test - @test rs.ACTUAL == rs[:ACTUAL] - @test names(rs) == propertynames(rs) + # Tables.jl + let twocols = rs[:ACTUAL, :PREDICT] + + # General + @test Tables.istable(typeof(rs)) + @test Tables.rowaccess(typeof(rs)) + @test Tables.columnaccess(typeof(rs)) + + # AbstractRow interface + let row = twocols[3] # (ACTUAL = 608.0, PREDICT = 846.0) + @test Tables.getcolumn(row, 1) ≈ 608.0 + @test Tables.getcolumn(row, :ACTUAL) ≈ 608.0 + @test Tables.columnnames(row) === (:ACTUAL, :PREDICT) + end - # Tables.jl coverage - indirect tests / usage - @test Tables.schema(rs).names == Tuple(names(rs)) - @test Tables.schema(rs).types == Tuple(eltype.([rs[s] for s in names(rs)])) - @test length(Tables.rowtable(rs)) == 1440 - @test length(Tables.columntable(rs)) == 10 - @test size(Tables.matrix(rs[:ACTUAL, :PREDICT])) == (1440,2) + # AbstractColumns interface + let tab = Tables.columns(twocols) + @test Tables.getcolumn(tab, 1) isa Array{Float64,1} + @test Tables.getcolumn(tab, 1) |> size === (1440,) + @test Tables.getcolumn(tab, :ACTUAL) isa Array{Float64,1} + @test Tables.getcolumn(tab, :ACTUAL) |> size === (1440,) + @test Tables.columnnames(tab) === propertynames(twocols) + end - # Tables.jl coverage - direct tests - @test Tables.istable(typeof(rs)) === true - @test Tables.rowaccess(typeof(rs)) === true - @test Tables.columnaccess(typeof(rs)) === true - @test Tables.rows(rs) |> first |> propertynames |> Tuple == Tuple(names(rs)) - @test Tables.columns(rs) |> propertynames |> Tuple == Tuple(names(rs)) + # Usages + @test size(Tables.matrix(twocols)) == (1440, 2) + end + + # old tests + #@test Tables.schema(rs).names == Tuple(names(rs)) + #@test Tables.schema(rs).types == Tuple(eltype.([rs[s] for s in names(rs)])) + #@test length(Tables.rowtable(rs)) == 1440 + #@test length(Tables.columntable(rs)) == 10 + #@test size(Tables.matrix(rs[:ACTUAL, :PREDICT])) == (1440,2) + #@test Tables.rows(rs) |> first |> propertynames |> Tuple == Tuple(names(rs)) + #@test Tables.columns(rs) |> propertynames |> Tuple == Tuple(names(rs)) # Queryverse integration @test TableTraits.isiterabletable(rs) == true @@ -333,18 +354,18 @@ Base.convert(::Type{YearStr}, v::Float64) = YearStr(string(round(Int, v))) end @testset "array constructors" begin - + rs = readfile("data_AHS2013", "homimp.sas7bdat") @test typeof(rs[:RAS]) == SASLib.ObjectPool{String,UInt16} # string_array_fn test for specific string columns - rs = readfile("data_AHS2013", "homimp.sas7bdat", + rs = readfile("data_AHS2013", "homimp.sas7bdat", string_array_fn = Dict(:RAS => REGULAR_STR_ARRAY)) @test typeof(rs[:RAS]) == Array{String,1} @test typeof(rs[:RAH]) != Array{String,1} # string_array_fn test for all string columns - rs = readfile("data_AHS2013", "homimp.sas7bdat", + rs = readfile("data_AHS2013", "homimp.sas7bdat", string_array_fn = Dict(:_all_ => REGULAR_STR_ARRAY)) @test typeof(rs[:RAS]) == Array{String,1} @test typeof(rs[:RAH]) == Array{String,1} @@ -354,13 +375,13 @@ Base.convert(::Type{YearStr}, v::Float64) = YearStr(string(round(Int, v))) # number_array_fn test by column name makesharedarray(n) = SharedArray{Float64}(n) - rs = readfile("data_misc", "numeric_1000000_2.sas7bdat", + rs = readfile("data_misc", "numeric_1000000_2.sas7bdat", number_array_fn = Dict(:f => makesharedarray)) @test typeof(rs[:f]) == SharedArray{Float64,1} @test typeof(rs[:x]) == Array{Float64,1} # number_array_fn test for all numeric columns - rs = readfile("data_misc", "numeric_1000000_2.sas7bdat", + rs = readfile("data_misc", "numeric_1000000_2.sas7bdat", number_array_fn = Dict(:_all_ => makesharedarray)) @test typeof(rs[:f]) == SharedArray{Float64,1} @test typeof(rs[:x]) == SharedArray{Float64,1} @@ -371,31 +392,31 @@ Base.convert(::Type{YearStr}, v::Float64) = YearStr(string(round(Int, v))) @testset "user specified column types" begin # normal use case - rs = readfile("data_pandas", "productsales.sas7bdat"; + rs = readfile("data_pandas", "productsales.sas7bdat"; verbose_level = 0, column_types = Dict(:YEAR => Int16, :QUARTER => Int8)) @test eltype(rs[:YEAR]) == Int16 @test eltype(rs[:QUARTER]) == Int8 # error handling - warn() when a column cannot be converted - rs = readfile("data_pandas", "productsales.sas7bdat"; + rs = readfile("data_pandas", "productsales.sas7bdat"; verbose_level = 0, column_types = Dict(:YEAR => Int8, :QUARTER => Int8)) @test eltype(rs[:YEAR]) == Float64 @test eltype(rs[:QUARTER]) == Int8 #TODO expect warning for :YEAR conversion # case insensitive column symbol - rs = readfile("data_pandas", "productsales.sas7bdat"; + rs = readfile("data_pandas", "productsales.sas7bdat"; verbose_level = 0, column_types = Dict(:Quarter => Int8)) @test eltype(rs[:QUARTER]) == Int8 # conversion to custom types - rs = readfile("data_pandas", "productsales.sas7bdat"; + rs = readfile("data_pandas", "productsales.sas7bdat"; verbose_level = 0, column_types = Dict(:Year => YearStr)) @test eltype(rs[:YEAR]) == YearStr # test Union type - let T = Union{Int,Missing} - rs = readfile("data_pandas", "productsales.sas7bdat"; + let T = Union{Int,Missing} + rs = readfile("data_pandas", "productsales.sas7bdat"; verbose_level = 0, column_types = Dict(:Year => T)) @test eltype(rs[:YEAR]) == T end @@ -410,7 +431,7 @@ Base.convert(::Type{YearStr}, v::Float64) = YearStr(string(round(Int, v))) @testset "just reads" begin for dir in ["data_pandas", "data_reikoch", "data_AHS2013", "data_misc"] for f in readdir(dir) - if endswith(f, ".sas7bdat") && + if endswith(f, ".sas7bdat") && !(f in ["zero_variables.sas7bdat"]) rs = readfile(dir, f) @test size(rs, 1) > 0