Change import to using in test (#83)
* Change import to using in test
* Fix explicit using Package: Package, foo
clizbe authored Jan 10, 2025
1 parent 6bc433b commit 92895d3
Showing 3 changed files with 103 additions and 84 deletions.
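The pattern adopted throughout is `using Package: Package, foo`: it binds the module name itself plus only the listed symbols, so nothing else leaks into scope, and (unlike `import Package: foo`) it signals that `foo` is only called, never extended with new methods. A minimal sketch of the idiom, using a hypothetical module `Demo` that is not part of this repository:

```julia
# Hypothetical module standing in for a package such as TulipaIO.
module Demo
export frobnicate
frobnicate(x) = x + 1    # exported helper
internal(x) = 2x         # not exported
end

# The commit's idiom: bring in the module name plus only the symbols used.
using .Demo: Demo, frobnicate

@assert frobnicate(1) == 2     # listed symbol, callable unqualified
@assert Demo.internal(3) == 6  # everything else stays explicitly qualified
```

The same idiom explains the many `TIO.foo` to `TulipaIO.foo` rewrites below: with only the module name bound, every other call site is qualified in full.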
test/runtests.jl (3 changes: 1 addition & 2 deletions)
@@ -1,6 +1,5 @@
using TulipaIO: TulipaIO

-import Test: @test, @testset, @test_throws
+using Test: Test, @test, @testset, @test_throws

const DATA = joinpath(@__DIR__, "data")

test/test-convenience.jl (15 changes: 9 additions & 6 deletions)
@@ -1,4 +1,7 @@
-using CSV, DataFrames, DuckDB, TulipaIO
+using CSV: CSV
+using DataFrames: DataFrames, DataFrame
+using DuckDB: DuckDB, DBInterface
+using TulipaIO: TulipaIO

@testset "Test convenience functions" begin
@testset "Read CSV folder" begin
@@ -12,7 +15,7 @@ using CSV, DataFrames, DuckDB, TulipaIO
end

connection = DBInterface.connect(DuckDB.DB)
-read_csv_folder(connection, tmpdir)
+TulipaIO.read_csv_folder(connection, tmpdir)
@test (DBInterface.execute(connection, "SHOW TABLES") |> DataFrame |> df -> df.name) ==
["some_file"]
end
@@ -23,16 +26,16 @@ using CSV, DataFrames, DuckDB, TulipaIO
"rep_periods_mapping" =>
Dict(:period => "INT", :rep_period => "VARCHAR", :weight => "DOUBLE"),
)
-read_csv_folder(con, "data/Norse"; schemas)
+TulipaIO.read_csv_folder(con, "data/Norse"; schemas)
df_types = DuckDB.query(con, "DESCRIBE rep_periods_mapping") |> DataFrame
@test df_types.column_name == ["period", "rep_period", "weight"]
@test df_types.column_type == ["INTEGER", "VARCHAR", "DOUBLE"]
end

@testset "Test show_tables and get_table" begin
connection = DBInterface.connect(DuckDB.DB)
-create_tbl(connection, "data/Norse/assets-data.csv"; name = "my_table")
-@test show_tables(connection).name == ["my_table"]
-@test "Asgard_Battery" in get_table(connection, "my_table").name
+TulipaIO.create_tbl(connection, "data/Norse/assets-data.csv"; name = "my_table")
+@test TulipaIO.show_tables(connection).name == ["my_table"]
+@test "Asgard_Battery" in TulipaIO.get_table(connection, "my_table").name
end
end
test/test-pipeline.jl (169 changes: 93 additions & 76 deletions)
@@ -1,16 +1,14 @@
using CSV: CSV
-import DataFrames as DF
-import DuckDB: DuckDB as DD, DB, DBInterface
+using DataFrames: DataFrames, DataFrame
+using DuckDB: DuckDB, DBInterface

-TIO = TulipaIO

-function shape(df::DF.DataFrame)
-return (DF.nrow(df), DF.ncol(df))
+function shape(df::DataFrame)
+return (DataFrames.nrow(df), DataFrames.ncol(df))
end

-function tmp_tbls(con::DB)
+function tmp_tbls(con::DuckDB.DB)
res = DBInterface.execute(con, "SELECT name FROM (SHOW ALL TABLES) WHERE temporary = true")
-return DF.DataFrame(res)
+return DataFrame(res)
end

"""
@@ -19,47 +17,47 @@ end
When row order is different, do a join to determine equality; use the
columns `cols`, join on `on` (often :name). The resulting DataFrame
is returned. It uniquifies columns with clashing names (see
-`?DF.leftjoin`), and stores a "source" under the `:source` column.
+`?DataFrames.leftjoin`), and stores a "source" under the `:source` column.
"""
function join_cmp(df1, df2, cols; on::Union{Symbol, Vector{Symbol}})
-DF.leftjoin(df1[!, cols], df2[!, cols]; on = on, makeunique = true, source = :source)
+DataFrames.leftjoin(df1[!, cols], df2[!, cols]; on = on, makeunique = true, source = :source)
end

@testset "Utilities" begin
csv_path = joinpath(DATA, "Norse/assets-data.csv")

@testset "get_tbl_name(source, tmp)" begin
for (name, tmp) in [["my_file", false], ["t_my_file", true]]
-@test name == TIO.get_tbl_name("path/my-file.csv", tmp)
+@test name == TulipaIO.get_tbl_name("path/my-file.csv", tmp)
end
end

# redundant for the current implementation, needed when we support globs
@testset "check_file(source)" begin
-@test TIO.check_file(csv_path)
-@test !TIO.check_file("not-there")
+@test TulipaIO.check_file(csv_path)
+@test !TulipaIO.check_file("not-there")
end

-con = DBInterface.connect(DB)
+con = DBInterface.connect(DuckDB.DB)
tbl_name = "mytbl"

@testset "check_tbl(con, source)" begin
DBInterface.execute(con, "CREATE TABLE $tbl_name AS SELECT * FROM range(5)")
-@test TIO.check_tbl(con, tbl_name)
-@test !TIO.check_tbl(con, "not_there")
+@test TulipaIO.check_tbl(con, tbl_name)
+@test !TulipaIO.check_tbl(con, "not_there")
end

@testset "Conditionally format source as SQL" begin
-read_ = TIO.fmt_source(con, csv_path)
+read_ = TulipaIO.fmt_source(con, csv_path)
@test occursin("read_csv", read_)
@test occursin(csv_path, read_)
-@test TIO.fmt_source(con, tbl_name) == tbl_name
-@test_throws TIO.NeitherTableNorFileError TIO.fmt_source(con, "not-there")
+@test TulipaIO.fmt_source(con, tbl_name) == tbl_name
+@test_throws TulipaIO.NeitherTableNorFileError TulipaIO.fmt_source(con, "not-there")
if (VERSION.major >= 1) && (VERSION.minor >= 8)
msg_re = r"not-there.+"
msg_re *= "$con"
-@test_throws msg_re TIO.fmt_source(con, "not-there")
+@test_throws msg_re TulipaIO.fmt_source(con, "not-there")
end
end
end
@@ -69,85 +67,91 @@ end
csv_copy = replace(csv_path, "data.csv" => "data-copy.csv")
csv_fill = replace(csv_path, "data.csv" => "data-alt.csv")

-df_org = DF.DataFrame(CSV.File(csv_path; header = 2))
+df_org = DataFrame(CSV.File(csv_path; header = 2))

@testset "CSV -> DataFrame" begin
-con = DBInterface.connect(DB)
-df_res = TIO.create_tbl(con, csv_path; show = true)
+con = DBInterface.connect(DuckDB.DB)
+df_res = TulipaIO.create_tbl(con, csv_path; show = true)
@test shape(df_org) == shape(df_res)
-@test_throws TIO.FileNotFoundError TIO.create_tbl(con, "not-there")
+@test_throws TulipaIO.FileNotFoundError TulipaIO.create_tbl(con, "not-there")
if (VERSION.major >= 1) && (VERSION.minor >= 8)
@test_throws r"not-there" TIO.create_tbl(con, "not-there")
@test_throws r"not-there" TulipaIO.create_tbl(con, "not-there")
end
end

@testset "CSV -> DataFrame w/ a schema" begin
-con = DBInterface.connect(DB)
+con = DBInterface.connect(DuckDB.DB)
mapping_csv_path = joinpath(DATA, "Norse/rep-periods-mapping.csv")
col_schema = Dict(:period => "INT", :rep_period => "VARCHAR", :weight => "DOUBLE")
-TIO.create_tbl(con, mapping_csv_path; types = col_schema)
-df_types = DD.query(con, "DESCRIBE rep_periods_mapping") |> DF.DataFrame
+TulipaIO.create_tbl(con, mapping_csv_path; types = col_schema)
+df_types = DuckDB.query(con, "DESCRIBE rep_periods_mapping") |> DataFrame
@test df_types.column_name == ["period", "rep_period", "weight"]
@test df_types.column_type == ["INTEGER", "VARCHAR", "DOUBLE"]
end

opts = Dict(:on => [:name], :cols => [:investable], :show => true)
@testset "CSV w/ alternatives -> DataFrame" begin
-con = DBInterface.connect(DB)
-df_res = TIO.create_tbl(con, csv_path, csv_copy; opts..., fill = false)
-df_exp = DF.DataFrame(CSV.File(csv_copy; header = 2))
+con = DBInterface.connect(DuckDB.DB)
+df_res = TulipaIO.create_tbl(con, csv_path, csv_copy; opts..., fill = false)
+df_exp = DataFrame(CSV.File(csv_copy; header = 2))
@test df_exp.investable == df_res.investable
@test df_org.investable != df_res.investable
end

@testset "no filling for missing rows" begin
-con = DBInterface.connect(DB)
-df_res = TIO.create_tbl(con, csv_path, csv_fill; opts..., fill = false)
-df_ref = DF.DataFrame(CSV.File(csv_fill; header = 2))
+con = DBInterface.connect(DuckDB.DB)
+df_res = TulipaIO.create_tbl(con, csv_path, csv_fill; opts..., fill = false)
+df_ref = DataFrame(CSV.File(csv_fill; header = 2))
# NOTE: row order is different, join to determine equality
cmp = join_cmp(df_res, df_ref, ["name", "investable"]; on = :name)
-@test (DF.subset(cmp, :investable_1 => DF.ByRow(ismissing)).source .== "left_only") |> all
-@test (DF.subset(cmp, :investable_1 => DF.ByRow(!ismissing)).source .== "both") |> all
+@test (
+DataFrames.subset(cmp, :investable_1 => DataFrames.ByRow(ismissing)).source .==
+"left_only"
+) |> all
+@test (
+DataFrames.subset(cmp, :investable_1 => DataFrames.ByRow(!ismissing)).source .== "both"
+) |> all
end

@testset "back-filling missing rows" begin
-con = DBInterface.connect(DB)
-df_res = TIO.create_tbl(con, csv_path, csv_fill; opts..., fill = true)
-df_exp = DF.DataFrame(CSV.File(csv_copy; header = 2))
+con = DBInterface.connect(DuckDB.DB)
+df_res = TulipaIO.create_tbl(con, csv_path, csv_fill; opts..., fill = true)
+df_exp = DataFrame(CSV.File(csv_copy; header = 2))
cmp = join_cmp(df_exp, df_res, ["name", "investable"]; on = :name)
@test all(cmp.investable .== cmp.investable_1)
@test (cmp.source .== "both") |> all
end

@testset "back-filling missing rows w/ alternate values" begin
-con = DBInterface.connect(DB)
-df_res = TIO.create_tbl(
+con = DBInterface.connect(DuckDB.DB)
+df_res = TulipaIO.create_tbl(
con,
csv_path,
csv_fill;
opts...,
fill = true,
fill_values = Dict(:investable => true),
)
-df_ref = DF.DataFrame(CSV.File(csv_fill; header = 2))
+df_ref = DataFrame(CSV.File(csv_fill; header = 2))
cmp = join_cmp(df_res, df_ref, ["name", "investable"]; on = :name)
-@test (DF.subset(cmp, :investable_1 => DF.ByRow(ismissing)).investable) |> all
+@test (DataFrames.subset(cmp, :investable_1 => DataFrames.ByRow(ismissing)).investable) |>
+all
end

@testset "temporary tables" begin
-con = DBInterface.connect(DB)
-tbl_name = TIO.create_tbl(con, csv_path; name = "tmp_assets", tmp = true)
+con = DBInterface.connect(DuckDB.DB)
+tbl_name = TulipaIO.create_tbl(con, csv_path; name = "tmp_assets", tmp = true)
@test tbl_name in tmp_tbls(con)[!, :name]

-tbl_name = TIO.create_tbl(con, csv_path; tmp = true)
+tbl_name = TulipaIO.create_tbl(con, csv_path; tmp = true)
@test tbl_name == "t_assets_data" # t_<cleaned up filename>
@test tbl_name in tmp_tbls(con)[!, :name]
end

@testset "CSV -> table" begin
-con = DBInterface.connect(DB)
-tbl_name = TIO.create_tbl(con, csv_path; name = "no_assets")
-df_res = DF.DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
+con = DBInterface.connect(DuckDB.DB)
+tbl_name = TulipaIO.create_tbl(con, csv_path; name = "no_assets")
+df_res = DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
@test shape(df_org) == shape(df_res)
# @show df_org[1:3, 1:5] df_res[1:3, 1:5]
#
@@ -169,36 +173,42 @@ end

@testset "table + CSV w/ alternatives -> table" begin
# test setup
-con = DBInterface.connect(DB)
-TIO.create_tbl(con, csv_path; name = "no_assets")
+con = DBInterface.connect(DuckDB.DB)
+TulipaIO.create_tbl(con, csv_path; name = "no_assets")

opts = Dict(:on => [:name], :cols => [:investable])
-tbl_name =
-TIO.create_tbl(con, "no_assets", csv_copy; name = "alt_assets", opts..., fill = false)
-df_res = DF.DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
-df_exp = DF.DataFrame(CSV.File(csv_copy; header = 2))
+tbl_name = TulipaIO.create_tbl(
+con,
+"no_assets",
+csv_copy;
+name = "alt_assets",
+opts...,
+fill = false,
+)
+df_res = DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
+df_exp = DataFrame(CSV.File(csv_copy; header = 2))
@test df_exp.investable == df_res.investable
@test df_org.investable != df_res.investable

@testset "back-filling missing rows" begin
-tbl_name = TIO.create_tbl(
+tbl_name = TulipaIO.create_tbl(
con,
"no_assets",
csv_fill;
name = "alt_assets_filled",
opts...,
fill = true,
)
-df_res = DF.DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
-df_exp = DF.DataFrame(CSV.File(csv_copy; header = 2))
+df_res = DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
+df_exp = DataFrame(CSV.File(csv_copy; header = 2))
# NOTE: row order is different, join to determine equality
cmp = join_cmp(df_exp, df_res, ["name", "investable"]; on = :name)
@test all(cmp.investable .== cmp.investable_1)
@test (cmp.source .== "both") |> all
end

@testset "back-filling missing rows w/ alternate values" begin
-tbl_name = TIO.create_tbl(
+tbl_name = TulipaIO.create_tbl(
con,
"no_assets",
csv_fill;
@@ -207,10 +217,12 @@ end
fill = true,
fill_values = Dict(:investable => true),
)
-df_res = DF.DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
-df_ref = DF.DataFrame(CSV.File(csv_fill; header = 2))
+df_res = DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
+df_ref = DataFrame(CSV.File(csv_fill; header = 2))
cmp = join_cmp(df_res, df_ref, ["name", "investable"]; on = :name)
-@test (DF.subset(cmp, :investable_1 => DF.ByRow(ismissing)).investable) |> all
+@test (
+DataFrames.subset(cmp, :investable_1 => DataFrames.ByRow(ismissing)).investable
+) |> all
end
end
end
@@ -220,41 +232,46 @@ end
csv_copy = replace(csv_path, "data.csv" => "data-copy.csv")
csv_fill = replace(csv_path, "data.csv" => "data-alt.csv")

-df_org = DF.DataFrame(CSV.File(csv_path; header = 2))
+df_org = DataFrame(CSV.File(csv_path; header = 2))

opts = Dict(:on => :name, :name => "dummy", :show => true)
@testset "w/ vector" begin
-con = DBInterface.connect(DB)
-df_exp = DF.DataFrame(CSV.File(csv_copy; header = 2))
-df_res = TIO.create_tbl(con, csv_path, Dict(:investable => df_exp.investable); opts...)
+con = DBInterface.connect(DuckDB.DB)
+df_exp = DataFrame(CSV.File(csv_copy; header = 2))
+df_res = TulipaIO.create_tbl(con, csv_path, Dict(:investable => df_exp.investable); opts...)
# NOTE: row order is different, join to determine equality
cmp = join_cmp(df_exp, df_res, ["name", "investable"]; on = :name)
investable = cmp[!, [c for c in propertynames(cmp) if occursin("investable", String(c))]]
@test isequal.(investable[!, 1], investable[!, 2]) |> all

# stupid Julia! grow up!
args = [con, csv_path, Dict(:investable => df_exp.investable[2:end])]
-@test_throws DimensionMismatch TIO.create_tbl(args...; opts...)
+@test_throws DimensionMismatch TulipaIO.create_tbl(args...; opts...)
if (VERSION.major >= 1) && (VERSION.minor >= 8)
@test_throws r"Length.+different" TIO.create_tbl(args...; opts...)
@test_throws r"index.+value" TIO.create_tbl(args...; opts...)
@test_throws r"Length.+different" TulipaIO.create_tbl(args...; opts...)
@test_throws r"index.+value" TulipaIO.create_tbl(args...; opts...)
end
end

@testset "w/ constant" begin
-con = DBInterface.connect(DB)
-df_res = TIO.create_tbl(con, csv_path, Dict(:investable => true); opts...)
+con = DBInterface.connect(DuckDB.DB)
+df_res = TulipaIO.create_tbl(con, csv_path, Dict(:investable => true); opts...)
@test df_res.investable |> all

-table_name = TIO.create_tbl(con, csv_path, Dict(:investable => true); on = :name)
+table_name = TulipaIO.create_tbl(con, csv_path, Dict(:investable => true); on = :name)
@test "assets_data" == table_name
end

@testset "w/ constant after filtering" begin
-con = DBInterface.connect(DB)
-where_clause = TIO.FmtSQL.@where_(lifetime in 25:50, name % "Valhalla_%")
-df_res =
-TIO.create_tbl(con, csv_path, Dict(:investable => true); opts..., where_ = where_clause)
+con = DBInterface.connect(DuckDB.DB)
+where_clause = TulipaIO.FmtSQL.@where_(lifetime in 25:50, name % "Valhalla_%")
+df_res = TulipaIO.create_tbl(
+con,
+csv_path,
+Dict(:investable => true);
+opts...,
+where_ = where_clause,
+)
@test shape(df_res) == shape(df_org)
df_res =
filter(row -> 25 <= row.lifetime <= 50 && startswith(row.name, "Valhalla_"), df_res)
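A side note on the version checks retained throughout the diff: `@test_throws` only began accepting a string or regex to match against the error message in Julia 1.8, hence the `(VERSION.major >= 1) && (VERSION.minor >= 8)` guards. A minimal sketch of the same guard, not part of the commit, with a hypothetical `MyError` type standing in for TulipaIO's custom errors; `VERSION >= v"1.8"` is an equivalent, more conventional spelling:

```julia
using Test: Test, @test_throws

# Hypothetical error type for illustration only.
struct MyError <: Exception
    msg::String
end
Base.showerror(io::IO, e::MyError) = print(io, e.msg)

fail() = throw(MyError("table not-there does not exist"))

@test_throws MyError fail()           # type-based form works on all versions
if VERSION >= v"1.8"
    @test_throws r"not-there" fail()  # message matching requires Julia 1.8+
end
```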
