From 184cda2ceef108f51f254a88eadbb20f1a30c950 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Fri, 11 Oct 2024 14:08:22 +0200 Subject: [PATCH 1/6] extract tweaks --- src/methods/extract.jl | 60 +++++++++++++++++++----------------------- test/methods.jl | 6 ++--- 2 files changed, 30 insertions(+), 36 deletions(-) diff --git a/src/methods/extract.jl b/src/methods/extract.jl index 898bae16..ff53870c 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -5,7 +5,7 @@ istrue(::_True) = true istrue(::_False) = false """ - extract(x, data; atol) + extract(x, data; [geometry, index, name, skipmissing, atol]) Extracts the value of `Raster` or `RasterStack` at given points, returning an iterable of `NamedTuple` with properties for `:geometry` and raster or @@ -311,50 +311,44 @@ end # _rowtype returns the complete NamedTuple type for a point row # This code is entirely for types stability and performance. -_rowtype(x, g; kw...) = _rowtype(x, typeof(g); kw...) -_rowtype(x, g::Type; geometry, index, skipmissing, names, kw...) = - _rowtype(x, g, geometry, index, skipmissing, names) -function _rowtype(x, ::Type{G}, geometry::_True, index::_True, skipmissing::_True, names::NamedTuple{Names}) where {G,Names} - keys = (:geometry, :index, Names...,) - types = Tuple{G,Tuple{Int,Int},_nametypes(x, names, skipmissing)...} - NamedTuple{keys,types} +function _rowtype(x, g; geometry, index, skipmissing, names, kw...) + G = if skipmissing isa _True + nonmissingtype(typeof(g)) + else + typeof(g) + end + _rowtype(x, G; geometry, index, skipmissing, names) end -function _rowtype(x, ::Type{G}, geometry::_True, index::_False, skipmissing::_True, names::NamedTuple{Names}) where {G,Names} - keys = (:geometry, Names...,) - types = Tuple{G,_nametypes(x, names, skipmissing)...} +function _rowtype(x, g::Type; geometry, index, skipmissing, names, kw...) + keys = _rowkeys(geometry, index, names) + types = _rowtypes(x, g, geometry, index, skipmissing, names) NamedTuple{keys,types} end -function _rowtype(x, ::Type{G}, geometry::_False, index::_True, skipmissing::_True, names::NamedTuple{Names}) where {G,Names} - keys = (:index, Names...,) - types = Tuple{Tuple{Int,Int},_nametypes(x, names, skipmissing)...} - NamedTuple{keys,types} + +function _rowtypes(x, ::Type{G}, geometry::_True, index::_True, skipmissing::_True, names::NamedTuple{Names}) where {G,Names} + types = Tuple{G,Tuple{Int,Int},_nametypes(x, names, skipmissing)...} end -function _rowtype(x, ::Type{G}, geometry::_False, index::_False, skipmissing::_True, names::NamedTuple{Names}) where {G,Names} - keys = Names - types = Tuple{_nametypes(x, names, skipmissing)...} - NamedTuple{keys,types} +function _rowtypes(x, ::Type{G}, geometry::_True, index::_False, skipmissing, names::NamedTuple{Names}) where {G,Names} + types = Tuple{G,_nametypes(x, names, skipmissing)...} end -function _rowtype(x, ::Type{G}, geometry::_True, index::_True, skipmissing::_False, names::NamedTuple{Names}) where {G,Names} - keys = (:geometry, :index, names...,) - types = Tuple{Union{Missing,G},Union{Missing,Tuple{Int,Int}},_nametypes(x, names, skipmissing)...} - NamedTuple{keys,types} +function _rowtypes(x, ::Type{G}, geometry::_False, index::_True, skipmissing::_True, names::NamedTuple{Names}) where {G,Names} + types = Tuple{Tuple{Int,Int},_nametypes(x, names, skipmissing)...} end -function _rowtype(x, ::Type{G}, geometry::_True, index::_False, skipmissing::_False, names::NamedTuple{Names}) where {G,Names} - keys = (:geometry, Names...,) - types = Tuple{Union{Missing,G},_nametypes(x, names, skipmissing)...} - NamedTuple{keys,types} +function _rowtypes(x, ::Type{G}, geometry::_True, index::_True, skipmissing::_False, names::NamedTuple{Names}) where {G,Names} + types = Tuple{G,Union{Missing,Tuple{Int,Int}},_nametypes(x, names, skipmissing)...} end -function _rowtype(x, ::Type{G}, geometry::_False, index::_True, skipmissing::_False, names::NamedTuple{Names}) where {G,Names} - keys = (:index, Names...,) +function _rowtypes(x, ::Type{G}, geometry::_False, index::_True, skipmissing::_False, names::NamedTuple{Names}) where {G,Names} types = Tuple{Union{Missing,Tuple{Int,Int}},_nametypes(x, names, skipmissing)...} - NamedTuple{keys,types} end -function _rowtype(x, ::Type{G}, geometry::_False, index::_False, skipmissing::_False, names::NamedTuple{Names}) where {G,Names} - keys = Names +function _rowtypes(x, ::Type{G}, geometry::_False, index::_False, skipmissing, names::NamedTuple{Names}) where {G,Names} types = Tuple{_nametypes(x, names, skipmissing)...} - NamedTuple{keys,types} end +_rowkeys(geometry::_False, index::_False, names::NamedTuple{Names}) where Names = Names +_rowkeys(geometry::_True, index::_False, names::NamedTuple{Names}) where Names = (:geometry, Names...) +_rowkeys(geometry::_True, index::_True, names::NamedTuple{Names}) where Names = (:geometry, :index, Names...) +_rowkeys(geometry::_False, index::_True, names::NamedTuple{Names}) where Names = (:index, Names...) + @inline _skip_missing_rows(rows, ::Missing, names) = Iterators.filter(row -> !any(ismissing, row), rows) @inline _skip_missing_rows(rows, missingval, names) = diff --git a/test/methods.jl b/test/methods.jl index cd07130d..df1a81e5 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -399,7 +399,7 @@ createpoint(args...) = ArchGDAL.createpoint(args...) (index = (1, 2), test = 2,) ]) # NamedTuple (reversed) points - tests a Table that iterates over points - T = @NamedTuple{geometry::Union{Missing,@NamedTuple{Y::Float64,X::Float64}},test::Union{Missing,Int64}} + T = @NamedTuple{geometry::Union{@NamedTuple{Y::Float64,X::Float64}},test::Union{Missing,Int64}} @test all(extract(rast, [(Y=0.1, X=9.0), (Y=0.2, X=10.0), (Y=0.3, X=10.0)]) .=== T[ (geometry = (Y = 0.1, X = 9.0), test = 1) (geometry = (Y = 0.2, X = 10.0), test = 4) @@ -412,7 +412,7 @@ createpoint(args...) = ArchGDAL.createpoint(args...) ]) # Extract a polygon p = ArchGDAL.createpolygon([[[8.0, 0.0], [11.0, 0.0], [11.0, 0.4], [8.0, 0.0]]]) - T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},test::Union{Missing,Int64}} + T = @NamedTuple{geometry::Union{Tuple{Float64,Float64}},test::Union{Missing,Int64}} @test all(extract(rast_m, p) .=== T[ (geometry = (9.0, 0.1), test = 1) (geometry = (10.0, 0.1), test = 3) @@ -447,7 +447,7 @@ createpoint(args...) = ArchGDAL.createpoint(args...) (index = (2, 1), test = 3) (index = (2, 2), test = missing) ]) - T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},index::Union{Missing,Tuple{Int,Int}},test::Union{Missing,Int64}} + T = @NamedTuple{geometry::Union{Tuple{Float64,Float64}},index::Union{Missing,Tuple{Int,Int}},test::Union{Missing,Int64}} @test all(extract(rast_m, p; index=true) .=== T[ (geometry = (9.0, 0.1), index = (1, 1), test = 1) (geometry = (10.0, 0.1), index = (2, 1), test = 3) From 945542349846025da8780de644166a45c6695b12 Mon Sep 17 00:00:00 2001 From: Tiem van der Deure Date: Fri, 11 Oct 2024 16:13:32 +0200 Subject: [PATCH 2/6] just write kw... in docs Co-authored-by: Rafael Schouten --- src/methods/extract.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/methods/extract.jl b/src/methods/extract.jl index ff53870c..1686325f 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -5,7 +5,7 @@ istrue(::_True) = true istrue(::_False) = false """ - extract(x, data; [geometry, index, name, skipmissing, atol]) + extract(x, data; kw...) Extracts the value of `Raster` or `RasterStack` at given points, returning an iterable of `NamedTuple` with properties for `:geometry` and raster or From 30bf5e840d8039a9e972518bca112f840ceac333 Mon Sep 17 00:00:00 2001 From: Tiem van der Deure Date: Fri, 11 Oct 2024 16:14:00 +0200 Subject: [PATCH 3/6] use Type{G} Co-authored-by: Rafael Schouten --- src/methods/extract.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/methods/extract.jl b/src/methods/extract.jl index 1686325f..b5918eb7 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -319,9 +319,9 @@ function _rowtype(x, g; geometry, index, skipmissing, names, kw...) end _rowtype(x, G; geometry, index, skipmissing, names) end -function _rowtype(x, g::Type; geometry, index, skipmissing, names, kw...) +function _rowtype(x, ::Type{G}; geometry, index, skipmissing, names, kw...) keys = _rowkeys(geometry, index, names) - types = _rowtypes(x, g, geometry, index, skipmissing, names) + types = _rowtypes(x, G, geometry, index, skipmissing, names) NamedTuple{keys,types} end From 6623679b39fcc5b102cc5f4d911cb868c8c66436 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Fri, 11 Oct 2024 16:17:51 +0200 Subject: [PATCH 4/6] add a missing where G --- src/methods/extract.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/methods/extract.jl b/src/methods/extract.jl index b5918eb7..3eab68f6 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -319,7 +319,7 @@ function _rowtype(x, g; geometry, index, skipmissing, names, kw...) end _rowtype(x, G; geometry, index, skipmissing, names) end -function _rowtype(x, ::Type{G}; geometry, index, skipmissing, names, kw...) +function _rowtype(x, ::Type{G}; geometry, index, skipmissing, names, kw...) where G keys = _rowkeys(geometry, index, names) types = _rowtypes(x, G, geometry, index, skipmissing, names) NamedTuple{keys,types} From f1c4eef5c92c85161385471532b8dcd7586eb2b3 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Sat, 12 Oct 2024 10:04:50 +0200 Subject: [PATCH 5/6] merge _rowtype changes from other branch --- src/methods/extract.jl | 82 ++++++++++-------------------------------- src/utils.jl | 60 +++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 64 deletions(-) diff --git a/src/methods/extract.jl b/src/methods/extract.jl index 3eab68f6..07b0fe46 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -1,9 +1,3 @@ -using DimensionalData.Lookups: _True, _False - -_booltype(x) = x ? _True() : _False() -istrue(::_True) = true -istrue(::_False) = false - """ extract(x, data; kw...) @@ -78,7 +72,7 @@ function extract end end function _extract(A::RasterStackOrArray, geom::Missing, names, kw...) - T = _rowtype(A, geom; names, kw...) + T = _extractrowtype(A, geom; names, kw...) [_maybe_add_fields(T, map(_ -> missing, names), missing, missing)] end function _extract(A::RasterStackOrArray, geom; names, kw...) @@ -89,9 +83,9 @@ function _extract(A::RasterStackOrArray, ::Nothing, data; ) geoms = _get_geometries(data, geometrycolumn) T = if istrue(skipmissing) - _rowtype(A, nonmissingtype(eltype(geoms)); names, skipmissing, kw...) + _extractrowtype(A, nonmissingtype(eltype(geoms)); names, skipmissing, kw...) else - _rowtype(A, eltype(geoms); names, skipmissing, kw...) + _extractrowtype(A, eltype(geoms); names, skipmissing, kw...) end # Handle empty / all missing cases (length(geoms) > 0 && any(!ismissing, geoms)) || return T[] @@ -143,14 +137,14 @@ end function _extract(A::RasterStackOrArray, ::GI.AbstractMultiPointTrait, geom; skipmissing, kw... ) - T = _rowtype(A, GI.getpoint(geom, 1); names, skipmissing, kw...) + T = _extractrowtype(A, GI.getpoint(geom, 1); names, skipmissing, kw...) rows = (_extract_point(T, A, p, skipmissing; kw...) for p in GI.getpoint(geom)) return skipmissing isa _True ? collect(_skip_missing_rows(rows, _missingval_or_missing(A), names)) : collect(rows) end function _extract(A::RasterStackOrArray, ::GI.PointTrait, geom; skipmissing, kw... ) - T = _rowtype(A, geom; names, skipmissing, kw...) + T = _extractrowtype(A, geom; names, skipmissing, kw...) _extract_point(T, A, geom, skipmissing; kw...) end function _extract(A::RasterStackOrArray, t::GI.AbstractGeometryTrait, geom; @@ -168,7 +162,7 @@ function _extract(A::RasterStackOrArray, t::GI.AbstractGeometryTrait, geom; else GI.x(p), GI.y(p) end - T = _rowtype(A, tuplepoint; names, skipmissing, kw...) + T = _extractrowtype(A, tuplepoint; names, skipmissing, kw...) B = boolmask(geom; to=template, kw...) offset = CartesianIndex(map(x -> first(x) - 1, parentindices(parent(template)))) # Add a row for each pixel that is `true` in the mask @@ -290,64 +284,24 @@ Base.@assume_effects :total function _maybe_add_fields(::Type{T}, props::NamedTu end end -_names(A::AbstractRaster) = (Symbol(name(A)),) -_names(A::AbstractRasterStack) = keys(A) - -@inline _nametypes(::Raster{T}, ::NamedTuple{Names}, skipmissing::_True) where {T,Names} = (nonmissingtype(T),) -@inline _nametypes(::Raster{T}, ::NamedTuple{Names}, skipmissing::_False) where {T,Names} = (Union{Missing,T},) -# This only compiles away when generated -@generated function _nametypes( - ::RasterStack{<:Any,T}, ::NamedTuple{PropNames}, skipmissing::_True -) where {T<:NamedTuple{StackNames,Types},PropNames} where {StackNames,Types} - nt = NamedTuple{StackNames}(map(nonmissingtype, Types.parameters)) - return values(nt[PropNames]) -end -@generated function _nametypes( - ::RasterStack{<:Any,T}, ::NamedTuple{PropNames}, skipmissing::_False -) where {T<:NamedTuple{StackNames,Types},PropNames} where {StackNames,Types} - nt = NamedTuple{StackNames}(map(T -> Union{Missing,T}, Types.parameters)) - return values(nt[PropNames]) +function _extractrowtype(x, g; geometry, index, skipmissing, names, kw...) + I = if istrue(skipmissing) + Tuple{Int, Int} + else + Union{Missing, Tuple{Int, Int}} + end + _extractrowtype(x, g, I; geometry, index, skipmissing, names, kw...) end - -# _rowtype returns the complete NamedTuple type for a point row -# This code is entirely for types stability and performance. -function _rowtype(x, g; geometry, index, skipmissing, names, kw...) - G = if skipmissing isa _True +function _extractrowtype(x, g, ::Type{I}; geometry, index, skipmissing, names, kw...) where I + G = if istrue(skipmissing) nonmissingtype(typeof(g)) else typeof(g) end - _rowtype(x, G; geometry, index, skipmissing, names) -end -function _rowtype(x, ::Type{G}; geometry, index, skipmissing, names, kw...) where G - keys = _rowkeys(geometry, index, names) - types = _rowtypes(x, G, geometry, index, skipmissing, names) - NamedTuple{keys,types} + _extractrowtype(x, G, I; geometry, index, skipmissing, names, kw...) end - -function _rowtypes(x, ::Type{G}, geometry::_True, index::_True, skipmissing::_True, names::NamedTuple{Names}) where {G,Names} - types = Tuple{G,Tuple{Int,Int},_nametypes(x, names, skipmissing)...} -end -function _rowtypes(x, ::Type{G}, geometry::_True, index::_False, skipmissing, names::NamedTuple{Names}) where {G,Names} - types = Tuple{G,_nametypes(x, names, skipmissing)...} -end -function _rowtypes(x, ::Type{G}, geometry::_False, index::_True, skipmissing::_True, names::NamedTuple{Names}) where {G,Names} - types = Tuple{Tuple{Int,Int},_nametypes(x, names, skipmissing)...} -end -function _rowtypes(x, ::Type{G}, geometry::_True, index::_True, skipmissing::_False, names::NamedTuple{Names}) where {G,Names} - types = Tuple{G,Union{Missing,Tuple{Int,Int}},_nametypes(x, names, skipmissing)...} -end -function _rowtypes(x, ::Type{G}, geometry::_False, index::_True, skipmissing::_False, names::NamedTuple{Names}) where {G,Names} - types = Tuple{Union{Missing,Tuple{Int,Int}},_nametypes(x, names, skipmissing)...} -end -function _rowtypes(x, ::Type{G}, geometry::_False, index::_False, skipmissing, names::NamedTuple{Names}) where {G,Names} - types = Tuple{_nametypes(x, names, skipmissing)...} -end - -_rowkeys(geometry::_False, index::_False, names::NamedTuple{Names}) where Names = Names -_rowkeys(geometry::_True, index::_False, names::NamedTuple{Names}) where Names = (:geometry, Names...) -_rowkeys(geometry::_True, index::_True, names::NamedTuple{Names}) where Names = (:geometry, :index, Names...) -_rowkeys(geometry::_False, index::_True, names::NamedTuple{Names}) where Names = (:index, Names...) +_extractrowtype(x, ::Type{G}, ::Type{I}; geometry, index, skipmissing, names, kw...) where {G, I} = + _rowtype(x, G, I; geometry, index, skipmissing, names) @inline _skip_missing_rows(rows, ::Missing, names) = Iterators.filter(row -> !any(ismissing, row), rows) diff --git a/src/utils.jl b/src/utils.jl index 57a4542d..1b5fbaf5 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -359,3 +359,63 @@ function _no_memory_error(f, bytes) """ return error(msg) end + + +# _rowtype returns the complete NamedTuple type for a point row +# This code is entirely for types stability and performance. +# It is used in extract and Rasters.sample +_names(A::AbstractRaster) = (Symbol(name(A)),) +_names(A::AbstractRasterStack) = keys(A) + +using DimensionalData.Lookups: _True, _False +_booltype(x) = x ? _True() : _False() +istrue(::_True) = true +istrue(::_False) = false + +function _rowtype(x, ::Type{G}, ::Type{I}; geometry, index, skipmissing, names) where {G, I} + keys = _rowkeys(geometry, index, names) + types = _rowtypes(x, G, I, geometry, index, skipmissing, names) + NamedTuple{keys,types} +end + +function _rowtypes( + x, ::Type{G}, ::Type{I}, geometry::_True, index::_True, skipmissing, names::NamedTuple{Names} +) where {G,I,Names} + Tuple{G,I,_nametypes(x, names, skipmissing)...} +end +function _rowtypes( + x, ::Type{G}, ::Type{I}, geometry::_True, index::_False, skipmissing, names::NamedTuple{Names} +) where {G,I,Names} + Tuple{G,_nametypes(x, names, skipmissing)...} +end +function _rowtypes( + x, ::Type{G}, ::Type{I}, geometry::_False, index::_True, skipmissing, names::NamedTuple{Names} +) where {G,I,Names} + Tuple{I,_nametypes(x, names, skipmissing)...} +end +function _rowtypes( + x, ::Type{G}, ::Type{I}, geometry::_False, index::_False, skipmissing, names::NamedTuple{Names} +) where {G,I,Names} + Tuple{_nametypes(x, names, skipmissing)...} +end + +@inline _nametypes(::Raster{T}, ::NamedTuple{Names}, skipmissing::_True) where {T,Names} = (nonmissingtype(T),) +@inline _nametypes(::Raster{T}, ::NamedTuple{Names}, skipmissing::_False) where {T,Names} = (Union{Missing,T},) +# This only compiles away when generated +@generated function _nametypes( + ::RasterStack{<:Any,T}, ::NamedTuple{PropNames}, skipmissing::_True +) where {T<:NamedTuple{StackNames,Types},PropNames} where {StackNames,Types} + nt = NamedTuple{StackNames}(map(nonmissingtype, Types.parameters)) + return values(nt[PropNames]) +end +@generated function _nametypes( + ::RasterStack{<:Any,T}, ::NamedTuple{PropNames}, skipmissing::_False +) where {T<:NamedTuple{StackNames,Types},PropNames} where {StackNames,Types} + nt = NamedTuple{StackNames}(map(T -> Union{Missing,T}, Types.parameters)) + return values(nt[PropNames]) +end + +_rowkeys(geometry::_False, index::_False, names::NamedTuple{Names}) where Names = Names +_rowkeys(geometry::_True, index::_False, names::NamedTuple{Names}) where Names = (:geometry, Names...) +_rowkeys(geometry::_True, index::_True, names::NamedTuple{Names}) where Names = (:geometry, :index, Names...) +_rowkeys(geometry::_False, index::_True, names::NamedTuple{Names}) where Names = (:index, Names...) From d180663d002d890ee36b8ae6938ecf0cf1164ac1 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Sat, 12 Oct 2024 11:47:02 +0200 Subject: [PATCH 6/6] optimization for extract points with skipmissing --- src/methods/extract.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/methods/extract.jl b/src/methods/extract.jl index 07b0fe46..5fd748ee 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -95,13 +95,15 @@ function _extract(A::RasterStackOrArray, ::Nothing, data; # We need to split out points from other geoms # TODO this will fail with mixed point/geom vectors if trait1 isa GI.PointTrait - rows = Vector{T}(undef, length(geoms)) if istrue(skipmissing) + T2 = _extractrowtype(A, eltype(geoms), Tuple{Int64, Int64}; + names, skipmissing = _False(), kw...) + rows = Vector{T2}(undef, length(geoms)) j = 1 for i in eachindex(geoms) g = geoms[i] ismissing(g) && continue - e = _extract_point(T, A, g, skipmissing; names, kw...) + e = _extract_point(T2, A, g, skipmissing; names, kw...) if !ismissing(e) rows[j] = e j += 1 @@ -109,7 +111,9 @@ function _extract(A::RasterStackOrArray, ::Nothing, data; nothing end deleteat!(rows, j:length(rows)) + rows = T === T2 ? rows : T.(rows) else + rows = Vector{T}(undef, length(geoms)) for i in eachindex(geoms) g = geoms[i] rows[i] = _extract_point(T, A, g, skipmissing; names, kw...)::T