From b91866444d7b91e32666b4789ee65ac94ec100eb Mon Sep 17 00:00:00 2001 From: holgerteichgraeber Date: Wed, 9 Oct 2019 18:22:29 -0700 Subject: [PATCH] Allow for extreme value search among multiple nodes, but a common attribute --- examples/workflow_example_extr.jl | 15 +++++++++++++++ src/clustering/extreme_vals.jl | 14 +++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/examples/workflow_example_extr.jl b/examples/workflow_example_extr.jl index 3327eec..8a285f4 100644 --- a/examples/workflow_example_extr.jl +++ b/examples/workflow_example_extr.jl @@ -6,6 +6,7 @@ using TimeSeriesClustering data_path=normpath(joinpath(dirname(@__FILE__),"..","data","TS_GER_18")) ts_input_data = load_timeseries_data(data_path; T=24, years=[2015]) +################## Example 1 # define simple extreme days of interest ev1 = SimpleExtremeValueDescr("wind-dena42","max","absolute") ev2 = SimpleExtremeValueDescr("solar-dena42","min","integral") @@ -19,3 +20,17 @@ ts_clust_res = run_clust(ts_input_data_mod;method="kmeans",representation="centr # representation modification ts_clust_extr = representation_modification(extr_vals,ts_clust_res.clust_data) + + +################## Example 2 +# find the minimum wind day among all nodes +ev4 = SimpleExtremeValueDescr("wind","min","absolute") + # simple extreme day selection + ts_input_data_mod_2,extr_vals_2,extr_idcs_2 = simple_extr_val_sel(ts_input_data,ev4;rep_mod_method="feasibility") + + # run clustering on the Example 2 input data +ts_clust_res_2 = run_clust(ts_input_data_mod_2;method="kmeans",representation="centroid",n_init=10,n_clust=5) # default k-means + +# representation modification of the Example 2 clustering result +ts_clust_extr_2 = representation_modification(extr_vals_2,ts_clust_res_2.clust_data) + diff --git a/src/clustering/extreme_vals.jl b/src/clustering/extreme_vals.jl index 4fe4f1a..9887b8a 100644 --- a/src/clustering/extreme_vals.jl +++ b/src/clustering/extreme_vals.jl @@ -92,7 +92,19 @@ function simple_extr_val_ident(clust_data::ClustData, extremum::String="max", 
peak_def::String="absolute", consecutive_periods::Int=1) - data=clust_data.data[data_type] + # all attribute-node pairs in clust_data + data_types = [k for k in keys(clust_data.data)] + attribute_nodes = data_types[occursin.(data_type, data_types)] + if isempty(attribute_nodes) + error("data_type=$data_type is neither an attribute-node pair nor is it an attribute") + # if data_type is an attribute only, aggregate data among all nodes that contain that attribute + # this contains the special case that data_type is an attribute-node pair + else + data = zeros(clust_data.T*length(attribute_nodes), clust_data.K) + for i in 1:length(attribute_nodes) + data[1+(i-1)*clust_data.T:i*clust_data.T, :] = clust_data.data[attribute_nodes[i]] + end + end delta_period=consecutive_periods-1 # set data to be compared if peak_def=="absolute" && consecutive_periods==1