From b91866444d7b91e32666b4789ee65ac94ec100eb Mon Sep 17 00:00:00 2001
From: holgerteichgraeber <holger@teichgr.de>
Date: Wed, 9 Oct 2019 18:22:29 -0700
Subject: [PATCH] Allow extreme value search across multiple nodes that share a
 common attribute

---
 examples/workflow_example_extr.jl | 15 +++++++++++++++
 src/clustering/extreme_vals.jl    | 14 +++++++++++++-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/examples/workflow_example_extr.jl b/examples/workflow_example_extr.jl
index 3327eec..8a285f4 100644
--- a/examples/workflow_example_extr.jl
+++ b/examples/workflow_example_extr.jl
@@ -6,6 +6,7 @@ using TimeSeriesClustering
 data_path=normpath(joinpath(dirname(@__FILE__),"..","data","TS_GER_18"))
 ts_input_data = load_timeseries_data(data_path; T=24, years=[2015])
 
+################## Example 1
  # define simple extreme days of interest
  ev1 = SimpleExtremeValueDescr("wind-dena42","max","absolute")
  ev2 = SimpleExtremeValueDescr("solar-dena42","min","integral")
@@ -19,3 +20,17 @@ ts_clust_res = run_clust(ts_input_data_mod;method="kmeans",representation="centr
 
 # representation modification
 ts_clust_extr = representation_modification(extr_vals,ts_clust_res.clust_data)
+
+
+################## Example 2
+# find the minimum wind day among all nodes (attribute only, no node suffix)
+ev4 = SimpleExtremeValueDescr("wind","min","absolute")
+# simple extreme day selection
+ts_input_data_mod_2,extr_vals_2,extr_idcs_2 = simple_extr_val_sel(ts_input_data,ev4;rep_mod_method="feasibility")
+
+# run clustering on the input data returned by the Example 2 selection (not Example 1's)
+ts_clust_res_2 = run_clust(ts_input_data_mod_2;method="kmeans",representation="centroid",n_init=10,n_clust=5) # default k-means
+
+# representation modification, applied to the Example 2 clustering result
+ts_clust_extr_2 = representation_modification(extr_vals_2,ts_clust_res_2.clust_data)
+
diff --git a/src/clustering/extreme_vals.jl b/src/clustering/extreme_vals.jl
index 4fe4f1a..9887b8a 100644
--- a/src/clustering/extreme_vals.jl
+++ b/src/clustering/extreme_vals.jl
@@ -92,7 +92,19 @@ function simple_extr_val_ident(clust_data::ClustData,
                                extremum::String="max",
                                peak_def::String="absolute",
                                consecutive_periods::Int=1)
-  data=clust_data.data[data_type]
+  # all attribute-node pairs in clust_data
+  data_types = [k for k in keys(clust_data.data)]
+  attribute_nodes = data_types[occursin.(data_type, data_types)]
+  if isempty(attribute_nodes)
+    error("data_type=$data_type is neither an attribute-node pair nor is it an attribute")
+  # if data_type is an attribute only, stack the data of all nodes that contain that attribute
+  # (this also covers the special case that data_type is a single attribute-node pair)
+  else
+    data = zeros(clust_data.T*length(attribute_nodes), clust_data.K)
+    for i in 1:length(attribute_nodes)
+      data[1+(i-1)*clust_data.T:i*clust_data.T, :] = clust_data.data[attribute_nodes[i]]
+    end
+  end
   delta_period=consecutive_periods-1
   # set data to be compared
   if peak_def=="absolute" && consecutive_periods==1