holgerteichgraeber · YoungFaithful · Dec 6, 2018 · Nov 23, 2018 · Nov 26, 2018 · Nov 26, 2018
diff --git a/examples/workflow_example_extr.jl b/examples/workflow_example_extr.jl
@@ -0,0 +1,28 @@
+# This file exemplifies the workflow from data input to optimization result generation,
+# including the selection of simple extreme days that are added to the clustered periods.
+# QUESTION: why do we get "using ClustForOpt_priv.col in module Main conflicts with an existing identifier." and the same message for ClustForOpt_priv.cols?
+
+# load the development entry file of the sources directly instead of `using ClustForOpt_priv`
+include(normpath(joinpath(dirname(@__FILE__),"..","src","ClustForOpt_priv_development.jl")))
+#using ClustForOpt_priv
+#using Gurobi
+
+# load data (time series for clustering and the input data of the CEP optimization problem)
+ts_input_data,~ = load_timeseries_data("CEP", "GER_1";K=365, T=24) #CEP
+
+cep_input_data_GER=load_cep_data("GER_1")
+
+ # define simple extreme days of interest
+ # arguments: attribute name within the data, extremum ("min"/"max"), peak definition ("absolute"/"integral")
+ ev1 = SimpleExtremeValueDescr("wind-dena42","max","absolute")
+ ev2 = SimpleExtremeValueDescr("pv-dena42","min","integral")
+ ev3 = SimpleExtremeValueDescr("el_demand-dena21","max","absolute")
+ ev = [ev1, ev2, ev3]
+ # simple extreme day selection
+ # with "feasibility" the input data is returned unchanged and the extreme periods get weight zero
+ ts_input_data_mod,extr_vals,extr_idcs = simple_extr_val_sel(ts_input_data,ev;rep_mod_method="feasibility")
+
+ # run clustering
+ts_clust_res = run_clust(ts_input_data_mod;method="kmeans",representation="centroid",n_init=10,n_clust=5) # default k-means
+
+# representation modification: merge the extreme periods into the clustered periods
+ts_clust_extr = representation_modification(extr_vals,ts_clust_res.best_results)
+
+ # optimization
+ # NOTE(review): GurobiSolver() needs Gurobi in scope, but `using Gurobi` is commented out above - confirm that the development include loads it
+opt_res = run_cep_opt(ts_clust_extr,cep_input_data_GER;solver=GurobiSolver(),co2limit=1000.0)
diff --git a/src/ClustForOpt_priv_development.jl b/src/ClustForOpt_priv_development.jl
@@ -33,3 +33,4 @@ include(joinpath("utils","load_data.jl"))
 include(joinpath("optim_problems","run_opt.jl"))
 include(joinpath("clustering","run_clust.jl"))
 include(joinpath("clustering","exact_kmedoids.jl"))
+include(joinpath("clustering","extreme_vals.jl"))
diff --git a/src/clustering/extreme_vals.jl b/src/clustering/extreme_vals.jl
@@ -0,0 +1,245 @@
+"""
+    simple_extr_val_sel(data::ClustInputData,
+                        extr_value_descr_ar::Array{SimpleExtremeValueDescr,1};
+                        rep_mod_method::String="feasibility")
+
+Selects simple extreme values and returns modified data, extreme values, and the corresponding indices.
+
+rep_mod_method:
+- "feasibility": `data` is returned unchanged as the data to be clustered
+- "append": the extreme periods are removed from the data to be clustered
+
+Throws an `ArgumentError` if `rep_mod_method` is neither "feasibility" nor "append".
+"""
+function simple_extr_val_sel(data::ClustInputData,
+                             extr_value_descr_ar::Array{SimpleExtremeValueDescr,1};
+                             rep_mod_method::String="feasibility"
+                             )
+  # validate before doing any work: `@error` only logs and does not stop execution,
+  # which would leave `data_mod` undefined for an unknown method
+  if !(rep_mod_method in ("feasibility","append"))
+    throw(ArgumentError("rep_mod_method - "*rep_mod_method*" - does not exist"))
+  end
+  idcs = simple_extr_val_ident(data,extr_value_descr_ar)
+  extr_vals = extreme_val_output(data,idcs;rep_mod_method=rep_mod_method)
+  # for append method: modify data to be clustered to only contain the values that are not extreme values
+  data_mod = rep_mod_method=="append" ? input_data_modification(data,idcs) : data
+  return data_mod,extr_vals,idcs
+end
+
+"""
+    simple_extr_val_sel(data::ClustInputData,
+                        extr_value_descr::SimpleExtremeValueDescr;
+                        rep_mod_method::String="feasibility")
+
+Wrapper function for only one simple extreme value.
+Wraps the description into a one-element array and forwards to the array method, which
+returns modified data, extreme values, and the corresponding indices.
+"""
+function simple_extr_val_sel(data::ClustInputData,
+                             extr_value_descr::SimpleExtremeValueDescr;
+                             rep_mod_method::String="feasibility"
+                             )
+  descr_ar = [extr_value_descr]
+  return simple_extr_val_sel(data,descr_ar;rep_mod_method=rep_mod_method)
+end
+
+"""
+    simple_extr_val_ident(data::ClustInputData,extreme_value_descr_ar::Array{SimpleExtremeValueDescr,1})
+
+Identifies multiple simple extreme values from the data and returns an array with one column
+index per description, in the order of the descriptions (indices may repeat).
+
+Each description provides:
+data_type: any attribute from the attributes contained within *data*
+extremum: "min" or "max"
+peak_def: "absolute" or "integral"
+"""
+function simple_extr_val_ident(data::ClustInputData,
+                               extreme_value_descr_ar::Array{SimpleExtremeValueDescr,1})
+  # look up the index of the extreme value for every description
+  return Int[simple_extr_val_ident(data,descr) for descr in extreme_value_descr_ar]
+end
+
+"""
+    simple_extr_val_ident(data::ClustInputData,extreme_value_descr::SimpleExtremeValueDescr)
+
+Identifies a single simple extreme value from the data and returns the column index of the extreme value.
+Unpacks the fields of the description and forwards them to the method that takes the attribute name.
+
+data_type: any attribute from the attributes contained within *data*
+extremum: "min" or "max"
+peak_def: "absolute" or "integral"
+"""
+function simple_extr_val_ident(data::ClustInputData,
+                               extreme_value_descr::SimpleExtremeValueDescr)
+  attribute = extreme_value_descr.data_type
+  extremum = extreme_value_descr.extremum
+  peak_def = extreme_value_descr.peak_def
+  return simple_extr_val_ident(data, attribute; extremum=extremum, peak_def=peak_def)
+end
+
+"""
+    simple_extr_val_ident(data::ClustInputData,data_type::String;extremum="max",peak_def="absolute")
+
+Identifies a single simple extreme value within the attribute `data_type` of the data and
+returns the column index of the extreme value.
+An error message is logged if `data_type` is not a key of `data.data`.
+
+data_type: any attribute from the attributes contained within *data*
+extremum: "min" or "max"
+peak_def: "absolute" or "integral"
+"""
+function simple_extr_val_ident(data::ClustInputData,
+                               data_type::String;
+                               extremum::String="max",
+                               peak_def::String="absolute")
+  # TODO: Possibly add option to find maximum among all series of a data_type for a certain node
+  if !haskey(data.data,data_type)
+    @error("the provided data type - "*data_type*" - is not contained in data")
+  end
+  series = data.data[data_type]
+  return simple_extr_val_ident(series; extremum=extremum, peak_def=peak_def)
+end
+
+"""
+    simple_extr_val_ident(data::Array{Float64};extremum="max",peak_def="absolute")
+
+Identifies a single simple extreme value within a two-dimensional array and returns the
+column (period) index of the extreme value.
+
+extremum: "min" or "max"
+peak_def: "absolute" (compare the single entries) or "integral" (compare the column sums)
+
+Throws an `ArgumentError` if `extremum` or `peak_def` has any other value.
+"""
+function simple_extr_val_ident(data::Array{Float64};
+                               extremum::String="max",
+                               peak_def::String="absolute")
+  # set data to be compared
+  # unknown options throw: `@error` only logs and would leave the variables below undefined
+  if peak_def=="absolute"
+    data_eval = data
+  elseif peak_def=="integral"
+    data_eval = sum(data,dims=1)
+  else
+    throw(ArgumentError("peak_def - "*peak_def*" - not defined"))
+  end
+  # find minimum or maximum index. Second return value is the cartesian index, second entry of that is the column (period) index
+  if extremum=="max"
+    idx = findmax(data_eval)[2][2]
+  elseif extremum=="min"
+    idx = findmin(data_eval)[2][2]
+  else
+    throw(ArgumentError("extremum - "*extremum*" - not defined"))
+  end
+  return idx
+end
+
+"""
+    input_data_modification(data::ClustInputData,extr_val_idcs::Array{Int,1})
+
+Returns a ClustInputData struct with the remaining input data [data-extreme_vals]:
+the columns given by `extr_val_idcs` are removed from every attribute and from the weights,
+and K is reduced by the number of unique indices. Region, T, mean and sdv are taken from `data`.
+This function is needed for the append method for representation modification
+"""
+function input_data_modification(data::ClustInputData,extr_val_idcs::Array{Int,1})
+  # an index that occurs multiple times is only removed once
+  unique_extr_val_idcs = unique(extr_val_idcs)
+  K_dn = data.K - length(unique_extr_val_idcs)
+  data_dn=Dict{String,Array}()
+  for (dt,series) in data.data
+    # take all columns but the ones that are extreme vals
+    data_dn[dt] = series[:,setdiff(1:size(series,2),unique_extr_val_idcs)]
+  end
+  # weights are indexed linearly over their full length; size(weights,2) is 1 for a vector,
+  # which dropped all weights but (at most) the first one
+  weights_dn = data.weights[setdiff(1:length(data.weights),unique_extr_val_idcs)]
+  data_modified = ClustInputData(data.region,K_dn,data.T,data_dn,weights_dn;mean=data.mean,sdv=data.sdv)
+  return data_modified
+end
+
+"""
+    input_data_modification(data::ClustInputData,extr_val_idcs::Int)
+
+Wrapper function for a single extreme val.
+Wraps the index into a one-element array and forwards to the array method, which returns
+the ClustInputData struct with the remaining input data [data-extreme_vals].
+"""
+function input_data_modification(data::ClustInputData,extr_val_idcs::Int)
+  idcs_ar = [extr_val_idcs]
+  return input_data_modification(data,idcs_ar)
+end
+
+"""
+    extreme_val_output(data::ClustInputData,
+                       extr_val_idcs::Array{Int,1};
+                       rep_mod_method="feasibility")
+
+Takes indices as input and returns ClustInputData struct that contains the extreme vals from within data.
+Indices that occur multiple times are only used once.
+
+rep_mod_method:
+- "feasibility": the extreme vals get weight zero
+- "append": the extreme vals keep the weight that they had in data
+
+Throws an `ArgumentError` if `rep_mod_method` is neither "feasibility" nor "append".
+"""
+function extreme_val_output(data::ClustInputData,
+                            extr_val_idcs::Array{Int,1};
+                            rep_mod_method="feasibility")
+  unique_extr_val_idcs = unique(extr_val_idcs)
+  K_ed = length(unique_extr_val_idcs)
+  # choose the weights first; an unknown method throws instead of only logging, so that no
+  # struct with an untyped empty weights array is constructed
+  if rep_mod_method == "feasibility"
+    weights_ed = zeros(K_ed)
+  elseif rep_mod_method == "append"
+    weights_ed = data.weights[unique_extr_val_idcs]
+  else
+    throw(ArgumentError(string("rep_mod_method - ",rep_mod_method," - does not exist")))
+  end
+  data_ed=Dict{String,Array}()
+  for (dt,series) in data.data
+    data_ed[dt] = series[:,unique_extr_val_idcs]
+  end
+  extr_vals = ClustInputData(data.region,K_ed,data.T,data_ed,weights_ed;mean=data.mean,sdv=data.sdv)
+  return extr_vals
+end
+
+"""
+    extreme_val_output(data::ClustInputData,
+                       extr_val_idcs::Int;
+                       rep_mod_method="feasibility")
+
+Wrapper function for a single extreme val.
+Wraps the index into a one-element array and forwards to the array method, which returns
+the ClustInputData struct that contains the extreme vals from within data.
+"""
+function extreme_val_output(data::ClustInputData,
+                            extr_val_idcs::Int;
+                            rep_mod_method="feasibility")
+  idcs_ar = [extr_val_idcs]
+  return extreme_val_output(data,idcs_ar;rep_mod_method=rep_mod_method)
+end
+
+"""
+    representation_modification(extr_vals::ClustInputData,
+                                clust_data::ClustInputData)
+
+Merges the clustered data and extreme vals into one ClustInputData struct.
+For every attribute of `clust_data` the columns of `extr_vals` are appended to the right,
+and the weights of `extr_vals` are appended to the weights of `clust_data` as they are.
+Region, T, mean and sdv are taken from `clust_data`.
+"""
+function representation_modification(extr_vals::ClustInputData,
+                                     clust_data::ClustInputData,
+                                     )
+  data_mod=Dict{String,Array}()
+  for (dt,clust_series) in clust_data.data
+    data_mod[dt] = hcat(clust_series,extr_vals.data[dt])
+  end
+  # work on a copy so that the weights of clust_data are not modified
+  weights_mod = append!(copy(clust_data.weights),extr_vals.weights)
+  K_mod = clust_data.K + extr_vals.K
+  return ClustInputData(clust_data.region,K_mod,clust_data.T,data_mod,weights_mod;mean=clust_data.mean,sdv=clust_data.sdv)
+end
+
+"""
+    representation_modification(full_data::ClustInputData,
+                                clust_data::ClustInputData,
+                                extr_val_idcs::Array{Int,1};
+                                rep_mod_method::String="feasibility")
+
+Extracts the extreme vals at `extr_val_idcs` from `full_data` and merges them with the
+clustered data into one ClustInputData struct.
+The weights of the extreme vals are chosen according to the rep_mod_method when they are extracted.
+"""
+function representation_modification(full_data::ClustInputData,
+                                     clust_data::ClustInputData,
+                                     extr_val_idcs::Array{Int,1};
+                                     rep_mod_method::String="feasibility")
+  extr_vals = extreme_val_output(full_data,extr_val_idcs;rep_mod_method=rep_mod_method)
+  # the two-argument method takes no keyword arguments (forwarding rep_mod_method raised a MethodError);
+  # the method is already reflected in the weights of extr_vals
+  return representation_modification(extr_vals,clust_data)
+end
+
+"""
+    representation_modification(full_data::ClustInputData,
+                                clust_data::ClustInputData,
+                                extr_val_idcs::Int;
+                                rep_mod_method::String="feasibility")
+
+Wrapper function for a single extreme val.
+Wraps the index into a one-element array and forwards to the array method, which merges
+the clustered data and extreme vals into one ClustInputData struct.
+"""
+function representation_modification(full_data::ClustInputData,
+                                     clust_data::ClustInputData,
+                                     extr_val_idcs::Int;
+                                     rep_mod_method::String="feasibility")
+  idcs_ar = [extr_val_idcs]
+  return representation_modification(full_data,clust_data,idcs_ar;rep_mod_method=rep_mod_method)
+end
+
+
+
diff --git a/src/utils/load_clusters.py → src/clustering/shape_based/load_clusters.py b/src/utils/load_clusters.py → src/clustering/shape_based/load_clusters.py
diff --git a/src/utils/datastructs.jl b/src/utils/datastructs.jl
@@ -58,6 +58,26 @@ struct ClustResultBest <: ClustResult
   data_type::Array{String}
 end
 
+"""
+    SimpleExtremeValueDescr(data_type::String,extremum::String,peak_def::String)
+
+Description of a simple extreme value.
+
+data_type: name of the attribute the extreme value is searched in
+extremum: "min" or "max"
+peak_def: "absolute" or "integral"
+
+Throws an `ArgumentError` if `extremum` or `peak_def` has any other value.
+"""
+struct SimpleExtremeValueDescr
+   data_type::String
+   extremum::String
+   peak_def::String
+   # Replace default constructor to only allow certain entries
+   function SimpleExtremeValueDescr(data_type::String,
+                                    extremum::String,
+                                    peak_def::String)
+       # only allow certain entries; throw instead of `@error`, which only logs and
+       # would still construct the invalid description
+       if !(extremum in ["min","max"])
+         throw(ArgumentError("extremum - "*extremum*" - not defined"))
+       elseif !(peak_def in ["absolute","integral"])
+         throw(ArgumentError("peak_def - "*peak_def*" - not defined"))
+       end
+       new(data_type,extremum,peak_def)
+   end
+end
+
+
 "OptResult"
 struct OptResult
  status::Symbol
@@ -322,3 +342,4 @@ function ClustInputDataMerged(data::ClustInputData)
  end
  ClustInputDataMerged(data.region,data.K,data.T,data_merged,data_type,data.weights,data.mean,data.sdv)
 end
+