Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring R plots using the CC18 data #39

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
1,624 changes: 856 additions & 768 deletions CC18 - Benchmark Analysis in R.ipynb

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions R/getAvgPerformance.R → R/averagePerformance.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#--------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------

getAvgPerformance = function(data, measure) {
averagePerformance = function(data, measure) {

temp = na.omit(data[, c("flow.name", measure)])
algos = unique(temp$flow.name)
temp = na.omit(data[, c("learner.name", measure)])
algos = unique(temp$learner.name)

aux = lapply(algos, function(alg) {
d = temp[which(temp$flow.name == alg),]
d = temp[which(temp$learner.name == alg),]
ret = mean(d[,2])
return(ret)
})
Expand Down
14 changes: 7 additions & 7 deletions R/checkMeasure.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#--------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------

# Validates that `measure` names one of the evaluation measures supported by
# the analysis functions.
#
# Args:
#   measure: a single character string with the name of the measure.
#
# Returns:
#   TRUE (invisibly) when `measure` is allowed. Otherwise stops with an error
#   that lists every allowed measure.
checkMeasure = function(measure) {

  #TODO: replace with checkmate commands
  allowed.measures = c("f.measure", "kappa", "mean.absolute.error", "precision", "recall",
    "usercpu.time.millis", "area.under.roc.curve", "predictive.accuracy", "root.mean.squared.error")

  # Check the length first: with NULL or a vector, `%in%` would hand `if` a
  # zero-length / multi-element condition and fail with an unrelated error
  # instead of the helpful message below.
  if (length(measure) != 1L || !(measure %in% allowed.measures)) {
    stop(paste0(" - Please, choose one of the following measures: ",
      paste(allowed.measures, collapse=', '), " \n"))
  }
  return(invisible(TRUE))
}

Expand Down
27 changes: 14 additions & 13 deletions R/checkPackages.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,24 @@
# Installs every package in `pkgs` that is not yet present in the local library.
#
# Args:
#   pkgs: character vector with the names of the required packages.
#
# Side effects:
#   Prints progress with cat() and calls install.packages() /
#   BiocManager::install() for whatever is missing.
checkPackages = function(pkgs) {

  obj = installed.packages()
  # Keep the missing package *names* (not their positions), so the name-based
  # checks below compare like with like.
  need = pkgs[!pkgs %in% rownames(obj)]

  if (length(need) > 0) {
    cat(paste0(" @ Missing packages: ", paste(need, collapse = ", "), "\n"))
    install.packages(pkgs = need, repos = "http://cran.uni-muenster.de/")

    if ("scmamp" %in% need) {
      # BiocManager itself is only installed when absent ...
      if (!requireNamespace("BiocManager", quietly = TRUE)) {
        install.packages("BiocManager")
      }
      # ... but the dependencies below are requested whenever scmamp was missing.
      # dependencies are not in CRAN
      # NOTE(review): the Bioconductor release is pinned to "3.8" - confirm it
      # matches the R version in use.
      BiocManager::install("Rgraphviz", version = "3.8")
      BiocManager::install("graph", version = "3.8")
    }
  }
  cat(" @ All required packages installed.\n")
}

#--------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------
29 changes: 29 additions & 0 deletions R/criticalDifferencePlot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#--------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------

# Draws a critical difference plot comparing learners across tasks.
#
# Args:
#   data:    data frame with (at least) the columns `task.id`, `learner.name`
#            and the column named by `measure`.
#   measure: name (character string) of the performance column to compare.
#   alpha:   significance level forwarded to scmamp::plotCD().
#
# Returns:
#   Whatever scmamp::plotCD() returns for the task x learner matrix of mean
#   performances.
criticalDifferencePlot = function(data, measure = "predictive.accuracy", alpha = 0.05) {

  # Select columns by their names as strings: `measure` holds a column name, and
  # passing it bare to dplyr::select() is ambiguous when `data` also has a
  # column literally called "measure".
  needed = c("task.id", "learner.name", measure)
  absent = setdiff(needed, colnames(data))
  if (length(absent) > 0) {
    stop(paste0(" - Missing column(s) in data: ", paste(absent, collapse = ", "), " \n"))
  }
  sub.df = data[, needed]

  tasks = unique(sub.df$task.id)
  algos = unique(sub.df$learner.name)
  # Neutral column names, so the lookups below do not depend on `measure`.
  colnames(sub.df) = c("taskId", "learnerName", "predictiveAcc")

  # One row per task, one value per learner: the mean of the measure over all
  # runs of that (task, learner) pair.
  aux.task = lapply(tasks, function(task) {
    aux.algo = lapply(algos, function(algo) {
      rows = which(sub.df$taskId == task & sub.df$learnerName == algo)
      return(mean(sub.df$predictiveAcc[rows]))
    })
    return(unlist(aux.algo))
  })

  mat = do.call("rbind", aux.task)
  # A (task, learner) pair without runs yields mean(numeric(0)) = NaN; it is
  # replaced by -Inf (presumably so the learner ranks worst on that task -
  # confirm against scmamp's ranking direction).
  mat[is.nan(mat)] = -Inf
  rownames(mat) = tasks
  colnames(mat) = algos

  g = scmamp::plotCD(results.matrix = mat, alpha = alpha)
  return(g)
}

#--------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------
5 changes: 2 additions & 3 deletions R/getRanking.R → R/generateRanking.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,15 @@
#--------------------------------------------------------------------------------------------------

# Obs: Not handling NAs values (removed from the ranking)
generateRanking = function(mat, descending = FALSE) {

getRanking = function(mat, descending = FALSE) {

temp = mat
for(i in 1:nrow(mat)) {
ids = which(!is.na(mat[i,]))
if(descending){
temp[i, ids] = rank(-mat[i,ids])
} else {
temp[i, ids] = rank( mat[i,ids])
temp[i, ids] = rank( mat[i,ids])
}
}

Expand Down
23 changes: 0 additions & 23 deletions R/getAlgoCoverage.R

This file was deleted.

88 changes: 0 additions & 88 deletions R/getAlgosAvgPlot.R

This file was deleted.

33 changes: 0 additions & 33 deletions R/getExperimentsData.R

This file was deleted.

22 changes: 13 additions & 9 deletions R/config.R → R/getExperimentsResults.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
#--------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------

# OpenML version that works
# devtools::install_github("openml/r", ref = "05b8b97cc5ce6ea1b3f586818cfcf157b16a3cd4")

library('ggplot2')
library('reshape2')
library('gridExtra')
library('mlr')
library('OpenML')
library('dplyr')
# Downloads the run evaluations of every task in `tasks` and stacks them into a
# single data frame.
#
# Args:
#   tasks: data frame (or list) with a `task.id` element holding the task ids.
#   limit: maximum number of evaluations requested per task; forwarded to
#          OpenML::listOMLRunEvaluations(). Defaults to the previous fixed
#          value of 5000.
#
# Returns:
#   The per-task results combined with plyr::rbind.fill(), each row tagged with
#   the `task.id` it was requested for.
getExperimentsResults = function(tasks, limit = 5000) {

  cat(" @ Getting experiment results\n")
  aux = lapply(tasks$task.id, function(id) {
    # cat(" - loading results from task:", id, "\n")
    res = OpenML::listOMLRunEvaluations(task.id = id, limit = limit, offset = 0)
    # rep() keeps the assignment valid when a task has no evaluations: giving a
    # length-one value to a zero-row data frame is an error.
    res$task.id = rep(id, nrow(res))
    return(res)
  })

  df = plyr::rbind.fill(aux)
  return(df)
}

#--------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------
7 changes: 0 additions & 7 deletions R/getROCurve.R

This file was deleted.

16 changes: 8 additions & 8 deletions R/getRuntimeData.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
#--------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------

# Computes, for every learner, the mean of the three runtime columns
# (training, testing and total user CPU time).
#
# Args:
#   data: data frame with the columns `task.id`, `learner.name`,
#         `usercpu.time.millis.training`, `usercpu.time.millis.testing` and
#         `usercpu.time.millis`.
#
# Returns:
#   A data frame with one row per learner: the column means of the runtime
#   columns plus an `alg` column holding the learner name.
getRuntimeData = function(data) {

  runtimes = dplyr::select(.data = data, task.id, learner.name,
    usercpu.time.millis.training, usercpu.time.millis.testing, usercpu.time.millis)
  learners = unique(runtimes$learner.name)

  per.learner = lapply(learners, function(learner) {
    rows = runtimes[which(runtimes$learner.name == learner),]
    # TO DO: how to handle missing data here?
    # For now, rows with any missing value are dropped before averaging.
    complete = na.omit(rows)
    # The first two columns are the task id and learner name; the rest are runtimes.
    colMeans(complete[,3:ncol(complete)])
  })

  out = data.frame(do.call("rbind", per.learner))
  out$alg = learners
  out
}


Expand Down
Loading