forked from JMGaljaard/fltk-testbed
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
97 lines (76 loc) · 3.27 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from scipy.stats import wilcoxon
import itertools as it
from bisect import bisect_left
from typing import List
import numpy as np
import pandas as pd
import scipy.stats as ss
from pandas import Categorical
# de vargha and delaney functions are taken from
# https://gist.github.com/jacksonpradolima/f9b19d65b7f16603c837024d5f8c8a65
def VD_A(treatment: List[float], control: List[float]):
"""
Computes Vargha and Delaney A index
A. Vargha and H. D. Delaney.
A critique and improvement of the CL common language
effect size statistics of McGraw and Wong.
Journal of Educational and Behavioral Statistics, 25(2):101-132, 2000
The formula to compute A has been transformed to minimize accuracy errors
See: http://mtorchiano.wordpress.com/2014/05/19/effect-size-of-r-precision/
:param treatment: a numeric list
:param control: another numeric list
:returns the value estimate and the magnitude
"""
m = len(treatment)
n = len(control)
if m != n:
raise ValueError("Data d and f must have the same length")
r = ss.rankdata(treatment + control)
r1 = sum(r[0:m])
# Compute the measure
# A = (r1/m - (m+1)/2)/n # formula (14) in Vargha and Delaney, 2000
A = (2 * r1 - m * (m + 1)) / (2 * n * m) # equivalent formula to avoid accuracy errors
levels = [0.147, 0.33, 0.474] # effect sizes from Hess and Kromrey, 2004
magnitude = ["negligible", "small", "medium", "large"]
scaled_A = (A - 0.5) * 2
magnitude = magnitude[bisect_left(levels, abs(scaled_A))]
estimate = A
return estimate, magnitude
def VD_A_DF(data, val_col: str = None, group_col: str = None, sort=True):
"""
:param data: pandas DataFrame object
An array, any object exposing the array interface or a pandas DataFrame.
Array must be two-dimensional. Second dimension may vary,
i.e. groups may have different lengths.
:param val_col: str, optional
Must be specified if `a` is a pandas DataFrame object.
Name of the column that contains values.
:param group_col: str, optional
Must be specified if `a` is a pandas DataFrame object.
Name of the column that contains group names.
:param sort : bool, optional
Specifies whether to sort DataFrame by group_col or not. Recommended
unless you sort your data manually.
:return: stats : pandas DataFrame of effect sizes
Stats summary ::
'A' : Name of first measurement
'B' : Name of second measurement
'estimate' : effect sizes
'magnitude' : magnitude
"""
x = data.copy()
if sort:
x[group_col] = Categorical(x[group_col], categories=x[group_col].unique(), ordered=True)
x.sort_values(by=[group_col, val_col], ascending=True, inplace=True)
groups = x[group_col].unique()
# Pairwise combinations
g1, g2 = np.array(list(it.combinations(np.arange(groups.size), 2))).T
# Compute effect size for each combination
ef = np.array([VD_A(list(x[val_col][x[group_col] == groups[i]].values),
list(x[val_col][x[group_col] == groups[j]].values)) for i, j in zip(g1, g2)])
return pd.DataFrame({
'A': np.unique(data[group_col])[g1],
'B': np.unique(data[group_col])[g2],
'estimate': ef[:, 0],
'magnitude': ef[:, 1]
})