-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcorrelation.js
146 lines (121 loc) · 4.58 KB
/
correlation.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
// Computing Correlation Demo
// From chapter 4 of https://eloquentjavascript.net/
// Correlation is a measure of dependence between statistical variables.
// A statistical variable is not quite the same as a programming variable.
// In statistics you typically have a set of measurements, and each variable is
// measured for every measurement. Correlation between variables is usually
// expressed as a value that ranges from -1 to 1. Zero correlation means the
// variables are not related. A correlation of one indicates that the two are
// perfectly related—if you know one, you also know the other. Negative one also
// means that the variables are perfectly related but that they are opposites
// —when one is true, the other is false.
// import { JOURNAL } from './journal';
const JOURNAL = require('./data/journal.js');
// This function computes the ϕ coefficient from a four item array
// where each item is the total number of occurrences of each possible
// combination:
// binary decimal(index) description
// 00 0 no occurrences of A or B
// 01 1 B (no A)
// 10 2 A (no B)
// 11 3 A and B
// Phi coefficient of a 2x2 frequency table, passed as a four-element array
// indexed 0..3 by the combinations 00, 01, 10 and 11.
function coefficient_(frequency) {
  // Difference between the products of the table's two diagonals.
  const numerator = frequency[3] * frequency[0] - frequency[2] * frequency[1];
  // Product of the four marginal totals: A present, A absent,
  // B present, B absent.
  const marginalProduct =
    (frequency[2] + frequency[3]) *
    (frequency[0] + frequency[1]) *
    (frequency[1] + frequency[3]) *
    (frequency[0] + frequency[2]);
  // If any marginal total is zero the denominator is 0 and the result is
  // not a finite number.
  return numerator / Math.sqrt(marginalProduct);
}
// With destructuring, it can also be written like:
// Phi coefficient of a 2x2 table, unpacked straight from the argument:
// [neither, onlyB, onlyA, both] are the counts for combinations
// 00, 01, 10 and 11 respectively.
function coefficient([neither, onlyB, onlyA, both]) {
  const withA = onlyA + both;
  const withoutA = neither + onlyB;
  const withB = onlyB + both;
  const withoutB = neither + onlyA;
  // Diagonal products of the table; their difference is the numerator.
  const diagonalDifference = both * neither - onlyA * onlyB;
  // A zero marginal total makes the denominator 0, giving a non-finite result.
  return diagonalDifference / Math.sqrt(withA * withoutA * withB * withoutB);
}
// Sample 2x2 table in [00, 01, 10, 11] order.
const sampleTable = [76, 9, 4, 1];
console.log(coefficient(sampleTable));
// 0.06859943405700354
// To extract a two-by-two table for a specific event from the journal,
// we must loop over all the entries and tally how many times the event
// occurs in relation to another event (squirrel transformations).
// Builds the 2x2 table for `event` against squirrel transformations.
// Returns a four-element array of counts where slot bit 0 (value 1) means
// the entry's events include `event` and bit 1 (value 2) means
// `entry.squirrel` is truthy.
function frequencyFor(event, journal) {
  const counts = [0, 0, 0, 0];
  for (const entry of journal) {
    const eventBit = entry.events.includes(event) ? 1 : 0;
    const squirrelBit = entry.squirrel ? 2 : 0;
    counts[eventBit + squirrelBit] += 1;
  }
  return counts;
}
// Tally the journal entries for the 'pizza' event and print the table.
const pizzaFrequency = frequencyFor('pizza', JOURNAL);
console.log(pizzaFrequency);
// [76, 9, 4, 1]
// We need to compute a correlation for every type of event that
// occurs in the data set. To do that, we first need to find every
// type of event.
// Collects every distinct event name that appears in any journal entry.
// Returns an array ordered by first appearance.
function journalEvents(journal) {
  // A Set gives constant-time membership checks (an array scan per event
  // would not) and iterates in insertion order, so the result keeps
  // first-seen order.
  const seen = new Set();
  for (const entry of journal) {
    for (const event of entry.events) {
      seen.add(event);
    }
  }
  return [...seen];
}
// Print the list of distinct events found in the journal.
const distinctEvents = journalEvents(JOURNAL);
console.log(distinctEvents);
// [ 'carrot', 'exercise', 'weekend', 'bread', ... ]
// Now we can pass each event:
// Print "<event>: <phi>" for every distinct event in the journal.
for (const eventName of journalEvents(JOURNAL)) {
  const table = frequencyFor(eventName, JOURNAL);
  console.log(`${eventName}:`, coefficient(table));
}
// carrot: 0.014097096860865023
// exercise: 0.06859943405700354
// weekend: 0.13719886811400708
// bread: -0.07575540190785703
// pudding: -0.06482037235521644
// brushed teeth: -0.3805211953235953
// touched tree: -0.08084520834544433
// nachos: -0.07043451251197408
// cycling: -0.08084520834544433
// brussel sprouts: -0.05230657809659414
// ice cream: -0.08084520834544433
// computer: 0.06859943405700354
// potatoes: -0.08574929257125442
// candy: 0.12964074471043288
// dentist: -0.036563621206356534
// running: -0.09050203323329065
// pizza: 0.06859943405700354
// work: -0.13719886811400708
// beer: -0.05230657809659414
// cauliflower: -0.08084520834544433
// lasagna: 0.08084520834544433
// lettuce: -0.07043451251197408
// television: -0.08084520834544433
// spaghetti: 0.242535625036333
// reading: 0.11068280537595927
// peanuts: 0.59026798116852
// Narrow the list:
console.log('Significant results:');
// Only report events whose correlation magnitude exceeds 0.1.
for (const eventName of journalEvents(JOURNAL)) {
  const phi = coefficient(frequencyFor(eventName, JOURNAL));
  if (Math.abs(phi) > 0.1) {
    console.log(`${eventName}:`, phi);
  }
}
// weekend: 0.13719886811400708
// brushed teeth: -0.3805211953235953
// candy: 0.12964074471043288
// work: -0.13719886811400708
// spaghetti: 0.242535625036333
// reading: 0.11068280537595927
// peanuts: 0.59026798116852
// The results show that squirrel transformations correlate most strongly
// with eating peanuts and most negatively with brushing teeth.
// Try testing for this combination:
// Tag each entry that has 'peanuts' but not 'brushed teeth' with a derived
// event. This appends to the entries' own `events` arrays in JOURNAL.
for (const { events } of JOURNAL) {
  if (!events.includes('peanuts')) continue;
  if (events.includes('brushed teeth')) continue;
  events.push('peanuts-no-teeth');
}
// Correlation of the derived event with squirrel transformations.
const peanutsNoTeethTable = frequencyFor('peanuts-no-teeth', JOURNAL);
console.log(coefficient(peanutsNoTeethTable));
// 1