-
Notifications
You must be signed in to change notification settings - Fork 0
/
stats.py
134 lines (111 loc) · 4.25 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from dataclasses import dataclass, field
import numpy as np
from collections import defaultdict
import pandas as pd
import scipy.stats as st
def ssim2db(ssim):
    """Express an SSIM index in decibels as ``-10 * log10(1 - ssim)``.

    The computation goes through ``np.log10``, so ``ssim`` may be a scalar
    or anything NumPy can operate on element-wise.  An ``ssim`` of exactly
    1 leaves a zero inside the logarithm and therefore maps to ``inf``.
    """
    distortion = 1 - ssim
    return -10 * np.log10(distortion)
@dataclass
class StreamStat:
    """Running counters for one stream plus the metrics derived from them.

    ``init``, ``startup`` and ``last_play`` are timestamps whose differences
    are scaled by 1e-9 to yield seconds (so they are presumably nanoseconds).
    Every field keeps a sentinel default (``np.inf``, ``-1`` or ``0``) until
    it is filled in; ``invalid`` inspects those sentinels.
    """

    init: int = np.inf  # time at init; np.inf means "never seen"
    startup: int = np.inf  # time_at_startup; np.inf means "never seen"
    # Rebuffer value subtracted as the baseline in total_stall
    # (presumably cum_rebuf_at_startup -- confirm); -1 means "unset".
    startup_rebuf: float = -1
    sum_ssim_index: float = 0  # numerator of ssim_index_mean
    count_ssim_sample: int = 0  # denominator of ssim_index_mean
    playing: bool = False
    last_play: int = -1  # time_at_last_play
    last_play_cum_rebuf: int = -1  # cum_rebuf_at_last_play; -1 means "unset"
    bad: int = 0  # 1: event_interval>8s; 2: stall>20s; 3: stall_while_playing

    @property
    def startup_delay(self):
        """Seconds elapsed between ``init`` and ``startup``."""
        elapsed = self.startup - self.init
        return elapsed * 1e-9

    @property
    def total_play(self):
        """Seconds elapsed between ``startup`` and ``last_play``."""
        elapsed = self.last_play - self.startup
        return elapsed * 1e-9

    @property
    def total_stall(self):
        """Rebuffer accumulated since startup, in seconds (no unit scaling)."""
        baseline = self.startup_rebuf
        return self.last_play_cum_rebuf - baseline

    @property
    def invalid(self):
        """Return 0 for a usable stream, otherwise a non-zero reason code.

        Checks run in this order and the first match wins:
        -1 never initialised, -2 never started, -3 no SSIM samples,
        the ``bad`` flag itself when it is set, -100 when either rebuffer
        field still holds its -1 sentinel.
        """
        if self.init == np.inf:
            code = -1
        elif self.startup == np.inf:
            code = -2
        elif self.count_ssim_sample == 0:
            code = -3
        elif self.bad:
            code = self.bad
        elif self.last_play_cum_rebuf == -1 or self.startup_rebuf == -1:
            code = -100
        else:
            code = 0
        return code

    @property
    def ssim_index_mean(self):
        """Average SSIM index: ``sum_ssim_index / count_ssim_sample``."""
        samples = self.count_ssim_sample
        return self.sum_ssim_index / samples
@dataclass
class GroupStat:
    """Aggregate, watch-time-weighted statistics over a table of streams."""

    # Columns of ``streams``:
    # ["session_id", "index", "watch_time", "ssim_index_mean", "stall_time"]
    streams: pd.DataFrame = None
    num_streams_bad: int = 0
    bad_reasons: dict = field(default_factory=lambda: defaultdict(int))

    @property
    def total_watch(self):
        """Sum of the ``watch_time`` column."""
        watch = self.streams["watch_time"]
        return watch.sum()

    @property
    def total_stall(self):
        """Sum of the ``stall_time`` column."""
        stall = self.streams["stall_time"]
        return stall.sum()

    @property
    def play_stall_ratio(self):
        """Total stall divided by total watch; the falsy total itself if zero."""
        watched = self.total_watch
        if not watched:
            return watched
        return self.total_stall / watched

    @property
    def mean_ssim(self):
        """Watch-time-weighted mean SSIM; the falsy total itself if zero."""
        watched = self.total_watch
        if not watched:
            return watched
        weighted = self.streams["watch_time"] * self.streams["ssim_index_mean"]
        return weighted.sum() / watched

    @property
    def mean_ssim_db(self):
        """``mean_ssim`` converted to decibels."""
        return ssim2db(self.mean_ssim)

    @property
    def sum_squared_weights(self):
        """Sum of squared watch times divided by the squared total watch."""
        squared = (self.streams["watch_time"] ** 2).sum()
        return squared / self.total_watch ** 2

    def _weighted_sem(self, values, mean):
        """Standard error of a watch-time-weighted mean of ``values``.

        Takes the weighted variance of ``values`` around ``mean``, then
        scales its square root by the square root of ``sum_squared_weights``.
        """
        weights = self.streams["watch_time"]
        variance = (weights * (values - mean) ** 2).sum() / self.total_watch
        spread = np.sqrt(variance)
        return spread * np.sqrt(self.sum_squared_weights)

    @property
    def ssim_stat_db(self):
        """``(lower, mean, upper)`` SSIM in dB.

        The bounds are the weighted mean minus/plus the 97.5th-percentile
        normal quantile times the standard error, converted to dB after the
        interval is formed on the raw SSIM scale.
        """
        centre = self.mean_ssim
        z95 = st.norm.ppf(.975)
        sem = self._weighted_sem(self.streams["ssim_index_mean"], centre)
        margin = z95 * sem
        return ssim2db(centre - margin), ssim2db(centre), ssim2db(centre + margin)

    @property
    def stall_ratio_stat(self):
        """``(lower, mean, upper)`` stall ratio.

        The centre is ``play_stall_ratio``; the spread comes from per-stream
        ``stall_time / watch_time`` ratios weighted by watch time.
        """
        centre = self.play_stall_ratio
        per_stream = self.streams["stall_time"] / self.streams["watch_time"]
        z95 = st.norm.ppf(.975)
        sem = self._weighted_sem(per_stream, centre)
        margin = z95 * sem
        return centre - margin, centre, centre + margin
def ssim_stat_db(df):
    """Return ``(lower, mean, upper)`` SSIM in dB for a table of streams.

    ``df`` must provide ``watch_time`` and ``ssim_index_mean`` columns.
    The mean is weighted by watch time; the bounds sit the 97.5th-percentile
    normal quantile times the standard error either side of it, and all
    three values are converted to dB only at the end.
    """
    weights = df["watch_time"]
    scores = df["ssim_index_mean"]

    watched = weights.sum()
    weight_concentration = (weights ** 2).sum() / watched ** 2
    centre = (weights * scores).sum() / watched

    z95 = st.norm.ppf(.975)
    variance = (weights * (scores - centre) ** 2).sum() / watched
    spread = np.sqrt(variance)
    sem = spread * np.sqrt(weight_concentration)

    margin = z95 * sem
    return ssim2db(centre - margin), ssim2db(centre), ssim2db(centre + margin)
def stall_ratio_stat(df):
    """Return ``(lower, mean, upper)`` stall ratio for a table of streams.

    ``df`` must provide ``watch_time`` and ``stall_time`` columns.  The
    centre is total stall over total watch; the spread is taken from the
    per-stream ``stall_time / watch_time`` ratios weighted by watch time,
    and the bounds sit the 97.5th-percentile normal quantile times the
    standard error either side of the centre.
    """
    weights = df["watch_time"]
    stalls = df["stall_time"]

    watched = weights.sum()
    weight_concentration = (weights ** 2).sum() / watched ** 2
    centre = stalls.sum() / watched
    per_stream = stalls / weights

    z95 = st.norm.ppf(.975)
    variance = (weights * (per_stream - centre) ** 2).sum() / watched
    spread = np.sqrt(variance)
    sem = spread * np.sqrt(weight_concentration)

    margin = z95 * sem
    return centre - margin, centre, centre + margin