diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py
index d0e44ec..e897711 100644
--- a/src/pykx/pandas_api/pandas_meta.py
+++ b/src/pykx/pandas_api/pandas_meta.py
@@ -153,6 +153,45 @@ def mean(self, axis: int = 0, numeric_only: bool = False):
             tab
         )
 
+    @api_return
+    def std(self, axis: int = 0, ddof: int = 1, numeric_only: bool = False):
+        """Return the standard deviation of each column or row as a dictionary.
+
+        Parameters:
+            axis: `0` aggregates each column; any other value aggregates each
+                row, keyed by the row index cast to a symbol.
+            ddof: Delta degrees of freedom. The divisor is `N - ddof`, where
+                `N` is the number of values in the column or row.
+            numeric_only: When true, only the columns kept by
+                `_get_numeric_only_subtable` are aggregated.
+
+        Returns:
+            A dictionary of standard deviations. A value is null whenever
+            `N - ddof` is not positive.
+        """
+        tab = self
+        if 'Keyed' in str(type(tab)):
+            # Unkey the table and drop its key columns before aggregating.
+            tab = q('{(keys x) _ 0!x}', tab)
+        if numeric_only:
+            tab = _get_numeric_only_subtable(tab)
+        key_str = '' if axis == 0 else '`$string '
+        val_str = '' if axis == 0 else '"f"$value '
+        query_str = 'cols[tab]' if axis == 0 else 'til[count[tab]]'
+        where_str = ' where not (::)~/:r[;1]'
+        # The divisor is checked in q against the length of each aggregated
+        # slice, so the null result applies to rows as well as columns and the
+        # table never has to be converted to pandas just to count it.
+        return q(
+            '{[tab; ddof]'
+            'calc:{[ddof; x] d:count[x]-ddof; s:sqrt sum[xexp[x-avg x;2]]%d; $[d>0; s; 0n]};'
+            f'r:{{[tab; calc; x] ({key_str}x; calc {val_str}tab[x])}}'
+            f'[tab; calc[ddof];] each {query_str};'
+            f'(,/) {{(enlist x 0)!(enlist x 1)}} each r{where_str}}}',
+            tab,
+            ddof
+        )
+
     @api_return
     def median(self, axis: int = 0, numeric_only: bool = False):
         tab = self
diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py
index acfe55f..b987e78 100644
--- a/tests/test_pandas_api.py
+++ b/tests/test_pandas_api.py
@@ -1543,6 +1543,79 @@ def test_mean(kx, q):
         q_m = tab.mean(axis=1)
 
 
+def test_std(kx, q):
+    df = pd.DataFrame(
+        {
+            'a': [1, 2, 2, 4],
+            'b': [1, 2, 6, 7],
+            'c': [7, 8, 9, 10],
+            'd': [7, 11, 14, 14]
+        }
+    )
+    tab = kx.toq(df)
+    p_m = df.std()
+    q_m = tab.std()
+    for c in q.key(q_m).py():
+        assert p_m[c] == q_m[c].py()
+    p_m = df.std(axis=1)
+    q_m = tab.std(axis=1)
+    for c in range(len(q.cols(tab))):
+        assert p_m[c] == q_m[q('{`$string x}', c)].py()
+    p_m = df.std(ddof=0)
+    q_m = tab.std(ddof=0)
+    for c in q.key(q_m).py():
+        assert p_m[c] == q_m[c].py()
+
+    p_m = df.std(ddof=4)
+    q_m = tab.std(ddof=4)
+    for c in q.key(q_m).py():
+        assert np.isnan(p_m[c]) == np.isnan(q_m[c].py())
+
+    # A non-square table: the null result must follow the number of values
+    # along the aggregated axis, not the number of rows.
+    df_rect = df[['a', 'b', 'c']]
+    tab_rect = kx.toq(df_rect)
+    for ddof in (3, 4):
+        p_m = df_rect.std(axis=1, ddof=ddof)
+        q_m = tab_rect.std(axis=1, ddof=ddof)
+        for c in range(len(df_rect)):
+            assert np.isnan(p_m[c]) == np.isnan(q_m[q('{`$string x}', c)].py())
+
+    q['tab'] = kx.toq(df)
+    tab = q('1!`idx xcols update idx: til count tab from tab')
+    p_m = df.std()
+    q_m = tab.std()
+    for c in q.key(q_m).py():
+        assert p_m[c] == q_m[c].py()
+    p_m = df.std(axis=1)
+    q_m = tab.std(axis=1)
+    for c in range(len(q.cols(tab)) - 1):
+        assert p_m[c] == q_m[q('{`$string x}', c)].py()
+
+    df = pd.DataFrame(
+        {
+            'a': [1, 2, 2, 4],
+            'b': [1, 2, 6, 7],
+            'c': [7, 8, 9, 10],
+            'd': ['foo', 'bar', 'baz', 'qux']
+        }
+    )
+    tab = kx.toq(df)
+    p_m = df.std(numeric_only=True)
+    q_m = tab.std(numeric_only=True)
+    for c in q.key(q_m).py():
+        assert p_m[c] == q_m[c].py()
+    p_m = df.std(axis=1, numeric_only=True)
+    q_m = tab.std(axis=1, numeric_only=True)
+    for c in range(len(q.cols(tab))):
+        assert p_m[c] == q_m[q('{`$string x}', c)].py()
+
+    with pytest.raises(kx.QError):
+        q_m = tab.std()
+    with pytest.raises(kx.QError):
+        q_m = tab.std(axis=1)
+
+
 def test_median(kx, q):
     df = pd.DataFrame(
         {
@@ -2029,3 +2102,4 @@ def test_keyed_loc_fixes(q):
         mkt[['k1', 'y']]
     with pytest.raises(KeyError):
         mkt['k1']
+