Skip to content

Commit

Permalink
add std implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
nipsn authored and marcosvm13 committed Nov 23, 2023
1 parent 7de2b7c commit f30beaa
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 0 deletions.
5 changes: 5 additions & 0 deletions docs/release-notes/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

The changelog presented here outlines changes to PyKX when operating within a Python environment specifically, if you require changelogs associated with PyKX operating under a q environment see [here](./underq-changelog.md).

## PyKX 2.2.0

### Additions
- [Pandas API](../user-guide/advanced/Pandas_API.ipynb) Added std to Pandas API.

## PyKX 2.1.0

#### Release Date
Expand Down
96 changes: 96 additions & 0 deletions docs/user-guide/advanced/Pandas_API.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,102 @@
"tab.mode(dropna=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Table.std()\n",
"\n",
"```\n",
"Table.std(axis=0, skipna=True, numeric_only=False, ddof=1)\n",
"```\n",
"\n",
"Return sample standard deviation over requested axis. Normalized by N-1 by default. This can be changed using the ddof argument.\n",
"\n",
"\n",
"**Parameters:**\n",
"\n",
"| Name | Type | Description | Default |\n",
"| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n",
"| axis | int | The axis to calculate the standard deviation across: 0 is columns, 1 is rows. | 0 |\n",
"| skipna | bool | not yet implemented | True |\n",
"| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n",
"| ddof | int | Delta Degrees of Freedom. The divisor used in calculations is N - ddof, where N represents the number of elements. | 1 |\n",
"\n",
"**Returns:**\n",
"\n",
"| Type | Description |\n",
"| :----------------: | :------------------------------------------------------------------- |\n",
"| Dictionary | The std across each row / column with the key corresponding to the row number or column name. |"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Examples:**\n",
"\n",
"Calculate the std across the columns of a table"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tab = kx.Table(data=\n",
" {\n",
" 'a': [1, 2, 2, 4],\n",
" 'b': [1, 2, 6, 7],\n",
" 'c': [7, 8, 9, 10],\n",
" 'd': [7, 11, 14, 14]\n",
" }\n",
")\n",
"tab"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tab.std()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Calculate the std across the rows of a table"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tab.std(axis=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Calculate std across columns with ddof=0:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tab.std(ddof=0)"
]
},
{
"cell_type": "markdown",
"id": "24cf11d3",
Expand Down
23 changes: 23 additions & 0 deletions src/pykx/pandas_api/pandas_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,29 @@ def mean(self, axis: int = 0, numeric_only: bool = False):
tab
)

@api_return
def std(self, axis: int = 0, ddof: int = 1, numeric_only: bool = False):
    """Return the standard deviation over the requested axis.

    The divisor used is ``N - ddof`` where ``N`` is the number of values
    being aggregated: the number of rows when ``axis == 0`` and the number
    of columns otherwise.

    Parameters:
        axis: ``0`` aggregates each column; any other value aggregates
            each row.
        ddof: Delta degrees of freedom subtracted from ``N`` in the divisor.
        numeric_only: When true, the table is first reduced with
            ``_get_numeric_only_subtable`` before aggregating.

    Returns:
        A dictionary keyed by column name (``axis == 0``) or by the row
        number as a symbol (otherwise). Every value is null when ``ddof``
        equals ``N``.
    """
    tab = self
    if 'Keyed' in str(type(tab)):
        # Unkey the table and drop the key columns so they are not aggregated.
        tab = q('{(keys x) _ 0!x}', tab)
    if numeric_only:
        tab = _get_numeric_only_subtable(tab)
    key_str = '' if axis == 0 else '`$string '
    val_str = '' if axis == 0 else '"f"$value '
    query_str = 'cols[tab]' if axis == 0 else 'til[count[tab]]'
    where_str = ' where not (::)~/:r[;1]'

    # Sample size along the aggregated axis: rows for a column-wise std,
    # columns for a row-wise std. Asking q for it avoids converting the
    # whole table to pandas just to measure its length.
    size_str = 'count tab' if axis == 0 else 'count cols tab'
    n_obs = q(f'{{[tab]{size_str}}}', tab).py()

    # Default result is all nulls; it is kept as-is when the divisor
    # (N - ddof) would be zero, rather than dividing by zero in q.
    res = q(f'{{[tab]{query_str}!count[{query_str}]#0n}}', tab)
    if ddof != n_obs:
        res = q(
            '{[tab]'
            f'r:{{[tab; x] ({key_str}x; {{avg sqrt (sum xexp[x-(avg x);2]) % count[x]-{ddof}}} {val_str}tab[x])}}[tab;] each {query_str};'
            f'(,/) {{(enlist x 0)!(enlist x 1)}} each r{where_str}}}',
            tab
        )
    return res


@api_return
def median(self, axis: int = 0, numeric_only: bool = False):
tab = self
Expand Down
64 changes: 64 additions & 0 deletions tests/test_pandas_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1543,6 +1543,69 @@ def test_mean(kx, q):
q_m = tab.mean(axis=1)


def test_std(kx, q):
    """Check that ``Table.std`` agrees with ``pandas.DataFrame.std``.

    Covers column-wise and row-wise aggregation, a non-default ``ddof``,
    the degenerate ``ddof == N`` case, keyed tables, ``numeric_only`` and
    the error raised for non-numeric columns.
    """
    df = pd.DataFrame(
        {
            'a': [1, 2, 2, 4],
            'b': [1, 2, 6, 7],
            'c': [7, 8, 9, 10],
            'd': [7, 11, 14, 14]
        }
    )
    tab = kx.toq(df)
    # Floating point results come from two different implementations, so
    # compare with a tolerance rather than exact equality.
    p_m = df.std()
    q_m = tab.std()
    for c in q.key(q_m).py():
        assert p_m[c] == pytest.approx(q_m[c].py())
    # Row-wise results are keyed by row number, so iterate over the rows
    # of the frame rather than over its columns.
    p_m = df.std(axis=1)
    q_m = tab.std(axis=1)
    for r in range(len(df)):
        assert p_m[r] == pytest.approx(q_m[q('{`$string x}', r)].py())
    p_m = df.std(ddof=0)
    q_m = tab.std(ddof=0)
    for c in q.key(q_m).py():
        assert p_m[c] == pytest.approx(q_m[c].py())

    # ddof equal to the number of rows leaves a zero divisor: both
    # libraries must report NaN for every column.
    p_m = df.std(ddof=4)
    q_m = tab.std(ddof=4)
    for c in q.key(q_m).py():
        assert np.isnan(p_m[c])
        assert np.isnan(q_m[c].py())

    # Keyed table: the key column must not take part in the aggregation.
    q['tab'] = kx.toq(df)
    tab = q('1!`idx xcols update idx: til count tab from tab')
    p_m = df.std()
    q_m = tab.std()
    for c in q.key(q_m).py():
        assert p_m[c] == pytest.approx(q_m[c].py())
    p_m = df.std(axis=1)
    q_m = tab.std(axis=1)
    for r in range(len(df)):
        assert p_m[r] == pytest.approx(q_m[q('{`$string x}', r)].py())

    df = pd.DataFrame(
        {
            'a': [1, 2, 2, 4],
            'b': [1, 2, 6, 7],
            'c': [7, 8, 9, 10],
            'd': ['foo', 'bar', 'baz', 'qux']
        }
    )
    tab = kx.toq(df)
    p_m = df.std(numeric_only=True)
    q_m = tab.std(numeric_only=True)
    for c in q.key(q_m).py():
        assert p_m[c] == pytest.approx(q_m[c].py())
    p_m = df.std(axis=1, numeric_only=True)
    q_m = tab.std(axis=1, numeric_only=True)
    for r in range(len(df)):
        assert p_m[r] == pytest.approx(q_m[q('{`$string x}', r)].py())

    # Without numeric_only the string column cannot be aggregated.
    with pytest.raises(kx.QError):
        tab.std()
    with pytest.raises(kx.QError):
        tab.std(axis=1)


def test_median(kx, q):
df = pd.DataFrame(
{
Expand Down Expand Up @@ -2029,3 +2092,4 @@ def test_keyed_loc_fixes(q):
mkt[['k1', 'y']]
with pytest.raises(KeyError):
mkt['k1']

0 comments on commit f30beaa

Please sign in to comment.