From 95c8c2050af16c9bf313d38a1282f3547cd4ee4f Mon Sep 17 00:00:00 2001 From: NimaAfshari Date: Sat, 27 Apr 2024 21:41:54 +0800 Subject: [PATCH] Solved all puzzels --- 100-pandas-puzzles.ipynb | 2100 +++++++++++++++++++++++++++++++++++--- 1 file changed, 1954 insertions(+), 146 deletions(-) diff --git a/100-pandas-puzzles.ipynb b/100-pandas-puzzles.ipynb index bcafc2be8..90e678ed2 100644 --- a/100-pandas-puzzles.ipynb +++ b/100-pandas-puzzles.ipynb @@ -41,12 +41,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "import pandas as pd" + ] }, { "cell_type": "markdown", @@ -57,12 +59,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "'2.2.2'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.__version__" + ] }, { "cell_type": "markdown", @@ -73,12 +88,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/homebrew/Cellar/python@3.11/3.11.8/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/_distutils_hack/__init__.py:26: UserWarning: Setuptools is replacing distutils.\n", + " warnings.warn(\"Setuptools is replacing distutils.\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "INSTALLED VERSIONS\n", + "------------------\n", + "commit : d9cdd2ee5a58015ef6f4d15c7226110c9aab8140\n", + "python : 3.11.8.final.0\n", + "python-bits : 64\n", + "OS : Darwin\n", + "OS-release : 23.4.0\n", + "Version : Darwin Kernel Version 23.4.0: Fri Mar 15 00:12:49 PDT 2024; root:xnu-10063.101.17~1/RELEASE_ARM64_T6020\n", + "machine : 
arm64\n", + "processor : arm\n", + "byteorder : little\n", + "LC_ALL : None\n", + "LANG : None\n", + "LOCALE : None.UTF-8\n", + "\n", + "pandas : 2.2.2\n", + "numpy : 1.26.4\n", + "pytz : 2023.3.post1\n", + "dateutil : 2.8.2\n", + "setuptools : 69.1.0\n", + "pip : 24.0\n", + "Cython : 3.0.8\n", + "pytest : None\n", + "hypothesis : None\n", + "sphinx : None\n", + "blosc : None\n", + "feather : None\n", + "xlsxwriter : None\n", + "lxml.etree : None\n", + "html5lib : None\n", + "pymysql : None\n", + "psycopg2 : None\n", + "jinja2 : 3.1.2\n", + "IPython : 8.14.0\n", + "pandas_datareader : None\n", + "adbc-driver-postgresql: None\n", + "adbc-driver-sqlite : None\n", + "bs4 : 4.12.2\n", + "bottleneck : None\n", + "dataframe-api-compat : None\n", + "fastparquet : None\n", + "fsspec : 2023.9.0\n", + "gcsfs : None\n", + "matplotlib : None\n", + "numba : None\n", + "numexpr : 2.8.5\n", + "odfpy : None\n", + "openpyxl : None\n", + "pandas_gbq : None\n", + "pyarrow : 13.0.0\n", + "pyreadstat : None\n", + "python-calamine : None\n", + "pyxlsb : None\n", + "s3fs : None\n", + "scipy : 1.13.0\n", + "sqlalchemy : 2.0.20\n", + "tables : 3.9.2\n", + "tabulate : None\n", + "xarray : None\n", + "xlrd : None\n", + "zstandard : None\n", + "tzdata : 2023.3\n", + "qtpy : None\n", + "pyqt5 : None\n" + ] + } + ], + "source": [ + "pd.show_versions()" + ] }, { "cell_type": "markdown", @@ -112,11 +209,132 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "collapsed": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
acat2.51yes
bcat3.03yes
csnake0.52no
ddogNaN3yes
edog5.02no
fcat2.03no
gsnake4.51no
hcatNaN1yes
idog7.02no
jdog3.01no
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "a cat 2.5 1 yes\n", + "b cat 3.0 3 yes\n", + "c snake 0.5 2 no\n", + "d dog NaN 3 yes\n", + "e dog 5.0 2 no\n", + "f cat 2.0 3 no\n", + "g snake 4.5 1 no\n", + "h cat NaN 1 yes\n", + "i dog 7.0 2 no\n", + "j dog 3.0 1 no" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import numpy as np\n", "\n", @@ -127,7 +345,8 @@ "\n", "labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']\n", "\n", - "df = # (complete this line of code)" + "df = pd.DataFrame(data, index = labels)# (complete this line of code)\n", + "df" ] }, { @@ -139,12 +358,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 10 entries, a to j\n", + "Data columns (total 4 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 animal 10 non-null object \n", + " 1 age 8 non-null float64\n", + " 2 visits 10 non-null int64 \n", + " 3 priority 10 non-null object \n", + "dtypes: float64(1), int64(1), object(2)\n", + "memory usage: 700.0+ bytes\n" + ] + } + ], + "source": [ + "df.info()" + ] }, { "cell_type": "markdown", @@ -155,12 +394,79 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
acat2.51yes
bcat3.03yes
csnake0.52no
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "a cat 2.5 1 yes\n", + "b cat 3.0 3 yes\n", + "c snake 0.5 2 no" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(3)" + ] }, { "cell_type": "markdown", @@ -171,12 +477,113 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalage
acat2.5
bcat3.0
csnake0.5
ddogNaN
edog5.0
fcat2.0
gsnake4.5
hcatNaN
idog7.0
jdog3.0
\n", + "
" + ], + "text/plain": [ + " animal age\n", + "a cat 2.5\n", + "b cat 3.0\n", + "c snake 0.5\n", + "d dog NaN\n", + "e dog 5.0\n", + "f cat 2.0\n", + "g snake 4.5\n", + "h cat NaN\n", + "i dog 7.0\n", + "j dog 3.0" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['animal', 'age']]" + ] }, { "cell_type": "markdown", @@ -187,12 +594,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalage
ddogNaN
edog5.0
idog7.0
\n", + "
" + ], + "text/plain": [ + " animal age\n", + "d dog NaN\n", + "e dog 5.0\n", + "i dog 7.0" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['animal', 'age']].iloc[[3,4,8]]" + ] }, { "cell_type": "markdown", @@ -203,12 +669,57 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [animal, age, visits, priority]\n", + "Index: []" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.visits > 3]" + ] }, { "cell_type": "markdown", @@ -219,12 +730,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
ddogNaN3yes
hcatNaN1yes
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "d dog NaN 3 yes\n", + "h cat NaN 1 yes" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.age.isna()]" + ] }, { "cell_type": "markdown", @@ -235,12 +805,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
acat2.51yes
fcat2.03no
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "a cat 2.5 1 yes\n", + "f cat 2.0 3 no" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[(df.animal == 'cat') & (df.age < 3)]" + ] }, { "cell_type": "markdown", @@ -251,12 +880,87 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
acat2.51yes
bcat3.03yes
fcat2.03no
jdog3.01no
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "a cat 2.5 1 yes\n", + "b cat 3.0 3 yes\n", + "f cat 2.0 3 no\n", + "j dog 3.0 1 no" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[(df.age >= 2) & (df.age <= 4)]" + ] }, { "cell_type": "markdown", @@ -267,12 +971,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": { "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "df.loc['f', 'age'] = 1.5" + ] }, { "cell_type": "markdown", @@ -283,12 +989,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "19" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.visits.sum()" + ] }, { "cell_type": "markdown", @@ -299,12 +1018,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "animal\n", + "cat 2.333333\n", + "dog 5.000000\n", + "snake 2.500000\n", + "Name: age, dtype: float64" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('animal')['age'].mean()" + ] }, { "cell_type": "markdown", @@ -315,12 +1051,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 51, "metadata": { "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "df.loc['k']= ['dog', 3, 2, 'no']\n", + "df.drop('k', inplace = True)" + ] }, { "cell_type": "markdown", @@ -331,12 +1070,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 63, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "animal\n", + "cat 4\n", + "dog 
4\n", + "snake 2\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['animal'].value_counts()" + ] }, { "cell_type": "markdown", @@ -347,12 +1103,135 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 55, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
idog7.02no
edog5.02no
gsnake4.51no
jdog3.01no
bcat3.03yes
acat2.51yes
fcat1.53no
csnake0.52no
hcatNaN1yes
ddogNaN3yes
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "i dog 7.0 2 no\n", + "e dog 5.0 2 no\n", + "g snake 4.5 1 no\n", + "j dog 3.0 1 no\n", + "b cat 3.0 3 yes\n", + "a cat 2.5 1 yes\n", + "f cat 1.5 3 no\n", + "c snake 0.5 2 no\n", + "h cat NaN 1 yes\n", + "d dog NaN 3 yes" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sort_values(['age', 'visits'], ascending=[False, True])" + ] }, { "cell_type": "markdown", @@ -363,12 +1242,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 64, "metadata": { "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "df['priority'] = df['priority'] == 'yes'" + ] }, { "cell_type": "markdown", @@ -379,12 +1260,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 68, "metadata": { "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "df['animal'] = df['animal'].replace({'snake': 'python'})" + ] }, { "cell_type": "markdown", @@ -395,12 +1278,82 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 71, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
visits123
animal
cat2.5NaN2.25
dog3.06.0NaN
python4.50.5NaN
\n", + "
" + ], + "text/plain": [ + "visits 1 2 3\n", + "animal \n", + "cat 2.5 NaN 2.25\n", + "dog 3.0 6.0 NaN\n", + "python 4.5 0.5 NaN" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.pivot_table(values='age', index = 'animal', columns='visits', aggfunc = 'mean')" + ] }, { "cell_type": "markdown", @@ -435,10 +1388,87 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
A
01
12
33
44
55
86
97
\n", + "
" + ], + "text/plain": [ + " A\n", + "0 1\n", + "1 2\n", + "3 3\n", + "4 4\n", + "5 5\n", + "8 6\n", + "9 7" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame({'A': [1, 2, 2, 3, 4, 5, 5, 5, 6, 7, 7]})\n", + "df = df[(df != df.shift())['A']]\n", + "df" + ] }, { "cell_type": "markdown", @@ -454,12 +1484,90 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 128, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
00.076737-0.294802-0.466752
10.0253650.4962000.037168
20.086809-0.0866720.412965
30.035974-0.1860090.399899
4-0.2248840.071283-0.383280
\n", + "
" + ], + "text/plain": [ + " 0 1 2\n", + "0 0.076737 -0.294802 -0.466752\n", + "1 0.025365 0.496200 0.037168\n", + "2 0.086809 -0.086672 0.412965\n", + "3 0.035974 -0.186009 0.399899\n", + "4 -0.224884 0.071283 -0.383280" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(np.random.random(size=(5, 3)))\n", + "df.subtract(df.mean(axis = 1), axis = 0)" + ] }, { "cell_type": "markdown", @@ -475,12 +1583,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 135, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "'a'" + ] + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(np.random.random(size=(5, 10)), columns=list('abcdefghij'))\n", + "df.sum().idxmin()" + ] }, { "cell_type": "markdown", @@ -495,12 +1617,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 139, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "5" + ] + }, + "execution_count": 139, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(np.random.randint(0, 2, size=(10, 3)))\n", + "df.drop_duplicates().shape[0]" + ] }, { "cell_type": "markdown", @@ -518,9 +1654,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 e\n", + "1 c\n", + "2 d\n", + "3 h\n", + "4 d\n", + "dtype: object" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "nan = np.nan\n", "\n", @@ -534,7 +1686,7 @@ "\n", "df = pd.DataFrame(data, columns=columns)\n", "\n", - "# write a solution to the question here" + "(df.isna().cumsum(axis = 1) == 3).idxmax(axis=1)" ] }, 
{ @@ -558,16 +1710,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 196, "metadata": { "collapsed": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "grps\n", + "a 409\n", + "b 156\n", + "c 345\n", + "Name: vals, dtype: int64" + ] + }, + "execution_count": 196, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = pd.DataFrame({'grps': list('aaabbcaabcccbbc'), \n", " 'vals': [12,345,3,1,45,14,4,52,54,23,235,21,57,3,87]})\n", "\n", - "# write a solution to the question here" + "# sum the three largest values within each group\n", + "df.groupby('grps')['vals'].nlargest(3).groupby('grps').sum()" ] }, { @@ -597,15 +1765,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 206, "metadata": { "collapsed": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "A\n", + "(0, 10] 635\n", + "(10, 20] 360\n", + "(20, 30] 315\n", + "(30, 40] 306\n", + "(40, 50] 750\n", + "(50, 60] 284\n", + "(60, 70] 424\n", + "(70, 80] 526\n", + "(80, 90] 835\n", + "(90, 100] 852\n", + "Name: B, dtype: int64" + ] + }, + "execution_count": 206, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = pd.DataFrame(np.random.RandomState(8765).randint(1, 101, size=(100, 2)), columns = [\"A\", \"B\"])\n", - "\n", - "# write a solution to the question here" + "bins = list(range(0, 110, 10))\n", + "df.groupby(pd.cut(df['A'], bins), observed=False)['B'].sum()\n" ] }, { @@ -640,12 +1838,121 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 254, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
XY
071
122
200
331
442
523
654
700
831
942
\n", + "
" + ], + "text/plain": [ + " X Y\n", + "0 7 1\n", + "1 2 2\n", + "2 0 0\n", + "3 3 1\n", + "4 4 2\n", + "5 2 3\n", + "6 5 4\n", + "7 0 0\n", + "8 3 1\n", + "9 4 2" + ] + }, + "execution_count": 254, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame({'X': [7, 2, 0, 3, 4, 2, 5, 0, 3, 4]})\n", + "# For each row, count the distance back to the most recent zero in X;\n", + "# the leading -1 in izero stands in for a zero just before the first row.\n", + "\n", + "izero = np.r_[-1, (df == 0).values.nonzero()[0]] # indices of zeros\n", + "idx = np.arange(len(df))\n", + "# searchsorted(...) - 1 picks, for every position, the last entry of izero at or before it\n", + "df['Y'] = idx - izero[np.searchsorted(izero - 1, idx) - 1]\n", + "df\n" + ] }, { "cell_type": "markdown", @@ -661,13 +1968,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 275, "metadata": { "collapsed": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[(5, 7), (6, 4), (2, 5)]" + ] + }, + "execution_count": 275, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df = pd.DataFrame(np.random.RandomState(30).randint(1, 101, size=(8, 8)))" + "df = pd.DataFrame(np.random.RandomState(30).randint(1, 101, size=(8, 8)))\n", + "df.unstack().sort_values()[-3:].index.tolist()" ] }, { @@ -705,12 +2024,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 280, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "0 13.6\n", + "1 28.0\n", + "2 13.6\n", + "3 4.0\n", + "4 13.6\n", + "5 28.0\n", + "6 13.6\n", + "7 13.6\n", + "8 8.0\n", + "9 28.0\n", + "10 28.0\n", + "11 12.0\n", + "12 16.0\n", + "13 13.6\n", + "14 13.6\n", + "Name: vals, dtype: float64" + ] + }, + "execution_count": 280, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame({\"vals\": np.random.RandomState(31).randint(-30, 30, size=15), \n", + " \"grps\": np.random.RandomState(31).choice([\"A\", \"B\"], 15)})\n", + "\n", + "def replace(group):\n", + "    # Series.mask builds a new Series, so the int64 group is never assigned a float in place\n", + "    negative = group < 0\n", + "    return group.mask(negative, group[~negative].mean())\n", + "\n", + "df.groupby(['grps'])['vals'].transform(replace)\n" + ] }, { "cell_type": "markdown", @@ -757,12 +2120,45 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 281, "metadata": { "collapsed": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "0 1.000000\n", + "1 1.500000\n", + "2 3.000000\n", + "3 3.000000\n", + "4 1.666667\n", + "5 3.000000\n", + "6 3.000000\n", + "7 2.000000\n", + "8 3.666667\n", + "9 2.000000\n", + "10 4.500000\n", + "11 4.000000\n", + "Name: value, dtype: float64" + ] + }, + "execution_count": 281, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame({'group': list('aabbabbbabab'),\n", + " 'value': [1, 2, 3, np.nan, 2, 3, np.nan, 1, 7, 3, np.nan, 8]})\n", + "\n", + "g1 = df.groupby(['group'])['value'] # group values \n", + "g2 = df.fillna(0).groupby(['group'])['value'] # fillna, then group values\n", + "\n", + "s = g2.rolling(3, min_periods=1).sum() / g1.rolling(3, min_periods=1).count() # compute means\n", + "\n", + "s.reset_index(level=0, drop=True).sort_index() # drop/sort index\n" + ] }, { "cell_type": "markdown", 
# %% Random integer series indexed by every business day of 2015
business_days = pd.bdate_range('2015-01-01', '2015-12-31')
# Seeded like the other random cells so a fresh run reproduces the same numbers.
s = pd.Series(data=np.random.RandomState(0).randint(100, size=len(business_days)),
              index=business_days)
s

# %% Sum of the values that fall on a Wednesday
s[s.index.day_name() == 'Wednesday'].sum()

# %% Mean per calendar month
# An offset object is used instead of the 'ME' alias, which only exists in
# pandas >= 2.2; the bins and month-end labels are the same.
s.resample(pd.offsets.MonthEnd()).mean()

# %% Date of the highest value in each block of four calendar months
# Month-start bins are closed on the left, giving Jan-Apr, May-Aug, Sep-Dec.
# Month-end bins ('4ME') are closed on the right, which left January alone in
# the first bin and shifted every later block by one month.
four_month_peaks = s.groupby(pd.Grouper(freq='4MS')).idxmax()
four_month_peaks

# %% Third Thursday of every month in 2015 and 2016
dates = pd.date_range('2015-01-01', '2016-12-31', freq='WOM-3THU')
dates

# %% Flight data; missing flight numbers are filled by linear interpolation
# NOTE(review): '<Air France> (12)' is reconstructed. The tag-like text was lost
# from this literal, while the recorded output shows "Air France" for the row.
# Confirm against the original exercise data.
df = pd.DataFrame({'From_To': ['LoNDon_paris', 'MAdrid_miLAN', 'londON_StockhOlm',
                               'Budapest_PaRis', 'Brussels_londOn'],
                   'FlightNumber': [10045, np.nan, 10065, np.nan, 10085],
                   'RecentDelays': [[23, 47], [], [24, 43, 87], [13], [67, 32]],
                   'Airline': ['KLM(!)', '<Air France> (12)', '(British Airways. )',
                               '12. Air France', '"Swiss Air"']})
df['FlightNumber'] = df['FlightNumber'].interpolate().astype(int)

# %% Split 'From_To' on the underscore into separate 'From' and 'To' columns
temp = df['From_To'].str.split('_', expand=True).rename(columns={0: 'From', 1: 'To'})
temp

# %% Normalise the capitalisation of the city names
temp = temp.apply(lambda col: col.str.title())

# %% Swap the combined column for the two clean ones
# drop() without inplace keeps this cell re-runnable.
df = pd.concat([df.drop(columns='From_To'), temp], axis=1)

# %% Keep only the airline name
# Raw string so that \s is a regex class rather than a string escape;
# expand=False returns a Series; strip() removes the blank that the pattern
# picks up in front of names such as '12. Air France'.
df['Airline'] = df['Airline'].str.extract(r'([a-zA-Z\s]+)', expand=False).str.strip()

# %% Expand the delay lists into one column per delay
delays = pd.DataFrame(df['RecentDelays'].tolist(), index=df.index)
# Columns are numbered from 1: delay_1 holds the first entry of each list.
delays.columns = [f'delay_{n}' for n in range(1, delays.shape[1] + 1)]
df = pd.concat([df.drop(columns='RecentDelays'), delays], axis=1)
df

# %% Series with a two-level (letter, number) MultiIndex
letters = ['A', 'B', 'C']
numbers = list(range(10))

mi = pd.MultiIndex.from_product([letters, numbers])
# One value per index entry; seeded for reproducibility.
s = pd.Series(data=np.random.RandomState(0).rand(len(mi)), index=mi)
s
# %% Is the index of s sorted?
# sortlevel() only builds a sorted copy; this property answers the question.
s.index.is_monotonic_increasing

# %% Rows whose second-level label is 1, 3 or 6
# One indexer per index level: the index has two levels.
s.loc[:, [1, 3, 6]]

# %% First level up to and including 'B', second level from 5 onwards
s.loc[:'B', 5:]

# %% Total per first-level label
s.groupby(level=0).sum()

# %% Total per second-level label, without grouping by level
# unstack() moves the second level into the columns; summing down the rows
# (axis=0) leaves one total per second-level label. axis=1 would repeat the
# first-level totals computed in the previous cell.
s.unstack().sum(axis=0)

# %% Swap the two index levels
# After the swap the entries are no longer in sorted order, so re-sort.
s1 = s.swaplevel().sort_index()
s1

# %% Every coordinate of an X-by-Y grid, one row per cell
X = 5
Y = 4
index = pd.MultiIndex.from_product([np.arange(X), np.arange(Y)], names=['X', 'Y'])
df = pd.DataFrame(index=index).reset_index()
df

# %% Place mines: each cell is a mine (1) with probability 0.4
# Seeded so a fresh run reproduces the same board; change the seed for another layout.
df['mine'] = np.random.RandomState(0).choice([0, 1], df.shape[0], p=[0.6, 0.4])
df

# %% Number of mines adjacent to each cell
from scipy.signal import convolve2d

mine_grid = df.pivot_table(columns='X', index='Y', values='mine')
# A 3x3 kernel of ones sums each cell together with its eight neighbours.
counts = convolve2d(mine_grid, np.ones((3, 3)), mode='same')
# Subtract the cell's own mine, then flatten X-major to match the row order of df.
df['adjacent'] = (counts - mine_grid).values.T.reshape(-1).astype(int)
df

# %% Cells holding a mine get NaN instead of a count
# Cast first: writing NaN into an integer column in place is a deprecated upcast.
df['adjacent'] = df['adjacent'].astype(float).mask(df['mine'] == 1)
df
+3023,9 @@ "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "df.pivot_table(values = 'adjacent', index = 'Y', columns = 'X')" + ] }, { "cell_type": "markdown", @@ -1288,7 +3067,13 @@ "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "plt.style.use('ggplot')\n", + "df = pd.DataFrame({\"xs\":[1,5,2,8,1], \"ys\":[4,2,1,9,6]})\n", + "df.plot('xs', 'ys', kind='scatter', color='black', marker = 'x')" + ] }, { "cell_type": "markdown", @@ -1315,7 +3100,14 @@ "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "df = pd.DataFrame({\"productivity\":[5,2,3,1,4,5,6,7,8,3,4,8,9],\n", + " \"hours_in\" :[1,9,6,5,3,9,2,9,1,7,4,2,2],\n", + " \"happiness\" :[2,1,3,2,3,1,2,3,1,2,2,1,3],\n", + " \"caffienated\" :[0,0,1,1,0,0,0,0,1,1,0,1,0]})\n", + "\n", + "df.plot.scatter(\"hours_in\", \"productivity\", s = df.happiness * 30, c = df.caffienated)" + ] }, { "cell_type": "markdown", @@ -1340,7 +3132,15 @@ "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "df = pd.DataFrame({\"revenue\":[57,68,63,71,72,90,80,62,59,51,47,52],\n", + " \"advertising\":[2.1,1.9,2.7,3.0,3.6,3.2,2.7,2.4,1.8,1.6,1.3,1.9],\n", + " \"month\":range(12)\n", + " })\n", + "\n", + "ax1 = df.plot.bar('month', 'revenue')\n", + "df.plot.line('month', 'advertising', ax = ax1, color= 'blue', secondary_y=True)" + ] }, { "cell_type": "markdown", @@ -1424,7 +3224,13 @@ "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "df = day_stock_data()\n", + "agg = df.sort_values('time').groupby(pd.Grouper(key = 'time',freq = 'h')).ohlc().droplevel(0, axis=1)\n", + "# agg(['min', 'max', 'first', 'last']).droplevel(0, axis=1).rename(columns = {'min': 'low', 'max': \"high\", \"first\": \"open\", \"last\":\"close\"})\n", + "agg[\"color\"] = (agg.close > agg.open).map({True:\"green\",False:\"red\"})\n", + "agg.head()" + ] }, { "cell_type": "markdown", @@ -1440,7 +3246,9 @@ "collapsed": true }, 
"outputs": [], - "source": [] + "source": [ + "plot_candlestick(agg)" + ] }, { "cell_type": "markdown", @@ -1466,7 +3274,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.11.8" } }, "nbformat": 4,