diff --git a/.virtual_documents/Phase1/Lectures/06_PythonDataManipulation.ipynb b/.virtual_documents/Phase1/Lectures/06_PythonDataManipulation.ipynb new file mode 100644 index 0000000..6c32875 --- /dev/null +++ b/.virtual_documents/Phase1/Lectures/06_PythonDataManipulation.ipynb @@ -0,0 +1,572 @@ + + + + + + + + + + + + + + + + + + + + + +# Create your bento list +bento = ['salmon', 'rice', 'edamame', 'seaweed salad', 'dumplings'] + + + + + +type(bento) + + +len(bento) + + +# Run this cell without changes +bento[4] + + +# Try to get the last entry +bento[-2] + + +bento[5] + + + + + +# Run this cell without changes +# Play around with these numbers, and start to build some understanding of +# which elements are where exactly in the list +bento[2:] + + +bento + + +bento[:3] + + + + + +# Code here to add to your list +bento.append('kimchi') + + +bento + + + + + +# Code here to test that out +bento.pop() + + +# Now check what your list looks like - is that last item still there? +bento + + +bento.remove('kimchi') + + +bento + + + + + +bento[:-1] + + +# Pay attention to what the .join is doing +print("I'd like my bento to contain: " + ", ".join(bento[:-1]) + ", and " + bento[-1]) + + + + + +# F-string formatting easier! 
# Everything except the last item is comma-joined; the last item follows "and"
print(f"My bento box will include: {', '.join(bento[:-1])}, and {bento[-1]}.")


print(f"My bento box will include: {bento[0]} and {bento[1]}.")


# The above cell is the same as:
print("My bento box will include: {} and {}.".format(bento[0], bento[1]))


text_str = 'some string here'
text_str.title()


# Write a for loop to capitalize each ingredient in our bento list
# for x in iterable:
#     f(x)
for item in bento:
    print(item.title())


# NOTE: .title() is a string method; a list has no .title(), so this cell
# raises AttributeError. Presumably kept to show why we loop over the items.
bento.title()


# The loop variable still holds the last item after the loop has finished
item


bento


bento.append('seared tuna')


# Write your for loop with a conditional

# Need to first define empty lists to become our new lists
s_bento = []
r_bento = []
other_bento = []
# Now our loop
for ingredient in bento:
    has_s = 's' in ingredient
    has_r = 'r' in ingredient
    # Two independent checks: an ingredient may belong in both lists
    if has_s:
        s_bento.append(ingredient)
    if has_r:
        r_bento.append(ingredient)
    # "other" is only for ingredients that matched neither letter
    if not (has_s or has_r):
        other_bento.append(ingredient)


# Check your work
s_bento


r_bento


other_bento


# Change our loop to a list comprehension
s_bento = [ingredient for ingredient in bento if 's' in ingredient]
s_bento


# We could do the same with our earlier capitalization, too!
[ingredient.title() for ingredient in bento]


new_dict = {'key': 'value', 'key2': 'value2'}


# Here's an example of zipping two lists together to form a dictionary
example_bento_keys = ["ingredient1", "ingredient2", "ingredient3"]
example_bento_values = ["rice", "tempura", "miso soup"]

example_bento_dict = dict(zip(example_bento_keys, example_bento_values))

print(example_bento_dict)
print(type(example_bento_dict))


# Now let's do that! What does our current list look like?
bento


# Let's define keys for our bento
bento_keys = ['protein', 'main', 'vegetable1', 'vegetable2', 'side']


# Now create your bento_dict!
# zip pairs the keys with the list items by position and stops at the
# shorter of the two inputs
bento_dict = dict(zip(bento_keys, bento))


# Code here to check your work - check type, and print your dictionary
print(type(bento_dict))

print(bento_dict)


# Dictionaries are looked up by key ...
bento_dict['protein']


# ... not by position: this raises KeyError unless 0 happens to be a key
bento_dict[0]


dict1 = {'key1': 20, 'key2': 30}


# 'key3' is not in dict1, so the bracket lookup raises KeyError and the
# assignment to bracket_way never happens in this cell
bracket_way = dict1['key3']
type(bracket_way)


# Potentially better way because it returns None rather than an error
#bracket_way = dict1['key3']
get_way = dict1.get('key3')
# .get() found no 'key3' and no default was given, so this is NoneType
type(get_way)


get_way


# NOTE(review): presumably a NameError in a fresh kernel, because the failed
# bracket lookup above never bound this name - confirm this is intended
bracket_way


# .items() yields (key, value) pairs
bento_dict.items()


# Write your loop using .items() to unpack key, value pairs
for key, value in bento_dict.items():
    # substring test: matches any key that contains 'vegetable'
    if 'vegetable' in key:
        print(value)


# Iterating a dict directly yields its keys; index back in for the value
for x in bento_dict:
    print(x, bento_dict[x])


# Need to first define an empty dictionary to become our new dict
veggie_dict = {}

for key, value in bento_dict.items():
    if 'vegetable' in key:
        # last character of the key becomes the new key
        # (presumably the trailing digit of keys such as 'vegetable1')
        number = key[-1]
        veggie_dict[number] = value.title()


# Check your work!
veggie_dict


# Change our loop to a dictionary comprehension
{k[-1]: v.title() for k, v in bento_dict.items() if 'vegetable' in k}


bento_dict.values()


# You can get creative with it too!
# enumerate numbers the values from 1, so there is no need to rebuild a list
# of the values and index into it on every iteration
{f"Ingredient {position}": ingredient
 for position, ingredient in enumerate(bento_dict.values(), start=1)}


# Can go ahead and paste at least two other dictionaries
james_bento = {
    'main': 'cheeseburger',
    'cheese': 'pepper jack',
    'side': 'french fries',
    'vegetable1': 'pickles',
    'vegetable2': 'onions',
    'drink': 'milkshake'}

hannah_bento = {
    "main": "salad",
    "protein": "tempura shrimp",
    "vegetable1": "radishes",
    "vegetable2": "cucumbers",
    "side": "tuna roll"}


# Code here to create your nested dictionaries
group_dict = {'Daniel': bento_dict, 'James': james_bento, 'Hannah': hannah_bento}


# Check your work
group_dict


group_dict.values()


# Code here to grab a list of who you have orders for
group = list(group_dict.keys())
group


# Check your work
type(group)


# Access one dictionary's main
group_dict['James']['main']


# Same lookup with .get(): the outer call needs a {} default, otherwise a
# missing name returns None and the second .get() raises AttributeError
group_dict.get('James', {}).get('main')


# The first order, by insertion order of group_dict
list(group_dict.values())[0]


# Code here to write a for loop that prints each main
# Think about what we are looping through and if you need .items()
for order in group_dict.values():
    print(order['main'])


# An example of nested comprehensions
{f"{name}'s vegetables": [v for k, v in order.items() if 'vegetable' in k]
 for name, order in group_dict.items()}


# But remember ... it's okay (and often easier) to write this out as a for loop
# THEN you can condense into a comprehension more easily!
# Deliberately written as an explicit nested for loop: one entry per person,
# holding every ingredient whose key contains 'vegetable'
group_veggie_dict = {}

for name, order in group_dict.items():
    ingredient_list = []
    for key, ingredient in order.items():
        if 'vegetable' in key:
            ingredient_list.append(ingredient)
    group_veggie_dict[f"{name}'s vegetables"] = ingredient_list

# Check it
group_veggie_dict


def _matching_ingredients(order, ingredient_type):
    '''
    Return the ingredients of one order whose key contains ingredient_type.
    '''
    return [ingredient for key, ingredient in order.items()
            if ingredient_type in key]


def find_ingredients(nested_dict, ingredient_type='main'):
    '''
    Function that takes in a dictionary, where names are keys and values are
    dictionaries of that person's bento order, and then checks which keys in
    the bento order dictionary contain the provided string. The output is a
    list of tuples, with each person's name and a list of matched ingredients.

    Inputs:
        nested_dict : dictionary of {name: order dictionary}
        ingredient_type : string looked for inside each order key, so
                          'vegetable' matches 'vegetable1' and 'vegetable2'
                          (default is 'main')

    Outputs:
        output_list : list of (name, ingredient_list) tuples, one per name
                      in nested_dict; ingredient_list is empty when no key
                      of that person's order matched
    '''
    output_list = [(name, _matching_ingredients(order, ingredient_type))
                   for name, order in nested_dict.items()]

    return output_list


# version that outputs dictionary instead of list
def find_ingredients_dict(nested_dict, ingredient_type='main'):
    '''
    Function that takes in a dictionary, where names are keys and values are
    dictionaries of that person's bento order, and then checks which keys in
    the bento order dictionary contain the provided string. The output is a
    dictionary mapping each person's name to a list of matched ingredients.

    Inputs:
        nested_dict : dictionary of {name: order dictionary}
        ingredient_type : string looked for inside each order key, so
                          'vegetable' matches 'vegetable1' and 'vegetable2'
                          (default is 'main')

    Outputs:
        output_dict : dictionary of {name: ingredient_list}, one entry per
                      name in nested_dict; ingredient_list is empty when no
                      key of that person's order matched
    '''
    output_dict = {name: _matching_ingredients(order, ingredient_type)
                   for name, order in nested_dict.items()}

    return output_dict


# Try it!
+output = find_ingredients(group_dict, 'side') +output + + +type(output[0]) + + +find_ingredients_dict(group_dict) + + + + + + + + +nums = set(range(1000)) + + +# Your code here + + + + + + + + +words = ['carbon', 'osmium', 'mercury', 'potassium', 'rhenium', 'einsteinium', + 'hydrogen', 'erbium', 'nitrogen', 'sulfur', 'iodine', 'oxygen', 'niobium'] + + +# Your code here + + + + + + + + +names = ['Randy', 'Robert', 'Alex', 'Ranjit', 'Charlie', 'Richard', 'Ravdeep', + 'Vimal', 'Wu', 'Nelson'] + + +# Your code here (couple ways to do this) + + + + + + + + +phone_nos = [{'name': 'greg', 'nums': {'home': 1234567, 'work': 7654321}}, + {'name': 'max', 'nums': {'home': 9876543, 'work': 1010001}}, + {'name': 'erin', 'nums': {'home': 3333333, 'work': 4444444}}, + {'name': 'joél', 'nums': {'home': 2222222, 'work': 5555555}}, + {'name': 'sean', 'nums': {'home': 9999999, 'work': 8888888}}] + + +# Your code here + + + + + + + + +customers = { + 'bill': {'purchases': {'movies': ['Terminator', 'Elf'], + 'books': []}, + 'id': 1}, + 'dolph': {'purchases': {'movies': ['It Happened One Night'], + 'books': ['The Far Side Gallery']}, + 'id': 2}, + 'pat': {'purchases': {'movies': [], + 'books': ['Seinfeld and Philosophy', 'I Am a Bunny']}, + 'id': 3} +} + + +# Your code here + + + + + + + + +# Your code here + + + + + + + + +# Your code here + + + diff --git a/Phase1/Lectures/06_PythonDataManipulation.ipynb b/Phase1/Lectures/06_PythonDataManipulation.ipynb index d908bfd..8014ab5 100644 --- a/Phase1/Lectures/06_PythonDataManipulation.ipynb +++ b/Phase1/Lectures/06_PythonDataManipulation.ipynb @@ -356,9 +356,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "# Write a for loop to capitalize each ingredient in our bento list\n", @@ -1480,7 +1478,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.12.4" }, "toc": { "base_numbering": 1, 
@@ -1502,5 +1500,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/Phase1/Lectures/09_PandasDataframes.ipynb b/Phase1/Lectures/09_PandasDataframes.ipynb index c7fefc7..7c465a0 100644 --- a/Phase1/Lectures/09_PandasDataframes.ipynb +++ b/Phase1/Lectures/09_PandasDataframes.ipynb @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -84,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": { "hidden": true }, @@ -113,11 +113,276 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": { "hidden": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
063131452331015002.30011
137121302500118703.50021
241011302040017201.42021
356111202360117800.82021
457001203540116310.62021
.............................................
29857001402410112310.21030
29945131102640113201.21030
30068101441931114103.41230
30157101301310111511.21130
30257011302360017400.01120
\n", + "

303 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", + "0 63 1 3 145 233 1 0 150 0 2.3 \n", + "1 37 1 2 130 250 0 1 187 0 3.5 \n", + "2 41 0 1 130 204 0 0 172 0 1.4 \n", + "3 56 1 1 120 236 0 1 178 0 0.8 \n", + "4 57 0 0 120 354 0 1 163 1 0.6 \n", + ".. ... ... .. ... ... ... ... ... ... ... \n", + "298 57 0 0 140 241 0 1 123 1 0.2 \n", + "299 45 1 3 110 264 0 1 132 0 1.2 \n", + "300 68 1 0 144 193 1 1 141 0 3.4 \n", + "301 57 1 0 130 131 0 1 115 1 1.2 \n", + "302 57 0 1 130 236 0 0 174 0 0.0 \n", + "\n", + " slope ca thal target \n", + "0 0 0 1 1 \n", + "1 0 0 2 1 \n", + "2 2 0 2 1 \n", + "3 2 0 2 1 \n", + "4 2 0 2 1 \n", + ".. ... .. ... ... \n", + "298 1 0 3 0 \n", + "299 1 0 3 0 \n", + "300 1 2 3 0 \n", + "301 1 1 3 0 \n", + "302 1 1 2 0 \n", + "\n", + "[303 rows x 14 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Let's check this variable out\n", "heart_df" @@ -125,11 +390,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": { "hidden": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# What type is this variable?\n", "type(heart_df)" @@ -156,21 +432,67 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['age',\n", + " 'sex',\n", + " 'cp',\n", + " 'trestbps',\n", + " 'chol',\n", + " 'fbs',\n", + " 'restecg',\n", + " 'thalach',\n", + " 'exang',\n", + " 'oldpeak',\n", + " 'slope',\n", + " 'ca',\n", + " 'thal',\n", + " 'target']" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "list(heart_df.keys())" ] }, { "cell_type": "code", - "execution_count": null, + 
"execution_count": 27, "metadata": { "hidden": true, "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 63\n", + "1 37\n", + "2 41\n", + "3 56\n", + "4 57\n", + " ..\n", + "298 57\n", + "299 45\n", + "300 68\n", + "301 57\n", + "302 57\n", + "Name: age, Length: 303, dtype: int64" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Let's grab just one column\n", "age_series = heart_df['age']\n", @@ -179,29 +501,377 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['age',\n", + " 'sex',\n", + " 'cp',\n", + " 'trestbps',\n", + " 'chol',\n", + " 'fbs',\n", + " 'restecg',\n", + " 'thalach',\n", + " 'exang',\n", + " 'oldpeak',\n", + " 'slope',\n", + " 'ca',\n", + " 'thal',\n", + " 'target']" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "list(heart_df.columns)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "54.366336633663366" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "age_series.mean()" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[63,\n", + " 37,\n", + " 41,\n", + " 56,\n", + " 57,\n", + " 57,\n", + " 56,\n", + " 44,\n", + " 52,\n", + " 57,\n", + " 54,\n", + " 48,\n", + " 49,\n", + " 64,\n", + " 58,\n", + " 50,\n", + " 58,\n", + " 66,\n", + " 43,\n", + " 69,\n", + " 59,\n", + " 44,\n", + " 42,\n", + " 61,\n", + " 40,\n", + " 71,\n", + " 59,\n", + " 51,\n", + " 65,\n", + " 53,\n", + " 41,\n", + " 65,\n", + " 44,\n", + " 54,\n", + " 51,\n", + " 46,\n", + " 54,\n", 
+ " 54,\n", + " 65,\n", + " 65,\n", + " 51,\n", + " 48,\n", + " 45,\n", + " 53,\n", + " 39,\n", + " 52,\n", + " 44,\n", + " 47,\n", + " 53,\n", + " 53,\n", + " 51,\n", + " 66,\n", + " 62,\n", + " 44,\n", + " 63,\n", + " 52,\n", + " 48,\n", + " 45,\n", + " 34,\n", + " 57,\n", + " 71,\n", + " 54,\n", + " 52,\n", + " 41,\n", + " 58,\n", + " 35,\n", + " 51,\n", + " 45,\n", + " 44,\n", + " 62,\n", + " 54,\n", + " 51,\n", + " 29,\n", + " 51,\n", + " 43,\n", + " 55,\n", + " 51,\n", + " 59,\n", + " 52,\n", + " 58,\n", + " 41,\n", + " 45,\n", + " 60,\n", + " 52,\n", + " 42,\n", + " 67,\n", + " 68,\n", + " 46,\n", + " 54,\n", + " 58,\n", + " 48,\n", + " 57,\n", + " 52,\n", + " 54,\n", + " 45,\n", + " 53,\n", + " 62,\n", + " 52,\n", + " 43,\n", + " 53,\n", + " 42,\n", + " 59,\n", + " 63,\n", + " 42,\n", + " 50,\n", + " 68,\n", + " 69,\n", + " 45,\n", + " 50,\n", + " 50,\n", + " 64,\n", + " 57,\n", + " 64,\n", + " 43,\n", + " 55,\n", + " 37,\n", + " 41,\n", + " 56,\n", + " 46,\n", + " 46,\n", + " 64,\n", + " 59,\n", + " 41,\n", + " 54,\n", + " 39,\n", + " 34,\n", + " 47,\n", + " 67,\n", + " 52,\n", + " 74,\n", + " 54,\n", + " 49,\n", + " 42,\n", + " 41,\n", + " 41,\n", + " 49,\n", + " 60,\n", + " 62,\n", + " 57,\n", + " 64,\n", + " 51,\n", + " 43,\n", + " 42,\n", + " 67,\n", + " 76,\n", + " 70,\n", + " 44,\n", + " 60,\n", + " 44,\n", + " 42,\n", + " 66,\n", + " 71,\n", + " 64,\n", + " 66,\n", + " 39,\n", + " 58,\n", + " 47,\n", + " 35,\n", + " 58,\n", + " 56,\n", + " 56,\n", + " 55,\n", + " 41,\n", + " 38,\n", + " 38,\n", + " 67,\n", + " 67,\n", + " 62,\n", + " 63,\n", + " 53,\n", + " 56,\n", + " 48,\n", + " 58,\n", + " 58,\n", + " 60,\n", + " 40,\n", + " 60,\n", + " 64,\n", + " 43,\n", + " 57,\n", + " 55,\n", + " 65,\n", + " 61,\n", + " 58,\n", + " 50,\n", + " 44,\n", + " 60,\n", + " 54,\n", + " 50,\n", + " 41,\n", + " 51,\n", + " 58,\n", + " 54,\n", + " 60,\n", + " 60,\n", + " 59,\n", + " 46,\n", + " 67,\n", + " 62,\n", + " 65,\n", + " 44,\n", + " 60,\n", + " 58,\n", + " 
68,\n", + " 62,\n", + " 52,\n", + " 59,\n", + " 60,\n", + " 49,\n", + " 59,\n", + " 57,\n", + " 61,\n", + " 39,\n", + " 61,\n", + " 56,\n", + " 43,\n", + " 62,\n", + " 63,\n", + " 65,\n", + " 48,\n", + " 63,\n", + " 55,\n", + " 65,\n", + " 56,\n", + " 54,\n", + " 70,\n", + " 62,\n", + " 35,\n", + " 59,\n", + " 64,\n", + " 47,\n", + " 57,\n", + " 55,\n", + " 64,\n", + " 70,\n", + " 51,\n", + " 58,\n", + " 60,\n", + " 77,\n", + " 35,\n", + " 70,\n", + " 59,\n", + " 64,\n", + " 57,\n", + " 56,\n", + " 48,\n", + " 56,\n", + " 66,\n", + " 54,\n", + " 69,\n", + " 51,\n", + " 43,\n", + " 62,\n", + " 67,\n", + " 59,\n", + " 45,\n", + " 58,\n", + " 50,\n", + " 62,\n", + " 38,\n", + " 66,\n", + " 52,\n", + " 53,\n", + " 63,\n", + " 54,\n", + " 66,\n", + " 55,\n", + " 49,\n", + " 54,\n", + " 56,\n", + " 46,\n", + " 61,\n", + " 67,\n", + " 58,\n", + " 47,\n", + " 52,\n", + " 58,\n", + " 57,\n", + " 58,\n", + " 61,\n", + " 42,\n", + " 52,\n", + " 59,\n", + " 40,\n", + " 61,\n", + " 46,\n", + " 59,\n", + " 57,\n", + " 57,\n", + " 55,\n", + " 61,\n", + " 58,\n", + " 58,\n", + " 67,\n", + " 44,\n", + " 63,\n", + " 63,\n", + " 59,\n", + " 57,\n", + " 45,\n", + " 68,\n", + " 57,\n", + " 57]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "list(age_series)" ] @@ -217,18 +887,51 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 63\n", + "1 37\n", + "2 41\n", + "3 56\n", + "4 57\n", + " ..\n", + "298 57\n", + "299 45\n", + "300 68\n", + "301 57\n", + "302 57\n", + "Name: age, Length: 303, dtype: int64" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "heart_df.age" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + 
"pandas.core.series.Series" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# What type is the column?\n", "type(age_series)" @@ -245,32 +948,367 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": { "hidden": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=303, step=1)" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "heart_df.index" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": { "hidden": true, "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=303, step=1)" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "age_series.index" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0,\n", + " 1,\n", + " 2,\n", + " 3,\n", + " 4,\n", + " 5,\n", + " 6,\n", + " 7,\n", + " 8,\n", + " 9,\n", + " 10,\n", + " 11,\n", + " 12,\n", + " 13,\n", + " 14,\n", + " 15,\n", + " 16,\n", + " 17,\n", + " 18,\n", + " 19,\n", + " 20,\n", + " 21,\n", + " 22,\n", + " 23,\n", + " 24,\n", + " 25,\n", + " 26,\n", + " 27,\n", + " 28,\n", + " 29,\n", + " 30,\n", + " 31,\n", + " 32,\n", + " 33,\n", + " 34,\n", + " 35,\n", + " 36,\n", + " 37,\n", + " 38,\n", + " 39,\n", + " 40,\n", + " 41,\n", + " 42,\n", + " 43,\n", + " 44,\n", + " 45,\n", + " 46,\n", + " 47,\n", + " 48,\n", + " 49,\n", + " 50,\n", + " 51,\n", + " 52,\n", + " 53,\n", + " 54,\n", + " 55,\n", + " 56,\n", + " 57,\n", + " 58,\n", + " 59,\n", + " 60,\n", + " 61,\n", + " 62,\n", + " 63,\n", + " 64,\n", + " 65,\n", + " 66,\n", + " 67,\n", + " 68,\n", + " 69,\n", + " 70,\n", + " 71,\n", + " 72,\n", + " 73,\n", + " 74,\n", + " 75,\n", 
+ " 76,\n", + " 77,\n", + " 78,\n", + " 79,\n", + " 80,\n", + " 81,\n", + " 82,\n", + " 83,\n", + " 84,\n", + " 85,\n", + " 86,\n", + " 87,\n", + " 88,\n", + " 89,\n", + " 90,\n", + " 91,\n", + " 92,\n", + " 93,\n", + " 94,\n", + " 95,\n", + " 96,\n", + " 97,\n", + " 98,\n", + " 99,\n", + " 100,\n", + " 101,\n", + " 102,\n", + " 103,\n", + " 104,\n", + " 105,\n", + " 106,\n", + " 107,\n", + " 108,\n", + " 109,\n", + " 110,\n", + " 111,\n", + " 112,\n", + " 113,\n", + " 114,\n", + " 115,\n", + " 116,\n", + " 117,\n", + " 118,\n", + " 119,\n", + " 120,\n", + " 121,\n", + " 122,\n", + " 123,\n", + " 124,\n", + " 125,\n", + " 126,\n", + " 127,\n", + " 128,\n", + " 129,\n", + " 130,\n", + " 131,\n", + " 132,\n", + " 133,\n", + " 134,\n", + " 135,\n", + " 136,\n", + " 137,\n", + " 138,\n", + " 139,\n", + " 140,\n", + " 141,\n", + " 142,\n", + " 143,\n", + " 144,\n", + " 145,\n", + " 146,\n", + " 147,\n", + " 148,\n", + " 149,\n", + " 150,\n", + " 151,\n", + " 152,\n", + " 153,\n", + " 154,\n", + " 155,\n", + " 156,\n", + " 157,\n", + " 158,\n", + " 159,\n", + " 160,\n", + " 161,\n", + " 162,\n", + " 163,\n", + " 164,\n", + " 165,\n", + " 166,\n", + " 167,\n", + " 168,\n", + " 169,\n", + " 170,\n", + " 171,\n", + " 172,\n", + " 173,\n", + " 174,\n", + " 175,\n", + " 176,\n", + " 177,\n", + " 178,\n", + " 179,\n", + " 180,\n", + " 181,\n", + " 182,\n", + " 183,\n", + " 184,\n", + " 185,\n", + " 186,\n", + " 187,\n", + " 188,\n", + " 189,\n", + " 190,\n", + " 191,\n", + " 192,\n", + " 193,\n", + " 194,\n", + " 195,\n", + " 196,\n", + " 197,\n", + " 198,\n", + " 199,\n", + " 200,\n", + " 201,\n", + " 202,\n", + " 203,\n", + " 204,\n", + " 205,\n", + " 206,\n", + " 207,\n", + " 208,\n", + " 209,\n", + " 210,\n", + " 211,\n", + " 212,\n", + " 213,\n", + " 214,\n", + " 215,\n", + " 216,\n", + " 217,\n", + " 218,\n", + " 219,\n", + " 220,\n", + " 221,\n", + " 222,\n", + " 223,\n", + " 224,\n", + " 225,\n", + " 226,\n", + " 227,\n", + " 228,\n", + " 229,\n", + " 230,\n", + " 
231,\n", + " 232,\n", + " 233,\n", + " 234,\n", + " 235,\n", + " 236,\n", + " 237,\n", + " 238,\n", + " 239,\n", + " 240,\n", + " 241,\n", + " 242,\n", + " 243,\n", + " 244,\n", + " 245,\n", + " 246,\n", + " 247,\n", + " 248,\n", + " 249,\n", + " 250,\n", + " 251,\n", + " 252,\n", + " 253,\n", + " 254,\n", + " 255,\n", + " 256,\n", + " 257,\n", + " 258,\n", + " 259,\n", + " 260,\n", + " 261,\n", + " 262,\n", + " 263,\n", + " 264,\n", + " 265,\n", + " 266,\n", + " 267,\n", + " 268,\n", + " 269,\n", + " 270,\n", + " 271,\n", + " 272,\n", + " 273,\n", + " 274,\n", + " 275,\n", + " 276,\n", + " 277,\n", + " 278,\n", + " 279,\n", + " 280,\n", + " 281,\n", + " 282,\n", + " 283,\n", + " 284,\n", + " 285,\n", + " 286,\n", + " 287,\n", + " 288,\n", + " 289,\n", + " 290,\n", + " 291,\n", + " 292,\n", + " 293,\n", + " 294,\n", + " 295,\n", + " 296,\n", + " 297,\n", + " 298,\n", + " 299,\n", + " 300,\n", + " 301,\n", + " 302]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "list(age_series.index)" ] @@ -284,18 +1322,44 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',\n", + " 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],\n", + " dtype='object')" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "heart_df.columns" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'Series' object has no attribute 'columns'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + 
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_22108\\1967716809.py\u001b[0m in \u001b[0;36m?\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# This will throw an error!\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mage_series\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32mc:\\Users\\Elif Surucu\\anaconda3\\Lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 6295\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_accessors\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6296\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6297\u001b[0m \u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6298\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 6299\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m: 'Series' object has no attribute 'columns'" + ] + } + ], "source": [ "# This will throw an error!\n", "age_series.columns" @@ -312,21 +1376,50 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 39, "metadata": { "hidden": true, "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([[63., 1., 3., ..., 0., 1., 1.],\n", + " [37., 1., 2., ..., 0., 2., 1.],\n", + " [41., 0., 1., ..., 0., 2., 1.],\n", + " ...,\n", + " [68., 1., 0., ..., 2., 3., 0.],\n", + " [57., 1., 0., ..., 1., 3., 0.],\n", + " [57., 0., 1., ..., 1., 2., 0.]])" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "heart_df.values" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 63. , 1. , 3. , 145. , 233. , 1. , 0. , 150. , 0. ,\n", + " 2.3, 0. , 0. , 1. , 1. ])" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "#First row\n", "heart_df.values[0]" @@ -334,31 +1427,88 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([[63, 1, 3, ..., 0, 1, 1],\n", + " [37, 1, 2, ..., 0, 2, 1],\n", + " [41, 0, 1, ..., 0, 2, 1],\n", + " ...,\n", + " [68, 1, 0, ..., 2, 3, 0],\n", + " [57, 1, 0, ..., 1, 3, 0],\n", + " [57, 0, 1, ..., 1, 2, 0]])" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "heart_df.values.astype(int)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.float64" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "type(heart_df.values[0][0])" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + 
"text/plain": [ + "array([63, 37, 41, 56, 57, 57, 56, 44, 52, 57, 54, 48, 49, 64, 58, 50, 58,\n", + " 66, 43, 69, 59, 44, 42, 61, 40, 71, 59, 51, 65, 53, 41, 65, 44, 54,\n", + " 51, 46, 54, 54, 65, 65, 51, 48, 45, 53, 39, 52, 44, 47, 53, 53, 51,\n", + " 66, 62, 44, 63, 52, 48, 45, 34, 57, 71, 54, 52, 41, 58, 35, 51, 45,\n", + " 44, 62, 54, 51, 29, 51, 43, 55, 51, 59, 52, 58, 41, 45, 60, 52, 42,\n", + " 67, 68, 46, 54, 58, 48, 57, 52, 54, 45, 53, 62, 52, 43, 53, 42, 59,\n", + " 63, 42, 50, 68, 69, 45, 50, 50, 64, 57, 64, 43, 55, 37, 41, 56, 46,\n", + " 46, 64, 59, 41, 54, 39, 34, 47, 67, 52, 74, 54, 49, 42, 41, 41, 49,\n", + " 60, 62, 57, 64, 51, 43, 42, 67, 76, 70, 44, 60, 44, 42, 66, 71, 64,\n", + " 66, 39, 58, 47, 35, 58, 56, 56, 55, 41, 38, 38, 67, 67, 62, 63, 53,\n", + " 56, 48, 58, 58, 60, 40, 60, 64, 43, 57, 55, 65, 61, 58, 50, 44, 60,\n", + " 54, 50, 41, 51, 58, 54, 60, 60, 59, 46, 67, 62, 65, 44, 60, 58, 68,\n", + " 62, 52, 59, 60, 49, 59, 57, 61, 39, 61, 56, 43, 62, 63, 65, 48, 63,\n", + " 55, 65, 56, 54, 70, 62, 35, 59, 64, 47, 57, 55, 64, 70, 51, 58, 60,\n", + " 77, 35, 70, 59, 64, 57, 56, 48, 56, 66, 54, 69, 51, 43, 62, 67, 59,\n", + " 45, 58, 50, 62, 38, 66, 52, 53, 63, 54, 66, 55, 49, 54, 56, 46, 61,\n", + " 67, 58, 47, 52, 58, 57, 58, 61, 42, 52, 59, 40, 61, 46, 59, 57, 57,\n", + " 55, 61, 58, 58, 67, 44, 63, 63, 59, 57, 45, 68, 57, 57],\n", + " dtype=int64)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "age_series.values" ] @@ -394,11 +1544,159 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "metadata": { "hidden": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
063131452331015002.30011
137121302500118703.50021
241011302040017201.42021
356111202360117800.82021
457001203540116310.62021
\n", + "
" + ], + "text/plain": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n", + "0 63 1 3 145 233 1 0 150 0 2.3 0 \n", + "1 37 1 2 130 250 0 1 187 0 3.5 0 \n", + "2 41 0 1 130 204 0 0 172 0 1.4 2 \n", + "3 56 1 1 120 236 0 1 178 0 0.8 2 \n", + "4 57 0 0 120 354 0 1 163 1 0.6 2 \n", + "\n", + " ca thal target \n", + "0 0 1 1 \n", + "1 0 2 1 \n", + "2 0 2 1 \n", + "3 0 2 1 \n", + "4 0 2 1 " + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "heart_df.head()" ] @@ -415,11 +1713,159 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": { "hidden": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
29857001402410112310.21030
29945131102640113201.21030
30068101441931114103.41230
30157101301310111511.21130
30257011302360017400.01120
\n", + "
" + ], + "text/plain": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", + "298 57 0 0 140 241 0 1 123 1 0.2 \n", + "299 45 1 3 110 264 0 1 132 0 1.2 \n", + "300 68 1 0 144 193 1 1 141 0 3.4 \n", + "301 57 1 0 130 131 0 1 115 1 1.2 \n", + "302 57 0 1 130 236 0 0 174 0 0.0 \n", + "\n", + " slope ca thal target \n", + "298 1 0 3 0 \n", + "299 1 0 3 0 \n", + "300 1 2 3 0 \n", + "301 1 1 3 0 \n", + "302 1 1 2 0 " + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "heart_df.tail()" ] @@ -436,11 +1882,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, "metadata": { "hidden": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 303 entries, 0 to 302\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 age 303 non-null int64 \n", + " 1 sex 303 non-null int64 \n", + " 2 cp 303 non-null int64 \n", + " 3 trestbps 303 non-null int64 \n", + " 4 chol 303 non-null int64 \n", + " 5 fbs 303 non-null int64 \n", + " 6 restecg 303 non-null int64 \n", + " 7 thalach 303 non-null int64 \n", + " 8 exang 303 non-null int64 \n", + " 9 oldpeak 303 non-null float64\n", + " 10 slope 303 non-null int64 \n", + " 11 ca 303 non-null int64 \n", + " 12 thal 303 non-null int64 \n", + " 13 target 303 non-null int64 \n", + "dtypes: float64(1), int64(13)\n", + "memory usage: 33.3 KB\n" + ] + } + ], "source": [ "heart_df.info()" ] @@ -457,39 +1931,313 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": { "hidden": true, "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
count303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000
mean54.3663370.6831680.966997131.623762246.2640260.1485150.528053149.6468650.3267331.0396041.3993400.7293732.3135310.544554
std9.0821010.4660111.03205217.53814351.8307510.3561980.52586022.9051610.4697941.1610750.6162261.0226060.6122770.498835
min29.0000000.0000000.00000094.000000126.0000000.0000000.00000071.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%47.5000000.0000000.000000120.000000211.0000000.0000000.000000133.5000000.0000000.0000001.0000000.0000002.0000000.000000
50%55.0000001.0000001.000000130.000000240.0000000.0000001.000000153.0000000.0000000.8000001.0000000.0000002.0000001.000000
75%61.0000001.0000002.000000140.000000274.5000000.0000001.000000166.0000001.0000001.6000002.0000001.0000003.0000001.000000
max77.0000001.0000003.000000200.000000564.0000001.0000002.000000202.0000001.0000006.2000002.0000004.0000003.0000001.000000
\n", + "
" + ], + "text/plain": [ + " age sex cp trestbps chol fbs \\\n", + "count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n", + "mean 54.366337 0.683168 0.966997 131.623762 246.264026 0.148515 \n", + "std 9.082101 0.466011 1.032052 17.538143 51.830751 0.356198 \n", + "min 29.000000 0.000000 0.000000 94.000000 126.000000 0.000000 \n", + "25% 47.500000 0.000000 0.000000 120.000000 211.000000 0.000000 \n", + "50% 55.000000 1.000000 1.000000 130.000000 240.000000 0.000000 \n", + "75% 61.000000 1.000000 2.000000 140.000000 274.500000 0.000000 \n", + "max 77.000000 1.000000 3.000000 200.000000 564.000000 1.000000 \n", + "\n", + " restecg thalach exang oldpeak slope ca \\\n", + "count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n", + "mean 0.528053 149.646865 0.326733 1.039604 1.399340 0.729373 \n", + "std 0.525860 22.905161 0.469794 1.161075 0.616226 1.022606 \n", + "min 0.000000 71.000000 0.000000 0.000000 0.000000 0.000000 \n", + "25% 0.000000 133.500000 0.000000 0.000000 1.000000 0.000000 \n", + "50% 1.000000 153.000000 0.000000 0.800000 1.000000 0.000000 \n", + "75% 1.000000 166.000000 1.000000 1.600000 2.000000 1.000000 \n", + "max 2.000000 202.000000 1.000000 6.200000 2.000000 4.000000 \n", + "\n", + " thal target \n", + "count 303.000000 303.000000 \n", + "mean 2.313531 0.544554 \n", + "std 0.612277 0.498835 \n", + "min 0.000000 0.000000 \n", + "25% 2.000000 0.000000 \n", + "50% 2.000000 1.000000 \n", + "75% 3.000000 1.000000 \n", + "max 3.000000 1.000000 " + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "heart_df.describe()" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "heart_df['chol'].hist(bins='auto')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 49, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 303.000000\n", + "mean 54.366337\n", + "std 9.082101\n", + "min 29.000000\n", + "25% 47.500000\n", + "50% 55.000000\n", + "75% 61.000000\n", + "max 77.000000\n", + "Name: age, dtype: float64" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "heart_df.describe()['age']" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 50, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 303.000000\n", + "mean 54.366337\n", + "std 9.082101\n", + "min 29.000000\n", + "25% 47.500000\n", + "50% 55.000000\n", + "75% 61.000000\n", + "max 77.000000\n", + "Name: age, dtype: float64" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "age_series.describe()" ] @@ -506,11 +2254,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 51, "metadata": { "hidden": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "age int64\n", + "sex int64\n", + "cp int64\n", + "trestbps int64\n", + "chol int64\n", + "fbs int64\n", + "restecg int64\n", + "thalach int64\n", + "exang int64\n", + "oldpeak float64\n", + "slope int64\n", + "ca int64\n", + "thal int64\n", + "target int64\n", + "dtype: object" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "heart_df.dtypes" ] @@ -633,9 +2406,159 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animal_idnamedatetimedatetime2found_locationintake_typeintake_conditionanimal_typesex_upon_intakeage_upon_intakebreedcolor
0A786884*Brock2019-01-03T16:19:00.0002019-01-03T16:19:00.0002501 Magin Meadow Dr in Austin (TX)StrayNormalDogNeutered Male2 yearsBeagle MixTricolor
1A706918Belle2015-07-05T12:59:00.0002015-07-05T12:59:00.0009409 Bluegrass Dr in Austin (TX)StrayNormalDogSpayed Female8 yearsEnglish Springer SpanielWhite/Liver
2A724273Runster2016-04-14T18:43:00.0002016-04-14T18:43:00.0002818 Palomino Trail in Austin (TX)StrayNormalDogIntact Male11 monthsBasenji MixSable/White
3A665644NaN2013-10-21T07:59:00.0002013-10-21T07:59:00.000Austin (TX)StraySickCatIntact Female4 weeksDomestic Shorthair MixCalico
4A857105Johnny Ringo2022-05-12T00:23:00.0002022-05-12T00:23:00.0004404 Sarasota Drive in Austin (TX)Public AssistNormalCatNeutered Male2 yearsDomestic ShorthairOrange Tabby
\n", + "
" + ], + "text/plain": [ + " animal_id name datetime datetime2 \\\n", + "0 A786884 *Brock 2019-01-03T16:19:00.000 2019-01-03T16:19:00.000 \n", + "1 A706918 Belle 2015-07-05T12:59:00.000 2015-07-05T12:59:00.000 \n", + "2 A724273 Runster 2016-04-14T18:43:00.000 2016-04-14T18:43:00.000 \n", + "3 A665644 NaN 2013-10-21T07:59:00.000 2013-10-21T07:59:00.000 \n", + "4 A857105 Johnny Ringo 2022-05-12T00:23:00.000 2022-05-12T00:23:00.000 \n", + "\n", + " found_location intake_type intake_condition \\\n", + "0 2501 Magin Meadow Dr in Austin (TX) Stray Normal \n", + "1 9409 Bluegrass Dr in Austin (TX) Stray Normal \n", + "2 2818 Palomino Trail in Austin (TX) Stray Normal \n", + "3 Austin (TX) Stray Sick \n", + "4 4404 Sarasota Drive in Austin (TX) Public Assist Normal \n", + "\n", + " animal_type sex_upon_intake age_upon_intake breed \\\n", + "0 Dog Neutered Male 2 years Beagle Mix \n", + "1 Dog Spayed Female 8 years English Springer Spaniel \n", + "2 Dog Intact Male 11 months Basenji Mix \n", + "3 Cat Intact Female 4 weeks Domestic Shorthair Mix \n", + "4 Cat Neutered Male 2 years Domestic Shorthair \n", + "\n", + " color \n", + "0 Tricolor \n", + "1 White/Liver \n", + "2 Sable/White \n", + "3 Calico \n", + "4 Orange Tabby " + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Accessing a CSV from a url\n", "intakes_url = pd.read_csv('https://data.austintexas.gov/resource/wter-evkm.csv')\n", @@ -644,9 +2567,159 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animal_idnamedatetimedatetime2found_locationintake_typeintake_conditionanimal_typesex_upon_intakeage_upon_intakebreedcolor
0A786884*Brock2019-01-03 16:19:002019-01-03T16:19:00.0002501 Magin Meadow Dr in Austin (TX)StrayNormalDogNeutered Male2 yearsBeagle MixTricolor
1A706918Belle2015-07-05 12:59:002015-07-05T12:59:00.0009409 Bluegrass Dr in Austin (TX)StrayNormalDogSpayed Female8 yearsEnglish Springer SpanielWhite/Liver
2A724273Runster2016-04-14 18:43:002016-04-14T18:43:00.0002818 Palomino Trail in Austin (TX)StrayNormalDogIntact Male11 monthsBasenji MixSable/White
3A665644NaN2013-10-21 07:59:002013-10-21T07:59:00.000Austin (TX)StraySickCatIntact Female4 weeksDomestic Shorthair MixCalico
4A857105Johnny Ringo2022-05-12 00:23:002022-05-12T00:23:00.0004404 Sarasota Drive in Austin (TX)Public AssistNormalCatNeutered Male2 yearsDomestic ShorthairOrange Tabby
\n", + "
" + ], + "text/plain": [ + " animal_id name datetime datetime2 \\\n", + "0 A786884 *Brock 2019-01-03 16:19:00 2019-01-03T16:19:00.000 \n", + "1 A706918 Belle 2015-07-05 12:59:00 2015-07-05T12:59:00.000 \n", + "2 A724273 Runster 2016-04-14 18:43:00 2016-04-14T18:43:00.000 \n", + "3 A665644 NaN 2013-10-21 07:59:00 2013-10-21T07:59:00.000 \n", + "4 A857105 Johnny Ringo 2022-05-12 00:23:00 2022-05-12T00:23:00.000 \n", + "\n", + " found_location intake_type intake_condition \\\n", + "0 2501 Magin Meadow Dr in Austin (TX) Stray Normal \n", + "1 9409 Bluegrass Dr in Austin (TX) Stray Normal \n", + "2 2818 Palomino Trail in Austin (TX) Stray Normal \n", + "3 Austin (TX) Stray Sick \n", + "4 4404 Sarasota Drive in Austin (TX) Public Assist Normal \n", + "\n", + " animal_type sex_upon_intake age_upon_intake breed \\\n", + "0 Dog Neutered Male 2 years Beagle Mix \n", + "1 Dog Spayed Female 8 years English Springer Spaniel \n", + "2 Dog Intact Male 11 months Basenji Mix \n", + "3 Cat Intact Female 4 weeks Domestic Shorthair Mix \n", + "4 Cat Neutered Male 2 years Domestic Shorthair \n", + "\n", + " color \n", + "0 Tricolor \n", + "1 White/Liver \n", + "2 Sable/White \n", + "3 Calico \n", + "4 Orange Tabby " + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Same as the JSON output from this API endpoint, but different levels of detail for dates!\n", "pd.read_json('https://data.austintexas.gov/resource/wter-evkm.json').head()" @@ -654,9 +2727,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 54, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(1000, 12)" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# But this is only 1000 rows... 
website says there's 136K rows!\n", "intakes_url.shape" @@ -1478,9 +3562,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python (Cohort_Env)", "language": "python", - "name": "python3" + "name": "cohort_env" }, "language_info": { "codemirror_mode": { @@ -1492,7 +3576,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.12.4" }, "toc": { "base_numbering": 1, diff --git a/Phase1/Lectures/10_Plotting_with_Pandas.ipynb b/Phase1/Lectures/10_Plotting_with_Pandas.ipynb index 387515e..24d99bb 100644 --- a/Phase1/Lectures/10_Plotting_with_Pandas.ipynb +++ b/Phase1/Lectures/10_Plotting_with_Pandas.ipynb @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -78,12 +78,106 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "hidden": true, "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
\n", + "
" + ], + "text/plain": [ + " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n", + "0 5.1 3.5 1.4 0.2 \n", + "1 4.9 3.0 1.4 0.2 \n", + "2 4.7 3.2 1.3 0.2 \n", + "3 4.6 3.1 1.5 0.2 \n", + "4 5.0 3.6 1.4 0.2 \n", + "\n", + " target \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# This iris dataset comes bundled with sklearn\n", "data = load_iris()\n", @@ -94,9 +188,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "target\n", + "0 50\n", + "1 50\n", + "2 50\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_iris['target'].value_counts()" ] @@ -131,11 +240,105 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "hidden": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
736.12.84.71.21
185.73.81.70.30
1187.72.66.92.32
786.02.94.51.51
766.82.84.81.41
\n", + "
" + ], + "text/plain": [ + " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n", + "73 6.1 2.8 4.7 1.2 \n", + "18 5.7 3.8 1.7 0.3 \n", + "118 7.7 2.6 6.9 2.3 \n", + "78 6.0 2.9 4.5 1.5 \n", + "76 6.8 2.8 4.8 1.4 \n", + "\n", + " target \n", + "73 1 \n", + "18 0 \n", + "118 2 \n", + "78 1 \n", + "76 1 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Five random rows of the iris dataset\n", "df_iris.sample(5, random_state=42)" @@ -152,11 +355,135 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
count150.000000150.000000150.000000150.000000150.000000
mean5.8433333.0573333.7580001.1993331.000000
std0.8280660.4358661.7652980.7622380.819232
min4.3000002.0000001.0000000.1000000.000000
25%5.1000002.8000001.6000000.3000000.000000
50%5.8000003.0000004.3500001.3000001.000000
75%6.4000003.3000005.1000001.8000002.000000
max7.9000004.4000006.9000002.5000002.000000
\n", + "
" + ], + "text/plain": [ + " sepal length (cm) sepal width (cm) petal length (cm) \\\n", + "count 150.000000 150.000000 150.000000 \n", + "mean 5.843333 3.057333 3.758000 \n", + "std 0.828066 0.435866 1.765298 \n", + "min 4.300000 2.000000 1.000000 \n", + "25% 5.100000 2.800000 1.600000 \n", + "50% 5.800000 3.000000 4.350000 \n", + "75% 6.400000 3.300000 5.100000 \n", + "max 7.900000 4.400000 6.900000 \n", + "\n", + " petal width (cm) target \n", + "count 150.000000 150.000000 \n", + "mean 1.199333 1.000000 \n", + "std 0.762238 0.819232 \n", + "min 0.100000 0.000000 \n", + "25% 0.300000 0.000000 \n", + "50% 1.300000 1.000000 \n", + "75% 1.800000 2.000000 \n", + "max 2.500000 2.000000 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Summary statistics of the iris dataset\n", "df_iris.describe()" @@ -164,29 +491,296 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
count50.0000050.00000050.00000050.00000050.0
mean5.006003.4280001.4620000.2460000.0
std0.352490.3790640.1736640.1053860.0
min4.300002.3000001.0000000.1000000.0
25%4.800003.2000001.4000000.2000000.0
50%5.000003.4000001.5000000.2000000.0
75%5.200003.6750001.5750000.3000000.0
max5.800004.4000001.9000000.6000000.0
\n", + "
" + ], + "text/plain": [ + " sepal length (cm) sepal width (cm) petal length (cm) \\\n", + "count 50.00000 50.000000 50.000000 \n", + "mean 5.00600 3.428000 1.462000 \n", + "std 0.35249 0.379064 0.173664 \n", + "min 4.30000 2.300000 1.000000 \n", + "25% 4.80000 3.200000 1.400000 \n", + "50% 5.00000 3.400000 1.500000 \n", + "75% 5.20000 3.675000 1.575000 \n", + "max 5.80000 4.400000 1.900000 \n", + "\n", + " petal width (cm) target \n", + "count 50.000000 50.0 \n", + "mean 0.246000 0.0 \n", + "std 0.105386 0.0 \n", + "min 0.100000 0.0 \n", + "25% 0.200000 0.0 \n", + "50% 0.200000 0.0 \n", + "75% 0.300000 0.0 \n", + "max 0.600000 0.0 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_iris.loc[df_iris['target'] == 0].describe()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
count50.00000050.00000050.00000050.00000050.0
mean5.9360002.7700004.2600001.3260001.0
std0.5161710.3137980.4699110.1977530.0
min4.9000002.0000003.0000001.0000001.0
25%5.6000002.5250004.0000001.2000001.0
50%5.9000002.8000004.3500001.3000001.0
75%6.3000003.0000004.6000001.5000001.0
max7.0000003.4000005.1000001.8000001.0
\n", + "
" + ], + "text/plain": [ + " sepal length (cm) sepal width (cm) petal length (cm) \\\n", + "count 50.000000 50.000000 50.000000 \n", + "mean 5.936000 2.770000 4.260000 \n", + "std 0.516171 0.313798 0.469911 \n", + "min 4.900000 2.000000 3.000000 \n", + "25% 5.600000 2.525000 4.000000 \n", + "50% 5.900000 2.800000 4.350000 \n", + "75% 6.300000 3.000000 4.600000 \n", + "max 7.000000 3.400000 5.100000 \n", + "\n", + " petal width (cm) target \n", + "count 50.000000 50.0 \n", + "mean 1.326000 1.0 \n", + "std 0.197753 0.0 \n", + "min 1.000000 1.0 \n", + "25% 1.200000 1.0 \n", + "50% 1.300000 1.0 \n", + "75% 1.500000 1.0 \n", + "max 1.800000 1.0 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_iris.loc[df_iris['target'] == 1].describe()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 150 entries, 0 to 149\n", + "Data columns (total 5 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 sepal length (cm) 150 non-null float64\n", + " 1 sepal width (cm) 150 non-null float64\n", + " 2 petal length (cm) 150 non-null float64\n", + " 3 petal width (cm) 150 non-null float64\n", + " 4 target 150 non-null int32 \n", + "dtypes: float64(4), int32(1)\n", + "memory usage: 5.4 KB\n" + ] + } + ], "source": [ "df_iris.info()" ] @@ -202,11 +796,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "hidden": true }, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Now, let's plot two variables from the iris dataset\n", "fig, ax = plt.subplots(figsize=(8, 5))\n", @@ -1528,9 +2133,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python (Cohort_Env)", "language": "python", - "name": "python3" + "name": "cohort_env" }, "language_info": { "codemirror_mode": { @@ -1542,7 +2147,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.12.4" }, "toc": { "base_numbering": 1, diff --git a/Phase1/Lectures/14_P1_CodeChallenge_Review.ipynb b/Phase1/Lectures/14_P1_CodeChallenge_Review.ipynb index 2d1c942..0e02365 100644 --- a/Phase1/Lectures/14_P1_CodeChallenge_Review.ipynb +++ b/Phase1/Lectures/14_P1_CodeChallenge_Review.ipynb @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -51,11 +51,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "# Before anything else - need to import pandas!\n" + "# Before anything else - need to import pandas!\n", + "import pandas as pd\n" ] }, { @@ -840,9 +841,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python (learn-env)", + "display_name": "Python (Cohort_Env)", "language": "python", - "name": "learn-env" + "name": "cohort_env" }, "language_info": { "codemirror_mode": { @@ -854,7 +855,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.12.4" } }, "nbformat": 4, diff --git a/Phase1/anotherfile.txt b/Phase1/anotherfile.txt new file mode 100644 index 0000000..e69de29 diff --git a/Phase1/new_file.txt b/Phase1/new_file.txt new file mode 100644 index 0000000..4399a58 --- /dev/null +++ b/Phase1/new_file.txt @@ -0,0 +1 @@ +some text here diff --git 
a/Phase2/dsc-getting-started-sql-intro b/Phase2/dsc-getting-started-sql-intro new file mode 160000 index 0000000..a3b451b --- /dev/null +++ b/Phase2/dsc-getting-started-sql-intro @@ -0,0 +1 @@ +Subproject commit a3b451b68ccfe93d4dbc45e506c74464796489e5 diff --git a/Phase2/dsc-intro-to-sqlite b/Phase2/dsc-intro-to-sqlite new file mode 160000 index 0000000..3b58c42 --- /dev/null +++ b/Phase2/dsc-intro-to-sqlite @@ -0,0 +1 @@ +Subproject commit 3b58c420d3633dbdc7e272e0333a4d8274d54138 diff --git a/Phase2/dsc-selecting-data-v2-4 b/Phase2/dsc-selecting-data-v2-4 new file mode 160000 index 0000000..271be54 --- /dev/null +++ b/Phase2/dsc-selecting-data-v2-4 @@ -0,0 +1 @@ +Subproject commit 271be54ca54a0a60a87262f95bc2d8978388b8b1 diff --git a/Phase2/dsc-sql-lab b/Phase2/dsc-sql-lab new file mode 160000 index 0000000..05b901a --- /dev/null +++ b/Phase2/dsc-sql-lab @@ -0,0 +1 @@ +Subproject commit 05b901addcfc01eb702a3f2be53ebd3c274eb2ee