-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcomprehensions.py
247 lines (164 loc) · 7.55 KB
/
comprehensions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
'''Comprehensions'''
# List Comprehensions
# -----------------------------------------------------------------------------
# [expression for item in iterable]
# [expression for item in iterable if condition]
# Let's say we wanted to build a list of squares. Traditionally we could
# create a list, then use an iterator and range() like this:
squares = []
for number in range(1, 11):
    squares.append(number ** 2)
print(squares)  # [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

# or we could do it this way: start from the plain numbers and overwrite
# each slot in place. enumerate() supplies the position, so the update does
# not depend on every value happening to equal its index + 1.
squares = list(range(1, 11))
for index, number in enumerate(squares):
    squares[index] = number ** 2
print(squares)  # [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

# or we could use list comprehensions:
# [expression for item in iterable]
squares = [number ** 2 for number in range(1, 11)]
print(squares)  # [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
# First, define the expression for the values you want to store in the list,
# then write a for loop to generate the numbers you want to feed into the
# expression:
# range(12, 49, 12) feeds 12, 24, 36 and 48 into the label expression.
bottles = [f'{number} bottles' for number in range(12, 49, 12)]
print(bottles)  # ['12 bottles', '24 bottles', '36 bottles', '48 bottles']
# List comprehension with if conditional:
# [expression for item in iterable if condition]
odds = [number for number in range(1, 10) if number % 2 == 1]
print(odds)  # [1, 3, 5, 7, 9]

# The above is the same as:
odds = []
for number in range(1, 10):
    if number % 2 == 1:
        odds.append(number)

# Because this expression is so simple we could obviously just do this:
odds = list(range(1, 10, 2))
# But the point here is to illustrate the syntax.
# List comprehension with if/else conditionals:
# [expression if condition else expression for item in iterable]
# For this example, the values in 'reps' belong to the 'sets' in 'units'.
# I want to fill out the reps list with zero values for the other units so
# that both lists are the same length and could be zipped together:
units = ['laps', 'seconds', 'sets', 'twirls', 'sets']
reps = ['10', '6']
# My instincts would be to add the else at the end, but this is wrong!
# new_reps = [reps.pop() for unit in units if unit == 'sets' else '0']
# The if/else belongs to the expression, in front of the 'for'. Each 'sets'
# entry takes the next value off the front of reps; anything else gets '0'.
new_reps = [
    '0' if unit != 'sets' else reps.pop(0)
    for unit in units
]
print(new_reps)
# ['0', '0', '10', '0', '6']
# List Comprehensions: add a value
# ----------------------------------------------------------------------------
# You can add values at the end of your comprehension using the + operator:
import random
# Three random color channels, then a fixed alpha of 1 appended afterwards.
rgba = [
    random.random()
    for _ in range(3)
]
rgba.append(1)
# same as: concatenating a one-item list onto the comprehension's result.
rgba = [random.random() for _ in range(3)] + [1]
print(rgba)
# [0.2785526331653764, 0.2124238306054994, 0.6514248582355008, 1]
# List Comprehensions: nested loops example
# ----------------------------------------------------------------------------
# Another example using nested loops.
# Here's the traditional way:
rows = range(1, 4)
cols = range(1, 3)
cells = []
for row in rows:
    for col in cols:
        cells.append((row, col))
print(cells)  # [(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)]

# or use a comprehension: the for clauses are written in the same order as
# the nested loops above (outer loop first, inner loop second).
rows = range(1, 4)
cols = range(1, 3)
cells = [(row, col) for row in rows for col in cols]
print(cells)  # [(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)]
# Another example of two expressions in a list comprehension.
# Let's say we wanted to flatten a list of lists into one list:
matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
# The outer clause walks the inner lists, the second clause walks the values
# inside each one.
flat = [value for inner in matrix for value in inner]
print(flat)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]
# NOTE: In general, for readability, avoid using more than two expressions
# in a list comprehension whether that be two loops or a loop and a condition.
# If you need to write more, consider normal if and for statements and possibly
# helper functions (see helper_functions.py)
# List Comprehensions: practical example from data plotting
# -----------------------------------------------------------------------------
import pandas
# An example from a pandas, bokeh data plot. What we're trying to do is create
# a new DataFrame column containing a color which will be used for the data
# plot; green if the price went up and red if the price went down.
# Load the price history and compute the change (close minus open) per row.
df = pandas.read_csv('data/NEO_historical.csv')
difference = df.close - df.open
# One color per row: 'green' only when the change is strictly positive.
df['color'] = [
    'green' if change > 0 else 'red'
    for change in difference
]
# Note the 'expression' can be a function, and the iterable can be a zip()
# containing more than one iterable. This essentially does the same as above:
def color_by_value(open_price, close_price):
    '''Return the plot color for one pair of opening and closing prices.

    Args:
        open_price: The opening price.
        close_price: The closing price.

    Returns:
        'green' when close_price >= open_price, otherwise 'red'.
    '''
    # An unchanged price (close == open) counts as 'green' here.
    # NOTE: the `x > 0` comprehension earlier in this file maps an unchanged
    # price to 'red' instead, so the two only agree when the prices differ.
    return 'green' if close_price >= open_price else 'red'
# zip() pairs each opening price with the closing price from the same row,
# and the helper function decides the color for that pair.
df['color'] = [
    color_by_value(open_price=opened, close_price=closed)
    for opened, closed in zip(df.open, df.close)
]
# see also: pygal_intro.py
# Dictionary Comprehensions
# -----------------------------------------------------------------------------
# {key_expression : value_expression for expression in iterable}:
word = 'letters'
letter_counts = {char: word.count(char) for char in word}
print(letter_counts)  # {'l': 1, 'e': 2, 't': 2, 'r': 1, 's': 1}
print(type(letter_counts))  # <class 'dict'>

# Technically we are counting some letters twice.
# Iterating over set(word) instead visits every distinct letter exactly
# once. The resulting counts are the same, but count() is only called once
# per letter.
word = 'letters'
letter_counts = {
    char: word.count(char)
    for char in set(word)
}

# Review dictionary comprehensions
squares = {number: number ** 2 for number in range(7)}
print(squares)  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36}
# Set comprehensions
# -----------------------------------------------------------------------------
# {expression for expression in iterable}
a_set = {number * number for number in range(1, 10)}
print(a_set)  # {64, 1, 4, 36, 9, 16, 49, 81, 25}

# Review set comprehensions:
# A non-zero remainder is truthy, so only the odd numbers pass the filter.
odds = {number for number in range(10) if number % 2}
print(odds)  # {1, 3, 5, 7, 9}
# Generator comprehensions (see also generators.py)
# -----------------------------------------------------------------------------
# Tuples don't have comprehensions. Changing the [] or {} to () is actually
# creating a generator comprehension and returns a generator object.
# Generator comprehensions are helpful for when large amounts of data would
# otherwise consume too much memory and possibly crash your program. This
# example imagines we want to read a file and get the length of each line.
# If we use a list comprehension, each value would have to be held in memory.
# This works fine for small files:
# The with block closes the file handle as soon as the list has been built.
with open('data/example.txt') as example_file:
    value = [len(line) for line in example_file]
print(value)
# [8, 12, 48, 22, 7]

# Generator expressions don't materialize the whole output sequence when
# they're run. They evaluate to an iterator that yields one item at a time.
# Because the lines are read lazily, the generator has to be consumed while
# the file is still open, i.e. inside the with block.
with open('data/example.txt') as example_file:
    gen = (len(line) for line in example_file)
    print(type(gen))  # <class 'generator'>
    print(next(gen))  # 8
    for x in gen:
        print(x)
# 12
# 48
# 22
# 6
# NOTE(review): the list output above ends in 7 but this loop output ends in
# 6; both read the same file, so one of the two sample outputs is presumably
# a typo - re-run to confirm.

# Once a generator has been exhausted, it's done. If we try getting the
# next value we'll raise a StopIteration exception.

# Another interesting thing about generator expressions is that they can
# be composed together. See here:
with open('data/example.txt') as example_file:
    gen = (len(line) for line in example_file)
    squares = ((x, x**2) for x in gen)
    print(next(squares))  # (8, 64)
    print(next(squares))  # (12, 144)
    print(next(squares))  # (48, 2304)
# Leaving the with block closes the file, so any values left in `squares`
# can no longer be read after this point.
# Chaining generators like this executes very quickly in Python. Bottom line
# is list comprehensions can cause problems for very large inputs of data by
# hogging too much memory. Generator expressions avoid this by producing
# outputs one at a time as an iterator.