-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathproject2.py
92 lines (70 loc) · 2.07 KB
/
project2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#%%
import pandas as pd
#%%
import altair as alt
#%%
import numpy as ny
#%%
# Location of the raw JSON file that the rest of the script analyses.
url = (
    "https://raw.githubusercontent.com/byuidatascience/data4missing/"
    "master/data-raw/flights_missing/flights_missing.json"
)
#%%
# Fetch the JSON and parse it into a DataFrame.
data = pd.read_json(url)
#%%
# Show the first two rows as a quick check of what was loaded.
print(data.head(2))
#%%-------------------Question 1------------------------------
# Per-airport delay summary: total flights, total delayed flights, total
# delay minutes, percentage of flights delayed, and average delay per
# delayed flight in hours (delay minutes / delayed flights / 60).
daniel = (
    data
    .groupby(["airport_code"])
    # Aggregations are given by name: passing the builtin `sum` is deprecated
    # in recent pandas and will eventually stop mapping to the groupby sum.
    .agg(
        total_flights=("num_of_flights_total", "sum"),
        total_delays=("num_of_delays_total", "sum"),
        total_delay_min=("minutes_delayed_total", "sum"),
    )
    .assign(
        delay_percentage=lambda x: (x.total_delays / x.total_flights * 100).round(2),
        avg_delay_time_hrs=lambda x: (x.total_delay_min / x.total_delays / 60).round(3),
    )
    # Turn airport_code back into an ordinary column.
    .reset_index()
)
print(daniel.head(5))
# Which airport has the worst delays? Compare, per airport:
# total number of flights
# total number of delayed flights
# proportion of delayed flights
# average delay time in hours
# %%
#------------------Question 2------------------------------------
# What is the best month to fly? -
# Per-month delay summary, indexed by month label: total flights, total
# delayed flights, and the percentage of flights that were delayed.
month = (
    data
    .groupby(["month"])
    # Aggregations are given by name: passing the builtin `sum` is deprecated
    # in recent pandas and will eventually stop mapping to the groupby sum.
    .agg(
        total_flights=("num_of_flights_total", "sum"),
        total_delays=("num_of_delays_total", "sum"),
    )
    # Remove the "n/a" group (presumably records with no month recorded --
    # confirm against the dataset). errors="ignore" keeps this from raising
    # KeyError when the data contains no such label.
    .drop("n/a", errors="ignore")
    .assign(
        delay_percentage=lambda x: (x.total_delays / x.total_flights * 100).round(2),
    )
)
print(month)
#--------------------- CHART 2----------------------------------
#%%
# Bar chart of delay percentage per month.
# Altair reads only DataFrame columns, and `month` is the index of the
# grouped table, so the index is turned back into a column before plotting;
# otherwise the "month" encoding cannot be resolved when the chart renders.
chart2 = (
    alt.Chart(month.reset_index())
    .mark_bar()
    .encode(
        x="month",
        y="delay_percentage",
    )
)
# Bare expression: displays the chart when run as an interactive cell.
chart2
#%%
# Prints the chart object's text representation (not the rendered chart).
print(chart2)
#%%
# Calendar order for the x axis. Without an explicit order the bars follow
# the alphabetical order of the grouped table.
# NOTE(review): "Febuary" is listed next to "February" because the source
# data is believed to use that spelling -- confirm against the dataset.
# Labels that do not occur in the data are simply ignored by the sort.
month_order = [
    "January", "February", "Febuary", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]

# Bar chart of delay percentage per month on a fixed 0-100 scale; months at
# or above 24 percent are drawn in red, the rest in steel blue.
# Altair reads only DataFrame columns, and `month` is the index of the
# grouped table, so the index is turned back into a column before plotting.
bars_month = (
    alt.Chart(month.reset_index(), title="Flights Delay Percentage", width=400)
    .mark_bar()
    .encode(
        x=alt.X("month", sort=month_order),
        y=alt.Y(
            "delay_percentage",
            scale=alt.Scale(domain=[0, 100]),
            title="Delay Percentage",
        ),
        color=alt.condition(
            alt.datum.delay_percentage >= 24,
            alt.value("red"),
            alt.value("steelblue"),
        ),
    )
)
# Scratch string assignments.
# NOTE(review): `daniel` is also the name of the per-airport summary table
# built under Question 1; this assignment rebinds it to a string, so the
# table is no longer reachable by that name afterwards.
martin = "Martin"
daniel = "hola)"
zavala = "otro"