-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_world_bank.py
69 lines (59 loc) · 3.04 KB
/
create_world_bank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
##
## Reshape World Bank World Development Indicators for data analysis
##
import pandas as pd
#grab all indicators from 1990 and on
all = pd.read_excel('data/world_development_indicators_download.xlsx', 'Data',
usecols = [0,1,2,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,
49,50,51,52,53,54,55,56,57])
all = all.rename(columns = {
'Country Name':'country',
'Country Code':'country_code',
'Indicator Name':'indicator',
})
#create dataframe that's a subset of the indicators we want
indicator_list = [
'Prevalence of undernourishment (% of population)',
'Crop production index (2004-2006 = 100)',
'Agricultural land (% of land area)',
'Food production index (2004-2006 = 100)',
'Food exports (% of merchandise exports)',
'Food imports (% of merchandise imports)',
'Land area (sq. km)',
'Population (Total)',
'Unemployment, female (% of female labor force) (modeled ILO estimate)',
'Unemployment, male (% of male labor force) (modeled ILO estimate)',
'Unemployment, total (% of total labor force) (modeled ILO estimate)',
'GDP (constant 2005 US$)',
'GDP per capita (constant 2005 US$)',
'Adjusted net national income (constant 2005 US$)',
]
fs = all[all.indicator.isin(indicator_list)]
#create three DataFrames: individual countries,
#continent/regional aggregates, world aggregate
#(see "Country" tab of the original indicators downloaded .xlsx
#country_code details)
world = fs[fs.country_code.isin(['WLD'])]
#continent/regional aggregates
agg_list = ['ARB', 'CSS', 'EAS', 'EMU', 'LCN', 'MEA', 'PSS', 'SAS', 'SSF']
agg = fs[fs.country_code.isin(agg_list)]
#individual countries - remove country names that represent aggregates
agg_country_code_list = [
'ARB', 'CSS', 'EAS', 'EAP', 'CEA', 'EMU', 'ECS', 'ECA', 'CEU', 'EUU',
'HPC', 'HIC', 'NOC', 'OEC', 'LCN', 'LAC', 'CLA', 'LDC', 'LMY', 'LIC',
'LMC', 'MEA', 'MNA', 'CME', 'MIC', 'NAC', 'OED', 'OSS', 'PSS', 'SST',
'SAS', 'CSA', 'SSF', 'SSA', 'CAA', 'UMC', 'WLD'
]
fs = fs[~fs.country_code.isin(agg_country_code_list)]
#reshape to put years in rows instead of columns
fs = pd.melt(fs, id_vars=['country', 'country_code', 'indicator'], var_name = 'year')
agg = pd.melt(agg, id_vars=['country', 'country_code', 'indicator'], var_name = 'year')
world = pd.melt(world, id_vars=['country', 'country_code', 'indicator'], var_name = 'year')
#reshape again to put indicators in columns instead of rows & save results
fs = pd.pivot_table(fs, values='value', index=['country', 'country_code', 'year'], columns = ['indicator'])
agg = pd.pivot_table(agg, values='value', index=['country', 'country_code', 'year'], columns = ['indicator'])
world = pd.pivot_table(world, values='value', index=['country', 'country_code', 'year'], columns = ['indicator'])
#write files
fs.to_csv('data/world_development_indicators.csv', cols=indicator_list)
agg.to_csv('data/world_development_indicators_agg.csv', cols=indicator_list)
world.to_csv('data/world_development_indicators_world.csv', cols=indicator_list)