Skip to content

Commit

Permalink
Adding food calories and macro nutrient
Browse files Browse the repository at this point in the history
adding chunks
  • Loading branch information
realxinzhao committed Aug 7, 2023
1 parent 8a8586c commit 9b6cb0e
Show file tree
Hide file tree
Showing 7 changed files with 826 additions and 3 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ export(standardize_iso)
export(unprotect_integer_cols)
importFrom(assertthat,assert_that)
importFrom(data.table,data.table)
importFrom(dplyr,any_vars)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,distinct)
importFrom(dplyr,filter)
importFrom(dplyr,filter_all)
importFrom(dplyr,first)
importFrom(dplyr,full_join)
importFrom(dplyr,group_by)
Expand Down
16 changes: 15 additions & 1 deletion R/xfaostat_L100_constants.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,23 @@



# used in Get_SUA_TEMPLATE and SUA_bal_adjust
# Supply-utilization balance elements; referenced by Get_SUA_TEMPLATE and SUA_bal_adjust

Bal_element_new <- c(
  "Opening stocks", "Production", "Import", "Export", "Processed",
  "Food", "Feed", "Seed", "Other uses", "Loss", "Closing stocks",
  "Residuals", "Regional supply", "Regional demand", "Stock Variation"
)


# Assumed parameters for data processing or interpolation ----
#*******************************************
# Forest trade data adjustment:
# Export is adjusted when Demand = Production + Import - Export < 0,
# using Production * For_Export_Production_Ratio_Adj
For_Export_Production_Ratio_Adj <- 0.9

# Bound on how far a regional mass-to-macronutrient conversion rate may deviate
# (as a share of the world value) before it is replaced by the world value
# Used in FAOSTAT_S1D_Food_Kcal.R
REGIONAL_NUTRIENT_MASS_CONV_OUTLIER_BOUNDARY <- 0.15
# Years averaged when deriving the conversion rates (average cal per g)
Hist_MEAN_Year_NUTRIENT_MASS_CONV <- 2010:2019

255 changes: 255 additions & 0 deletions R/xfaostat_L104_FoodMacroNutrient.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
# Copyright 2019 Battelle Memorial Institute; see the LICENSE file.

#' module_xfaostat_L104_FoodMacroNutrient
#'
#' Derive food calorie and macronutrient (protein and fat) conversion rates per unit of food mass
#'
#' @param command API command to execute
#' @param ... other optional parameters, depending on command
#' @return Depends on \code{command}: either a vector of required inputs, a vector of output names, or (if
#' \code{command} is "MAKE") all the generated outputs
#' @details This chunk uses the calorie, protein, fat and food-supply elements of the FAO SCL data to calculate,
#' by area and SUA item, mean conversion rates (calperg, proteinperc, fatperc) over
#' \code{Hist_MEAN_Year_NUTRIENT_MASS_CONV}. Missing area values are filled with the simple cross-area mean, and
#' area values that deviate from that mean by more than \code{REGIONAL_NUTRIENT_MASS_CONV_OUTLIER_BOUNDARY}
#' are reset to it. Fixed rates from \code{FAO_an_items_cal_SUA} are appended for the FBS fish items that have
#' no SUA counterpart.
#' @importFrom assertthat assert_that
#' @importFrom dplyr summarize summarise bind_rows distinct filter if_else inner_join left_join right_join full_join
#' mutate transmute rename select n group_by group_by_at first case_when vars filter_all any_vars
#' @importFrom tibble tibble
#' @importFrom tidyr complete drop_na gather nesting spread replace_na fill
#' @author XZ 2023
module_xfaostat_L104_FoodMacroNutrient <- function(command, ...) {

  MODULE_INPUTS <-
    c("SCL",
      "FBS",
      "OA",
      FILE = "aglu/FAO/FAO_an_items_cal_SUA",
      FILE = "aglu/FAO/MAPPING_FAO_FBS_SUA")

  MODULE_OUTPUTS <-
    c("SUA_food_macronutrient_rate")

  if(command == driver.DECLARE_INPUTS) {
    return(MODULE_INPUTS)
  } else if(command == driver.DECLARE_OUTPUTS) {
    return(MODULE_OUTPUTS)
  } else if(command == driver.MAKE) {

    year <- value <- Year <- Value <- FAO_country <- iso <- NULL # silence package check.

    all_data <- list(...)[[1]]

    # Load required inputs ----
    # NOTE(review): get_data_list presumably creates one object per MODULE_INPUTS entry
    # (SCL, FBS, OA, FAO_an_items_cal_SUA, MAPPING_FAO_FBS_SUA) in this environment -- confirm.
    get_data_list(all_data, MODULE_INPUTS, strip_attributes = TRUE)


    # Background----
    #*******************************************
    # Identifying food availability and macro-nutrient consumption
    # Reconciling FAO FBS and SUA dataset for global Diet composition estimates
    # Macro-nutrient: fat 9 kcal per g; protein and carbohydrates 4 kcal per g
    # Focus on FBS and SCL first as they both have data since 2010
    # Our World in Data example (FBSH to 2013): https://ourworldindata.org/diet-compositions#cereal-preferences-across-the-world

    # Goals----
    #*******************************************
    # Macro-nutrient mapping at SUA level to identify potential heterogeneity of cal/g across regions
    # For each SUA item, e.g., wheat flour, connect food consumption in balance to cal, protein, and fat; so carb
    # Need to check for each SUA item, whether the conversion rate changes

    # Compare FBS to SUA for the cal/ca accounting by FBS categories (matching well)
    # Recommend new mappings between FAO FBS and GCAM food categories
    # Note that the key of calculating cal, protein, fat consumption was the food supply in mass at the SUA level!
    # Cassidy et al. 2013 ERL used more simplified assumptions in FBSH, similar to many studies

    # More importantly and related to modeling, extraction rates and cal per g coefficient per SUA item determines over conversion
    # Thus, to get macro-nutrients we need to understand the conversion coefficient from mass units
    # First at the SUA level that is constant over time
    # And second, aggregated to FBS level and the mix of the SUA items and the depth of the processing chains matter


    # NOTE: OA (population) is a declared input, but nothing below needs population;
    # the previously derived (and never used) POP table has been dropped.

    # Quick checks----
    #*******************************************
    # Check area

    # 19 countries not in FBS/SUA but in QCL
    # including (3 populous) Somalia, South Sudan, and Singapore (UAE was recently added in FAOSTAT)
    FF_join_checkmap(c("FBS", "SCL"), "area_code", "area") -> checkarea

    # Check item and element
    FF_join_checkmap(c("FBS", "SCL"), "item_code", "item") -> checkitem
    FF_join_checkmap(c("FBS", "SCL"), "element_code", "element") -> checkelement
    #*******************************************
    # Check calories calculation first to make sure we can trace appropriately
    # the key here is elements, deal with missing values, and conversion rate (cal/g)
    # the goal is to get macro-nutrient conversion rates at the SUA level by area (constant across year)!
    # regional difference is limited by REGIONAL_NUTRIENT_MASS_CONV_OUTLIER_BOUNDARY

    # To inspect units interactively: SCL %>% distinct(element, unit)


    # Main processing----
    #*******************************************

    # it is not useful to calculate cal/g using `Food supply (kcal/capita/day)` /`Food supply quantity (g/capita/day)`
    # unit too small
    # `Calories/Year` / `Food supply quantity (tonnes)` is more accurate!
    # similarly for protein and fat
    # Use annual value in SUA to calculate the conversion rate!

    # For FBS (12 fish items) we only have `Food supply (kcal/capita/day)` /`Food supply quantity (g/capita/day)`

    ## Deal with SCL data first ----
    #*******************************************

    # Keep the calorie, protein, fat and food (tonnes) elements for the SUA items
    # that have a CPC code in the FBS-SUA mapping
    SCL %>% filter(element_code %in% c(261, 271, 281, 5141)) %>% #All 3 cal protein fats and food in ton
      right_join(MAPPING_FAO_FBS_SUA %>%
                   filter(!is.na(CPC_code)) %>%
                   select(item_code = SCL_item_code, FAO_FBS_code, FBS_label),
                 by = "item_code") %>%
      filter(!is.na(item)) %>% # drop mapping rows without any SCL data
      select(-element_code, -unit) ->
      SUA_food_macronutrient

    # calculate time-series mean at SUA levels
    SUA_food_macronutrient %>%
      filter(year %in% Hist_MEAN_Year_NUTRIENT_MASS_CONV) %>%
      spread(element, value) %>%
      dplyr::filter_all(any_vars(!is.na(.))) %>%
      mutate(calperg = `Calories/Year` / `Food supply quantity (tonnes)` * 1000,
             proteinperc = `Proteins/Year` / `Food supply quantity (tonnes)` * 100,
             fatperc = `Fats/Year` / `Food supply quantity (tonnes)` * 100) %>%
      select(area_code, item_code, item, year, calperg, proteinperc, fatperc) %>%
      gather(element, value, calperg, proteinperc, fatperc) %>%
      filter(is.finite(value), value > 0) %>% # calculate world mean for positive values later
      group_by(area_code, item_code, item, element) %>%
      # .groups = "drop" already returns an ungrouped table, so no ungroup() is needed afterwards
      summarise(value = mean(value, na.rm = TRUE), .groups = "drop") %>%
      mutate(value = if_else(element == "calperg", round(value, -1), value)) -> # round to nearest 10
      SUA_food_yearmean

    # Check if any item has NA for all areas; affecting fill
    SUA_food_yearmean %>%
      group_by(item_code, item, element) %>% summarise(value = sum(value), .groups = "drop") %>%
      filter(value == 0) %>% spread(element, value) -> A # Empty

    # world mean (ex ante adjustment): simple average across areas
    SUA_food_yearmean %>%
      group_by(item_code, item, element) %>%
      summarise(value_world = mean(value, na.rm = TRUE), .groups = "drop") %>%
      mutate(value_world = if_else(element == "calperg", round(value_world, -1), value_world)) ->
      SUA_food_yearareamean

    # fill regional NA using world mean
    # spread + gather completes every area x item x element combination;
    # area_code becomes character here and is converted back to integer further down
    SUA_food_yearmean %>% spread(area_code, value) %>%
      gather(area_code, value_reg, -item_code, -item, -element) %>%
      left_join(SUA_food_yearareamean, by = c("item_code", "item", "element")) %>%
      mutate(value_reg = if_else(is.na(value_reg), value_world, value_reg),
             Diff = value_reg - value_world,
             p_Diff = Diff / value_world) ->
      SUA_food_yearmean_fill
    #*******************************************
    # check to make sure REGIONAL_NUTRIENT_MASS_CONV_OUTLIER_BOUNDARY is reasonable (compared with world mean)
    # REGIONAL_NUTRIENT_MASS_CONV_OUTLIER_BOUNDARY = 0.15
    # NOTE(review): the columns are named P05/P95 but hold the 15th/85th percentiles.
    SUA_food_yearmean_fill %>% group_by(element) %>%
      summarise(pmean = mean(p_Diff, na.rm = TRUE),
                P05 = quantile(p_Diff, 0.15, na.rm = TRUE), P95 = quantile(p_Diff, 0.85, na.rm = TRUE),
                dmean = mean(Diff)) -> A
    rm(A)
    # data is okay generally
    # but set outliers (p_Diff > 0.15 or p_Diff < -0.15) to world conversion value
    # That is macro-nutrient coefficient per mass unit across regions should not be too different
    # smaller than +-15% roughly of ex ante simple average

    SUA_food_yearmean_fill %>%
      transmute(area_code, item_code, item, element,
                value = if_else(p_Diff > REGIONAL_NUTRIENT_MASS_CONV_OUTLIER_BOUNDARY |
                                  p_Diff < -REGIONAL_NUTRIENT_MASS_CONV_OUTLIER_BOUNDARY,
                                value_world, value_reg)) %>%
      spread(element, value) %>%
      gather(element, value, -area_code, -item_code, -item) %>% # replace na to zero for some fats and protein items
      mutate(area_code = as.integer(area_code)) %>%
      replace_na(list(value = 0)) %>%
      spread(element, value) ->
      SUA_food_macronutrient_rate_nofish # Final product

    ## Add in non-SUA FBS items (12 fish items) ----
    #*******************************************

    # The fish items are the mapping rows without a CPC code; they are identified by FBS code and label
    MAPPING_FAO_FBS_SUA %>% filter(is.na(CPC_code)) %>%
      select(item = FBS_label, item_code = FAO_FBS_code) -> Fish_item

    # Rates derived from the FBS per-capita elements (element codes 645, 664, 674, 684) were
    # tried here, but the FBS data quality is too poor for deriving the conversion rates.
    # That derivation was computed and immediately discarded, so it has been removed.

    # Change strategy here by using fixed values from
    # https://www.fao.org/3/X9892E/X9892e05.htm#P8217_125315

    # Every area gets every fish item (cross join), with the fixed rates attached by item_code
    # NOTE(review): full_join(by = character()) is deprecated in dplyr >= 1.1.0 in favour of
    # cross_join(); kept as-is because the package's minimum dplyr version is not visible here.
    SUA_food_macronutrient_rate_nofish %>%
      bind_rows(
        SUA_food_macronutrient_rate_nofish %>% distinct(area_code) %>%
          full_join(Fish_item, by = character()) %>%
          left_join(FAO_an_items_cal_SUA %>%
                      select(item_code, calperg = Mcal_t, fatperc = fat_Perc,
                             proteinperc = protein_Perc), by = "item_code")
      ) -> SUA_food_macronutrient_rate

    # Expected size per the original author's note: 179 areas * (414 items + 12 fish items)
    # (the note gave 76244 rows, whereas 179 * 426 = 76254 -- TODO confirm against the data)

    # remove processing data
    rm(SUA_food_macronutrient, SUA_food_yearmean,
       SUA_food_yearmean_fill, SUA_food_yearareamean,
       OA, SCL, FBS, MAPPING_FAO_FBS_SUA,
       SUA_food_macronutrient_rate_nofish, FAO_an_items_cal_SUA)
    rm(Fish_item)
    rm(checkarea, checkitem, checkelement)


    # NOTE(review): "macrotunitent" in the comment string below looks like a typo for
    # "macronutrient"; the string is left unchanged because it is part of the output metadata.
    SUA_food_macronutrient_rate %>%
      add_title("FAO food calories and macronutrient rate") %>%
      add_units("rates") %>%
      add_comments("Detailed FAO food calories and macrotunitent info for 414 SUA items + 12 fish items") ->
      SUA_food_macronutrient_rate

    # P.S. ----
    # China Wheat, bran for food? Need to fix in SUA later; no changes needed here for this

    return_data(MODULE_OUTPUTS)

  } else {
    stop("Unknown command")
  }
}
6 changes: 4 additions & 2 deletions R/xfaostat_L105_DataConnectionToSUA.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {

MODULE_INPUTS <-
c(FILE = "aglu/FAO/FAO_items")
c(FILE = "aglu/FAO/FAO_items",
FILE = "aglu/FAO/Mapping_FBSH_SCL_OilCake")

MODULE_OUTPUTS <-
c("GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019")
Expand Down Expand Up @@ -495,7 +496,8 @@ module_xfaostat_L105_DataConnectionToSUA <- function(command, ...) {
### 3.5.1 Get oil cake production data based on cake rate from CB_FBS_CakeRate ----

# Get the mapping of oil and cake items between FBSH, CB and SCL
Mapping_FBSH_SCL_OilCake <- readr::read_csv(file.path("inst/extdata/aglu/FAO", "Mapping_FBSH_SCL_OilCake.csv"), comment = "#")
# read in already
#Mapping_FBSH_SCL_OilCake <- readr::read_csv(file.path("inst/extdata/aglu/FAO", "Mapping_FBSH_SCL_OilCake.csv"), comment = "#")

# The reference period is 2011: 2013 for cake rate calculation
# Merge oil and cake data
Expand Down
47 changes: 47 additions & 0 deletions inst/extdata/aglu/FAO/FAO_an_items_cal_SUA.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# File: FAO_an_items_cal_SUA.csv
# Title: Mapping file of FAO and GCAM animal commodities and caloric content
# Unit: Mcal per tonne
# Description: Maps FAO Supply Utilization Accounts animal commodities to GCAM animal commodities and reports caloric content.
# item and item_code are both updated to new FAO FBS;
# caloric content will not be used anymore as they will be derived from data.
# Animal offals_fats are included under otherMeat. Snail_not sea was included in OtherMeat_Fish under Meat_Other and products.
# Items not mapped were mostly old mapping items. They are either nonfood or included in other places.
# Original source: http://www.fao.org/DOCREP/003/X9892E/X9892e05.htm#P8217_125315
# Sources: FAO FBS and FBSH
# Date of last update: 2022-03-22 (XZ)
# Column types: ciccnnn
# ----------
item,item_code,GCAM_commodity,crop_cal,Mcal_t,protein_Perc,fat_Perc
Bovine Meat and products,2731,Beef,BEEF BONELESS,1500,,
"Butter, Ghee and products",2740,Dairy,GHEE FROM COW MILK,8730,,
Cream and products,2743,Dairy,"CREAM, FRESH",1950,,
Milk - (Excluding excluding Butter butter) and products,2848,Dairy,STANDARDIZED MILK,480,,
"Meat, Other and products",2735,OtherMeat_Fish,MEAT NES,1260,,
Freshwater Fish,2761,OtherMeat_Fish,FRESHWATER DIADROMOUS FISH FRESH,690,10.9,2.5
Demersal Fish,2762,OtherMeat_Fish,DEMERSAL FISH FRESH,420,8.3,0.8
Pelagic Fish,2763,OtherMeat_Fish,PELAGIC FISH FRESH,860,12.6,3.6
"Marine Fish, Other",2764,OtherMeat_Fish,MARINE FISH NES FRESH,640,10.3,2.2
Crustaceans,2765,OtherMeat_Fish,CRUSTACEANS FRESH,470,9.3,0.5
Cephalopods,2766,OtherMeat_Fish,CEPHALOPODS FRESH,660,13.5,0.7
"Molluscs, Other",2767,OtherMeat_Fish,MOLLUSCS FRESH,150,2.3,0.2
"Meat, Aquatic Mammals",2768,OtherMeat_Fish,AQUATIC MAMMALS MEAT,1360,21,5
"Aquatic Animals, Others",2769,OtherMeat_Fish,AQUATIC ANIMALS NES FRESH,300,4,0.2
"Fish, Body Oil",2781,OtherMeat_Fish,MARINE FISH NES BODY OIL,9020,0,100
"Fish, Liver Oil",2782,OtherMeat_Fish,MARINE FISH L221NES LIVER OIL,9020,0,100
Pigmeat and products,2733,Pork,PIGMEAT,3260,,
Poultry Meat and products,2734,Poultry,CHICKEN MEAT,1220,,
Eggs and products,2744,Poultry,HEN EGGS,1390,,
Mutton & and Goat Meat and products,2732,SheepGoat,MUTTON AND LAMB,2630,,
"Fats, Animals, Raw",2737,OtherMeat_Fish,ANIMAL OILS AND FATS NES,9020,,
"Offals, Edible",2736,OtherMeat_Fish,OFFALS NES,1050,,
Honey,2745,OtherMeat_Fish,HONEY,2980,,
Aquatic Plants,2775,OtherMeat_Fish," AQUATIC PLANTS",540,2.8,0.6
"Milk, Whole",2738,,,0,,
"Milk, Skimmed",2739,,,0,,
Wool (Clean Eq.),2746,,,0,,
Silk,2747,,,0,,
Hides and skins,2748,,,0,,
Fish Meal,2855,,FRESHWATER DIADROM. FISH PREPARED NES,2620,,
Meat Meal,2749,,MEAT PREPARED NES,2420,,
Cheese,2741,,CHEESE WHOLE COW MILK,3870,,
Whey,2742,,WHEY FRESH,260,,
Loading

0 comments on commit 9b6cb0e

Please sign in to comment.