-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
85a4894
commit f401e9f
Showing
9 changed files
with
568 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
## Instructions on running the code | ||
|
||
1. Install the R programming language | ||
2. Install an IDE that supports the R programming language (e.g., RStudio) | ||
3. Clone this GitHub repository to your local environment | ||
4. With your preferred IDE, open the directory where you cloned the repository[^1] | ||
5. Install the required R packages | ||
6. Run the commands in any of the four R script files from top to bottom[^2] | ||
|
||
[^1]: Make sure that you do not change the folder structure | ||
[^2]: Each R script file runs independently |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
# ======================================= Load Libraries and Data ======================================= | ||
|
||
# Import required libraries | ||
library(tidyverse) | ||
library(readxl) | ||
library(ggstatsplot) | ||
library(MetBrewer) | ||
|
||
# England and Wales data for 2023: sheet "Table_9" of 2023birthregistrations.xlsx.
# The first five rows are skipped, so reading starts at the sixth row.
imdStillBirth2023EW <- read_excel(
  path = "2023birthregistrations.xlsx",
  sheet = "Table_9",
  skip = 5
)

# England data from 2010 to 2022: sheet "21" of cim2022deathcohortworkbook.xlsx.
# The first seven rows are skipped, so reading starts at the eighth row.
imdStillBirth2022E <- read_excel(
  path = "cim2022deathcohortworkbook.xlsx",
  sheet = "21",
  skip = 7
)

# Wales data from 2010 to 2022: sheet "25" of cim2022deathcohortworkbook.xlsx.
# The first nine rows are skipped, so reading starts at the tenth row.
imdStillBirth2022W <- read_excel(
  path = "cim2022deathcohortworkbook.xlsx",
  sheet = "25",
  skip = 9
)
|
||
# ======================================= Data Pre-Processing ======================================= | ||
|
||
# Bring the 2023 England and Wales table to the same three columns as the
# 2010-2022 tables: tag every row with Year 2023, rename `IMD Decile` to IMD,
# and keep only Year, IMD and Stillbirths
imdStillBirth2023EW <- imdStillBirth2023EW %>%
  mutate(Year = 2023) %>%
  rename(IMD = `IMD Decile`) %>%
  select(Year, IMD, Stillbirths)

# Drop all columns besides Year, IMD and Stillbirths from the other two tables
imdStillBirth2022E <- imdStillBirth2022E %>% select(Year, IMD, Stillbirths)
imdStillBirth2022W <- imdStillBirth2022W %>% select(Year, IMD, Stillbirths)

# Merge the data from all three tables
imdStillBirth <- rbind(imdStillBirth2022E, imdStillBirth2022W, imdStillBirth2023EW)

# Remove the aggregate rows ("All deciles" or "Total" in column IMD).
# filter() only keeps rows where the condition is TRUE, so rows whose IMD is
# NA are dropped here as well.
imdStillBirth <- imdStillBirth %>%
  filter(IMD != "All deciles" & IMD != "Total")

# Convert column IMD to numeric class (addressed by name rather than by
# position, so the step does not depend on the column order)
imdStillBirth <- imdStillBirth %>%
  mutate(IMD = as.numeric(IMD))

# Sum the stillbirths for every Year and IMD combination. Only Stillbirths is
# left besides the grouping columns. `.groups = "drop"` returns an ungrouped
# table so that no grouping leaks into the plotting code.
imdStillBirth <- imdStillBirth %>%
  group_by(Year, IMD) %>%
  summarise(Stillbirths = sum(Stillbirths), .groups = "drop")
|
||
# ======================================= Data Visualisation ======================================= | ||
|
||
# Generate combination of box plot, violin plot and jitter plot of the number
# of stillbirths per IMD decile. The statistical subtitle is switched off
# (results.subtitle = FALSE) and the built-in centrality point is hidden by
# giving it size 0; the median is drawn by geom_segment() below instead.
ggbetweenstats(
  data = imdStillBirth,
  x = IMD,
  y = Stillbirths,
  title = "Number of stillbirths by IMD decile",
  xlab = "Index of Multiple Deprivation",
  ylab = "Number of Stillbirths",
  package = "MetBrewer",
  palette = "Redon",
  type = "np",
  centrality.point.args = list(size = 0),
  point.args = list(
    position = position_jitterdodge(dodge.width = 0.7),
    alpha = 0.7,
    size = 3.5,
    stroke = 0
  ),
  boxplot.args = list(
    width = 0.2,
    alpha = 0.3,
    fill = "grey85",
    colour = "black",
    linewidth = 0.7
  ),
  violin.args = list(
    width = 0.67,
    alpha = 0.1,
    colour = "grey30",
    linetype = 5
  ),
  partial = FALSE,
  results.subtitle = FALSE
) +
  # Short horizontal red line at the median of every decile, spanning 0.1 to
  # the left and to the right of the decile position
  geom_segment(
    data = imdStillBirth %>%
      group_by(IMD) %>%
      summarise(median = median(Stillbirths)),
    aes(
      x = IMD - 0.1,
      xend = IMD + 0.1,
      y = median,
      yend = median
    ),
    colour = "#BF2F24",
    # `linewidth` replaces `size` for line widths since ggplot2 3.4.0 and is
    # already used for the box plot above
    linewidth = 1.3
  ) +
  # Zoom the visible area without discarding any data
  coord_cartesian(
    ylim = c(100, 670),
    xlim = c(1, 10.1)
  ) +
  theme(
    panel.grid.major.x = element_line(color = "grey95"),
    panel.grid.major.y = element_line(color = "grey90"),
    panel.grid.minor.y = element_line(linetype = 3, color = "grey50"),
    plot.title = element_text(face = "bold", size = 30, hjust = 0.5, margin = margin(b = 20)),
    axis.title.x = element_text(size = 22, margin = margin(t = 20)),
    axis.text.x = element_text(face = "bold", size = 15),
    axis.title.y = element_text(size = 22, margin = margin(r = 20)),
    axis.text.y = element_text(face = "bold", size = 15),
    plot.margin = margin(l = 25, r = -8, b = 15, t = 25)
  )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
# ======================================= Load Libraries and Data ======================================= | ||
|
||
# Import required libraries | ||
library(tidyverse) | ||
library(readxl) | ||
library(reshape2) | ||
library(ggtext) | ||
|
||
# Read sheet "Table_2a" of 2023birthsbyparentscountryofbirth.xlsx.
# The first eight rows are skipped, so reading starts at the ninth row.
parentsCountryOfBirth <- read_excel(
  path = "2023birthsbyparentscountryofbirth.xlsx",
  sheet = "Table_2a",
  skip = 8
)
|
||
# ======================================= Data Pre-Processing ======================================= | ||
|
||
# Remove all but first row
parentsCountryOfBirth <- parentsCountryOfBirth[1, ]

# Create a vector of years from 2023 down to 2003 in steps of 5
selectedYears <- 2023 - 5 * (0:4)

# Names of the columns that contain "Percentage of all live births" and one
# of the years in selectedYears
selectedColumnName <- grep(
  "Percentage of all live births",
  colnames(parentsCountryOfBirth),
  value = TRUE
) %>%
  str_subset(paste(selectedYears, collapse = "|"))

# The steps below pair the matched columns with selectedYears one to one, so
# stop with a clear message if the workbook did not yield one column per year
if (length(selectedColumnName) != length(selectedYears)) {
  stop(
    "Expected ", length(selectedYears),
    " 'Percentage of all live births' columns (one per selected year), found ",
    length(selectedColumnName), ".",
    call. = FALSE
  )
}

# Remove all columns except the columns in selectedColumnName
parentsCountryOfBirth <- parentsCountryOfBirth %>% select(all_of(selectedColumnName))

# Flip the columns to become rows (one row per selected column)
parentsCountryOfBirth <- t(parentsCountryOfBirth)

# Rename column name to "Non_UK"
colnames(parentsCountryOfBirth) <- "Non_UK"

# Rename rows to running numbers, one per selected year
rownames(parentsCountryOfBirth) <- seq_along(selectedYears)

# Convert above matrix to data frame
parentsCountryOfBirth <- as.data.frame(parentsCountryOfBirth)

# Add column Year, convert Non_UK to numeric rounded to 1 decimal place, and
# derive UK as Non_UK - 100. UK is therefore negative on purpose: the
# butterfly chart draws the UK bars towards the negative side of the axis.
# NOTE(review): Year is assigned by position, which assumes the selected
# columns appear in the sheet in the same order as selectedYears (newest
# first) - confirm against the workbook.
parentsCountryOfBirth <- parentsCountryOfBirth %>%
  mutate(
    Year = selectedYears,
    Non_UK = round(as.numeric(Non_UK), 1),
    UK = Non_UK - 100
  )

# Reshape to long format: one row per Year and country group
parentsCountryOfBirth <- melt(parentsCountryOfBirth, id.vars = "Year")

# Sort the data frame by Year
parentsCountryOfBirth <- parentsCountryOfBirth %>%
  arrange(Year)

# Update column name
colnames(parentsCountryOfBirth) <- c("Year", "Country of Birth", "Percentage")

# Create new table that only consists of data with "Non_UK" countries
countryNonUK <- subset(parentsCountryOfBirth, `Country of Birth` == "Non_UK")

# Create new table that only consists of data with "UK" countries
countryUK <- subset(parentsCountryOfBirth, `Country of Birth` == "UK")

# Update the Percentage value to positive (used for the text labels)
countryUK$Percentage <- abs(countryUK$Percentage)
|
||
# ======================================= Data Visualisation ======================================= | ||
|
||
# Generate butterfly chart: one thick bar per year and country group, with
# the UK bars drawn towards the negative side and the Non_UK bars towards the
# positive side. Both sides start 2 units away from zero, which leaves a gap
# in the middle for the year labels.
ggplot(parentsCountryOfBirth, aes(x = Year, color = `Country of Birth`)) +
  # UK bars: Percentage is negative here; adding 66 makes the bar start at
  # 66%, matching the axis labels set in scale_y_continuous()
  geom_linerange(
    data = parentsCountryOfBirth[parentsCountryOfBirth$`Country of Birth` == "UK", ],
    aes(ymin = -2, ymax = -2 + `Percentage` + 66),
    linewidth = 20
  ) +
  # Non_UK bars: subtracting 16 makes the bar start at 16%
  geom_linerange(
    data = parentsCountryOfBirth[parentsCountryOfBirth$`Country of Birth` == "Non_UK", ],
    aes(ymin = 2, ymax = 2 + `Percentage` - 16),
    linewidth = 20
  ) +
  # Year labels in the gap between the two sides
  geom_label(
    aes(x = Year, y = 0, label = Year),
    inherit.aes = FALSE,
    fontface = "bold",
    size = 8,
    label.padding = unit(0.0, "lines"),
    label.size = 0,
    fill = "#ffffff",
    color = "black"
  ) +
  # Percentage text at the inner end of every Non_UK bar
  geom_text(
    data = countryNonUK,
    aes(x = Year, y = 2, label = paste0(Percentage, "%")),
    nudge_y = 0.37,
    family = "Arial Narrow",
    fontface = "bold",
    colour = "white",
    hjust = 0,
    size = 6.5
  ) +
  # Percentage text at the inner end of every UK bar (countryUK holds the
  # positive values)
  geom_text(
    data = countryUK,
    aes(x = Year, y = -2, label = paste0(Percentage, "%")),
    nudge_y = -0.37,
    family = "Arial Narrow",
    fontface = "bold",
    colour = "white",
    hjust = 1,
    size = 6.5
  ) +
  # `breaks` pins each label to its own key; the labels used to be matched by
  # position only and "UK" carried stray backticks
  scale_color_manual(
    name = "",
    values = c(`UK` = "#7B2C3CFF", `Non_UK` = "#294F5EFF"),
    breaks = c("UK", "Non_UK"),
    labels = c("UK", "Non_UK")
  ) +
  scale_x_reverse(
    breaks = c(seq(2003, 2023, 5))
  ) +
  # Axis positions are shifted by the 2-unit gap; the labels show the real
  # percentages (66 to 82 on the UK side, 16 to 32 on the Non_UK side)
  scale_y_continuous(
    limits = c(-17.8, 17.8),
    breaks = c(c(-16, -12, -8, -4, 0) + -2, c(0, 4, 8, 12, 16) + 2),
    labels = c("82", "78", "74", "70", "66", "16", "20", "24", "28", "32")
  ) +
  coord_flip() +
  # x is mapped to Year and y to the percentage; both axis titles are hidden
  # by theme(axis.title = element_blank()) below
  labs(
    title = "Live birth percentage by mother's country of birth",
    subtitle = "<b><span style='color:#7B2C3CFF '>Red bar</span></b> represents <span
style='color:black'><i>\"UK\"</i></span> countries. <b><span style='color:#294F5EFF'>Blue bar</span></b>
represents <span style='color:black'><i>\"Non-UK\"</i></span> countries.",
    x = "Year",
    y = "Percentage of live births"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 28, hjust = 0, margin = margin(l = 55, b = 12)),
    plot.subtitle = element_markdown(size = 19, hjust = 0, margin = margin(l = 55, b = 23), color = "grey35"),
    panel.grid.major.x = element_line(linetype = 5, color = "grey83"),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.title = element_blank(),
    axis.text.x = element_text(face = "bold", size = 18.5, color = "black", margin = margin(t = 15)),
    axis.text.y = element_blank(),
    plot.margin = margin(l = 0, r = 0, b = 20, t = 30)
  )
Binary file not shown.
Oops, something went wrong.