Skip to content

Commit

Permalink
Final update to include year in app, includes only 2022 data in prep …
Browse files Browse the repository at this point in the history
…for 2023
  • Loading branch information
Chris-bennettWk committed Nov 7, 2023
1 parent 67c5c93 commit 4f670bc
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 200 deletions.
12 changes: 6 additions & 6 deletions R/dashboard_panels.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ homepage_panel <- function() {
div(
class = "panel-body",
h3("Context and purpose"),
p("To use the 16-18 Transition Matrices tool click onto the '16-18 TM tool' tab found on the left panel. Please then
select a qualification, subject and subject size from the dropdown boxes.
p("To use the 16-18 Transition Matrices tool click onto the 'Dashboard' tab found on the left panel. Please then
select a report year (the year students finished 16-18 study), qualification, subject and subject size from the dropdown boxes.
Use the 'Numbers data' and 'Percentage Data' options to switch the
table view between number of students and percentage of students."),
br(),
Expand Down Expand Up @@ -133,10 +133,10 @@ dashboard_panel <- function() {
column(
width = 6,
selectizeInput(
inputId = "AcadYr_select",
label = "1. Select an academic year",
choices = list(AcadYr = sort(unique(qual_lookup$AcadYr))),
selected = max(qual_lookup$AcadYr)
inputId = "ReportYr_select",
label = "1. Select a report year",
choices = unique(qual_lookup$ReportYr), # list(ReportYr = sort(unique(qual_lookup$ReportYr))),
selected = max(qual_lookup$ReportYr)
)
),

Expand Down
176 changes: 11 additions & 165 deletions background_scripts/data_processing.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,184 +154,30 @@ grades_ordered_lookup <- bind_rows(grades_char, grades_num) %>%
# ---- add academic year to data ----
# -----------------------------------------------------------------------------------------------------------------------------

student_numbers_bind <- student_numbers %>%
mutate(AcadYr = "2021/22") %>%
select(AcadYr, everything())

student_percentages_bind <- student_percentages %>%
mutate(AcadYr = "2021/22") %>%
select(AcadYr, everything())

qual_lookup_bind <- qual_lookup %>%
mutate(AcadYr = "2021/22") %>%
select(AcadYr, everything())

grades_ordered_lookup_bind <- grades_ordered_lookup %>%
mutate(AcadYr = "2021/22") %>%
select(AcadYr, everything())


# grade_list %>% filter(SUBLEVNO == 253, SUBJ == 20596, ASIZE == 1)




# -----------------------------------------------------------------------------------------------------------------------------
# ---- Things to change between runs ----
# -----------------------------------------------------------------------------------------------------------------------------

tm_file = "[KS5_STATISTICS_RESTRICTED].[TM_2022].[TM_data_2021U]"

current_year <- "2021U"

# -----------------------------------------------------------------------------------------------------------------------------
# ---- Reading in the data from SQL tables ----
# -----------------------------------------------------------------------------------------------------------------------------


# establish connection to server
con <- DBI::dbConnect(odbc::odbc(), driver = "SQL Server",
server = "VMT1PR-DHSQL02")


# Select data from SQL tables

tm_data_raw <- tbl(con, sql(paste("select * from", tm_file))) %>% collect()

# disconnect
DBI::dbDisconnect(con)




# -----------------------------------------------------------------------------------------------------------------------------
# ---- SORTING NAs ----
# -----------------------------------------------------------------------------------------------------------------------------

colSums(is.na(tm_data_raw)) %>% as.data.frame()
# NAs in prior band which need removing
# NAs in subj because no subj code was assigned within the SQL production code
# SUBJ is assigned by joining on the QUAL_SUBJ_LOOKUP,
# but some subjects are filtered out of the L3VA process if they are not entered in 5 or more institutions
# for the TMs we don't mind this rule, so we've decided to leave them in but we will have to create new SUBJ codes
# to ensure I'm not overwriting any existing SUBJ codes, I'm generating 3 digit random numbers (SUBJ is usually 5 digits)


tm_data_subj_na <- tm_data_raw %>%
filter(!(is.na(PRIOR_BAND)),
is.na(SUBJ)) %>%
group_by(Subject) %>%
mutate(SUBJ = sample(100:900,1))

tm_data <- tm_data_raw %>%
filter(!(is.na(PRIOR_BAND)),
!(is.na(SUBJ))) %>%
bind_rows(tm_data_subj_na)

colSums(is.na(tm_data)) %>% as.data.frame()



# -----------------------------------------------------------------------------------------------------------------------------
# ---- NUMBERS & PERCENTAGES CALCULATED ----
# -----------------------------------------------------------------------------------------------------------------------------


student_numbers <- tm_data %>%
mutate(GRADE = case_when(GRADE == "Fail" & SUBLEVNO != 130 ~ "U",
TRUE ~ GRADE),
SIZE = case_when(ASIZE == 0 ~ GSIZE,
TRUE ~ ASIZE),
QUAL_ID = paste0(SUBLEVNO, SUBJ, SIZE, gradeStructure),
ROW_ID = paste0(SUBLEVNO, SUBJ, SIZE, PRIOR_BAND, gradeStructure)) %>%
mutate(across(c(everything(), -total_students), ~as.character(.))) %>%
arrange(ROW_ID) %>%
pivot_wider(names_from = GRADE, values_from = total_students)



student_percentages <- student_numbers %>%
janitor::adorn_percentages() %>%
mutate_if(is.numeric, function(x){round(x*100, 2)}) %>%
mutate_if(is.numeric, ~paste0(.x, "%"))





# -----------------------------------------------------------------------------------------------------------------------------
# ---- LOOKUP & FULL GRADE OPTIONS CALCULATED ----
# -----------------------------------------------------------------------------------------------------------------------------


qual_lookup <- tm_data %>%
select(Qual_Description, SUBLEVNO, Subject, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
distinct() %>%
mutate(SIZE = case_when(ASIZE == 0 ~ GSIZE,
TRUE ~ ASIZE))


grade_lookup_u <- tm_data %>%
select(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
distinct() %>%
mutate(GRADE = case_when(SUBLEVNO == 130 ~ "Fail",
TRUE ~ "U"))

grade_lookup_sep <- tm_data %>%
select(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
distinct() %>%
mutate(GRADE = gradeStructure) %>%
separate_rows(. , GRADE, sep = ",")

grade_lookup <- bind_rows(grade_lookup_u, grade_lookup_sep) %>%
arrange(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure, GRADE) %>%
mutate(SIZE = case_when(ASIZE == 0 ~ GSIZE,
TRUE ~ ASIZE))



# would like to sort numeric and character grades differently so that all grades go from low - high
# for numeric this is ascending, for character this is descending
# will need to split again based on numeric and character and then re-combine again
grades_char <- grade_lookup %>%
mutate(char_grade_check = is.na(suppressWarnings(as.numeric(grade_lookup$GRADE)))) %>% # gives extra column - True is character, False is number
filter(char_grade_check == TRUE) %>%
arrange(desc(GRADE))

grades_num <- grade_lookup %>%
mutate(char_grade_check = is.na(suppressWarnings(as.numeric(grade_lookup$GRADE)))) %>% # gives extra column - True is character, False is number
filter(char_grade_check == FALSE) %>%
arrange(GRADE)

grades_ordered_lookup <- bind_rows(grades_char, grades_num) %>%
arrange(SUBLEVNO)

student_numbers <- student_numbers %>%
mutate(AcadYr = "2020/21") %>%
select(AcadYr, everything())
mutate(ReportYr = 2022) %>%
select(ReportYr, everything())

student_percentages <- student_percentages %>%
mutate(AcadYr = "2020/21") %>%
select(AcadYr, everything())
mutate(ReportYr = 2022) %>%
select(ReportYr, everything())

qual_lookup <- qual_lookup %>%
mutate(AcadYr = "2020/21") %>%
select(AcadYr, everything())
mutate(ReportYr = 2022) %>%
select(ReportYr, everything())

grades_ordered <- grades_ordered_lookup %>%
mutate(AcadYr = "2020/21") %>%
select(AcadYr, everything())
grades_ordered_lookup <- grades_ordered_lookup %>%
mutate(ReportYr = 2022) %>%
select(ReportYr, everything())


# grade_list %>% filter(SUBLEVNO == 253, SUBJ == 20596, ASIZE == 1)


# -----------------------------------------------------------------------------------------------------------------------------
# ---- Saving Data ----
# -----------------------------------------------------------------------------------------------------------------------------

student_numbers <- bind_rows(student_numbers, student_numbers_bind)
student_percentages <- bind_rows(student_percentages, student_percentages_bind)
qual_lookup <- bind_rows(qual_lookup, qual_lookup_bind)
grades_ordered_lookup <- bind_rows(grades_ordered_lookup, grades_ordered_lookup_bind)

saveRDS(student_numbers, "./data/all_student_numbers.rds")
saveRDS(student_percentages, "./data/all_student_percentages.rds")
Expand Down
Binary file modified data/all_student_numbers.rds
Binary file not shown.
Binary file modified data/all_student_percentages.rds
Binary file not shown.
Binary file modified data/grade_lookup.rds
Binary file not shown.
Binary file modified data/qual_lookup.rds
Binary file not shown.
12 changes: 6 additions & 6 deletions global.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ tidy_code_function <- function() {
# -----------------------------------------------------------------------------------------------------------------------------

# Returns a table from the Student Numbers CSV
number_select_function <- function(AcadYr_sel, qual, subj, size, grade_structure) {
number_select_function <- function(ReportYr_sel, qual, subj, size, grade_structure) {
filter_selection <- paste0(qual, subj, size, grade_structure)
qual_grades <- filter(
grade_lookup,
Expand All @@ -66,7 +66,7 @@ number_select_function <- function(AcadYr_sel, qual, subj, size, grade_structure
grade_list <- qual_grades$GRADE

table <- stud_numbers %>%
filter(AcadYr == AcadYr_sel & QUAL_ID == filter_selection) %>%
filter(ReportYr == ReportYr_sel & QUAL_ID == filter_selection) %>%
select(PRIOR_BAND, grade_list)

return(table)
Expand All @@ -77,15 +77,15 @@ number_select_function <- function(AcadYr_sel, qual, subj, size, grade_structure
# -----------------------------------------------------------------------------------------------------------------------------

# Returns a table from the Student Percentages CSV
percentage_select_function <- function(AcadYr_sel, qual, subj, size, grade_structure) {
percentage_select_function <- function(ReportYr_sel, qual, subj, size, grade_structure) {
filter_selection <- paste0(qual, subj, size, grade_structure)
qual_grades <- filter(grade_lookup, SUBLEVNO == qual & SUBJ == subj & SIZE == size & gradeStructure == grade_structure)

# Grades already sorted so just need to extract list of grades
grade_list <- qual_grades$GRADE

table <- stud_percentages %>%
filter(AcadYr == AcadYr_sel & QUAL_ID == filter_selection) %>%
filter(ReportYr == ReportYr_sel & QUAL_ID == filter_selection) %>%
select(PRIOR_BAND, grade_list)

return(table)
Expand Down Expand Up @@ -166,12 +166,12 @@ grade_boundaries <- c("<1", "1-<2", "2-<3", "3-<4", "4-<5", "5-<6", "6-<7", "7-<

# Create a fixed table for example table
user_selection_example <- qual_lookup %>%
filter(AcadYr == max(AcadYr) & Qual_Description == "GCE A level" & Subject == "Mathematics" & ASIZE == 1 & gradeStructure == "*,A,B,C,D,E") %>%
filter(ReportYr == max(ReportYr) & Qual_Description == "GCE A level" & Subject == "Mathematics" & ASIZE == 1 & gradeStructure == "*,A,B,C,D,E") %>%
distinct()


example_data <- number_select_function(
user_selection_example$AcadYr, user_selection_example$SUBLEVNO, user_selection_example$SUBJ,
user_selection_example$ReportYr, user_selection_example$SUBLEVNO, user_selection_example$SUBJ,
user_selection_example$SIZE, user_selection_example$gradeStructure
) %>%
rename("Prior Band" = PRIOR_BAND) %>%
Expand Down
Loading

0 comments on commit 4f670bc

Please sign in to comment.