Final update to include year in app, includes only 2022 data in prep …

…for 2023
dfe-analytical-services · Nov 7, 2023 · 4f670bc · 4f670bc
1 parent 67c5c93
commit 4f670bc
Show file tree

Hide file tree

Showing 8 changed files with 46 additions and 200 deletions.
diff --git a/R/dashboard_panels.R b/R/dashboard_panels.R
@@ -57,8 +57,8 @@ homepage_panel <- function() {
               div(
                 class = "panel-body",
                 h3("Context and purpose"),
-                p("To use the 16-18 Transition Matrices tool click onto the '16-18 TM tool' tab found on the left panel. Please then
-                select a qualification, subject and subject size from the dropdown boxes. 
+                p("To use the 16-18 Transition Matrices tool click onto the 'Dashboard' tab found on the left panel. Please then
+                select a report year (the year students finished 16-18 study), qualification, subject and subject size from the dropdown boxes. 
                 Use the 'Numbers data' and 'Percentage Data' options to switch the 
                 table view between number of students and percentage of students."),
                 br(),
@@ -133,10 +133,10 @@ dashboard_panel <- function() {
               column(
                 width = 6,
                 selectizeInput(
-                  inputId = "AcadYr_select",
-                  label = "1. Select an academic year",
-                  choices = list(AcadYr = sort(unique(qual_lookup$AcadYr))),
-                  selected = max(qual_lookup$AcadYr)
+                  inputId = "ReportYr_select",
+                  label = "1. Select a report year",
+                  choices = unique(qual_lookup$ReportYr), # list(ReportYr = sort(unique(qual_lookup$ReportYr))),
+                  selected = max(qual_lookup$ReportYr)
                 )
               ),
 

diff --git a/background_scripts/data_processing.R b/background_scripts/data_processing.R
@@ -154,184 +154,30 @@ grades_ordered_lookup <- bind_rows(grades_char, grades_num) %>%
 # ---- add academic year to data ---- 
 # -----------------------------------------------------------------------------------------------------------------------------
 
-student_numbers_bind <- student_numbers %>% 
-  mutate(AcadYr = "2021/22") %>% 
-  select(AcadYr, everything())
-
-student_percentages_bind <- student_percentages %>% 
-  mutate(AcadYr = "2021/22") %>% 
-  select(AcadYr, everything())
-
-qual_lookup_bind <- qual_lookup %>% 
-  mutate(AcadYr = "2021/22") %>% 
-  select(AcadYr, everything())
-
-grades_ordered_lookup_bind <- grades_ordered_lookup %>% 
-  mutate(AcadYr = "2021/22") %>% 
-  select(AcadYr, everything())
-
-
-# grade_list %>% filter(SUBLEVNO == 253, SUBJ == 20596, ASIZE == 1)
-
-
-
-
-# -----------------------------------------------------------------------------------------------------------------------------
-# ---- Things to change between runs ----
-# -----------------------------------------------------------------------------------------------------------------------------
-
-tm_file = "[KS5_STATISTICS_RESTRICTED].[TM_2022].[TM_data_2021U]"
-
-current_year <- "2021U"
-
-# -----------------------------------------------------------------------------------------------------------------------------
-# ---- Reading in the data from SQL tables ----
-# -----------------------------------------------------------------------------------------------------------------------------
-
-
-# establish connection to server
-con <- DBI::dbConnect(odbc::odbc(), driver = "SQL Server",
-                      server = "VMT1PR-DHSQL02")
-
-
-# Select data from SQL tables
-
-tm_data_raw <- tbl(con, sql(paste("select * from", tm_file))) %>% collect()
-
-# disconnect
-DBI::dbDisconnect(con)
-
-
-
-
-# -----------------------------------------------------------------------------------------------------------------------------
-# ---- SORTING NAs ----
-# -----------------------------------------------------------------------------------------------------------------------------
-
-colSums(is.na(tm_data_raw)) %>% as.data.frame() 
-# NAs in prior band which need removing
-# NAs in subj because no subj code was assigned within the SQL production code
-# SUBJ is assigned by joining on the QUAL_SUBJ_LOOKUP, 
-# but some subjects are filtered out of the L3VA process if they are not entered in 5 or more institutions
-# for the TMs we don't mind this rule, so we've decided to leave them in but we will have to create new SUBJ codes
-# to ensure I'm not overwriting any existing SUBJ codes, I'm generating 3 digit random numbers (SUBJ is usually 5 digits)
-
-
-tm_data_subj_na <- tm_data_raw %>% 
-  filter(!(is.na(PRIOR_BAND)),
-         is.na(SUBJ)) %>%
-  group_by(Subject) %>%
-  mutate(SUBJ = sample(100:900,1))
-
-tm_data <- tm_data_raw %>%
-  filter(!(is.na(PRIOR_BAND)),
-         !(is.na(SUBJ))) %>%
-  bind_rows(tm_data_subj_na)
-
-colSums(is.na(tm_data)) %>% as.data.frame() 
-
-
-
-# -----------------------------------------------------------------------------------------------------------------------------
-# ---- NUMBERS & PERCENTAGES CALCULATED ---- 
-# -----------------------------------------------------------------------------------------------------------------------------
-
-
-student_numbers <- tm_data %>% 
-  mutate(GRADE = case_when(GRADE == "Fail" & SUBLEVNO != 130 ~ "U",
-                           TRUE ~ GRADE),
-         SIZE = case_when(ASIZE == 0 ~ GSIZE, 
-                          TRUE ~ ASIZE),
-         QUAL_ID = paste0(SUBLEVNO, SUBJ, SIZE, gradeStructure),
-         ROW_ID = paste0(SUBLEVNO, SUBJ, SIZE, PRIOR_BAND, gradeStructure)) %>%
-  mutate(across(c(everything(), -total_students), ~as.character(.))) %>%
-  arrange(ROW_ID) %>% 
-  pivot_wider(names_from = GRADE, values_from = total_students)
-
-
-
-student_percentages <- student_numbers %>% 
-  janitor::adorn_percentages() %>%
-  mutate_if(is.numeric, function(x){round(x*100, 2)}) %>%
-  mutate_if(is.numeric, ~paste0(.x, "%"))
-
-
-
-
-
-# -----------------------------------------------------------------------------------------------------------------------------
-# ---- LOOKUP & FULL GRADE OPTIONS CALCULATED ---- 
-# -----------------------------------------------------------------------------------------------------------------------------
-
-
-qual_lookup <- tm_data %>%
-  select(Qual_Description, SUBLEVNO, Subject, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
-  distinct() %>%
-  mutate(SIZE = case_when(ASIZE == 0 ~ GSIZE, 
-                          TRUE ~ ASIZE))
-
-
-grade_lookup_u <- tm_data %>%
-  select(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
-  distinct() %>%
-  mutate(GRADE = case_when(SUBLEVNO == 130 ~ "Fail",
-                           TRUE ~ "U")) 
-
-grade_lookup_sep <- tm_data %>%
-  select(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
-  distinct() %>%
-  mutate(GRADE = gradeStructure) %>%
-  separate_rows(. , GRADE, sep = ",") 
-
-grade_lookup <- bind_rows(grade_lookup_u, grade_lookup_sep) %>%
-  arrange(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure, GRADE) %>%
-  mutate(SIZE = case_when(ASIZE == 0 ~ GSIZE, 
-                          TRUE ~ ASIZE))
-
-
-
-# would like to sort numeric and character grades differently so that all grades go from low - high
-# for numeric this is ascending, for character this is descending
-# will need to split again based on numeric and character and then re-combine again
-grades_char <- grade_lookup %>%
-  mutate(char_grade_check = is.na(suppressWarnings(as.numeric(grade_lookup$GRADE)))) %>%  # gives extra column - True is character, False is number
-  filter(char_grade_check == TRUE) %>% 
-  arrange(desc(GRADE))
-
-grades_num <- grade_lookup %>%
-  mutate(char_grade_check = is.na(suppressWarnings(as.numeric(grade_lookup$GRADE)))) %>%  # gives extra column - True is character, False is number
-  filter(char_grade_check == FALSE) %>%
-  arrange(GRADE)
-
-grades_ordered_lookup <- bind_rows(grades_char, grades_num) %>%
-  arrange(SUBLEVNO)
-
 student_numbers <- student_numbers %>% 
-  mutate(AcadYr = "2020/21") %>% 
-  select(AcadYr, everything())
+  mutate(ReportYr = 2022) %>% 
+  select(ReportYr, everything())
 
 student_percentages <- student_percentages %>% 
-  mutate(AcadYr = "2020/21") %>% 
-  select(AcadYr, everything())
+  mutate(ReportYr = 2022) %>% 
+  select(ReportYr, everything())
 
 qual_lookup <- qual_lookup %>% 
-  mutate(AcadYr = "2020/21") %>% 
-  select(AcadYr, everything())
+  mutate(ReportYr = 2022) %>% 
+  select(ReportYr, everything())
 
-grades_ordered <- grades_ordered_lookup %>% 
-  mutate(AcadYr = "2020/21") %>% 
-  select(AcadYr, everything())
+grades_ordered_lookup <- grades_ordered_lookup %>% 
+  mutate(ReportYr = 2022) %>% 
+  select(ReportYr, everything())
 
 
+# grade_list %>% filter(SUBLEVNO == 253, SUBJ == 20596, ASIZE == 1)
+
 
 # -----------------------------------------------------------------------------------------------------------------------------
 # ---- Saving Data ---- 
 # -----------------------------------------------------------------------------------------------------------------------------
 
-student_numbers <- bind_rows(student_numbers, student_numbers_bind)
-student_percentages <- bind_rows(student_percentages, student_percentages_bind)
-qual_lookup <- bind_rows(qual_lookup, qual_lookup_bind)
-grades_ordered_lookup <- bind_rows(grades_ordered_lookup, grades_ordered_lookup_bind)
 
 saveRDS(student_numbers, "./data/all_student_numbers.rds")
 saveRDS(student_percentages, "./data/all_student_percentages.rds")

diff --git a/data/all_student_numbers.rds b/data/all_student_numbers.rds
diff --git a/data/all_student_percentages.rds b/data/all_student_percentages.rds
diff --git a/data/grade_lookup.rds b/data/grade_lookup.rds
diff --git a/data/qual_lookup.rds b/data/qual_lookup.rds
diff --git a/global.R b/global.R
@@ -55,7 +55,7 @@ tidy_code_function <- function() {
 # -----------------------------------------------------------------------------------------------------------------------------
 
 # Returns a table from the Student Numbers CSV
-number_select_function <- function(AcadYr_sel, qual, subj, size, grade_structure) {
+number_select_function <- function(ReportYr_sel, qual, subj, size, grade_structure) {
   filter_selection <- paste0(qual, subj, size, grade_structure)
   qual_grades <- filter(
     grade_lookup,
@@ -66,7 +66,7 @@ number_select_function <- function(AcadYr_sel, qual, subj, size, grade_structure
   grade_list <- qual_grades$GRADE
 
   table <- stud_numbers %>%
-    filter(AcadYr == AcadYr_sel & QUAL_ID == filter_selection) %>%
+    filter(ReportYr == ReportYr_sel & QUAL_ID == filter_selection) %>%
     select(PRIOR_BAND, grade_list)
 
   return(table)
@@ -77,15 +77,15 @@ number_select_function <- function(AcadYr_sel, qual, subj, size, grade_structure
 # -----------------------------------------------------------------------------------------------------------------------------
 
 # Returns a table from the Student Percentages CSV
-percentage_select_function <- function(AcadYr_sel, qual, subj, size, grade_structure) {
+percentage_select_function <- function(ReportYr_sel, qual, subj, size, grade_structure) {
   filter_selection <- paste0(qual, subj, size, grade_structure)
   qual_grades <- filter(grade_lookup, SUBLEVNO == qual & SUBJ == subj & SIZE == size & gradeStructure == grade_structure)
 
   # Grades already sorted so just need to extract list of grades
   grade_list <- qual_grades$GRADE
 
   table <- stud_percentages %>%
-    filter(AcadYr == AcadYr_sel & QUAL_ID == filter_selection) %>%
+    filter(ReportYr == ReportYr_sel & QUAL_ID == filter_selection) %>%
     select(PRIOR_BAND, grade_list)
 
   return(table)
@@ -166,12 +166,12 @@ grade_boundaries <- c("<1", "1-<2", "2-<3", "3-<4", "4-<5", "5-<6", "6-<7", "7-<
 
 # Create a fixed table for example table
 user_selection_example <- qual_lookup %>%
-  filter(AcadYr == max(AcadYr) & Qual_Description == "GCE A level" & Subject == "Mathematics" & ASIZE == 1 & gradeStructure == "*,A,B,C,D,E") %>%
+  filter(ReportYr == max(ReportYr) & Qual_Description == "GCE A level" & Subject == "Mathematics" & ASIZE == 1 & gradeStructure == "*,A,B,C,D,E") %>%
   distinct()
 
 
 example_data <- number_select_function(
-  user_selection_example$AcadYr, user_selection_example$SUBLEVNO, user_selection_example$SUBJ,
+  user_selection_example$ReportYr, user_selection_example$SUBLEVNO, user_selection_example$SUBJ,
   user_selection_example$SIZE, user_selection_example$gradeStructure
 ) %>%
   rename("Prior Band" = PRIOR_BAND) %>%