working year code for 2021 and 2022

dfe-analytical-services · Nov 7, 2023 · 67c5c93 · 67c5c93
1 parent fc92ad8
commit 67c5c93
Show file tree

Hide file tree

Showing 8 changed files with 221 additions and 30 deletions.
diff --git a/R/dashboard_panels.R b/R/dashboard_panels.R
@@ -130,11 +130,21 @@ dashboard_panel <- function() {
             class = "well",
             style = "min-height: 100%; height: 100%; overflow-y: visible",
             gov_row(
+              column(
+                width = 6,
+                selectizeInput(
+                  inputId = "AcadYr_select",
+                  label = "1. Select an academic year",
+                  choices = list(AcadYr = sort(unique(qual_lookup$AcadYr))),
+                  selected = max(qual_lookup$AcadYr)
+                )
+              ),
+
               column(
                 width = 6,
                 selectizeInput(
                   inputId = "qual_select",
-                  label = "1. Select a qualification",
+                  label = "2. Select a qualification",
                   choices = list(Qualifications = sort(unique(qual_lookup$Qual_Description))),
                   selected = "GCE A level"
                 )
@@ -144,7 +154,7 @@ dashboard_panel <- function() {
                 width = 6,
                 selectizeInput(
                   inputId = "subj_select",
-                  label = "2. Select a subject",
+                  label = "3. Select a subject",
                   choices = list(Subjects = sort(unique(qual_lookup$Subject))),
                   selected = "Mathematics"
                 )
@@ -154,7 +164,7 @@ dashboard_panel <- function() {
                 width = 6,
                 selectizeInput(
                   inputId = "size_select",
-                  label = "3. Select a size",
+                  label = "4. Select a size",
                   choices = list(Sizes = sort(qual_lookup$SIZE))
                 )
               ),
@@ -163,15 +173,15 @@ dashboard_panel <- function() {
                 width = 6,
                 selectizeInput(
                   inputId = "grade_structure_select",
-                  label = "4. Select a grade structure",
+                  label = "5. Select a grade structure",
                   choices = list(GradeStructures = sort(qual_lookup$gradeStructure))
                 )
               ),
 
               column(
                 width = 12,
                 radioButtons(inputId="format", 
-                             label="5. Select format of data: ", 
+                             label="6. Select format of data: ", 
                              choices=c("Numbers data", "Percentage data")
                 ),
                 uiOutput("chart_band_appear")

diff --git a/background_scripts/data_processing.R b/background_scripts/data_processing.R
@@ -33,7 +33,7 @@ current_year <- "2022A"
 
 # establish connection to server
 con <- DBI::dbConnect(odbc::odbc(), driver = "SQL Server",
-                      server = "3dcpri-pdb16\\acsqls")
+                      server = "VMT1PR-DHSQL02")
 
 
 # Select data from SQL tables
@@ -150,14 +150,189 @@ grades_ordered_lookup <- bind_rows(grades_char, grades_num) %>%
 
 
 
+# -----------------------------------------------------------------------------------------------------------------------------
+# ---- add academic year to data ---- 
+# -----------------------------------------------------------------------------------------------------------------------------
+
+student_numbers_bind <- student_numbers %>% 
+  mutate(AcadYr = "2021/22") %>% 
+  select(AcadYr, everything())
+
+student_percentages_bind <- student_percentages %>% 
+  mutate(AcadYr = "2021/22") %>% 
+  select(AcadYr, everything())
+
+qual_lookup_bind <- qual_lookup %>% 
+  mutate(AcadYr = "2021/22") %>% 
+  select(AcadYr, everything())
+
+grades_ordered_lookup_bind <- grades_ordered_lookup %>% 
+  mutate(AcadYr = "2021/22") %>% 
+  select(AcadYr, everything())
 
 
 # grade_list %>% filter(SUBLEVNO == 253, SUBJ == 20596, ASIZE == 1)
 
+
+
+
+# -----------------------------------------------------------------------------------------------------------------------------
+# ---- Things to change between runs ----
+# -----------------------------------------------------------------------------------------------------------------------------
+
+tm_file = "[KS5_STATISTICS_RESTRICTED].[TM_2022].[TM_data_2021U]"
+
+current_year <- "2021U"
+
+# -----------------------------------------------------------------------------------------------------------------------------
+# ---- Reading in the data from SQL tables ----
+# -----------------------------------------------------------------------------------------------------------------------------
+
+
+# establish connection to server
+con <- DBI::dbConnect(odbc::odbc(), driver = "SQL Server",
+                      server = "VMT1PR-DHSQL02")
+
+
+# Select data from SQL tables
+
+tm_data_raw <- tbl(con, sql(paste("select * from", tm_file))) %>% collect()
+
+# disconnect
+DBI::dbDisconnect(con)
+
+
+
+
+# -----------------------------------------------------------------------------------------------------------------------------
+# ---- SORTING NAs ----
+# -----------------------------------------------------------------------------------------------------------------------------
+
+colSums(is.na(tm_data_raw)) %>% as.data.frame() 
+# NAs in prior band which need removing
+# NAs in subj because no subj code was assigned within the SQL production code
+# SUBJ is assigned by joining on the QUAL_SUBJ_LOOKUP, 
+# but some subjects are filtered out of the L3VA process if they are not entered in 5 or more institutions
+# for the TMs we don't mind this rule, so we've decided to leave them in but we will have to create new SUBJ codes
+# to ensure I'm not overwriting any existing SUBJ codes, I'm generating 3 digit random numbers (SUBJ is usually 5 digits)
+
+
+tm_data_subj_na <- tm_data_raw %>% 
+  filter(!(is.na(PRIOR_BAND)),
+         is.na(SUBJ)) %>%
+  group_by(Subject) %>%
+  mutate(SUBJ = sample(100:900,1))
+
+tm_data <- tm_data_raw %>%
+  filter(!(is.na(PRIOR_BAND)),
+         !(is.na(SUBJ))) %>%
+  bind_rows(tm_data_subj_na)
+
+colSums(is.na(tm_data)) %>% as.data.frame() 
+
+
+
+# -----------------------------------------------------------------------------------------------------------------------------
+# ---- NUMBERS & PERCENTAGES CALCULATED ---- 
+# -----------------------------------------------------------------------------------------------------------------------------
+
+
+student_numbers <- tm_data %>% 
+  mutate(GRADE = case_when(GRADE == "Fail" & SUBLEVNO != 130 ~ "U",
+                           TRUE ~ GRADE),
+         SIZE = case_when(ASIZE == 0 ~ GSIZE, 
+                          TRUE ~ ASIZE),
+         QUAL_ID = paste0(SUBLEVNO, SUBJ, SIZE, gradeStructure),
+         ROW_ID = paste0(SUBLEVNO, SUBJ, SIZE, PRIOR_BAND, gradeStructure)) %>%
+  mutate(across(c(everything(), -total_students), ~as.character(.))) %>%
+  arrange(ROW_ID) %>% 
+  pivot_wider(names_from = GRADE, values_from = total_students)
+
+
+
+student_percentages <- student_numbers %>% 
+  janitor::adorn_percentages() %>%
+  mutate_if(is.numeric, function(x){round(x*100, 2)}) %>%
+  mutate_if(is.numeric, ~paste0(.x, "%"))
+
+
+
+
+
+# -----------------------------------------------------------------------------------------------------------------------------
+# ---- LOOKUP & FULL GRADE OPTIONS CALCULATED ---- 
+# -----------------------------------------------------------------------------------------------------------------------------
+
+
+qual_lookup <- tm_data %>%
+  select(Qual_Description, SUBLEVNO, Subject, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
+  distinct() %>%
+  mutate(SIZE = case_when(ASIZE == 0 ~ GSIZE, 
+                          TRUE ~ ASIZE))
+
+
+grade_lookup_u <- tm_data %>%
+  select(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
+  distinct() %>%
+  mutate(GRADE = case_when(SUBLEVNO == 130 ~ "Fail",
+                           TRUE ~ "U")) 
+
+grade_lookup_sep <- tm_data %>%
+  select(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
+  distinct() %>%
+  mutate(GRADE = gradeStructure) %>%
+  separate_rows(. , GRADE, sep = ",") 
+
+grade_lookup <- bind_rows(grade_lookup_u, grade_lookup_sep) %>%
+  arrange(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure, GRADE) %>%
+  mutate(SIZE = case_when(ASIZE == 0 ~ GSIZE, 
+                          TRUE ~ ASIZE))
+
+
+
+# would like to sort numeric and character grades differently so that all grades go from low - high
+# for numeric this is ascending, for character this is descending
+# will need to split again based on numeric and character and then re-combine again
+grades_char <- grade_lookup %>%
+  mutate(char_grade_check = is.na(suppressWarnings(as.numeric(grade_lookup$GRADE)))) %>%  # gives extra column - True is character, False is number
+  filter(char_grade_check == TRUE) %>% 
+  arrange(desc(GRADE))
+
+grades_num <- grade_lookup %>%
+  mutate(char_grade_check = is.na(suppressWarnings(as.numeric(grade_lookup$GRADE)))) %>%  # gives extra column - True is character, False is number
+  filter(char_grade_check == FALSE) %>%
+  arrange(GRADE)
+
+grades_ordered_lookup <- bind_rows(grades_char, grades_num) %>%
+  arrange(SUBLEVNO)
+
+student_numbers <- student_numbers %>% 
+  mutate(AcadYr = "2020/21") %>% 
+  select(AcadYr, everything())
+
+student_percentages <- student_percentages %>% 
+  mutate(AcadYr = "2020/21") %>% 
+  select(AcadYr, everything())
+
+qual_lookup <- qual_lookup %>% 
+  mutate(AcadYr = "2020/21") %>% 
+  select(AcadYr, everything())
+
+grades_ordered <- grades_ordered_lookup %>% 
+  mutate(AcadYr = "2020/21") %>% 
+  select(AcadYr, everything())
+
+
+
 # -----------------------------------------------------------------------------------------------------------------------------
 # ---- Saving Data ---- 
 # -----------------------------------------------------------------------------------------------------------------------------
 
+student_numbers <- bind_rows(student_numbers, student_numbers_bind)
+student_percentages <- bind_rows(student_percentages, student_percentages_bind)
+qual_lookup <- bind_rows(qual_lookup, qual_lookup_bind)
+grades_ordered_lookup <- bind_rows(grades_ordered_lookup, grades_ordered_lookup_bind)
+
 saveRDS(student_numbers, "./data/all_student_numbers.rds")
 saveRDS(student_percentages, "./data/all_student_percentages.rds")
 

diff --git a/data/all_student_numbers.rds b/data/all_student_numbers.rds
diff --git a/data/all_student_percentages.rds b/data/all_student_percentages.rds
diff --git a/data/grade_lookup.rds b/data/grade_lookup.rds
diff --git a/data/qual_lookup.rds b/data/qual_lookup.rds
diff --git a/global.R b/global.R
@@ -55,7 +55,7 @@ tidy_code_function <- function() {
 # -----------------------------------------------------------------------------------------------------------------------------
 
 # Returns a table from the Student Numbers CSV
-number_select_function <- function(qual, subj, size, grade_structure) {
+number_select_function <- function(AcadYr_sel, qual, subj, size, grade_structure) {
   filter_selection <- paste0(qual, subj, size, grade_structure)
   qual_grades <- filter(
     grade_lookup,
@@ -66,7 +66,7 @@ number_select_function <- function(qual, subj, size, grade_structure) {
   grade_list <- qual_grades$GRADE
 
   table <- stud_numbers %>%
-    filter(QUAL_ID == filter_selection) %>%
+    filter(AcadYr == AcadYr_sel & QUAL_ID == filter_selection) %>%
     select(PRIOR_BAND, grade_list)
 
   return(table)
@@ -77,15 +77,15 @@ number_select_function <- function(qual, subj, size, grade_structure) {
 # -----------------------------------------------------------------------------------------------------------------------------
 
 # Returns a table from the Student Percentages CSV
-percentage_select_function <- function(qual, subj, size, grade_structure) {
+percentage_select_function <- function(AcadYr_sel, qual, subj, size, grade_structure) {
   filter_selection <- paste0(qual, subj, size, grade_structure)
   qual_grades <- filter(grade_lookup, SUBLEVNO == qual & SUBJ == subj & SIZE == size & gradeStructure == grade_structure)
 
   # Grades already sorted so just need to extract list of grades
   grade_list <- qual_grades$GRADE
 
   table <- stud_percentages %>%
-    filter(QUAL_ID == filter_selection) %>%
+    filter(AcadYr == AcadYr_sel & QUAL_ID == filter_selection) %>%
     select(PRIOR_BAND, grade_list)
 
   return(table)
@@ -166,12 +166,12 @@ grade_boundaries <- c("<1", "1-<2", "2-<3", "3-<4", "4-<5", "5-<6", "6-<7", "7-<
 
 # Create a fixed table for example table
 user_selection_example <- qual_lookup %>%
-  filter(Qual_Description == "GCE A level" & Subject == "Mathematics" & ASIZE == 1 & gradeStructure == "*,A,B,C,D,E") %>%
+  filter(AcadYr == max(AcadYr) & Qual_Description == "GCE A level" & Subject == "Mathematics" & ASIZE == 1 & gradeStructure == "*,A,B,C,D,E") %>%
   distinct()
 
 
 example_data <- number_select_function(
-  user_selection_example$SUBLEVNO, user_selection_example$SUBJ,
+  user_selection_example$AcadYr, user_selection_example$SUBLEVNO, user_selection_example$SUBJ,
   user_selection_example$SIZE, user_selection_example$gradeStructure
 ) %>%
   rename("Prior Band" = PRIOR_BAND) %>%