Skip to content

Commit

Permalink
working year code for 2021 and 2022
Browse files Browse the repository at this point in the history
  • Loading branch information
Chris-bennettWk committed Nov 7, 2023
1 parent fc92ad8 commit 67c5c93
Show file tree
Hide file tree
Showing 8 changed files with 221 additions and 30 deletions.
20 changes: 15 additions & 5 deletions R/dashboard_panels.R
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,21 @@ dashboard_panel <- function() {
class = "well",
style = "min-height: 100%; height: 100%; overflow-y: visible",
gov_row(
column(
width = 6,
selectizeInput(
inputId = "AcadYr_select",
label = "1. Select an academic year",
choices = list(AcadYr = sort(unique(qual_lookup$AcadYr))),
selected = max(qual_lookup$AcadYr)
)
),

column(
width = 6,
selectizeInput(
inputId = "qual_select",
label = "1. Select a qualification",
label = "2. Select a qualification",
choices = list(Qualifications = sort(unique(qual_lookup$Qual_Description))),
selected = "GCE A level"
)
Expand All @@ -144,7 +154,7 @@ dashboard_panel <- function() {
width = 6,
selectizeInput(
inputId = "subj_select",
label = "2. Select a subject",
label = "3. Select a subject",
choices = list(Subjects = sort(unique(qual_lookup$Subject))),
selected = "Mathematics"
)
Expand All @@ -154,7 +164,7 @@ dashboard_panel <- function() {
width = 6,
selectizeInput(
inputId = "size_select",
label = "3. Select a size",
label = "4. Select a size",
choices = list(Sizes = sort(qual_lookup$SIZE))
)
),
Expand All @@ -163,15 +173,15 @@ dashboard_panel <- function() {
width = 6,
selectizeInput(
inputId = "grade_structure_select",
label = "4. Select a grade structure",
label = "5. Select a grade structure",
choices = list(GradeStructures = sort(qual_lookup$gradeStructure))
)
),

column(
width = 12,
radioButtons(inputId="format",
label="5. Select format of data: ",
label="6. Select format of data: ",
choices=c("Numbers data", "Percentage data")
),
uiOutput("chart_band_appear")
Expand Down
177 changes: 176 additions & 1 deletion background_scripts/data_processing.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ current_year <- "2022A"

# establish connection to server
con <- DBI::dbConnect(odbc::odbc(), driver = "SQL Server",
server = "3dcpri-pdb16\\acsqls")
server = "VMT1PR-DHSQL02")


# Select data from SQL tables
Expand Down Expand Up @@ -150,14 +150,189 @@ grades_ordered_lookup <- bind_rows(grades_char, grades_num) %>%



# -----------------------------------------------------------------------------------------------------------------------------
# ---- add academic year to data ----
# -----------------------------------------------------------------------------------------------------------------------------

student_numbers_bind <- student_numbers %>%
mutate(AcadYr = "2021/22") %>%
select(AcadYr, everything())

student_percentages_bind <- student_percentages %>%
mutate(AcadYr = "2021/22") %>%
select(AcadYr, everything())

qual_lookup_bind <- qual_lookup %>%
mutate(AcadYr = "2021/22") %>%
select(AcadYr, everything())

grades_ordered_lookup_bind <- grades_ordered_lookup %>%
mutate(AcadYr = "2021/22") %>%
select(AcadYr, everything())


# grade_list %>% filter(SUBLEVNO == 253, SUBJ == 20596, ASIZE == 1)




# -----------------------------------------------------------------------------------------------------------------------------
# ---- Things to change between runs ----
# -----------------------------------------------------------------------------------------------------------------------------

tm_file = "[KS5_STATISTICS_RESTRICTED].[TM_2022].[TM_data_2021U]"

current_year <- "2021U"

# -----------------------------------------------------------------------------------------------------------------------------
# ---- Reading in the data from SQL tables ----
# -----------------------------------------------------------------------------------------------------------------------------


# establish connection to server
con <- DBI::dbConnect(odbc::odbc(), driver = "SQL Server",
server = "VMT1PR-DHSQL02")


# Select data from SQL tables

tm_data_raw <- tbl(con, sql(paste("select * from", tm_file))) %>% collect()

# disconnect
DBI::dbDisconnect(con)




# -----------------------------------------------------------------------------------------------------------------------------
# ---- SORTING NAs ----
# -----------------------------------------------------------------------------------------------------------------------------

colSums(is.na(tm_data_raw)) %>% as.data.frame()
# NAs in prior band which need removing
# NAs in subj because no subj code was assigned within the SQL production code
# SUBJ is assigned by joining on the QUAL_SUBJ_LOOKUP,
# but some subjects are filtered out of the L3VA process if they are not entered in 5 or more institutions
# for the TMs we don't mind this rule, so we've decided to leave them in but we will have to create new SUBJ codes
# to ensure I'm not overwriting any existing SUBJ codes, I'm generating 3 digit random numbers (SUBJ is usually 5 digits)


tm_data_subj_na <- tm_data_raw %>%
filter(!(is.na(PRIOR_BAND)),
is.na(SUBJ)) %>%
group_by(Subject) %>%
mutate(SUBJ = sample(100:900,1))

tm_data <- tm_data_raw %>%
filter(!(is.na(PRIOR_BAND)),
!(is.na(SUBJ))) %>%
bind_rows(tm_data_subj_na)

colSums(is.na(tm_data)) %>% as.data.frame()



# -----------------------------------------------------------------------------------------------------------------------------
# ---- NUMBERS & PERCENTAGES CALCULATED ----
# -----------------------------------------------------------------------------------------------------------------------------


student_numbers <- tm_data %>%
mutate(GRADE = case_when(GRADE == "Fail" & SUBLEVNO != 130 ~ "U",
TRUE ~ GRADE),
SIZE = case_when(ASIZE == 0 ~ GSIZE,
TRUE ~ ASIZE),
QUAL_ID = paste0(SUBLEVNO, SUBJ, SIZE, gradeStructure),
ROW_ID = paste0(SUBLEVNO, SUBJ, SIZE, PRIOR_BAND, gradeStructure)) %>%
mutate(across(c(everything(), -total_students), ~as.character(.))) %>%
arrange(ROW_ID) %>%
pivot_wider(names_from = GRADE, values_from = total_students)



student_percentages <- student_numbers %>%
janitor::adorn_percentages() %>%
mutate_if(is.numeric, function(x){round(x*100, 2)}) %>%
mutate_if(is.numeric, ~paste0(.x, "%"))





# -----------------------------------------------------------------------------------------------------------------------------
# ---- LOOKUP & FULL GRADE OPTIONS CALCULATED ----
# -----------------------------------------------------------------------------------------------------------------------------


qual_lookup <- tm_data %>%
select(Qual_Description, SUBLEVNO, Subject, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
distinct() %>%
mutate(SIZE = case_when(ASIZE == 0 ~ GSIZE,
TRUE ~ ASIZE))


grade_lookup_u <- tm_data %>%
select(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
distinct() %>%
mutate(GRADE = case_when(SUBLEVNO == 130 ~ "Fail",
TRUE ~ "U"))

grade_lookup_sep <- tm_data %>%
select(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
distinct() %>%
mutate(GRADE = gradeStructure) %>%
separate_rows(. , GRADE, sep = ",")

grade_lookup <- bind_rows(grade_lookup_u, grade_lookup_sep) %>%
arrange(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure, GRADE) %>%
mutate(SIZE = case_when(ASIZE == 0 ~ GSIZE,
TRUE ~ ASIZE))



# would like to sort numeric and character grades differently so that all grades go from low - high
# for numeric this is ascending, for character this is descending
# will need to split again based on numeric and character and then re-combine again
grades_char <- grade_lookup %>%
mutate(char_grade_check = is.na(suppressWarnings(as.numeric(grade_lookup$GRADE)))) %>% # gives extra column - True is character, False is number
filter(char_grade_check == TRUE) %>%
arrange(desc(GRADE))

grades_num <- grade_lookup %>%
mutate(char_grade_check = is.na(suppressWarnings(as.numeric(grade_lookup$GRADE)))) %>% # gives extra column - True is character, False is number
filter(char_grade_check == FALSE) %>%
arrange(GRADE)

grades_ordered_lookup <- bind_rows(grades_char, grades_num) %>%
arrange(SUBLEVNO)

student_numbers <- student_numbers %>%
mutate(AcadYr = "2020/21") %>%
select(AcadYr, everything())

student_percentages <- student_percentages %>%
mutate(AcadYr = "2020/21") %>%
select(AcadYr, everything())

qual_lookup <- qual_lookup %>%
mutate(AcadYr = "2020/21") %>%
select(AcadYr, everything())

grades_ordered <- grades_ordered_lookup %>%
mutate(AcadYr = "2020/21") %>%
select(AcadYr, everything())



# -----------------------------------------------------------------------------------------------------------------------------
# ---- Saving Data ----
# -----------------------------------------------------------------------------------------------------------------------------

student_numbers <- bind_rows(student_numbers, student_numbers_bind)
student_percentages <- bind_rows(student_percentages, student_percentages_bind)
qual_lookup <- bind_rows(qual_lookup, qual_lookup_bind)
grades_ordered_lookup <- bind_rows(grades_ordered_lookup, grades_ordered_lookup_bind)

saveRDS(student_numbers, "./data/all_student_numbers.rds")
saveRDS(student_percentages, "./data/all_student_percentages.rds")

Expand Down
Binary file modified data/all_student_numbers.rds
Binary file not shown.
Binary file modified data/all_student_percentages.rds
Binary file not shown.
Binary file modified data/grade_lookup.rds
Binary file not shown.
Binary file modified data/qual_lookup.rds
Binary file not shown.
12 changes: 6 additions & 6 deletions global.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ tidy_code_function <- function() {
# -----------------------------------------------------------------------------------------------------------------------------

# Returns a table from the Student Numbers CSV
number_select_function <- function(qual, subj, size, grade_structure) {
number_select_function <- function(AcadYr_sel, qual, subj, size, grade_structure) {
filter_selection <- paste0(qual, subj, size, grade_structure)
qual_grades <- filter(
grade_lookup,
Expand All @@ -66,7 +66,7 @@ number_select_function <- function(qual, subj, size, grade_structure) {
grade_list <- qual_grades$GRADE

table <- stud_numbers %>%
filter(QUAL_ID == filter_selection) %>%
filter(AcadYr == AcadYr_sel & QUAL_ID == filter_selection) %>%
select(PRIOR_BAND, grade_list)

return(table)
Expand All @@ -77,15 +77,15 @@ number_select_function <- function(qual, subj, size, grade_structure) {
# -----------------------------------------------------------------------------------------------------------------------------

# Returns a table from the Student Percentages CSV
percentage_select_function <- function(qual, subj, size, grade_structure) {
percentage_select_function <- function(AcadYr_sel, qual, subj, size, grade_structure) {
filter_selection <- paste0(qual, subj, size, grade_structure)
qual_grades <- filter(grade_lookup, SUBLEVNO == qual & SUBJ == subj & SIZE == size & gradeStructure == grade_structure)

# Grades already sorted so just need to extract list of grades
grade_list <- qual_grades$GRADE

table <- stud_percentages %>%
filter(QUAL_ID == filter_selection) %>%
filter(AcadYr == AcadYr_sel & QUAL_ID == filter_selection) %>%
select(PRIOR_BAND, grade_list)

return(table)
Expand Down Expand Up @@ -166,12 +166,12 @@ grade_boundaries <- c("<1", "1-<2", "2-<3", "3-<4", "4-<5", "5-<6", "6-<7", "7-<

# Create a fixed table for example table
user_selection_example <- qual_lookup %>%
filter(Qual_Description == "GCE A level" & Subject == "Mathematics" & ASIZE == 1 & gradeStructure == "*,A,B,C,D,E") %>%
filter(AcadYr == max(AcadYr) & Qual_Description == "GCE A level" & Subject == "Mathematics" & ASIZE == 1 & gradeStructure == "*,A,B,C,D,E") %>%
distinct()


example_data <- number_select_function(
user_selection_example$SUBLEVNO, user_selection_example$SUBJ,
user_selection_example$AcadYr, user_selection_example$SUBLEVNO, user_selection_example$SUBJ,
user_selection_example$SIZE, user_selection_example$gradeStructure
) %>%
rename("Prior Band" = PRIOR_BAND) %>%
Expand Down
Loading

0 comments on commit 67c5c93

Please sign in to comment.