Skip to content
R for the Rest of Us Logo

Going Deeper with R

Functions

Transcript

Click on the transcript to go to that point in the video. Please note that transcripts are auto generated and may contain minor inaccuracies.

View code shown in video
# Load Packages -----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directories ------------------------------------------------------

dir_create("data-raw")

# Download Data -----------------------------------------------------------

# https://www.oregon.gov/ode/educator-resources/assessment/Pages/Assessment-Group-Reports.aspx

# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/pagr_schools_math_tot_raceethnicity_2122.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/TestResults2019/pagr_schools_math_tot_raceethnicity_1819.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2018/pagr_schools_math_raceethnicity_1718.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1718.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2017/pagr_schools_math_raceethnicity_1617.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1617.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2016/pagr_schools_math_raceethnicity_1516.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1516.xlsx")

# Import, Tidy, and Clean Data --------------------------------------------

# Load Packages -----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directories ------------------------------------------------------

dir_create("data-raw")

# Download Data -----------------------------------------------------------

# https://www.oregon.gov/ode/educator-resources/assessment/Pages/Assessment-Group-Reports.aspx

# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/pagr_schools_math_tot_raceethnicity_2122.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/TestResults2019/pagr_schools_math_tot_raceethnicity_1819.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2018/pagr_schools_math_raceethnicity_1718.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1718.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2017/pagr_schools_math_raceethnicity_1617.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1617.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2016/pagr_schools_math_raceethnicity_1516.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1516.xlsx")

# Import, Tidy, and Clean Data -------------------------------------------

clean_math_proficiency_data <- function(raw_data) {
  read_excel(
    path = raw_data
  ) |>
    clean_names() |>
    filter(student_group == "Total Population (All Students)") |>
    filter(grade_level == "Grade 3") |>
    select(academic_year, school_id, contains("number_level")) |>
    pivot_longer(
      cols = starts_with("number_level"),
      names_to = "proficiency_level",
      values_to = "number_of_students"
    ) |>
    mutate(proficiency_level = parse_number(proficiency_level)) |>
    mutate(number_of_students = parse_number(number_of_students)) |>
    mutate(
      pct = number_of_students / sum(number_of_students, na.rm = TRUE),
      .by = school_id
    )
}


third_grade_math_proficiency_2021_2022 <-
  clean_math_proficiency_data(
    raw_data = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx"
  )

third_grade_math_proficiency_2018_2019 <-
  clean_math_proficiency_data(
    raw_data = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx"
  )

third_grade_math_proficiency <-
  bind_rows(
    third_grade_math_proficiency_2021_2022,
    third_grade_math_proficiency_2018_2019
  )

Your Turn

  1. Create a function to clean each year of enrollment data.

  2. To check that your function works, create enrollment_by_race_ethnicity_2021_2022 and enrollment_by_race_ethnicity_2022_2023 data frames and then bind them together with bind_rows().

This exercise is challenging! Use the starter code below to help you if you need to.

# Load Packages ----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directory -------------------------------------------------------

dir_create("data-raw")

# Download Data ----------------------------------------------------------

# https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx

# download.file(
#   url = "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20222023.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20222023.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20212022.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20212022.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20202021.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20202021.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20192020.xlsx"
# )

# download.file(
#   "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20182019.xlsx",
#   mode = "wb",
#   destfile = "data-raw/fallmembershipreport_20182019.xlsx"
# )

# Tidy and Clean Function ------------------------------------------------

clean_enrollment_data <- function(excel_file, sheet_name) {
  read_excel(
    path = YOURCODEHERE,
    sheet = YOURCODEHERE
  ) |>
    clean_names() |>
    select(
      1,
      3,
      7:19
    ) |>
    select(-contains("percent")) |>
    set_names(
      "district_institution_id",
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
	  YOURCODEHERE,
    ) |>
    pivot_longer(
      cols = -c(district_institution_id, school_institution_id),
      names_to = "race_ethnicity",
      values_to = "number_of_students"
    ) |>
    mutate(
      race_ethnicity = recode_values(
        race_ethnicity,
        "american_indian_alaska_native" ~ "American Indian Alaska Native",
        "asian" ~ "Asian",
        "black_african_american" ~ "Black/African American",
        "hispanic_latino" ~ "Hispanic/Latino",
        "multiracial" ~ "Multi-Racial",
        "native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander",
        "white" ~ "White",
        "multi_racial" ~ "Multiracial"
      )
    ) |>
    mutate(number_of_students = parse_number(number_of_students)) |>
    group_by(district_institution_id, race_ethnicity) |>
    summarize(number_of_students = sum(number_of_students, na.rm = TRUE)) |>
    ungroup() |>
    group_by(district_institution_id) |>
    mutate(pct = number_of_students / sum(number_of_students)) |>
    ungroup() |>
    mutate(year = sheet_name) |>
    mutate(year = str_remove(year, "School "))
}

# Tidy and Clean Data ----------------------------------------------------

enrollment_by_race_ethnicity_2022_2023 <-
  clean_enrollment_data(
    excel_file = YOURCODEHERE,
    sheet_name = YOURCODEHERE
  )

enrollment_by_race_ethnicity_2021_2022 <-
  clean_enrollment_data(
    excel_file = YOURCODEHERE,
    sheet_name = YOURCODEHERE
  )

bind_rows(
  YOURCODEHERE,
  YOURCODEHERE
)

Have any questions? Put them below and we will help you out!

You need to be signed-in to comment on this post. Login.

Course Content

44 Lessons