Skip to content

Going Deeper with R (RStudio)

Functions

This lesson is called Functions, part of the Going Deeper with R (RStudio) course. This lesson is called Functions, part of the Going Deeper with R (RStudio) course.

Transcript

Click on the transcript to go to that point in the video. Please note that transcripts are auto generated and may contain minor inaccuracies.

View code shown in video

# Load Packages -----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directories ------------------------------------------------------

dir_create("data-raw")

# Download Data -----------------------------------------------------------

# https://www.oregon.gov/ode/educator-resources/assessment/Pages/Assessment-Group-Reports.aspx

# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/pagr_schools_math_tot_raceethnicity_2122.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx")
# 
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/TestResults2019/pagr_schools_math_tot_raceethnicity_1819.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx")
# 
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2018/pagr_schools_math_raceethnicity_1718.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1718.xlsx")
# 
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2017/pagr_schools_math_raceethnicity_1617.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1617.xlsx")
# 
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2016/pagr_schools_math_raceethnicity_1516.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1516.xlsx")


# Import, Tidy, and Clean Data --------------------------------------------

clean_math_proficiency_data <- function(raw_data) {
  
  read_excel(path = raw_data) |> 
    clean_names() |> 
    filter(student_group == "Total Population (All Students)") |> 
    filter(grade_level == "Grade 3") |> 
    select(academic_year, school_id, contains("number_level")) |> 
    pivot_longer(cols = starts_with("number_level"),
                 names_to = "proficiency_level",
                 values_to = "number_of_students") |> 
    mutate(proficiency_level = case_when(
      proficiency_level == "number_level_4" ~ "4",
      proficiency_level == "number_level_3" ~ "3",
      proficiency_level == "number_level_2" ~ "2",
      proficiency_level == "number_level_1" ~ "1"
    )) |> 
    mutate(number_of_students = parse_number(number_of_students)) |> 
    group_by(school_id) |> 
    mutate(pct = number_of_students / sum(number_of_students, na.rm = TRUE)) |> 
    ungroup()
  
}


third_grade_math_proficiency_2021_2022 <-
  clean_math_proficiency_data(raw_data = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx")

third_grade_math_proficiency_2018_2019 <-
  clean_math_proficiency_data(raw_data = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx")


third_grade_math_proficiency <- 
  bind_rows(third_grade_math_proficiency_2018_2019,
            third_grade_math_proficiency_2021_2022)

Your Turn

Solution

# Load Packages -----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directories ------------------------------------------------------

dir_create("data-raw")

# Download Data -----------------------------------------------------------

# https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx

# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20222023.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20222023.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20212022.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20212022.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20202021.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20202021.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20192020.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20182019.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20182019.xlsx")

# Import, Tidy, and Clean Data -----------------------------------------------------

clean_enrollment_data <- function(excel_file,
                                  sheet_name) {
  
  read_excel(path = excel_file,
             sheet = sheet_name) |> 
    clean_names() |> 
    select(1, 3, 7:19) |> 
    select(-contains("percent")) |> 
    set_names("district_institution_id",
              "school_institution_id",
              "american_indian_alaska_native",
              "asian",
              "native_hawaiian_pacific_islander",
              "black_african_american",
              "hispanic_latino",
              "white",
              "multi_racial") |> 
    pivot_longer(cols = -c(district_institution_id, school_institution_id),
                 names_to = "race_ethnicity",
                 values_to = "number_of_students") |> 
    mutate(race_ethnicity = case_when(
      race_ethnicity == "american_indian_alaska_native" ~ "American Indian Alaska Native",
      race_ethnicity == "asian" ~ "Asian",
      race_ethnicity == "black_african_american" ~ "Black/African American",
      race_ethnicity == "hispanic_latino" ~ "Hispanic/Latino",
      race_ethnicity == "multiracial" ~ "Multi-Racial",
      race_ethnicity == "native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander",
      race_ethnicity == "white" ~ "White",
      race_ethnicity == "multi_racial" ~ "Multiracial"
    )) |> 
    mutate(number_of_students = parse_number(number_of_students)) |> 
    group_by(district_institution_id, race_ethnicity) |> 
    summarize(number_of_students = sum(number_of_students, na.rm = TRUE)) |> 
    ungroup() |> 
    group_by(district_institution_id) |> 
    mutate(pct = number_of_students / sum(number_of_students)) |> 
    ungroup() |> 
    mutate(year = sheet_name) 
  
}

enrollment_by_race_ethnicity_2022_2023 <- 
  clean_enrollment_data(excel_file = "data-raw/fallmembershipreport_20222023.xlsx",
                        sheet_name = "School 2022-23") 

enrollment_by_race_ethnicity_2021_2022 <-
  clean_enrollment_data(excel_file = "data-raw/fallmembershipreport_20212022.xlsx",
                        sheet_name = "School 2021-22") 

enrollment_by_race_ethnicity <- 
  bind_rows(enrollment_by_race_ethnicity_2021_2022,
            enrollment_by_race_ethnicity_2022_2023)

Create a function to clean each year of enrollment data.
To check that your function works, create enrollment_by_race_ethnicity_2021_2022 and enrollment_by_race_ethnicity_2022_2023 data frames and then bind them together with bind_rows().

This exercise is challenging! Use the starter code below to help you if you need to.

# Load Packages -----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directories ------------------------------------------------------

dir_create("data-raw")

# Download Data -----------------------------------------------------------

# https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx

# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20222023.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20222023.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20212022.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20212022.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20202021.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20202021.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20192020.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20182019.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20182019.xlsx")

# Import, Tidy, and Clean Data -----------------------------------------------------

clean_enrollment_data <- function(excel_file,
                                  sheet_name) {
  
  read_excel(path = YOURCODEHERE,
             sheet = YOURCODEHERE) |> 
    clean_names() |> 
    
    # I've selected by column position rather than names 
    # because the column names vary in the data between years
    # but they're always in the same positions
    select(1, 3, 7:19) |> 
    
    select(-contains("percent")) |> 
    set_names("district_institution_id",
              YOURCODEHERE,
              YOURCODEHERE,
              YOURCODEHERE,
              YOURCODEHERE,
              YOURCODEHERE,
              YOURCODEHERE,
              YOURCODEHERE,
              YOURCODEHERE) |> 
    pivot_longer(cols = -c(district_institution_id, school_institution_id),
                 names_to = "race_ethnicity",
                 values_to = "number_of_students") |> 
    mutate(race_ethnicity = case_when(
      race_ethnicity == "american_indian_alaska_native" ~ "American Indian Alaska Native",
      race_ethnicity == "asian" ~ "Asian",
      race_ethnicity == "black_african_american" ~ "Black/African American",
      race_ethnicity == "hispanic_latino" ~ "Hispanic/Latino",
      race_ethnicity == "multiracial" ~ "Multi-Racial",
      race_ethnicity == "native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander",
      race_ethnicity == "white" ~ "White",
      race_ethnicity == "multi_racial" ~ "Multiracial"
    )) |> 
    mutate(number_of_students = parse_number(number_of_students)) |> 
    group_by(district_institution_id, race_ethnicity) |> 
    summarize(number_of_students = sum(number_of_students, na.rm = TRUE)) |> 
    ungroup() |> 
    group_by(district_institution_id) |> 
    mutate(pct = number_of_students / sum(number_of_students)) |> 
    ungroup() |> 
    mutate(year = sheet_name) 
  
}

enrollment_by_race_ethnicity_2021_2022 <- 
  clean_enrollment_data(excel_file = YOURCODEHERE,
                        sheet_name = YOURCODEHERE) 

enrollment_by_race_ethnicity_2022_2023 <-
  clean_enrollment_data(excel_file = YOURCODEHERE,
                        sheet_name = YOURCODEHERE) 

enrollment_by_race_ethnicity <- 
  bind_rows(enrollment_by_race_ethnicity_2021_2022,
            enrollment_by_race_ethnicity_2022_2023)

Learn More

If you want to learn more about the global options I showed in this lesson, the video from another lesson is below.

Transcript

Click on the transcript to go to that point in the video. Please note that transcripts are auto generated and may contain minor inaccuracies.

Further reading:

Have any questions? Put them below and we will help you out!

You need to be signed-in to comment on this post. Login.

Course Content

44 Lessons

Downloading and Importing Data

Overview of Tidy Data

Tidy Data Rule #1: Every Column is a Variable

Tidy Data Rule #3: Every Cell is a Single Value

Tidy Data Rule #2: Every Row is an Observation

Changing Variable Types

Dealing with Missing Data

Advanced Summarizing

Binding Data Frames

Bring It All Together (Advanced Data Wrangling)

Best Practices in Data Visualization

Pipe Data into ggplot

Reorder Plots to Highlight Findings

Use Color to Highlight Findings

Add Descriptive Labels to Your Plots

Use Titles to Highlight Findings

Use Annotations to Explain

Create a Custom Theme

Customize Your Fonts

Try New Plot Types

Bring it All Together (Advanced Data Visualization)

Advanced Markdown

Advanced YAML and Code Chunk Options

Making Your Reports Shine: Word Edition

Making Your Reports Shine: PDF Edition

Making Your Reports Shine: HTML Edition

Publishing Your Work

Quarto Extensions

Parameterized Reporting, Part 1

Parameterized Reporting, Part 2

Parameterized Reporting, Part 3

Wrapping up Going Deeper with R