Skip to content

R in 3 Months (Fall 2025)

Functions

This lesson is called Functions, part of the R in 3 Months (Fall 2025) course.

Transcript

Click on the transcript to go to that point in the video. Please note that transcripts are auto-generated and may contain minor inaccuracies.

View code shown in video

# Load Packages -----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directories ------------------------------------------------------

# Folder that the download and import steps below read from and write to.
dir_create(path = "data-raw")

# Download Data -----------------------------------------------------------

# https://www.oregon.gov/ode/educator-resources/assessment/Pages/Assessment-Group-Reports.aspx

# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/pagr_schools_math_tot_raceethnicity_2122.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx")
# 
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/TestResults2019/pagr_schools_math_tot_raceethnicity_1819.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx")
# 
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2018/pagr_schools_math_raceethnicity_1718.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1718.xlsx")
# 
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2017/pagr_schools_math_raceethnicity_1617.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1617.xlsx")
# 
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2016/pagr_schools_math_raceethnicity_1516.xlsx",
#               mode = "wb",
#               destfile = "data-raw/pagr_schools_math_raceethnicity_1516.xlsx")


# Import, Tidy, and Clean Data --------------------------------------------

# Import one math assessment Excel file and reshape it into one row per
# school and proficiency level for third graders.
#
# Arguments:
#   raw_data  Path to the Excel file, passed to read_excel(path = ).
#
# Returns an ungrouped data frame with the columns academic_year, school_id,
# proficiency_level ("1" to "4"), number_of_students, and pct (the level's
# share of that school's summed student counts).
clean_math_proficiency_data <- function(raw_data) {
  # Keep only the all-students rows for Grade 3, plus the ID columns and the
  # per-level student counts.
  third_grade_totals <- read_excel(path = raw_data) |>
    clean_names() |>
    filter(
      student_group == "Total Population (All Students)",
      grade_level == "Grade 3"
    ) |>
    select(academic_year, school_id, contains("number_level"))

  # One row per school and proficiency level, with the level reduced to its
  # digit and the count converted to a number.
  level_counts <- third_grade_totals |>
    pivot_longer(
      cols = starts_with("number_level"),
      names_to = "proficiency_level",
      values_to = "number_of_students"
    ) |>
    mutate(
      proficiency_level = case_when(
        proficiency_level == "number_level_4" ~ "4",
        proficiency_level == "number_level_3" ~ "3",
        proficiency_level == "number_level_2" ~ "2",
        proficiency_level == "number_level_1" ~ "1"
      ),
      number_of_students = parse_number(number_of_students)
    )

  # Share of each level within its school; missing counts are left out of
  # the school total.
  level_counts |>
    group_by(school_id) |>
    mutate(pct = number_of_students / sum(number_of_students, na.rm = TRUE)) |>
    ungroup()
}


# Clean each school year's file with the same function, then stack the
# years into a single data frame.
third_grade_math_proficiency_2018_2019 <- clean_math_proficiency_data(
  raw_data = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx"
)

third_grade_math_proficiency_2021_2022 <- clean_math_proficiency_data(
  raw_data = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx"
)

third_grade_math_proficiency <- bind_rows(
  third_grade_math_proficiency_2018_2019,
  third_grade_math_proficiency_2021_2022
)

Your Turn

Solution

# Load Packages -----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directories ------------------------------------------------------

# Folder that the download and import steps below read from and write to.
dir_create(path = "data-raw")

# Download Data -----------------------------------------------------------

# https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx

# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20222023.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20222023.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20212022.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20212022.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20202021.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20202021.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20192020.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20182019.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20182019.xlsx")

# Import, Tidy, and Clean Data -----------------------------------------------------

# Import one sheet of a fall membership report and summarize enrollment by
# race/ethnicity for each district.
#
# Arguments:
#   excel_file  Path to the Excel file, passed to read_excel(path = ).
#   sheet_name  Sheet to read, passed to read_excel(sheet = ). The same
#               value is stored unchanged in the `year` column.
#
# Returns an ungrouped data frame with one row per district_institution_id
# and race_ethnicity, with the columns number_of_students, pct (the group's
# share of the district's summed count), and year.
clean_enrollment_data <- function(excel_file,
                                  sheet_name) {

  read_excel(path = excel_file,
             sheet = sheet_name) |>
    clean_names() |>
    # Columns are chosen by position, and the percent columns in that range
    # are then dropped, so the names below are assigned positionally.
    select(1, 3, 7:19) |>
    select(-contains("percent")) |>
    set_names("district_institution_id",
              "school_institution_id",
              "american_indian_alaska_native",
              "asian",
              "native_hawaiian_pacific_islander",
              "black_african_american",
              "hispanic_latino",
              "white",
              "multi_racial") |>
    pivot_longer(cols = -c(district_institution_id, school_institution_id),
                 names_to = "race_ethnicity",
                 values_to = "number_of_students") |>
    # One branch per column name assigned by set_names() above.
    mutate(race_ethnicity = case_when(
      race_ethnicity == "american_indian_alaska_native" ~ "American Indian Alaska Native",
      race_ethnicity == "asian" ~ "Asian",
      race_ethnicity == "black_african_american" ~ "Black/African American",
      race_ethnicity == "hispanic_latino" ~ "Hispanic/Latino",
      race_ethnicity == "native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander",
      race_ethnicity == "white" ~ "White",
      race_ethnicity == "multi_racial" ~ "Multiracial"
    )) |>
    mutate(number_of_students = parse_number(number_of_students)) |>
    # Add up the school rows within each district; .groups = "drop" returns
    # an ungrouped result without the regrouping message.
    group_by(district_institution_id, race_ethnicity) |>
    summarize(number_of_students = sum(number_of_students, na.rm = TRUE),
              .groups = "drop") |>
    # Share of each race/ethnicity group within its district.
    group_by(district_institution_id) |>
    mutate(pct = number_of_students / sum(number_of_students)) |>
    ungroup() |>
    mutate(year = sheet_name)

}

# Clean each year's report with the same function, then stack the years
# into a single data frame.
enrollment_by_race_ethnicity_2021_2022 <- clean_enrollment_data(
  excel_file = "data-raw/fallmembershipreport_20212022.xlsx",
  sheet_name = "School 2021-22"
)

enrollment_by_race_ethnicity_2022_2023 <- clean_enrollment_data(
  excel_file = "data-raw/fallmembershipreport_20222023.xlsx",
  sheet_name = "School 2022-23"
)

enrollment_by_race_ethnicity <- bind_rows(
  enrollment_by_race_ethnicity_2021_2022,
  enrollment_by_race_ethnicity_2022_2023
)

Create a function to clean each year of enrollment data.
To check that your function works, create enrollment_by_race_ethnicity_2021_2022 and enrollment_by_race_ethnicity_2022_2023 data frames and then bind them together with bind_rows().

This exercise is challenging! Use the starter code below to help you if you need to.

# Load Packages -----------------------------------------------------------

library(tidyverse)
library(fs)
library(readxl)
library(janitor)

# Create Directories ------------------------------------------------------

# Folder that the download and import steps below read from and write to.
dir_create(path = "data-raw")

# Download Data -----------------------------------------------------------

# https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx

# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20222023.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20222023.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20212022.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20212022.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20202021.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20202021.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20192020.xlsx")
# 
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20182019.xlsx",
#               mode = "wb",
#               destfile = "data-raw/fallmembershipreport_20182019.xlsx")

# Import, Tidy, and Clean Data -----------------------------------------------------

# Starter template: import one sheet of a fall membership report and
# summarize enrollment by race/ethnicity for each district. Replace every
# YOURCODEHERE before running.
#
# Arguments:
#   excel_file  Path to the Excel file to import.
#   sheet_name  Name of the sheet to import. The same value is stored
#               unchanged in the `year` column.
#
# Returns an ungrouped data frame with one row per district_institution_id
# and race_ethnicity, with the columns number_of_students, pct (the group's
# share of the district's summed count), and year.
clean_enrollment_data <- function(excel_file,
                                  sheet_name) {

  read_excel(path = YOURCODEHERE,
             sheet = YOURCODEHERE) |>
    clean_names() |>

    # I've selected by column position rather than names
    # because the column names vary in the data between years
    # but they're always in the same positions
    select(1, 3, 7:19) |>

    select(-contains("percent")) |>
    # The nine names are assigned by position to the remaining columns.
    set_names("district_institution_id",
              YOURCODEHERE,
              YOURCODEHERE,
              YOURCODEHERE,
              YOURCODEHERE,
              YOURCODEHERE,
              YOURCODEHERE,
              YOURCODEHERE,
              YOURCODEHERE) |>
    pivot_longer(cols = -c(district_institution_id, school_institution_id),
                 names_to = "race_ethnicity",
                 values_to = "number_of_students") |>
    # The column names you assign in set_names() must match the left-hand
    # sides below, otherwise race_ethnicity becomes NA for that column.
    mutate(race_ethnicity = case_when(
      race_ethnicity == "american_indian_alaska_native" ~ "American Indian Alaska Native",
      race_ethnicity == "asian" ~ "Asian",
      race_ethnicity == "black_african_american" ~ "Black/African American",
      race_ethnicity == "hispanic_latino" ~ "Hispanic/Latino",
      race_ethnicity == "native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander",
      race_ethnicity == "white" ~ "White",
      race_ethnicity == "multi_racial" ~ "Multiracial"
    )) |>
    mutate(number_of_students = parse_number(number_of_students)) |>
    # Add up the school rows within each district; .groups = "drop" returns
    # an ungrouped result without the regrouping message.
    group_by(district_institution_id, race_ethnicity) |>
    summarize(number_of_students = sum(number_of_students, na.rm = TRUE),
              .groups = "drop") |>
    # Share of each race/ethnicity group within its district.
    group_by(district_institution_id) |>
    mutate(pct = number_of_students / sum(number_of_students)) |>
    ungroup() |>
    mutate(year = sheet_name)

}

# Fill in the file path and sheet name for each year, then stack the two
# cleaned data frames into one.
enrollment_by_race_ethnicity_2021_2022 <- clean_enrollment_data(
  excel_file = YOURCODEHERE,
  sheet_name = YOURCODEHERE
)

enrollment_by_race_ethnicity_2022_2023 <- clean_enrollment_data(
  excel_file = YOURCODEHERE,
  sheet_name = YOURCODEHERE
)

enrollment_by_race_ethnicity <- bind_rows(
  enrollment_by_race_ethnicity_2021_2022,
  enrollment_by_race_ethnicity_2022_2023
)

Learn More

If you want to learn more about the global options I showed in this lesson, the video from another lesson is below.

Transcript

Click on the transcript to go to that point in the video. Please note that transcripts are auto-generated and may contain minor inaccuracies.

Further reading:

Have any questions? Put them below and we will help you out!

You need to be signed-in to comment on this post. Login.

Course Content

128 Lessons

Welcome to R in 3 Months (Fall 2025)

Complete Pre-Survey

Sign up for Discord

How to Organize Your Files in R in 3 Months

Welcome to Getting Started with R

Install RStudio

Objects and Functions

Examine our Data

Import Our Data Again

Week 1 Live Session (Fall 2025)

Welcome to Fundamentals of R

Update Everything

Start a New Project

group_by() and summarize()

Create a New Data Frame

Bring it All Together (Data Wrangling)

Week 2 Project Assignment

Week 2 Coworking Session (Fall 2025)

Week 2 Live Session (Fall 2025)

The Grammar of Graphics

Setting color and fill Aesthetic Properties

Setting color and fill Scales

Setting x and y Scales

Adding Text to Plots

Bring it All Together (Data Visualization)

Week 3 Project Assignment

Week 3 Coworking Session (Fall 2025)

Week 3 Live Session (Fall 2025)

Quarto Overview

Tips for Working with Quarto

Bring It All Together (Quarto)

An Important Workflow Tip

Week 4 Project Assignment

Week 4 Coworking Session (Fall 2025)

Week 4 Live Session (Fall 2025)

Downloading and Importing Data

Overview of Tidy Data

Tidy Data Rule #1: Every Column is a Variable

Tidy Data Rule #3: Every Cell is a Single Value

Tidy Data Rule #2: Every Row is an Observation

Week 6 Coworking Session (Fall 2025)

Week 6 Live Session (Fall 2025)

Changing Variable Types

Dealing with Missing Data

Advanced Summarizing

Binding Data Frames

Week 7 Coworking Session (Fall 2025)

Week 7 Live Session (Fall 2025)

Bring It All Together (Advanced Data Wrangling)

Week 8 Project Assignment

Week 8 Coworking Session (Fall 2025)

Week 8 Live Session (Fall 2025)

Best Practices in Data Visualization

Pipe Data into ggplot

Reorder Plots to Highlight Findings

Use Color to Highlight Findings

Add Descriptive Labels to Your Plots

Use Titles to Highlight Findings

Use Annotations to Explain

Week 9 Coworking Session (Fall 2025)

Week 9 Live Session (Fall 2025)

Create a Custom Theme

Customize Your Fonts

Try New Plot Types

Bring it All Together (Advanced Data Visualization)

Week 11 Project Assignment

Week 11 Coworking Session (Fall 2025)

Week 11 Live Session (Fall 2025)

Advanced Markdown

Advanced YAML and Code Chunk Options

Making Your Reports Shine: Word Edition

Making Your Reports Shine: PDF Edition

Making Your Reports Shine: HTML Edition

Publishing Your Work

Quarto Extensions

Parameterized Reporting, Part 1

Parameterized Reporting, Part 2

Parameterized Reporting, Part 3

Week 12 Coworking Session (Fall 2025)

Week 12 Live Session (Fall 2025)

R in 3 Months Progress Survey

R in 3 Months Feedback Survey

R in 3 Months Final Project

Week 13 Coworking Session (Fall 2025)

Week 13 Live Session (Fall 2025)

Add citations to Quarto documents

Change titles of facet plots

Difference between == and %in%

Quarto - rendering and working directories

Using Function Arguments

Positron Assistant