Binding Data Frames
This lesson is called Binding Data Frames, part of the R in 3 Months (Spring 2026) course. This lesson is called Binding Data Frames, part of the R in 3 Months (Spring 2026) course.
Transcript
Click on the transcript to go to that point in the video. Please note that transcripts are auto generated and may contain minor inaccuracies.
Loading transcript...
View code shown in video
# Load Packages -----------------------------------------------------------
library(tidyverse)
library(fs)
library(readxl)
library(janitor)
# Create Directories ------------------------------------------------------
dir_create("data-raw")
# Download Data -----------------------------------------------------------
# https://www.oregon.gov/ode/educator-resources/assessment/Pages/Assessment-Group-Reports.aspx
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/pagr_schools_math_tot_raceethnicity_2122.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/TestResults2019/pagr_schools_math_tot_raceethnicity_1819.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2018/pagr_schools_math_raceethnicity_1718.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_raceethnicity_1718.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2017/pagr_schools_math_raceethnicity_1617.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_raceethnicity_1617.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2016/pagr_schools_math_raceethnicity_1516.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_raceethnicity_1516.xlsx")
# Import Data -------------------------------------------------------------
math_scores_2021_2022 <-
read_excel(path = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx") |>
clean_names()
math_scores_2018_2019 <-
read_excel(path = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx") |>
clean_names()
# Tidy and Clean Data -----------------------------------------------------
third_grade_math_proficiency_2021_2022 <-
math_scores_2021_2022 |>
filter(student_group == "Total Population (All Students)") |>
filter(grade_level == "Grade 3") |>
select(academic_year, school_id, contains("number_level")) |>
pivot_longer(
cols = starts_with("number_level"),
names_to = "proficiency_level",
values_to = "number_of_students"
) |>
mutate(proficiency_level = parse_number(proficiency_level)) |>
mutate(number_of_students = parse_number(number_of_students)) |>
mutate(
pct = number_of_students / sum(number_of_students, na.rm = TRUE),
.by = school_id
)
third_grade_math_proficiency_2018_2019 <-
math_scores_2018_2019 |>
filter(student_group == "Total Population (All Students)") |>
filter(grade_level == "Grade 3") |>
select(academic_year, school_id, contains("number_level")) |>
pivot_longer(
cols = starts_with("number_level"),
names_to = "proficiency_level",
values_to = "number_of_students"
) |>
mutate(proficiency_level = parse_number(proficiency_level)) |>
mutate(number_of_students = parse_number(number_of_students)) |>
mutate(
pct = number_of_students / sum(number_of_students, na.rm = TRUE),
.by = school_id
)
bind_rows(
third_grade_math_proficiency_2021_2022,
third_grade_math_proficiency_2018_2019
)
third_grade_math_proficiency_2021_2022 |>
bind_rows(third_grade_math_proficiency_2018_2019)
Your Turn
Import 2021-2022 enrollment data into a data frame called
enrollment_by_race_ethnicity_2021_2022and clean it using the code you used for the 2022-2023 data. You’ll need to change bits of your code to make it work.Add a
yearvariable to theenrollment_by_race_ethnicity_2021_2022andenrollment_by_race_ethnicity_2022_2023data frames.Use bind_rows() to make a
enrollment_by_race_ethnicitydata frame by combining theenrollment_by_race_ethnicity_2021_2022andenrollment_by_race_ethnicity_2022_2023data frames.
Here is code to get started:
# Load Packages ----------------------------------------------------------
library(tidyverse)
library(fs)
library(readxl)
library(janitor)
# Create Directory -------------------------------------------------------
dir_create("data-raw")
# Download Data ----------------------------------------------------------
# https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx
# download.file(
# url = "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20222023.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20222023.xlsx"
# )
# download.file(
# "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20212022.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20212022.xlsx"
# )
# download.file(
# "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20202021.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20202021.xlsx"
# )
# download.file(
# "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20192020.xlsx"
# )
# download.file(
# "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20182019.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20182019.xlsx"
# )
# Import Data ------------------------------------------------------------
enrollment_2022_2023 <-
read_excel(
path = "data-raw/fallmembershipreport_20222023.xlsx",
sheet = "School 2022-23"
) |>
clean_names()
enrollment_2021_2022 <-
read_excel(
path = "data-raw/fallmembershipreport_20212022.xlsx",
sheet = "School 2021-22"
) |>
clean_names()
# Tidy and Clean Data ----------------------------------------------------
enrollment_by_race_ethnicity_2022_2023 <-
enrollment_2022_2023 |>
select(
district_institution_id,
school_institution_id,
x2022_23_american_indian_alaska_native:x2022_23_multi_racial
) |>
select(-contains("percent")) |>
pivot_longer(
cols = -c(district_institution_id, school_institution_id),
names_to = "race_ethnicity",
values_to = "number_of_students"
) |>
mutate(race_ethnicity = str_remove(race_ethnicity, "x2022_23_")) |>
mutate(
race_ethnicity = recode_values(
race_ethnicity,
"american_indian_alaska_native" ~ "American Indian Alaska Native",
"asian" ~ "Asian",
"black_african_american" ~ "Black/African American",
"hispanic_latino" ~ "Hispanic/Latino",
"multiracial" ~ "Multi-Racial",
"native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander",
"white" ~ "White",
"multi_racial" ~ "Multiracial"
)
) |>
mutate(number_of_students = parse_number(number_of_students)) |>
group_by(district_institution_id, race_ethnicity) |>
summarize(number_of_students = sum(number_of_students, na.rm = TRUE)) |>
ungroup() |>
group_by(district_institution_id) |>
mutate(pct = number_of_students / sum(number_of_students)) |>
ungroup() |>
mutate(year = YOURCODEHERE)
enrollment_by_race_ethnicity_2021_2022 <-
enrollment_2021_2022 |>
select(
attending_district_institution_id,
attending_school_institution_id,
YOURCODEHERE:YOURCODEHERE
) |>
rename(
YOURCODEHERE = attending_district_institution_id,
YOURCODEHERE = attending_school_institution_id
) |>
select(-contains("percent")) |>
pivot_longer(
cols = -c(district_institution_id, school_institution_id),
names_to = "race_ethnicity",
values_to = "number_of_students"
) |>
mutate(race_ethnicity = str_remove(race_ethnicity, "x2021_22_")) |>
mutate(
race_ethnicity = recode_values(
race_ethnicity,
"american_indian_alaska_native" ~ "American Indian Alaska Native",
"asian" ~ "Asian",
"black_african_american" ~ "Black/African American",
"hispanic_latino" ~ "Hispanic/Latino",
"multiracial" ~ "Multi-Racial",
"native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander",
"white" ~ "White",
"multi_racial" ~ "Multiracial"
)
) |>
mutate(number_of_students = parse_number(number_of_students)) |>
group_by(district_institution_id, race_ethnicity) |>
summarize(number_of_students = sum(number_of_students, na.rm = TRUE)) |>
ungroup() |>
group_by(district_institution_id) |>
mutate(pct = number_of_students / sum(number_of_students)) |>
ungroup() |>
mutate(year = YOURCODEHERE)
bind_rows(
YOURCODEHERE,
YOURCODEHERE
)
Have any questions? Put them below and we will help you out!
Course Content
144 Lessons
You need to be signed-in to comment on this post. Login.