Advanced Summarizing
This lesson is called Advanced Summarizing, part of the Going Deeper with R course. This lesson is called Advanced Summarizing, part of the Going Deeper with R course.
Transcript
Click on the transcript to go to that point in the video. Please note that transcripts are auto generated and may contain minor inaccuracies.
Loading transcript...
View code shown in video
# Load Packages -----------------------------------------------------------
library(tidyverse)
library(fs)
library(readxl)
library(janitor)
# Create Directories ------------------------------------------------------
dir_create("data-raw")
# Download Data -----------------------------------------------------------
# https://www.oregon.gov/ode/educator-resources/assessment/Pages/Assessment-Group-Reports.aspx
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/pagr_schools_math_tot_raceethnicity_2122.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/TestResults2019/pagr_schools_math_tot_raceethnicity_1819.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2018/pagr_schools_math_raceethnicity_1718.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_raceethnicity_1718.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2017/pagr_schools_math_raceethnicity_1617.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_raceethnicity_1617.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2016/pagr_schools_math_raceethnicity_1516.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_raceethnicity_1516.xlsx")
# Import Data -------------------------------------------------------------
math_scores_2021_2022 <-
read_excel(path = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx") |>
clean_names()
# Tidy and Clean Data -----------------------------------------------------
third_grade_math_proficiency_2021_2022 <-
math_scores_2021_2022 |>
filter(student_group == "Total Population (All Students)") |>
filter(grade_level == "Grade 3") |>
select(academic_year, school_id, contains("number_level")) |>
pivot_longer(
cols = starts_with("number_level"),
names_to = "proficiency_level",
values_to = "number_of_students"
) |>
mutate(proficiency_level = parse_number(proficiency_level)) |>
mutate(number_of_students = parse_number(number_of_students))
third_grade_math_proficiency_2021_2022 |>
# group_by(school_id) |>
mutate(
pct = number_of_students / sum(number_of_students, na.rm = TRUE),
.by = school_id
) |>
# ungroup() |>
filter(proficiency_level == 4) |>
slice_max(order_by = pct, n = 1)
Your Turn
Create a new variable called
pctthat shows each race/ethnicity as a percentage of all students in each district. This will require two steps.You'll need to use
group_by()andsummarize()to calculate the number of students in each race/ethnicity group in each district.You’ll need to use
group_by()andmutate()to calculate the percentage of students in each race/ethnicity group in each district.
Don’t forget to ungroup() at the end of each step. Use this code to get started:
# Load Packages ----------------------------------------------------------
library(tidyverse)
library(fs)
library(readxl)
library(janitor)
# Create Directory -------------------------------------------------------
dir_create("data-raw")
# Download Data ----------------------------------------------------------
# https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx
# download.file(
# url = "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20222023.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20222023.xlsx"
# )
# download.file(
# "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20212022.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20212022.xlsx"
# )
# download.file(
# "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20202021.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20202021.xlsx"
# )
# download.file(
# "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20192020.xlsx"
# )
# download.file(
# "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20182019.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20182019.xlsx"
# )
# Import Data ------------------------------------------------------------
enrollment_2022_2023 <-
read_excel(
path = "data-raw/fallmembershipreport_20222023.xlsx",
sheet = "School 2022-23"
) |>
clean_names()
# Tidy and Clean Data ----------------------------------------------------
enrollment_by_race_ethnicity_2022_2023 <-
enrollment_2022_2023 |>
select(
district_institution_id,
school_institution_id,
x2022_23_american_indian_alaska_native:x2022_23_multi_racial
) |>
select(-contains("percent")) |>
pivot_longer(
cols = -c(district_institution_id, school_institution_id),
names_to = "race_ethnicity",
values_to = "number_of_students"
) |>
mutate(race_ethnicity = str_remove(race_ethnicity, "x2022_23_")) |>
mutate(
race_ethnicity = recode_values(
race_ethnicity,
"american_indian_alaska_native" ~ "American Indian Alaska Native",
"asian" ~ "Asian",
"black_african_american" ~ "Black/African American",
"hispanic_latino" ~ "Hispanic/Latino",
"multiracial" ~ "Multi-Racial",
"native_hawaiian_pacific_islander" ~ "Native Hawaiian Pacific Islander",
"white" ~ "White",
"multi_racial" ~ "Multiracial"
)
) |>
mutate(number_of_students = parse_number(number_of_students))
Learn More
Daniel Carter has a nice walkthrough of using group_by() and mutate().
Have any questions? Put them below and we will help you out!
Course Content
44 Lessons
You need to be signed-in to comment on this post. Login.