Tidy Data Rule #3: Every Cell is a Single Value
This lesson is called Tidy Data Rule #3: Every Cell is a Single Value, part of the Going Deeper with R course. This lesson is called Tidy Data Rule #3: Every Cell is a Single Value, part of the Going Deeper with R course.
Transcript
Click on the transcript to go to that point in the video. Please note that transcripts are auto generated and may contain minor inaccuracies.
Loading transcript...
View code shown in video
# Load Packages -----------------------------------------------------------
library(tidyverse)
library(janitor)
library(readxl)
library(fs)
# Create Directory --------------------------------------------------------
dir_create("data-raw")
# Download Data -----------------------------------------------------------
# download.file(url = "https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/pagr_schools_math_tot_raceethnicity_2122.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/Documents/TestResults2122/TestResults2019/pagr_schools_math_tot_raceethnicity_1819.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_tot_raceethnicity_1819.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2018/pagr_schools_math_raceethnicity_1718.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_raceethnicity_1718.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2017/pagr_schools_math_raceethnicity_1617.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_raceethnicity_1617.xlsx")
#
# download.file("https://www.oregon.gov/ode/educator-resources/assessment/TestResults2016/pagr_schools_math_raceethnicity_1516.xlsx",
# mode = "wb",
# destfile = "data-raw/pagr_schools_math_raceethnicity_1516.xlsx")
# Import Data -------------------------------------------------------------
math_scores_2021_2022 <-
read_excel("data-raw/pagr_schools_math_tot_raceethnicity_2122.xlsx") |>
clean_names()
third_grade_math_proficiency_2021_2022 <-
math_scores_2021_2022 |>
filter(student_group == "Total Population (All Students)") |>
filter(grade_level == "Grade 3") |>
select(academic_year, school_id, contains("number_level_")) |>
pivot_longer(
cols = contains("number_level_"),
names_to = "proficiency_level",
values_to = "number_of_students"
)
third_grade_math_proficiency_2021_2022 |>
mutate(
proficiency_level = str_remove(proficiency_level, pattern = "number_level_")
)
third_grade_math_proficiency_2021_2022 |>
mutate(
proficiency_level = recode_values(
proficiency_level,
"number_level_4" ~ "4",
"number_level_3" ~ "3",
"number_level_2" ~ "2",
"number_level_1" ~ "1"
)
)
third_grade_math_proficiency_2021_2022 |>
mutate(
proficiency_level = if_else(
proficiency_level == "number_level_4",
true = "4",
false = proficiency_level
)
) |>
mutate(
proficiency_level = if_else(
proficiency_level == "number_level_3",
true = "3",
false = proficiency_level
)
) |>
mutate(
proficiency_level = if_else(
proficiency_level == "number_level_2",
true = "2",
false = proficiency_level
)
) |>
mutate(
proficiency_level = if_else(
proficiency_level == "number_level_1",
true = "1",
false = proficiency_level
)
)
third_grade_math_proficiency_2021_2022 |>
mutate(
proficiency_level = case_when(
proficiency_level == "number_level_4" ~ "4",
proficiency_level == "number_level_3" ~ "3",
proficiency_level == "number_level_2" ~ "2",
proficiency_level == "number_level_1" ~ "1"
)
)
third_grade_math_proficiency_2021_2022 |>
mutate(proficiency_level = parse_number(proficiency_level))
Your Turn
Remove the “x2022_23_” portion of the
race_ethnicityvariable usingstr_remove().Convert all instances of the
race_ethnicityvariable to more meaningful observations (e.g. turn “american_indian_alaska_native” into “American Indian/Alaska Native”) usingrecode_values(),if_else(), orcase_when().
Use the following starter code to help you if necessary.
# Load Packages -----------------------------------------------------------
library(tidyverse)
library(fs)
library(readxl)
library(janitor)
# Create Directories ------------------------------------------------------
dir_create("data-raw")
# Download Data -----------------------------------------------------------
# https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20222023.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20222023.xlsx")
#
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20212022.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20212022.xlsx")
#
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20202021.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20202021.xlsx")
#
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20192020.xlsx")
#
# download.file("https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20182019.xlsx",
# mode = "wb",
# destfile = "data-raw/fallmembershipreport_20182019.xlsx")
# Import Data -------------------------------------------------------------
enrollment_2022_2023 <- read_excel(path = "data-raw/fallmembershipreport_20222023.xlsx",
sheet = "School 2022-23") |>
clean_names()
# Tidy and Clean Data -----------------------------------------------------
enrollment_by_race_ethnicity_2022_2023 <-
enrollment_2022_2023 |>
select(district_institution_id, x2022_23_american_indian_alaska_native:x2022_23_percent_multi_racial) |>
select(-contains("percent")) |>
pivot_longer(cols = -district_institution_id,
names_to = "race_ethnicity",
values_to = "number_of_students")
Learn More
Bob Rudis has a semi-complex article on how he uses case_when() to work with data related to his work on internet security.
You might also find this video by Sharon Machlis on case_when() helpful.
And, in case you think I’m the only one who loves case_when() check out this love letter to the function by Matt Kerlogue.
Have any questions? Put them below and we will help you out!
Course Content
44 Lessons
You need to be signed-in to comment on this post. Login.