Using EPI’s CPS Extracts in R

Author

Ben Zipperer

Welcome to the landing page for Using the EPI CPS Microdata Extracts in R!

This training and code workflow was originally delivered as an EARN Talk on September 16th, 2025.

Missed this talk? See the recording here: Using EPI’s CPS Microdata Extracts in R. Passcode: 3xtracts2025!

Additional links:

Final CPS script

library(tidyverse)
library(epiextractr)
# wage cutoff: workers whose wageotc falls below this value are
# flagged as low-wage further down
low_wage_threshold <- 20

# read the 2024 CPS ORG extract, requesting only the variables
# named after the year
org_data <- load_org(
  2024,
  year, orgwgt, wage, wageotc,
  statefips, female, wbhao
)
Using EPI CPS ORG Extracts, Version 2025.9.11
# Georgia wage earners in 2024, each flagged as low-wage (1) or not (0)
ga_data <- org_data |>
  filter(
    # wage earners only
    wageotc > 0,
    # in Georgia
    statefips == 13,
    # 2024 only
    year == 2024
  ) |>
  # low wage indicator: 1 when wageotc is under the threshold, otherwise 0
  mutate(low_wage = if_else(wageotc < low_wage_threshold, 1, 0))

# weighted number of Georgia wage earners in each low_wage category
# (orgwgt is divided by 12 -- presumably to turn the pooled monthly
# weights into an annual average; confirm against EPI's weight docs)
count(ga_data, low_wage, wt = orgwgt / 12)
# A tibble: 2 × 2
  low_wage        n
     <dbl>    <dbl>
1        0 2904705.
2        1 1668287.
# share low-wage within each wbhao group: low_wage is a 0/1 indicator,
# so its orgwgt-weighted mean is the weighted low-wage share
ga_data |> 
  summarize(
    # the summary column is left unnamed, so the printed result labels
    # it with the expression text itself
    weighted.mean(low_wage, w = orgwgt),
    .by = wbhao
  )
# A tibble: 5 × 2
  wbhao        `weighted.mean(low_wage, w = orgwgt)`
  <int+lbl>                                    <dbl>
1 1 [White]                                    0.305
2 2 [Black]                                    0.384
3 3 [Hispanic]                                 0.557
4 4 [Asian]                                    0.299
5 5 [Other]                                    0.494
# weighted number of Georgia wage earners by gender (female indicator),
# using the same orgwgt / 12 weighting as the low-wage count
count(ga_data, female, wt = orgwgt / 12)
# A tibble: 2 × 2
  female            n
  <int+lbl>     <dbl>
1 0 [Male]   2309205.
2 1 [Female] 2263787.
# weighted number of Georgia wage earners by race/ethnicity (wbhao),
# using the same orgwgt / 12 weighting as the low-wage count
count(ga_data, wbhao, wt = orgwgt / 12)
# A tibble: 5 × 2
  wbhao               n
  <int+lbl>       <dbl>
1 1 [White]    2197534.
2 2 [Black]    1486286.
3 3 [Hispanic]  612498.
4 4 [Asian]     258840.
5 5 [Other]      17833.
Back to top