# example: EE and 14th becomes 050514, where each E = 05th letter of the alphabet
<- 050514 condition_1_seed
ADA2: Class 27, ATUS data subset and model selection
Advanced Data Analysis 2, Stat 428/528, Spring 2023, Prof. Erik Erhardt, UNM
The American Time Use Survey (ATUS) measures the amount of time people spend doing various activities, such as paid work, childcare, volunteering, and socializing. We use my summarized version of the USA BLS.gov ATUS 2003-2021 Multi-Year Microdata Files.
1 Rubric
Complete everything below.
2 (1 p) Personalized analysis conditions
Set your personalized analysis conditions. This assigns to you an effectively random sample from the dataset, a stepwise selection strategy, and a model selection criterion.
Your path through this analysis will differ depending on your name and birth date.
- Sample of data.
- Choose a random number seed based on your first and last name initials and birth day of the month.
- Stepwise selection starting model.
- If your birth day of the month is 01–10, then your stepwise model selection will start with the mean (empty) model. Choose index
[1]
below. - If your birth day of the month is 11–20, then your stepwise model selection will start with the main effects model. Choose index
[2]
below. - If your birth day of the month is 21–31, then your stepwise model selection will start with the two-way interaction model. Choose index
[3]
below.
- If your birth day of the month is 01–10, then your stepwise model selection will start with the mean (empty) model. Choose index
# example: 14th becomes "Main effects", that's the 2nd index
<- c("Mean", "Main effects", "Two-way interaction")[2] condition_2_init_model
- Model selection criterion.
- If your birth month is 01–06, then use AIC for model selection. Choose index
[1]
below. - If your birth month is 07–12, then use BIC for model selection. Choose index
[2]
below.
- If your birth month is 01–06, then use AIC for model selection. Choose index
We will compare the results of all of the assignments, so please do your best with your given conditions. The point I hope will become clear is that the results are sensitive to the sample and the choices you make in analysis.
# example: December is 12th month, giving "BIC", that's the 2nd index
<- c("AIC", "BIC")[2] condition_3_criterion
- Everyone will use \(n = 500\) observations for analysis.
- This is large enough to clearly identify patterns in the data but not overwhelmingly large to detect tiny effects.
- Do not change this value.
<- 500 n_analysis
3 Data
3.1 Read and format
These data have been prepared for you.
library(erikmisc)
── Attaching packages ─────────────────────────────────────── erikmisc 0.1.20 ──
✔ tibble 3.1.8 ✔ dplyr 1.1.0
── Conflicts ─────────────────────────────────────────── erikmisc_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
erikmisc, solving common complex data analysis workflows
by Dr. Erik Barry Erhardt <erik@StatAcumen.com>
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ readr 2.1.4
✔ ggplot2 3.4.1 ✔ stringr 1.5.0
✔ lubridate 1.9.2 ✔ tidyr 1.3.0
✔ purrr 1.0.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(erikdata) # ATUS data, install with devtools::install_github("erikerhardt/erikdata")
library(labelled) # for variabel labels, use: var_label(dat_atus$TUCASEID)
set.seed(condition_1_seed) # must run prior to dplyr::slice_sample() to draw the same sample
<-
dat_atus ::dat_atus %>%
erikdata::select(
dplyr
TUCASEID
, t0101, TESEX, TEAGE, GTMETSTA, PEEDUCA, TRERNHLY, TEHRUSL1, TEHRUSL2, TRHHCHILD, TRTALONE, TRTHHFAMILY
)
# list of variables with their labels
%>%
labels_dat_atus ::filter(
dplyr%in% names(dat_atus)
Var )
# A tibble: 12 × 2
Var Label
<chr> <chr>
1 TUCASEID ATUS Case ID (14-digit identifier)
2 TESEX Edited: sex
3 TEAGE Edited: age
4 GTMETSTA Metropolitan status (2000 or 2010 definitions, see note)
5 PEEDUCA Edited: what is the highest level of school you have completed o…
6 TRERNHLY Hourly earnings (2 implied decimals)
7 TRHHCHILD Presence of household children < 18
8 TRTALONE Total time respondent spent alone (in minutes)
9 TRTHHFAMILY Total time respondent spent with household family members (in mi…
10 TEHRUSL1 Edited: how many hours per week do you usually work at your main…
11 TEHRUSL2 Edited: how many hours per week do you usually work at your othe…
12 t0101 Sleeping
<-
dat_atus %>%
dat_atus ::filter(
dplyr> 0 # only people who work and earn an hourly wage
TRERNHLY > 0 # only people who went to sleep
, t0101 %>%
) ::mutate(
dplyrt0101 = t0101 / 60 # convert minutes to hours
PEEDUCA_num =
, case_when(
== "Less than 1st grade" ~ 0 # 1
PEEDUCA == "1st, 2nd, 3rd, or 4th grade" ~ 2.5 # 2
, PEEDUCA == "5th or 6th grade" ~ 5.5 # 3
, PEEDUCA == "7th or 8th grade" ~ 7.5 # 4
, PEEDUCA == "9th grade" ~ 9 # 5
, PEEDUCA == "10th grade" ~ 10 # 6
, PEEDUCA == "11th grade" ~ 11 # 7
, PEEDUCA == "12th grade - no diploma" ~ 12 # 8
, PEEDUCA == "High school graduate - diploma or equivalent (GED)" ~ 12 # 9
, PEEDUCA == "Some college but no degree" ~ 13 # 10
, PEEDUCA == "Associate degree - occupational/vocational" ~ 14 # 11
, PEEDUCA == "Associate degree - academic program" ~ 14 # 12
, PEEDUCA == "Bachelor's degree (BA, AB, BS, etc.)" ~ 16 # 13
, PEEDUCA == "Master's degree (MA, MS, MEng, MEd, MSW, etc.)" ~ 18 # 14
, PEEDUCA == "Professional school degree (MD, DDS, DVM, etc.)" ~ 21 # 15
, PEEDUCA == "Doctoral degree (PhD, EdD, etc.)" ~ 21 # 16
, PEEDUCA TRUE ~ NA %>% as.numeric()
,
)# set the "Not identified" Metropolitan areas to NA
GTMETSTA =
, %>%
GTMETSTA factor(
levels =
# keep the levels that are not "Not identified"
::str_subset(
stringrstring = levels(dat_atus$GTMETSTA)
pattern = "Not identified"
, negate = TRUE
,
)
)# hours worked at all jobs
TEHRUSL_all = TEHRUSL1 + TEHRUSL2
, %>%
) ::select(
dplyr-PEEDUCA
-TEHRUSL1
, -TEHRUSL2
, %>%
) # drop rows with any missing values
::drop_na() %>%
tidyr# select your sample of rows for analysis
::slice_sample(
dplyrn = n_analysis
)
# label new variables
::var_label(dat_atus[[ "PEEDUCA_num" ]]) <-
labelled::var_label(dat_atus[[ "PEEDUCA" ]])
labelled# relabel variables that were modified in a way that removes the label attribute
::var_label(dat_atus[[ "GTMETSTA" ]]) <-
labelled%>% filter(Var == "GTMETSTA") %>% pull(Label)
labels_dat_atus ::var_label(dat_atus[[ "TEHRUSL_all" ]]) <-
labelled%>% filter(Var == "TEHRUSL1") %>% pull(Label)
labels_dat_atus
# wrap all labels for plots
for (i_var in seq_len(ncol(dat_atus))) {
::var_label(dat_atus[, i_var]) <-
labelled::var_label(dat_atus[, i_var]) %>%
labelledstr_wrap(width = 30)
}
Warning in stri_split_lines(str): argument is not an atomic vector; coercing
str(dat_atus)
tibble [500 × 11] (S3: tbl_df/tbl/data.frame)
$ TUCASEID : num [1:500] 2.02e+13 2.02e+13 2.02e+13 2.01e+13 2.01e+13 ...
..- attr(*, "label")= chr "ATUS Case ID (14-digit\nidentifier)"
$ t0101 : num [1:500] 10.7 10 7.25 11.42 7 ...
..- attr(*, "label")= chr "Sleeping"
$ TESEX : Factor w/ 2 levels "Male","Female": 2 1 2 1 2 1 1 1 1 2 ...
..- attr(*, "label")= chr "Edited: sex"
$ TEAGE : num [1:500] 42 46 22 41 34 31 49 29 37 34 ...
..- attr(*, "label")= chr "Edited: age"
$ GTMETSTA : Factor w/ 2 levels "Metropolitan",..: 1 2 2 2 1 1 1 2 1 2 ...
..- attr(*, "label")= chr "Metropolitan status (2000 or\n2010 definitions, see note)"
$ TRERNHLY : num [1:500] 3318 3800 1700 1220 1000 ...
..- attr(*, "label")= chr "Hourly earnings (2 implied\ndecimals)"
$ TRHHCHILD : Factor w/ 2 levels "Yes","No": 2 1 2 2 1 2 1 2 1 1 ...
..- attr(*, "label")= chr "Presence of household children\n< 18"
$ TRTALONE : num [1:500] 334 75 200 245 0 0 205 0 10 88 ...
..- attr(*, "label")= chr "Total time respondent spent\nalone (in minutes)"
$ TRTHHFAMILY: num [1:500] 399 765 85 0 1020 945 715 485 845 391 ...
..- attr(*, "label")= chr "Total time respondent spent\nwith household family members\n(in minutes)"
$ PEEDUCA_num: num [1:500] 16 16 16 12 13 21 12 16 12 14 ...
..- attr(*, "label")= chr "NULL"
$ TEHRUSL_all: num [1:500] 39 39 39 39 11 39 39 39 44 29 ...
..- attr(*, "label")= chr "Edited: how many hours per\nweek do you usually work at\nyour main job?"
3.2 Data decisions start here
There are probably some unsustainably short or long numbers of hours slept. Let’s filter to keep only people who slept at least 5 and at most 12 hours of sleep.
Add any other changes to this code for filtering, excluding outliers, or transforming variables.
## filter and mutate data here to satisfy model assumptions
<-
dat_atus %>%
dat_atus ::filter(
dplyr>= 5
t0101 <= 12
, t0101 #, TUCASEID %notin% c() # Can use this to exclude observations by ID number
%>%
) ::mutate(
dplyr
)
str(dat_atus)
tibble [444 × 11] (S3: tbl_df/tbl/data.frame)
$ TUCASEID : num [1:444] 2.02e+13 2.02e+13 2.02e+13 2.01e+13 2.01e+13 ...
..- attr(*, "label")= chr "ATUS Case ID (14-digit\nidentifier)"
$ t0101 : num [1:444] 10.7 10 7.25 11.42 7 ...
..- attr(*, "label")= chr "Sleeping"
$ TESEX : Factor w/ 2 levels "Male","Female": 2 1 2 1 2 1 1 1 2 2 ...
..- attr(*, "label")= chr "Edited: sex"
$ TEAGE : num [1:444] 42 46 22 41 34 31 49 37 34 24 ...
..- attr(*, "label")= chr "Edited: age"
$ GTMETSTA : Factor w/ 2 levels "Metropolitan",..: 1 2 2 2 1 1 1 1 2 1 ...
..- attr(*, "label")= chr "Metropolitan status (2000 or\n2010 definitions, see note)"
$ TRERNHLY : num [1:444] 3318 3800 1700 1220 1000 ...
..- attr(*, "label")= chr "Hourly earnings (2 implied\ndecimals)"
$ TRHHCHILD : Factor w/ 2 levels "Yes","No": 2 1 2 2 1 2 1 1 1 2 ...
..- attr(*, "label")= chr "Presence of household children\n< 18"
$ TRTALONE : num [1:444] 334 75 200 245 0 0 205 10 88 555 ...
..- attr(*, "label")= chr "Total time respondent spent\nalone (in minutes)"
$ TRTHHFAMILY: num [1:444] 399 765 85 0 1020 945 715 845 391 0 ...
..- attr(*, "label")= chr "Total time respondent spent\nwith household family members\n(in minutes)"
$ PEEDUCA_num: num [1:444] 16 16 16 12 13 21 12 12 14 16 ...
..- attr(*, "label")= chr "NULL"
$ TEHRUSL_all: num [1:444] 39 39 39 39 11 39 39 44 29 35 ...
..- attr(*, "label")= chr "Edited: how many hours per\nweek do you usually work at\nyour main job?"
3.3 Plot
Set eval: false
to skip this plot once you’re satisfied with the data choices you’ve made. Please remember to set it back to true
so that this plot appears as part of your homework submission.
## Scatterplot matrix
library(ggplot2)
library(GGally)
Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
<-
p ggpairs(
%>% dplyr::select(-TUCASEID)
dat_atus title = "ATUS Sleeping"
, mapping = ggplot2::aes(colour = TESEX, alpha = 0.5)
, diag = list(
, continuous =
wrap(
c("densityDiag", "barDiag", "blankDiag")[1]
alpha = 1/2
,
)discrete =
, c("barDiag", "blankDiag")[1]
)# scatterplots on top so response as first variable has y on vertical axis
upper = list(
, continuous =
wrap(
c("points", "smooth", "smooth_loess", "density", "cor", "blank")[2]
se = FALSE
, alpha = 1/2
, size = 1
,
)discrete =
, c("ratio", "facetbar", "blank")[2]
combo =
, wrap(
c("box", "box_no_facet", "dot", "dot_no_facet", "facethist", "facetdensity", "denstrip", "blank")[2]
#, bins = 10 # for facethist
)
)lower = list(
, continuous =
wrap(
c("points", "smooth", "smooth_loess", "density", "cor", "blank")[5]
#, se = FALSE
#, alpha = 1/2
#, size = 1
)discrete =
, c("ratio", "facetbar", "blank")[2]
combo =
, wrap(
c("box", "box_no_facet", "dot", "dot_no_facet", "facethist", "facetdensity", "denstrip", "blank")[5]
bins = 10 # for facethist
,
)
)progress = FALSE
, legend = 1 # create legend
,
)<- p + theme_bw()
p <- p + theme(legend.position = "bottom")
p print(p)
4 Analysis
4.1 Initial model
This is done automatically based on your personalized analysis conditions.
# Mean model
if (condition_2_init_model == "Mean") {
<-
lm_fit_init lm(
~ 1
t0101 data = dat_atus
,
)
}
# Main-effects model
if (condition_2_init_model == "Main effects") {
<-
lm_fit_init lm(
~ TESEX + TEAGE + GTMETSTA + PEEDUCA_num + TRERNHLY + TEHRUSL_all + TRHHCHILD + TRTALONE + TRTHHFAMILY
t0101 data = dat_atus
,
)
}
# Two-way interaction model
if (condition_2_init_model == "Two-way interaction") {
<-
lm_fit_init lm(
~ (TESEX + TEAGE + GTMETSTA + PEEDUCA_num + TRERNHLY + TEHRUSL_all + TRHHCHILD + TRTALONE + TRTHHFAMILY)^2
t0101 data = dat_atus
,
)# If the two-way interaction model has NA coefficients,
# then there were probably pairs of categories that had no observations so could not be estimated.
# In this case, set the argument "singular.ok = TRUE" in the car::Anova() function below.
}
lm_fit_init
Call:
lm(formula = t0101 ~ TESEX + TEAGE + GTMETSTA + PEEDUCA_num +
TRERNHLY + TEHRUSL_all + TRHHCHILD + TRTALONE + TRTHHFAMILY,
data = dat_atus)
Coefficients:
(Intercept) TESEXFemale TEAGE
1.016e+01 -6.781e-02 -1.944e-02
GTMETSTANon-metropolitan PEEDUCA_num TRERNHLY
-6.313e-02 -6.983e-02 -4.401e-05
TEHRUSL_all TRHHCHILDNo TRTALONE
-9.158e-03 6.237e-01 7.820e-05
TRTHHFAMILY
1.012e-03
::Anova(lm_fit_init, type = 3, singular.ok = FALSE) car
Anova Table (Type III tests)
Response: t0101
Sum Sq Df F value Pr(>F)
(Intercept) 1023.80 1 360.6903 < 2.2e-16 ***
TESEX 0.45 1 0.1597 0.6896175
TEAGE 29.11 1 10.2550 0.0014634 **
GTMETSTA 0.24 1 0.0851 0.7707015
PEEDUCA_num 10.53 1 3.7094 0.0547605 .
TRERNHLY 0.57 1 0.2004 0.6545861
TEHRUSL_all 7.13 1 2.5133 0.1136154
TRHHCHILD 31.51 1 11.1016 0.0009364 ***
TRTALONE 0.09 1 0.0311 0.8601988
TRTHHFAMILY 21.46 1 7.5596 0.0062183 **
Residuals 1231.89 434
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# plot diagnostics
e_plot_lm_diagostics(lm_fit_init)
Error in e_plot_lm_diagostics(lm_fit_init): could not find function "e_plot_lm_diagostics"
4.2 (3 p) Interpret diagnostics for initial model, resolve issues
4.3 Model selection
This is done automatically based on your personalized analysis conditions.
# criterion
if (condition_3_criterion == "AIC") {
= 2
AIC_k
}if (condition_3_criterion == "BIC") {
= log(nrow(dat_atus))
AIC_k
}
## AIC/BIC stepwise selection
# option: test="F" includes additional information
# for parameter estimate tests that we're familiar with
# option: for BIC, include k=log(nrow( [data.frame name] ))
<-
lm_fit_AIC step(
lm_fit_initscope =
, list(
upper = t0101 ~ (TESEX + TEAGE + GTMETSTA + PEEDUCA_num + TRERNHLY + TEHRUSL_all + TRHHCHILD + TRTALONE + TRTHHFAMILY)^2
lower = t0101 ~ 1
,
)direction = "both"
, test = "F"
, trace = 1
, k = AIC_k # condition_3_criterion takes effect here
, )
Start: AIC=514.05
t0101 ~ TESEX + TEAGE + GTMETSTA + PEEDUCA_num + TRERNHLY + TEHRUSL_all +
TRHHCHILD + TRTALONE + TRTHHFAMILY
Df Sum of Sq RSS AIC F value Pr(>F)
- TRTALONE 1 0.0881 1232.0 507.99 0.0311 0.8601988
- GTMETSTA 1 0.2414 1232.1 508.04 0.0851 0.7707015
- TESEX 1 0.4533 1232.3 508.12 0.1597 0.6896175
- TRERNHLY 1 0.5690 1232.5 508.16 0.2004 0.6545861
- TEHRUSL_all 1 7.1340 1239.0 510.52 2.5133 0.1136154
- PEEDUCA_num 1 10.5289 1242.4 511.73 3.7094 0.0547605 .
<none> 1231.9 514.05
+ GTMETSTA:PEEDUCA_num 1 13.5115 1218.4 515.25 4.8019 0.0289611 *
- TRTHHFAMILY 1 21.4576 1253.3 515.62 7.5596 0.0062183 **
+ TRHHCHILD:TRTALONE 1 10.2065 1221.7 516.45 3.6175 0.0578381 .
+ PEEDUCA_num:TRTALONE 1 9.3805 1222.5 516.75 3.3225 0.0690276 .
+ GTMETSTA:TRTALONE 1 8.8112 1223.1 516.96 3.1194 0.0780705 .
+ TRTALONE:TRTHHFAMILY 1 7.7115 1224.2 517.36 2.7276 0.0993516 .
+ TESEX:TEAGE 1 7.6162 1224.3 517.39 2.6937 0.1014715
+ TRERNHLY:TRTALONE 1 7.4006 1224.5 517.47 2.6170 0.1064540
- TEAGE 1 29.1084 1261.0 518.32 10.2550 0.0014634 **
+ GTMETSTA:TRTHHFAMILY 1 4.2022 1227.7 518.63 1.4821 0.2241111
+ TRERNHLY:TRHHCHILD 1 4.1015 1227.8 518.67 1.4465 0.2297507
+ TESEX:TRTHHFAMILY 1 3.3664 1228.5 518.93 1.1865 0.2766379
+ TEAGE:PEEDUCA_num 1 2.7432 1229.2 519.16 0.9664 0.3261331
- TRHHCHILD 1 31.5113 1263.4 519.17 11.1016 0.0009364 ***
+ TEAGE:TRHHCHILD 1 2.5233 1229.4 519.24 0.8887 0.3463480
+ TEHRUSL_all:TRHHCHILD 1 2.3752 1229.5 519.29 0.8365 0.3609105
+ TEHRUSL_all:TRTHHFAMILY 1 2.3443 1229.5 519.30 0.8256 0.3640615
+ TESEX:TRTALONE 1 2.1046 1229.8 519.39 0.7410 0.3898105
+ TRERNHLY:TRTHHFAMILY 1 1.8700 1230.0 519.47 0.6583 0.4176128
+ TRHHCHILD:TRTHHFAMILY 1 1.8132 1230.1 519.49 0.6383 0.4247714
+ TEHRUSL_all:TRTALONE 1 1.7779 1230.1 519.51 0.6258 0.4293223
+ PEEDUCA_num:TRTHHFAMILY 1 1.1009 1230.8 519.75 0.3873 0.5340516
+ TESEX:TEHRUSL_all 1 0.7057 1231.2 519.89 0.2482 0.6186081
+ TESEX:PEEDUCA_num 1 0.6885 1231.2 519.90 0.2422 0.6229055
+ TESEX:TRERNHLY 1 0.6664 1231.2 519.91 0.2344 0.6285589
+ GTMETSTA:TRERNHLY 1 0.6186 1231.3 519.92 0.2176 0.6411423
+ TEAGE:TRERNHLY 1 0.5725 1231.3 519.94 0.2013 0.6538801
+ PEEDUCA_num:TEHRUSL_all 1 0.5600 1231.3 519.94 0.1969 0.6574306
+ PEEDUCA_num:TRHHCHILD 1 0.5291 1231.4 519.96 0.1861 0.6664367
+ TEAGE:TRTALONE 1 0.5248 1231.4 519.96 0.1845 0.6677224
+ PEEDUCA_num:TRERNHLY 1 0.5161 1231.4 519.96 0.1815 0.6703167
+ GTMETSTA:TEHRUSL_all 1 0.4784 1231.4 519.97 0.1682 0.6819109
+ TEAGE:TRTHHFAMILY 1 0.3854 1231.5 520.01 0.1355 0.7129590
+ TESEX:GTMETSTA 1 0.3331 1231.6 520.03 0.1171 0.7323593
+ TRERNHLY:TEHRUSL_all 1 0.3131 1231.6 520.03 0.1101 0.7402050
+ TEAGE:GTMETSTA 1 0.2156 1231.7 520.07 0.0758 0.7832071
+ GTMETSTA:TRHHCHILD 1 0.1528 1231.7 520.09 0.0537 0.8168579
+ TESEX:TRHHCHILD 1 0.0812 1231.8 520.12 0.0285 0.8659376
+ TEAGE:TEHRUSL_all 1 0.0463 1231.8 520.13 0.0163 0.8985783
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Step: AIC=507.99
t0101 ~ TESEX + TEAGE + GTMETSTA + PEEDUCA_num + TRERNHLY + TEHRUSL_all +
TRHHCHILD + TRTHHFAMILY
Df Sum of Sq RSS AIC F value Pr(>F)
- GTMETSTA 1 0.2554 1232.2 501.98 0.0902 0.7640710
- TESEX 1 0.4233 1232.4 502.04 0.1495 0.6992360
- TRERNHLY 1 0.5366 1232.5 502.08 0.1895 0.6635648
- TEHRUSL_all 1 7.4361 1239.4 504.56 2.6256 0.1058757
- PEEDUCA_num 1 10.6636 1242.6 505.72 3.7652 0.0529749 .
<none> 1232.0 507.99
+ GTMETSTA:PEEDUCA_num 1 13.5268 1218.5 509.18 4.8181 0.0286912 *
- TRTHHFAMILY 1 26.0056 1258.0 511.17 9.1824 0.0025893 **
+ TESEX:TEAGE 1 7.4889 1224.5 511.38 2.6543 0.1039936
- TEAGE 1 29.6494 1261.6 512.45 10.4689 0.0013067 **
+ GTMETSTA:TRTHHFAMILY 1 4.1916 1227.8 512.57 1.4816 0.2241798
+ TRERNHLY:TRHHCHILD 1 4.0508 1227.9 512.62 1.4317 0.2321379
+ TESEX:TRTHHFAMILY 1 3.2982 1228.7 512.89 1.1650 0.2810316
+ TEAGE:PEEDUCA_num 1 2.7032 1229.3 513.11 0.9544 0.3291504
- TRHHCHILD 1 31.5396 1263.5 513.11 11.1364 0.0009192 ***
+ TEAGE:TRHHCHILD 1 2.4898 1229.5 513.18 0.8789 0.3490257
+ TEHRUSL_all:TRTHHFAMILY 1 2.4114 1229.6 513.21 0.8511 0.3567414
+ TEHRUSL_all:TRHHCHILD 1 2.3497 1229.6 513.24 0.8293 0.3629715
+ TRERNHLY:TRTHHFAMILY 1 1.8874 1230.1 513.40 0.6659 0.4149255
+ TRHHCHILD:TRTHHFAMILY 1 1.8648 1230.1 513.41 0.6579 0.4177417
+ PEEDUCA_num:TRTHHFAMILY 1 1.0710 1230.9 513.70 0.3776 0.5392004
+ TESEX:TEHRUSL_all 1 0.7254 1231.2 513.82 0.2557 0.6133626
+ TESEX:TRERNHLY 1 0.6850 1231.3 513.84 0.2415 0.6234058
+ TESEX:PEEDUCA_num 1 0.6377 1231.3 513.85 0.2248 0.6356636
+ GTMETSTA:TRERNHLY 1 0.6246 1231.3 513.86 0.2202 0.6391543
+ PEEDUCA_num:TRHHCHILD 1 0.5519 1231.4 513.88 0.1945 0.6594052
+ TEAGE:TRERNHLY 1 0.5474 1231.4 513.89 0.1929 0.6607005
+ PEEDUCA_num:TEHRUSL_all 1 0.5426 1231.4 513.89 0.1912 0.6621016
+ PEEDUCA_num:TRERNHLY 1 0.5162 1231.5 513.90 0.1819 0.6699387
+ GTMETSTA:TEHRUSL_all 1 0.4884 1231.5 513.91 0.1721 0.6784397
+ TEAGE:TRTHHFAMILY 1 0.3458 1231.6 513.96 0.1219 0.7271973
+ TESEX:GTMETSTA 1 0.3328 1231.6 513.96 0.1173 0.7321784
+ TRERNHLY:TEHRUSL_all 1 0.3167 1231.7 513.97 0.1116 0.7384964
+ TEAGE:GTMETSTA 1 0.2163 1231.8 514.00 0.0762 0.7826146
+ GTMETSTA:TRHHCHILD 1 0.1554 1231.8 514.03 0.0548 0.8150754
+ TRTALONE 1 0.0881 1231.9 514.05 0.0311 0.8601988
+ TESEX:TRHHCHILD 1 0.0787 1231.9 514.05 0.0277 0.8678076
+ TEAGE:TEHRUSL_all 1 0.0440 1231.9 514.07 0.0155 0.9009763
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Step: AIC=501.98
t0101 ~ TESEX + TEAGE + PEEDUCA_num + TRERNHLY + TEHRUSL_all +
TRHHCHILD + TRTHHFAMILY
Df Sum of Sq RSS AIC F value Pr(>F)
- TESEX 1 0.428 1232.7 496.04 0.1514 0.6974222
- TRERNHLY 1 0.483 1232.7 496.06 0.1708 0.6796141
- TEHRUSL_all 1 7.771 1240.0 498.68 2.7496 0.0979994 .
- PEEDUCA_num 1 10.494 1242.7 499.65 3.7131 0.0546364 .
<none> 1232.2 501.98
- TRTHHFAMILY 1 25.947 1258.2 505.14 9.1810 0.0025909 **
+ TESEX:TEAGE 1 7.381 1224.8 505.41 2.6214 0.1061538
- TEAGE 1 29.919 1262.2 506.54 10.5862 0.0012280 **
+ TRERNHLY:TRHHCHILD 1 3.962 1228.3 506.65 1.4032 0.2368326
+ TESEX:TRTHHFAMILY 1 3.284 1229.0 506.89 1.1625 0.2815416
+ TEAGE:PEEDUCA_num 1 2.718 1229.5 507.10 0.9618 0.3272896
+ TEAGE:TRHHCHILD 1 2.445 1229.8 507.20 0.8649 0.3528882
- TRHHCHILD 1 31.820 1264.0 507.21 11.2587 0.0008618 ***
+ TEHRUSL_all:TRTHHFAMILY 1 2.392 1229.8 507.22 0.8460 0.3581946
+ TEHRUSL_all:TRHHCHILD 1 2.282 1230.0 507.26 0.8069 0.3695236
+ TRERNHLY:TRTHHFAMILY 1 1.877 1230.3 507.40 0.6638 0.4156728
+ TRHHCHILD:TRTHHFAMILY 1 1.846 1230.4 507.41 0.6526 0.4196363
+ PEEDUCA_num:TRTHHFAMILY 1 1.079 1231.2 507.69 0.3811 0.5373378
+ TESEX:TEHRUSL_all 1 0.681 1231.5 507.83 0.2404 0.6241731
+ TESEX:TRERNHLY 1 0.673 1231.6 507.84 0.2377 0.6261507
+ TESEX:PEEDUCA_num 1 0.665 1231.6 507.84 0.2347 0.6282680
+ TEAGE:TRERNHLY 1 0.554 1231.7 507.88 0.1958 0.6583657
+ PEEDUCA_num:TEHRUSL_all 1 0.532 1231.7 507.89 0.1877 0.6650389
+ PEEDUCA_num:TRERNHLY 1 0.495 1231.7 507.90 0.1749 0.6759673
+ PEEDUCA_num:TRHHCHILD 1 0.490 1231.7 507.90 0.1729 0.6777194
+ TRERNHLY:TEHRUSL_all 1 0.342 1231.9 507.96 0.1209 0.7282165
+ TEAGE:TRTHHFAMILY 1 0.320 1231.9 507.96 0.1131 0.7367832
+ GTMETSTA 1 0.255 1232.0 507.99 0.0902 0.7640710
+ TRTALONE 1 0.102 1232.1 508.04 0.0361 0.8494529
+ TESEX:TRHHCHILD 1 0.079 1232.2 508.05 0.0279 0.8675080
+ TEAGE:TEHRUSL_all 1 0.045 1232.2 508.06 0.0159 0.8998406
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Step: AIC=496.04
t0101 ~ TEAGE + PEEDUCA_num + TRERNHLY + TEHRUSL_all + TRHHCHILD +
TRTHHFAMILY
Df Sum of Sq RSS AIC F value Pr(>F)
- TRERNHLY 1 0.350 1233.0 490.07 0.1242 0.7246752
- TEHRUSL_all 1 7.359 1240.0 492.59 2.6089 0.1069869
- PEEDUCA_num 1 11.849 1244.5 494.19 4.2007 0.0410031 *
<none> 1232.7 496.04
- TRTHHFAMILY 1 25.648 1258.3 499.09 9.0925 0.0027162 **
- TEAGE 1 30.280 1262.9 500.72 10.7348 0.0011352 **
+ TRERNHLY:TRHHCHILD 1 3.697 1229.0 500.80 1.3115 0.2527469
+ TEAGE:PEEDUCA_num 1 2.976 1229.7 501.06 1.0552 0.3048796
+ TEAGE:TRHHCHILD 1 2.468 1230.2 501.25 0.8747 0.3501758
+ TEHRUSL_all:TRTHHFAMILY 1 2.322 1230.3 501.30 0.8229 0.3648312
- TRHHCHILD 1 31.990 1264.7 501.32 11.3412 0.0008252 ***
+ TEHRUSL_all:TRHHCHILD 1 2.236 1230.4 501.33 0.7925 0.3738499
+ TRERNHLY:TRTHHFAMILY 1 1.876 1230.8 501.46 0.6644 0.4154494
+ TRHHCHILD:TRTHHFAMILY 1 1.756 1230.9 501.50 0.6218 0.4307955
+ PEEDUCA_num:TRTHHFAMILY 1 1.080 1231.6 501.75 0.3822 0.5367245
+ TEAGE:TRERNHLY 1 0.678 1232.0 501.89 0.2399 0.6245463
+ PEEDUCA_num:TEHRUSL_all 1 0.557 1232.1 501.94 0.1972 0.6572473
+ PEEDUCA_num:TRHHCHILD 1 0.440 1232.2 501.98 0.1558 0.6932736
+ PEEDUCA_num:TRERNHLY 1 0.437 1232.2 501.98 0.1545 0.6944384
+ TESEX 1 0.428 1232.2 501.98 0.1514 0.6974222
+ TRERNHLY:TEHRUSL_all 1 0.395 1232.3 501.99 0.1397 0.7087531
+ TEAGE:TRTHHFAMILY 1 0.394 1232.3 502.00 0.1394 0.7090109
+ GTMETSTA 1 0.260 1232.4 502.04 0.0920 0.7618422
+ TRTALONE 1 0.070 1232.6 502.11 0.0246 0.8754372
+ TEAGE:TEHRUSL_all 1 0.043 1232.6 502.12 0.0153 0.9015217
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Step: AIC=490.07
t0101 ~ TEAGE + PEEDUCA_num + TEHRUSL_all + TRHHCHILD + TRTHHFAMILY
Df Sum of Sq RSS AIC F value Pr(>F)
- TEHRUSL_all 1 8.092 1241.1 486.88 2.8743 0.0907127 .
- PEEDUCA_num 1 17.038 1250.0 490.07 6.0524 0.0142728 *
<none> 1233.0 490.07
- TRTHHFAMILY 1 25.536 1258.5 493.08 9.0712 0.0027470 **
+ TEAGE:PEEDUCA_num 1 3.084 1229.9 495.06 1.0959 0.2957496
+ TEAGE:TRHHCHILD 1 2.486 1230.5 495.27 0.8828 0.3479477
+ TEHRUSL_all:TRTHHFAMILY 1 2.274 1230.7 495.35 0.8075 0.3693654
+ TEHRUSL_all:TRHHCHILD 1 2.195 1230.8 495.38 0.7795 0.3777913
+ TRHHCHILD:TRTHHFAMILY 1 1.747 1231.3 495.54 0.6202 0.4313985
- TRHHCHILD 1 32.693 1265.7 495.59 11.6134 0.0007153 ***
+ PEEDUCA_num:TRTHHFAMILY 1 1.200 1231.8 495.74 0.4256 0.5144861
- TEAGE 1 33.285 1266.3 495.80 11.8236 0.0006408 ***
+ PEEDUCA_num:TEHRUSL_all 1 0.515 1232.5 495.98 0.1826 0.6693407
+ TEAGE:TRTHHFAMILY 1 0.412 1232.6 496.02 0.1461 0.7024844
+ PEEDUCA_num:TRHHCHILD 1 0.401 1232.6 496.02 0.1420 0.7064933
+ TRERNHLY 1 0.350 1232.7 496.04 0.1242 0.7246752
+ TESEX 1 0.295 1232.7 496.06 0.1048 0.7463551
+ GTMETSTA 1 0.212 1232.8 496.09 0.0750 0.7843283
+ TEAGE:TEHRUSL_all 1 0.060 1233.0 496.15 0.0212 0.8841798
+ TRTALONE 1 0.049 1233.0 496.15 0.0172 0.8955827
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Step: AIC=486.88
t0101 ~ TEAGE + PEEDUCA_num + TRHHCHILD + TRTHHFAMILY
Df Sum of Sq RSS AIC F value Pr(>F)
<none> 1241.1 486.88
- PEEDUCA_num 1 19.331 1260.4 487.65 6.8377 0.0092327 **
+ TEHRUSL_all 1 8.092 1233.0 490.07 2.8743 0.0907127 .
- TRTHHFAMILY 1 27.639 1268.7 490.56 9.7765 0.0018850 **
+ TEAGE:TRHHCHILD 1 5.005 1236.1 491.18 1.7736 0.1836358
+ TEAGE:PEEDUCA_num 1 3.729 1237.4 491.64 1.3201 0.2512094
- TRHHCHILD 1 31.770 1272.9 492.01 11.2378 0.0008709 ***
- TEAGE 1 32.445 1273.5 492.24 11.4764 0.0007684 ***
+ TRHHCHILD:TRTHHFAMILY 1 1.271 1239.8 492.52 0.4490 0.5031461
+ TRERNHLY 1 1.083 1240.0 492.59 0.3825 0.5365906
+ PEEDUCA_num:TRTHHFAMILY 1 0.721 1240.4 492.72 0.2546 0.6141379
+ PEEDUCA_num:TRHHCHILD 1 0.711 1240.4 492.72 0.2509 0.6166674
+ GTMETSTA 1 0.484 1240.6 492.80 0.1709 0.6795566
+ TRTALONE 1 0.358 1240.7 492.85 0.1264 0.7223789
+ TEAGE:TRTHHFAMILY 1 0.318 1240.8 492.86 0.1122 0.7378483
+ TESEX 1 0.007 1241.1 492.97 0.0025 0.9605036
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
<- lm_fit_AIC
lm_fit_final
::Anova(lm_fit_final, type = 3) car
Anova Table (Type III tests)
Response: t0101
Sum Sq Df F value Pr(>F)
(Intercept) 1263.29 1 446.8487 < 2.2e-16 ***
TEAGE 32.45 1 11.4764 0.0007684 ***
PEEDUCA_num 19.33 1 6.8377 0.0092327 **
TRHHCHILD 31.77 1 11.2378 0.0008709 ***
TRTHHFAMILY 27.64 1 9.7765 0.0018850 **
Residuals 1241.10 439
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# plot diagnostics
e_plot_lm_diagostics(lm_fit_final)
Error in e_plot_lm_diagostics(lm_fit_final): could not find function "e_plot_lm_diagostics"
summary(lm_fit_final)
Call:
lm(formula = t0101 ~ TEAGE + PEEDUCA_num + TRHHCHILD + TRTHHFAMILY,
data = dat_atus)
Residuals:
Min 1Q Median 3Q Max
-4.0543 -1.1496 -0.0369 1.1845 3.9825
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 9.9152243 0.4690534 21.139 < 2e-16 ***
TEAGE -0.0195953 0.0057843 -3.388 0.000768 ***
PEEDUCA_num -0.0825317 0.0315622 -2.615 0.009233 **
TRHHCHILDNo 0.6234762 0.1859859 3.352 0.000871 ***
TRTHHFAMILY 0.0010070 0.0003221 3.127 0.001885 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.681 on 439 degrees of freedom
Multiple R-squared: 0.0577, Adjusted R-squared: 0.04911
F-statistic: 6.72 on 4 and 439 DF, p-value: 2.955e-05
<-
lm_fit_criteria e_lm_model_criteria(
lm_fit = lm_fit_final
dat_fit = dat_atus
, )
4.4 (3 p) Interpret diagnostics for selected model, resolve issues
4.5 Model effects/contrasts
<-
p_cont e_plot_model_contrasts(
fit = lm_fit_final
dat_cont = dat_atus
, choose_contrasts = NULL
, sw_table_in_plot = TRUE
, adjust_method = c("none", "tukey", "scheffe", "sidak", "bonferroni", "dunnettx", "mvt")[2]
, CI_level = 0.95
, sw_glm_scale = c("link", "response")[1]
, sw_print = FALSE
, sw_marginal_even_if_interaction = FALSE
, sw_TWI_plots_keep = c("singles", "both", "all")[1]
, sw_TWI_both_orientation = c("wide", "tall")[1]
, sw_plot_quantiles_values = c("quantiles", "values")[1]
, plot_quantiles = c(0.05, 0.25, 0.5, 0.75, 0.95)
, sw_quantile_type = 7
, plot_values = NULL
, emmip_rg.limit = 1000
, )
Note: adjust = "tukey" was changed to "sidak"
because "tukey" is only appropriate for one set of pairwise comparisons
Note: adjust = "tukey" was changed to "sidak"
because "tukey" is only appropriate for one set of pairwise comparisons
Note: adjust = "tukey" was changed to "sidak"
because "tukey" is only appropriate for one set of pairwise comparisons
Note: adjust = "tukey" was changed to "sidak"
because "tukey" is only appropriate for one set of pairwise comparisons
Note: adjust = "tukey" was changed to "sidak"
because "tukey" is only appropriate for one set of pairwise comparisons
Note: adjust = "tukey" was changed to "sidak"
because "tukey" is only appropriate for one set of pairwise comparisons
Note: adjust = "tukey" was changed to "sidak"
because "tukey" is only appropriate for one set of pairwise comparisons
Note: adjust = "tukey" was changed to "sidak"
because "tukey" is only appropriate for one set of pairwise comparisons
Note: adjust = "tukey" was changed to "sidak"
because "tukey" is only appropriate for one set of pairwise comparisons
# Since plot interactions have sublists of plots, we want to pull those out
# into a one-level plot list.
# The code here works for sw_TWI_plots_keep = "singles"
# which will make each plot the same size in the plot_grid() below.
# For a publications, you'll want to manually choose which plots to show.
# index for plot list,
# needed since interactions add 2 plots to the list, so the number of terms
# is not necessarily the same as the number of plots.
<- 0
i_list # initialize a list of plots
<- list()
p_list
for (i_term in 1:length(p_cont$plots)) {
## i_term = 1
# extract the name of the plot
<- names(p_cont$plots)[i_term]
n_list
# test whether the name has a colon ":"; if so, it's an interaction
if (stringr::str_detect(string = n_list, pattern = stringr::fixed(":"))) {
# an two-way interaction has two plots
# first plot
<- i_list + 1
i_list <- p_cont$plots[[ i_term ]][[ 1 ]]
p_list[[ i_list ]]
# second plot
<- i_list + 1
i_list <- p_cont$plots[[ i_term ]][[ 2 ]]
p_list[[ i_list ]]
else {
} # not an interaction, only one plot
<- i_list + 1
i_list <- p_cont$plots[[ i_term ]]
p_list[[ i_list ]]
# if
}
# Every 4 plots, print them
if (i_list >= 4) {
<-
p_arranged ::plot_grid(
cowplotplotlist = p_list
nrow = NULL
, ncol = 2
, labels = "AUTO"
,
)
%>% print()
p_arranged
<- 0
i_list
next
}
# if last term, print the plots
if (i_term == length(p_cont$plots)) {
<-
p_arranged ::plot_grid(
cowplotplotlist = p_list
nrow = NULL
, ncol = 2
, labels = "AUTO"
,
)
%>% print()
p_arranged
}
# for }
4.6 (2 p) Interpret one main effect and one interaction
If you don’t have an interaction, interpret a second main effect. Choose the two you think are the most interesting (to you).
- Main effect: [Variable name here]
- [Interpretation here]
- Interaction: [Variable names here]
- [Interpretation here]