If you have ridden on the back of an elephant, does that influence what proportion of people you think have also been on the back of an elephant?
library(tidyverse)
## -- Attaching packages --------
## v ggplot2 3.1.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.8
## v tidyr 0.8.2 v stringr 1.3.1
## v readr 1.3.1 v forcats 0.3.0
## -- Conflicts -----------------
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
# Read the elephant survey responses into dat_ele. The path is relative, so the
# CSV must sit in the working directory. The parse message that follows reports
# two columns: GuessProp (double) and OnBack (character).
dat_ele <- read_csv("S4R_WS_01_BackOfElephantSurvey.csv")
## Parsed with column specification:
## cols(
## GuessProp = col_double(),
## OnBack = col_character()
## )
# Keep only complete responses, then store OnBack as a two-level factor
# ("N" first, "Y" second) and display the resulting structure.
dat_ele <- na.omit(dat_ele)
dat_ele[["OnBack"]] <- factor(dat_ele[["OnBack"]], levels = c("N", "Y"))
str(dat_ele)
## Classes 'tbl_df', 'tbl' and 'data.frame': 38 obs. of 2 variables:
## $ GuessProp: num 0.05 0.02 0.02 0.05 0.07 0.2 0.15 0.03 0.005 0.015 ...
## $ OnBack : Factor w/ 2 levels "N","Y": 1 1 2 1 1 1 1 1 1 1 ...
Plot the estimated mean guess from each group in our class sample versus the true proportion of our class who have been on the back of an elephant.
## If we create a summary data.frame with a similar structure as our data, then we
## can annotate our plot with those summaries.
# Mean guessed proportion within each OnBack group. by() returns the groups in
# factor-level order, i.e. "N" then "Y".
est_prop <- as.numeric(by(dat_ele$GuessProp, dat_ele$OnBack, mean))
# "True" proportion: the share of respondents in this sample who answered "Y".
true_prop <- sum(dat_ele$OnBack == "Y") / nrow(dat_ele)
# Take the group labels from levels(), not unique(): unique() returns values in
# order of first appearance in the data, which need not match the level order
# that est_prop follows and would then attach each mean to the wrong group.
on_back_levels <- levels(dat_ele$OnBack)
# Stack the true proportion (identical for both groups) on top of the per-group
# estimated means, one row per group and summary type.
Prop_true_est <- data.frame(
  OnBack = factor(rep(on_back_levels, times = 2), levels = on_back_levels),
  GuessProp = c(rep(true_prop, length(on_back_levels)), est_prop),
  TrueEst = rep(c("True", "Est"), each = length(on_back_levels))
)
Prop_true_est
## OnBack GuessProp TrueEst
## 1 N 0.10526316 True
## 2 Y 0.10526316 True
## 3 N 0.05229412 Est
## 4 Y 0.06250000 Est
Here are two ways to plot our data, annotating the observed and hypothesized means.
library(ggplot2)
# Boxplot of the guesses by OnBack group, with the individual guesses and the
# summary points from Prop_true_est overlaid.
p <- ggplot(data = dat_ele, aes(x = OnBack, y = GuessProp))
# outlier.shape = NA: every observation is drawn by geom_jitter() below, so
# letting the boxplot draw its outliers as well would plot those points twice.
p <- p + geom_boxplot(alpha = 1/4, outlier.shape = NA)
# height = 0: jitter horizontally only. The default also jitters vertically,
# which would display each guess at a value other than the one recorded.
p <- p + geom_jitter(position = position_jitter(width = 0.1, height = 0))
# Summary points: colour and shape distinguish the "True" rows from the "Est" rows.
p <- p + geom_point(data = Prop_true_est, aes(colour = TrueEst, shape = TrueEst), size = 4, alpha = 3/4)
p <- p + labs(title = "Proportion of people who have been on the back of an elephant"
, caption = "True proportion in class labelled True")
print(p)
library(ggplot2)
# Histogram of the guesses, one facet row per OnBack group. Vertical lines mark
# the rows of Prop_true_est, distinguished by colour and line type.
p <- ggplot(dat_ele, aes(x = GuessProp)) +
  geom_histogram(binwidth = 0.05) +
  geom_vline(
    data = Prop_true_est,
    mapping = aes(xintercept = GuessProp, colour = TrueEst, linetype = TrueEst)
  ) +
  facet_grid(OnBack ~ .)
print(p)