# Elephant proportion

If you have ridden on the back of an elephant, does that influence what proportion of people you think have also been on the back of an elephant?

library(tidyverse)
## -- Attaching packages --------
## v ggplot2 3.1.0     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.8
## v tidyr   0.8.2     v stringr 1.3.1
## v readr   1.3.1     v forcats 0.3.0
## -- Conflicts -----------------
## x dplyr::lag()    masks stats::lag()
library(readr)
# Load the survey responses; readr echoes the column types it guessed (below).
dat_ele <- read_csv("S4R_WS_01_BackOfElephantSurvey.csv")
## Parsed with column specification:
## cols(
##   GuessProp = col_double(),
##   OnBack = col_character()
## )
# Discard any row with a missing value, then store OnBack as a factor whose
# levels are ordered "N" before "Y".
dat_ele <- dat_ele %>% na.omit()
dat_ele[["OnBack"]] <- factor(dat_ele[["OnBack"]], levels = c("N", "Y"))

# Inspect the structure of the cleaned data.
str(dat_ele)
## Classes 'tbl_df', 'tbl' and 'data.frame':    38 obs. of  2 variables:
##  $ GuessProp: num  0.05 0.02 0.02 0.05 0.07 0.2 0.15 0.03 0.005 0.015 ...
##  $ OnBack   : Factor w/ 2 levels "N","Y": 1 1 2 1 1 1 1 1 1 1 ...

Plot the mean guessed proportion for each group in our class sample versus the true proportion of the class who have been on the back of an elephant.

## If we create a summary data.frame with a similar structure as our data, then we
##   can annotate our plot with those summaries.

# Group labels in factor-level order (N then Y). tapply() returns its per-group
# results in this same order, so labels and means are guaranteed to line up
# regardless of which response happens to appear first in the data.
on_back_levels <- levels(dat_ele$OnBack)

# calculate the estimated mean guess for each group, ordered N then Y
est_prop <- as.numeric(tapply(dat_ele$GuessProp, dat_ele$OnBack, mean))

# true proportion of the class sample who have been on the back of an elephant
true_prop <- sum(dat_ele$OnBack == "Y") / nrow(dat_ele)

# combine the true class proportion with our estimated means:
# one "True" row and one "Est" row per OnBack group
Prop_true_est <- data.frame(
  OnBack = factor(rep(on_back_levels, times = 2), levels = on_back_levels),
  GuessProp = c(rep(true_prop, length(on_back_levels)), est_prop),
  TrueEst = rep(c("True", "Est"), each = length(on_back_levels))
)
Prop_true_est
##   OnBack  GuessProp TrueEst
## 1      N 0.10526316    True
## 2      Y 0.10526316    True
## 3      N 0.05229412     Est
## 4      Y 0.06250000     Est

Here are two ways to plot our data, annotating the observed and hypothesized means.

library(ggplot2)
# Boxplot of the guessed proportion for each OnBack group, with every
# individual guess overlaid and the "True" / "Est" summary values from
# Prop_true_est drawn as large coloured markers.
p <- ggplot(data = dat_ele, aes(x = OnBack, y = GuessProp)) +
  # The jitter layer below already draws every observation, so suppress the
  # boxplot's own outlier markers to avoid plotting those points twice.
  geom_boxplot(alpha = 1/4, outlier.shape = NA) +
  # Jitter horizontally only: height = 0 keeps each point at its actual
  # GuessProp value (the default would also perturb points vertically).
  geom_jitter(position = position_jitter(width = 0.1, height = 0)) +
  geom_point(
    data = Prop_true_est,
    aes(colour = TrueEst, shape = TrueEst),
    size = 4,
    alpha = 3/4
  ) +
  labs(
    title = "Proportion of people who have been on the back of an elephant",
    caption = "True proportion in class labelled True"
  )
print(p)

library(ggplot2)
# Histogram of the guessed proportions, one panel (row) per OnBack group, with
# vertical lines marking the "True" and "Est" values from Prop_true_est.
p <- ggplot(dat_ele, aes(x = GuessProp)) +
  geom_histogram(binwidth = 0.05) +
  geom_vline(
    data = Prop_true_est,
    mapping = aes(xintercept = GuessProp, colour = TrueEst, linetype = TrueEst)
  ) +
  facet_grid(OnBack ~ .)
print(p)