## ----settings, include=FALSE-------------------------------------------------- knitr::opts_chunk$set(fig.width = 6, fig.height = 4.5) options(digits = 4) ## ----load-packages, echo = FALSE, message = FALSE, warning = FALSE------------ library(ggplot2) library(dplyr) library(infer) ## ----glimpse-gss-actual, warning = FALSE, message = FALSE--------------------- dplyr::glimpse(gss) ## ----------------------------------------------------------------------------- set.seed(1) gss_paired <- gss %>% mutate( hours_previous = hours + 5 - rpois(nrow(.), 4.8), diff = hours - hours_previous ) gss_paired %>% select(hours, hours_previous, diff) ## ----plot-diff, echo = FALSE-------------------------------------------------- unique_diff <- unique(gss_paired$diff) gss_paired %>% ggplot2::ggplot() + ggplot2::aes(x = diff) + ggplot2::geom_histogram(bins = diff(range(unique_diff))) + ggplot2::labs(x = "diff: Difference in Number of Hours Worked", y = "Number of Responses") + ggplot2::scale_x_continuous(breaks = c(range(unique_diff), 0)) ## ----calc-obs-mean------------------------------------------------------------ # calculate the observed statistic observed_statistic <- gss_paired %>% specify(response = diff) %>% calculate(stat = "mean") ## ----generate-null------------------------------------------------------------ # generate the null distribution null_dist <- gss_paired %>% specify(response = diff) %>% hypothesize(null = "paired independence") %>% generate(reps = 1000, type = "permute") %>% calculate(stat = "mean") null_dist ## ----visualize---------------------------------------------------------------- # visualize the null distribution and test statistic null_dist %>% visualize() + shade_p_value(observed_statistic, direction = "two-sided") ## ----p-value------------------------------------------------------------------ # calculate the p value from the test statistic and null distribution p_value <- null_dist %>% get_p_value(obs_stat = observed_statistic, direction = "two-sided") p_value ## ----generate-boot------------------------------------------------------------ # generate a bootstrap distribution boot_dist <- gss_paired %>% specify(response = diff) %>% hypothesize(null = "paired independence") %>% generate(reps = 1000, type = "bootstrap") %>% calculate(stat = "mean") visualize(boot_dist) ## ----confidence-interval------------------------------------------------------ # calculate the confidence from the bootstrap distribution confidence_interval <- boot_dist %>% get_confidence_interval(level = .95) confidence_interval ## ----------------------------------------------------------------------------- boot_dist %>% get_confidence_interval(type = "se", point_estimate = observed_statistic, level = .95)