library(Sleuth3)
library(ggplot2)
library(knitr)
source(url("http://stat512.cwick.co.nz/code/stat_qqline.r"))
# Download the four practice data sets. Each load() call restores whatever R
# objects were saved in the remote .rda file into the current environment;
# presumably these are the data frames df1-df4 that the model fits below rely
# on (the object names come from the files themselves -- verify).
# NOTE(review): the files are fetched over plain HTTP on every run, so the
# script needs network access; consider keeping local copies.
load(url("http://stat512.cwick.co.nz/data/df1.rda"))
load(url("http://stat512.cwick.co.nz/data/df2.rda"))
load(url("http://stat512.cwick.co.nz/data/df3.rda"))
load(url("http://stat512.cwick.co.nz/data/df4.rda"))
# Fit the same simple linear regression (mean of y as a straight-line
# function of x) separately to each of the four data sets.
fit1 <- lm(formula = y ~ x, data = df1)
fit2 <- lm(formula = y ~ x, data = df2)
fit3 <- lm(formula = y ~ x, data = df3)
fit4 <- lm(formula = y ~ x, data = df4)
#' As mentioned on Canvas, TWO of the data sets had clear issues.
#'
#' The following is the residual plot from df3.
# Residuals-versus-fitted plot for the df3 model. ggplot() converts the lm
# object to a data frame via fortify(), which supplies the .fitted and .resid
# columns. Built with ggplot() rather than qplot() because qplot() is
# deprecated as of ggplot2 3.4.0. The dashed line marks zero so the spread of
# the residuals around it is easy to judge.
# Alternative diagnostic plots, currently disabled:
# qplot(.fitted, x, data = fit3)
# qplot(sample = .resid, data = fit3) + stat_qqline()
ggplot(fit3, aes(x = .fitted, y = .resid)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_point() +
  labs(x = "Fitted values", y = "Residuals")
#' This plot suggests the constant spread (or constant variance) assumption is violated: the spread of the residuals about the zero line clearly decreases as the fitted values increase.
#'
#' The following is the residual plot from df4.
# Residuals-versus-fitted plot for the df4 model. ggplot() converts the lm
# object to a data frame via fortify(), which supplies the .fitted and .resid
# columns. Built with ggplot() rather than qplot() because qplot() is
# deprecated as of ggplot2 3.4.0. The dashed line marks zero so any systematic
# pattern in the sign of the residuals is easy to see.
ggplot(fit4, aes(x = .fitted, y = .resid)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_point() +
  labs(x = "Fitted values", y = "Residuals")
#' The plot suggests the linearity assumption has been violated, as most residuals are positive at both ends of the range of fitted values and negative for the middle fitted values.
#'
#' ## Interpretation
#'
#' Since df1 and df2 have no obvious violations, the slope and intercept will be interpreted for both.
#' First, df1:
# Coefficient estimates, standard errors and tests for the df1 model.
summary(fit1)
# summary() does not print confidence intervals, so compute them explicitly;
# this lets a reader check the intervals quoted in the interpretation.
# confint() uses a 95% level by default -- NOTE(review): confirm that is the
# level the quoted intervals are meant to have.
confint(fit1)
#' Slope: It is estimated that as x increases by 1 unit, the mean of y increases by 4.056 units (corresponding CI: (3.940, 4.172)).
#' Intercept: It is estimated that when x is equal to zero, the mean of y is 1.698 units (corresponding CI: (1.381, 2.555)).
#'
#' For df2
# Coefficient estimates, standard errors and tests for the df2 model.
summary(fit2)
# summary() does not print confidence intervals, so compute them explicitly;
# this lets a reader check the intervals quoted in the interpretation.
# confint() uses a 95% level by default -- NOTE(review): confirm that is the
# level the quoted intervals are meant to have.
confint(fit2)
#' Slope: It is estimated that as x increases by 1 unit, the mean of y increases by 1.662 units (corresponding CI: (0.825, 2.498)).
#' Intercept: It is estimated that when x is equal to zero, the mean of y is 1.184 units (corresponding CI: (0.678, 1.690)).