Data Analysis & Statistical Inference

Foreword

Output options: the ‘tango’ syntax and the ‘readable’ theme.
Custom functions

plot_ci; for plotting confidence intervals.
plot_ss; for plotting the sum of squares.
multiLine; for plotting regressions with dummy variables.
inference; for running bootstraps and plotting results.

Plot Confidence Interval

plot_ci <- function(lo, hi, m){
  par(mar=c(2, 1, 1, 1), mgp=c(2.7, 0.7, 0))
  k <- 50
  ci.max <- max(rowSums(matrix(c(-1*lo,hi),ncol=2)))

  xR <- m + ci.max*c(-1, 1)
  yR <- c(0, 41*k/40)

  plot(xR, yR, type='n', xlab='', ylab='', axes=FALSE)
  abline(v=m, lty=2, col='#00000088')
  axis(1, at=m, paste("mu = ",round(m,4)), cex.axis=1.15)
  #axis(2)
  for(i in 1:k){
    x <- mean(c(hi[i],lo[i]))
      ci <- c(lo[i],hi[i])
      if(contains(lo[i],hi[i],m)==FALSE){
          col <- "#F05133"
          points(x, i, cex=1.4, col=col)
#         points(x, i, pch=20, cex=1.2, col=col)
          lines(ci, rep(i, 2), col=col, lwd=5)
      }
      col <- 1
    points(x, i, pch=20, cex=1.2, col=col)
      lines(ci, rep(i, 2), col=col)
  }
}

Plot Sum of Squares

plot_ss <- function(x, y, x1, y1, x2, y2, showSquares = FALSE, leastSquares = FALSE){
  plot(y~x, asp = 1)# xlab = paste(substitute(x)), ylab = paste(substitute(y)))
  
  if(leastSquares){
    m1 <- lm(y~x)
    y.hat <- m1$fit
  } else{
    #cat("Click two points to make a line.")
    #pt1 <- locator(1)
    points(x1, y1, pch = 21, col='red', bg='red', cex=1.8)
    #pt2 <- locator(1)
    points(x2, y2, pch = 21, col='red', bg='red', cex=1.8)
    pts <- data.frame("x" = c(x1, x2),"y" = c(y1, y2))
    m1 <- lm(y ~ x, data = pts)
    y.hat <- predict(m1, newdata = data.frame(x))
  }
  r <- y - y.hat
  abline(m1)
  
  oSide <- x - r
  LLim <- par()$usr[1]
  RLim <- par()$usr[2]
  oSide[oSide < LLim | oSide > RLim] <- c(x + r)[oSide < LLim | oSide > RLim] # move boxes to avoid margins
  
  n <- length(y.hat)
  for(i in 1:n){
    lines(rep(x[i], 2), c(y[i], y.hat[i]), lty = 2, col = "blue")
    if(showSquares){
      lines(rep(oSide[i], 2), c(y[i], y.hat[i]), lty = 3, col = "orange")
      lines(c(oSide[i], x[i]), rep(y.hat[i],2), lty = 3, col = "orange")
      lines(c(oSide[i], x[i]), rep(y[i],2), lty = 3, col = "orange")
    }
  }
  
  SS <- round(sum(r^2), 3)
  cat("\r                                ")
  print(m1)
  cat("Sum of Squares: ", SS)
}

Plot multilines

multiLines <- function(model, ...){
  if(class(model)!="lm"){
    warning("Model must be the output of the function lm()")
  }
  
  if(length(model$xlevels)!=1){
    warning("Model must contain exactly one categorical predictor")
  }
  
  if(length(model$coef)-length(model$xlevels[[1]])!=1){
    warning("Model must contain exactly one non-categorical predictor")
  }
  
  palette <- c("#E69F00", "#56B4E9", "#D55E00", "#009E73", "#CC79A7", "#F0E442", "#0072B2")
  
  baseIntercept <- model$coef[1]
  nLines <- length(model$xlevels[[1]])
  intercepts <- c(baseIntercept, rep(0, nLines-1))
  indicatorInd <- c(1, rep(0, nLines)) # used to find slope parameter by process of elimination
  
  for(i in 1:(nLines-1)){
    indicatorName <- paste(names(model$contrasts),model$xlevels[[1]][1+i], sep = "")
    intercepts[i+1] <- baseIntercept + model$coef[names(model$coef)==indicatorName]
    indicatorInd <- indicatorInd + (names(model$coef)==indicatorName)
  }
  
  slope <- model$coef[!indicatorInd]
  
  num_pred = which(names(model$model[,-1]) != names(model$xlevels)) + 1
  cat_pred = which(names(model$model[,-1]) == names(model$xlevels)) + 1
  
  model$model$COL = NA
  model$model$PCH = NA
  for(i in 1:nLines){
    model$model$COL[model$model[,cat_pred] == levels(model$model[,cat_pred])[i]] = adjustcolor(palette[i],0.40)
    model$model$PCH[model$model[,cat_pred] == levels(model$model[,cat_pred])[i]] = i+14
  }
  
  plot(model$model[,1] ~ jitter(model$model[,num_pred]), col = model$model$COL, pch = model$model$PCH,
       ylab = names(model$model)[1],
       xlab = names(model$model)[num_pred])
    
  for(j in 1:nLines){
    abline(intercepts[j], slope, col = palette[j], lwd = 2, ...)
  }

  if(slope > 0){legend_pos = "bottomright"}
  if(slope < 0){legend_pos = "topleft"}  
  
  legend(legend_pos, col = palette[1:nLines], lty = 1, legend = levels(model$model[,cat_pred]), lwd = 2)
}

Compute Bootstap and Plot Results

# Run the inference function (and plot the results)
boot_awg_90 <- inference(nc$gained,
          type = 'ci',
          method = 'simulation',
          conflevel = 0.9,
          est = 'mean',
          boot_method = 'perc')
boot_awg_90

Data Analysis & Statistical Inference - Custom Functions

Plot Confidence Interval

Plot Sum of Squares

Plot multilines

Compute Bootstap and Plot Results