Creating a fake dataset:

cont1 <- runif(1:30)
cont2 <- runif(30)
fact1 <- c(rep("Blue",20), rep("Green",10))
fact2 <- as.factor(c(rep(1,5), rep(0,25)))

fakeDF <- data.frame(cont1, cont2, fact1, fact2)

LINEAR REGRESSION

# FUNCTION: fitLinear
# inputs: numeric vector of predictor (x) and response (y)
# outputs: slope and p-value
# data
fitLinear <- function(x=runif(1:40), y=runif(40)) {
  myMod <- lm(y~x) # fit the model
  myOut <- c(slope=summary(myMod)$coefficients[2,1],
             pValue=summary(myMod)$coefficients[2,4])
  return(myOut)
}
fitLinear()
##     slope    pValue 
## 0.0367671 0.8451700

Testing the function on my fake dataset:

fitLinear(fakeDF$cont1,fakeDF$cont2)
##       slope      pValue 
## -0.04334008  0.82659647

Create output plot

plotfitLinear <- function(x=runif(1:40), y=runif(40)) {
  plot1 <- plot(x=x,y=y,pch=21,bg="lightblue",cex=2)
  myMod <- lm(y~x)
  abline(myMod)
  return(plot1)
}
plotfitLinear()

## NULL

Check with fake dataset:

plotfitLinear(fakeDF$cont1,fakeDF$cont2)

## NULL

ANOVA

anovafunc <- function(x=as.factor(rep(c("birds", "poodles"),each=5)), y=runif(10)) {
  myAnova <- aov(y~x)
  AnovaOut1 <- summary(myAnova)
  AnovaOut2 <- print(myAnova)
  myOut <- list(AnovaOut1,AnovaOut2)
  return(myOut)
}
anovafunc()
## Call:
##    aov(formula = y ~ x)
## 
## Terms:
##                         x Residuals
## Sum of Squares  0.0007466 0.6706205
## Deg. of Freedom         1         8
## 
## Residual standard error: 0.2895299
## Estimated effects may be unbalanced
## [[1]]
##             Df Sum Sq Mean Sq F value Pr(>F)
## x            1 0.0007 0.00075   0.009  0.927
## Residuals    8 0.6706 0.08383               
## 
## [[2]]
## Call:
##    aov(formula = y ~ x)
## 
## Terms:
##                         x Residuals
## Sum of Squares  0.0007466 0.6706205
## Deg. of Freedom         1         8
## 
## Residual standard error: 0.2895299
## Estimated effects may be unbalanced

Checking function with my data

anovafunc(fakeDF$fact1, fakeDF$cont1)
## Call:
##    aov(formula = y ~ x)
## 
## Terms:
##                         x Residuals
## Sum of Squares  0.0100375 1.9951422
## Deg. of Freedom         1        28
## 
## Residual standard error: 0.2669365
## Estimated effects may be unbalanced
## [[1]]
##             Df Sum Sq Mean Sq F value Pr(>F)
## x            1  0.010 0.01004   0.141   0.71
## Residuals   28  1.995 0.07126               
## 
## [[2]]
## Call:
##    aov(formula = y ~ x)
## 
## Terms:
##                         x Residuals
## Sum of Squares  0.0100375 1.9951422
## Deg. of Freedom         1        28
## 
## Residual standard error: 0.2669365
## Estimated effects may be unbalanced

Function for Anova output:

myboxplot <- function(x=as.factor(rep(c("birds", "poodles"),each=5)), y=runif(10)) {
  box <- boxplot(y~x)
  return(box)
}

myboxplot()

## $stats
##           [,1]       [,2]
## [1,] 0.2145327 0.03764205
## [2,] 0.2400781 0.16919911
## [3,] 0.2466310 0.34589827
## [4,] 0.3290376 0.46315614
## [5,] 0.3290376 0.46315614
## 
## $n
## [1] 5 5
## 
## $conf
##           [,1]      [,2]
## [1,] 0.1837724 0.1381890
## [2,] 0.3094896 0.5536076
## 
## $out
## [1] 0.5514013 0.9297257
## 
## $group
## [1] 1 2
## 
## $names
## [1] "birds"   "poodles"

data

xVar <- as.factor(rep(c(“Control”,“Heated”,“Cooled”),each=5)) yVar <- c(rgamma(10,shape=5,scale=5),rgamma(5,shape=5,scale=10)) dataFrame <- data.frame(xVar,yVar)

model

anovaModel <- aov(yVar~xVar,data=dataFrame)

model output

print(anovaModel) summary(anovaModel)

plot

boxplot(yVar~xVar,data=dataFrame,col=c(“grey”,“thistle”,“orchid”)) ```

CONTINGENCY TABLE
# data
vec1 <- c(50,66,22)
vec2 <- c(120,22,30)
dataMatrix <- rbind(vec1,vec2)
rownames(dataMatrix) <- c("Cold","Warm")
colnames(dataMatrix) <-c("Aphaenogaster",
                         "Camponotus",
                         "Crematogaster")



# model + model output
print(chisq.test(dataMatrix))
## 
##  Pearson's Chi-squared test
## 
## data:  dataMatrix
## X-squared = 48.914, df = 2, p-value = 2.391e-11
# plot
mosaicplot(x=dataMatrix,
           col=c("goldenrod","grey","black"),
           shade=FALSE)

barplot(height=dataMatrix,
        beside=TRUE,
        col=c("cornflowerblue","tomato"))

chisq.test(dataMatrix)$expected
##      Aphaenogaster Camponotus Crematogaster
## Cold      75.67742   39.17419      23.14839
## Warm      94.32258   48.82581      28.85161
#verify expected counts
sum(dataMatrix[,1])
## [1] 170
LOGISTIC REGRESSION
# data
xVar <- rgamma(n=20,shape=5,scale=5)
yVar <- rbinom(n=20,size=1,p=0.5)
dataFrame <- data.frame(xVar,yVar)

# model
logRegMod <- glm(yVar ~ xVar,
                 data=dataFrame,
                 family=binomial(link="logit"))
# model output
print(logRegMod)
## 
## Call:  glm(formula = yVar ~ xVar, family = binomial(link = "logit"), 
##     data = dataFrame)
## 
## Coefficients:
## (Intercept)         xVar  
##      3.2882      -0.1734  
## 
## Degrees of Freedom: 19 Total (i.e. Null);  18 Residual
## Null Deviance:       25.9 
## Residual Deviance: 21.35     AIC: 25.35
summary(logRegMod)
## 
## Call:
## glm(formula = yVar ~ xVar, family = binomial(link = "logit"), 
##     data = dataFrame)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4917  -0.9145  -0.3824   0.9917   1.5317  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   3.2882     2.1815   1.507   0.1317  
## xVar         -0.1734     0.1002  -1.730   0.0836 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 25.898  on 19  degrees of freedom
## Residual deviance: 21.353  on 18  degrees of freedom
## AIC: 25.353
## 
## Number of Fisher Scoring iterations: 5
# plot
plot(x=dataFrame$xVar, y=dataFrame$yVar,pch=21,bg="tan",cex=2.5)
curve(predict(logRegMod,data.frame(xVar=x),type="response"),add=TRUE,lwd=2)