Creating a fake dataset:
cont1 <- runif(1:30)
cont2 <- runif(30)
fact1 <- c(rep("Blue",20), rep("Green",10))
fact2 <- as.factor(c(rep(1,5), rep(0,25)))
fakeDF <- data.frame(cont1, cont2, fact1, fact2)
LINEAR REGRESSION
# FUNCTION: fitLinear
# inputs: numeric vector of predictor (x) and response (y)
# outputs: slope and p-value
# data
fitLinear <- function(x=runif(1:40), y=runif(40)) {
myMod <- lm(y~x) # fit the model
myOut <- c(slope=summary(myMod)$coefficients[2,1],
pValue=summary(myMod)$coefficients[2,4])
return(myOut)
}
fitLinear()
## slope pValue
## 0.0367671 0.8451700
Testing the function on my fake dataset:
fitLinear(fakeDF$cont1,fakeDF$cont2)
## slope pValue
## -0.04334008 0.82659647
Create output plot
plotfitLinear <- function(x=runif(1:40), y=runif(40)) {
plot1 <- plot(x=x,y=y,pch=21,bg="lightblue",cex=2)
myMod <- lm(y~x)
abline(myMod)
return(plot1)
}
plotfitLinear()
## NULL
Check with fake dataset:
plotfitLinear(fakeDF$cont1,fakeDF$cont2)
## NULL
ANOVA
anovafunc <- function(x=as.factor(rep(c("birds", "poodles"),each=5)), y=runif(10)) {
myAnova <- aov(y~x)
AnovaOut1 <- summary(myAnova)
AnovaOut2 <- print(myAnova)
myOut <- list(AnovaOut1,AnovaOut2)
return(myOut)
}
anovafunc()
## Call:
## aov(formula = y ~ x)
##
## Terms:
## x Residuals
## Sum of Squares 0.0007466 0.6706205
## Deg. of Freedom 1 8
##
## Residual standard error: 0.2895299
## Estimated effects may be unbalanced
## [[1]]
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 0.0007 0.00075 0.009 0.927
## Residuals 8 0.6706 0.08383
##
## [[2]]
## Call:
## aov(formula = y ~ x)
##
## Terms:
## x Residuals
## Sum of Squares 0.0007466 0.6706205
## Deg. of Freedom 1 8
##
## Residual standard error: 0.2895299
## Estimated effects may be unbalanced
Checking function with my data
anovafunc(fakeDF$fact1, fakeDF$cont1)
## Call:
## aov(formula = y ~ x)
##
## Terms:
## x Residuals
## Sum of Squares 0.0100375 1.9951422
## Deg. of Freedom 1 28
##
## Residual standard error: 0.2669365
## Estimated effects may be unbalanced
## [[1]]
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 0.010 0.01004 0.141 0.71
## Residuals 28 1.995 0.07126
##
## [[2]]
## Call:
## aov(formula = y ~ x)
##
## Terms:
## x Residuals
## Sum of Squares 0.0100375 1.9951422
## Deg. of Freedom 1 28
##
## Residual standard error: 0.2669365
## Estimated effects may be unbalanced
Function for Anova output:
myboxplot <- function(x=as.factor(rep(c("birds", "poodles"),each=5)), y=runif(10)) {
box <- boxplot(y~x)
return(box)
}
myboxplot()
## $stats
## [,1] [,2]
## [1,] 0.2145327 0.03764205
## [2,] 0.2400781 0.16919911
## [3,] 0.2466310 0.34589827
## [4,] 0.3290376 0.46315614
## [5,] 0.3290376 0.46315614
##
## $n
## [1] 5 5
##
## $conf
## [,1] [,2]
## [1,] 0.1837724 0.1381890
## [2,] 0.3094896 0.5536076
##
## $out
## [1] 0.5514013 0.9297257
##
## $group
## [1] 1 2
##
## $names
## [1] "birds" "poodles"
xVar <- as.factor(rep(c(“Control”,“Heated”,“Cooled”),each=5)) yVar <- c(rgamma(10,shape=5,scale=5),rgamma(5,shape=5,scale=10)) dataFrame <- data.frame(xVar,yVar)
anovaModel <- aov(yVar~xVar,data=dataFrame)
print(anovaModel) summary(anovaModel)
boxplot(yVar~xVar,data=dataFrame,col=c(“grey”,“thistle”,“orchid”)) ```
# data
vec1 <- c(50,66,22)
vec2 <- c(120,22,30)
dataMatrix <- rbind(vec1,vec2)
rownames(dataMatrix) <- c("Cold","Warm")
colnames(dataMatrix) <-c("Aphaenogaster",
"Camponotus",
"Crematogaster")
# model + model output
print(chisq.test(dataMatrix))
##
## Pearson's Chi-squared test
##
## data: dataMatrix
## X-squared = 48.914, df = 2, p-value = 2.391e-11
# plot
mosaicplot(x=dataMatrix,
col=c("goldenrod","grey","black"),
shade=FALSE)
barplot(height=dataMatrix,
beside=TRUE,
col=c("cornflowerblue","tomato"))
chisq.test(dataMatrix)$expected
## Aphaenogaster Camponotus Crematogaster
## Cold 75.67742 39.17419 23.14839
## Warm 94.32258 48.82581 28.85161
#verify expected counts
sum(dataMatrix[,1])
## [1] 170
# data
xVar <- rgamma(n=20,shape=5,scale=5)
yVar <- rbinom(n=20,size=1,p=0.5)
dataFrame <- data.frame(xVar,yVar)
# model
logRegMod <- glm(yVar ~ xVar,
data=dataFrame,
family=binomial(link="logit"))
# model output
print(logRegMod)
##
## Call: glm(formula = yVar ~ xVar, family = binomial(link = "logit"),
## data = dataFrame)
##
## Coefficients:
## (Intercept) xVar
## 3.2882 -0.1734
##
## Degrees of Freedom: 19 Total (i.e. Null); 18 Residual
## Null Deviance: 25.9
## Residual Deviance: 21.35 AIC: 25.35
summary(logRegMod)
##
## Call:
## glm(formula = yVar ~ xVar, family = binomial(link = "logit"),
## data = dataFrame)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4917 -0.9145 -0.3824 0.9917 1.5317
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.2882 2.1815 1.507 0.1317
## xVar -0.1734 0.1002 -1.730 0.0836 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 25.898 on 19 degrees of freedom
## Residual deviance: 21.353 on 18 degrees of freedom
## AIC: 25.353
##
## Number of Fisher Scoring iterations: 5
# plot
plot(x=dataFrame$xVar, y=dataFrame$yVar,pch=21,bg="tan",cex=2.5)
curve(predict(logRegMod,data.frame(xVar=x),type="response"),add=TRUE,lwd=2)