# Clear memory of characters:
ls()
## character(0)
rm(list=ls())
#Set Working Directory:
setwd("~/CompBio")
# get programs
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# read in data:
plantTrans <- read.csv("plantTransPlantsCompBio.csv", header=TRUE, sep = ",", stringsAsFactors=FALSE)
BombSurvey <- read.csv("2015SurveyCompBio.csv", header=TRUE, sep = ",", stringsAsFactors=FALSE)
head(BombSurvey)
## X target_name site HBviralLoad n BombusViralLoad sd
## 1 1 BQCV BOST-NEAR 366994727 21 5316210.40 9459693.7
## 2 2 BQCV CIND 14771905 10 28455499.25 56110898.3
## 3 3 BQCV CLERK 0 21 62970.41 146142.2
## 4 4 BQCV COL 992531887 23 1372201.88 1964750.5
## 5 5 BQCV FERL 419532555 20 15633038.03 28317436.2
## 6 6 BQCV FLAN 0 10 12662784.20 13246489.6
## se apiary_near_far ShannonDIV Density sumApiaries sumColonies
## 1 2064274.40 1 1.6187671 40.8 9 45
## 2 17743824.00 0 0.0000000 132.7 1 1
## 3 31890.84 0 0.5948803 27.3 0 0
## 4 409678.81 0 0.6917615 3.8 3 3
## 5 6331971.24 1 0.7977888 26.5 2 27
## 6 4188907.81 0 0.1683164 67.5 4 10
## BombPrev BombMeanNA
## 1 0.9523810 5582020.9
## 2 1.0000000 28455499.3
## 3 0.2380952 264475.7
## 4 0.8695652 1578032.2
## 5 0.8000000 19541297.5
## 6 1.0000000 12662784.2
#adding a column that will be the log transformation of VL to fix non-normality
BombSurvey$VL <- log(BombSurvey$BombusViralLoad)
#Make all '-Inf' into zeros
BombSurvey$VL[BombSurvey$VL == "-Inf"] <- 0
#check distribution
hist(BombSurvey$VL)
Linear Regression:
#########LINEAR REGRESSION#######################################
# FUNCTION: fitLinear
# inputs: numeric vector of predictor (x) and response (y)
# outputs: slope and p-value
# data
#Splitting the dataframe based on virus target:
DWV <- subset(BombSurvey, target_name=="DWV")
BQCV <- subset(BombSurvey, target_name=="BQCV")
#Creating a two column dataframe for the linear regression
names(DWV)
## [1] "X" "target_name" "site"
## [4] "HBviralLoad" "n" "BombusViralLoad"
## [7] "sd" "se" "apiary_near_far"
## [10] "ShannonDIV" "Density" "sumApiaries"
## [13] "sumColonies" "BombPrev" "BombMeanNA"
## [16] "VL"
BQReg<-cbind(BQCV$sumApiaries, BQCV$BombPrev)
DWReg <- cbind(DWV$sumApiaries, DWV$BombPrev)
#Get rid of zeros, (STILL WORKING ON THIS)
#creating a two column dataframe for the ANOVA
BQAnov <- cbind(BQCV$VL,BQCV$apiary_near_far)
DWAnov <- cbind(DWV$VL,DWV$apiary_near_far)
fitLinear <- function(data=data.frame(runif(10),runif(10))) {
x = data[,1]
y = data[,2]
myMod <- lm(y~x) # fit the model
myOut <- c(slope=summary(myMod)$coefficients[2,1],
pValue=summary(myMod)$coefficients[2,4])
return(myOut)
}
fitLinear()
## slope pValue
## 0.101887 0.720334
#Testing the function on my dataset:
#Using the linear regression function:
fitLinear(BQReg)
## slope pValue
## 0.065235870 0.008274574
fitLinear(DWReg)
## slope pValue
## -0.0003124868 0.9797618055
#Create output plot
plotfitLinear <- function(data=data.frame(runif(10),runif(10))) {
x = data[,1]
y = data[,2]
plot1 <- plot(x=x,y=y,pch=21,bg="lightblue",cex=2)
myMod <- lm(y~x)
abline(myMod)
return(plot1)
}
plotfitLinear()
## NULL
#Plotting my data
plotfitLinear(BQReg)
## NULL
plotfitLinear(DWReg)
## NULL
ANOVA
#########ANOVA#########################################
# FUNCTION: ANOVA
# inputs: categorical vector predictor (x) numeric vector of response (y)
# outputs: p-value
# data
anovafunc <- function(data=data.frame(runif(10),rep(c("birds","poodles"),each=5))) {
x = data[,2]
y = data[,1]
myAnova <- aov(y~x)
AnovaOut1 <- summary(myAnova)
AnovaOut2 <- print(myAnova)
myOut <- list(AnovaOut1,AnovaOut2)
return(myOut)
}
anovafunc()
## Call:
## aov(formula = y ~ x)
##
## Terms:
## x Residuals
## Sum of Squares 0.0443390 0.6516589
## Deg. of Freedom 1 8
##
## Residual standard error: 0.2854074
## Estimated effects may be unbalanced
## [[1]]
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 0.0443 0.04434 0.544 0.482
## Residuals 8 0.6517 0.08146
##
## [[2]]
## Call:
## aov(formula = y ~ x)
##
## Terms:
## x Residuals
## Sum of Squares 0.0443390 0.6516589
## Deg. of Freedom 1 8
##
## Residual standard error: 0.2854074
## Estimated effects may be unbalanced
#Checking function with my data
anovafunc(BQAnov)
## Call:
## aov(formula = y ~ x)
##
## Terms:
## x Residuals
## Sum of Squares 5.18842 69.30858
## Deg. of Freedom 1 17
##
## Residual standard error: 2.019152
## Estimated effects may be unbalanced
## [[1]]
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 5.19 5.188 1.273 0.275
## Residuals 17 69.31 4.077
##
## [[2]]
## Call:
## aov(formula = y ~ x)
##
## Terms:
## x Residuals
## Sum of Squares 5.18842 69.30858
## Deg. of Freedom 1 17
##
## Residual standard error: 2.019152
## Estimated effects may be unbalanced
anovafunc(DWAnov)
## Call:
## aov(formula = y ~ x)
##
## Terms:
## x Residuals
## Sum of Squares 68.1897 356.8392
## Deg. of Freedom 1 17
##
## Residual standard error: 4.581543
## Estimated effects may be unbalanced
## [[1]]
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 68.2 68.19 3.249 0.0892 .
## Residuals 17 356.8 20.99
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [[2]]
## Call:
## aov(formula = y ~ x)
##
## Terms:
## x Residuals
## Sum of Squares 68.1897 356.8392
## Deg. of Freedom 1 17
##
## Residual standard error: 4.581543
## Estimated effects may be unbalanced
#Function for Anova output:
myboxplot <- function(data=data.frame(runif(10),rep(c("birds","poodles"),each=5))) {
x = data[,2]
y = data[,1]
box <- boxplot(y~x)
return(box)
}
myboxplot()
## $stats
## [,1] [,2]
## [1,] 0.3123433 0.02177750
## [2,] 0.4112163 0.09241337
## [3,] 0.5305865 0.64407506
## [4,] 0.5791060 0.76242163
## [5,] 0.6579374 0.79719163
##
## $n
## [1] 5 5
##
## $conf
## [,1] [,2]
## [1,] 0.4119561 0.1706489
## [2,] 0.6492170 1.1175012
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] "birds" "poodles"
#Plotting my data
myboxplot(BQAnov)
## $stats
## [,1] [,2]
## [1,] 10.17631 12.82901
## [2,] 12.67244 14.19091
## [3,] 14.38769 15.48627
## [4,] 15.93283 16.03233
## [5,] 17.16385 17.70910
##
## $n
## [1] 12 7
##
## $conf
## [,1] [,2]
## [1,] 12.90060 14.38660
## [2,] 15.87478 16.58594
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] "0" "1"
myboxplot(DWAnov)
## $stats
## [,1] [,2]
## [1,] 0.000000 5.849028
## [2,] 0.000000 7.180848
## [3,] 3.585786 8.523187
## [4,] 8.884197 11.124129
## [5,] 10.512802 13.208084
##
## $n
## [1] 12 7
##
## $conf
## [,1] [,2]
## [1,] -0.4663559 6.168323
## [2,] 7.6379276 10.878051
##
## $out
## [1] 0
##
## $group
## [1] 2
##
## $names
## [1] "0" "1"
Contingency Table:
#############################################
#Data setup for contingency table
#HBplants <- subset(plantTrans, group=="T")
#HBplants <- ddply(HBplants, c("spp"), summarise,
# n = length(BINYprefilter),
# mean = mean(BINYprefilter, na.rm=TRUE),
# sum = sum(BINYprefilter, na.rm = TRUE),
# sd = sd(BINYprefilter, na.rm=TRUE),
# se = sd / sqrt(n))
#Checking function with my data
#anovafunc(HBplants$spp,HBplants$mean)
Creating a Random Dataset
#Creating Random Data Set