library(tidyverse)
library(vegan)
library(RAM)
library(lmtest)
library(car)
library(coefplot)
Create a directory called Data
inside of your working directory. Then copy these files into your Data folder:
BioDatFull <- read.csv("./Data/BioDatFull2.csv")
Mapping_file <- read.csv("./Data/mapping_file.csv",fileEncoding="UTF-8-BOM")
OTU_table <- read.delim("./Data/97_OTU_table_QIIME1.txt", header=T)
names(Mapping_file)
## [1] "SampleID" "Species" "tissue"
We need to rearrange the OTU table. Think of this table exactly as you did the floristic survey!
We want the species names at the top, and the sites are analogous to the samples (SRR…).
row.names(OTU_table)<-OTU_table$OTU_ID
OTU_table$OTU_ID<-NULL
OTU_table.nz<-OTU_table[rowSums(OTU_table[,c(1:(length(OTU_table)-1))])!=0,]
OTU_table.ns<-OTU_table.nz[rowSums(OTU_table.nz[,c(1:(length(OTU_table.nz)-1))])!=1,] # We want to remove singletons because they are likely to be errors, especially in such a large dataset.
dim(OTU_table)
## [1] 21201 178
dim(OTU_table.ns)
## [1] 17245 178
OTU_table<-OTU_table.ns
OTU_table.t<-transpose.OTU(OTU_table)
which( colSums(OTU_table.t)==0 )
## named integer(0)
which( rowSums(OTU_table.t)==0 )
## named integer(0)
OTUmapping<-data.frame(SampleID=rownames(OTU_table.t))
row.names(OTUmapping)<-rownames(OTU_table.t)
OTUmapping$Richness<-rowSums(OTU_table.t!=0)
OTUmapping$Shannon<-diversity(OTU_table.t, index = "shannon", MARGIN = 1, base = exp(1))
Mapping_file$Species<-as.character(Mapping_file$Species)
Mapping_file$SampleID<-as.character(Mapping_file$SampleID)
OTUmapping$SampleID<-as.character(OTUmapping$SampleID)
for(i in 1:(length(row.names(OTUmapping)))){
OTUmapping$Species[i]<-Mapping_file[Mapping_file$SampleID==OTUmapping$SampleID[i],]$Species
}
Unlike before, the “site” information is not attached to the file, its in a seperate CSV.
We have two types of information: climatic info for each species (BioDatFull), and then species for each sample (Mapping_file). Now we also have our Richness and diversity information (OTU.info). We need to merge all of this and make just one dataset with the richness per species and the location/ climatic information.
OTUmapping %>% group_by(Species) %>% dplyr::summarise(mean.Richness=mean(Richness),mean.Shannon=mean(Shannon)) -> OTU.info
OTU.info<-merge(OTU.info, BioDatFull, by="Species")
OTU.info$Species<-as.factor(OTU.info$Species)
MyData<-OTU.info
Now we model this.
STEP 1. Fit the response data to a known distribution
mod1<-lm(mean.Richness~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C+ Min.temperature.C + Temperature.annual.range.C+ Annual.precipitation.mm. + Precipitation.seasonality.C.of.V.+ Annual.mean.moisture.index + Highest.weekly.moisture.index+ Lowest.weekly.moisture.index+ Moisture.index.seasonality.C.of.V., data = MyData)
car::qqp(MyData$mean.Richness, "norm")
## [1] 14 6
plot(density(MyData$mean.Richness)) # does it fit a normal distribution?
shapiro.test(MyData$mean.Richness)
##
## Shapiro-Wilk normality test
##
## data: MyData$mean.Richness
## W = 0.87353, p-value = 0.01356
bptest(mod1) # are the residuals homoskedastic?
##
## studentized Breusch-Pagan test
##
## data: mod1
## BP = 13.179, df = 11, p-value = 0.2818
plot(mod1) # visual check
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
STEP 2. Likelihood ratio test (test parameters)
To test each parameter, remove it and see if the model is as ‘likely’ without it.
mod1.mLat<-update(mod1,.~. - Lat)
anova(mod1,mod1.mLat)
## Analysis of Variance Table
##
## Model 1: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Min.temperature.C + Temperature.annual.range.C + Annual.precipitation.mm. +
## Precipitation.seasonality.C.of.V. + Annual.mean.moisture.index +
## Highest.weekly.moisture.index + Lowest.weekly.moisture.index +
## Moisture.index.seasonality.C.of.V.
## Model 2: mean.Richness ~ Long + Annual.mean.temperature.C + Max.temperature.C +
## Min.temperature.C + Temperature.annual.range.C + Annual.precipitation.mm. +
## Precipitation.seasonality.C.of.V. + Annual.mean.moisture.index +
## Highest.weekly.moisture.index + Lowest.weekly.moisture.index +
## Moisture.index.seasonality.C.of.V.
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 8 382897
## 2 9 1404413 -1 -1021516 21.343 0.001711 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod1.mLong<-update(mod1,.~. - Long)
anova(mod1,mod1.mLong)
## Analysis of Variance Table
##
## Model 1: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Min.temperature.C + Temperature.annual.range.C + Annual.precipitation.mm. +
## Precipitation.seasonality.C.of.V. + Annual.mean.moisture.index +
## Highest.weekly.moisture.index + Lowest.weekly.moisture.index +
## Moisture.index.seasonality.C.of.V.
## Model 2: mean.Richness ~ Lat + Annual.mean.temperature.C + Max.temperature.C +
## Min.temperature.C + Temperature.annual.range.C + Annual.precipitation.mm. +
## Precipitation.seasonality.C.of.V. + Annual.mean.moisture.index +
## Highest.weekly.moisture.index + Lowest.weekly.moisture.index +
## Moisture.index.seasonality.C.of.V.
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 8 382897
## 2 9 1188407 -1 -805510 16.83 0.003426 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod1.mAnnual.mean.temperature.C<-update(mod1,.~. - Annual.mean.temperature.C)
anova(mod1,mod1.mAnnual.mean.temperature.C)
## Analysis of Variance Table
##
## Model 1: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Min.temperature.C + Temperature.annual.range.C + Annual.precipitation.mm. +
## Precipitation.seasonality.C.of.V. + Annual.mean.moisture.index +
## Highest.weekly.moisture.index + Lowest.weekly.moisture.index +
## Moisture.index.seasonality.C.of.V.
## Model 2: mean.Richness ~ Lat + Long + Max.temperature.C + Min.temperature.C +
## Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. +
## Annual.mean.moisture.index + Highest.weekly.moisture.index +
## Lowest.weekly.moisture.index + Moisture.index.seasonality.C.of.V.
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 8 382897
## 2 9 1523500 -1 -1140603 23.831 0.001222 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod1.mMin.temperature.C<-update(mod1,.~. - Min.temperature.C)
anova(mod1,mod1.mMin.temperature.C)
## Analysis of Variance Table
##
## Model 1: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Min.temperature.C + Temperature.annual.range.C + Annual.precipitation.mm. +
## Precipitation.seasonality.C.of.V. + Annual.mean.moisture.index +
## Highest.weekly.moisture.index + Lowest.weekly.moisture.index +
## Moisture.index.seasonality.C.of.V.
## Model 2: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. +
## Annual.mean.moisture.index + Highest.weekly.moisture.index +
## Lowest.weekly.moisture.index + Moisture.index.seasonality.C.of.V.
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 8 382897
## 2 8 382897 0 0.30859
mod1.mTemperature.annual.range.C<-update(mod1.mMin.temperature.C,.~. - Temperature.annual.range.C)
anova(mod1.mMin.temperature.C,mod1.mTemperature.annual.range.C)
## Analysis of Variance Table
##
## Model 1: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. +
## Annual.mean.moisture.index + Highest.weekly.moisture.index +
## Lowest.weekly.moisture.index + Moisture.index.seasonality.C.of.V.
## Model 2: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. +
## Annual.mean.moisture.index + Highest.weekly.moisture.index +
## Lowest.weekly.moisture.index + Moisture.index.seasonality.C.of.V.
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 8 382897
## 2 9 1647688 -1 -1264791 26.426 0.0008845 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod1.mAnnual.precipitation.mm.<-update(mod1.mMin.temperature.C,.~. - Annual.precipitation.mm.)
anova(mod1.mMin.temperature.C,mod1.mAnnual.precipitation.mm.)
## Analysis of Variance Table
##
## Model 1: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. +
## Annual.mean.moisture.index + Highest.weekly.moisture.index +
## Lowest.weekly.moisture.index + Moisture.index.seasonality.C.of.V.
## Model 2: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Temperature.annual.range.C + Precipitation.seasonality.C.of.V. +
## Annual.mean.moisture.index + Highest.weekly.moisture.index +
## Lowest.weekly.moisture.index + Moisture.index.seasonality.C.of.V.
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 8 382897
## 2 9 1604861 -1 -1221964 25.531 0.0009858 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod1.mPrecipitation.seasonality.C.of.V.<-update(mod1.mMin.temperature.C,.~. - Precipitation.seasonality.C.of.V.)
anova(mod1.mMin.temperature.C,mod1.mPrecipitation.seasonality.C.of.V.)
## Analysis of Variance Table
##
## Model 1: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. +
## Annual.mean.moisture.index + Highest.weekly.moisture.index +
## Lowest.weekly.moisture.index + Moisture.index.seasonality.C.of.V.
## Model 2: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Temperature.annual.range.C + Annual.precipitation.mm. + Annual.mean.moisture.index +
## Highest.weekly.moisture.index + Lowest.weekly.moisture.index +
## Moisture.index.seasonality.C.of.V.
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 8 382897
## 2 9 847656 -1 -464759 9.7104 0.01431 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod1.mAnnual.mean.moisture.index<-update(mod1.mMin.temperature.C,.~. - Annual.mean.moisture.index)
anova(mod1.mMin.temperature.C,mod1.mAnnual.mean.moisture.index)
## Analysis of Variance Table
##
## Model 1: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. +
## Annual.mean.moisture.index + Highest.weekly.moisture.index +
## Lowest.weekly.moisture.index + Moisture.index.seasonality.C.of.V.
## Model 2: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. +
## Highest.weekly.moisture.index + Lowest.weekly.moisture.index +
## Moisture.index.seasonality.C.of.V.
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 8 382897
## 2 9 457517 -1 -74620 1.5591 0.2471
mod1.mHighest.weekly.moisture.index<-update(mod1.mAnnual.mean.moisture.index,.~. - Highest.weekly.moisture.index)
anova(mod1.mAnnual.mean.moisture.index,mod1.mHighest.weekly.moisture.index)
## Analysis of Variance Table
##
## Model 1: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. +
## Highest.weekly.moisture.index + Lowest.weekly.moisture.index +
## Moisture.index.seasonality.C.of.V.
## Model 2: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. +
## Lowest.weekly.moisture.index + Moisture.index.seasonality.C.of.V.
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 9 457517
## 2 10 2406534 -1 -1949017 38.34 0.0001604 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod1.mLowest.weekly.moisture.index<-update(mod1.mAnnual.mean.moisture.index,.~. - Lowest.weekly.moisture.index)
anova(mod1.mAnnual.mean.moisture.index,mod1.mLowest.weekly.moisture.index)
## Analysis of Variance Table
##
## Model 1: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. +
## Highest.weekly.moisture.index + Lowest.weekly.moisture.index +
## Moisture.index.seasonality.C.of.V.
## Model 2: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. +
## Highest.weekly.moisture.index + Moisture.index.seasonality.C.of.V.
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 9 457517
## 2 10 784160 -1 -326643 6.4255 0.03198 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod1.mMoisture.index.seasonality.C.of.V<-update(mod1.mAnnual.mean.moisture.index,.~. - Moisture.index.seasonality.C.of.V.)
anova(mod1.mAnnual.mean.moisture.index,mod1.mMoisture.index.seasonality.C.of.V)
## Analysis of Variance Table
##
## Model 1: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. +
## Highest.weekly.moisture.index + Lowest.weekly.moisture.index +
## Moisture.index.seasonality.C.of.V.
## Model 2: mean.Richness ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C +
## Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. +
## Highest.weekly.moisture.index + Lowest.weekly.moisture.index
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 9 457517
## 2 10 2227734 -1 -1770216 34.823 0.0002288 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod1<-mod1.mAnnual.mean.moisture.index
summary(mod1) # this is not done to look at the "stars" but rather to look at the Estimate value.
##
## Call:
## lm(formula = mean.Richness ~ Lat + Long + Annual.mean.temperature.C +
## Max.temperature.C + Temperature.annual.range.C + Annual.precipitation.mm. +
## Precipitation.seasonality.C.of.V. + Highest.weekly.moisture.index +
## Lowest.weekly.moisture.index + Moisture.index.seasonality.C.of.V.,
## data = MyData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -302.22 -37.66 -16.63 81.21 380.56
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -870.448 1015.544 -0.857 0.413617
## Lat -114.383 19.504 -5.865 0.000239
## Long -9.509 2.410 -3.946 0.003376
## Annual.mean.temperature.C -2883.019 465.207 -6.197 0.000159
## Max.temperature.C 2465.422 402.822 6.120 0.000175
## Temperature.annual.range.C -781.639 127.073 -6.151 0.000168
## Annual.precipitation.mm. 26.389 4.306 6.129 0.000173
## Precipitation.seasonality.C.of.V. -3781.997 1245.390 -3.037 0.014091
## Highest.weekly.moisture.index -37602.814 6072.885 -6.192 0.000160
## Lowest.weekly.moisture.index -1846.175 728.314 -2.535 0.031979
## Moisture.index.seasonality.C.of.V. 47241.777 8005.632 5.901 0.000229
##
## (Intercept)
## Lat ***
## Long **
## Annual.mean.temperature.C ***
## Max.temperature.C ***
## Temperature.annual.range.C ***
## Annual.precipitation.mm. ***
## Precipitation.seasonality.C.of.V. *
## Highest.weekly.moisture.index ***
## Lowest.weekly.moisture.index *
## Moisture.index.seasonality.C.of.V. ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 225.5 on 9 degrees of freedom
## Multiple R-squared: 0.8818, Adjusted R-squared: 0.7505
## F-statistic: 6.716 on 10 and 9 DF, p-value: 0.004249
coefplot(mod1)
avPlots(mod1)
We have the same problem here with the samples having no specific data but rather we have experiement specific characteristics. We need to average them out then.
X<-OTUmapping[,c(1,4)]
OTU_table.t$SampleID<-row.names(OTU_table.t)
OTU_table.t_species<-merge(OTU_table.t,X, by="SampleID",all = T)
OTU_table.t_species$SampleID<-NULL
OTU_table.tX<-aggregate(OTU_table.t_species, by = list(Species=OTU_table.t_species$Species), FUN=mean)
rownames(OTU_table.tX)<-OTU_table.tX$Species
OTU_table.tX$Species<-NULL
OTU_table.tX$Species<-NULL
OTU_table.hell <- decostand(OTU_table.tX, "hell")
Now we do the RDA:
var <-MyData[,c(4:15)] # Isolate my variables to make this whole code easier to read
formulaRDA<- rda(OTU_table.hell ~ ., data=var, scale=T) # notice my predictive variable are being scaled too.
head(summary(formulaRDA))
##
## Call:
## rda(formula = OTU_table.hell ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C + Min.temperature.C + Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. + Annual.mean.moisture.index + Highest.weekly.moisture.index + Lowest.weekly.moisture.index + Moisture.index.seasonality.C.of.V., data = var, scale = T)
##
## Partitioning of correlations:
## Inertia Proportion
## Total 17245 1.0000
## Constrained 11678 0.6772
## Unconstrained 5567 0.3228
##
## Eigenvalues, and their contribution to the correlations
##
## Importance of components:
## RDA1 RDA2 RDA3 RDA4 RDA5
## Eigenvalue 2327.930 1975.9423 1763.1408 1.255e+03 1.221e+03
## Proportion Explained 0.135 0.1146 0.1022 7.279e-02 7.078e-02
## Cumulative Proportion 0.135 0.2496 0.3518 4.246e-01 4.954e-01
## RDA6 RDA7 RDA8 RDA9 RDA10
## Eigenvalue 788.44555 727.6898 640.65842 392.72631 354.84682
## Proportion Explained 0.04572 0.0422 0.03715 0.02277 0.02058
## Cumulative Proportion 0.54110 0.5833 0.62045 0.64322 0.66380
## RDA11 PC1 PC2 PC3 PC4
## Eigenvalue 230.68745 2020.2116 1.578e+03 592.13703 487.21018
## Proportion Explained 0.01338 0.1171 9.149e-02 0.03434 0.02825
## Cumulative Proportion 0.67718 0.7943 8.858e-01 0.92015 0.94840
## PC5 PC6 PC7 PC8
## Eigenvalue 350.63881 254.17974 1.498e+02 1.352e+02
## Proportion Explained 0.02033 0.01474 8.685e-03 7.839e-03
## Cumulative Proportion 0.96874 0.98348 9.922e-01 1.000e+00
##
## Accumulated constrained eigenvalues
## Importance of components:
## RDA1 RDA2 RDA3 RDA4 RDA5
## Eigenvalue 2327.9302 1975.9423 1763.1408 1255.2038 1220.6500
## Proportion Explained 0.1993 0.1692 0.1510 0.1075 0.1045
## Cumulative Proportion 0.1993 0.3685 0.5195 0.6270 0.7315
## RDA6 RDA7 RDA8 RDA9 RDA10
## Eigenvalue 788.44555 727.68980 640.65842 392.72631 354.84682
## Proportion Explained 0.06752 0.06231 0.05486 0.03363 0.03039
## Cumulative Proportion 0.79906 0.86137 0.91623 0.94986 0.98025
## RDA11
## Eigenvalue 230.68745
## Proportion Explained 0.01975
## Cumulative Proportion 1.00000
##
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores: 23.92512
##
##
## Species scores
##
## RDA1 RDA2 RDA3 RDA4 RDA5 RDA6
## 4479946 0.120579 -0.006196 -0.023640 0.005052 -0.0002417 -0.004474
## 4386156 -0.004256 0.012811 0.024301 0.005215 -0.0031244 0.016578
## 4479944 0.008353 -0.021082 0.008378 0.019595 -0.0052499 0.009275
## 4412915 -0.018010 -0.130073 -0.062329 -0.057562 0.0129175 0.024759
## 244336 0.020496 0.073791 -0.096600 0.006888 -0.0623253 0.017351
## 1050608 -0.008912 0.009627 0.021546 0.010389 -0.0050353 0.002826
## ....
##
##
## Site scores (weighted sums of species scores)
##
## RDA1 RDA2 RDA3 RDA4 RDA5 RDA6
## Brassica_napus -8.0465 -13.4364 -2.591 5.20962 -5.3914 -8.306462
## Brassica_oleracea -0.7190 0.5422 1.949 -0.09298 -0.2694 0.107349
## Bryum -0.9808 1.5322 2.719 1.09116 -0.3782 0.992394
## Cladophora -0.8066 1.4137 2.726 1.01144 -0.4450 0.932533
## Cratoneuron -1.0411 1.4761 3.137 1.20609 -0.5026 1.114271
## Espeletia -4.8310 10.8993 -17.770 -1.16103 -9.1949 0.004111
## ....
##
##
## Site constraints (linear combinations of constraining variables)
##
## RDA1 RDA2 RDA3 RDA4 RDA5 RDA6
## Brassica_napus -6.470 -11.052 -1.301 3.3308 -4.0023 -2.7981
## Brassica_oleracea -1.191 -0.172 1.563 0.4698 -0.6854 -1.5426
## Bryum -1.141 1.232 2.758 1.3296 -0.6440 0.3620
## Cladophora -1.140 1.232 2.757 1.3294 -0.6436 0.3622
## Cratoneuron -1.140 1.232 2.757 1.3294 -0.6436 0.3622
## Espeletia -4.793 10.956 -17.739 -1.2058 -9.1618 0.1354
## ....
##
##
## Biplot scores for constraining variables
##
## RDA1 RDA2 RDA3 RDA4
## Lat 0.10979 -0.20120 -0.04349 0.66615
## Long 0.26942 -0.37821 0.38409 0.44217
## Annual.mean.temperature.C 0.08016 -0.05119 0.34589 -0.01086
## Max.temperature.C 0.35129 -0.47076 0.55479 0.07438
## Min.temperature.C -0.06659 0.15962 0.20177 -0.08737
## Annual.precipitation.mm. -0.18360 0.35781 0.02947 0.03423
## Precipitation.seasonality.C.of.V. 0.02001 -0.55082 0.06582 0.34738
## Annual.mean.moisture.index -0.26766 0.48863 -0.06818 -0.15345
## Highest.weekly.moisture.index -0.31729 0.18520 0.06817 0.08945
## Lowest.weekly.moisture.index -0.17952 0.40587 -0.20965 -0.14140
## Moisture.index.seasonality.C.of.V. -0.08482 -0.48564 0.14857 0.29684
## RDA5 RDA6
## Lat 0.22696 0.104460
## Long -0.32993 0.012740
## Annual.mean.temperature.C -0.29649 -0.018128
## Max.temperature.C -0.15561 -0.082129
## Min.temperature.C -0.24543 0.004117
## Annual.precipitation.mm. -0.22776 0.211486
## Precipitation.seasonality.C.of.V. -0.21315 -0.093221
## Annual.mean.moisture.index -0.08738 0.238996
## Highest.weekly.moisture.index -0.15837 0.133139
## Lowest.weekly.moisture.index -0.05741 0.209658
## Moisture.index.seasonality.C.of.V. -0.07581 -0.205038
RsquareAdj(formulaRDA)
## $r.squared
## [1] 0.6771772
##
## $adj.r.squared
## [1] 0.2332959
anova(formulaRDA, permutations=1000) # Is the RDA meaningful?
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 1000
##
## Model: rda(formula = OTU_table.hell ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C + Min.temperature.C + Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. + Annual.mean.moisture.index + Highest.weekly.moisture.index + Lowest.weekly.moisture.index + Moisture.index.seasonality.C.of.V., data = var, scale = T)
## Df Variance F Pr(>F)
## Model 11 11677.9 1.5256 0.07992 .
## Residual 8 5567.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova.cca(formulaRDA, by="margin", permutations=1000) # If so, are any predictor variables meaningful?
## Permutation test for rda under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 1000
##
## Model: rda(formula = OTU_table.hell ~ Lat + Long + Annual.mean.temperature.C + Max.temperature.C + Min.temperature.C + Temperature.annual.range.C + Annual.precipitation.mm. + Precipitation.seasonality.C.of.V. + Annual.mean.moisture.index + Highest.weekly.moisture.index + Lowest.weekly.moisture.index + Moisture.index.seasonality.C.of.V., data = var, scale = T)
## Df Variance F Pr(>F)
## Lat 1 976.1 1.4027 0.185814
## Long 1 1628.9 2.3408 0.000999 ***
## Annual.mean.temperature.C 1 1088.4 1.5641 0.106893
## Max.temperature.C 0 0.0 -Inf 1.000000
## Min.temperature.C 0 0.0 Inf 0.525475
## Temperature.annual.range.C 0 0.0 -Inf
## Annual.precipitation.mm. 1 1113.2 1.5997 0.082917 .
## Precipitation.seasonality.C.of.V. 1 992.0 1.4256 0.202797
## Annual.mean.moisture.index 1 834.4 1.1990 0.334665
## Highest.weekly.moisture.index 1 1040.1 1.4946 0.122877
## Lowest.weekly.moisture.index 1 1073.7 1.5429 0.120879
## Moisture.index.seasonality.C.of.V. 1 947.7 1.3618 0.209790
## Residual 8 5567.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Keep in mind none of these are significant though! so their direction does not have a true effect on the species composition.
smry <- summary(formulaRDA)
df1 <- data.frame(smry$sites[,1:2]) # RDA1 and RDA2 (for each quadrate)
df2 <- data.frame(smry$biplot[,1:2]) # loadings for RDA1 and RDA2 (for each variable)
rda.plot <- ggplot(df1, aes(x=RDA1, y=RDA2)) +
geom_text(aes(label=rownames(df1)),size=3,position=position_jitter(width=.2,height=.2)) +
geom_point(aes(alpha=0.3)) +
geom_hline(yintercept=0, linetype="dotted") +
geom_vline(xintercept=0, linetype="dotted")
(formulaRDA.PLOT<-rda.plot +
geom_segment(data=df2, aes(x=0, xend=RDA1, y=0, yend=RDA2),
color="red", arrow=arrow(length=unit(0.01,"npc"))) +
geom_text(data=df2,
aes(x=RDA1,y=RDA2,label=rownames(df2),
hjust=0.5*(1-sign(RDA1)),vjust=0.5*(1-sign(RDA2))),
color="red", size=4)+ theme(legend.position="none"))
So this is when the NMDS can be really helpful. You dont know what is playing a role in the OTU community assembly. Looking at them in space might give you a hint, or might inspire you.
We need to produce a betadiversity matrix again. We can keep the samples seperate now since we arent using climate here. We might be able to see on this if something else is happening.
df<-data.frame(Species=OTU_table.t_species$Species)
M<-OTU_table.t
M$SampleID<-NULL
M$Species<-NULL
M$Species<-NULL
dist_M <- vegdist(M, method = "bray",binary=T)
meta.nmds <- metaMDS(dist_M, k=2, trymax = 1000) # ok so k=2 means I want 2 dimensions
## Run 0 stress 0.2453008
## Run 1 stress 0.2353647
## ... New best solution
## ... Procrustes: rmse 0.06606693 max resid 0.1449976
## Run 2 stress 0.2567882
## Run 3 stress 0.2351881
## ... New best solution
## ... Procrustes: rmse 0.05218517 max resid 0.1752106
## Run 4 stress 0.222502
## ... New best solution
## ... Procrustes: rmse 0.04891165 max resid 0.1636964
## Run 5 stress 0.2550905
## Run 6 stress 0.2365938
## Run 7 stress 0.2281988
## Run 8 stress 0.2370462
## Run 9 stress 0.2691975
## Run 10 stress 0.262286
## Run 11 stress 0.2609197
## Run 12 stress 0.2659172
## Run 13 stress 0.2721596
## Run 14 stress 0.2426102
## Run 15 stress 0.267714
## Run 16 stress 0.2225212
## ... Procrustes: rmse 0.01242132 max resid 0.1176545
## Run 17 stress 0.2233586
## Run 18 stress 0.2682434
## Run 19 stress 0.2670352
## Run 20 stress 0.2513551
## Run 21 stress 0.2260423
## Run 22 stress 0.2258616
## Run 23 stress 0.2485766
## Run 24 stress 0.2594208
## Run 25 stress 0.2225164
## ... Procrustes: rmse 0.01242563 max resid 0.1176113
## Run 26 stress 0.2559494
## Run 27 stress 0.2529215
## Run 28 stress 0.2220297
## ... New best solution
## ... Procrustes: rmse 0.008104794 max resid 0.105861
## Run 29 stress 0.2251661
## Run 30 stress 0.2339725
## Run 31 stress 0.2674457
## Run 32 stress 0.243719
## Run 33 stress 0.2249059
## Run 34 stress 0.2224959
## ... Procrustes: rmse 0.01038182 max resid 0.1336895
## Run 35 stress 0.2241099
## Run 36 stress 0.2236641
## Run 37 stress 0.2366872
## Run 38 stress 0.2690754
## Run 39 stress 0.2492914
## Run 40 stress 0.2376731
## Run 41 stress 0.2415259
## Run 42 stress 0.2225166
## ... Procrustes: rmse 0.009642021 max resid 0.1185501
## Run 43 stress 0.251796
## Run 44 stress 0.2335305
## Run 45 stress 0.2223726
## ... Procrustes: rmse 0.01388562 max resid 0.1822947
## Run 46 stress 0.252111
## Run 47 stress 0.2270387
## Run 48 stress 0.2347563
## Run 49 stress 0.2329588
## Run 50 stress 0.2241153
## Run 51 stress 0.2493487
## Run 52 stress 0.2248977
## Run 53 stress 0.2674966
## Run 54 stress 0.2702946
## Run 55 stress 0.2271467
## Run 56 stress 0.2419372
## Run 57 stress 0.2290252
## Run 58 stress 0.2558111
## Run 59 stress 0.2575862
## Run 60 stress 0.2271479
## Run 61 stress 0.2271457
## Run 62 stress 0.228069
## Run 63 stress 0.2382398
## Run 64 stress 0.2324819
## Run 65 stress 0.2513287
## Run 66 stress 0.2351049
## Run 67 stress 0.2223729
## ... Procrustes: rmse 0.01389372 max resid 0.1823208
## Run 68 stress 0.2526711
## Run 69 stress 0.2225017
## ... Procrustes: rmse 0.008109692 max resid 0.1059359
## Run 70 stress 0.2225168
## ... Procrustes: rmse 0.009652401 max resid 0.11866
## Run 71 stress 0.2257124
## Run 72 stress 0.2609918
## Run 73 stress 0.2708831
## Run 74 stress 0.2622874
## Run 75 stress 0.2305228
## Run 76 stress 0.2220854
## ... Procrustes: rmse 0.002389304 max resid 0.02992955
## Run 77 stress 0.2350395
## Run 78 stress 0.232021
## Run 79 stress 0.2522759
## Run 80 stress 0.2229323
## Run 81 stress 0.2675179
## Run 82 stress 0.2225167
## ... Procrustes: rmse 0.009637103 max resid 0.1185451
## Run 83 stress 0.2594739
## Run 84 stress 0.2334917
## Run 85 stress 0.2249128
## Run 86 stress 0.233665
## Run 87 stress 0.2619509
## Run 88 stress 0.2477801
## Run 89 stress 0.2249103
## Run 90 stress 0.2325516
## Run 91 stress 0.2332477
## Run 92 stress 0.229517
## Run 93 stress 0.2430495
## Run 94 stress 0.2340634
## Run 95 stress 0.2738789
## Run 96 stress 0.2248293
## Run 97 stress 0.2423261
## Run 98 stress 0.248582
## Run 99 stress 0.2292606
## Run 100 stress 0.2230128
## Run 101 stress 0.2338423
## Run 102 stress 0.2589482
## Run 103 stress 0.2684254
## Run 104 stress 0.2438671
## Run 105 stress 0.2272628
## Run 106 stress 0.2233592
## Run 107 stress 0.2220298
## ... Procrustes: rmse 0.0001846949 max resid 0.001049585
## ... Similar to previous best
## *** Solution reached
#data for plotting
##NMDS points
NMDS.data<-data.frame(Species=df$Species) # this is the only true difference from floristic composition analysis. Instead of using plots location and population, we want to visualize their species.
NMDS.data$NMDS1<-meta.nmds$points[ ,1]
NMDS.data$NMDS2<-meta.nmds$points[ ,2]
ggplot(data = NMDS.data, aes(y = NMDS2, x = NMDS1))+
geom_point( aes(col = NMDS.data$Species), size = 1.5,alpha=0.6)
They are not assembling because of climate… so what do you think might play into their assembly? This NMDS can help you think about it…