if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("ANCOMBC")
BiocManager::install("phyloseq")
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("microbiome")
install.packages("dplyr")
install.packages("tidyverse")
install.packages("ggplot2")
install.packages("GUniFrac")
install.packages("phangorn")
install.packages("vegan")
install.packages("gdata")
install.packages("purrr")
install.packages("devtools")
install.packages("rstatix")
install.packages("ggstasplot")
install.packages("DescTools")
install.packages("tibble")
install.packages("WRS2")
install.packages("tibble")
install.packages("ARTool")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("edgeR")
remotes::install_github("wilkelab/ggtext")
install_github("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis")
install_github("nuriamw/micro4all")
library(ANCOMBC)
library(phyloseq)
library(microbiome)
library(dplyr)
library(tidyverse)
library(ggplot2)
library(GUniFrac)
library(vegan)
library(gdata)
library(purrr)
library(devtools)
library(rstatix)
library(ggstatsplot)
library(DescTools)
library(tibble)
library(WRS2)
library(ARTool)
library(edgeR)
library(pairwiseAdonis)
library(micro4all)Ecological analyses of fungal dataset
Firstly, we have to install and load many packages needed to analyze our fungal data:
Our analyses will based on phyloseq objects. That is to say, we are going to use phyloseq package
1. Create a phyloseq object
phyloseq object are very useful since they harbor several elements of interest: an ASV (or OTU) table in which the absolute abundance of each ASV is registered, the taxonomy of each ASV (from Domain or Kingdom to ASV level), a metadata table in which a full description of each sample is given, and finally, a phylogenetic tree at ASV level. Thus, lets load all the input files and create the phyloseq object. So, all the above mentioned files should be in our working directory:
data=read.table("ASV_Hongos_FINAL.txt", header = T, sep="\t")
data$Kingdom=gsub("k__","",data$Kingdom)#replace the name of the taxa, bause Unite databse adds a code for each taxonomic rank
data$Phylum=gsub("p__","",data$Phylum)
data$Class=gsub("c__","",data$Class)
data$Order=gsub("o__","",data$Order)
data$Family=gsub("f__","",data$Family)
data$Genus=gsub("g__","",data$Genus)
colnames_data=colnames(data)
mt=read.table("metadata_fungi.txt",sep="\t", header=T)#load the metadata table
mt$Year=gsub("2022", "A", mt$Year)#we replaced the number of the years by a character
mt$Year=gsub("2023", "B", mt$Year)
mt$Replicate=as.character(mt$Replicate)#convert into characters
mt$Fusion=paste0(mt$Year, sep="_",mt$Plot, sep="_", mt$Condition,sep="_", mt$Compartment)#here we create a joined artificial variable in order to plot better our samples
mt$Condition_Year=paste0(mt$Condition,sep="_", mt$Year)
row.names(mt)=mt$Sample#the rownames of the metadata table should be identical to the name of the samplesThe order of the sample is very important! In particular, the order of the samples in the metadata file must be the same as the order of the samples in the ASV table. So, check (and correct it) previously.
tax =data[,2:8]#indicate the taxonomy of each ASV: from Domain/Kingdom to ASV level
ASV=data[,9:ncol(data)]#indicate the ASV counts
dna=Biostrings::DNAStringSet(data$ASV_seqs) #save the sequence of each ASV into a variable
names(dna)= data$ASV_names
row.names(tax)=data$ASV_names#the name of each row should be the name of each ASV (i.e., "ASV02156")
row.names(ASV)=data$ASV_names
identical(rownames(ASV), rownames(tax))#this is very important and should give "TRUE". Otherwise, review the lines above because there is a mistake elsewhere
#Convert each element into suitable objects for the construction of a phyloseq object
phy_OTUtable=otu_table(ASV, taxa_are_rows = T)
phy_taxonomy=tax_table(as.matrix(tax))
phy_metadata=sample_data(mt)
phy_data_total=phyloseq(phy_OTUtable,phy_taxonomy,phy_metadata)#bind all the elements into the phyloseq object
nsamples(phy_OTUtable); sum(sample_names(phy_metadata) %in% sample_names(phy_OTUtable)); nsamples(phy_metadata)#check all the elements have the same number of samples2. Data curation
The phyloseq object is already created, however, we still have to check the quality of the data. For instance, we have to check if all the samples are comprised by enough sequences, and if the sampling and sequencing efforts are enough. For that purpose, we have to check the number of total sequences.
numsec=as.data.frame(colSums(otu_table(phy_data_total)))
View(numsec)#order the table in ascending order to check the lowest library size (the sample with the lowest number of sequences)We have to remove some samples that were not well sterilized, as we detected in the bacterial dataset.
phy_total=subset_samples(phy_data_total,
Sample != "A1S04E" &
Sample != "A1S05E" &
Sample!= "A1M01E"&
Sample != "A2S09E" & Sample != "A2S12E"& Sample != "A2S08E" & Sample!="A2S01E"&
Sample !="B1S03E"& Sample != "B1S06E"& Sample!="B1S09E"& Sample!="B1S10E"&
Sample !="B1M12E"&
Sample != "B2S06E" & Sample !="B2S11E" & Sample !="B2S07E" & Sample !="B2S02E" &
Sample !="B2M08E" & Sample !="B2M03E")
numsec_pruned=as.data.frame(colSums(otu_table(phy_total)))Now, it would be interesting to check the quality of the sterilization process. We want to visualize in an ordination plot if rhizosphere and root endosphere samples are separated in the multivariate space. As indicated in the step 5, we have to normalize the ASV counts and then, calculate the ordination of the samples and plot them into a multivariate ordination plot:
#Let's sub-sample the phyloseq objects
AA=subset_samples(phy_total, Year=="A")
AA=prune_taxa(taxa_sums(AA)>0.0, AA)
BB=subset_samples(phy_total, Year=="B")
BB=prune_taxa(taxa_sums(BB)>0.0, BB)
#normalization of the data. For instance, the data corresponding to year 2023
todas_B_ASV=as.data.frame(otu_table(BB,taxa_are_rows = T))#obtain the ASV table from the phyloseq object
todas_B_mt=as.data.frame(sample_data(BB))#obtain the metadata table from the phyloseq object
todas_B_tax=as.data.frame(tax_table(BB))#obtain the taxonomy table from the phyloseq objec
#all the previous steps are required to create a new object suitable for edgeR functions
todas_B_edgeR = DGEList(counts = todas_B_ASV, samples = todas_B_mt, genes = todas_B_tax)#Create a DGEList object need for the TMM normalization
todas_B_edgeR = calcNormFactors(todas_B_edgeR)#calculate all the normalization factor so that we can correct potential biases associated to the different librazy sizes (different number of sequences per sample)
todas_B_ASV_norm = cpm(todas_B_edgeR, normalized.lib.sizes=T, log=F)#extracting the normalized abundance
todas_B_phy_OTU_norm=otu_table(as.data.frame(todas_B_ASV_norm,row.names=F), taxa_are_rows = T)#create the normalized phyloseq elements
todas_B_phy_taxonomy_norm=tax_table(as.matrix(todas_B_tax))
todas_B_phy_metadata_norm=sample_data(todas_B_mt)
taxa_names(todas_B_phy_OTU_norm)= taxa_names(todas_B_phy_taxonomy_norm)#if everything is well calculated, it should give "TRUE"
identical(rownames(todas_B_ASV_norm), rownames(todas_B_tax))
todas_B_normalized_phyloseq<-phyloseq(todas_B_phy_OTU_norm,
todas_B_phy_taxonomy_norm,
todas_B_phy_metadata_norm)#Create the new phyloseq object, in which the abundance of all the taxa is normalizedOne the normalization is done, the plot is made as follows:
PCOA_bray_B = ordinate(todas_B_normalized_phyloseq, "PCoA", "bray")#here we create the ordination plot (PCoA) based on Bray-Curtis dissimilarities
peso2=PCOA_bray_B$values$Relative_eig[1:2] #save the percentage of the variance explained by each axis into a new variable
peso2 #check the variance explained
p= plot_ordination(todas_B_normalized_phyloseq, PCOA_bray_B,type= "samples", color= "Compartment",shape = "Year")+
geom_point(alpha = 4, size = 4.5)+
labs(x=paste0("Axis 1 (",round(peso2[1]*100,digits = 2),"%)"),
y=paste0("Axis 2 (",round(peso2[2]*100,digits = 2),"%)"))+
geom_text(aes(label=Sample, fontface="bold"), hjust=0.15, vjust=0.15, nudge_y = 0.0075,size=4, show.legend =F) +
scale_shape_manual(values=c("2022"=16, "2023"=17))+
scale_color_manual(values=c("Rhizosphere"="blue", "Endosphere"="green"))+
theme_bw()+
theme(legend.key=element_blank(),
legend.title.align = 0,
legend.title = element_text(face="bold",size=18),
legend.text.align = 0,
axis.text = element_text(size=18),
axis.title = element_text(size = 20),
plot.title = element_text(hjust=0.5, face="bold",size=18),
legend.text = element_text(size = 18))+
geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+
geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+
ggtitle("Year 2023\nPCoA on Bray Curtis distance")
x11()
pAs shown in Figure 1, there are still some root endosphere samples (B1M10E and B1M09E) that are closer than rhizosphere samples than other root endosphere samples. Thus, we consider that these roots were not properly sterilized. We should remove them because we are not aware of their origin

This, we have to curate our dataset, and remove all these samples:
phy_total_TOTAL=subset_samples(phy_total,
Sample != "B1M10E" &
Sample != "B1M09E")
numsec_pruned_TOTAL=as.data.frame(colSums(otu_table(phy_total_TOTAL)))
write.table(data.frame(" "=rownames(numsec_pruned_TOTAL),numsec_pruned_TOTAL),file="Numero_secuencias_deTrabajo.txt", sep="\t",row.names =F)#save the table with the number of total sequences per sample3. Rarefaction curves
We still have to check whether the sequencing effort has been enough, so let’s have a look at rarefaction curves. They are the best way to visualize the quality of the sequencing and sampling.
In our case, we should visualize the rarefaction curves by plant compartment, to check whether the data curation was made well.
We will split according to the orchards, otherwise it will be really difficult to understand the graphs. So, let’s subset the data according to our experimental design. We will create new phyloseq objects: two per plant compartment, corresponding to each orchard or plot.
rizo1=subset_samples(phy_total_TOTAL, Compartment=="Rhizosphere" & Plot=="Plot1")
rizo1=prune_taxa(taxa_sums(rizo1)>0.0, rizo1)
rizo2=subset_samples(phy_total_TOTAL, Compartment=="Rhizosphere" & Plot== "Plot2")
rizo2=prune_taxa(taxa_sums(rizo2)>0.0, rizo2)
endo1=subset_samples(phy_total_TOTAL, Compartment=="Endosphere" & Plot=="Plot1")
endo1=prune_taxa(taxa_sums(endo1)>0.0, endo1)
endo2=subset_samples(phy_total_TOTAL, Compartment=="Endosphere" & Plot== "Plot2")
endo2=prune_taxa(taxa_sums(endo2)>0.0, endo2)Now, calculate and plot the rarefaction curves:
#a) Rhizosphere
#a.1) South plot
min(sample_sums(rizo1)) #get the minimum library size (the number of sequences)
colSums(otu_table(rizo1))[which.min(colSums(otu_table(rizo1)))]
mt=as.data.frame(sample_data(rizo1))[order(as.character(rownames(as.data.frame(sample_data(rizo1)))),decreasing=F),]
otu = otu_table(rizo1)
otu = as.data.frame(t(otu))
otu=otu[order(as.character(rownames(otu)),decreasing=FALSE),]
rownames(otu)==rownames(mt) #we want to color in different colors mothers and suckers, and also the samples taken in each year
rownames(otu)=paste0(rownames(otu),"/",mt$Condition_Year)
sample_names = rownames(otu)
out <- rarecurve(otu, step = 100, label = F) #calculate the rarefaction
rare <- lapply(out, function(x){
b <- as.data.frame(x)
b <- data.frame(ASV = b[,1], raw.read = rownames(b))
b$raw.read <- as.numeric(gsub("N", "", b$raw.read))
return(b)
})#this function is to create a table from the rarefaction variable
names(rare) <- sample_names
rare <- map_dfr(rare, function(x){
z <- data.frame(x)
return(z)
}, .id = "Sample")
rare$Condition_Year=rare$Sample #these lines aimed at creating a new column in which the information needed for the plotting is included. You should include here the factor that determines the color of the curves
rare$Condition_Year=gsub(".*/", "", rare$Condition_Year)#replace all the text before "/"
rare$raw.read=as.numeric(rare$raw.read)
p_rizo1=ggplot(rare, aes(x=raw.read, y=ASV, colour=Condition_Year, group=Sample)) +
theme_bw()+
geom_point(aes(colour=Condition_Year), size=0.85)+
geom_line(aes(colour=Condition_Year),size=1.2)+
geom_vline(aes(xintercept = min(sample_sums(rizo1))),
lty=1, colour="black")+
scale_fill_manual(values = c("Immature_A"="cornflowerblue", "Mature_A"="darkgreen",
"Immature_B"="#6600CC" ,"Mature_B"="magenta"))+
scale_color_manual(values = c("Immature_A"="cornflowerblue", "Mature_A"="darkgreen",
"Immature_B"="#6600CC" ,"Mature_B"="magenta"),
name="Condition (sampling year)",
breaks=c("Immature_A", "Mature_A",
"Immature_B" ,"Mature_B"),
labels=c("Immature (2022)", "Mature (2022)",
"Immature (2023)","Mature (2023)"))+
labs(title= "Rhizosphere; Plot 1", x="Number of sequences", y="Number of ASV")+
guides(alpha=FALSE)+
theme(legend.key=element_blank(),
legend.title.align = 0.85,
legend.title = element_text(face="bold",size=14),
axis.text = element_text(size=14),
axis.title = element_text(size = 16),
plot.title = element_text(hjust=0.5, face="bold", size=16),
legend.text = element_text(size = 16))
x11()
p_rizo1
#a.2) North plot
min(sample_sums(rizo2))
colSums(otu_table(rizo2))[which.min(colSums(otu_table(rizo2)))]
mt=as.data.frame(sample_data(rizo2))[order(as.character(rownames(as.data.frame(sample_data(rizo2)))),decreasing=F),]
otu = otu_table(rizo2)
otu = as.data.frame(t(otu))
otu=otu[order(as.character(rownames(otu)),decreasing=FALSE),]
rownames(otu)==rownames(mt)
rownames(otu)=paste0(rownames(otu),"/",mt$Condition_Year)
sample_names = rownames(otu)
out <- rarecurve(otu, step = 100, label = F)
rare <- lapply(out, function(x){
b <- as.data.frame(x)
b <- data.frame(ASV = b[,1], raw.read = rownames(b))
b$raw.read <- as.numeric(gsub("N", "", b$raw.read))
return(b)
})
names(rare) <- sample_names
rare <- map_dfr(rare, function(x){
z <- data.frame(x)
return(z)
}, .id = "Sample")
rare$Condition_Year=rare$Sample
rare$Condition_Year=gsub(".*/", "", rare$Condition_Year)
rare$raw.read=as.numeric(rare$raw.read)
p_rizo2=ggplot(rare, aes(x=raw.read, y=ASV, colour=Condition_Year, group=Sample)) +
theme_bw()+
geom_point(aes(colour=Condition_Year), size=0.85)+
geom_line(aes(colour=Condition_Year),size=1.2)+
geom_vline(aes(xintercept = min(sample_sums(rizo2))),
lty=1, colour="black")+
scale_fill_manual(values = c("Immature_A"="magenta", "Mature_A"="darkolivegreen2",
"Immature_B"="#993300" ,"Mature_B"="black"))+
scale_color_manual(values = c("Immature_A"="magenta", "Mature_A"="darkolivegreen2",
"Immature_B"="#993300" ,"Mature_B"="black"),
name="Condition (sampling year)",
breaks=c("Immature_A", "Mature_A",
"Immature_B" ,"Mature_B"),
labels=c("Immature (2022)", "Mature (2022)",
"Immature (2023)","Mature (2023)"))+
labs(title= "Rhizosphere; Plot 2", x="Number of sequences", y="Number of ASV")+
guides(alpha=FALSE)+
theme(legend.key=element_blank(),
legend.title.align = 0.85,
legend.title = element_text(face="bold",size=14),
axis.text = element_text(size=14),
axis.title = element_text(size = 16),
plot.title = element_text(hjust=0.5, face="bold", size=16),
legend.text = element_text(size = 16))
x11()
p_rizo2
#b) ENDOSPHERE
#b.1) South plot
min(sample_sums(endo1))
colSums(otu_table(endo1))[which.min(colSums(otu_table(endo1)))]
mt=as.data.frame(sample_data(endo1))[order(as.character(rownames(as.data.frame(sample_data(endo1)))),decreasing=F),]
otu = otu_table(endo1)
otu = as.data.frame(t(otu))
otu=otu[order(as.character(rownames(otu)),decreasing=FALSE),]
rownames(otu)==rownames(mt)
rownames(otu)=paste0(rownames(otu),"/",mt$Condition_Year)
sample_names = rownames(otu)
out <- rarecurve(otu, step = 100, label = F)
rare <- lapply(out, function(x){
b <- as.data.frame(x)
b <- data.frame(ASV = b[,1], raw.read = rownames(b))
b$raw.read <- as.numeric(gsub("N", "", b$raw.read))
return(b)
})
names(rare) <- sample_names
rare <- map_dfr(rare, function(x){
z <- data.frame(x)
return(z)
}, .id = "Sample")
rare$Condition_Year=rare$Sample
rare$Condition_Year=gsub(".*/", "", rare$Condition_Year)
rare$raw.read=as.numeric(rare$raw.read)
p_endo1=ggplot(rare, aes(x=raw.read, y=ASV, colour=Condition_Year, group=Sample)) +
theme_bw()+
geom_point(aes(colour=Condition_Year), size=0.85)+
geom_line(aes(colour=Condition_Year),size=1.2)+
geom_vline(aes(xintercept = min(sample_sums(endo1))),
lty=1, colour="black")+
scale_fill_manual(values = c("Immature_A"="#00CCFF", "Mature_A"="green",
"Immature_B"="#6600CC" ,"Mature_B"="magenta"))+
scale_color_manual(values = c("Immature_A"="#00CCFF", "Mature_A"="green",
"Immature_B"="#6600CC" ,"Mature_B"="magenta"),
name="Condition (sampling year)",
breaks=c("Immature_A", "Mature_A",
"Immature_B" ,"Mature_B"),
labels=c("Immature (2022)", "Mature (2022)",
"Immature (2023)","Mature (2023)"))+
labs(title= "Endosphere; Plot 1", x="Number of sequences", y="Number of ASV")+
guides(alpha=FALSE)+
theme(legend.key=element_blank(),
legend.title.align = 0.85,
legend.title = element_text(face="bold",size=14),
axis.text = element_text(size=14),
axis.title = element_text(size = 16),
plot.title = element_text(hjust=0.5, face="bold", size=16),
legend.text = element_text(size = 16))
x11()
p_endo1
#b.2) North plot
min(sample_sums(endo2))
colSums(otu_table(endo2))[which.min(colSums(otu_table(endo2)))]
mt=as.data.frame(sample_data(endo2))[order(as.character(rownames(as.data.frame(sample_data(endo2)))),decreasing=F),]
otu = otu_table(endo2)
otu = as.data.frame(t(otu))
otu=otu[order(as.character(rownames(otu)),decreasing=FALSE),]
rownames(otu)==rownames(mt)
rownames(otu)=paste0(rownames(otu),"/",mt$Condition_Year)
sample_names = rownames(otu)
out <- rarecurve(otu, step = 100, label = F)
rare <- lapply(out, function(x){
b <- as.data.frame(x)
b <- data.frame(ASV = b[,1], raw.read = rownames(b))
b$raw.read <- as.numeric(gsub("N", "", b$raw.read))
return(b)
})
names(rare) <- sample_names
rare <- map_dfr(rare, function(x){
z <- data.frame(x)
return(z)
}, .id = "Sample")
rare$Condition_Year=rare$Sample
rare$Condition_Year=gsub(".*/", "", rare$Condition_Year)
rare$raw.read=as.numeric(rare$raw.read)
p_endo2=ggplot(rare, aes(x=raw.read, y=ASV, colour=Condition_Year, group=Sample)) +
theme_bw()+
geom_point(aes(colour=Condition_Year), size=0.85)+
geom_line(aes(colour=Condition_Year),size=1.2)+
geom_vline(aes(xintercept = min(sample_sums(endo2))),
lty=1, colour="black")+
scale_fill_manual(values = c("Immature_A"="blueviolet", "Mature_A"="aquamarine",
"Immature_B"="#993300" ,"Mature_B"="black"))+
scale_color_manual(values = c("Immature_A"="blueviolet", "Mature_A"="aquamarine",
"Immature_B"="#993300" ,"Mature_B"="black"),
name="Condition (sampling year)",
breaks=c("Immature_A", "Mature_A",
"Immature_B" ,"Mature_B"),
labels=c("Immature (2022)", "Mature (2022)",
"Immature (2023)","Mature (2023)"))+
labs(title= "Endosphere; Plot 2", x="Number of sequences", y="Number of ASV")+
guides(alpha=FALSE)+
theme(legend.key=element_blank(),
legend.title.align = 0.85,
legend.title = element_text(face="bold",size=14),
axis.text = element_text(size=14),
axis.title = element_text(size = 16),
plot.title = element_text(hjust=0.5, face="bold", size=16),
legend.text = element_text(size = 16))
x11()
p_endo2As shown in Figures 2-5, at the same rarefaction levels, most of the samples reached to the asymptote, so the sequencing effort can be considered enough. Furthermore, no clear patterns of richnes can be deduced from the rarefaction curves.

4. Alpha diversity analysis
We are going to estimate the richness, diversity and evenness of each sample by calculating the number of observed ASVs, and Shannon’s and Inverse of Simpson and Pielou’s indices, respectively.
Some alpha diversity indices are sensitive to different library sizes, so first, we will rarefy all the samples to the smallest library size to avoid potential biases associated to different number of sequences.
It should be made a rarefaction per groups of samples to be compared. For instance, if we want to compare the diversity of rhizosphere bacterial communitites of Mother and Suckers plants located in the south plot, we should calculate the rarefaction level corresponding to the subgroup of Rhizosphere samples of the south plot (namely, object rizo1). And then, rarefy to the smallest library size of the samples included in the object rizo1.
#a) Endosphere
#a.1) South plot
rarefac_endo1=rarefy_even_depth(endo1, sample.size = min(sample_sums(endo1)),rngseed=T)#rarefaction. You can indicate manually the minimum sample size or by using the function "min"
sample_sums(rarefac_endo1)#check that now all the samples have the same number of sequences
colSums(otu_table(rarefac_endo1))[which.min(colSums(otu_table(rarefac_endo1)))]
indices_endo1=estimate_richness(rarefac_endo1, measures=c("Observed", "InvSimpson", "Shannon"))#calculate the indices
indices_endo1$Pielou=indices_endo1$Shannon/log(indices_endo1$Observed)#calculated manually the Pielou index
rownames(sample_data(rarefac_endo1))==row.names(indices_endo1)
mt_indices_endo1=data.frame(sample_data(rarefac_endo1))#extract the metadata
indices_endo1=add_column(indices_endo1, mt_indices_endo1[1:ncol(mt_indices_endo1)], .before = "Observed")#add the metadata to the indices table, before the column named "Observed"
write.table(data.frame(" "=rownames(indices_endo1),indices_endo1),file="Indices_Endo_Plot1_porReplicas.txt", sep="\t",row.names =F)
#We have already calculated the indices per replicates but we are interested in the mean and standard values per groups of samples, so let's calculate them:
media_endo1=aggregate(indices_endo1[,11:14], list(grouping=indices_endo1$Condition_Year), mean)%>% mutate_if(is.numeric, round, digits=2)#be careful. Here you have to indicate the position of the columns where the values of the indices are placed
sd_endo1=aggregate(indices_endo1[,11:14], list(grouping=indices_endo1$Condition_Year), sd)%>% mutate_if(is.numeric, round, digits=2)
mean_sd = NULL
for (i in 2:5){ #be careful with the position of the numeric values (alpha indices)
mean_sd <- cbind(mean_sd,paste0(media_endo1[,i], " +/- ", sd_endo1[,i]))}#then in Excel you can replace "+/-" by the corresponding symbol
tabla_publicaciones_endo1 = cbind(media_endo1$grouping, mean_sd)
colnames(tabla_publicaciones_endo1) = c("Group","Observed", "Shannon", "InvSimpson", "Pielou")
as.data.frame(tabla_publicaciones_endo1)
write.table(tabla_publicaciones_endo1,file="Indices_media_sd_Endo_Plot1.txt", sep="\t",row.names =F)
#a.2) North plot
rarefac_endo2=rarefy_even_depth(endo2, sample.size = min(sample_sums(endo2)),rngseed=T)
sample_sums(rarefac_endo2)
colSums(otu_table(rarefac_endo2))[which.min(colSums(otu_table(rarefac_endo2)))]
indices_endo2=estimate_richness(rarefac_endo2, measures=c("Observed", "InvSimpson", "Shannon"))
indices_endo2$Pielou=indices_endo2$Shannon/log(indices_endo2$Observed)
rownames(sample_data(rarefac_endo2))==row.names(indices_endo2)
mt_indices_endo2=data.frame(sample_data(rarefac_endo2))
indices_endo2=add_column(indices_endo2, mt_indices_endo2[1:ncol(mt_indices_endo2)], .before = "Observed")
write.table(data.frame(" "=rownames(indices_endo2),indices_endo2),file="Indices_Endo_Plot2_porReplicas.txt", sep="\t",row.names =F)
media_endo2=aggregate(indices_endo2[,11:14], list(grouping=indices_endo2$Condition_Year), mean)%>% mutate_if(is.numeric, round, digits=2)
sd_endo2=aggregate(indices_endo2[,11:14], list(grouping=indices_endo2$Condition_Year), sd)%>% mutate_if(is.numeric, round, digits=2)
mean_sd = NULL
for (i in 2:5){
mean_sd <- cbind(mean_sd,paste0(media_endo2[,i], " +/- ", sd_endo2[,i]))}
tabla_publicaciones_endo2 = cbind(media_endo2$grouping, mean_sd)
colnames(tabla_publicaciones_endo2) = c("Group","Observed", "Shannon", "InvSimpson", "Pielou")
as.data.frame(tabla_publicaciones_endo2)
write.table(tabla_publicaciones_endo2,file="Indices_media_sd_Endo_Plot2.txt", sep="\t",row.names =F)
#b) Rhizosphere
#b.1) South plot
rarefac_rizo1=rarefy_even_depth(rizo1, sample.size = min(sample_sums(rizo1)),rngseed=T)
sample_sums(rarefac_rizo1)
colSums(otu_table(rarefac_rizo1))[which.min(colSums(otu_table(rarefac_rizo1)))]
indices_rizo1=estimate_richness(rarefac_rizo1, measures=c("Observed", "InvSimpson", "Shannon"))
indices_rizo1$Pielou=indices_rizo1$Shannon/log(indices_rizo1$Observed)
rownames(sample_data(rarefac_rizo1))==row.names(indices_rizo1)
mt_indices_rizo1=data.frame(sample_data(rarefac_rizo1))
indices_rizo1=add_column(indices_rizo1, mt_indices_rizo1[1:ncol(mt_indices_rizo1)], .before = "Observed")
write.table(data.frame(" "=rownames(indices_rizo1),indices_rizo1),file="Indices_rizo_Plot1_porReplicas.txt", sep="\t",row.names =F)
media_rizo1=aggregate(indices_rizo1[,11:14], list(grouping=indices_rizo1$Condition_Year), mean)%>% mutate_if(is.numeric, round, digits=2)
sd_rizo1=aggregate(indices_rizo1[,11:14], list(grouping=indices_rizo1$Condition_Year), sd)%>% mutate_if(is.numeric, round, digits=2)
mean_sd = NULL
for (i in 2:5){
mean_sd <- cbind(mean_sd,paste0(media_rizo1[,i], " +/- ", sd_rizo1[,i]))}
tabla_publicaciones_rizo1 = cbind(media_rizo1$grouping, mean_sd)
colnames(tabla_publicaciones_rizo1) = c("Group","Observed", "Shannon", "InvSimpson", "Pielou")
as.data.frame(tabla_publicaciones_rizo1)
write.table(tabla_publicaciones_rizo1,file="Indices_media_sd_rizo_Plot1.txt", sep="\t",row.names =F)
#Pb.2) North plot
rarefac_rizo2=rarefy_even_depth(rizo2, sample.size = min(sample_sums(rizo2)),rngseed=T)
sample_sums(rarefac_rizo2)
colSums(otu_table(rarefac_rizo2))[which.min(colSums(otu_table(rarefac_rizo2)))]
indices_rizo2=estimate_richness(rarefac_rizo2, measures=c("Observed", "InvSimpson", "Shannon"))
indices_rizo2$Pielou=indices_rizo2$Shannon/log(indices_rizo2$Observed)
rownames(sample_data(rarefac_rizo2))==row.names(indices_rizo2)
mt_indices_rizo2=data.frame(sample_data(rarefac_rizo2))
indices_rizo2=add_column(indices_rizo2, mt_indices_rizo2[1:ncol(mt_indices_rizo2)], .before = "Observed")
write.table(data.frame(" "=rownames(indices_rizo2),indices_rizo2),file="Indices_rizo_Plot2_porReplicas.txt", sep="\t",row.names =F)
media_rizo2=aggregate(indices_rizo2[,11:14], list(grouping=indices_rizo2$Condition_Year), mean)%>% mutate_if(is.numeric, round, digits=2)
sd_rizo2=aggregate(indices_rizo2[,11:14], list(grouping=indices_rizo2$Condition_Year), sd)%>% mutate_if(is.numeric, round, digits=2)
mean_sd = NULL
for (i in 2:5){
mean_sd <- cbind(mean_sd,paste0(media_rizo2[,i], " +/- ", sd_rizo2[,i]))}
tabla_publicaciones_rizo2 = cbind(media_rizo2$grouping, mean_sd)
colnames(tabla_publicaciones_rizo2) = c("Group","Observed", "Shannon", "InvSimpson", "Pielou")
as.data.frame(tabla_publicaciones_rizo2)
write.table(tabla_publicaciones_rizo2,file="Indices_media_sd_rizo_Plot2.txt", sep="\t",row.names =F)4.1 Statistical analysis
We have just calculated the alpha indices per sample and per group of samples, but we have to compare them among group of samples. For that purpose, we will apply univariate statistic test. We will go into different steps, which briefly consist of:
- Check the normality of the data: we have to confirm whether our variables follow a normal distribution. If they are not normally distributed, we will also check if they are very far from the normal distribution.
- Check the homoscedasticity of the data: confirm if dispersion of the variable is the same in all the groups of samples to compare. If the distribution of the variables is normal (or almost normal) and there is homogeneity of variances, we will apply parametric tests. Otherwise, we will use non-parametric tests.
- Check the presence of the outliers. Outliers are not a problem, but we have to check if they are extreme outliers. In that case, we will apply robust tests.
- Apply the proper statistical test. Depending on the characteristics of the variables and the experimental design (one factor to be studied: one-way statistics; two factors: two-way statistical tests).
- Calculate the size of the effect. In case we find statistical differences among groups of samples in the studied variables, we have to measure the magnitude of the effect.
We have to study separately each individual variable (in this case, each alpha diversity index).
In this example, we are interested in two factors: the effect of the sampling year, and the effect of the plants’ developmental stage, also named Condition in this web.
#a) ENDOSPHERE
#a.1) North plot
#Normality
indices_endo1%>% group_by(Condition, Year)%>% shapiro_test(Observed)
indices_endo1%>% group_by(Condition, Year)%>% shapiro_test(Shannon)
indices_endo1%>% group_by(Condition, Year)%>% shapiro_test(InvSimpson)
indices_endo1%>% group_by(Condition, Year)%>% shapiro_test(Pielou)
x11()#let's see how far are our data from the normal distribution into a qq-plot.
ggqqplot(indices_endo1, "InvSimpson", ggtheme = theme_bw()) +
facet_grid(Condition ~ Year)
#Homoscedasticity
indices_endo1 %>% levene_test(Observed ~ Condition*Year)
indices_endo1 %>% levene_test(Shannon ~ Condition*Year)
indices_endo1 %>% levene_test(InvSimpson ~ Condition*Year)
indices_endo1 %>% levene_test(Pielou ~ Condition*Year)
#Outliers
a=indices_endo1 %>% group_by(Condition,Year)%>% identify_outliers(Observed); View(a)
b=indices_endo1 %>% group_by(Condition, Year)%>% identify_outliers(Shannon) ; View(b)
c=indices_endo1 %>% group_by(Condition, Year)%>% identify_outliers(InvSimpson); View(c)
d=indices_endo1 %>% group_by(Condition, Year)%>% identify_outliers(Pielou); View(d)
#Hypothesis contrasting tests
indices_endo1 %>% anova_test(Shannon ~ Condition * Year)#two-way ANOVA
indices_endo1 %>% anova_test(InvSimpson ~ Condition * Year)
indices_endo1 %>% anova_test(Pielou ~ Condition * Year)
indices_endo1$Condition=as.factor(indices_endo1$Condition)
indices_endo1$Year=as.factor(indices_endo1$Year)
t2way(Observed ~ Year*Condition, data=indices_endo1)#two-way robust test
#We did not find the interaction between Year and plant developmental Condition significant in any of the variables studied, but the effect of Year or Condition (or both) was statistaclly significant on some of the indices. In that case, we have to study the effect of each factor individually:
#Post-hoc tests
indices_endo1 %>%pairwise_t_test(Observed~Year,p.adjust.method = "holm")
test=mcp2atm(Observed~Year*Condition, data=indices_endo1)
#Effect size
z=lm(Observed~Condition*Year, data=indices_endo1)
eta_squared(z)
#a.2) North plot
#Normality
indices_endo2%>% group_by(Condition, Year)%>% shapiro_test(Observed)
indices_endo2%>% group_by(Condition, Year)%>% shapiro_test(Shannon)
indices_endo2%>% group_by(Condition, Year)%>% shapiro_test(InvSimpson)
indices_endo2%>% group_by(Condition, Year)%>% shapiro_test(Pielou)
x11()
ggqqplot(indices_endo2, "Observed", ggtheme = theme_bw()) +
facet_grid(Condition ~ Year)
#Homoscedasticity
indices_endo2 %>% levene_test(Observed ~ Condition*Year)#heterocedastic
indices_endo2 %>% levene_test(Shannon ~ Condition*Year)
indices_endo2 %>% levene_test(InvSimpson ~ Condition*Year)
indices_endo2 %>% levene_test(Pielou ~ Condition*Year)
#Outliers
a=indices_endo2 %>% group_by(Condition,Year)%>% identify_outliers(Observed); View(a)
b=indices_endo2 %>% group_by(Condition, Year)%>% identify_outliers(Shannon); View(b)
c=indices_endo2 %>% group_by(Condition, Year)%>% identify_outliers(InvSimpson); View(c)
d=indices_endo2 %>% group_by(Condition, Year)%>% identify_outliers(Pielou); View(d)
#Check wether with robust test the homoscedasticity is maintained
indices_endo2%>% filter(between(InvSimpson, quantile(InvSimpson,0.1), quantile(InvSimpson, 0.9)))%>%
levene_test(InvSimpson ~ Year*Condition)
#Hypothesis contrasting tests
#Observed ASVs is not homoscedastic, thus, we should apply non-parametric tests. However, there is not a suitable option for two-way tests, so let's transformate the data and then apply the corresponding test. Check this link: http://depts.washington.edu/acelab/proj/art/index.html
indices_endo2$Condition=as.factor(indices_endo2$Condition)
indices_endo2$Year=as.factor(indices_endo2$Year)
m = art(Observed ~ Year * Condition, data=indices_endo2) # linear model syntax; see lm()
anova(m)
eta_squared(m)
indices_endo2 %>% anova_test(Shannon ~ Condition * Year)
t2way(InvSimpson ~ Condition*Year, data=indices_endo2)
indices_endo2 %>% anova_test(Pielou ~ Condition * Year)
#Post-hoc tests
art.con(m, "Year:Condition", adjust="holm") %>%
summary() %>% # add significance stars to the output
mutate(sig. = symnum(p.value, corr=FALSE, na=FALSE,
cutpoints = c(0, .001, .01, .05, .10, 1),
symbols = c("***", "**", "*", ".", " ")))
t_test(Observed~Year, data=indices_endo2, var.equal=F)
indices_endo2 %>%pairwise_t_test(Shannon~Year)
indices_endo2 %>%pairwise_t_test(Shannon~Condition)
#b) RHIZOSPHERE
#b.1) South plot
#Normality
indices_rizo1%>% group_by(Condition, Year)%>% shapiro_test(Observed)
indices_rizo1%>% group_by(Condition, Year)%>% shapiro_test(Shannon)
indices_rizo1%>% group_by(Condition, Year)%>% shapiro_test(InvSimpson)
indices_rizo1%>% group_by(Condition, Year)%>% shapiro_test(Pielou)
x11()
ggqqplot(indices_rizo1, "Shannon", ggtheme = theme_bw()) +
facet_grid(Condition ~ Year)
x11()
ggqqplot(indices_rizo1, "Pielou", ggtheme = theme_bw()) +
facet_grid(Condition ~ Year)
#Homoscedasticity
indices_rizo1 %>% levene_test(Observed ~ Condition*Year)
indices_rizo1 %>% levene_test(Shannon ~ Condition*Year)
indices_rizo1 %>% levene_test(InvSimpson ~ Condition*Year)
indices_rizo1 %>% levene_test(Pielou ~ Condition*Year)
#Outliers
a=indices_rizo1 %>% group_by(Condition,Year)%>% identify_outliers(Observed); View(a)
b=indices_rizo1 %>% group_by(Condition, Year)%>% identify_outliers(Shannon); View(b)
c=indices_rizo1 %>% group_by(Condition, Year)%>% identify_outliers(InvSimpson); View(c)
d=indices_rizo1 %>% group_by(Condition, Year)%>% identify_outliers(Pielou); View(d)
#check wether robust test mantain the homoscedasticity
indices_rizo1%>% filter(between(Shannon,quantile(Shannon,0.1), quantile(Shannon, 0.9)))%>% levene_test(Shannon ~ Year*Condition)
indices_rizo1%>% filter(between(Pielou,quantile(Pielou,0.1), quantile(Pielou, 0.9)))%>%levene_test(Pielou ~ Year*Condition)
#Hypothesis contrasting tests
indices_rizo1$Condition=as.factor(indices_rizo1$Condition)
indices_rizo1$Year=as.factor(indices_rizo1$Year)
indices_rizo1 %>% anova_test(Observed ~ Condition * Year)
indices_rizo1$Year=as.factor(indices_rizo1$Year); indices_rizo1$Condition=as.factor(indices_rizo1$Condition)
t2way(Shannon ~ Condition*Year, data=indices_rizo1)
indices_rizo1 %>% anova_test(InvSimpson ~ Condition * Year)
indices_rizo1 %>% anova_test(Pielou ~ Condition * Year)
t2way(Pielou ~ Condition*Year, data=indices_rizo1)
#Post-hoc tests
indices_rizo1 %>%pairwise_t_test(Observed~Year,p.adjust.method = "holm") indices_rizo1 %>%pairwise_t_test(Observed~Condition,p.adjust.method = "holm")
mcp2atm(InvSimpson~Year*Condition, data=indices_rizo1)
mcp2atm(Pielou~Year*Condition, data=indices_rizo1)
#Effect size:
y=lm(Pielou~Year*Condition, data=indices_rizo1)
eta_squared(y)
#b.2) North plot
#Normality
indices_rizo2%>% group_by(Condition, Year)%>% shapiro_test(Observed)
indices_rizo2%>% group_by(Condition, Year)%>% shapiro_test(Shannon)
indices_rizo2%>% group_by(Condition, Year)%>% shapiro_test(InvSimpson)
indices_rizo2%>% group_by(Condition, Year)%>% shapiro_test(Pielou)
x11()
ggqqplot(indices_rizo2, "Shannon", ggtheme = theme_bw()) +
facet_grid(Condition ~ Year)
x11()
ggqqplot(indices_rizo2, "Pielou", ggtheme = theme_bw()) +
facet_grid(Condition ~ Year)
#Homoscedasticity
indices_rizo2 %>% levene_test(Observed ~ Condition*Year)
indices_rizo2 %>% levene_test(Shannon ~ Condition*Year)
indices_rizo2 %>% levene_test(InvSimpson ~ Condition*Year)
indices_rizo2 %>% levene_test(Pielou ~ Condition*Year)
#Outliers
a=indices_rizo2 %>% group_by(Condition,Year)%>% identify_outliers(Observed);View(a)
b=indices_rizo2 %>% group_by(Condition, Year)%>% identify_outliers(Shannon);View(b)
c=indices_rizo2 %>% group_by(Condition, Year)%>% identify_outliers(InvSimpson); View(c)
d=indices_rizo2 %>% group_by(Condition, Year)%>% identify_outliers(Pielou); View(d)
indices_rizo2%>%
filter(between(Shannon,
quantile(Shannon,0.1),
quantile(Shannon, 0.9)))%>%
levene_test(Shannon ~ Year*Condition)
indices_rizo2%>%
filter(between(Pielou,
quantile(Pielou,0.1),
quantile(Pielou, 0.9)))%>%
levene_test(Pielou ~ Year*Condition)
#Hypothesis contrasting tests
indices_rizo2 %>% anova_test(Observed ~ Condition * Year)
indices_rizo2$Year=as.factor(indices_rizo2$Year)
indices_rizo2$Condition=as.factor(indices_rizo2$Condition)
t2way(Shannon ~ Condition*Year, data=indices_rizo2)
indices_rizo2 %>% anova_test(InvSimpson ~ Condition * Year)
t2way(Pielou ~ Condition*Year, data=indices_rizo2)
#Post-hoc tests
indices_rizo2 %>%pairwise_t_test(Observed~Year)We can also compare if the alpha diversity indices of fungal communities associated to mothers, first and second suckers are different each other. In the index of this website, you can find more details about the concepts of Mothers, first suckers and second suckers. These concepts are very similar to the concept of “Generation” in sexually reproducing plants, and that’s exactly the name of the factor employed here. So, we will compare the indices of these groups, by the factor Generation.
#a) Rhizosphere
#a.1) South plot
#Normality
indices_rizo1%>% group_by(Generation)%>% shapiro_test(Observed) indices_rizo1%>% group_by(Generation)%>% shapiro_test(Shannon) indices_rizo1%>% group_by(Generation)%>% shapiro_test(InvSimpson) indices_rizo1%>% group_by(Generation)%>% shapiro_test(Pielou)
x11() ggqqplot(indices_rizo1, "Shannon", ggtheme = theme_bw()) + facet_grid(Generation~Plot)
x11() ggqqplot(indices_rizo1, "Pielou", ggtheme = theme_bw()) + facet_grid(Generation~ Plot)
#Homoscedasticity
indices_rizo1 %>% levene_test(Observed ~ Generation) indices_rizo1 %>% levene_test(Shannon ~ Generation) indices_rizo1 %>% levene_test(InvSimpson ~ Generation) indices_rizo1 %>% levene_test(Pielou ~ Generation)
#outliers
a=indices_rizo1 %>% group_by(Generation)%>% identify_outliers(Observed); View(a)
b=indices_rizo1 %>% group_by(Generation)%>% identify_outliers(Shannon) ;View(b)
c=indices_rizo1 %>% group_by(Generation)%>% identify_outliers(InvSimpson);View(c)
d=indices_rizo1 %>% group_by(Generation)%>% identify_outliers(Pielou);View(d)
#Check whether with robust methods the data are still homocedastic
indices_rizo1%>% filter(between(Shannon, quantile(Shannon,0.1),
quantile(Shannon, 0.9)))%>% levene_test(Shannon ~ Generation)
indices_rizo1%>% filter(between(InvSimpson, quantile(InvSimpson,0.1), quantile(InvSimpson, 0.9)))%>% levene_test(InvSimpson ~ Generation)
indices_rizo1%>% filter(between(Pielou, quantile(Pielou,0.1), quantile(Pielou, 0.9)))%>% levene_test(Pielou ~ Generation)
#Hypothesis contrasting tests
indices_rizo1 %>% anova_test(Observed ~ Generation) t1way(Shannon~Generation, data=indices_rizo1)
t1way(InvSimpson~Generation, data=indices_rizo1)
t1way(Pielou~Generation, data=indices_rizo1)
#Post-hoc test
indices_rizo1%>% pairwise_t_test(Observed~Generation)
#a.2) North plots
#Normality
indices_rizo2%>% group_by(Generation)%>% shapiro_test(Observed) indices_rizo2%>% group_by(Generation)%>% shapiro_test(Shannon) indices_rizo2%>% group_by(Generation)%>% shapiro_test(InvSimpson) indices_rizo2%>% group_by(Generation)%>% shapiro_test(Pielou)
x11()
ggqqplot(indices_rizo2, "Shannon", ggtheme = theme_bw()) + facet_grid(Generation~Plot)
x11()
ggqqplot(indices_rizo2, "Pielou", ggtheme = theme_bw()) + facet_grid(Generation ~ Plot)
#Homoscedasticity
indices_rizo2 %>% levene_test(Observed ~ Generation)
indices_rizo2 %>% levene_test(Shannon ~ Generation)
indices_rizo2 %>% levene_test(InvSimpson ~ Generation)
indices_rizo2 %>% levene_test(Pielou ~ Generation)
#outliers
a=indices_rizo2 %>% group_by(Generation)%>% identify_outliers(Observed); View(a)
b=indices_rizo2 %>% group_by(Generation)%>% identify_outliers(Shannon) ;View(b)
c=indices_rizo2 %>% group_by(Generation)%>% identify_outliers(InvSimpson);View(c)
d=indices_rizo2 %>% group_by(Generation)%>% identify_outliers(Pielou);View(d)
#Check whether with robust tests the data are still homocedastic
indices_rizo2%>% filter(between(Shannon, quantile(Shannon,0.1),
quantile(Shannon, 0.9)))%>% levene_test(Shannon ~ Generation)
indices_rizo2%>% filter(between(InvSimpson, quantile(InvSimpson,0.1),
quantile(InvSimpson, 0.9)))%\>% levene_test(InvSimpson ~ Generation)
indices_rizo2%>% filter(between(Pielou, quantile(Pielou,0.1),
quantile(Pielou, 0.9)))%>% levene_test(Pielou~ Generation)
#Hypothesis contrasting tests
indices_rizo2 %>% welch_anova_test(Observed~Generation)#ANOVA test that does no assume that the data are homocedastic
t1way(Shannon~Generation, data=indices_rizo2) t1way(InvSimpson~Generation, data=indices_rizo2) t1way(Pielou~Generation, data=indices_rizo2)
q=lm(Observed~Generation, data=indices_rizo2) eta_squared(q)
#Post-hoc tests
indices_rizo2%>% pairwise_t_test(Observed~Generation,p.adjust.method = "holm", pool.sd = F)
#b) Endosphere
#b.1) South plot
#Normality
indices_endo1%>% group_by(Generation)%>% shapiro_test(Observed) indices_endo1%>% group_by(Generation)%>% shapiro_test(Shannon) indices_endo1%>% group_by(Generation)%>% shapiro_test(InvSimpson) indices_endo1%>% group_by(Generation)%\>% shapiro_test(Pielou)
x11()
ggqqplot(indices_endo1, "InvSimpson", ggtheme = theme_bw()) + facet_grid(Generation~Plot)
#Homoscedasticity
indices_endo1 %>% levene_test(Observed ~ Generation)
indices_endo1 %>% levene_test(Shannon ~ Generation)
indices_endo1 %>% levene_test(InvSimpson ~ Generation)
indices_endo1 %>% levene_test(Pielou ~ Generation)
#outliers
a=indices_endo1 %>% group_by(Generation)%>% identify_outliers(Observed);View(a)
b=indices_endo1 %>% group_by(Generation)%>% identify_outliers(Shannon);View(b)
c=indices_endo1 %>% group_by(Generation)%>% identify_outliers(InvSimpson);View(c)
d=indices_endo1 %>% group_by(Generation)%>% identify_outliers(Pielou);View(d)
#Check whether with robust test the data are still homocedastic
indices_endo1%>% filter(between(InvSimpson, quantile(InvSimpson,0.1),
quantile(InvSimpson, 0.9)))%\>% levene_test(InvSimpson ~ Generation)
#Hypothesis contrasting tests
indices_endo1 %>% anova_test(Observed ~ Generation)
indices_endo1 %>% anova_test(Shannon ~ Generation) t1way(InvSimpson~Generation, data=indices_endo1)
indices_endo1 %>% anova_test(Pielou ~ Generation)
#Post-hoc
indices_endo1%>% pairwise_t_test(Observed~Generation)
#b.2) North plot
#Normality
indices_endo2%>% group_by(Generation)%>% shapiro_test(Observed) indices_endo2%>% group_by(Generation)%>% shapiro_test(Shannon) indices_endo2%>% group_by(Generation)%>% shapiro_test(InvSimpson) indices_endo2%>% group_by(Generation)%>% shapiro_test(Pielou)
x11()
ggqqplot(indices_endo2, "Observed", ggtheme = theme_bw()) + facet_grid(Generation~Plot)
x11()
ggqqplot(indices_endo2, "InvSimpson", ggtheme = theme_bw()) + facet_grid(Generation ~ Plot)
#Homoscedasticity
indices_endo2 %>% levene_test(Observed ~ Generation)
indices_endo2 %>% levene_test(Shannon ~ Generation)
indices_endo2 %>% levene_test(InvSimpson ~ Generation)
indices_endo2 %>% levene_test(Pielou ~ Generation)
#outliers
a=indices_endo2 %>% group_by(Generation)%>% identify_outliers(Observed); View(a)
b=indices_endo2 %>% group_by(Generation)%>% identify_outliers(Shannon);View(b)
c=indices_endo2 %>% group_by(Generation)%>% identify_outliers(InvSimpson);View(c)
d=indices_endo2 %>% group_by(Generation)%>% identify_outliers(Pielou);View(d)
#check whether with robust tests data are still homocedastic
indices_endo2%>% filter(between(Shannon, quantile(Shannon,0.1),
quantile(Shannon, 0.9)))%>% levene_test(Shannon ~ Generation)
indices_endo2%>% filter(between(InvSimpson, quantile(InvSimpson,0.1),
quantile(InvSimpson, 0.9)))%>% levene_test(InvSimpson ~ Generation)
#Hypothesis contrasting tests
indices_endo2 %>% anova_test(Observed ~ Generation)
indices_endo2 %>% anova_test(Shannon ~ Generation) t1way(InvSimpson~Generation, data=indices_endo2)
indices_endo2 %>% anova_test(Pielou ~ Generation)5. Beta diversity analyses
It’s time to analyze the differences in the diversity of the fungal communities between samples. For that purpose, we will navigate into the multivariate statistics.
Firstly, we have to normalize the data to avoid potential biases due to different library sizes (different number of sequences in the samples). We prefer the edgeR method, which is based on the TMM (Trimmed Mean of the M-values). This normalization is needed when we are working also with compositional data; that is to say, with data that expressed in percentages, such as microbiota counts. More information can be found here.
5.1 TMM Normalization
We are going to use this normalization for both multivariate plots and for multivariate statistics.
#a)RHIZOSPHERE
#a.1) South plot
rizo1_ASV=as.data.frame(otu_table(rizo1,taxa_are_rows = T))#extract the ASV table from the phyloseq object
rizo1_mt=as.data.frame(sample_data(rizo1))#extract the metadata table from the phyloseq object
rizo1_tax=as.data.frame(tax_table(rizo1))#extract the taxonomy table from the phyloseq object
#Create a new object suitbale for the normalization by the package edgeR
rizo1_edgeR = DGEList(counts = rizo1_ASV, samples = rizo1_mt, genes = rizo1_tax)
#Calculate the normalization factors to correct the differences in sequencing depth
rizo1_edgeR = calcNormFactors(rizo1_edgeR)
#Extract the normalized abundance
rizo1_ASV_norm = cpm(rizo1_edgeR, normalized.lib.sizes=T, log=F)
#Create the phyloseq elements with the ASV counts normalized
rizo1_phy_OTU_norm=otu_table(as.data.frame(rizo1_ASV_norm,row.names=F), taxa_are_rows = T)
rizo1_phy_taxonomy_norm=tax_table(as.matrix(rizo1_tax))
rizo1_phy_metadata_norm=sample_data(rizo1_mt)
taxa_names(rizo1_phy_OTU_norm)= taxa_names(rizo1_phy_taxonomy_norm)
identical(rownames(rizo1_ASV_norm), rownames(rizo1_tax))#it should give "TRUE"
#Join all the elements into a new phyloseq object
rizo1_normalized_phyloseq<-phyloseq(rizo1_phy_OTU_norm,
rizo1_phy_taxonomy_norm,
rizo1_phy_metadata_norm)
#a.2) North plot
rizo2_ASV=as.data.frame(otu_table(rizo2,taxa_are_rows = T))
rizo2_mt=as.data.frame(sample_data(rizo2))
rizo2_tax=as.data.frame(tax_table(rizo2))
rizo2_edgeR = DGEList(counts = rizo2_ASV, samples = rizo2_mt, genes = rizo2_tax)
rizo2_edgeR = calcNormFactors(rizo2_edgeR)
rizo2_ASV_norm = cpm(rizo2_edgeR, normalized.lib.sizes=T, log=F)
rizo2_phy_OTU_norm=otu_table(as.data.frame(rizo2_ASV_norm,row.names=F), taxa_are_rows = T)
rizo2_phy_taxonomy_norm=tax_table(as.matrix(rizo2_tax))
rizo2_phy_metadata_norm=sample_data(rizo2_mt)
taxa_names(rizo2_phy_OTU_norm)= taxa_names(rizo2_phy_taxonomy_norm)
identical(rownames(rizo2_ASV_norm), rownames(rizo2_tax))
rizo2_normalized_phyloseq<-phyloseq(rizo2_phy_OTU_norm,
rizo2_phy_taxonomy_norm,
rizo2_phy_metadata_norm)
#b) ENDOSPHERE
#b.1) South plot
endo1_ASV=as.data.frame(otu_table(endo1,taxa_are_rows = T))
endo1_mt=as.data.frame(sample_data(endo1))
endo1_tax=as.data.frame(tax_table(endo1))
endo1_edgeR = DGEList(counts = endo1_ASV, samples = endo1_mt, genes = endo1_tax)
endo1_edgeR = calcNormFactors(endo1_edgeR)
endo1_ASV_norm = cpm(endo1_edgeR, normalized.lib.sizes=T, log=F)
endo1_phy_OTU_norm=otu_table(as.data.frame(endo1_ASV_norm,row.names=F), taxa_are_rows = T)
endo1_phy_taxonomy_norm=tax_table(as.matrix(endo1_tax))
endo1_phy_metadata_norm=sample_data(endo1_mt)
taxa_names(endo1_phy_OTU_norm)= taxa_names(endo1_phy_taxonomy_norm)
identical(rownames(endo1_ASV_norm), rownames(endo1_tax))
endo1_normalized_phyloseq<-phyloseq(endo1_phy_OTU_norm,
endo1_phy_taxonomy_norm,
endo1_phy_metadata_norm)
#b.2) North plot
endo2_ASV=as.data.frame(otu_table(endo2,taxa_are_rows = T))
endo2_mt=as.data.frame(sample_data(endo2))
endo2_tax=as.data.frame(tax_table(endo2))
endo2_edgeR = DGEList(counts = endo2_ASV, samples = endo2_mt, genes = endo2_tax)
endo2_edgeR = calcNormFactors(endo2_edgeR)
endo2_ASV_norm = cpm(endo2_edgeR, normalized.lib.sizes=T, log=F)
endo2_phy_OTU_norm=otu_table(as.data.frame(endo2_ASV_norm,row.names=F), taxa_are_rows = T)
endo2_phy_taxonomy_norm=tax_table(as.matrix(endo2_tax))
endo2_phy_metadata_norm=sample_data(endo2_mt)
#Anyadimos el nombre de los taxones
taxa_names(endo2_phy_OTU_norm)= taxa_names(endo2_phy_taxonomy_norm)
identical(rownames(endo2_ASV_norm), rownames(endo2_tax))
endo2_normalized_phyloseq<-phyloseq(endo2_phy_OTU_norm,
endo2_phy_taxonomy_norm,
endo2_phy_metadata_norm)5.2 Multivariate statistical tests
The best options to test whether the groups of samples have the same dispersion is to apply the test PERMDISP2, while PERMANOVA (Permutational Multivariate Analysis of Variance Using Distance Matrices) is a test suitable to check whether the centroid (concept similar to the mean of groups of samples) of all the groups of samples to be compared is homogenus. Thus, these tests are the multivariate version of Levene’s and ANOVA tests. However, both of them are based on distance matrices. That is to say, we have to calculate the distance between all sample pairs. There are many different distance or dissimilarity measures, but here we are going to use Bray-Curtis dissimilarity.
Null hypotheses of both tests: PERMANOVA H0: no significant differences in the centroids (mean values) of the groups of samples PERMDISPD2 H0: no significant differences in the dispersion of the groups of samples
Be careful with unbalanced experiments. As stated Anderson and Walsh (2013), when the groups of samples to be compared have different number of replicates (unbalanced design), heterocedasticity becomes a problem. In that cases, PERMANOVA cannot distinguish between the differences between groups of samples are due to the different dispersion or due to differences in the centroids. So, in case of unbalances designs, it is superimportant to check the dispersion of the data.
As you know, the ITS2 fungal region is quite variable in length, thus, we do not calculate a phylogenetic tree including all the ASVs. For that reason, we cannot compute the (un)weighted UniFrac distances. We are going to perform all the beta diversity analyses based just on Bray-Curtis dissimilarity measure.
#a) RHIZOSPHERE
#a.1) South plot
df_r1=data.frame(sample_data(rizo1_normalized_phyloseq))#extract the metadata table from the phyloseq object (NORMALIZED phyloseq!)
b_r1=distance(rizo1_normalized_phyloseq, "bray")#calculate Bray-Curtis dissimilarity
adonis_rizo1=adonis2(b_r1~Condition*Year, data=df_r1, permutations = 9999)#apply the PERMANOVA test. Here we test the model Community structure~Plant type*Sampling year
adonis_rizo1#check the results
pairwise.adonis(b_r1, phyloseq::sample_data(rizo1_normalized_phyloseq)$Year, p.adjust.m = "holm")#pairwise PERMANOVAs as post-hoc tests
betadisper_r1=betadisper(b_r1, df_r1$Condition_Year)#check the betadispersion of each factor separately
permutest(betadisper_r1)
betadisper_r1_condition=betadisper(b_r1, df_r1$Condition)#Check the betadispersion of the factor "Condition"
permutest(betadisper_r1_condition)
betadisper_r1_year=betadisper(b_r1, df_r1$Year)#Check the betadispersion of the factor "sampling year"
permutest(betadisper_r1_year)
#a.2) North plot
df_r2=data.frame(sample_data(rizo2_normalized_phyloseq))
b_r2=distance(rizo2_normalized_phyloseq,"bray")
adonis_rizo2=adonis2(b_r2~Condition*Year, data=df_r2, permutations = 9999)
adonis_rizo2
pairwise.adonis(b_r2, phyloseq::sample_data(rizo2_normalized_phyloseq)$Year, p.adjust.m = "holm")
betadisper_r2=betadisper(b_r2, df_r2$Condition_Year)
permutest(betadisper_r2)
betadisper_r2_condition=betadisper(b_r2, df_r2$Condition)
permutest(betadisper_r2_condition)
betadisper_r2_year=betadisper(b_r2, df_r2$Year)
permutest(betadisper_r2_year)
#b) ENDOSPHERE
#b.1) South plot
df_e1=data.frame(sample_data(endo1_normalized_phyloseq))
b_e1=distance(endo1_normalized_phyloseq, "bray")
adonis_endo1=adonis2(b_e1~Condition*Year, data=df_e1, permutations = 9999)
adonis_endo1
pairwise.adonis(b_e1, phyloseq::sample_data(endo1_normalized_phyloseq)$Year, p.adjust.m = "holm")
pairwise.adonis(b_e1, phyloseq::sample_data(endo1_normalized_phyloseq)$Condition, p.adjust.m = "holm")
pairwise.adonis(b_e1, phyloseq::sample_data(endo1_normalized_phyloseq)$Fusion, p.adjust.m = "holm")
betadisper_e1=betadisper(b_e1, df_e1$Condition_Year)
permutest(betadisper_e1)
betadisper_e1_condition=betadisper(b_e1, df_e1$Condition)
permutest(betadisper_e1_condition)
betadisper_e1_year=betadisper(b_e1, df_e1$Year)
permutest(betadisper_e1_year)
#b.2) North plot
df_e2=data.frame(sample_data(endo2_normalized_phyloseq))
b_e2=distance(endo2_normalized_phyloseq, "bray")
adonis_endo2=adonis2(b_e2~Condition*Year, data=df_e2, permutations = 9999)
adonis_endo2
pairwise.adonis(b_e2, phyloseq::sample_data(endo2_normalized_phyloseq)$Fusion, p.adjust.m = "holm")
betadisper_e2=betadisper(b_e2, df_e2$Condition_Year)
permutest(betadisper_e2)
betadisper_e2_condition=betadisper(b_e2, df_e2$Condition)
permutest(betadisper_e2_condition)
betadisper_e2_year=betadisper(b_e2, df_e2$Year)
permutest(betadisper_e2_year)
#Let's calculate the size of the effects. But first, we have to create the function that calculates the size of the effect:
adonis_OmegaSq <- function(adonisOutput, partial = TRUE){
if(!(is(adonisOutput, "adonis") || is(adonisOutput, "anova.cca")))
stop("Input should be an adonis object")
if (is(adonisOutput, "anova.cca")) {
aov_tab <- adonisOutput
aov_tab$MeanSqs <- aov_tab$SumOfSqs / aov_tab$Df
aov_tab$MeanSqs[length(aov_tab$Df)] <- NA
} else {
aov_tab <- adonisOutput$aov.tab
}
heading <- attr(aov_tab, "heading")
MS_res <- aov_tab[pmatch("Residual", rownames(aov_tab)), "MeanSqs"]
SS_tot <- aov_tab[rownames(aov_tab) == "Total", "SumsOfSqs"]
N <- aov_tab[rownames(aov_tab) == "Total", "Df"] + 1
if(partial){
omega <- apply(aov_tab, 1, function(x) (x["Df"]*(x["MeanSqs"]-MS_res))/(x["Df"]*x["MeanSqs"]+(N-x["Df"])*MS_res))
aov_tab$parOmegaSq <- c(omega[1:(length(omega)-2)], NA, NA)
} else {
omega <- apply(aov_tab, 1, function(x) (x["SumsOfSqs"]-x["Df"]*MS_res)/(SS_tot+MS_res))
aov_tab$OmegaSq <- c(omega[1:(length(omega)-2)], NA, NA)
}
if (is(adonisOutput, "adonis"))
cn_order <- c("Df", "SumsOfSqs", "MeanSqs", "F.Model", "R2",
if (partial) "parOmegaSq" else "OmegaSq", "Pr(>F)")
else
cn_order <- c("Df", "SumOfSqs", "F", if (partial) "parOmegaSq" else "OmegaSq",
"Pr(>F)")
aov_tab <- aov_tab[, cn_order]
attr(aov_tab, "names") <- cn_order
attr(aov_tab, "heading") <- heading
if (is(adonisOutput, "adonis"))
adonisOutput$aov.tab <- aov_tab
else
adonisOutput <- aov_tab
return(adonisOutput)
}
#apply just in that cases in which PERMANOVA gives significant results:
adonis_OmegaSq(adonis_rizo1)
adonis_OmegaSq(adonis_rizo2)
adonis_OmegaSq(adonis_endo1)
adonis_OmegaSq(adonis_endo2)Now, let’s compare the structure of bacterial communities of mothers, first and second suckers:
#a) RHIZOSPHERE
#a.1) South plot
adonis_rizo1_generation=adonis2(b_r1~Generation, data=df_rizo1, permutations = 9999)
adonis_rizo1_generation
betadisper_rizo1_generation=betadisper(b_r1, df_rizo1$Generation)
permutest(betadisper_rizo1_generation)
adonis_OmegaSq(adonis_rizo1_generation)
pairwise.adonis(b_r1, phyloseq::sample_data(rizo1)$Generation, p.adjust.m = "holm")
pa=pairwise.adonis(b_r1, phyloseq::sample_data(rizo1)$Generation, p.adjust.m = "holm")
#a.2) North plot
adonis_rizo2_generation=adonis2(b_r2~Generation, data=df_rizo2, permutations = 9999)
adonis_rizo2_generation
betadisper_rizo2_generation=betadisper(b_r2, df_rizo2$Generation)
permutest(betadisper_rizo2_generation)
adonis_OmegaSq(adonis_rizo2_generation)
pairwise.adonis(b_r2, phyloseq::sample_data(rizo2)$Generation, p.adjust.m = "holm")
#b) ENDOSPHERE
#b.1) South plot
adonis_endo1_generation=adonis2(b_e1~Generation, data=df_endo1, permutations = 9999)
adonis_endo1_generation
betadisper_endo1_generation=betadisper(b_e1, df_endo1$Generation)
permutest(betadisper_endo1_generation)
#b.2) North plot
adonis_endo2_generation=adonis2(b_e2~Generation, data=df_endo2, permutations = 9999)
adonis_endo2_generation
betadisper_endo2_generation=betadisper(b_e2, df_endo2$Generation)
permutest(betadisper_endo2_generation)
adonis_OmegaSq(adonis_endo2_generation)
pairwise.adonis(b_e2, phyloseq::sample_data(endo2)$Generation, p.adjust.m = "holm")5.3 Ordination plots
There are several options to visualize the results of PERMANOVA tests, for instance, by ordination plots such as PCoA (Principal Coordinate Analysis), NMDS (Non-metric MultiDimenstional Scaling), PCA (Principal Component Analysis), among others. Theses graphs are very useful because help us to detect the differences in the structure of the microbiota among groups of samples. Both PCoA and NMDS are based on non-euclidean distances or dissimilarity measures, so we consider that they are the best options for microbiota analyses.
One the one hand, PCoA plots represent in two dimensions the arrangement of the samples in the multivariate space, although the percentage of the total variance is commonly explained by many dimensions. On the other hand, NMDS approach “forces” the samples to be ordinated in just two dimensions, although this forced arrangement has an associated stress. We tolerate stress values lower than 0.2.
We recommend calculating both ordination plots with the aim of selecting the option that best represents the results of PERMANOVA analyses.
#a) RHIZOSPHERE
#a.1) South plot
NMDS_bray_rizo1= ordinate(rizo1_normalized_phyloseq, "NMDS", "bray")#indicate the ordination plot and the distance
print(paste("The stress of the NMDS based on BrayCurtis is:", NMDS_bray_rizo1$stress))#this shows the stress of the NMDS
PCOA_bray_rizo1 = ordinate(rizo1_normalized_phyloseq, "PCoA", "bray")
peso_r1b=PCOA_bray_rizo1$values$Relative_eig[1:2]
peso_r1b#This shows the percentage of the total variance explained by just the first and second axes
#a.2) North plot
NMDS_bray_rizo2= ordinate(rizo2_normalized_phyloseq, "NMDS", "bray")
print(paste("The stress of the NMDS based on BrayCurtis is:", NMDS_bray_rizo2$stress))
PCOA_bray_rizo2 = ordinate(rizo2_normalized_phyloseq, "PCoA", "bray")
peso_r2b=PCOA_bray_rizo2$values$Relative_eig[1:2]
peso_r2b
#b) ENDOSPHERE
#b.1) South plot
NMDS_bray_endo1= ordinate(endo1_normalized_phyloseq, "NMDS", "bray")
print(paste("The stress of the NMDS based on BrayCurtis is:", NMDS_bray_endo1$stress))
PCOA_bray_endo1 = ordinate(endo1_normalized_phyloseq, "PCoA", "bray")
peso_e1b=PCOA_bray_endo1$values$Relative_eig[1:2]
peso_e1b
#b.2) North plot
NMDS_bray_endo2= ordinate(endo2_normalized_phyloseq, "NMDS", "bray")
print(paste("The stress of the NMDS based on BrayCurtis is:", NMDS_bray_endo2$stress))
PCOA_bray_endo2 = ordinate(endo2_normalized_phyloseq, "PCoA", "bray")
peso_e2b=PCOA_bray_endo2$values$Relative_eig[1:2]
peso_e2bOnce these values of stress and the weight of the axes are obtained, we should choose which option is the most suitable for our dataset. Although you can also use both method in your research paper. In this case, we selected PCoA plots because we obtained relatively high values of NMDS stress. It is easy to understand: our data are probably multidimensional are complex due to the high diversity and richness of the data, so it is easy to have high stress when forcing the data to fit just into two dimensions.
Let’s obtain the plots:
#a) RHIZOSPHERE
#a.1) South plot
p_rizo1_pcoa= plot_ordination(rizo1_normalized_phyloseq, PCOA_bray_rizo1,type= "samples", color= "Condition", shape = "Year")+#to color by "Condition" and shape by "Year"
geom_point(alpha = 4, size = 4.5)+
labs(x=paste0("Axis 1 (",round(peso_r1b[1]*100,digits = 2),"%)"),
y=paste0("Axis 2 (",round(peso_r1b[2]*100,digits = 2),"%)"))+#write the weight of each axis with two decimals
scale_color_manual(values = c("Immature"="cornflowerblue","Mature"="darkgreen"),
breaks=c("Immature", "Mature"),
labels=c("Immature", "Mature"))+
scale_shape_manual(values=c("A"=16, "B"=17),
breaks=c("A", "B"),
labels=c("2022", "2023"))+
theme_bw()+
theme(legend.key=element_blank(),
legend.title.align = 0,
legend.title = element_text(face="bold",size=18),
legend.text.align = 0,
axis.text = element_text(size=18),
axis.title = element_text(size = 20),
plot.title = element_text(hjust=0.5, face="bold",size=18),
legend.text = element_text(size = 18))+
geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+
geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+
ggtitle("Rhizosphere (Plot 1)\nPCoA on Bray Curtis dissimilarities")
x11()
p_rizo1_pcoa
#a.2) North plot
p_rizo2_pcoa= plot_ordination(rizo2_normalized_phyloseq, PCOA_bray_rizo2,type= "samples", color= "Condition", shape = "Year")+
geom_point(alpha = 4, size = 4.5)+
labs(x=paste0("Axis 1 (",round(peso_r2b[1]*100,digits = 2),"%)"),
y=paste0("Axis 2 (",round(peso_r2b[2]*100,digits = 2),"%)"))+
scale_color_manual(values = c("Immature"="magenta","Mature"="darkolivegreen2"),
breaks=c("Immature", "Mature"),
labels=c("Immature", "Mature"))+
scale_shape_manual(values=c("A"=16, "B"=17),
breaks=c("A", "B"),
labels=c("2022", "2023"))+
theme_bw()+
theme(legend.key=element_blank(),
legend.title.align = 0,
legend.title = element_text(face="bold",size=18),
legend.text.align = 0,
axis.text = element_text(size=18),
axis.title = element_text(size = 20),
plot.title = element_text(hjust=0.5, face="bold",size=18),
legend.text = element_text(size = 18))+
geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+
geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+
ggtitle("Rhizosphere (Plot 2)\nPCoA on Bray-Curtis dissimilarities")
x11()
p_rizo1_pcoa
#b)ENDOSPHERE
#b.1) South plot
p_endo1_pcoa= plot_ordination(endo1_normalized_phyloseq, PCOA_bray_endo1,type= "samples", color= "Condition", shape = "Year")+
geom_point(alpha = 4, size = 4.5)+
labs(x=paste0("Axis 1 (",round(peso_e1b[1]*100,digits = 2),"%)"),
y=paste0("Axis 2 (",round(peso_e1b[2]*100,digits = 2),"%)"))+
scale_color_manual(values = c("Immature"="#00CCFF","Mature"="green"),
breaks=c("Immature", "Mature"),
labels=c("Immature", "Mature"))+
scale_shape_manual(values=c("A"=16, "B"=17),
breaks=c("A", "B"),
labels=c("2022", "2023"))+
theme_bw()+
theme(legend.key=element_blank(),
legend.title.align = 0,
legend.title = element_text(face="bold",size=18),
legend.text.align = 0,
axis.text = element_text(size=18),
axis.title = element_text(size = 20),
plot.title = element_text(hjust=0.5, face="bold",size=18),
legend.text = element_text(size = 18))+
geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+
geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+
ggtitle("Endosphere (Plot 1)\nPCoA on Bray-Curtis dissimilarities")
x11()
p_endo1_pcoa
#b.2) North plot
p_endo2_pcoa= plot_ordination(endo2_normalized_phyloseq, PCOA_bray_endo2,type= "samples", color= "Condition", shape = "Year")+
geom_point(alpha = 4, size = 4.5)+
labs(x=paste0("Axis 1 (",round(peso_e2b[1]*100,digits = 2),"%)"),
y=paste0("Axis 2 (",round(peso_e2b[2]*100,digits = 2),"%)"))+
scale_color_manual(values = c("Immature"="blueviolet","Mature"="aquamarine"),
breaks=c("Immature", "Mature"),
labels=c("Immature", "Mature"))+
scale_shape_manual(values=c("A"=16, "B"=17),
breaks=c("A", "B"),
labels=c("2022", "2023"))+
theme_bw()+
theme(legend.key=element_blank(),
legend.title.align = 0,
legend.title = element_text(face="bold",size=18),
legend.text.align = 0,
axis.text = element_text(size=18),
axis.title = element_text(size = 20),
plot.title = element_text(hjust=0.5, face="bold",size=18),
legend.text = element_text(size = 18))+
geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+
geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+
ggtitle("Endosphere (Plot 2)\nPCoA on Bray Curtis dissimilarities")
x11()
p_endo2_pcoa6. Taxonomical profiles
We already know that there are differences in the structure of bacterial communities in some case, however, we still do not know which taxa are the responsible of that differences. So, let’s analyze the taxonomical profiles. Fistly, we have to obtain the relative abundance of each ASV:
phy_data_relabun=transform_sample_counts(phy_total_TOTAL, function(x){x/sum(x)}*100)
colSums(otu_table(phy_data_relabun)) #check that all sum 100%6.1 Phylum level
6.1.1 Obtaining tables with the relative abundance and standard deviation values
We are going to obtain the relative abundance of each phylum detected in our samples.
phylum_relabun=tax_glom(phy_data_relabun, taxrank = "Phylum") #aglomerate at Phylum level
colSums(otu_table(phylum_relabun)) #check that the relative abundance sums 100% in all cases
#Now we are going to create a table with the relative abundance of each taxa in each replicate so that we can work on it, for instance, in Excel
a=as.data.frame(otu_table(phylum_relabun))
aa=cbind(tax_table(phylum_relabun),a)
identical(rownames(a),rownames(aa))#comprobamos que sean identicos (tiene uqe salir por consola "TRUE")
write.table(data.frame("TAXA"=rownames(aa),aa),file="abundrel_phylum_porReplicas.txt", sep="\t",row.names =F)
#Another table, with the mean and standard deviation of the relative abundance of each phylum:
table_phylum = otu_table(phylum_relabun)[,] %>% t() %>% as.data.frame()
tax_phylum=tax_table(phylum_relabun)
phylum_media=aggregate(table_phylum, by=list(as.data.frame(sample_data(phylum_relabun))$Fusion), FUN=mean)%>% column_to_rownames("Group.1") %>% t() #calculate the mean values of the relative abundance of each phylum in each group of samples
phylum_sd = aggregate(table_phylum, by=list(as.data.frame(sample_data(phylum_relabun))$Fusion), FUN=sd)%>% column_to_rownames("Group.1") %>% t() %>%
as.data.frame() %>% rename_with(.fn= ~paste0(colnames(phylum_media), "_SD"))#calculate the standard deviation of the relative abundance of each phylum in each group of samples
phylum_media_sd=merge(tax_phylum, phylum_media, by=0) %>%column_to_rownames("Row.names") %>% #bind mean and sd values into one table
merge(phylum_sd, by=0) %>% column_to_rownames("Row.names")
colSums(phylum_media_sd[,9:ncol(phylum_media_sd)])
write.table(data.frame("TAXA"=rownames(phylum_media_sd),phylum_media_sd), file="abundrel_media_sd_phylum.txt", sep="\t",row.names =F)6.1.2 Plotting
We are going to make a stacked bars plot, just indicating the most abundant phyla
Firstly, we have to prepare the data and modify the table so that we can obtain tables suitable for the plotting with the package ggplot2.
df_melt_phylum = psmelt(physeq = phylum_relabun) #transform the phyloseq object into a dataframe in which the abundance of each phylum is in rows
colnames(df_melt_phylum)#check the content of the dataframe
#Here we aggregate the data by the factor we indicate in the argument "by"
df_melt_aggreg1 = aggregate(df_melt_phylum$Abundance,
by=list(Group=df_melt_phylum$Fusion,
Sample=df_melt_phylum$Sample,Phylum=df_melt_phylum$Phylum,
Compartment=df_melt_phylum$Compartment,
Plot=df_melt_phylum$Plot, Year=df_melt_phylum$Year, Condition=df_melt_phylum$Condition), FUN=sum)#it sums the abundance of each phylum in all the replicates corresponding to the same group of samples
colnames(df_melt_aggreg1)=c("Group", "Sample", "Phylum","Compartment","Plot","Year","Condition","Abundance")#rename the columns
colnames(df_melt_aggreg1)
df_melt_aggreg_mean = aggregate(df_melt_aggreg1$Abundance, by=list(Group=df_melt_aggreg1$Group,
Phylum=df_melt_aggreg1$Phylum,
Compartment=df_melt_aggreg1$Compartment,
Plot=df_melt_aggreg1$Plot, Year=df_melt_aggreg1$Year,Condition=df_melt_aggreg1$Condition), FUN=mean)#now, it makes the mean relative abundance of each phylum in each group of samples
colnames(df_melt_aggreg_mean)<-c("Group", "Phylum","Compartment", "Plot","Year","Condition","Abundance")
colnames(df_melt_aggreg_mean)<-c("Group", "Phylum","Compartment", "Plot","Year","Condition","Abundance")
colnames(df_melt_aggreg_mean)
#repeat the same with the standard deviation
df_melt_aggreg_sd=aggregate(df_melt_aggreg1$Abundance, by=list(Group=df_melt_aggreg1$Group,
Phylum=df_melt_aggreg1$Phylum,
Compartment=df_melt_aggreg1$Compartment,
Plot=df_melt_aggreg1$Plot, Year=df_melt_aggreg1$Year,Condition=df_melt_aggreg1$Condition), FUN=sd)
colnames(df_melt_aggreg_sd)=c("Group", "Phylum","Compartment","Plot","Year","Condition","sd")
all(df_melt_aggreg_mean$Group==df_melt_aggreg_sd$Group)
all(df_melt_aggreg_mean$Phylum==df_melt_aggreg_sd$Phylum)
df_melt_aggreg_mean$sd=df_melt_aggreg_sd$sd
media_sd_phylum_todos=df_melt_aggreg_meanNow, we are ready to plot the mean relative abundances. We want to order the phyla by ascending relative abundance, keeping the unclassified and minor phyla in the upper part of the plot.
#a) RHIZOSPHERE
#a.1) South plot
media_sd_phylum_Rizo=media_sd_phylum_todos[media_sd_phylum_todos[,"Compartment"] == "Rhizosphere",]#subset the dataset
media_sd_phylum_Rizo1=media_sd_phylum_Rizo[media_sd_phylum_Rizo[,"Plot"] == "Plot1",]
#aggregate all the phyla that were not classified
media_sd_phylum_Rizo1$Phylum=as.character(media_sd_phylum_Rizo1$Phylum)
unclassified_rizo1=media_sd_phylum_Rizo1[media_sd_phylum_Rizo1[,"Phylum"] == "unclassified",] #save them into a new variable
media_sd_phylum_Rizo1=media_sd_phylum_Rizo1[media_sd_phylum_Rizo1[,"Phylum"] != "unclassified",] #remove the unclassified phyla from the original dataset
#aggregate all the phyla that account less than 1% into an artificial group named "Other phyla <1%"
media_sd_phylum_Rizo1$Phylum[media_sd_phylum_Rizo1$Abundance <= 1.0] = "Other phyla (<1%)"
media_sd_phylum_Rizo1$Phylum=as.factor(media_sd_phylum_Rizo1$Phylum)
others_rizo1=media_sd_phylum_Rizo1[media_sd_phylum_Rizo1[,"Phylum"] == "Other phyla (<1%)",]#save the "other phyla" into a new variable
media_sd_phylum_Rizo1=media_sd_phylum_Rizo1[media_sd_phylum_Rizo1[,"Phylum"] != "Other phyla (<1%)",]#remove them from the original dataset
#Now, the minor and unclassified phyla are not included in the original dataset, so let's order by abundance the remaining phyla
ordenado_rizo1=media_sd_phylum_Rizo1[order(media_sd_phylum_Rizo1$Abundance, decreasing=T),]#order by relative abundance
ordenado_rizo1=rbind(ordenado_rizo1,others_rizo1,unclassified_rizo1)#now add the minor phyla and the unclassified
group_label_rizo1=c("Mature","Immature", "Mature","Immature")#set the labels
levels_rizo1=c("A_Plot1_Mature_Rhizosphere","A_Plot1_Immature_Rhizosphere","B_Plot1_Mature_Rhizosphere",
"B_Plot1_Immature_Rhizosphere")#set the levels of the graph
limits_rizo1=c("A_Plot1_Mature_Rhizosphere","A_Plot1_Immature_Rhizosphere","B_Plot1_Mature_Rhizosphere",
"B_Plot1_Immature_Rhizosphere")#set the limits of the graph
phyl_name_ordered_rizo1=as.vector(ordenado_rizo1$Phylum)
nombres_unicos_rizo1=unique(phyl_name_ordered_rizo1)#get the levels (=the names of the phyla )of the factor "Phylum", and show them just once regardless of their abundance and prevalence)
ordenado_rizo1$Phylum=reorder.factor(ordenado_rizo1$Phylum,new.order=rev(nombres_unicos_rizo1)) #order the name of the phyla
lab_unicos_rizo1=nombres_unicos_rizo1
sorted_labels_ggplot_rizo1 <- sapply(lab_unicos_rizo1,
function(x) if (x == "Other phyla (<1%)"|x == "unclassified"|x == "Other genera (<1%)")
{parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})
#this function helps us to write the name of the phyla (and genera) in italics and the name of the minor phyla and unclassified without italics
colores_rizo1 = c("#0099FF", "#00CC00","#9933FF","yellow", "#FF00FF","grey", "black")#choose the colors of specific phyla
length(lab_unicos_rizo1)==length(colores_rizo1) #si FALSE, anyadir/quitar colores
length(lab_unicos_rizo1)
length(colores_rizo1)
titulo_plot_rizo1="Plot 1"
p3=ggplot(ordenado_rizo1, aes(x=Group, y=Abundance, fill=Phylum, order=Phylum)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=colores_rizo1,
labels=sorted_labels_ggplot_rizo1,
breaks=nombres_unicos_rizo1)+
labs(y="Mean relative abundance (%)", x=NULL, fill="Phylum",title=titulo_plot_rizo1)+
guides(fill = guide_legend(reverse = TRUE))+
scale_x_discrete(limits=limits_rizo1,labels=group_label_rizo1,
breaks=levels_rizo1)+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 100))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
theme(axis.text = element_text(size = 14))+
theme(axis.text.x = element_text(face="bold", size=16))+
theme(axis.title.y = element_text(size = 16))+
theme(legend.key.size = unit(0.9, "cm"))+
theme(legend.text = element_text(size = 18))+
theme(legend.title = element_text(size=18, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)
x11()
p3
# a.2) North plot
media_sd_phylum_Rizo=media_sd_phylum_todos[media_sd_phylum_todos[,"Compartment"] == "Rhizosphere",]
media_sd_phylum_Rizo2=media_sd_phylum_Rizo[media_sd_phylum_Rizo[,"Plot"] == "Plot2",]
media_sd_phylum_Rizo2$Phylum=as.character(media_sd_phylum_Rizo2$Phylum)
unclassified_rizo2=media_sd_phylum_Rizo2[media_sd_phylum_Rizo2[,"Phylum"] == "unclassified",]
media_sd_phylum_Rizo2=media_sd_phylum_Rizo2[media_sd_phylum_Rizo2[,"Phylum"] != "unclassified",]
media_sd_phylum_Rizo2$Phylum[media_sd_phylum_Rizo2$Abundance <= 1.0] = "Other phyla (<1%)"
media_sd_phylum_Rizo2$Phylum=as.factor(media_sd_phylum_Rizo2$Phylum)
others_rizo2=media_sd_phylum_Rizo2[media_sd_phylum_Rizo2[,"Phylum"] == "Other phyla (<1%)",]
media_sd_phylum_Rizo2=media_sd_phylum_Rizo2[media_sd_phylum_Rizo2[,"Phylum"] != "Other phyla (<1%)",]
ordenado_rizo2=media_sd_phylum_Rizo2[order(media_sd_phylum_Rizo2$Abundance, decreasing=T),]
ordenado_rizo2=rbind(ordenado_rizo2,others_rizo2,unclassified_rizo2)
group_label_rizo2=c("Mature","Immature", "Mature","Immature")
levels_rizo2=c("A_Plot2_Mature_Rhizosphere","A_Plot2_Immature_Rhizosphere","B_Plot2_Mature_Rhizosphere",
"B_Plot2_Immature_Rhizosphere")
limits_rizo2=c("A_Plot2_Mature_Rhizosphere","A_Plot2_Immature_Rhizosphere","B_Plot2_Mature_Rhizosphere",
"B_Plot2_Immature_Rhizosphere")
phyl_name_ordered_rizo2=as.vector(ordenado_rizo2$Phylum)
nombres_unicos_rizo2=unique(phyl_name_ordered_rizo2)
ordenado_rizo2$Phylum=reorder.factor(ordenado_rizo2$Phylum,new.order=rev(nombres_unicos_rizo2))
lab_unicos_rizo2=nombres_unicos_rizo2
sorted_labels_ggplot_rizo2 <- sapply(lab_unicos_rizo2,
function(x) if (x == "Other phyla (<1%)"|x == "unclassified"|x == "Other genera (<1%)")
{parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})
colores_rizo2 = c("#0099FF", "#00CC00", "#9933FF","#FF00FF",
"brown","grey", "black")
length(lab_unicos_rizo2)==length(colores_rizo2)
length(lab_unicos_rizo2)
length(colores_rizo2)
titulo_plot_rizo2="Plot 2"
#b) ENDOSPHERE
#b.1) South plot
media_sd_phylum_Endo1=media_sd_phylum_Endo[media_sd_phylum_Endo[,"Plot"] == "Plot1",]
media_sd_phylum_Endo1$Phylum=as.character(media_sd_phylum_Endo1$Phylum)
unclassified_endo1=media_sd_phylum_Endo1[media_sd_phylum_Endo1[,"Phylum"] == "unclassified",]
media_sd_phylum_Endo1=media_sd_phylum_Endo1[media_sd_phylum_Endo1[,"Phylum"] != "unclassified",]
media_sd_phylum_Endo1$Phylum[media_sd_phylum_Endo1$Abundance <= 1.0] = "Other phyla (<1%)"
media_sd_phylum_Endo1$Phylum=as.factor(media_sd_phylum_Endo1$Phylum)
others_endo1=media_sd_phylum_Endo1[media_sd_phylum_Endo1[,"Phylum"] == "Other phyla (<1%)",]
media_sd_phylum_Endo1=media_sd_phylum_Endo1[media_sd_phylum_Endo1[,"Phylum"] != "Other phyla (<1%)",]
ordenado_endo1=media_sd_phylum_Endo1[order(media_sd_phylum_Endo1$Abundance, decreasing=T),]
ordenado_endo1=rbind(ordenado_endo1,others_endo1,unclassified_endo1)
group_label_endo1=c("Mothers","Suckers", "Mothers","Suckers")
levels_endo1=c("A_Plot1_Mature_Endosphere","A_Plot1_Immature_Endosphere","B_Plot1_Mature_Endosphere","B_Plot1_Immature_Endosphere")
limits_endo1=c("A_Plot1_Mature_Endosphere","A_Plot1_Immature_Endosphere","B_Plot1_Mature_Endosphere","B_Plot1_Immature_Endosphere")
phyl_name_ordered_endo1=as.vector(ordenado_endo1$Phylum)
nombres_unicos_endo1=unique(phyl_name_ordered_endo1)
ordenado_endo1$Phylum=reorder.factor(ordenado_endo1$Phylum,new.order=rev(nombres_unicos_endo1))
lab_unicos_endo1=nombres_unicos_endo1
sorted_labels_ggplot_endo1 <- sapply(lab_unicos_endo1,
function(x) if (x == "Other phyla (<1%)"|x == "unclassified"|x == "Other genera (<1%)")
{parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})
colores_endo1 = c("#0099FF","#00CC00", "grey","black")
length(lab_unicos_endo1)==length(colores_endo1)
length(lab_unicos_endo1)
length(colores_endo1)
titulo_plot_endo1="South plot"
p1=ggplot(ordenado_endo1, aes(x=Group, y=Abundance, fill=Phylum, order=Phylum)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=colores_endo1,
labels=sorted_labels_ggplot_endo1,
breaks=nombres_unicos_endo1)+
labs(y="Mean relative abundance (%)", x=NULL, fill="Phylum",title=titulo_plot_endo1)+
guides(fill = guide_legend(reverse = TRUE))+#sirve para cambiar el orden de la leyenda
scale_x_discrete(limits=limits_endo1,labels=group_label_endo1,
breaks=levels_endo1)+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 100))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+
theme(axis.text = element_text(size = 5))+
theme(axis.text.x = element_text(face="bold", size=5))+
theme(axis.title.y = element_text(size = 7))+
theme(legend.key.size = unit(0.9, "cm"))+
theme(legend.text = element_text(size = 5))+
theme(legend.title = element_text(size=7, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)+
theme(legend.position = "none")
x11()
p1
#b.2) North plot
media_sd_phylum_Endo2=media_sd_phylum_Endo[media_sd_phylum_Endo[,"Plot"] == "Plot2",]
media_sd_phylum_Endo2$Phylum=as.character(media_sd_phylum_Endo2$Phylum)
unclassified_endo2=media_sd_phylum_Endo2[media_sd_phylum_Endo2[,"Phylum"] == "unclassified",]
media_sd_phylum_Endo2=media_sd_phylum_Endo2[media_sd_phylum_Endo2[,"Phylum"] != "unclassified",]
media_sd_phylum_Endo2$Phylum[media_sd_phylum_Endo2$Abundance <= 1.0] = "Other phyla (<1%)"
media_sd_phylum_Endo2$Phylum=as.factor(media_sd_phylum_Endo2$Phylum)
others_endo2=media_sd_phylum_Endo2[media_sd_phylum_Endo2[,"Phylum"] == "Other phyla (<1%)",]
media_sd_phylum_Endo2=media_sd_phylum_Endo2[media_sd_phylum_Endo2[,"Phylum"] != "Other phyla (<1%)",]
ordenado_endo2=media_sd_phylum_Endo2[order(media_sd_phylum_Endo2$Abundance, decreasing=T),]
ordenado_endo2=rbind(ordenado_endo2,others_endo2,unclassified_endo2)
group_label_endo2=c("Mature","Immature", "Mature","Immature")
levels_endo2=c("A_Plot2_Mature_Endosphere","A_Plot2_Immature_Endosphere","B_Plot2_Mature_Endosphere","B_Plot2_Immature_Endosphere")
limits_endo2=c("A_Plot2_Mature_Endosphere","A_Plot2_Immature_Endosphere","B_Plot2_Mature_Endosphere","B_Plot2_Immature_Endosphere")
phyl_name_ordered_endo2=as.vector(ordenado_endo2$Phylum)
nombres_unicos_endo2=unique(phyl_name_ordered_endo2)
ordenado_endo2$Phylum=reorder.factor(ordenado_endo2$Phylum,new.order=rev(nombres_unicos_endo2))
sorted_labels_ggplot_endo2 <- sapply(lab_unicos_endo2,
function(x) if (x == "Other phyla (<1%)"|x == "unclassified"|x == "Other genera (<1%)")
{parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})
colores_endo2 = c("#0099FF","#00CC00","grey","black")
length(lab_unicos_endo2)==length(colores_endo2)
length(lab_unicos_endo2)
length(colores_endo2)
titulo_plot_endo2="Plot 2"
p2=ggplot(ordenado_endo2, aes(x=Group, y=Abundance, fill=Phylum, order=Phylum)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=colores_endo2,
labels=sorted_labels_ggplot_endo2,
breaks=nombres_unicos_endo2)+
labs(y="Mean relative abundance (%)", x=NULL, fill="Phylum",title=titulo_plot_endo2)+
guides(fill = guide_legend(reverse = TRUE))+
scale_x_discrete(limits=limits_endo2,labels=group_label_endo2,
breaks=levels_endo2)+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 100))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
theme(axis.text = element_text(size = 14))+
theme(axis.text.x = element_text(face="bold", size=16))+
theme(axis.title.y = element_text(size = 16))+
theme(legend.key.size = unit(0.9, "cm"))+
theme(legend.text = element_text(size = 18))+
theme(legend.title = element_text(size=18, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)
x11()
p26.2 Genus
6.2.1 Obtain tables with the relative abundance and standard deviation values
We are going to obtain the relative abundance of each phylum detected in our samples.
genus_relabun=tax_glom(phy_data_relabun, taxrank = "Genus")
colSums(otu_table(genus_relabun))
b=as.data.frame(otu_table(genus_relabun))
bb=cbind(tax_table(genus_relabun),b)
identical(rownames(b),rownames(bb))
write.table(data.frame("TAXA"=rownames(bb),bb),file="abundrel_Genero_porReplicas.txt", sep="\t",row.names =F)
table_genus = otu_table(genus_relabun)[,] %>% t() %>% as.data.frame()
tax_genus=tax_table(genus_relabun)
genus_media=aggregate(table_genus, by=list(as.data.frame(sample_data(genus_relabun))$Fusion), FUN=mean)%>% column_to_rownames("Group.1") %>% t()
genus_sd= aggregate(table_genus, by=list(as.data.frame(sample_data(genus_relabun))$Fusion), FUN=sd)%>% column_to_rownames("Group.1") %>% t() %>%
as.data.frame() %>% rename_with(.fn= ~paste0(colnames(genus_media), "_SD"))
genus_media_sd=merge(tax_genus, genus_media, by=0) %>%column_to_rownames("Row.names") %>%
merge(genus_sd, by=0) %>% column_to_rownames("Row.names")
colSums(genus_media_sd[,8:ncol(genus_media_sd)])
genus_media_sd$ASV_names=row.names(genus_media_sd)
write.table(data.frame("TAXA"=rownames(genus_media_sd),genus_media_sd), file="abundrel_media_sd_genus.txt", sep="\t",row.names =F)6.2.2. Plotting
We are going to make a stacked bars plot, just indicating the most abundant genera
Firstly, we have to prepare the data and modify the table so that we can obtain tables suitable for the plotting with the package ggplot2.
df_melt_genus = psmelt(physeq = genus_relabun)
colnames(df_melt_genus)
df_melt_aggreg2 = aggregate(df_melt_genus$Abundance,
by=list(Group=df_melt_genus$Fusion,
Sample=df_melt_genus$Sample,Genus=df_melt_genus$Genus,
Compartment=df_melt_genus$Compartment,
Plot=df_melt_genus$Plot, Year=df_melt_genus$Year, Condition=df_melt_genus$Condition), FUN=sum)
colnames(df_melt_aggreg2)=c("Group", "Sample", "Genus","Compartment","Plot","Year","Condition","Abundance")
colnames(df_melt_aggreg2)
df_melt_aggreg_mean = aggregate(df_melt_aggreg2$Abundance, by=list(Group=df_melt_aggreg2$Group,
Genus=df_melt_aggreg2$Genus,
Compartment=df_melt_aggreg2$Compartment,
Plot=df_melt_aggreg2$Plot, Year=df_melt_aggreg2$Year,Condition=df_melt_aggreg2$Condition), FUN=mean)
colnames(df_melt_aggreg_mean)<-c("Group", "Genus","Compartment", "Plot","Year","Condition","Abundance")
colnames(df_melt_aggreg_mean)
df_melt_aggreg_sd=aggregate(df_melt_aggreg2$Abundance, by=list(Group=df_melt_aggreg2$Group,
Genus=df_melt_aggreg2$Genus,
Compartment=df_melt_aggreg2$Compartment,
Plot=df_melt_aggreg2$Plot, Year=df_melt_aggreg2$Year,Condition=df_melt_aggreg2$Condition), FUN=sd)
colnames(df_melt_aggreg_sd)=c("Group", "Genus","Compartment","Plot","Year","Condition","sd")
all(df_melt_aggreg_mean$Group==df_melt_aggreg_sd$Group)
all(df_melt_aggreg_mean$Genus==df_melt_aggreg_sd$Genus)
df_melt_aggreg_mean$sd=df_melt_aggreg_sd$sd
media_sd_genus_todos=df_melt_aggreg_meanNow, we are ready to plot the mean relative abundances. We want to order the genera by ascending relative abundance, keeping the unclassified and minor genera in the upper part of the plot.
#a) RHIZOSPHERE
media_sd_genus_rizo=media_sd_genus_todos[media_sd_genus_todos[,"Compartment"] == "Rhizosphere",]
#a.1) South plot
media_sd_genus_rizo1=media_sd_genus_rizo[media_sd_genus_rizo[,"Plot"] == "Plot1",]
media_sd_genus_rizo1$Genus=as.character(media_sd_genus_rizo1$Genus)
unclassified_r1=media_sd_genus_rizo1[media_sd_genus_rizo1[,"Genus"] == "unclassified",]
media_sd_genus_rizo1=media_sd_genus_rizo1[media_sd_genus_rizo1[,"Genus"] != "unclassified",]
media_sd_genus_rizo1$Genus[media_sd_genus_rizo1$Abundance <= 2] = "Other genera (<2%)"
media_sd_genus_rizo1$Genus=as.factor(media_sd_genus_rizo1$Genus)
others_r1=media_sd_genus_rizo1[media_sd_genus_rizo1[,"Genus"] == "Other genera (<2%)",]
media_sd_genus_rizo1=media_sd_genus_rizo1[media_sd_genus_rizo1[,"Genus"] != "Other genera (<2%)",]
ordenado_r1=media_sd_genus_rizo1[order(media_sd_genus_rizo1$Abundance, decreasing=T),]
ordenado_r1=rbind(ordenado_r1,others_r1,unclassified_r1)
group_label_r1=c("M","S", "M","S")
levels_r1=c("A_Plot1_Mature_Rhizosphere","A_Plot1_Immature_Rhizosphere",
"B_Plot1_Mature_Rhizosphere","B_Plot1_Immature_Rhizosphere")
limits_r1=c("A_Plot1_Mature_Rhizosphere","A_Plot1_Immature_Rhizosphere",
"B_Plot1_Mature_Rhizosphere","B_Plot1_Immature_Rhizosphere")
genus_name_ordered_r1=as.vector(ordenado_r1$Genus)
nombres_unicos_r1=unique(genus_name_ordered_r1)
ordenado_r1$Genus=reorder.factor(ordenado_r1$Genus,new.order=rev(nombres_unicos_r1))
lab_unicos_r1=nombres_unicos_r1
sorted_labels_ggplot_r1 = sapply(lab_unicos_r1,
function(x) if (x == "Other genera (<2%)"|x == "unclassified"|x == "Other genera (<2%)")
{parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})
lab_unicos_r1=as.data.frame(lab_unicos_r1)
colores_r1 = c("#0099FF","#00CC00","#9933FF","brown","yellow","orange","#00FFFF","red",
"pink", "magenta","blue","khaki2","green4","#6A3D9A","gold1","purple","green" ,
"grey","black")
length(lab_unicos_r1)==length(colores_r1)
length(lab_unicos_r1)
length(colores_r1)
titulo_plot_r1="South plot"
pgenus_r1=ggplot(ordenado_r1, aes(x=Group, y=Abundance, fill=Genus, order=Genus)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=colores_r1,
labels=sorted_labels_ggplot_r1,
breaks=nombres_unicos_r1)+
labs(y="Mean relative abundance (%)", x=NULL, fill="Genus",title=titulo_plot_r1)+
guides(fill = guide_legend(reverse = TRUE))+
scale_x_discrete(limits=limits_r1,labels=group_label_r1,
breaks=levels_r1)+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 100))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+
theme(axis.text = element_text(size = 7))+
theme(axis.text.x = element_text(face="bold", size=7))+
theme(axis.title.y = element_text(size = 7))+
theme(legend.key.size = unit(0.2, "cm"))+
theme(legend.text = element_text(size = 5))+
theme(legend.title = element_text(size=5, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)+
theme(legend.position = "none")
x11()
pgenus_r1
#a.2) North plot
media_sd_genus_rizo2=media_sd_genus_rizo[media_sd_genus_rizo[,"Plot"] == "Plot2",]
media_sd_genus_rizo2$Genus=as.character(media_sd_genus_rizo2$Genus)
unclassified_r2=media_sd_genus_rizo2[media_sd_genus_rizo2[,"Genus"] == "unclassified",]
media_sd_genus_rizo2=media_sd_genus_rizo2[media_sd_genus_rizo2[,"Genus"] != "unclassified",]
media_sd_genus_rizo2$Genus[media_sd_genus_rizo2$Abundance <= 2] = "Other genera (<2%)"
media_sd_genus_rizo2$Genus=as.factor(media_sd_genus_rizo2$Genus)
others_r2=media_sd_genus_rizo2[media_sd_genus_rizo2[,"Genus"] == "Other genera (<2%)",]
media_sd_genus_rizo2=media_sd_genus_rizo2[media_sd_genus_rizo2[,"Genus"] != "Other genera (<2%)",]
ordenado_r2=media_sd_genus_rizo2[order(media_sd_genus_rizo2$Abundance, decreasing=T),]
ordenado_r2=rbind(ordenado_r2,others_r2,unclassified_r2)
group_label_r2=c("M","S", "M","S")
levels_r2=c("A_Plot2_Mature_Rhizosphere","A_Plot2_Immature_Rhizosphere",
"B_Plot2_Mature_Rhizosphere","B_Plot2_Immature_Rhizosphere")
limits_r2=c("A_Plot2_Mature_Rhizosphere","A_Plot2_Immature_Rhizosphere",
"B_Plot2_Mature_Rhizosphere","B_Plot2_Immature_Rhizosphere")
genus_name_ordered_r2=as.vector(ordenado_r2$Genus)
nombres_unicos_r2=unique(genus_name_ordered_r2)
ordenado_r2$Genus=reorder.factor(ordenado_r2$Genus,new.order=rev(nombres_unicos_r2))
lab_unicos_r2=nombres_unicos_r2
sorted_labels_ggplot_r2 <- sapply(lab_unicos_r2,
function(x) if (x == "Other genera (<2%)"|x == "unclassified"|x == "Other genera (<2%)")
{parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})
colores_r2 = c("green4","brown","darkturquoise","steelblue4","#00FFFF", "#9933FF","maroon","pink","gold1",
"green","slategray2","plum2","yellow","red","darkolivegreen2","grey","black")
length(lab_unicos_r2)==length(colores_r2)
length(lab_unicos_r2)
length(colores_r2)
titulo_plot_r2="North plot"
pgenus_r2=ggplot(ordenado_r2, aes(x=Group, y=Abundance, fill=Genus, order=Genus)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=colores_r2,
labels=sorted_labels_ggplot_r2,
breaks=nombres_unicos_r2)+
labs(y="Mean relative abundance (%)", x=NULL, fill="Genus",title=titulo_plot_r2)+
guides(fill = guide_legend(reverse = TRUE))+
scale_x_discrete(limits=limits_r2,labels=group_label_r2,
breaks=levels_r2)+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 100))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+
theme(axis.text = element_text(size = 7))+
theme(axis.text.x = element_text(face="bold", size=7))+
theme(axis.title.y = element_text(size = 7))+
theme(legend.key.size = unit(0.2, "cm"))+
theme(legend.text = element_text(size = 5))+
theme(legend.title = element_text(size=, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)+
theme(legend.position="none")
x11()
pgenus_r2
#b) ENDOSPHERE
media_sd_genus_endo=media_sd_genus_todos[media_sd_genus_todos[,"Compartment"] == "Endosphere",]
#b.1) South plot
media_sd_genus_endo1=media_sd_genus_endo[media_sd_genus_endo[,"Plot"] == "Plot1",]
media_sd_genus_endo1$Genus=as.character(media_sd_genus_endo1$Genus)
unclassified_e1=media_sd_genus_endo1[media_sd_genus_endo1[,"Genus"] == "unclassified",]
media_sd_genus_endo1=media_sd_genus_endo1[media_sd_genus_endo1[,"Genus"] != "unclassified",]
media_sd_genus_endo1$Genus[media_sd_genus_endo1$Abundance <= 2] = "Other genera (<2%)"
media_sd_genus_endo1$Genus=as.factor(media_sd_genus_endo1$Genus)
others_e1=media_sd_genus_endo1[media_sd_genus_endo1[,"Genus"] == "Other genera (<2%)",]
media_sd_genus_endo1=media_sd_genus_endo1[media_sd_genus_endo1[,"Genus"] != "Other genera (<2%)",]
ordenado_e1=media_sd_genus_endo1[order(media_sd_genus_endo1$Abundance, decreasing=T),]
ordenado_e1=rbind(ordenado_e1,others_e1,unclassified_e1)
group_label_e1=c("M","S", "M","S")
levels_e1=c("A_Plot1_Mature_Endosphere","A_Plot1_Immature_Endosphere",
"B_Plot1_Mature_Endosphere","B_Plot1_Immature_Endosphere")
limits_e1=c("A_Plot1_Mature_Endosphere","A_Plot1_Immature_Endosphere",
"B_Plot1_Mature_Endosphere","B_Plot1_Immature_Endosphere")
genus_name_ordered_e1=as.vector(ordenado_e1$Genus)
nombres_unicos_e1=unique(genus_name_ordered_e1)
ordenado_e1$Genus=reorder.factor(ordenado_e1$Genus,new.order=rev(nombres_unicos_e1))
lab_unicos_e1=nombres_unicos_e1
sorted_labels_ggplot_e1=sapply(lab_unicos_e1,
function(x) if (x == "Other genera (<2%)"|x == "unclassified"|x == "Other genera (<2%)")
{parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})
colores_e1 = c("blue","violet","brown","magenta","#FDBF6F","#FB9A99","springgreen3","yellow","coral2",
"yellow4","green1","#6A3D9A","mediumorchid2","lightgoldenrod2","darkgreen","green4","firebrick",
"grey","black")
length(lab_unicos_e1)==length(colores_e1)
length(lab_unicos_e1)
length(colores_e1)
titulo_plot_e1="South plot"
pgenus_e1=ggplot(ordenado_e1, aes(x=Group, y=Abundance, fill=Genus, order=Genus)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=colores_e1,
labels=sorted_labels_ggplot_e1,
breaks=nombres_unicos_e1)+
labs(y="Mean relative abundance (%)", x=NULL, fill="Genus",title=titulo_plot_e1)+
guides(fill = guide_legend(reverse = TRUE))+
scale_x_discrete(limits=limits_e1,labels=group_label_e1,
breaks=levels_e1)+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 100))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+
theme(axis.text = element_text(size = 7))+
theme(axis.text.x = element_text(face="bold", size=7))+
theme(axis.title.y = element_text(size = 7))+
theme(legend.key.size = unit(0.2, "cm"))+
theme(legend.text = element_text(size = 5))+
theme(legend.title = element_text(size=5, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)+
theme(legend.position = "right")
x11()
pgenus_e1
#b.2) North plot
media_sd_genus_endo2=media_sd_genus_endo[media_sd_genus_endo[,"Plot"] == "Plot2",]
media_sd_genus_endo2$Genus=as.character(media_sd_genus_endo2$Genus)
unclassified_e2=media_sd_genus_endo2[media_sd_genus_endo2[,"Genus"] == "unclassified",]
media_sd_genus_endo2=media_sd_genus_endo2[media_sd_genus_endo2[,"Genus"] != "unclassified",]
media_sd_genus_endo2$Genus[media_sd_genus_endo2$Abundance <= 2] = "Other genera (<2%)"
media_sd_genus_endo2$Genus=as.factor(media_sd_genus_endo2$Genus)
others_e2=media_sd_genus_endo2[media_sd_genus_endo2[,"Genus"] == "Other genera (<2%)",]
media_sd_genus_endo2=media_sd_genus_endo2[media_sd_genus_endo2[,"Genus"] != "Other genera (<2%)",]
ordenado_e2=media_sd_genus_endo2[order(media_sd_genus_endo2$Abundance, decreasing=T),]
ordenado_e2=rbind(ordenado_e2,others_e2,unclassified_e2)
group_label_e2=c("M","S", "M","S")
levels_e2=c("A_Plot2_Mature_Endosphere","A_Plot2_Immature_Endosphere",
"B_Plot2_Mature_Endosphere","B_Plot2_Immature_Endosphere")
limits_e2=c("A_Plot2_Mature_Endosphere","A_Plot2_Immature_Endosphere",
"B_Plot2_Mature_Endosphere","B_Plot2_Immature_Endosphere")
genus_name_ordered_e2=as.vector(ordenado_e2$Genus)
nombres_unicos_e2=unique(genus_name_ordered_e2)
ordenado_e2$Genus=reorder.factor(ordenado_e2$Genus,new.order=rev(nombres_unicos_e2))
lab_unicos_e2=nombres_unicos_e2
sorted_labels_ggplot_e2 = sapply(lab_unicos_e2,
function(x) if (x == "Other genera (<2%)"|x == "unclassified"|x == "Other genera (<2%)")
{parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})
colores_e2 = c("brown", "violet", "blue", "grey90","orangered2","darkseagreen2","burlywood",
"yellow4","darkmagenta","powderblue","sienna2","magenta","springgreen3","mediumslateblue",
"gold2",
"grey","black")
length(lab_unicos_e2)==length(colores_e2)
length(lab_unicos_e2)
length(colores_e2)
titulo_plot_e2="North plot"
pgenus_e2=ggplot(ordenado_e2, aes(x=Group, y=Abundance, fill=Genus, order=Genus)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=colores_e2,
labels=sorted_labels_ggplot_e2,
breaks=nombres_unicos_e2)+
labs(y="Mean relative abundance (%)", x=NULL, fill="Genus",title=titulo_plot_e2)+
guides(fill = guide_legend(reverse = TRUE))+
scale_x_discrete(limits=limits_e2,labels=group_label_e2,
breaks=levels_e2)+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 100))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+
theme(axis.text = element_text(size = 7))+
theme(axis.text.x = element_text(face="bold", size=7))+
theme(axis.title.y = element_text(size = 7))+
theme(legend.key.size = unit(0.2, "cm"))+
theme(legend.text = element_text(size = 5))+
theme(legend.title = element_text(size=5, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)+
theme(legend.position = "none")
x11()
pgenus_e2In Figure 6 you can observe an example of the stacked bar plot for the main genera. This kind of plot can be calculated at the taxonomical rank you prefer. Please, try to open the plot in a new window if you want to view it at full size.

7. Fungal transmission analyses
Our experimental design allows us to study the potential transmission of microorganisms from mothers to first and second suckers. For that purpose, we firstly compared the root microbiota of mothers and first suckers, and then, these shared microorganisms were further compared against the microbiota of rhizosphere of sucker plants. It is expected that the microorganisms that are found in the root endosphere of suckers (and not found in the rhizosphere) and also in the root endosphere of mother plants, are transferred from mothers to suckers through the root system (corm), but not acquired from the surrounding soil of from the environment.
We will perform this analysis just at ASV level
7.1 Prepare the data
mt1=as.data.frame(sample_data(phy_data_relabun))
names = unique(mt1$Fusion)#get the name of each level of the variable "Fusion", which in this example corresponds to each individual group of samples. For instance, the first level of the variable "Fusion" is "A_Plot1_Mature_Rhizosphere" (all rhizosphere samples of mother plants sampled in 2022 located in the south plot)
#in the following for loop we are creating a list in which we are saving one each phyloseq object per group of samples. That is to say, in the first element of the list, the phyloseq object corresponding to the samples of the rhizosphere of mother plants sampled in 2022 located in the south plot is stored.
pruned_data = list()
for(i in 1:length(names)){
samples_group <- mt1[which(mt1$Fusion==names[i]),]$Sample
pruned_data[[i]] <- prune_samples(samples_group, phy_data_relabun)
}
#this loop stored the taxonony and ASV table of each phyloseq element of the previously created list
otu_tables = list()
tax_tables = list()
for (i in 1:length(pruned_data)){
otu_tables[[i]] <- as.data.frame(otu_table(pruned_data[[i]]))
tax_tables[[i]] <- as.data.frame(tax_table(pruned_data[[i]]))
}#it gives the list "otu_tables", whose element [[1]] corresponds to the ASV table of the group of sample "A_Plot1_Mature_Rhizosphere". While "tax_tables" list stores the taxonomy tables
#this list merges each ASV table with each tax table by rownames (by=0)
whole_tables = list()
for (i in 1:length(pruned_data)){
whole_tables[[i]] = merge(tax_tables[[i]],otu_tables[[i]], by=0)
}
for(i in 1:length(whole_tables)){
whole_tables[[i]]$ASV_names=whole_tables[[i]]$Row.names
}#it creates a new column in the list "wholes_tables" that corresponds to the rownames
for (i in 1:length(whole_tables)){
assign(paste0(names[i],"_df"),as#create one dataframe per element from the list "whole_tables"
#the previous loop returns complex dataframe names, so let's rename them and calculate the total number of sequences registered for each ASV in all the corresponding dataset
A_r1_M=A_Plot1_Mature_Rhizosphere_df; A_r1_M$Sum=rowSums(A_r1_M[,9:ncol(A_r1_M)]); A_r1_M_def=A_r1_M[which(A_r1_M$Sum>0,arr.ind=TRUE),];A_r1_M_def2= subset(A_r1_M_def, select = -c(Sum))#we remove those ASV that account for 0 sequences in sum in each dataset
A_e1_M=A_Plot1_Mature_Endosphere_df; A_e1_M$Sum=rowSums(A_e1_M[,9:ncol(A_e1_M)]); A_e1_M_def=A_e1_M[which(A_e1_M$Sum>0,arr.ind=TRUE),];A_e1_M_def2= subset(A_e1_M_def, select = -c(Sum))
A_e1_I=A_Plot1_Immature_Endosphere_df; A_e1_I$Sum=rowSums(A_e1_I[,9:ncol(A_e1_I)]); A_e1_I_def=A_e1_I[which(A_e1_I$Sum>0,arr.ind=TRUE),];A_e1_I_def2= subset(A_e1_I_def, select = -c(Sum))
A_r1_I=A_Plot1_Immature_Rhizosphere_df; A_r1_I$Sum=rowSums(A_r1_I[,9:ncol(A_r1_I)]); A_r1_I_def=A_r1_I[which(A_r1_I$Sum>0,arr.ind=TRUE),];A_r1_I_def2= subset(A_r1_I_def, select = -c(Sum))
A_e2_M=A_Plot2_Mature_Endosphere_df; A_e2_M$Sum=rowSums(A_e2_M[,9:ncol(A_e2_M)]); A_e2_M_def=A_e2_M[which(A_e2_M$Sum>0,arr.ind=TRUE),];A_e2_M_def2= subset(A_e2_M_def, select = -c(Sum))
A_r2_M=A_Plot2_Mature_Rhizosphere_df; A_r2_M$Sum=rowSums(A_r2_M[,9:ncol(A_r2_M)]); A_r2_M_def=A_r2_M[which(A_r2_M$Sum>0,arr.ind=TRUE),];A_r2_M_def2= subset(A_r2_M_def, select = -c(Sum))
A_r2_I=A_Plot2_Immature_Rhizosphere_df; A_r2_I$Sum=rowSums(A_r2_I[,9:ncol(A_r2_I)]); A_r2_I_def=A_r2_I[which(A_r2_I$Sum>0,arr.ind=TRUE),];A_r2_I_def2= subset(A_r2_I_def, select = -c(Sum))
A_e2_I=A_Plot2_Immature_Endosphere_df; A_e2_I$Sum=rowSums(A_e2_I[,9:ncol(A_e2_I)]);A_e2_I_def=A_e2_I[which(A_e2_I$Sum>0,arr.ind=TRUE),];A_e2_I_def2= subset(A_e2_I_def, select = -c(Sum))
B_r1_M=B_Plot1_Mature_Rhizosphere_df; B_r1_M$Sum=rowSums(B_r1_M[,9:ncol(B_r1_M)]); B_r1_M_def=B_r1_M[which(B_r1_M$Sum>0,arr.ind=TRUE),];B_r1_M_def2= subset(B_r1_M_def, select = -c(Sum))
B_r1_I=B_Plot1_Immature_Rhizosphere_df; B_r1_I$Sum=rowSums(B_r1_I[,9:ncol(B_r1_I)]); B_r1_I_def=B_r1_I[which(B_r1_I$Sum>0,arr.ind=TRUE),];B_r1_I_def2= subset(B_r1_I_def, select = -c(Sum))
B_r2_M=B_Plot2_Mature_Rhizosphere_df; B_r2_M$Sum=rowSums(B_r2_M[,9:ncol(B_r2_M)]); B_r2_M_def=B_r2_M[which(B_r2_M$Sum>0,arr.ind=TRUE),];B_r2_M_def2= subset(B_r2_M_def, select = -c(Sum))
B_r2_I=B_Plot2_Immature_Rhizosphere_df; B_r2_I$Sum=rowSums(B_r2_I[,9:ncol(B_r2_I)]); B_r2_I_def=B_r2_I[which(B_r2_I$Sum>0,arr.ind=TRUE),];B_r2_I_def2= subset(B_r2_I_def, select = -c(Sum))
B_e1_I=B_Plot1_Immature_Endosphere_df; B_e1_I$Sum=rowSums(B_e1_I[,9:ncol(B_e1_I)]); B_e1_I_def=B_e1_I[which(B_e1_I$Sum>0,arr.ind=TRUE),];B_e1_I_def2= subset(B_e1_I_def, select = -c(Sum))
B_e1_M=B_Plot1_Mature_Endosphere_df; B_e1_M$Sum=rowSums(B_e1_M[,9:ncol(B_e1_M)]); B_e1_M_def=B_e1_M[which(B_e1_M$Sum>0,arr.ind=TRUE),];B_e1_M_def2= subset(B_e1_M_def, select = -c(Sum))
B_e2_M=B_Plot2_Mature_Endosphere_df; B_e2_M$Sum=rowSums(B_e2_M[,9:ncol(B_e2_M)]); B_e2_M_def=B_e2_M[which(B_e2_M$Sum>0,arr.ind=TRUE),];B_e2_M_def2= subset(B_e2_M_def, select = -c(Sum))
B_e2_I=B_Plot2_Immature_Endosphere_df; B_e2_I$Sum=rowSums(B_e2_I[,9:ncol(B_e2_I)]); B_e2_I_def=B_e2_I[which(B_e2_I$Sum>0,arr.ind=TRUE),];B_e2_I_def2= subset(B_e2_I_def, select = -c(Sum))7.2 Compare ASVs between mother and sucker plants
Remember that we want to retain the ASVs that are shared between mother and sucker plants (just in the root endosphere). They could be both transferred by mother plants or acquired from the rhizosphere soil.
#a) South plot
#a.1) 2022
compartidos_A_e1=cbind(A_e1_M_def2[A_e1_M_def2$ASV_names %in% A_e1_I_def2$ASV_names,], A_e1_I_def2[A_e1_I_def2$ASV_names %in% A_e1_M_def2$ASV_names,])#retain those ASVs that are in the dataframe of mothers and suckers (root endosphere)
compartidos_A_e1=compartidos_A_e1[,-(20:27)]#removing extra information
write.table(data.frame("_"=rownames(compartidos_A_e1),compartidos_A_e1),file="Core_A_e1.txt",sep="\t", row.names = F)
#a.2) 2023
compartidos_B_e1=cbind(B_e1_M_def2[B_e1_M_def2$ASV_names %in% B_e1_I_def2$ASV_names,], B_e1_I_def2[B_e1_I_def2$ASV_names %in% B_e1_M_def2$ASV_names,])
compartidos_B_e1=compartidos_B_e1[,-(18:25)]
write.table(data.frame("_"=rownames(compartidos_B_e1),compartidos_B_e1),file="Core_B_e1.txt",sep="\t", row.names = F)
#b) North plot
#b.1) 2022
compartidos_A_e2=cbind(A_e2_M_def2[A_e2_M_def2$ASV_names %in% A_e2_I_def2$ASV_names,], A_e2_I_def2[A_e2_I_def2$ASV_names %in% A_e2_M_def2$ASV_names,])
compartidos_A_e2=compartidos_A_e2[,-(21:28)]
write.table(data.frame("_"=rownames(compartidos_A_e2),compartidos_A_e2),file="Core_A_e2.txt",sep="\t", row.names = F)
#b.2) 2023
compartidos_B_e2=cbind(B_e2_M_def2[B_e2_M_def2$ASV_names %in% B_e2_I_def2$ASV_names,], B_e2_I_def2[B_e2_I_def2$ASV_names %in% B_e2_M_def2$ASV_names,])
compartidos_B_e2=compartidos_B_e2[,-(19:26)]
write.table(data.frame("_"=rownames(compartidos_B_e2),compartidos_B_e2),file="Core_B_e2.txt",sep="\t", row.names = F)Now, we are going to compare the shared fungal communities against the gunal communities dwelling in the rhizosphere soil of the suckers. We will retain fungal members that are in the root endosphere but NO in the rhizosphere soil
#a) South plot
#a.1) 2022
compartidos_A1_endo_rizo=cbind(compartidos_A_e1[compartidos_A_e1$ASV_names %in% A_r1_I_def2$ASV_names,], A_r1_I_def2[A_r1_I_def2$ASV_names %in% compartidos_A_e1$ASV_names,])#remove extra information from the data (additional columns indicating, again, the taxonomy of the ASVs)
compartidos_A1_endo_rizo=compartidos_A1_endo_rizo[,-(30:37)]#remove extra information
write.table(data.frame("_"=rownames(compartidos_A1_endo_rizo),compartidos_A1_endo_rizo),file="Compartidos_A1_endo_rizo.txt",sep="\t", row.names = F)
#a.2) 2023
compartidos_B1_endo_rizo=cbind(compartidos_B_e1[compartidos_B_e1$ASV_names %in% B_r1_I_def2$ASV_names,], B_r1_I_def2[B_r1_I_def2$ASV_names %in% compartidos_B_e1$ASV_names,])
compartidos_B1_endo_rizo=compartidos_B1_endo_rizo[,-(26:33)]
write.table(data.frame("_"=rownames(compartidos_B1_endo_rizo),compartidos_B1_endo_rizo),file="Compartidos_B1_endo_rizo.txt",sep="\t", row.names = F)
#b) North plot
#b.1) 2022
compartidos_A2_endo_rizo=cbind(compartidos_A_e2[compartidos_A_e2$ASV_names %in% A_r2_I_def2$ASV_names,], A_r2_I_def2[A_r2_I_def2$ASV_names %in% compartidos_A_e2$ASV_names,])
compartidos_A2_endo_rizo=compartidos_A2_endo_rizo[,-(29:36)]
write.table(data.frame("_"=rownames(compartidos_A2_endo_rizo),compartidos_A2_endo_rizo),file="Compartidos_A2_endo_rizo.txt",sep="\t", row.names = F)
#b.2) 2023
compartidos_B2_endo_rizo=cbind(compartidos_B_e2[compartidos_B_e2$ASV_names %in% B_r2_I_def2$ASV_names,], B_r2_I_def2[B_r2_I_def2$ASV_names %in% compartidos_B_e2$ASV_names,])
compartidos_B2_endo_rizo=compartidos_B2_endo_rizo[,-(27:34)]
write.table(data.frame("_"=rownames(compartidos_B2_endo_rizo),compartidos_B2_endo_rizo),file="Compartidos_B2_endo_rizo.txt",sep="\t", row.names = F)
#Remove those ASV detected both in the rhizosphere and in the root endosphere
#a) South plot
#a.1) 2022
especif_A1_endo=subset(compartidos_A_e1, !(ASV_names %in% compartidos_A1_endo_rizo$ASV_names))
write.table(data.frame("_"=rownames(especif_A1_endo),especif_A1_endo),file="A1_Endo_Transmitidos_Madreahija.txt",sep="\t", row.names = F)
#a.2) 2023
especif_B1_endo=subset(compartidos_B_e1, !(ASV_names %in% compartidos_B1_endo_rizo$ASV_names))
write.table(data.frame("_"=rownames(especif_B1_endo),especif_B1_endo),file="B1_Endo_Transmitidos_Madreahija.txt",sep="\t", row.names = F)
#b) North plot
#b.1) 2022
especif_A2_endo=subset(compartidos_A_e2, !(ASV_names %in% compartidos_A2_endo_rizo$ASV_names))
write.table(data.frame("_"=rownames(especif_A2_endo),especif_A2_endo),file="A2_Endo_Transmitidos_Madreahija.txt",sep="\t", row.names = F)
#b.2) 2023
especif_B2_endo=subset(compartidos_B_e2, !(ASV_names %in% compartidos_B2_endo_rizo$ASV_names))
write.table(data.frame("_"=rownames(especif_B2_endo),especif_B2_endo),file="B2_Endo_Transmitidos_Madreahija.txt",sep="\t", row.names = F)In this work, we analyzed carefully the number and proportions of the transferred ASVs in Excel
7.3 Plotting
In this section, we are going to make different graph representing the microbial transmission process. We will use barplots
#a) South plot
#a.1) 2022
totales_M_A_e1=nrow(A_e1_M_def2)#get the total number of ASV in mothers
transferidos_A_e1=nrow(especif_A1_endo)#get the number of shared ASVs between mothers and suckers
prop_transferidos_M_A_e1=(transferidos_A_e1 / totales_M_A_e1) *100#calculate the proportion of transferred ASVs
prop_perdidos_M_A_e1=((totales_M_A_e1-transferidos_A_e1) / totales_M_A_e1) *100#calculate the proportion of ASVs lost by the mothers
totales_I_A_e1=nrow(A_e1_I_def2)#get the total number of ASVs in suckers
prop_transferidos_I_A_e1=(transferidos_A_e1 / totales_I_A_e1) *100#get the number of shared ASVs between mothers and suckers
prop_adquiridos_I_A_e1=((totales_I_A_e1-transferidos_A_e1) / totales_I_A_e1) *100#calculate the proportion of ASVs acquired by the suckers
barras_A_e1=as.data.frame(rbind(prop_transferidos_M_A_e1,prop_perdidos_M_A_e1,prop_transferidos_I_A_e1,prop_adquiridos_I_A_e1))#create a dataframe with all the values in order to plot them
barras_A_e1$Year="A"
barras_A_e1$Plant=c("Mother", "Mother", "Sucker", "Sucker")
barras_A_e1$Condition=as.factor(c("Transmitted","Lost", "Transmitted", "Acquired"))
colnames(barras_A_e1)[1]="Proportion"
#a.2) 2023
totales_M_B_e1=nrow(B_e1_M_def2)
transferidos_B_e1=nrow(especif_B1_endo)
prop_transferidos_M_B_e1=(transferidos_B_e1 / totales_M_B_e1) *100
prop_perdidos_M_B_e1=((totales_M_B_e1-transferidos_B_e1) / totales_M_B_e1) *100
totales_I_B_e1=nrow(B_e1_I_def2)
prop_transferidos_I_B_e1=(transferidos_B_e1 / totales_I_B_e1) *100
prop_adquiridos_I_B_e1=((totales_I_B_e1-transferidos_B_e1) / totales_I_B_e1) *100
barras_B_e1=as.data.frame(rbind(prop_transferidos_M_B_e1,prop_perdidos_M_B_e1, prop_transferidos_I_B_e1,prop_adquiridos_I_B_e1))
barras_B_e1$Year="B"
barras_B_e1$Plant=c("Mother", "Mother", "Sucker", "Sucker")
barras_B_e1$Condition=as.factor(c("Transmitted","Lost", "Transmitted", "Acquired"))
colnames(barras_B_e1)[1]="Proportion"
A_e1=ggplot(barras_A_e1, aes(x=Plant, y=Proportion, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
labs(y="Proportion of ASV (%)", x=NULL, fill="Condition",title="Plot 1 (2022)")+
guides(fill = guide_legend(reverse = TRUE))+
scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
breaks=c("Mother","Sucker"))+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 101))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
theme(axis.text = element_text(size = 14))+
theme(axis.text.x = element_text(face="bold", size=16))+
theme(axis.title.y = element_text(size = 16))+
theme(legend.key.size = unit(0.9, "cm"))+
theme(legend.text = element_text(size = 18))+
theme(legend.title = element_text(size=18, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)
x11()
A_e1
B_e1=ggplot(barras_B_e1, aes(x=Plant, y=Proportion, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
labs(y="Proportion of ASV (%)", x=NULL, fill="Condition",title="Plot 1 (2023)")+
guides(fill = guide_legend(reverse = TRUE))+
scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
breaks=c("Mother","Sucker"))+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 101))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
theme(axis.text = element_text(size = 14))+
theme(axis.text.x = element_text(face="bold", size=16))+
theme(axis.title.y = element_text(size = 16))+
theme(legend.key.size = unit(0.9, "cm"))+
theme(legend.text = element_text(size = 18))+
theme(legend.title = element_text(size=18, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)
x11()
B_e1
x11()
p_Plot1_prop=grid.arrange(A_e1,B_e1,ncol=2, nrow=1, top=textGrob("Plot 1", gp=gpar(fontsize=18)))
#b) North plot
#b.1) 2022
totales_M_A_e2=nrow(A_e2_M_def2)
transferidos_A_e2=nrow(especif_A2_endo)
prop_transferidos_M_A_e2=(transferidos_A_e2 / totales_M_A_e2) *100
prop_perdidos_M_A_e2=((totales_M_A_e2-transferidos_A_e2) / totales_M_A_e2) *100
totales_I_A_e2=nrow(A_e2_I_def2)
prop_transferidos_I_A_e2=(transferidos_A_e2 / totales_I_A_e2) *100
prop_adquiridos_I_A_e2=((totales_I_A_e2-transferidos_A_e2) / totales_I_A_e2) *100
barras_A_e2=as.data.frame(rbind(prop_transferidos_M_A_e2,prop_perdidos_M_A_e2,
prop_transferidos_I_A_e2,prop_adquiridos_I_A_e2))
barras_A_e2$Year="A"
barras_A_e2$Plant=c("Mother", "Mother", "Sucker", "Sucker")
barras_A_e2$Condition=as.factor(c("Transmitted","Lost", "Transmitted", "Acquired"))
colnames(barras_A_e2)[1]="Proportion"
#b.2) 2023
totales_M_B_e2=nrow(B_e2_M_def2)
transferidos_B_e2=nrow(especif_B2_endo)
prop_transferidos_M_B_e2=(transferidos_B_e2 / totales_M_B_e2) *100
prop_perdidos_M_B_e2=((totales_M_B_e2-transferidos_B_e2) / totales_M_B_e2) *100
totales_I_B_e2=nrow(B_e2_I_def2)
prop_transferidos_I_B_e2=(transferidos_B_e2 / totales_I_B_e2) *100
prop_adquiridos_I_B_e2=((totales_I_B_e2-transferidos_B_e2) / totales_I_B_e2) *100
barras_B_e2=as.data.frame(rbind(prop_transferidos_M_B_e2,prop_perdidos_M_B_e2,
prop_transferidos_I_B_e2,prop_adquiridos_I_B_e2))
barras_B_e2$Year="B"
barras_B_e2$Plant=c("Mother", "Mother", "Sucker", "Sucker")
barras_B_e2$Condition=as.factor(c("Transmitted","Lost", "Transmitted", "Acquired"))
colnames(barras_B_e2)[1]="Proportion"
A_e2=ggplot(barras_A_e2, aes(x=Plant, y=Proportion, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
labs(y="Proportion of ASV (%)", x=NULL, fill="Condition",title="Plot 2 (2022)")+
guides(fill = guide_legend(reverse = TRUE))+
scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
breaks=c("Mother","Sucker"))+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 101))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
theme(axis.text = element_text(size = 14))+
theme(axis.text.x = element_text(face="bold", size=16))+
theme(axis.title.y = element_text(size = 16))+
theme(legend.key.size = unit(0.9, "cm"))+
theme(legend.text = element_text(size = 18))+
theme(legend.title = element_text(size=18, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)
x11()
A_e2
B_e2=ggplot(barras_B_e2, aes(x=Plant, y=Proportion, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
labs(y="Proportion of ASV (%)", x=NULL, fill="Condition",title="Plot 2 (2023)")+
guides(fill = guide_legend(reverse = TRUE))+
scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
breaks=c("Mother","Sucker"))+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 101))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
theme(axis.text = element_text(size = 14))+
theme(axis.text.x = element_text(face="bold", size=16))+
theme(axis.title.y = element_text(size = 16))+
theme(legend.key.size = unit(0.9, "cm"))+
theme(legend.text = element_text(size = 18))+
theme(legend.title = element_text(size=18, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)
x11()
B_e2
x11()
p_Plot2_prop=grid.arrange(A_e2,B_e2,ncol=2, nrow=1, top=textGrob("Plot 2", gp=gpar(fontsize=18)))With this code a bar plot is created (see Figure 7)

We are going to obtain the same plots but instead of plotting the proportion of ASVs lost, acquired and transferred, the relative abundance of each of them will be visualized.
#a) South plot
#a.1) 2022
perdidos_M_A_e1=subset(A_e1_M_def2, !(ASV_names %in% especif_A1_endo$ASV_names))#these are the ASVs that mothers lost
perdidos_M_A_e1$Promedio=rowMeans(subset(perdidos_M_A_e1, select =c(9:ncol(perdidos_M_A_e1))))#mean relative abundance of each of them
perdidos_M_A_e1_sum=sum(perdidos_M_A_e1$Promedio)#sum the relative abundance of all the ASVs lost by the mothers
adquiridos_I_A_e1=subset(A_e1_I_def2, !(ASV_names %in% especif_A1_endo$ASV_names))#these are the ASVs that suckers retain from the environment
adquiridos_I_A_e1$Promedio=rowMeans(subset(adquiridos_I_A_e1, select =c(9:ncol(adquiridos_I_A_e1))))#mean
adquiridos_I_A_e1_sum=sum(adquiridos_I_A_e1$Promedio)
especif_A1_endo$Promedio_Madre=rowMeans(subset(especif_A1_endo, select =c(9:19)))#mean relative abundance of transferred ASVs (in mothers samples)
especif_A1_endo_M_sum=sum(especif_A1_endo$Promedio_Madre)
especif_A1_endo$Promedio_Hijas=rowMeans(subset(especif_A1_endo, select =c(20:29)))#mean relative abundance of acquired ASV from mothers (in suckers samples)
especif_A1_endo_I_sum=sum(especif_A1_endo$Promedio_Hijas)
Abund_A1_endo=c(especif_A1_endo_M_sum,perdidos_M_A_e1_sum,especif_A1_endo_I_sum,adquiridos_I_A_e1_sum)
barras_abund_A_e1=as.data.frame(cbind(barras_A_e1,Abund_A1_endo))#create a new dataframe with all the data
colnames(barras_abund_A_e1)[5]="Abundance"
write.table(data.frame("X"=rownames(perdidos_M_A_e1),perdidos_M_A_e1),"AbunRel_perdidos_Madre_A_e1.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(adquiridos_I_A_e1),adquiridos_I_A_e1),"AbunRel_AdquiridosAmbiente_A_e1.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(especif_A1_endo),especif_A1_endo),"AbunRel_TransferidosMadreHija_A_e1.txt",sep="\t",row.names = F)
#a.2) 2023
perdidos_M_B_e1=subset(B_e1_M_def2, !(ASV_names %in% especif_B1_endo$ASV_names))
perdidos_M_B_e1$Promedio=rowMeans(subset(perdidos_M_B_e1, select =c(9:ncol(perdidos_M_B_e1))))
perdidos_M_B_e1_sum=sum(perdidos_M_B_e1$Promedio)
adquiridos_I_B_e1=subset(B_e1_I_def2, !(ASV_names %in% especif_B1_endo$ASV_names))
adquiridos_I_B_e1$Promedio=rowMeans(subset(adquiridos_I_B_e1, select =c(9:ncol(adquiridos_I_B_e1))))
adquiridos_I_B_e1_sum=sum(adquiridos_I_B_e1$Promedio)
especif_B1_endo$Promedio_Madre=rowMeans(subset(especif_B1_endo, select =c(9:17)))
especif_B1_endo_M_sum=sum(especif_B1_endo$Promedio_Madre)
especif_B1_endo$Promedio_Hijas=rowMeans(subset(especif_B1_endo, select =c(18:25)))
especif_B1_endo_I_sum=sum(especif_B1_endo$Promedio_Hijas)
Abund_B1_endo=c(especif_B1_endo_M_sum,perdidos_M_B_e1_sum,especif_B1_endo_I_sum,adquiridos_I_B_e1_sum)
barras_abund_B_e1=as.data.frame(cbind(barras_B_e1,Abund_B1_endo))
colnames(barras_abund_B_e1)[5]="Abundance"
write.table(data.frame("X"=rownames(perdidos_M_B_e1),perdidos_M_B_e1),"AbunRel_perdidos_Madre_B_e1.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(adquiridos_I_B_e1),adquiridos_I_B_e1),"AbunRel_AdquiridosAmbienteHija_B_e1.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(especif_B1_endo),especif_B1_endo),"AbunRel_TransferidosMadreHija_B_e1.txt",sep="\t",row.names = F)
#b) North plot
#b.1) 2022
perdidos_M_A_e2=subset(A_e2_M_def2, !(ASV_names %in% especif_A2_endo$ASV_names))
perdidos_M_A_e2$Promedio=rowMeans(subset(perdidos_M_A_e2, select =c(9:ncol(perdidos_M_A_e2))))
perdidos_M_A_e2_sum=sum(perdidos_M_A_e2$Promedio)
adquiridos_I_A_e2=subset(A_e2_I_def2, !(ASV_names %in% especif_A2_endo$ASV_names))
adquiridos_I_A_e2$Promedio=rowMeans(subset(adquiridos_I_A_e2, select =c(9:ncol(adquiridos_I_A_e2))))
adquiridos_I_A_e2_sum=sum(adquiridos_I_A_e2$Promedio)
especif_A2_endo$Promedio_Madre=rowMeans(subset(especif_A2_endo, select =c(9:20)))
especif_A2_endo_M_sum=sum(especif_A2_endo$Promedio_Madre)
especif_A2_endo$Promedio_Hijas=rowMeans(subset(especif_A2_endo, select =c(21:28)))
especif_A2_endo_I_sum=sum(especif_A2_endo$Promedio_Hijas)
Abund_A2_endo=c(especif_A2_endo_M_sum,perdidos_M_A_e2_sum,especif_A2_endo_I_sum,adquiridos_I_A_e2_sum)
barras_abund_A_e2=as.data.frame(cbind(barras_A_e2,Abund_A2_endo))
colnames(barras_abund_A_e2)[5]="Abundance"
write.table(data.frame("X"=rownames(perdidos_M_A_e2),perdidos_M_A_e2),"AbunRel_perdidos_Madre_A_e2.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(adquiridos_I_A_e2),adquiridos_I_A_e2),"AbunRel_AdquiridosAmbienteHija_A_e2.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(especif_A2_endo),especif_A2_endo),"AbunRel_TransferidosMadreHija_A_e2.txt",sep="\t",row.names = F)
#b.2) 2023
perdidos_M_B_e2=subset(B_e2_M_def2, !(ASV_names %in% especif_B2_endo$ASV_names))
perdidos_M_B_e2$Promedio=rowMeans(subset(perdidos_M_B_e2, select =c(9:ncol(perdidos_M_B_e2))))
perdidos_M_B_e2_sum=sum(perdidos_M_B_e2$Promedio)
adquiridos_I_B_e2=subset(B_e2_I_def2, !(ASV_names %in% especif_B2_endo$ASV_names))
adquiridos_I_B_e2$Promedio=rowMeans(subset(adquiridos_I_B_e2, select =c(9:ncol(adquiridos_I_B_e2))))
adquiridos_I_B_e2_sum=sum(adquiridos_I_B_e2$Promedio)
especif_B2_endo$Promedio_Madre=rowMeans(subset(especif_B2_endo, select =c(9:18)))
especif_B2_endo_M_sum=sum(especif_B2_endo$Promedio_Madre)
especif_B2_endo$Promedio_Hijas=rowMeans(subset(especif_B2_endo, select =c(19:26)))
especif_B2_endo_I_sum=sum(especif_B2_endo$Promedio_Hijas)
Abund_B2_endo=c(especif_B2_endo_M_sum,perdidos_M_B_e2_sum,especif_B2_endo_I_sum,adquiridos_I_B_e2_sum)
barras_abund_B_e2=as.data.frame(cbind(barras_B_e2,Abund_B2_endo))
colnames(barras_abund_B_e2)[5]="Abundance"
write.table(data.frame("X"=rownames(perdidos_M_B_e2),perdidos_M_B_e2),"AbunRel_perdidos_Madre_B_e2.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(adquiridos_I_B_e2),adquiridos_I_B_e2),"AbunRel_AdquiridosAmbienteHijas_B_e2.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(especif_B2_endo),especif_B2_endo),"AbunRel_TransferidosMadreHija_B_e2.txt",sep="\t",row.names = F)
A1_abun=ggplot(barras_abund_A_e1, aes(x=Plant, y=Abundance, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
labs(y="Relative abundance of ASV (%)", x=NULL, fill="Condition",title="Plot 1 (2022)")+
guides(fill = guide_legend(reverse = TRUE))+
scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
breaks=c("Mother","Sucker"))+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 101))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
theme(axis.text = element_text(size = 14))+
theme(axis.text.x = element_text(face="bold", size=16))+
theme(axis.title.y = element_text(size = 16))+
theme(legend.key.size = unit(0.9, "cm"))+
theme(legend.text = element_text(size = 18))+
theme(legend.title = element_text(size=18, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)
x11()
A1_abun
B1_abun=ggplot(barras_abund_B_e1, aes(x=Plant, y=Abundance, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
labs(y="Relative abundance of ASV (%)", x=NULL, fill="Condition",title="Plot 1 (2023)")+
guides(fill = guide_legend(reverse = TRUE))+
scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
breaks=c("Mother","Sucker"))+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 101))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
theme(axis.text = element_text(size = 14))+
theme(axis.text.x = element_text(face="bold", size=16))+
theme(axis.title.y = element_text(size = 16))+
theme(legend.key.size = unit(0.9, "cm"))+
theme(legend.text = element_text(size = 18))+
theme(legend.title = element_text(size=18, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)
x11()
B1_abun
A2_abun=ggplot(barras_abund_A_e2, aes(x=Plant, y=Abundance, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
labs(y="Relative abundance of ASV (%)", x=NULL, fill="Condition",title="Plot 2 (2022)")+
guides(fill = guide_legend(reverse = TRUE))+
scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
breaks=c("Mother","Sucker"))+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 101))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
theme(axis.text = element_text(size = 14))+
theme(axis.text.x = element_text(face="bold", size=16))+
theme(axis.title.y = element_text(size = 16))+
theme(legend.key.size = unit(0.9, "cm"))+
theme(legend.text = element_text(size = 18))+
theme(legend.title = element_text(size=18, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)
x11()
A2_abun
B2_abun=ggplot(barras_abund_B_e2, aes(x=Plant, y=Abundance, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
labs(y="Relative abundance of ASV (%)", x=NULL, fill="Condition",title="Plot 2 (2023)")+
guides(fill = guide_legend(reverse = TRUE))+
scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
breaks=c("Mother","Sucker"))+
scale_y_continuous(expand=c(0.01,0.01),
breaks=c(0,10,20,30,40,50,60,70,80,90,100),
labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
limits = c(NA, 101))+
theme_bw()+
theme(panel.border = element_rect(colour="black"))+
theme(axis.title.x=element_blank())+
theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
theme(axis.text = element_text(size = 14))+
theme(axis.text.x = element_text(face="bold", size=16))+
theme(axis.title.y = element_text(size = 16))+
theme(legend.key.size = unit(0.9, "cm"))+
theme(legend.text = element_text(size = 18))+
theme(legend.title = element_text(size=18, face="bold"))+
theme(legend.title.align=0)+
theme(legend.text.align = 0)
x11()
B2_abun
x11()
p_abund_p1=grid.arrange(A1_abun,B1_abun,ncol=2, nrow=1, top=textGrob("Plot 1", gp=gpar(fontsize=18)))
x11()
p_abund_p2=grid.arrange(A2_abun,B2_abun,ncol=2, nrow=1, top=textGrob("Plot 2", gp=gpar(fontsize=18)))8. Differentially abundant taxa
It is interesting to decipher which taxa show statistically significant differences between two groups of samples (i.e., differences in the abundance between mother and suckers plants). For that purpose, we will apply ANCOM-BC approach, especially designed for compositional data. Furthermore, we will use the function ancomloop of the package micro4all in order to compare all groups of samples among them (ANCOM-BC just compares the first group of samples in the dataset with all other groups)
We will apply this statistical test at both phylum and genus levels.
ANCOM-BC makes a data transformation and applies logarithms, so in this case, we shouldn’t use the normalized abundance. We should use raw phyloseq objects (in which the ASV counts is expressed in absolute abundance)
8.1 Phylum
ANCOM_phylum_rizo=ancomloop(input_object_phyloseq = rizo, grouping = "Fusion",
ancom.options = list(global=FALSE, struc_zero=TRUE, n_cl=8),out.unclassified = TRUE, tax.level="Phylum")
###options:
#grouping: indicate your grouping variable
#n_cl: the number of cores
#global: apply the global test (for more information, read this paper: https://www.nature.com/articles/s41467-020-17041-7 and documentation of the function ancombc)
#struc_zero: remove "structural zeros" (not real zeros but zeros resulting from a wrong sampling procedure)
#out.unclassified: apply or not the test to unclassified taxa
#tax.level: indicate the taxonomical level that you want to analyze
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[1]]),ANCOM_phylum_rizo[[1]]),file="ANCOM_Phylum_Rizo_2022_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[2]]),ANCOM_phylum_rizo[[2]]),file="ANCOM_Phylum_Rizo_2022_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[3]]),ANCOM_phylum_rizo[[3]]),file="ANCOM_Phylum_Rizo_2022_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[4]]),ANCOM_phylum_rizo[[4]]),file="ANCOM_Phylum_Rizo_2022_Plot2_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[5]]),ANCOM_phylum_rizo[[5]]),file="ANCOM_Phylum_Rizo_2023_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[6]]),ANCOM_phylum_rizo[[6]]),file="ANCOM_Phylum_Rizo_2023_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[7]]),ANCOM_phylum_rizo[[7]]),file="ANCOM_Phylum_Rizo_2023_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[8]]),ANCOM_phylum_rizo[[8]]),file="ANCOM_Phylum_Rizo_2023_Plot2_M.txt",sep="\t", row.names = F)
#b) ENDOSPHERE
ANCOM_phylum_endo=ancomloop(input_object_phyloseq = endo, grouping = "Fusion",
ancom.options = list(global=FALSE, struc_zero=TRUE, n_cl=8),out.unclassified = TRUE, tax.level="Phylum")
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[1]]),ANCOM_phylum_endo[[1]]),file="ANCOM_Phylum_endo_2022_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[2]]),ANCOM_phylum_endo[[2]]),file="ANCOM_Phylum_endo_2022_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[3]]),ANCOM_phylum_endo[[3]]),file="ANCOM_Phylum_endo_2022_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[4]]),ANCOM_phylum_endo[[4]]),file="ANCOM_Phylum_endo_2022_Plot2_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[5]]),ANCOM_phylum_endo[[5]]),file="ANCOM_Phylum_endo_2023_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[6]]),ANCOM_phylum_endo[[6]]),file="ANCOM_Phylum_endo_2023_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[7]]),ANCOM_phylum_endo[[7]]),file="ANCOM_Phylum_endo_2023_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[8]]),ANCOM_phylum_endo[[8]]),file="ANCOM_Phylum_endo_2023_Plot2_M.txt",sep="\t", row.names = F)In this work, we analyzed exhaustively the results in Excel. Pay attention to the column entitled “diff_abbn”, which shows the phyla that were differentially abundant.
8.2 Genus
#a) RHIZOSPHERE
ANCOM_genus_rizo=ancomloop(input_object_phyloseq = rizo, grouping = "Fusion",
ancom.options = list(global=FALSE, struc_zero=TRUE, n_cl=8),out.unclassified = TRUE, tax.level="Genus")
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[1]]),ANCOM_genus_rizo[[1]]),file="ANCOM_Genus_Rizo_2022_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[2]]),ANCOM_genus_rizo[[2]]),file="ANCOM_Genus_Rizo_2022_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[3]]),ANCOM_genus_rizo[[3]]),file="ANCOM_Genus_Rizo_2022_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[4]]),ANCOM_genus_rizo[[4]]),file="ANCOM_Genus_Rizo_2022_Plot2_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[5]]),ANCOM_genus_rizo[[5]]),file="ANCOM_Genus_Rizo_2023_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[6]]),ANCOM_genus_rizo[[6]]),file="ANCOM_Genus_Rizo_2023_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[7]]),ANCOM_genus_rizo[[7]]),file="ANCOM_Genus_Rizo_2023_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[8]]),ANCOM_genus_rizo[[8]]),file="ANCOM_Genus_Rizo_2023_Plot2_M.txt",sep="\t", row.names = F)
#a) ENDOSPHERE
ANCOM_genus_endo=ancomloop(input_object_phyloseq = endo, grouping = "Fusion",
ancom.options = list(global=FALSE, struc_zero=TRUE, n_cl=8),out.unclassified = TRUE, tax.level="Genus")
names(ANCOM)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[1]]),ANCOM_genus_endo[[1]]),file="ANCOM_Genus_Endo_2022_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[2]]),ANCOM_genus_endo[[2]]),file="ANCOM_Genus_Endo_2022_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[3]]),ANCOM_genus_endo[[3]]),file="ANCOM_Genus_Endo_2022_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[4]]),ANCOM_genus_endo[[4]]),file="ANCOM_Genus_Endo_2022_Plot2_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[5]]),ANCOM_genus_endo[[5]]),file="ANCOM_Genus_Endo_2023_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[6]]),ANCOM_genus_endo[[6]]),file="ANCOM_Genus_Endo_2023_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[7]]),ANCOM_genus_endo[[7]]),file="ANCOM_Genus_Endo_2023_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[8]]),ANCOM_genus_endo[[8]]),file="ANCOM_Genus_Endo_2023_Plot2_M.txt",sep="\t", row.names = F)In this work, we analyzed exhaustively the results in Excel. Pay attention to the column entitled “diff_abbn”, which shows the genera that were differentially abundant.