Ecological analyses of fungal dataset

Firstly, we have to install and load many packages needed to analyze our fungal data:

if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
BiocManager::install("ANCOMBC")
BiocManager::install("phyloseq")
if (!require("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
BiocManager::install("microbiome")
install.packages("dplyr")
install.packages("tidyverse")
install.packages("ggplot2")
install.packages("GUniFrac")
install.packages("phangorn")
install.packages("vegan")
install.packages("gdata")
install.packages("purrr")
install.packages("devtools")
install.packages("rstatix")
install.packages("ggstasplot")
install.packages("DescTools")
install.packages("tibble")
install.packages("WRS2")
install.packages("tibble")
install.packages("ARTool")
if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
BiocManager::install("edgeR")
remotes::install_github("wilkelab/ggtext") 
install_github("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis")
install_github("nuriamw/micro4all")

library(ANCOMBC)
library(phyloseq)
library(microbiome)
library(dplyr)
library(tidyverse)
library(ggplot2)
library(GUniFrac)
library(vegan)
library(gdata)
library(purrr)
library(devtools)
library(rstatix)
library(ggstatsplot)
library(DescTools)
library(tibble)
library(WRS2)
library(ARTool)
library(edgeR)
library(pairwiseAdonis)
library(micro4all)

Our analyses will based on phyloseq objects. That is to say, we are going to use phyloseq package

1. Create a phyloseq object

phyloseq object are very useful since they harbor several elements of interest: an ASV (or OTU) table in which the absolute abundance of each ASV is registered, the taxonomy of each ASV (from Domain or Kingdom to ASV level), a metadata table in which a full description of each sample is given, and finally, a phylogenetic tree at ASV level. Thus, lets load all the input files and create the phyloseq object. So, all the above mentioned files should be in our working directory:

data=read.table("ASV_Hongos_FINAL.txt", header = T, sep="\t")
data$Kingdom=gsub("k__","",data$Kingdom)#replace the name of the taxa, bause Unite databse adds a code for each taxonomic rank
data$Phylum=gsub("p__","",data$Phylum)
data$Class=gsub("c__","",data$Class)
data$Order=gsub("o__","",data$Order)
data$Family=gsub("f__","",data$Family)
data$Genus=gsub("g__","",data$Genus)
colnames_data=colnames(data)

mt=read.table("metadata_fungi.txt",sep="\t", header=T)#load the metadata table
mt$Year=gsub("2022", "A", mt$Year)#we replaced the number of the years by a character
mt$Year=gsub("2023", "B", mt$Year)
mt$Replicate=as.character(mt$Replicate)#convert into characters
mt$Fusion=paste0(mt$Year, sep="_",mt$Plot, sep="_", mt$Condition,sep="_", mt$Compartment)#here we create a joined artificial variable in order to plot better our samples
mt$Condition_Year=paste0(mt$Condition,sep="_", mt$Year)
row.names(mt)=mt$Sample#the rownames of the metadata table should be identical to the name of the samples

The order of the sample is very important! In particular, the order of the samples in the metadata file must be the same as the order of the samples in the ASV table. So, check (and correct it) previously.

tax =data[,2:8]#indicate the taxonomy of each ASV: from Domain/Kingdom to ASV level
ASV=data[,9:ncol(data)]#indicate the ASV counts

dna=Biostrings::DNAStringSet(data$ASV_seqs) #save the sequence of each ASV into a variable
names(dna)= data$ASV_names

row.names(tax)=data$ASV_names#the name of each row should be the name of each ASV (i.e., "ASV02156")
row.names(ASV)=data$ASV_names

identical(rownames(ASV), rownames(tax))#this is very important and should give "TRUE". Otherwise, review the lines above because there is a mistake elsewhere

#Convert each element into suitable objects for the construction of a phyloseq object
phy_OTUtable=otu_table(ASV, taxa_are_rows = T)
phy_taxonomy=tax_table(as.matrix(tax))
phy_metadata=sample_data(mt) 

phy_data_total=phyloseq(phy_OTUtable,phy_taxonomy,phy_metadata)#bind all the elements into the phyloseq object
nsamples(phy_OTUtable); sum(sample_names(phy_metadata) %in% sample_names(phy_OTUtable)); nsamples(phy_metadata)#check all the elements have the same number of samples

2. Data curation

The phyloseq object is already created, however, we still have to check the quality of the data. For instance, we have to check if all the samples are comprised by enough sequences, and if the sampling and sequencing efforts are enough. For that purpose, we have to check the number of total sequences.

numsec=as.data.frame(colSums(otu_table(phy_data_total)))
View(numsec)#order the table in ascending order to check the lowest library size (the sample with the lowest number of sequences)

We have to remove some samples that were not well sterilized, as we detected in the bacterial dataset.

phy_total=subset_samples(phy_data_total, 
                         Sample != "A1S04E"  & 
                           Sample != "A1S05E" & 
                           Sample!= "A1M01E"&
                           Sample != "A2S09E" & Sample != "A2S12E"& Sample != "A2S08E" & Sample!="A2S01E"&
                           Sample !="B1S03E"& Sample != "B1S06E"& Sample!="B1S09E"& Sample!="B1S10E"&
                           Sample !="B1M12E"&
                           Sample != "B2S06E" & Sample !="B2S11E" & Sample !="B2S07E" & Sample !="B2S02E" &
                           Sample !="B2M08E" & Sample !="B2M03E")
numsec_pruned=as.data.frame(colSums(otu_table(phy_total)))

Now, it would be interesting to check the quality of the sterilization process. We want to visualize in an ordination plot if rhizosphere and root endosphere samples are separated in the multivariate space. As indicated in the step 5, we have to normalize the ASV counts and then, calculate the ordination of the samples and plot them into a multivariate ordination plot:

#Let's sub-sample the phyloseq objects
AA=subset_samples(phy_total, Year=="A")
AA=prune_taxa(taxa_sums(AA)>0.0, AA)
BB=subset_samples(phy_total, Year=="B")
BB=prune_taxa(taxa_sums(BB)>0.0, BB)

#normalization of the data. For instance, the data corresponding to year 2023
todas_B_ASV=as.data.frame(otu_table(BB,taxa_are_rows = T))#obtain the ASV table from the phyloseq object
todas_B_mt=as.data.frame(sample_data(BB))#obtain the metadata table from the phyloseq object
todas_B_tax=as.data.frame(tax_table(BB))#obtain the taxonomy table from the phyloseq objec

#all the previous steps are required to create a new object suitable for edgeR functions
todas_B_edgeR = DGEList(counts = todas_B_ASV, samples = todas_B_mt, genes = todas_B_tax)#Create a DGEList object need for the TMM normalization
todas_B_edgeR = calcNormFactors(todas_B_edgeR)#calculate all the normalization factor so that we can correct potential biases associated to the different librazy sizes (different number of sequences per sample)
todas_B_ASV_norm = cpm(todas_B_edgeR, normalized.lib.sizes=T, log=F)#extracting the normalized abundance
todas_B_phy_OTU_norm=otu_table(as.data.frame(todas_B_ASV_norm,row.names=F), taxa_are_rows = T)#create the normalized phyloseq elements
todas_B_phy_taxonomy_norm=tax_table(as.matrix(todas_B_tax))
todas_B_phy_metadata_norm=sample_data(todas_B_mt)
taxa_names(todas_B_phy_OTU_norm)= taxa_names(todas_B_phy_taxonomy_norm)#if everything is well calculated, it should give "TRUE"
identical(rownames(todas_B_ASV_norm), rownames(todas_B_tax))
todas_B_normalized_phyloseq<-phyloseq(todas_B_phy_OTU_norm,
                                      todas_B_phy_taxonomy_norm,
                                      todas_B_phy_metadata_norm)#Create the new phyloseq object, in which the abundance of all the taxa is normalized

One the normalization is done, the plot is made as follows:

PCOA_bray_B = ordinate(todas_B_normalized_phyloseq, "PCoA", "bray")#here we create the ordination plot (PCoA) based on Bray-Curtis dissimilarities
peso2=PCOA_bray_B$values$Relative_eig[1:2] #save the percentage of the variance explained by each axis into a new variable
peso2 #check the variance explained

p= plot_ordination(todas_B_normalized_phyloseq, PCOA_bray_B,type= "samples", color= "Compartment",shape = "Year")+
 geom_point(alpha = 4, size = 4.5)+
labs(x=paste0("Axis 1 (",round(peso2[1]*100,digits = 2),"%)"), 
          y=paste0("Axis 2 (",round(peso2[2]*100,digits = 2),"%)"))+
geom_text(aes(label=Sample, fontface="bold"), hjust=0.15, vjust=0.15, nudge_y = 0.0075,size=4, show.legend =F) +
scale_shape_manual(values=c("2022"=16, "2023"=17))+
scale_color_manual(values=c("Rhizosphere"="blue", "Endosphere"="green"))+
theme_bw()+
theme(legend.key=element_blank(),
          legend.title.align = 0,
          legend.title = element_text(face="bold",size=18),
          legend.text.align = 0,
          axis.text = element_text(size=18),
          axis.title = element_text(size = 20),
          plot.title = element_text(hjust=0.5, face="bold",size=18),
          legend.text = element_text(size = 18))+
  geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+
  geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+
ggtitle("Year 2023\nPCoA on Bray Curtis distance")

x11()
p

As shown in Figure 1, there are still some root endosphere samples (B1M10E and B1M09E) that are closer than rhizosphere samples than other root endosphere samples. Thus, we consider that these roots were not properly sterilized. We should remove them because we are not aware of their origin

Figure1. PCoA based on Bray-Curtis dissimilarities including the rhizosphere and root endosphere samples of plants (2023)

This, we have to curate our dataset, and remove all these samples:

phy_total_TOTAL=subset_samples(phy_total, 
                         Sample != "B1M10E"  & 
                           Sample != "B1M09E")
numsec_pruned_TOTAL=as.data.frame(colSums(otu_table(phy_total_TOTAL)))

write.table(data.frame(" "=rownames(numsec_pruned_TOTAL),numsec_pruned_TOTAL),file="Numero_secuencias_deTrabajo.txt", sep="\t",row.names =F)#save the table with the number of total sequences per sample

3. Rarefaction curves

We still have to check whether the sequencing effort has been enough, so let’s have a look at rarefaction curves. They are the best way to visualize the quality of the sequencing and sampling.

In our case, we should visualize the rarefaction curves by plant compartment, to check whether the data curation was made well.

We will split according to the orchards, otherwise it will be really difficult to understand the graphs. So, let’s subset the data according to our experimental design. We will create new phyloseq objects: two per plant compartment, corresponding to each orchard or plot.

rizo1=subset_samples(phy_total_TOTAL, Compartment=="Rhizosphere" & Plot=="Plot1")
rizo1=prune_taxa(taxa_sums(rizo1)>0.0, rizo1)

rizo2=subset_samples(phy_total_TOTAL, Compartment=="Rhizosphere" & Plot== "Plot2")
rizo2=prune_taxa(taxa_sums(rizo2)>0.0, rizo2)

endo1=subset_samples(phy_total_TOTAL, Compartment=="Endosphere" & Plot=="Plot1")
endo1=prune_taxa(taxa_sums(endo1)>0.0, endo1)

endo2=subset_samples(phy_total_TOTAL, Compartment=="Endosphere" & Plot== "Plot2")
endo2=prune_taxa(taxa_sums(endo2)>0.0, endo2)

Now, calculate and plot the rarefaction curves:

#a) Rhizosphere
#a.1) South plot
min(sample_sums(rizo1)) #get the minimum library size (the number of sequences)
colSums(otu_table(rizo1))[which.min(colSums(otu_table(rizo1)))] 
mt=as.data.frame(sample_data(rizo1))[order(as.character(rownames(as.data.frame(sample_data(rizo1)))),decreasing=F),]
otu = otu_table(rizo1)
otu = as.data.frame(t(otu))
otu=otu[order(as.character(rownames(otu)),decreasing=FALSE),]

rownames(otu)==rownames(mt) #we want to color in different colors mothers and suckers, and also the samples taken in each year
rownames(otu)=paste0(rownames(otu),"/",mt$Condition_Year)
sample_names = rownames(otu)

out <- rarecurve(otu, step = 100, label = F) #calculate the rarefaction
rare <- lapply(out, function(x){  
  b <- as.data.frame(x)
  b <- data.frame(ASV = b[,1], raw.read = rownames(b))
  b$raw.read <- as.numeric(gsub("N", "",  b$raw.read))
  return(b)
})#this function is to create a table from the rarefaction variable

names(rare) <- sample_names
rare <- map_dfr(rare, function(x){
  z <- data.frame(x)
  return(z)
}, .id = "Sample")

rare$Condition_Year=rare$Sample #these lines aimed at creating a new column in which the information needed for the plotting is included. You should include here the factor that determines the color of the curves
rare$Condition_Year=gsub(".*/", "", rare$Condition_Year)#replace all the text before "/"
rare$raw.read=as.numeric(rare$raw.read)

p_rizo1=ggplot(rare, aes(x=raw.read, y=ASV, colour=Condition_Year, group=Sample)) +
  theme_bw()+
  geom_point(aes(colour=Condition_Year), size=0.85)+
  geom_line(aes(colour=Condition_Year),size=1.2)+
  geom_vline(aes(xintercept = min(sample_sums(rizo1))), 
             lty=1, colour="black")+
  scale_fill_manual(values = c("Immature_A"="cornflowerblue", "Mature_A"="darkgreen",
                              "Immature_B"="#6600CC" ,"Mature_B"="magenta"))+
  scale_color_manual(values = c("Immature_A"="cornflowerblue", "Mature_A"="darkgreen",
                                "Immature_B"="#6600CC" ,"Mature_B"="magenta"),
                     name="Condition (sampling year)",
                     breaks=c("Immature_A", "Mature_A",
                              "Immature_B" ,"Mature_B"),
                     labels=c("Immature (2022)", "Mature (2022)",
                              "Immature (2023)","Mature (2023)"))+
  labs(title= "Rhizosphere; Plot 1", x="Number of sequences", y="Number of ASV")+
  guides(alpha=FALSE)+
  theme(legend.key=element_blank(),
        legend.title.align = 0.85,
        legend.title = element_text(face="bold",size=14),
        axis.text = element_text(size=14),
        axis.title = element_text(size = 16),
        plot.title = element_text(hjust=0.5, face="bold", size=16),
        legend.text = element_text(size = 16))
x11()
p_rizo1


#a.2) North plot
min(sample_sums(rizo2)) 
colSums(otu_table(rizo2))[which.min(colSums(otu_table(rizo2)))] 
mt=as.data.frame(sample_data(rizo2))[order(as.character(rownames(as.data.frame(sample_data(rizo2)))),decreasing=F),]
otu = otu_table(rizo2)
otu = as.data.frame(t(otu))
otu=otu[order(as.character(rownames(otu)),decreasing=FALSE),]

rownames(otu)==rownames(mt) 
rownames(otu)=paste0(rownames(otu),"/",mt$Condition_Year)
sample_names = rownames(otu)

out <- rarecurve(otu, step = 100, label = F) 
rare <- lapply(out, function(x){  
  b <- as.data.frame(x)
  b <- data.frame(ASV = b[,1], raw.read = rownames(b))
  b$raw.read <- as.numeric(gsub("N", "",  b$raw.read))
  return(b)
})

names(rare) <- sample_names
rare <- map_dfr(rare, function(x){
  z <- data.frame(x)
  return(z)
}, .id = "Sample")

rare$Condition_Year=rare$Sample 
rare$Condition_Year=gsub(".*/", "", rare$Condition_Year)
rare$raw.read=as.numeric(rare$raw.read)

p_rizo2=ggplot(rare, aes(x=raw.read, y=ASV, colour=Condition_Year, group=Sample)) +
  theme_bw()+
  geom_point(aes(colour=Condition_Year), size=0.85)+
  geom_line(aes(colour=Condition_Year),size=1.2)+
  geom_vline(aes(xintercept = min(sample_sums(rizo2))), 
             lty=1, colour="black")+
  scale_fill_manual(values = c("Immature_A"="magenta", "Mature_A"="darkolivegreen2",
                               "Immature_B"="#993300" ,"Mature_B"="black"))+
  scale_color_manual(values = c("Immature_A"="magenta", "Mature_A"="darkolivegreen2",
                                "Immature_B"="#993300" ,"Mature_B"="black"),
                     name="Condition (sampling year)",
                     breaks=c("Immature_A", "Mature_A",
                              "Immature_B" ,"Mature_B"),
                     labels=c("Immature (2022)", "Mature (2022)",
                              "Immature (2023)","Mature (2023)"))+
  labs(title= "Rhizosphere; Plot 2", x="Number of sequences", y="Number of ASV")+
  guides(alpha=FALSE)+
  theme(legend.key=element_blank(),
        legend.title.align = 0.85,
        legend.title = element_text(face="bold",size=14),
        axis.text = element_text(size=14),
        axis.title = element_text(size = 16),
        plot.title = element_text(hjust=0.5, face="bold", size=16),
        legend.text = element_text(size = 16))
x11()
p_rizo2

#b) ENDOSPHERE
#b.1) South plot
min(sample_sums(endo1)) 
colSums(otu_table(endo1))[which.min(colSums(otu_table(endo1)))] 
mt=as.data.frame(sample_data(endo1))[order(as.character(rownames(as.data.frame(sample_data(endo1)))),decreasing=F),]
otu = otu_table(endo1)
otu = as.data.frame(t(otu))
otu=otu[order(as.character(rownames(otu)),decreasing=FALSE),]

rownames(otu)==rownames(mt) 
rownames(otu)=paste0(rownames(otu),"/",mt$Condition_Year)
sample_names = rownames(otu)

out <- rarecurve(otu, step = 100, label = F) 
rare <- lapply(out, function(x){  
  b <- as.data.frame(x)
  b <- data.frame(ASV = b[,1], raw.read = rownames(b))
  b$raw.read <- as.numeric(gsub("N", "",  b$raw.read))
  return(b)
})

names(rare) <- sample_names
rare <- map_dfr(rare, function(x){
  z <- data.frame(x)
  return(z)
}, .id = "Sample")

rare$Condition_Year=rare$Sample 
rare$Condition_Year=gsub(".*/", "", rare$Condition_Year)
rare$raw.read=as.numeric(rare$raw.read)

p_endo1=ggplot(rare, aes(x=raw.read, y=ASV, colour=Condition_Year, group=Sample)) +
  theme_bw()+
  geom_point(aes(colour=Condition_Year), size=0.85)+
  geom_line(aes(colour=Condition_Year),size=1.2)+
  geom_vline(aes(xintercept = min(sample_sums(endo1))), 
             lty=1, colour="black")+
  scale_fill_manual(values = c("Immature_A"="#00CCFF", "Mature_A"="green",
                               "Immature_B"="#6600CC" ,"Mature_B"="magenta"))+
  scale_color_manual(values = c("Immature_A"="#00CCFF", "Mature_A"="green",
                                "Immature_B"="#6600CC" ,"Mature_B"="magenta"),
                     name="Condition (sampling year)",
                     breaks=c("Immature_A", "Mature_A",
                              "Immature_B" ,"Mature_B"),
                     labels=c("Immature (2022)", "Mature (2022)",
                              "Immature (2023)","Mature (2023)"))+
  labs(title= "Endosphere; Plot 1", x="Number of sequences", y="Number of ASV")+
  guides(alpha=FALSE)+
  theme(legend.key=element_blank(),
        legend.title.align = 0.85,
        legend.title = element_text(face="bold",size=14),
        axis.text = element_text(size=14),
        axis.title = element_text(size = 16),
        plot.title = element_text(hjust=0.5, face="bold", size=16),
        legend.text = element_text(size = 16))
x11()
p_endo1

#b.2) North plot
min(sample_sums(endo2)) 
colSums(otu_table(endo2))[which.min(colSums(otu_table(endo2)))] 
mt=as.data.frame(sample_data(endo2))[order(as.character(rownames(as.data.frame(sample_data(endo2)))),decreasing=F),]
otu = otu_table(endo2)
otu = as.data.frame(t(otu))
otu=otu[order(as.character(rownames(otu)),decreasing=FALSE),]

rownames(otu)==rownames(mt) 
rownames(otu)=paste0(rownames(otu),"/",mt$Condition_Year)
sample_names = rownames(otu)

out <- rarecurve(otu, step = 100, label = F) 
rare <- lapply(out, function(x){ 
  b <- as.data.frame(x)
  b <- data.frame(ASV = b[,1], raw.read = rownames(b))
  b$raw.read <- as.numeric(gsub("N", "",  b$raw.read))
  return(b)
})

names(rare) <- sample_names
rare <- map_dfr(rare, function(x){
  z <- data.frame(x)
  return(z)
}, .id = "Sample")

rare$Condition_Year=rare$Sample 
rare$Condition_Year=gsub(".*/", "", rare$Condition_Year)
rare$raw.read=as.numeric(rare$raw.read)

p_endo2=ggplot(rare, aes(x=raw.read, y=ASV, colour=Condition_Year, group=Sample)) +
  theme_bw()+
  geom_point(aes(colour=Condition_Year), size=0.85)+
  geom_line(aes(colour=Condition_Year),size=1.2)+
  geom_vline(aes(xintercept = min(sample_sums(endo2))), 
             lty=1, colour="black")+
  scale_fill_manual(values = c("Immature_A"="blueviolet", "Mature_A"="aquamarine",
                               "Immature_B"="#993300" ,"Mature_B"="black"))+
  scale_color_manual(values = c("Immature_A"="blueviolet", "Mature_A"="aquamarine",
                                "Immature_B"="#993300" ,"Mature_B"="black"),
                     name="Condition (sampling year)",
                     breaks=c("Immature_A", "Mature_A",
                              "Immature_B" ,"Mature_B"),
                     labels=c("Immature (2022)", "Mature (2022)",
                              "Immature (2023)","Mature (2023)"))+
  labs(title= "Endosphere; Plot 2", x="Number of sequences", y="Number of ASV")+
  guides(alpha=FALSE)+
  theme(legend.key=element_blank(),
        legend.title.align = 0.85,
        legend.title = element_text(face="bold",size=14),
        axis.text = element_text(size=14),
        axis.title = element_text(size = 16),
        plot.title = element_text(hjust=0.5, face="bold", size=16),
        legend.text = element_text(size = 16))
x11()
p_endo2

As shown in Figures 2-5, at the same rarefaction levels, most of the samples reached to the asymptote, so the sequencing effort can be considered enough. Furthermore, no clear patterns of richnes can be deduced from the rarefaction curves.

Figure 2. Rarefaction curves corresponding to rhizosphere samples of the south plot (also named “Plot 1”). Figure 3. Rarefaction curves corresponding to rhizosphere samples of the north plot (also named “Plot 2”). Figure 4. Rarefaction curves corresponding to root endosphere samples of the south plot (also named “Plot 1”). Figure 5. Rarefaction curves corresponding to root endosphere samples of the north plot (also named “Plot 2”).

4. Alpha diversity analysis

We are going to estimate the richness, diversity and evenness of each sample by calculating the number of observed ASVs, and Shannon’s and Inverse of Simpson and Pielou’s indices, respectively.

Some alpha diversity indices are sensitive to different library sizes, so first, we will rarefy all the samples to the smallest library size to avoid potential biases associated to different number of sequences.

Note

It should be made a rarefaction per groups of samples to be compared. For instance, if we want to compare the diversity of rhizosphere bacterial communitites of Mother and Suckers plants located in the south plot, we should calculate the rarefaction level corresponding to the subgroup of Rhizosphere samples of the south plot (namely, object rizo1). And then, rarefy to the smallest library size of the samples included in the object rizo1.

#a) Endosphere
#a.1) South plot
rarefac_endo1=rarefy_even_depth(endo1, sample.size = min(sample_sums(endo1)),rngseed=T)#rarefaction. You can indicate manually the minimum sample size or by using the function "min"
sample_sums(rarefac_endo1)#check that now all the samples have the same number of sequences 
colSums(otu_table(rarefac_endo1))[which.min(colSums(otu_table(rarefac_endo1)))]

indices_endo1=estimate_richness(rarefac_endo1, measures=c("Observed", "InvSimpson", "Shannon"))#calculate the indices
indices_endo1$Pielou=indices_endo1$Shannon/log(indices_endo1$Observed)#calculated manually the Pielou index

rownames(sample_data(rarefac_endo1))==row.names(indices_endo1)
mt_indices_endo1=data.frame(sample_data(rarefac_endo1))#extract the metadata

indices_endo1=add_column(indices_endo1, mt_indices_endo1[1:ncol(mt_indices_endo1)], .before = "Observed")#add the metadata to the indices table, before the column named "Observed"
write.table(data.frame(" "=rownames(indices_endo1),indices_endo1),file="Indices_Endo_Plot1_porReplicas.txt", sep="\t",row.names =F)

#We have already calculated the indices per replicates but we are interested in the mean and standard values per groups of samples, so let's calculate them:
media_endo1=aggregate(indices_endo1[,11:14], list(grouping=indices_endo1$Condition_Year), mean)%>% mutate_if(is.numeric, round, digits=2)#be careful. Here you have to indicate the position of the columns where the values of the indices are placed
sd_endo1=aggregate(indices_endo1[,11:14], list(grouping=indices_endo1$Condition_Year), sd)%>% mutate_if(is.numeric, round, digits=2)

mean_sd = NULL
for (i in 2:5){ #be careful with the position of the numeric values (alpha indices)
  mean_sd <- cbind(mean_sd,paste0(media_endo1[,i], " +/- ", sd_endo1[,i]))}#then in Excel you can replace "+/-" by the corresponding symbol

tabla_publicaciones_endo1 = cbind(media_endo1$grouping, mean_sd)
colnames(tabla_publicaciones_endo1) = c("Group","Observed", "Shannon", "InvSimpson", "Pielou")
as.data.frame(tabla_publicaciones_endo1)

write.table(tabla_publicaciones_endo1,file="Indices_media_sd_Endo_Plot1.txt", sep="\t",row.names =F)

#a.2) North plot
rarefac_endo2=rarefy_even_depth(endo2, sample.size = min(sample_sums(endo2)),rngseed=T)
sample_sums(rarefac_endo2) 
colSums(otu_table(rarefac_endo2))[which.min(colSums(otu_table(rarefac_endo2)))]

indices_endo2=estimate_richness(rarefac_endo2, measures=c("Observed", "InvSimpson", "Shannon"))
indices_endo2$Pielou=indices_endo2$Shannon/log(indices_endo2$Observed)

rownames(sample_data(rarefac_endo2))==row.names(indices_endo2)
mt_indices_endo2=data.frame(sample_data(rarefac_endo2))

indices_endo2=add_column(indices_endo2, mt_indices_endo2[1:ncol(mt_indices_endo2)], .before = "Observed")
write.table(data.frame(" "=rownames(indices_endo2),indices_endo2),file="Indices_Endo_Plot2_porReplicas.txt", sep="\t",row.names =F)

media_endo2=aggregate(indices_endo2[,11:14], list(grouping=indices_endo2$Condition_Year), mean)%>% mutate_if(is.numeric, round, digits=2)
sd_endo2=aggregate(indices_endo2[,11:14], list(grouping=indices_endo2$Condition_Year), sd)%>% mutate_if(is.numeric, round, digits=2)

mean_sd = NULL
for (i in 2:5){ 
  mean_sd <- cbind(mean_sd,paste0(media_endo2[,i], " +/- ", sd_endo2[,i]))}

tabla_publicaciones_endo2 = cbind(media_endo2$grouping, mean_sd)
colnames(tabla_publicaciones_endo2) = c("Group","Observed", "Shannon", "InvSimpson", "Pielou")
as.data.frame(tabla_publicaciones_endo2)

write.table(tabla_publicaciones_endo2,file="Indices_media_sd_Endo_Plot2.txt", sep="\t",row.names =F)

#b) Rhizosphere
#b.1) South plot
rarefac_rizo1=rarefy_even_depth(rizo1, sample.size = min(sample_sums(rizo1)),rngseed=T)
sample_sums(rarefac_rizo1) 
colSums(otu_table(rarefac_rizo1))[which.min(colSums(otu_table(rarefac_rizo1)))]

indices_rizo1=estimate_richness(rarefac_rizo1, measures=c("Observed", "InvSimpson", "Shannon"))
indices_rizo1$Pielou=indices_rizo1$Shannon/log(indices_rizo1$Observed)

rownames(sample_data(rarefac_rizo1))==row.names(indices_rizo1)
mt_indices_rizo1=data.frame(sample_data(rarefac_rizo1))

indices_rizo1=add_column(indices_rizo1, mt_indices_rizo1[1:ncol(mt_indices_rizo1)], .before = "Observed")
write.table(data.frame(" "=rownames(indices_rizo1),indices_rizo1),file="Indices_rizo_Plot1_porReplicas.txt", sep="\t",row.names =F)

media_rizo1=aggregate(indices_rizo1[,11:14], list(grouping=indices_rizo1$Condition_Year), mean)%>% mutate_if(is.numeric, round, digits=2)
sd_rizo1=aggregate(indices_rizo1[,11:14], list(grouping=indices_rizo1$Condition_Year), sd)%>% mutate_if(is.numeric, round, digits=2)

mean_sd = NULL
for (i in 2:5){ 
  mean_sd <- cbind(mean_sd,paste0(media_rizo1[,i], " +/- ", sd_rizo1[,i]))}

tabla_publicaciones_rizo1 = cbind(media_rizo1$grouping, mean_sd)
colnames(tabla_publicaciones_rizo1) = c("Group","Observed", "Shannon", "InvSimpson", "Pielou")
as.data.frame(tabla_publicaciones_rizo1)

write.table(tabla_publicaciones_rizo1,file="Indices_media_sd_rizo_Plot1.txt", sep="\t",row.names =F)

#Pb.2) North plot
rarefac_rizo2=rarefy_even_depth(rizo2, sample.size = min(sample_sums(rizo2)),rngseed=T)
sample_sums(rarefac_rizo2) 
colSums(otu_table(rarefac_rizo2))[which.min(colSums(otu_table(rarefac_rizo2)))]

indices_rizo2=estimate_richness(rarefac_rizo2, measures=c("Observed", "InvSimpson", "Shannon"))
indices_rizo2$Pielou=indices_rizo2$Shannon/log(indices_rizo2$Observed)

rownames(sample_data(rarefac_rizo2))==row.names(indices_rizo2)
mt_indices_rizo2=data.frame(sample_data(rarefac_rizo2))

indices_rizo2=add_column(indices_rizo2, mt_indices_rizo2[1:ncol(mt_indices_rizo2)], .before = "Observed")

write.table(data.frame(" "=rownames(indices_rizo2),indices_rizo2),file="Indices_rizo_Plot2_porReplicas.txt", sep="\t",row.names =F)

media_rizo2=aggregate(indices_rizo2[,11:14], list(grouping=indices_rizo2$Condition_Year), mean)%>% mutate_if(is.numeric, round, digits=2)
sd_rizo2=aggregate(indices_rizo2[,11:14], list(grouping=indices_rizo2$Condition_Year), sd)%>% mutate_if(is.numeric, round, digits=2)

mean_sd = NULL
for (i in 2:5){ 
  mean_sd <- cbind(mean_sd,paste0(media_rizo2[,i], " +/- ", sd_rizo2[,i]))}

tabla_publicaciones_rizo2 = cbind(media_rizo2$grouping, mean_sd)
colnames(tabla_publicaciones_rizo2) = c("Group","Observed", "Shannon", "InvSimpson", "Pielou")
as.data.frame(tabla_publicaciones_rizo2)

write.table(tabla_publicaciones_rizo2,file="Indices_media_sd_rizo_Plot2.txt", sep="\t",row.names =F)

4.1 Statistical analysis

We have just calculated the alpha indices per sample and per group of samples, but we have to compare them among group of samples. For that purpose, we will apply univariate statistic test. We will go into different steps, which briefly consist of:

Check the normality of the data: we have to confirm whether our variables follow a normal distribution. If they are not normally distributed, we will also check if they are very far from the normal distribution.
Check the homoscedasticity of the data: confirm if dispersion of the variable is the same in all the groups of samples to compare. If the distribution of the variables is normal (or almost normal) and there is homogeneity of variances, we will apply parametric tests. Otherwise, we will use non-parametric tests.
Check the presence of the outliers. Outliers are not a problem, but we have to check if they are extreme outliers. In that case, we will apply robust tests.
Apply the proper statistical test. Depending on the characteristics of the variables and the experimental design (one factor to be studied: one-way statistics; two factors: two-way statistical tests).
Calculate the size of the effect. In case we find statistical differences among groups of samples in the studied variables, we have to measure the magnitude of the effect.

We have to study separately each individual variable (in this case, each alpha diversity index).

In this example, we are interested in two factors: the effect of the sampling year, and the effect of the plants’ developmental stage, also named Condition in this web.

#a) ENDOSPHERE
#a.1) North plot

#Normality
indices_endo1%>%  group_by(Condition, Year)%>%  shapiro_test(Observed)
 
indices_endo1%>%  group_by(Condition, Year)%>%  shapiro_test(Shannon)

indices_endo1%>%  group_by(Condition, Year)%>%  shapiro_test(InvSimpson)

indices_endo1%>%  group_by(Condition, Year)%>%  shapiro_test(Pielou)

x11()#let's see how far are our data from the normal distribution into a qq-plot.
ggqqplot(indices_endo1, "InvSimpson", ggtheme = theme_bw()) +
  facet_grid(Condition ~ Year)

#Homoscedasticity
indices_endo1 %>% levene_test(Observed ~ Condition*Year)
indices_endo1 %>% levene_test(Shannon ~ Condition*Year)
indices_endo1 %>% levene_test(InvSimpson ~ Condition*Year)
indices_endo1 %>% levene_test(Pielou ~ Condition*Year)
 
#Outliers
a=indices_endo1 %>%  group_by(Condition,Year)%>%  identify_outliers(Observed); View(a)
b=indices_endo1 %>%  group_by(Condition, Year)%>%  identify_outliers(Shannon) ; View(b)
c=indices_endo1 %>%  group_by(Condition, Year)%>%  identify_outliers(InvSimpson); View(c)
d=indices_endo1 %>%  group_by(Condition, Year)%>%  identify_outliers(Pielou); View(d)

#Hypothesis contrasting tests 
indices_endo1 %>%  anova_test(Shannon ~ Condition * Year)#two-way ANOVA
indices_endo1 %>%  anova_test(InvSimpson ~ Condition * Year)
indices_endo1 %>%  anova_test(Pielou ~ Condition * Year)

indices_endo1$Condition=as.factor(indices_endo1$Condition)
indices_endo1$Year=as.factor(indices_endo1$Year)
t2way(Observed ~ Year*Condition, data=indices_endo1)#two-way robust test

#We did not find the interaction between Year and plant developmental Condition significant in any of the variables studied, but the effect of Year or Condition (or both) was statistaclly significant on some of the indices. In that case, we have to study the effect of each factor individually:

#Post-hoc tests
indices_endo1 %>%pairwise_t_test(Observed~Year,p.adjust.method = "holm")
test=mcp2atm(Observed~Year*Condition, data=indices_endo1)

#Effect size
z=lm(Observed~Condition*Year, data=indices_endo1)
eta_squared(z)

#a.2) North plot

#Normality
indices_endo2%>%  group_by(Condition, Year)%>%  shapiro_test(Observed)
indices_endo2%>%  group_by(Condition, Year)%>%  shapiro_test(Shannon)
indices_endo2%>%  group_by(Condition, Year)%>%  shapiro_test(InvSimpson)
indices_endo2%>%  group_by(Condition, Year)%>%  shapiro_test(Pielou)

x11()
ggqqplot(indices_endo2, "Observed", ggtheme = theme_bw()) +
  facet_grid(Condition ~ Year)

#Homoscedasticity
indices_endo2 %>% levene_test(Observed ~ Condition*Year)#heterocedastic
indices_endo2 %>% levene_test(Shannon ~ Condition*Year)
indices_endo2 %>% levene_test(InvSimpson ~ Condition*Year)
indices_endo2 %>% levene_test(Pielou ~ Condition*Year)

#Outliers
a=indices_endo2 %>%  group_by(Condition,Year)%>%  identify_outliers(Observed); View(a)
b=indices_endo2 %>%  group_by(Condition, Year)%>%  identify_outliers(Shannon); View(b) 
c=indices_endo2 %>%  group_by(Condition, Year)%>%  identify_outliers(InvSimpson); View(c)
d=indices_endo2 %>%  group_by(Condition, Year)%>%  identify_outliers(Pielou); View(d)

#Check wether with robust test the homoscedasticity is maintained
indices_endo2%>%   filter(between(InvSimpson, quantile(InvSimpson,0.1), quantile(InvSimpson, 0.9)))%>%
  levene_test(InvSimpson ~ Year*Condition)

#Hypothesis contrasting tests 
#Observed ASVs is not homoscedastic, thus, we should apply non-parametric tests. However, there is not a suitable option for two-way tests, so let's transformate the data and then apply the corresponding test. Check this link:  http://depts.washington.edu/acelab/proj/art/index.html
indices_endo2$Condition=as.factor(indices_endo2$Condition)
indices_endo2$Year=as.factor(indices_endo2$Year)
m = art(Observed ~ Year * Condition, data=indices_endo2) # linear model syntax; see lm()
anova(m)
eta_squared(m)

indices_endo2 %>%  anova_test(Shannon ~ Condition * Year)
t2way(InvSimpson ~ Condition*Year, data=indices_endo2)
indices_endo2 %>%  anova_test(Pielou ~ Condition * Year)

#Post-hoc tests
art.con(m, "Year:Condition", adjust="holm") %>%  
  summary() %>%  # add significance stars to the output
  mutate(sig. = symnum(p.value, corr=FALSE, na=FALSE,
                       cutpoints = c(0, .001, .01, .05, .10, 1),
                       symbols = c("***", "**", "*", ".", " ")))


t_test(Observed~Year, data=indices_endo2, var.equal=F)
indices_endo2 %>%pairwise_t_test(Shannon~Year)
indices_endo2 %>%pairwise_t_test(Shannon~Condition)

#b) RHIZOSPHERE
#b.1) South plot
#Normality
indices_rizo1%>%  group_by(Condition, Year)%>%  shapiro_test(Observed)
indices_rizo1%>%  group_by(Condition, Year)%>%  shapiro_test(Shannon)
indices_rizo1%>%  group_by(Condition, Year)%>%  shapiro_test(InvSimpson)
indices_rizo1%>%  group_by(Condition, Year)%>%  shapiro_test(Pielou)

x11()
ggqqplot(indices_rizo1, "Shannon", ggtheme = theme_bw()) +
  facet_grid(Condition ~ Year)
x11()
ggqqplot(indices_rizo1, "Pielou", ggtheme = theme_bw()) +
  facet_grid(Condition ~ Year)

#Homoscedasticity
indices_rizo1 %>% levene_test(Observed ~ Condition*Year)
indices_rizo1 %>% levene_test(Shannon ~ Condition*Year)
indices_rizo1 %>% levene_test(InvSimpson ~ Condition*Year)
indices_rizo1 %>% levene_test(Pielou ~ Condition*Year)

#Outliers
a=indices_rizo1 %>%  group_by(Condition,Year)%>%  identify_outliers(Observed); View(a)
b=indices_rizo1 %>%  group_by(Condition, Year)%>%  identify_outliers(Shannon); View(b)
c=indices_rizo1 %>%  group_by(Condition, Year)%>%  identify_outliers(InvSimpson); View(c)
d=indices_rizo1 %>%  group_by(Condition, Year)%>%  identify_outliers(Pielou); View(d)

#check wether robust test mantain the homoscedasticity
indices_rizo1%>%  filter(between(Shannon,quantile(Shannon,0.1), quantile(Shannon, 0.9)))%>% levene_test(Shannon ~ Year*Condition)

indices_rizo1%>%   filter(between(Pielou,quantile(Pielou,0.1),  quantile(Pielou, 0.9)))%>%levene_test(Pielou ~ Year*Condition)

#Hypothesis contrasting tests
indices_rizo1$Condition=as.factor(indices_rizo1$Condition)
indices_rizo1$Year=as.factor(indices_rizo1$Year)

indices_rizo1 %>%  anova_test(Observed ~ Condition * Year)
indices_rizo1$Year=as.factor(indices_rizo1$Year); indices_rizo1$Condition=as.factor(indices_rizo1$Condition)
t2way(Shannon ~ Condition*Year, data=indices_rizo1)
indices_rizo1 %>%  anova_test(InvSimpson ~ Condition * Year)
indices_rizo1 %>%  anova_test(Pielou ~ Condition * Year)
t2way(Pielou ~ Condition*Year, data=indices_rizo1)

#Post-hoc tests
indices_rizo1 %>%pairwise_t_test(Observed~Year,p.adjust.method = "holm") indices_rizo1 %>%pairwise_t_test(Observed~Condition,p.adjust.method = "holm")
mcp2atm(InvSimpson~Year*Condition, data=indices_rizo1)
mcp2atm(Pielou~Year*Condition, data=indices_rizo1)

#Effect size:
y=lm(Pielou~Year*Condition, data=indices_rizo1)
eta_squared(y)

#b.2) North plot
#Normality
indices_rizo2%>%  group_by(Condition, Year)%>%  shapiro_test(Observed)
indices_rizo2%>%  group_by(Condition, Year)%>%  shapiro_test(Shannon)
indices_rizo2%>%  group_by(Condition, Year)%>%  shapiro_test(InvSimpson)
indices_rizo2%>%  group_by(Condition, Year)%>%  shapiro_test(Pielou)

x11()
ggqqplot(indices_rizo2, "Shannon", ggtheme = theme_bw()) +
  facet_grid(Condition ~ Year)
x11()
ggqqplot(indices_rizo2, "Pielou", ggtheme = theme_bw()) +
  facet_grid(Condition ~ Year)

#Homoscedasticity
indices_rizo2 %>% levene_test(Observed ~ Condition*Year)
indices_rizo2 %>% levene_test(Shannon ~ Condition*Year)
indices_rizo2 %>% levene_test(InvSimpson ~ Condition*Year)
indices_rizo2 %>% levene_test(Pielou ~ Condition*Year)

#Outliers
a=indices_rizo2 %>%  group_by(Condition,Year)%>%  identify_outliers(Observed);View(a)
b=indices_rizo2 %>%  group_by(Condition, Year)%>%  identify_outliers(Shannon);View(b) 
c=indices_rizo2 %>%  group_by(Condition, Year)%>%  identify_outliers(InvSimpson); View(c)
d=indices_rizo2 %>%  group_by(Condition, Year)%>%  identify_outliers(Pielou); View(d)

indices_rizo2%>% 
  filter(between(Shannon,
                 quantile(Shannon,0.1), 
                 quantile(Shannon, 0.9)))%>%
  levene_test(Shannon ~ Year*Condition)
indices_rizo2%>% 
  filter(between(Pielou,
                 quantile(Pielou,0.1),  
                 quantile(Pielou, 0.9)))%>%
  levene_test(Pielou ~ Year*Condition)

#Hypothesis contrasting tests
indices_rizo2 %>%  anova_test(Observed ~ Condition * Year)
indices_rizo2$Year=as.factor(indices_rizo2$Year)
indices_rizo2$Condition=as.factor(indices_rizo2$Condition)
t2way(Shannon ~ Condition*Year, data=indices_rizo2)
indices_rizo2 %>%  anova_test(InvSimpson ~ Condition * Year)
t2way(Pielou ~ Condition*Year, data=indices_rizo2)

#Post-hoc tests
indices_rizo2 %>%pairwise_t_test(Observed~Year)

We can also compare if the alpha diversity indices of fungal communities associated to mothers, first and second suckers are different each other. In the index of this website, you can find more details about the concepts of Mothers, first suckers and second suckers. These concepts are very similar to the concept of “Generation” in sexually reproducing plants, and that’s exactly the name of the factor employed here. So, we will compare the indices of these groups, by the factor Generation.

#a) Rhizosphere 
#a.1) South plot

#Normality 
indices_rizo1%>% group_by(Generation)%>% shapiro_test(Observed) indices_rizo1%>% group_by(Generation)%>% shapiro_test(Shannon) indices_rizo1%>% group_by(Generation)%>% shapiro_test(InvSimpson) indices_rizo1%>% group_by(Generation)%>% shapiro_test(Pielou)

x11() ggqqplot(indices_rizo1, "Shannon", ggtheme = theme_bw()) + facet_grid(Generation~Plot)

x11() ggqqplot(indices_rizo1, "Pielou", ggtheme = theme_bw()) + facet_grid(Generation~ Plot)

#Homoscedasticity 
indices_rizo1 %>% levene_test(Observed ~ Generation) indices_rizo1 %>% levene_test(Shannon ~ Generation) indices_rizo1 %>% levene_test(InvSimpson ~ Generation) indices_rizo1 %>% levene_test(Pielou ~ Generation)

#outliers 
a=indices_rizo1 %>% group_by(Generation)%>% identify_outliers(Observed); View(a) 
b=indices_rizo1 %>% group_by(Generation)%>% identify_outliers(Shannon) ;View(b) 
c=indices_rizo1 %>% group_by(Generation)%>% identify_outliers(InvSimpson);View(c) 
d=indices_rizo1 %>% group_by(Generation)%>% identify_outliers(Pielou);View(d)

#Check whether with robust methods the data are still homocedastic
indices_rizo1%>% filter(between(Shannon, quantile(Shannon,0.1),
quantile(Shannon, 0.9)))%>% levene_test(Shannon ~ Generation)

indices_rizo1%>% filter(between(InvSimpson, quantile(InvSimpson,0.1), quantile(InvSimpson, 0.9)))%>% levene_test(InvSimpson ~ Generation)

indices_rizo1%>% filter(between(Pielou, quantile(Pielou,0.1), quantile(Pielou, 0.9)))%>% levene_test(Pielou ~ Generation)

#Hypothesis contrasting tests 
indices_rizo1 %>% anova_test(Observed ~ Generation) t1way(Shannon~Generation, data=indices_rizo1) 
t1way(InvSimpson~Generation, data=indices_rizo1) 
t1way(Pielou~Generation, data=indices_rizo1)

#Post-hoc test 
indices_rizo1%>% pairwise_t_test(Observed~Generation)

#a.2) North plots 
#Normality 
indices_rizo2%>% group_by(Generation)%>% shapiro_test(Observed) indices_rizo2%>% group_by(Generation)%>% shapiro_test(Shannon) indices_rizo2%>% group_by(Generation)%>% shapiro_test(InvSimpson) indices_rizo2%>% group_by(Generation)%>% shapiro_test(Pielou)

x11() 
ggqqplot(indices_rizo2, "Shannon", ggtheme = theme_bw()) + facet_grid(Generation~Plot)

x11() 
ggqqplot(indices_rizo2, "Pielou", ggtheme = theme_bw()) + facet_grid(Generation ~ Plot)

#Homoscedasticity 
indices_rizo2 %>% levene_test(Observed ~ Generation) 
indices_rizo2 %>% levene_test(Shannon ~ Generation) 
indices_rizo2 %>% levene_test(InvSimpson ~ Generation) 
indices_rizo2 %>% levene_test(Pielou ~ Generation)

#outliers 
a=indices_rizo2 %>% group_by(Generation)%>% identify_outliers(Observed); View(a)
b=indices_rizo2 %>% group_by(Generation)%>% identify_outliers(Shannon) ;View(b) 
c=indices_rizo2 %>% group_by(Generation)%>% identify_outliers(InvSimpson);View(c) 
d=indices_rizo2 %>% group_by(Generation)%>% identify_outliers(Pielou);View(d)

#Check whether with robust tests the data are still homocedastic

indices_rizo2%>% filter(between(Shannon, quantile(Shannon,0.1),
quantile(Shannon, 0.9)))%>% levene_test(Shannon ~ Generation)

indices_rizo2%>% filter(between(InvSimpson, quantile(InvSimpson,0.1),
quantile(InvSimpson, 0.9)))%\>% levene_test(InvSimpson ~ Generation)

indices_rizo2%>% filter(between(Pielou, quantile(Pielou,0.1),
quantile(Pielou, 0.9)))%>% levene_test(Pielou~ Generation)

#Hypothesis contrasting tests 
indices_rizo2 %>% welch_anova_test(Observed~Generation)#ANOVA test that does no assume that the data are homocedastic
t1way(Shannon~Generation, data=indices_rizo2) t1way(InvSimpson~Generation, data=indices_rizo2) t1way(Pielou~Generation, data=indices_rizo2)

q=lm(Observed~Generation, data=indices_rizo2) eta_squared(q)

#Post-hoc tests 
indices_rizo2%>% pairwise_t_test(Observed~Generation,p.adjust.method = "holm", pool.sd = F)

#b) Endosphere 
#b.1) South plot
#Normality 
indices_endo1%>% group_by(Generation)%>% shapiro_test(Observed) indices_endo1%>% group_by(Generation)%>% shapiro_test(Shannon) indices_endo1%>% group_by(Generation)%>% shapiro_test(InvSimpson) indices_endo1%>% group_by(Generation)%\>% shapiro_test(Pielou)

x11() 
ggqqplot(indices_endo1, "InvSimpson", ggtheme = theme_bw()) + facet_grid(Generation~Plot)

#Homoscedasticity 
indices_endo1 %>% levene_test(Observed ~ Generation) 
indices_endo1 %>% levene_test(Shannon ~ Generation) 
indices_endo1 %>% levene_test(InvSimpson ~ Generation) 
indices_endo1 %>% levene_test(Pielou ~ Generation)

#outliers 
a=indices_endo1 %>% group_by(Generation)%>% identify_outliers(Observed);View(a) 
b=indices_endo1 %>% group_by(Generation)%>% identify_outliers(Shannon);View(b) 
c=indices_endo1 %>% group_by(Generation)%>% identify_outliers(InvSimpson);View(c) 
d=indices_endo1 %>% group_by(Generation)%>% identify_outliers(Pielou);View(d)

#Check whether with robust test the data are still homocedastic 
indices_endo1%>% filter(between(InvSimpson, quantile(InvSimpson,0.1),
quantile(InvSimpson, 0.9)))%\>% levene_test(InvSimpson ~ Generation)

#Hypothesis contrasting tests 
indices_endo1 %>% anova_test(Observed ~ Generation) 
indices_endo1 %>% anova_test(Shannon ~ Generation) t1way(InvSimpson~Generation, data=indices_endo1)
indices_endo1 %>% anova_test(Pielou ~ Generation)

#Post-hoc 
indices_endo1%>% pairwise_t_test(Observed~Generation)

#b.2) North plot 
#Normality 
indices_endo2%>% group_by(Generation)%>% shapiro_test(Observed) indices_endo2%>% group_by(Generation)%>% shapiro_test(Shannon) indices_endo2%>% group_by(Generation)%>% shapiro_test(InvSimpson) indices_endo2%>% group_by(Generation)%>% shapiro_test(Pielou)

x11() 
ggqqplot(indices_endo2, "Observed", ggtheme = theme_bw()) + facet_grid(Generation~Plot)

x11() 
ggqqplot(indices_endo2, "InvSimpson", ggtheme = theme_bw()) + facet_grid(Generation ~ Plot)

#Homoscedasticity 
indices_endo2 %>% levene_test(Observed ~ Generation) 
indices_endo2 %>% levene_test(Shannon ~ Generation) 
indices_endo2 %>% levene_test(InvSimpson ~ Generation) 
indices_endo2 %>% levene_test(Pielou ~ Generation)

#outliers 
a=indices_endo2 %>% group_by(Generation)%>% identify_outliers(Observed); View(a) 
b=indices_endo2 %>% group_by(Generation)%>% identify_outliers(Shannon);View(b) 
c=indices_endo2 %>% group_by(Generation)%>% identify_outliers(InvSimpson);View(c) 
d=indices_endo2 %>% group_by(Generation)%>% identify_outliers(Pielou);View(d)

#check whether with robust tests data are still homocedastic 
indices_endo2%>% filter(between(Shannon, quantile(Shannon,0.1),
quantile(Shannon, 0.9)))%>% levene_test(Shannon ~ Generation)

indices_endo2%>% filter(between(InvSimpson, quantile(InvSimpson,0.1),
quantile(InvSimpson, 0.9)))%>% levene_test(InvSimpson ~ Generation)

#Hypothesis contrasting tests 
indices_endo2 %>% anova_test(Observed ~ Generation) 
indices_endo2 %>% anova_test(Shannon ~ Generation) t1way(InvSimpson~Generation, data=indices_endo2)
indices_endo2 %>% anova_test(Pielou ~ Generation)

5. Beta diversity analyses

It’s time to analyze the differences in the diversity of the fungal communities between samples. For that purpose, we will navigate into the multivariate statistics.

Firstly, we have to normalize the data to avoid potential biases due to different library sizes (different number of sequences in the samples). We prefer the edgeR method, which is based on the TMM (Trimmed Mean of the M-values). This normalization is needed when we are working also with compositional data; that is to say, with data that expressed in percentages, such as microbiota counts. More information can be found here.

5.1 TMM Normalization

We are going to use this normalization for both multivariate plots and for multivariate statistics.

#a)RHIZOSPHERE
#a.1) South plot
rizo1_ASV=as.data.frame(otu_table(rizo1,taxa_are_rows = T))#extract the ASV table from the phyloseq object
rizo1_mt=as.data.frame(sample_data(rizo1))#extract the metadata table from the phyloseq object
rizo1_tax=as.data.frame(tax_table(rizo1))#extract the taxonomy table from the phyloseq object

#Create a new object suitbale for the normalization by the package edgeR
rizo1_edgeR = DGEList(counts = rizo1_ASV, samples = rizo1_mt, genes = rizo1_tax)
#Calculate the normalization factors to correct the differences in sequencing depth
rizo1_edgeR = calcNormFactors(rizo1_edgeR)
#Extract the normalized abundance
rizo1_ASV_norm = cpm(rizo1_edgeR, normalized.lib.sizes=T, log=F)
#Create the phyloseq elements with the ASV counts normalized
rizo1_phy_OTU_norm=otu_table(as.data.frame(rizo1_ASV_norm,row.names=F), taxa_are_rows = T)
rizo1_phy_taxonomy_norm=tax_table(as.matrix(rizo1_tax))
rizo1_phy_metadata_norm=sample_data(rizo1_mt)
taxa_names(rizo1_phy_OTU_norm)= taxa_names(rizo1_phy_taxonomy_norm)

identical(rownames(rizo1_ASV_norm), rownames(rizo1_tax))#it should give "TRUE"
#Join all the elements into a new phyloseq object
rizo1_normalized_phyloseq<-phyloseq(rizo1_phy_OTU_norm,
                                          rizo1_phy_taxonomy_norm,
                                          rizo1_phy_metadata_norm)

#a.2) North plot
rizo2_ASV=as.data.frame(otu_table(rizo2,taxa_are_rows = T))
rizo2_mt=as.data.frame(sample_data(rizo2))
rizo2_tax=as.data.frame(tax_table(rizo2))

rizo2_edgeR = DGEList(counts = rizo2_ASV, samples = rizo2_mt, genes = rizo2_tax)
rizo2_edgeR = calcNormFactors(rizo2_edgeR)
rizo2_ASV_norm = cpm(rizo2_edgeR, normalized.lib.sizes=T, log=F)
rizo2_phy_OTU_norm=otu_table(as.data.frame(rizo2_ASV_norm,row.names=F), taxa_are_rows = T)
rizo2_phy_taxonomy_norm=tax_table(as.matrix(rizo2_tax))
rizo2_phy_metadata_norm=sample_data(rizo2_mt)
taxa_names(rizo2_phy_OTU_norm)= taxa_names(rizo2_phy_taxonomy_norm)

identical(rownames(rizo2_ASV_norm), rownames(rizo2_tax))
rizo2_normalized_phyloseq<-phyloseq(rizo2_phy_OTU_norm,
                                    rizo2_phy_taxonomy_norm,
                                    rizo2_phy_metadata_norm)

#b) ENDOSPHERE
#b.1) South plot
endo1_ASV=as.data.frame(otu_table(endo1,taxa_are_rows = T))
endo1_mt=as.data.frame(sample_data(endo1))
endo1_tax=as.data.frame(tax_table(endo1))

endo1_edgeR = DGEList(counts = endo1_ASV, samples = endo1_mt, genes = endo1_tax)
endo1_edgeR = calcNormFactors(endo1_edgeR)
endo1_ASV_norm = cpm(endo1_edgeR, normalized.lib.sizes=T, log=F)
endo1_phy_OTU_norm=otu_table(as.data.frame(endo1_ASV_norm,row.names=F), taxa_are_rows = T)
endo1_phy_taxonomy_norm=tax_table(as.matrix(endo1_tax))
endo1_phy_metadata_norm=sample_data(endo1_mt)
taxa_names(endo1_phy_OTU_norm)= taxa_names(endo1_phy_taxonomy_norm)

identical(rownames(endo1_ASV_norm), rownames(endo1_tax))
endo1_normalized_phyloseq<-phyloseq(endo1_phy_OTU_norm,
                                    endo1_phy_taxonomy_norm,
                                    endo1_phy_metadata_norm)
#b.2) North plot
endo2_ASV=as.data.frame(otu_table(endo2,taxa_are_rows = T))
endo2_mt=as.data.frame(sample_data(endo2))
endo2_tax=as.data.frame(tax_table(endo2))

endo2_edgeR = DGEList(counts = endo2_ASV, samples = endo2_mt, genes = endo2_tax)
endo2_edgeR = calcNormFactors(endo2_edgeR)
endo2_ASV_norm = cpm(endo2_edgeR, normalized.lib.sizes=T, log=F)
endo2_phy_OTU_norm=otu_table(as.data.frame(endo2_ASV_norm,row.names=F), taxa_are_rows = T)
endo2_phy_taxonomy_norm=tax_table(as.matrix(endo2_tax))
endo2_phy_metadata_norm=sample_data(endo2_mt)
#Anyadimos el nombre de los taxones
taxa_names(endo2_phy_OTU_norm)= taxa_names(endo2_phy_taxonomy_norm)

identical(rownames(endo2_ASV_norm), rownames(endo2_tax))
endo2_normalized_phyloseq<-phyloseq(endo2_phy_OTU_norm,
                                    endo2_phy_taxonomy_norm,
                                    endo2_phy_metadata_norm)

5.2 Multivariate statistical tests

The best options to test whether the groups of samples have the same dispersion is to apply the test PERMDISP2, while PERMANOVA (Permutational Multivariate Analysis of Variance Using Distance Matrices) is a test suitable to check whether the centroid (concept similar to the mean of groups of samples) of all the groups of samples to be compared is homogenus. Thus, these tests are the multivariate version of Levene’s and ANOVA tests. However, both of them are based on distance matrices. That is to say, we have to calculate the distance between all sample pairs. There are many different distance or dissimilarity measures, but here we are going to use Bray-Curtis dissimilarity.

Null hypotheses of both tests: PERMANOVA H₀: no significant differences in the centroids (mean values) of the groups of samples PERMDISPD2 H₀: no significant differences in the dispersion of the groups of samples

Important

Be careful with unbalanced experiments. As stated Anderson and Walsh (2013), when the groups of samples to be compared have different number of replicates (unbalanced design), heterocedasticity becomes a problem. In that cases, PERMANOVA cannot distinguish between the differences between groups of samples are due to the different dispersion or due to differences in the centroids. So, in case of unbalances designs, it is superimportant to check the dispersion of the data.

Note

As you know, the ITS2 fungal region is quite variable in length, thus, we do not calculate a phylogenetic tree including all the ASVs. For that reason, we cannot compute the (un)weighted UniFrac distances. We are going to perform all the beta diversity analyses based just on Bray-Curtis dissimilarity measure.

#a) RHIZOSPHERE
#a.1) South plot
df_r1=data.frame(sample_data(rizo1_normalized_phyloseq))#extract the metadata table from the phyloseq object (NORMALIZED phyloseq!)
b_r1=distance(rizo1_normalized_phyloseq, "bray")#calculate Bray-Curtis dissimilarity

adonis_rizo1=adonis2(b_r1~Condition*Year, data=df_r1, permutations = 9999)#apply the PERMANOVA test. Here we test the model Community structure~Plant type*Sampling year
adonis_rizo1#check the results

pairwise.adonis(b_r1, phyloseq::sample_data(rizo1_normalized_phyloseq)$Year, p.adjust.m = "holm")#pairwise PERMANOVAs as post-hoc tests

betadisper_r1=betadisper(b_r1, df_r1$Condition_Year)#check the betadispersion of each factor separately
permutest(betadisper_r1)

betadisper_r1_condition=betadisper(b_r1, df_r1$Condition)#Check the betadispersion of the factor "Condition"
permutest(betadisper_r1_condition)

betadisper_r1_year=betadisper(b_r1, df_r1$Year)#Check the betadispersion of the factor "sampling year"
permutest(betadisper_r1_year)

#a.2) North plot
df_r2=data.frame(sample_data(rizo2_normalized_phyloseq))
b_r2=distance(rizo2_normalized_phyloseq,"bray")

adonis_rizo2=adonis2(b_r2~Condition*Year, data=df_r2, permutations = 9999)
adonis_rizo2

pairwise.adonis(b_r2, phyloseq::sample_data(rizo2_normalized_phyloseq)$Year, p.adjust.m = "holm")
betadisper_r2=betadisper(b_r2, df_r2$Condition_Year)
permutest(betadisper_r2)

betadisper_r2_condition=betadisper(b_r2, df_r2$Condition)
permutest(betadisper_r2_condition)

betadisper_r2_year=betadisper(b_r2, df_r2$Year)
permutest(betadisper_r2_year)

#b) ENDOSPHERE
#b.1) South plot
df_e1=data.frame(sample_data(endo1_normalized_phyloseq))
b_e1=distance(endo1_normalized_phyloseq, "bray")

adonis_endo1=adonis2(b_e1~Condition*Year, data=df_e1, permutations = 9999)
adonis_endo1

pairwise.adonis(b_e1, phyloseq::sample_data(endo1_normalized_phyloseq)$Year, p.adjust.m = "holm")
pairwise.adonis(b_e1, phyloseq::sample_data(endo1_normalized_phyloseq)$Condition, p.adjust.m = "holm")

pairwise.adonis(b_e1, phyloseq::sample_data(endo1_normalized_phyloseq)$Fusion, p.adjust.m = "holm")

betadisper_e1=betadisper(b_e1, df_e1$Condition_Year)
permutest(betadisper_e1)

betadisper_e1_condition=betadisper(b_e1, df_e1$Condition)
permutest(betadisper_e1_condition)

betadisper_e1_year=betadisper(b_e1, df_e1$Year)
permutest(betadisper_e1_year)

#b.2) North plot
df_e2=data.frame(sample_data(endo2_normalized_phyloseq))
b_e2=distance(endo2_normalized_phyloseq, "bray")

adonis_endo2=adonis2(b_e2~Condition*Year, data=df_e2, permutations = 9999)
adonis_endo2

pairwise.adonis(b_e2, phyloseq::sample_data(endo2_normalized_phyloseq)$Fusion, p.adjust.m = "holm")

betadisper_e2=betadisper(b_e2, df_e2$Condition_Year)
permutest(betadisper_e2)

betadisper_e2_condition=betadisper(b_e2, df_e2$Condition)
permutest(betadisper_e2_condition)

betadisper_e2_year=betadisper(b_e2, df_e2$Year)
permutest(betadisper_e2_year)

#Let's calculate the size of the effects. But first, we have to create the function that calculates the size of the effect:
adonis_OmegaSq <- function(adonisOutput, partial = TRUE){
  if(!(is(adonisOutput, "adonis") || is(adonisOutput, "anova.cca")))
    stop("Input should be an adonis object")
  if (is(adonisOutput, "anova.cca")) {
    aov_tab <- adonisOutput
    aov_tab$MeanSqs <- aov_tab$SumOfSqs / aov_tab$Df
    aov_tab$MeanSqs[length(aov_tab$Df)] <- NA
  } else {
    aov_tab <- adonisOutput$aov.tab
  }
  heading <- attr(aov_tab, "heading")
  MS_res <- aov_tab[pmatch("Residual", rownames(aov_tab)), "MeanSqs"]
  SS_tot <- aov_tab[rownames(aov_tab) == "Total", "SumsOfSqs"]
  N <- aov_tab[rownames(aov_tab) == "Total", "Df"] + 1
  if(partial){
    omega <- apply(aov_tab, 1, function(x) (x["Df"]*(x["MeanSqs"]-MS_res))/(x["Df"]*x["MeanSqs"]+(N-x["Df"])*MS_res))
    aov_tab$parOmegaSq <- c(omega[1:(length(omega)-2)], NA, NA)
  } else {
    omega <- apply(aov_tab, 1, function(x) (x["SumsOfSqs"]-x["Df"]*MS_res)/(SS_tot+MS_res))
    aov_tab$OmegaSq <- c(omega[1:(length(omega)-2)], NA, NA)
  }
  if (is(adonisOutput, "adonis"))
    cn_order <- c("Df", "SumsOfSqs", "MeanSqs", "F.Model", "R2",
                  if (partial) "parOmegaSq" else "OmegaSq", "Pr(>F)")
  else
    cn_order <- c("Df", "SumOfSqs", "F", if (partial) "parOmegaSq" else "OmegaSq",
                  "Pr(>F)")
  aov_tab <- aov_tab[, cn_order]
  attr(aov_tab, "names") <- cn_order
  attr(aov_tab, "heading") <- heading
  if (is(adonisOutput, "adonis"))
    adonisOutput$aov.tab <- aov_tab
  else
    adonisOutput <- aov_tab
  return(adonisOutput)
}

#apply just in that cases in which PERMANOVA gives significant results:
adonis_OmegaSq(adonis_rizo1)
adonis_OmegaSq(adonis_rizo2)
adonis_OmegaSq(adonis_endo1)
adonis_OmegaSq(adonis_endo2)

Now, let’s compare the structure of bacterial communities of mothers, first and second suckers:

#a) RHIZOSPHERE
#a.1) South plot
adonis_rizo1_generation=adonis2(b_r1~Generation, data=df_rizo1, permutations = 9999)
adonis_rizo1_generation

betadisper_rizo1_generation=betadisper(b_r1, df_rizo1$Generation)
permutest(betadisper_rizo1_generation)

adonis_OmegaSq(adonis_rizo1_generation)

pairwise.adonis(b_r1, phyloseq::sample_data(rizo1)$Generation, p.adjust.m = "holm")
pa=pairwise.adonis(b_r1, phyloseq::sample_data(rizo1)$Generation, p.adjust.m = "holm")

#a.2) North plot
adonis_rizo2_generation=adonis2(b_r2~Generation, data=df_rizo2, permutations = 9999)
adonis_rizo2_generation

betadisper_rizo2_generation=betadisper(b_r2, df_rizo2$Generation)
permutest(betadisper_rizo2_generation)

adonis_OmegaSq(adonis_rizo2_generation)
pairwise.adonis(b_r2, phyloseq::sample_data(rizo2)$Generation, p.adjust.m = "holm")

#b) ENDOSPHERE
#b.1) South plot
adonis_endo1_generation=adonis2(b_e1~Generation, data=df_endo1, permutations = 9999)
adonis_endo1_generation

betadisper_endo1_generation=betadisper(b_e1, df_endo1$Generation)
permutest(betadisper_endo1_generation)

#b.2) North plot
adonis_endo2_generation=adonis2(b_e2~Generation, data=df_endo2, permutations = 9999)
adonis_endo2_generation

betadisper_endo2_generation=betadisper(b_e2, df_endo2$Generation)
permutest(betadisper_endo2_generation)

adonis_OmegaSq(adonis_endo2_generation)
pairwise.adonis(b_e2, phyloseq::sample_data(endo2)$Generation, p.adjust.m = "holm")

5.3 Ordination plots

There are several options to visualize the results of PERMANOVA tests, for instance, by ordination plots such as PCoA (Principal Coordinate Analysis), NMDS (Non-metric MultiDimenstional Scaling), PCA (Principal Component Analysis), among others. Theses graphs are very useful because help us to detect the differences in the structure of the microbiota among groups of samples. Both PCoA and NMDS are based on non-euclidean distances or dissimilarity measures, so we consider that they are the best options for microbiota analyses.

One the one hand, PCoA plots represent in two dimensions the arrangement of the samples in the multivariate space, although the percentage of the total variance is commonly explained by many dimensions. On the other hand, NMDS approach “forces” the samples to be ordinated in just two dimensions, although this forced arrangement has an associated stress. We tolerate stress values lower than 0.2.

We recommend calculating both ordination plots with the aim of selecting the option that best represents the results of PERMANOVA analyses.

#a) RHIZOSPHERE
#a.1) South plot
NMDS_bray_rizo1= ordinate(rizo1_normalized_phyloseq, "NMDS", "bray")#indicate the ordination plot and the distance
print(paste("The stress of the NMDS based on BrayCurtis is:", NMDS_bray_rizo1$stress))#this shows the stress of the NMDS

PCOA_bray_rizo1 = ordinate(rizo1_normalized_phyloseq, "PCoA", "bray")
peso_r1b=PCOA_bray_rizo1$values$Relative_eig[1:2]
peso_r1b#This shows the percentage of the total variance explained by just the first and second axes

#a.2) North plot
NMDS_bray_rizo2= ordinate(rizo2_normalized_phyloseq, "NMDS", "bray")
print(paste("The stress of the NMDS based on BrayCurtis is:", NMDS_bray_rizo2$stress))

PCOA_bray_rizo2 = ordinate(rizo2_normalized_phyloseq, "PCoA", "bray")
peso_r2b=PCOA_bray_rizo2$values$Relative_eig[1:2]
peso_r2b

#b) ENDOSPHERE
#b.1) South plot
NMDS_bray_endo1= ordinate(endo1_normalized_phyloseq, "NMDS", "bray")
print(paste("The stress of the NMDS based on BrayCurtis is:", NMDS_bray_endo1$stress))

PCOA_bray_endo1 = ordinate(endo1_normalized_phyloseq, "PCoA", "bray")
peso_e1b=PCOA_bray_endo1$values$Relative_eig[1:2]
peso_e1b

#b.2) North plot
NMDS_bray_endo2= ordinate(endo2_normalized_phyloseq, "NMDS", "bray")
print(paste("The stress of the NMDS based on BrayCurtis is:", NMDS_bray_endo2$stress))

PCOA_bray_endo2 = ordinate(endo2_normalized_phyloseq, "PCoA", "bray")
peso_e2b=PCOA_bray_endo2$values$Relative_eig[1:2]
peso_e2b

Once these values of stress and the weight of the axes are obtained, we should choose which option is the most suitable for our dataset. Although you can also use both method in your research paper. In this case, we selected PCoA plots because we obtained relatively high values of NMDS stress. It is easy to understand: our data are probably multidimensional are complex due to the high diversity and richness of the data, so it is easy to have high stress when forcing the data to fit just into two dimensions.

Let’s obtain the plots:

#a) RHIZOSPHERE
#a.1) South plot
p_rizo1_pcoa= plot_ordination(rizo1_normalized_phyloseq, PCOA_bray_rizo1,type= "samples", color= "Condition", shape = "Year")+#to color by "Condition" and shape by "Year"
geom_point(alpha = 4, size = 4.5)+
labs(x=paste0("Axis 1 (",round(peso_r1b[1]*100,digits = 2),"%)"), 
          y=paste0("Axis 2 (",round(peso_r1b[2]*100,digits = 2),"%)"))+#write the weight of each axis with two decimals
scale_color_manual(values = c("Immature"="cornflowerblue","Mature"="darkgreen"),
                       breaks=c("Immature", "Mature"),
                       labels=c("Immature", "Mature"))+
scale_shape_manual(values=c("A"=16, "B"=17),
                       breaks=c("A", "B"),
                       labels=c("2022", "2023"))+
theme_bw()+
theme(legend.key=element_blank(),
          legend.title.align = 0,
          legend.title = element_text(face="bold",size=18),
          legend.text.align = 0,
          axis.text = element_text(size=18),
          axis.title = element_text(size = 20),
          plot.title = element_text(hjust=0.5, face="bold",size=18),
          legend.text = element_text(size = 18))+
  geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+
  geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+
ggtitle("Rhizosphere (Plot 1)\nPCoA on Bray Curtis dissimilarities")

x11()
p_rizo1_pcoa

#a.2) North plot
p_rizo2_pcoa= plot_ordination(rizo2_normalized_phyloseq, PCOA_bray_rizo2,type= "samples", color= "Condition", shape = "Year")+
geom_point(alpha = 4, size = 4.5)+
labs(x=paste0("Axis 1 (",round(peso_r2b[1]*100,digits = 2),"%)"), 
          y=paste0("Axis 2 (",round(peso_r2b[2]*100,digits = 2),"%)"))+
scale_color_manual(values = c("Immature"="magenta","Mature"="darkolivegreen2"),
                       breaks=c("Immature", "Mature"),
                       labels=c("Immature", "Mature"))+
scale_shape_manual(values=c("A"=16, "B"=17),
                       breaks=c("A", "B"),
                       labels=c("2022", "2023"))+
theme_bw()+
theme(legend.key=element_blank(),
          legend.title.align = 0,
          legend.title = element_text(face="bold",size=18),
          legend.text.align = 0,
          axis.text = element_text(size=18),
          axis.title = element_text(size = 20),
          plot.title = element_text(hjust=0.5, face="bold",size=18),
          legend.text = element_text(size = 18))+
  geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+
  geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+
ggtitle("Rhizosphere (Plot 2)\nPCoA on Bray-Curtis dissimilarities")

x11()
p_rizo1_pcoa

#b)ENDOSPHERE
#b.1) South plot
p_endo1_pcoa= plot_ordination(endo1_normalized_phyloseq, PCOA_bray_endo1,type= "samples", color= "Condition", shape = "Year")+
geom_point(alpha = 4, size = 4.5)+
labs(x=paste0("Axis 1 (",round(peso_e1b[1]*100,digits = 2),"%)"), 
          y=paste0("Axis 2 (",round(peso_e1b[2]*100,digits = 2),"%)"))+
scale_color_manual(values = c("Immature"="#00CCFF","Mature"="green"),
                       breaks=c("Immature", "Mature"),
                       labels=c("Immature", "Mature"))+
scale_shape_manual(values=c("A"=16, "B"=17),
                                breaks=c("A", "B"),
                                         labels=c("2022", "2023"))+
theme_bw()+
theme(legend.key=element_blank(),
          legend.title.align = 0,
          legend.title = element_text(face="bold",size=18),
          legend.text.align = 0,
          axis.text = element_text(size=18),
          axis.title = element_text(size = 20),
          plot.title = element_text(hjust=0.5, face="bold",size=18),
          legend.text = element_text(size = 18))+
  geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+
  geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+
ggtitle("Endosphere (Plot 1)\nPCoA on Bray-Curtis dissimilarities")

x11()
p_endo1_pcoa

#b.2) North plot
p_endo2_pcoa= plot_ordination(endo2_normalized_phyloseq, PCOA_bray_endo2,type= "samples", color= "Condition", shape = "Year")+
geom_point(alpha = 4, size = 4.5)+
labs(x=paste0("Axis 1 (",round(peso_e2b[1]*100,digits = 2),"%)"), 
          y=paste0("Axis 2 (",round(peso_e2b[2]*100,digits = 2),"%)"))+
scale_color_manual(values = c("Immature"="blueviolet","Mature"="aquamarine"),
                       breaks=c("Immature", "Mature"),
                       labels=c("Immature", "Mature"))+
scale_shape_manual(values=c("A"=16, "B"=17),
                       breaks=c("A", "B"),
                       labels=c("2022", "2023"))+
theme_bw()+
theme(legend.key=element_blank(),
          legend.title.align = 0,
          legend.title = element_text(face="bold",size=18),
          legend.text.align = 0,
          axis.text = element_text(size=18),
          axis.title = element_text(size = 20),
          plot.title = element_text(hjust=0.5, face="bold",size=18),
          legend.text = element_text(size = 18))+
  geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+
  geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+
ggtitle("Endosphere (Plot 2)\nPCoA on Bray Curtis dissimilarities")

x11()
p_endo2_pcoa

6. Taxonomical profiles

We already know that there are differences in the structure of bacterial communities in some case, however, we still do not know which taxa are the responsible of that differences. So, let’s analyze the taxonomical profiles. Fistly, we have to obtain the relative abundance of each ASV:

phy_data_relabun=transform_sample_counts(phy_total_TOTAL, function(x){x/sum(x)}*100)
colSums(otu_table(phy_data_relabun)) #check that all sum 100%

6.1 Phylum level

6.1.1 Obtaining tables with the relative abundance and standard deviation values

We are going to obtain the relative abundance of each phylum detected in our samples.

phylum_relabun=tax_glom(phy_data_relabun, taxrank = "Phylum") #aglomerate at Phylum level
colSums(otu_table(phylum_relabun)) #check that the relative abundance sums 100% in all cases

#Now we are going to create a table with the relative abundance of each taxa in each replicate so that we can work on it, for instance, in Excel
a=as.data.frame(otu_table(phylum_relabun))
aa=cbind(tax_table(phylum_relabun),a)
identical(rownames(a),rownames(aa))#comprobamos que sean identicos (tiene uqe salir por consola "TRUE")
write.table(data.frame("TAXA"=rownames(aa),aa),file="abundrel_phylum_porReplicas.txt", sep="\t",row.names =F)

#Another table, with the mean and standard deviation of the relative abundance of each phylum:
table_phylum = otu_table(phylum_relabun)[,] %>% t() %>% as.data.frame()
tax_phylum=tax_table(phylum_relabun)

phylum_media=aggregate(table_phylum, by=list(as.data.frame(sample_data(phylum_relabun))$Fusion), FUN=mean)%>% column_to_rownames("Group.1") %>% t() #calculate the mean values of the relative abundance of each phylum in each group of samples 

phylum_sd = aggregate(table_phylum, by=list(as.data.frame(sample_data(phylum_relabun))$Fusion), FUN=sd)%>% column_to_rownames("Group.1")  %>% t()  %>% 
  as.data.frame() %>% rename_with(.fn= ~paste0(colnames(phylum_media), "_SD"))#calculate the standard deviation of the relative abundance of each phylum in each group of samples

phylum_media_sd=merge(tax_phylum, phylum_media, by=0) %>%column_to_rownames("Row.names") %>% #bind mean and sd values into one table
  merge(phylum_sd, by=0) %>% column_to_rownames("Row.names")
colSums(phylum_media_sd[,9:ncol(phylum_media_sd)])

write.table(data.frame("TAXA"=rownames(phylum_media_sd),phylum_media_sd), file="abundrel_media_sd_phylum.txt", sep="\t",row.names =F)

6.1.2 Plotting

We are going to make a stacked bars plot, just indicating the most abundant phyla

Firstly, we have to prepare the data and modify the table so that we can obtain tables suitable for the plotting with the package ggplot2.

df_melt_phylum = psmelt(physeq = phylum_relabun) #transform the phyloseq object into a dataframe in which the abundance of each phylum is in rows
colnames(df_melt_phylum)#check the content of the dataframe

#Here we aggregate the data by the factor we indicate in the argument "by"
df_melt_aggreg1 = aggregate(df_melt_phylum$Abundance, 
                            by=list(Group=df_melt_phylum$Fusion, 
                                    Sample=df_melt_phylum$Sample,Phylum=df_melt_phylum$Phylum,
                                                              Compartment=df_melt_phylum$Compartment,
                                    Plot=df_melt_phylum$Plot, Year=df_melt_phylum$Year, Condition=df_melt_phylum$Condition), FUN=sum)#it sums the abundance of each phylum in all the replicates corresponding to the same group of samples
colnames(df_melt_aggreg1)=c("Group", "Sample", "Phylum","Compartment","Plot","Year","Condition","Abundance")#rename the columns
colnames(df_melt_aggreg1) 

df_melt_aggreg_mean = aggregate(df_melt_aggreg1$Abundance, by=list(Group=df_melt_aggreg1$Group, 
                                                                   Phylum=df_melt_aggreg1$Phylum,
                                                                   Compartment=df_melt_aggreg1$Compartment,
                                                                   Plot=df_melt_aggreg1$Plot, Year=df_melt_aggreg1$Year,Condition=df_melt_aggreg1$Condition), FUN=mean)#now, it makes the mean relative abundance of each phylum in each group of samples
colnames(df_melt_aggreg_mean)<-c("Group", "Phylum","Compartment", "Plot","Year","Condition","Abundance")
colnames(df_melt_aggreg_mean)<-c("Group", "Phylum","Compartment", "Plot","Year","Condition","Abundance")
colnames(df_melt_aggreg_mean)

#repeat the same with the standard deviation
df_melt_aggreg_sd=aggregate(df_melt_aggreg1$Abundance, by=list(Group=df_melt_aggreg1$Group, 
                                                               Phylum=df_melt_aggreg1$Phylum,
                                                               Compartment=df_melt_aggreg1$Compartment,
                                                               Plot=df_melt_aggreg1$Plot, Year=df_melt_aggreg1$Year,Condition=df_melt_aggreg1$Condition), FUN=sd)
colnames(df_melt_aggreg_sd)=c("Group", "Phylum","Compartment","Plot","Year","Condition","sd")

all(df_melt_aggreg_mean$Group==df_melt_aggreg_sd$Group)
all(df_melt_aggreg_mean$Phylum==df_melt_aggreg_sd$Phylum)

df_melt_aggreg_mean$sd=df_melt_aggreg_sd$sd 
media_sd_phylum_todos=df_melt_aggreg_mean

Now, we are ready to plot the mean relative abundances. We want to order the phyla by ascending relative abundance, keeping the unclassified and minor phyla in the upper part of the plot.

#a) RHIZOSPHERE
#a.1) South plot
media_sd_phylum_Rizo=media_sd_phylum_todos[media_sd_phylum_todos[,"Compartment"] == "Rhizosphere",]#subset the dataset
media_sd_phylum_Rizo1=media_sd_phylum_Rizo[media_sd_phylum_Rizo[,"Plot"] == "Plot1",]

#aggregate all the phyla that were not classified
media_sd_phylum_Rizo1$Phylum=as.character(media_sd_phylum_Rizo1$Phylum)
unclassified_rizo1=media_sd_phylum_Rizo1[media_sd_phylum_Rizo1[,"Phylum"] == "unclassified",] #save them into a new variable
media_sd_phylum_Rizo1=media_sd_phylum_Rizo1[media_sd_phylum_Rizo1[,"Phylum"] != "unclassified",] #remove the unclassified phyla from the original dataset

#aggregate all the phyla that account less than 1% into an artificial group named "Other phyla <1%"
media_sd_phylum_Rizo1$Phylum[media_sd_phylum_Rizo1$Abundance <= 1.0] = "Other phyla (<1%)" 
media_sd_phylum_Rizo1$Phylum=as.factor(media_sd_phylum_Rizo1$Phylum)

others_rizo1=media_sd_phylum_Rizo1[media_sd_phylum_Rizo1[,"Phylum"] == "Other phyla (<1%)",]#save the "other phyla" into a new variable
media_sd_phylum_Rizo1=media_sd_phylum_Rizo1[media_sd_phylum_Rizo1[,"Phylum"] != "Other phyla (<1%)",]#remove them from the original dataset

#Now, the minor and unclassified phyla are not included in the original dataset, so let's order by abundance the remaining phyla
ordenado_rizo1=media_sd_phylum_Rizo1[order(media_sd_phylum_Rizo1$Abundance, decreasing=T),]#order by relative abundance
ordenado_rizo1=rbind(ordenado_rizo1,others_rizo1,unclassified_rizo1)#now add the minor phyla and the unclassified

group_label_rizo1=c("Mature","Immature", "Mature","Immature")#set the labels
levels_rizo1=c("A_Plot1_Mature_Rhizosphere","A_Plot1_Immature_Rhizosphere","B_Plot1_Mature_Rhizosphere",
               "B_Plot1_Immature_Rhizosphere")#set the levels of the graph
limits_rizo1=c("A_Plot1_Mature_Rhizosphere","A_Plot1_Immature_Rhizosphere","B_Plot1_Mature_Rhizosphere",
               "B_Plot1_Immature_Rhizosphere")#set the limits of the graph

phyl_name_ordered_rizo1=as.vector(ordenado_rizo1$Phylum)
nombres_unicos_rizo1=unique(phyl_name_ordered_rizo1)#get the levels (=the names of the phyla )of the factor "Phylum", and show them just once regardless of their abundance and prevalence)

ordenado_rizo1$Phylum=reorder.factor(ordenado_rizo1$Phylum,new.order=rev(nombres_unicos_rizo1)) #order the name of the phyla
lab_unicos_rizo1=nombres_unicos_rizo1 

sorted_labels_ggplot_rizo1 <- sapply(lab_unicos_rizo1,
                                    function(x) if (x == "Other phyla (<1%)"|x == "unclassified"|x == "Other genera (<1%)")
                                    {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})
#this function helps us to write the name of the phyla (and genera) in italics and the name of the minor phyla and unclassified without italics

colores_rizo1 = c("#0099FF", "#00CC00","#9933FF","yellow", "#FF00FF","grey", "black")#choose the colors of specific phyla

length(lab_unicos_rizo1)==length(colores_rizo1) #si FALSE, anyadir/quitar colores
length(lab_unicos_rizo1)
length(colores_rizo1)

titulo_plot_rizo1="Plot 1"

p3=ggplot(ordenado_rizo1, aes(x=Group, y=Abundance, fill=Phylum, order=Phylum)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=colores_rizo1,
                    labels=sorted_labels_ggplot_rizo1,
                    breaks=nombres_unicos_rizo1)+
  labs(y="Mean relative abundance (%)", x=NULL, fill="Phylum",title=titulo_plot_rizo1)+
  guides(fill = guide_legend(reverse = TRUE))+
 scale_x_discrete(limits=limits_rizo1,labels=group_label_rizo1,
                  breaks=levels_rizo1)+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 100))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
  theme(axis.text = element_text(size = 14))+
  theme(axis.text.x = element_text(face="bold", size=16))+
  theme(axis.title.y = element_text(size = 16))+
  theme(legend.key.size = unit(0.9, "cm"))+
  theme(legend.text = element_text(size = 18))+
  theme(legend.title = element_text(size=18, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0) 

x11()
p3

# a.2) North plot
media_sd_phylum_Rizo=media_sd_phylum_todos[media_sd_phylum_todos[,"Compartment"] == "Rhizosphere",]
media_sd_phylum_Rizo2=media_sd_phylum_Rizo[media_sd_phylum_Rizo[,"Plot"] == "Plot2",]

media_sd_phylum_Rizo2$Phylum=as.character(media_sd_phylum_Rizo2$Phylum)
unclassified_rizo2=media_sd_phylum_Rizo2[media_sd_phylum_Rizo2[,"Phylum"] == "unclassified",] 
media_sd_phylum_Rizo2=media_sd_phylum_Rizo2[media_sd_phylum_Rizo2[,"Phylum"] != "unclassified",] 

media_sd_phylum_Rizo2$Phylum[media_sd_phylum_Rizo2$Abundance <= 1.0] = "Other phyla (<1%)" 
media_sd_phylum_Rizo2$Phylum=as.factor(media_sd_phylum_Rizo2$Phylum)

others_rizo2=media_sd_phylum_Rizo2[media_sd_phylum_Rizo2[,"Phylum"] == "Other phyla (<1%)",]
media_sd_phylum_Rizo2=media_sd_phylum_Rizo2[media_sd_phylum_Rizo2[,"Phylum"] != "Other phyla (<1%)",]

ordenado_rizo2=media_sd_phylum_Rizo2[order(media_sd_phylum_Rizo2$Abundance, decreasing=T),] 
ordenado_rizo2=rbind(ordenado_rizo2,others_rizo2,unclassified_rizo2)

group_label_rizo2=c("Mature","Immature", "Mature","Immature")
levels_rizo2=c("A_Plot2_Mature_Rhizosphere","A_Plot2_Immature_Rhizosphere","B_Plot2_Mature_Rhizosphere",
               "B_Plot2_Immature_Rhizosphere")
limits_rizo2=c("A_Plot2_Mature_Rhizosphere","A_Plot2_Immature_Rhizosphere","B_Plot2_Mature_Rhizosphere",
               "B_Plot2_Immature_Rhizosphere")

phyl_name_ordered_rizo2=as.vector(ordenado_rizo2$Phylum)
nombres_unicos_rizo2=unique(phyl_name_ordered_rizo2)

ordenado_rizo2$Phylum=reorder.factor(ordenado_rizo2$Phylum,new.order=rev(nombres_unicos_rizo2))
lab_unicos_rizo2=nombres_unicos_rizo2 

sorted_labels_ggplot_rizo2 <- sapply(lab_unicos_rizo2,
                                     function(x) if (x == "Other phyla (<1%)"|x == "unclassified"|x == "Other genera (<1%)")
                                     {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})

colores_rizo2 = c("#0099FF", "#00CC00", "#9933FF","#FF00FF",
                  "brown","grey", "black")

length(lab_unicos_rizo2)==length(colores_rizo2) 
length(lab_unicos_rizo2)
length(colores_rizo2)

titulo_plot_rizo2="Plot 2"

#b) ENDOSPHERE
#b.1) South plot
media_sd_phylum_Endo1=media_sd_phylum_Endo[media_sd_phylum_Endo[,"Plot"] == "Plot1",]
media_sd_phylum_Endo1$Phylum=as.character(media_sd_phylum_Endo1$Phylum)
unclassified_endo1=media_sd_phylum_Endo1[media_sd_phylum_Endo1[,"Phylum"] == "unclassified",] 
media_sd_phylum_Endo1=media_sd_phylum_Endo1[media_sd_phylum_Endo1[,"Phylum"] != "unclassified",] 
media_sd_phylum_Endo1$Phylum[media_sd_phylum_Endo1$Abundance <= 1.0] = "Other phyla (<1%)" 
media_sd_phylum_Endo1$Phylum=as.factor(media_sd_phylum_Endo1$Phylum)

others_endo1=media_sd_phylum_Endo1[media_sd_phylum_Endo1[,"Phylum"] == "Other phyla (<1%)",]
media_sd_phylum_Endo1=media_sd_phylum_Endo1[media_sd_phylum_Endo1[,"Phylum"] != "Other phyla (<1%)",]

ordenado_endo1=media_sd_phylum_Endo1[order(media_sd_phylum_Endo1$Abundance, decreasing=T),] 
ordenado_endo1=rbind(ordenado_endo1,others_endo1,unclassified_endo1)

group_label_endo1=c("Mothers","Suckers", "Mothers","Suckers")
levels_endo1=c("A_Plot1_Mature_Endosphere","A_Plot1_Immature_Endosphere","B_Plot1_Mature_Endosphere","B_Plot1_Immature_Endosphere")
limits_endo1=c("A_Plot1_Mature_Endosphere","A_Plot1_Immature_Endosphere","B_Plot1_Mature_Endosphere","B_Plot1_Immature_Endosphere")

phyl_name_ordered_endo1=as.vector(ordenado_endo1$Phylum)
nombres_unicos_endo1=unique(phyl_name_ordered_endo1)

ordenado_endo1$Phylum=reorder.factor(ordenado_endo1$Phylum,new.order=rev(nombres_unicos_endo1))
lab_unicos_endo1=nombres_unicos_endo1 
sorted_labels_ggplot_endo1 <- sapply(lab_unicos_endo1,
                                    function(x) if (x == "Other phyla (<1%)"|x == "unclassified"|x == "Other genera (<1%)")
                                    {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})
colores_endo1 = c("#0099FF","#00CC00", "grey","black")

length(lab_unicos_endo1)==length(colores_endo1)
length(lab_unicos_endo1)
length(colores_endo1)

titulo_plot_endo1="South plot"

p1=ggplot(ordenado_endo1, aes(x=Group, y=Abundance, fill=Phylum, order=Phylum)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=colores_endo1,
                    labels=sorted_labels_ggplot_endo1,
                    breaks=nombres_unicos_endo1)+
  labs(y="Mean relative abundance (%)", x=NULL, fill="Phylum",title=titulo_plot_endo1)+
  guides(fill = guide_legend(reverse = TRUE))+#sirve para cambiar el orden de la leyenda
  scale_x_discrete(limits=limits_endo1,labels=group_label_endo1,
                   breaks=levels_endo1)+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 100))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+
  theme(axis.text = element_text(size = 5))+
  theme(axis.text.x = element_text(face="bold", size=5))+
  theme(axis.title.y = element_text(size = 7))+
  theme(legend.key.size = unit(0.9, "cm"))+
  theme(legend.text = element_text(size = 5))+
  theme(legend.title = element_text(size=7, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0)+
  theme(legend.position = "none")

x11()
p1

#b.2) North plot
media_sd_phylum_Endo2=media_sd_phylum_Endo[media_sd_phylum_Endo[,"Plot"] == "Plot2",]

media_sd_phylum_Endo2$Phylum=as.character(media_sd_phylum_Endo2$Phylum)
unclassified_endo2=media_sd_phylum_Endo2[media_sd_phylum_Endo2[,"Phylum"] == "unclassified",] 
media_sd_phylum_Endo2=media_sd_phylum_Endo2[media_sd_phylum_Endo2[,"Phylum"] != "unclassified",] 
media_sd_phylum_Endo2$Phylum[media_sd_phylum_Endo2$Abundance <= 1.0] = "Other phyla (<1%)"
media_sd_phylum_Endo2$Phylum=as.factor(media_sd_phylum_Endo2$Phylum)

others_endo2=media_sd_phylum_Endo2[media_sd_phylum_Endo2[,"Phylum"] == "Other phyla (<1%)",]
media_sd_phylum_Endo2=media_sd_phylum_Endo2[media_sd_phylum_Endo2[,"Phylum"] != "Other phyla (<1%)",]

ordenado_endo2=media_sd_phylum_Endo2[order(media_sd_phylum_Endo2$Abundance, decreasing=T),] 
ordenado_endo2=rbind(ordenado_endo2,others_endo2,unclassified_endo2)

group_label_endo2=c("Mature","Immature", "Mature","Immature")
levels_endo2=c("A_Plot2_Mature_Endosphere","A_Plot2_Immature_Endosphere","B_Plot2_Mature_Endosphere","B_Plot2_Immature_Endosphere")
limits_endo2=c("A_Plot2_Mature_Endosphere","A_Plot2_Immature_Endosphere","B_Plot2_Mature_Endosphere","B_Plot2_Immature_Endosphere")

phyl_name_ordered_endo2=as.vector(ordenado_endo2$Phylum)
nombres_unicos_endo2=unique(phyl_name_ordered_endo2)

ordenado_endo2$Phylum=reorder.factor(ordenado_endo2$Phylum,new.order=rev(nombres_unicos_endo2))

sorted_labels_ggplot_endo2 <- sapply(lab_unicos_endo2,
                                     function(x) if (x == "Other phyla (<1%)"|x == "unclassified"|x == "Other genera (<1%)")
                                     {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})

colores_endo2 = c("#0099FF","#00CC00","grey","black")

length(lab_unicos_endo2)==length(colores_endo2) 
length(lab_unicos_endo2)
length(colores_endo2)

titulo_plot_endo2="Plot 2"

p2=ggplot(ordenado_endo2, aes(x=Group, y=Abundance, fill=Phylum, order=Phylum)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=colores_endo2,
                    labels=sorted_labels_ggplot_endo2,
                    breaks=nombres_unicos_endo2)+
  labs(y="Mean relative abundance (%)", x=NULL, fill="Phylum",title=titulo_plot_endo2)+
  guides(fill = guide_legend(reverse = TRUE))+
  scale_x_discrete(limits=limits_endo2,labels=group_label_endo2,
                   breaks=levels_endo2)+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 100))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
  theme(axis.text = element_text(size = 14))+
  theme(axis.text.x = element_text(face="bold", size=16))+
  theme(axis.title.y = element_text(size = 16))+
  theme(legend.key.size = unit(0.9, "cm"))+
  theme(legend.text = element_text(size = 18))+
  theme(legend.title = element_text(size=18, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0) 

x11()
p2

6.2 Genus

6.2.1 Obtain tables with the relative abundance and standard deviation values

We are going to obtain the relative abundance of each phylum detected in our samples.

genus_relabun=tax_glom(phy_data_relabun, taxrank = "Genus")
colSums(otu_table(genus_relabun))

b=as.data.frame(otu_table(genus_relabun))
bb=cbind(tax_table(genus_relabun),b)
identical(rownames(b),rownames(bb))
write.table(data.frame("TAXA"=rownames(bb),bb),file="abundrel_Genero_porReplicas.txt", sep="\t",row.names =F)

table_genus = otu_table(genus_relabun)[,] %>% t() %>% as.data.frame()
tax_genus=tax_table(genus_relabun)

genus_media=aggregate(table_genus, by=list(as.data.frame(sample_data(genus_relabun))$Fusion), FUN=mean)%>% column_to_rownames("Group.1") %>% t() 

genus_sd= aggregate(table_genus, by=list(as.data.frame(sample_data(genus_relabun))$Fusion), FUN=sd)%>% column_to_rownames("Group.1")  %>% t()  %>% 
  as.data.frame() %>% rename_with(.fn= ~paste0(colnames(genus_media), "_SD"))

genus_media_sd=merge(tax_genus, genus_media, by=0) %>%column_to_rownames("Row.names") %>%
  merge(genus_sd, by=0) %>% column_to_rownames("Row.names")
colSums(genus_media_sd[,8:ncol(genus_media_sd)])
genus_media_sd$ASV_names=row.names(genus_media_sd)

write.table(data.frame("TAXA"=rownames(genus_media_sd),genus_media_sd), file="abundrel_media_sd_genus.txt", sep="\t",row.names =F)

6.2.2. Plotting

We are going to make a stacked bars plot, just indicating the most abundant genera

Firstly, we have to prepare the data and modify the table so that we can obtain tables suitable for the plotting with the package ggplot2.

df_melt_genus = psmelt(physeq = genus_relabun)
colnames(df_melt_genus)

df_melt_aggreg2 = aggregate(df_melt_genus$Abundance, 
                            by=list(Group=df_melt_genus$Fusion, 
                                    Sample=df_melt_genus$Sample,Genus=df_melt_genus$Genus,
                                    Compartment=df_melt_genus$Compartment,
                                    Plot=df_melt_genus$Plot, Year=df_melt_genus$Year, Condition=df_melt_genus$Condition), FUN=sum)
colnames(df_melt_aggreg2)=c("Group", "Sample", "Genus","Compartment","Plot","Year","Condition","Abundance")
colnames(df_melt_aggreg2)

df_melt_aggreg_mean = aggregate(df_melt_aggreg2$Abundance, by=list(Group=df_melt_aggreg2$Group, 
                                                                   Genus=df_melt_aggreg2$Genus,
                                                                   Compartment=df_melt_aggreg2$Compartment,
                                                                   Plot=df_melt_aggreg2$Plot, Year=df_melt_aggreg2$Year,Condition=df_melt_aggreg2$Condition), FUN=mean)
colnames(df_melt_aggreg_mean)<-c("Group", "Genus","Compartment", "Plot","Year","Condition","Abundance")
colnames(df_melt_aggreg_mean)

df_melt_aggreg_sd=aggregate(df_melt_aggreg2$Abundance, by=list(Group=df_melt_aggreg2$Group, 
                                                               Genus=df_melt_aggreg2$Genus,
                                                               Compartment=df_melt_aggreg2$Compartment,
                                                               Plot=df_melt_aggreg2$Plot, Year=df_melt_aggreg2$Year,Condition=df_melt_aggreg2$Condition), FUN=sd)
colnames(df_melt_aggreg_sd)=c("Group", "Genus","Compartment","Plot","Year","Condition","sd")

all(df_melt_aggreg_mean$Group==df_melt_aggreg_sd$Group)
all(df_melt_aggreg_mean$Genus==df_melt_aggreg_sd$Genus)


df_melt_aggreg_mean$sd=df_melt_aggreg_sd$sd

media_sd_genus_todos=df_melt_aggreg_mean

Now, we are ready to plot the mean relative abundances. We want to order the genera by ascending relative abundance, keeping the unclassified and minor genera in the upper part of the plot.

#a) RHIZOSPHERE
media_sd_genus_rizo=media_sd_genus_todos[media_sd_genus_todos[,"Compartment"] == "Rhizosphere",]

#a.1) South plot
media_sd_genus_rizo1=media_sd_genus_rizo[media_sd_genus_rizo[,"Plot"] == "Plot1",]

media_sd_genus_rizo1$Genus=as.character(media_sd_genus_rizo1$Genus)
unclassified_r1=media_sd_genus_rizo1[media_sd_genus_rizo1[,"Genus"] == "unclassified",]
media_sd_genus_rizo1=media_sd_genus_rizo1[media_sd_genus_rizo1[,"Genus"] != "unclassified",] 
media_sd_genus_rizo1$Genus[media_sd_genus_rizo1$Abundance <= 2] = "Other genera (<2%)" 
media_sd_genus_rizo1$Genus=as.factor(media_sd_genus_rizo1$Genus)

others_r1=media_sd_genus_rizo1[media_sd_genus_rizo1[,"Genus"] == "Other genera (<2%)",]
media_sd_genus_rizo1=media_sd_genus_rizo1[media_sd_genus_rizo1[,"Genus"] != "Other genera (<2%)",]

ordenado_r1=media_sd_genus_rizo1[order(media_sd_genus_rizo1$Abundance, decreasing=T),] 
ordenado_r1=rbind(ordenado_r1,others_r1,unclassified_r1)

group_label_r1=c("M","S", "M","S")
levels_r1=c("A_Plot1_Mature_Rhizosphere","A_Plot1_Immature_Rhizosphere",
           "B_Plot1_Mature_Rhizosphere","B_Plot1_Immature_Rhizosphere")
limits_r1=c("A_Plot1_Mature_Rhizosphere","A_Plot1_Immature_Rhizosphere",
            "B_Plot1_Mature_Rhizosphere","B_Plot1_Immature_Rhizosphere")

genus_name_ordered_r1=as.vector(ordenado_r1$Genus)
nombres_unicos_r1=unique(genus_name_ordered_r1)

ordenado_r1$Genus=reorder.factor(ordenado_r1$Genus,new.order=rev(nombres_unicos_r1))
lab_unicos_r1=nombres_unicos_r1 

sorted_labels_ggplot_r1 = sapply(lab_unicos_r1,
                                     function(x) if (x == "Other genera (<2%)"|x == "unclassified"|x == "Other genera (<2%)")
                                     {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})

lab_unicos_r1=as.data.frame(lab_unicos_r1)

colores_r1 = c("#0099FF","#00CC00","#9933FF","brown","yellow","orange","#00FFFF","red", 
               "pink", "magenta","blue","khaki2","green4","#6A3D9A","gold1","purple","green" ,
               "grey","black")

length(lab_unicos_r1)==length(colores_r1) 
length(lab_unicos_r1)
length(colores_r1)

titulo_plot_r1="South plot"

pgenus_r1=ggplot(ordenado_r1, aes(x=Group, y=Abundance, fill=Genus, order=Genus)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=colores_r1,
                    labels=sorted_labels_ggplot_r1,
                    breaks=nombres_unicos_r1)+
  labs(y="Mean relative abundance (%)", x=NULL, fill="Genus",title=titulo_plot_r1)+
  guides(fill = guide_legend(reverse = TRUE))+
  scale_x_discrete(limits=limits_r1,labels=group_label_r1,
                   breaks=levels_r1)+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 100))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+
  theme(axis.text = element_text(size = 7))+
  theme(axis.text.x = element_text(face="bold", size=7))+
  theme(axis.title.y = element_text(size = 7))+
  theme(legend.key.size = unit(0.2, "cm"))+
  theme(legend.text = element_text(size = 5))+
  theme(legend.title = element_text(size=5, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0)+ 
  theme(legend.position = "none")
x11()
pgenus_r1


#a.2) North plot
media_sd_genus_rizo2=media_sd_genus_rizo[media_sd_genus_rizo[,"Plot"] == "Plot2",]

media_sd_genus_rizo2$Genus=as.character(media_sd_genus_rizo2$Genus)
unclassified_r2=media_sd_genus_rizo2[media_sd_genus_rizo2[,"Genus"] == "unclassified",] 
media_sd_genus_rizo2=media_sd_genus_rizo2[media_sd_genus_rizo2[,"Genus"] != "unclassified",] 
media_sd_genus_rizo2$Genus[media_sd_genus_rizo2$Abundance <= 2] = "Other genera (<2%)" 
media_sd_genus_rizo2$Genus=as.factor(media_sd_genus_rizo2$Genus)

others_r2=media_sd_genus_rizo2[media_sd_genus_rizo2[,"Genus"] == "Other genera (<2%)",]
media_sd_genus_rizo2=media_sd_genus_rizo2[media_sd_genus_rizo2[,"Genus"] != "Other genera (<2%)",]

ordenado_r2=media_sd_genus_rizo2[order(media_sd_genus_rizo2$Abundance, decreasing=T),] 
ordenado_r2=rbind(ordenado_r2,others_r2,unclassified_r2)

group_label_r2=c("M","S", "M","S")
levels_r2=c("A_Plot2_Mature_Rhizosphere","A_Plot2_Immature_Rhizosphere",
            "B_Plot2_Mature_Rhizosphere","B_Plot2_Immature_Rhizosphere")
limits_r2=c("A_Plot2_Mature_Rhizosphere","A_Plot2_Immature_Rhizosphere",
            "B_Plot2_Mature_Rhizosphere","B_Plot2_Immature_Rhizosphere")

genus_name_ordered_r2=as.vector(ordenado_r2$Genus)
nombres_unicos_r2=unique(genus_name_ordered_r2)

ordenado_r2$Genus=reorder.factor(ordenado_r2$Genus,new.order=rev(nombres_unicos_r2))
lab_unicos_r2=nombres_unicos_r2 

sorted_labels_ggplot_r2 <- sapply(lab_unicos_r2,
                                  function(x) if (x == "Other genera (<2%)"|x == "unclassified"|x == "Other genera (<2%)")
                                  {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})

colores_r2 = c("green4","brown","darkturquoise","steelblue4","#00FFFF", "#9933FF","maroon","pink","gold1",
               "green","slategray2","plum2","yellow","red","darkolivegreen2","grey","black")
  
length(lab_unicos_r2)==length(colores_r2) 
length(lab_unicos_r2)
length(colores_r2)

titulo_plot_r2="North plot"

pgenus_r2=ggplot(ordenado_r2, aes(x=Group, y=Abundance, fill=Genus, order=Genus)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=colores_r2,
                    labels=sorted_labels_ggplot_r2,
                    breaks=nombres_unicos_r2)+
  labs(y="Mean relative abundance (%)", x=NULL, fill="Genus",title=titulo_plot_r2)+
  guides(fill = guide_legend(reverse = TRUE))+
  scale_x_discrete(limits=limits_r2,labels=group_label_r2,
                   breaks=levels_r2)+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 100))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+
  theme(axis.text = element_text(size = 7))+
  theme(axis.text.x = element_text(face="bold", size=7))+
  theme(axis.title.y = element_text(size = 7))+
  theme(legend.key.size = unit(0.2, "cm"))+
  theme(legend.text = element_text(size = 5))+
  theme(legend.title = element_text(size=, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0)+
  theme(legend.position="none")

x11()
pgenus_r2

#b) ENDOSPHERE
media_sd_genus_endo=media_sd_genus_todos[media_sd_genus_todos[,"Compartment"] == "Endosphere",]

#b.1) South plot
media_sd_genus_endo1=media_sd_genus_endo[media_sd_genus_endo[,"Plot"] == "Plot1",]

media_sd_genus_endo1$Genus=as.character(media_sd_genus_endo1$Genus)
unclassified_e1=media_sd_genus_endo1[media_sd_genus_endo1[,"Genus"] == "unclassified",] 
media_sd_genus_endo1=media_sd_genus_endo1[media_sd_genus_endo1[,"Genus"] != "unclassified",] 
media_sd_genus_endo1$Genus[media_sd_genus_endo1$Abundance <= 2] = "Other genera (<2%)" 
media_sd_genus_endo1$Genus=as.factor(media_sd_genus_endo1$Genus)

others_e1=media_sd_genus_endo1[media_sd_genus_endo1[,"Genus"] == "Other genera (<2%)",]
media_sd_genus_endo1=media_sd_genus_endo1[media_sd_genus_endo1[,"Genus"] != "Other genera (<2%)",]

ordenado_e1=media_sd_genus_endo1[order(media_sd_genus_endo1$Abundance, decreasing=T),] 
ordenado_e1=rbind(ordenado_e1,others_e1,unclassified_e1)

group_label_e1=c("M","S", "M","S")
levels_e1=c("A_Plot1_Mature_Endosphere","A_Plot1_Immature_Endosphere",
            "B_Plot1_Mature_Endosphere","B_Plot1_Immature_Endosphere")
limits_e1=c("A_Plot1_Mature_Endosphere","A_Plot1_Immature_Endosphere",
            "B_Plot1_Mature_Endosphere","B_Plot1_Immature_Endosphere")

genus_name_ordered_e1=as.vector(ordenado_e1$Genus)
nombres_unicos_e1=unique(genus_name_ordered_e1)

ordenado_e1$Genus=reorder.factor(ordenado_e1$Genus,new.order=rev(nombres_unicos_e1))
lab_unicos_e1=nombres_unicos_e1 

sorted_labels_ggplot_e1=sapply(lab_unicos_e1,
                                  function(x) if (x == "Other genera (<2%)"|x == "unclassified"|x == "Other genera (<2%)")
                                  {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})

colores_e1 = c("blue","violet","brown","magenta","#FDBF6F","#FB9A99","springgreen3","yellow","coral2",
               "yellow4","green1","#6A3D9A","mediumorchid2","lightgoldenrod2","darkgreen","green4","firebrick",
               "grey","black")

length(lab_unicos_e1)==length(colores_e1) 
length(lab_unicos_e1)
length(colores_e1)

titulo_plot_e1="South plot"

pgenus_e1=ggplot(ordenado_e1, aes(x=Group, y=Abundance, fill=Genus, order=Genus)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=colores_e1,
                    labels=sorted_labels_ggplot_e1,
                    breaks=nombres_unicos_e1)+
  labs(y="Mean relative abundance (%)", x=NULL, fill="Genus",title=titulo_plot_e1)+
  guides(fill = guide_legend(reverse = TRUE))+
  scale_x_discrete(limits=limits_e1,labels=group_label_e1,
                   breaks=levels_e1)+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 100))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+
  theme(axis.text = element_text(size = 7))+
  theme(axis.text.x = element_text(face="bold", size=7))+
  theme(axis.title.y = element_text(size = 7))+
  theme(legend.key.size = unit(0.2, "cm"))+
  theme(legend.text = element_text(size = 5))+
  theme(legend.title = element_text(size=5, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0)+ 
  theme(legend.position = "right")

x11()
pgenus_e1

#b.2) North plot
media_sd_genus_endo2=media_sd_genus_endo[media_sd_genus_endo[,"Plot"] == "Plot2",]

media_sd_genus_endo2$Genus=as.character(media_sd_genus_endo2$Genus)
unclassified_e2=media_sd_genus_endo2[media_sd_genus_endo2[,"Genus"] == "unclassified",] 
media_sd_genus_endo2=media_sd_genus_endo2[media_sd_genus_endo2[,"Genus"] != "unclassified",] 
media_sd_genus_endo2$Genus[media_sd_genus_endo2$Abundance <= 2] = "Other genera (<2%)" 
media_sd_genus_endo2$Genus=as.factor(media_sd_genus_endo2$Genus)

others_e2=media_sd_genus_endo2[media_sd_genus_endo2[,"Genus"] == "Other genera (<2%)",]
media_sd_genus_endo2=media_sd_genus_endo2[media_sd_genus_endo2[,"Genus"] != "Other genera (<2%)",]

ordenado_e2=media_sd_genus_endo2[order(media_sd_genus_endo2$Abundance, decreasing=T),] 
ordenado_e2=rbind(ordenado_e2,others_e2,unclassified_e2)

group_label_e2=c("M","S", "M","S")
levels_e2=c("A_Plot2_Mature_Endosphere","A_Plot2_Immature_Endosphere",
            "B_Plot2_Mature_Endosphere","B_Plot2_Immature_Endosphere")
limits_e2=c("A_Plot2_Mature_Endosphere","A_Plot2_Immature_Endosphere",
            "B_Plot2_Mature_Endosphere","B_Plot2_Immature_Endosphere")

genus_name_ordered_e2=as.vector(ordenado_e2$Genus)
nombres_unicos_e2=unique(genus_name_ordered_e2)

ordenado_e2$Genus=reorder.factor(ordenado_e2$Genus,new.order=rev(nombres_unicos_e2))
lab_unicos_e2=nombres_unicos_e2 

sorted_labels_ggplot_e2 = sapply(lab_unicos_e2,
                                  function(x) if (x == "Other genera (<2%)"|x == "unclassified"|x == "Other genera (<2%)")
                                  {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))})

colores_e2 = c("brown", "violet", "blue", "grey90","orangered2","darkseagreen2","burlywood",
               "yellow4","darkmagenta","powderblue","sienna2","magenta","springgreen3","mediumslateblue",
               "gold2",
               "grey","black")

length(lab_unicos_e2)==length(colores_e2) 
length(lab_unicos_e2)
length(colores_e2)

titulo_plot_e2="North plot"

pgenus_e2=ggplot(ordenado_e2, aes(x=Group, y=Abundance, fill=Genus, order=Genus)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=colores_e2,
                    labels=sorted_labels_ggplot_e2,
                    breaks=nombres_unicos_e2)+
  labs(y="Mean relative abundance (%)", x=NULL, fill="Genus",title=titulo_plot_e2)+
  guides(fill = guide_legend(reverse = TRUE))+
  scale_x_discrete(limits=limits_e2,labels=group_label_e2,
                   breaks=levels_e2)+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 100))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+
  theme(axis.text = element_text(size = 7))+
  theme(axis.text.x = element_text(face="bold", size=7))+
  theme(axis.title.y = element_text(size = 7))+
  theme(legend.key.size = unit(0.2, "cm"))+
  theme(legend.text = element_text(size = 5))+
  theme(legend.title = element_text(size=5, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0)+ 
  theme(legend.position = "none")

x11()
pgenus_e2

In Figure 6 you can observe an example of the stacked bar plot for the main genera. This kind of plot can be calculated at the taxonomical rank you prefer. Please, try to open the plot in a new window if you want to view it at full size.

Figure 6. Distribution of the main classified genera in the rhizosphere of mother (“Mature”) and sucker plants (“Inmature”) in 2022 (left) and 2023 (right)

7. Fungal transmission analyses

Our experimental design allows us to study the potential transmission of microorganisms from mothers to first and second suckers. For that purpose, we firstly compared the root microbiota of mothers and first suckers, and then, these shared microorganisms were further compared against the microbiota of rhizosphere of sucker plants. It is expected that the microorganisms that are found in the root endosphere of suckers (and not found in the rhizosphere) and also in the root endosphere of mother plants, are transferred from mothers to suckers through the root system (corm), but not acquired from the surrounding soil of from the environment.

We will perform this analysis just at ASV level

7.1 Prepare the data

mt1=as.data.frame(sample_data(phy_data_relabun))
names = unique(mt1$Fusion)#get the name of each level of the variable "Fusion", which in this example corresponds to each individual group of samples. For instance, the first level of the variable "Fusion" is "A_Plot1_Mature_Rhizosphere" (all rhizosphere samples of mother plants sampled in 2022 located in the south plot)

#in the following for loop we are creating a list in which we are saving one each phyloseq object per group of samples. That is to say, in the first element of the list, the phyloseq object corresponding to the samples of the rhizosphere of mother plants sampled in 2022 located in the south plot is stored.

pruned_data = list()
for(i in 1:length(names)){
  samples_group <- mt1[which(mt1$Fusion==names[i]),]$Sample
  pruned_data[[i]] <- prune_samples(samples_group, phy_data_relabun)
}

#this loop stored the taxonony and ASV table of each phyloseq element of the previously created list
otu_tables = list()
tax_tables = list()
for (i in 1:length(pruned_data)){
  otu_tables[[i]] <- as.data.frame(otu_table(pruned_data[[i]]))
  tax_tables[[i]] <- as.data.frame(tax_table(pruned_data[[i]]))
}#it gives the list "otu_tables", whose element [[1]] corresponds to the ASV table of the group of sample "A_Plot1_Mature_Rhizosphere". While "tax_tables" list stores the taxonomy tables

#this list merges each ASV table with each tax table by rownames (by=0)
whole_tables = list()
for (i in 1:length(pruned_data)){
  whole_tables[[i]] = merge(tax_tables[[i]],otu_tables[[i]], by=0)
}

for(i in 1:length(whole_tables)){
  whole_tables[[i]]$ASV_names=whole_tables[[i]]$Row.names
}#it creates a new column in the list "wholes_tables" that corresponds to the rownames

for (i in 1:length(whole_tables)){ 
  assign(paste0(names[i],"_df"),as#create one dataframe per element from the list "whole_tables"
         
#the previous loop returns complex dataframe names, so let's rename them and calculate the total number of sequences registered for each ASV in all the corresponding dataset 

A_r1_M=A_Plot1_Mature_Rhizosphere_df; A_r1_M$Sum=rowSums(A_r1_M[,9:ncol(A_r1_M)]); A_r1_M_def=A_r1_M[which(A_r1_M$Sum>0,arr.ind=TRUE),];A_r1_M_def2= subset(A_r1_M_def, select = -c(Sum))#we remove those ASV that account for 0 sequences in sum in each dataset
A_e1_M=A_Plot1_Mature_Endosphere_df; A_e1_M$Sum=rowSums(A_e1_M[,9:ncol(A_e1_M)]); A_e1_M_def=A_e1_M[which(A_e1_M$Sum>0,arr.ind=TRUE),];A_e1_M_def2= subset(A_e1_M_def, select = -c(Sum))
A_e1_I=A_Plot1_Immature_Endosphere_df; A_e1_I$Sum=rowSums(A_e1_I[,9:ncol(A_e1_I)]); A_e1_I_def=A_e1_I[which(A_e1_I$Sum>0,arr.ind=TRUE),];A_e1_I_def2= subset(A_e1_I_def, select = -c(Sum))
A_r1_I=A_Plot1_Immature_Rhizosphere_df; A_r1_I$Sum=rowSums(A_r1_I[,9:ncol(A_r1_I)]); A_r1_I_def=A_r1_I[which(A_r1_I$Sum>0,arr.ind=TRUE),];A_r1_I_def2= subset(A_r1_I_def, select = -c(Sum))
A_e2_M=A_Plot2_Mature_Endosphere_df; A_e2_M$Sum=rowSums(A_e2_M[,9:ncol(A_e2_M)]); A_e2_M_def=A_e2_M[which(A_e2_M$Sum>0,arr.ind=TRUE),];A_e2_M_def2= subset(A_e2_M_def, select = -c(Sum))
A_r2_M=A_Plot2_Mature_Rhizosphere_df; A_r2_M$Sum=rowSums(A_r2_M[,9:ncol(A_r2_M)]); A_r2_M_def=A_r2_M[which(A_r2_M$Sum>0,arr.ind=TRUE),];A_r2_M_def2= subset(A_r2_M_def, select = -c(Sum))
A_r2_I=A_Plot2_Immature_Rhizosphere_df; A_r2_I$Sum=rowSums(A_r2_I[,9:ncol(A_r2_I)]); A_r2_I_def=A_r2_I[which(A_r2_I$Sum>0,arr.ind=TRUE),];A_r2_I_def2= subset(A_r2_I_def, select = -c(Sum))
A_e2_I=A_Plot2_Immature_Endosphere_df; A_e2_I$Sum=rowSums(A_e2_I[,9:ncol(A_e2_I)]);A_e2_I_def=A_e2_I[which(A_e2_I$Sum>0,arr.ind=TRUE),];A_e2_I_def2= subset(A_e2_I_def, select = -c(Sum))
B_r1_M=B_Plot1_Mature_Rhizosphere_df; B_r1_M$Sum=rowSums(B_r1_M[,9:ncol(B_r1_M)]); B_r1_M_def=B_r1_M[which(B_r1_M$Sum>0,arr.ind=TRUE),];B_r1_M_def2= subset(B_r1_M_def, select = -c(Sum))
B_r1_I=B_Plot1_Immature_Rhizosphere_df; B_r1_I$Sum=rowSums(B_r1_I[,9:ncol(B_r1_I)]); B_r1_I_def=B_r1_I[which(B_r1_I$Sum>0,arr.ind=TRUE),];B_r1_I_def2= subset(B_r1_I_def, select = -c(Sum))
B_r2_M=B_Plot2_Mature_Rhizosphere_df; B_r2_M$Sum=rowSums(B_r2_M[,9:ncol(B_r2_M)]); B_r2_M_def=B_r2_M[which(B_r2_M$Sum>0,arr.ind=TRUE),];B_r2_M_def2= subset(B_r2_M_def, select = -c(Sum))
B_r2_I=B_Plot2_Immature_Rhizosphere_df; B_r2_I$Sum=rowSums(B_r2_I[,9:ncol(B_r2_I)]); B_r2_I_def=B_r2_I[which(B_r2_I$Sum>0,arr.ind=TRUE),];B_r2_I_def2= subset(B_r2_I_def, select = -c(Sum))
B_e1_I=B_Plot1_Immature_Endosphere_df; B_e1_I$Sum=rowSums(B_e1_I[,9:ncol(B_e1_I)]); B_e1_I_def=B_e1_I[which(B_e1_I$Sum>0,arr.ind=TRUE),];B_e1_I_def2= subset(B_e1_I_def, select = -c(Sum))
B_e1_M=B_Plot1_Mature_Endosphere_df; B_e1_M$Sum=rowSums(B_e1_M[,9:ncol(B_e1_M)]); B_e1_M_def=B_e1_M[which(B_e1_M$Sum>0,arr.ind=TRUE),];B_e1_M_def2= subset(B_e1_M_def, select = -c(Sum))
B_e2_M=B_Plot2_Mature_Endosphere_df; B_e2_M$Sum=rowSums(B_e2_M[,9:ncol(B_e2_M)]); B_e2_M_def=B_e2_M[which(B_e2_M$Sum>0,arr.ind=TRUE),];B_e2_M_def2= subset(B_e2_M_def, select = -c(Sum))
B_e2_I=B_Plot2_Immature_Endosphere_df; B_e2_I$Sum=rowSums(B_e2_I[,9:ncol(B_e2_I)]); B_e2_I_def=B_e2_I[which(B_e2_I$Sum>0,arr.ind=TRUE),];B_e2_I_def2= subset(B_e2_I_def, select = -c(Sum))

7.2 Compare ASVs between mother and sucker plants

Remember that we want to retain the ASVs that are shared between mother and sucker plants (just in the root endosphere). They could be both transferred by mother plants or acquired from the rhizosphere soil.

#a) South plot 
#a.1) 2022
compartidos_A_e1=cbind(A_e1_M_def2[A_e1_M_def2$ASV_names %in% A_e1_I_def2$ASV_names,], A_e1_I_def2[A_e1_I_def2$ASV_names %in% A_e1_M_def2$ASV_names,])#retain those ASVs that are in the dataframe of mothers and suckers (root endosphere)
compartidos_A_e1=compartidos_A_e1[,-(20:27)]#removing extra information
write.table(data.frame("_"=rownames(compartidos_A_e1),compartidos_A_e1),file="Core_A_e1.txt",sep="\t", row.names = F)

#a.2) 2023
compartidos_B_e1=cbind(B_e1_M_def2[B_e1_M_def2$ASV_names %in% B_e1_I_def2$ASV_names,], B_e1_I_def2[B_e1_I_def2$ASV_names %in% B_e1_M_def2$ASV_names,])
compartidos_B_e1=compartidos_B_e1[,-(18:25)]
write.table(data.frame("_"=rownames(compartidos_B_e1),compartidos_B_e1),file="Core_B_e1.txt",sep="\t", row.names = F)

#b) North plot 
#b.1) 2022
compartidos_A_e2=cbind(A_e2_M_def2[A_e2_M_def2$ASV_names %in% A_e2_I_def2$ASV_names,], A_e2_I_def2[A_e2_I_def2$ASV_names %in% A_e2_M_def2$ASV_names,])
compartidos_A_e2=compartidos_A_e2[,-(21:28)]
write.table(data.frame("_"=rownames(compartidos_A_e2),compartidos_A_e2),file="Core_A_e2.txt",sep="\t", row.names = F)

#b.2) 2023
compartidos_B_e2=cbind(B_e2_M_def2[B_e2_M_def2$ASV_names %in% B_e2_I_def2$ASV_names,], B_e2_I_def2[B_e2_I_def2$ASV_names %in% B_e2_M_def2$ASV_names,])
compartidos_B_e2=compartidos_B_e2[,-(19:26)]
write.table(data.frame("_"=rownames(compartidos_B_e2),compartidos_B_e2),file="Core_B_e2.txt",sep="\t", row.names = F)

Now, we are going to compare the shared fungal communities against the gunal communities dwelling in the rhizosphere soil of the suckers. We will retain fungal members that are in the root endosphere but NO in the rhizosphere soil

#a) South plot
#a.1) 2022
compartidos_A1_endo_rizo=cbind(compartidos_A_e1[compartidos_A_e1$ASV_names %in% A_r1_I_def2$ASV_names,], A_r1_I_def2[A_r1_I_def2$ASV_names %in% compartidos_A_e1$ASV_names,])#remove extra information from the data (additional columns indicating, again, the taxonomy of the ASVs)
compartidos_A1_endo_rizo=compartidos_A1_endo_rizo[,-(30:37)]#remove extra information
write.table(data.frame("_"=rownames(compartidos_A1_endo_rizo),compartidos_A1_endo_rizo),file="Compartidos_A1_endo_rizo.txt",sep="\t", row.names = F)

#a.2) 2023
compartidos_B1_endo_rizo=cbind(compartidos_B_e1[compartidos_B_e1$ASV_names %in% B_r1_I_def2$ASV_names,], B_r1_I_def2[B_r1_I_def2$ASV_names %in% compartidos_B_e1$ASV_names,])
compartidos_B1_endo_rizo=compartidos_B1_endo_rizo[,-(26:33)]
write.table(data.frame("_"=rownames(compartidos_B1_endo_rizo),compartidos_B1_endo_rizo),file="Compartidos_B1_endo_rizo.txt",sep="\t", row.names = F)

#b) North plot
#b.1) 2022
compartidos_A2_endo_rizo=cbind(compartidos_A_e2[compartidos_A_e2$ASV_names %in% A_r2_I_def2$ASV_names,], A_r2_I_def2[A_r2_I_def2$ASV_names %in% compartidos_A_e2$ASV_names,])
compartidos_A2_endo_rizo=compartidos_A2_endo_rizo[,-(29:36)]
write.table(data.frame("_"=rownames(compartidos_A2_endo_rizo),compartidos_A2_endo_rizo),file="Compartidos_A2_endo_rizo.txt",sep="\t", row.names = F)

#b.2) 2023
compartidos_B2_endo_rizo=cbind(compartidos_B_e2[compartidos_B_e2$ASV_names %in% B_r2_I_def2$ASV_names,], B_r2_I_def2[B_r2_I_def2$ASV_names %in% compartidos_B_e2$ASV_names,])
compartidos_B2_endo_rizo=compartidos_B2_endo_rizo[,-(27:34)]
write.table(data.frame("_"=rownames(compartidos_B2_endo_rizo),compartidos_B2_endo_rizo),file="Compartidos_B2_endo_rizo.txt",sep="\t", row.names = F)

#Remove those ASV detected both in the rhizosphere and in the root endosphere

#a) South plot
#a.1) 2022
especif_A1_endo=subset(compartidos_A_e1, !(ASV_names %in% compartidos_A1_endo_rizo$ASV_names))
write.table(data.frame("_"=rownames(especif_A1_endo),especif_A1_endo),file="A1_Endo_Transmitidos_Madreahija.txt",sep="\t", row.names = F)

#a.2) 2023
especif_B1_endo=subset(compartidos_B_e1, !(ASV_names %in% compartidos_B1_endo_rizo$ASV_names))
write.table(data.frame("_"=rownames(especif_B1_endo),especif_B1_endo),file="B1_Endo_Transmitidos_Madreahija.txt",sep="\t", row.names = F)

#b) North plot
#b.1) 2022
especif_A2_endo=subset(compartidos_A_e2, !(ASV_names %in% compartidos_A2_endo_rizo$ASV_names))
write.table(data.frame("_"=rownames(especif_A2_endo),especif_A2_endo),file="A2_Endo_Transmitidos_Madreahija.txt",sep="\t", row.names = F)

#b.2) 2023
especif_B2_endo=subset(compartidos_B_e2, !(ASV_names %in% compartidos_B2_endo_rizo$ASV_names))
write.table(data.frame("_"=rownames(especif_B2_endo),especif_B2_endo),file="B2_Endo_Transmitidos_Madreahija.txt",sep="\t", row.names = F)

In this work, we analyzed carefully the number and proportions of the transferred ASVs in Excel

7.3 Plotting

In this section, we are going to make different graph representing the microbial transmission process. We will use barplots

#a) South plot
#a.1) 2022
totales_M_A_e1=nrow(A_e1_M_def2)#get the total number of ASV in mothers
transferidos_A_e1=nrow(especif_A1_endo)#get the number of shared ASVs between mothers and suckers
prop_transferidos_M_A_e1=(transferidos_A_e1 / totales_M_A_e1) *100#calculate the proportion of transferred ASVs
prop_perdidos_M_A_e1=((totales_M_A_e1-transferidos_A_e1) / totales_M_A_e1) *100#calculate the proportion of ASVs lost by the mothers

totales_I_A_e1=nrow(A_e1_I_def2)#get the total number of ASVs in suckers
prop_transferidos_I_A_e1=(transferidos_A_e1 / totales_I_A_e1) *100#get the number of shared ASVs between mothers and suckers
prop_adquiridos_I_A_e1=((totales_I_A_e1-transferidos_A_e1) / totales_I_A_e1) *100#calculate the proportion of ASVs acquired by the suckers

barras_A_e1=as.data.frame(rbind(prop_transferidos_M_A_e1,prop_perdidos_M_A_e1,prop_transferidos_I_A_e1,prop_adquiridos_I_A_e1))#create a dataframe with all the values in order to plot them
barras_A_e1$Year="A"
barras_A_e1$Plant=c("Mother", "Mother", "Sucker", "Sucker")
barras_A_e1$Condition=as.factor(c("Transmitted","Lost", "Transmitted", "Acquired"))
colnames(barras_A_e1)[1]="Proportion"

#a.2) 2023
totales_M_B_e1=nrow(B_e1_M_def2)
transferidos_B_e1=nrow(especif_B1_endo)
prop_transferidos_M_B_e1=(transferidos_B_e1 / totales_M_B_e1) *100
prop_perdidos_M_B_e1=((totales_M_B_e1-transferidos_B_e1) / totales_M_B_e1) *100

totales_I_B_e1=nrow(B_e1_I_def2)
prop_transferidos_I_B_e1=(transferidos_B_e1 / totales_I_B_e1) *100
prop_adquiridos_I_B_e1=((totales_I_B_e1-transferidos_B_e1) / totales_I_B_e1) *100

barras_B_e1=as.data.frame(rbind(prop_transferidos_M_B_e1,prop_perdidos_M_B_e1, prop_transferidos_I_B_e1,prop_adquiridos_I_B_e1))
barras_B_e1$Year="B"
barras_B_e1$Plant=c("Mother", "Mother", "Sucker", "Sucker")
barras_B_e1$Condition=as.factor(c("Transmitted","Lost", "Transmitted", "Acquired"))
colnames(barras_B_e1)[1]="Proportion"

A_e1=ggplot(barras_A_e1, aes(x=Plant, y=Proportion, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
                    labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
                    breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
  labs(y="Proportion of ASV (%)", x=NULL, fill="Condition",title="Plot 1 (2022)")+
  guides(fill = guide_legend(reverse = TRUE))+
  scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
                   breaks=c("Mother","Sucker"))+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 101))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
  theme(axis.text = element_text(size = 14))+
  theme(axis.text.x = element_text(face="bold", size=16))+
  theme(axis.title.y = element_text(size = 16))+
  theme(legend.key.size = unit(0.9, "cm"))+
  theme(legend.text = element_text(size = 18))+
  theme(legend.title = element_text(size=18, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0)
x11()
A_e1

B_e1=ggplot(barras_B_e1, aes(x=Plant, y=Proportion, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
                    labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
                    breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
  labs(y="Proportion of ASV (%)", x=NULL, fill="Condition",title="Plot 1 (2023)")+
  guides(fill = guide_legend(reverse = TRUE))+
  scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
                   breaks=c("Mother","Sucker"))+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 101))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
  theme(axis.text = element_text(size = 14))+
  theme(axis.text.x = element_text(face="bold", size=16))+
  theme(axis.title.y = element_text(size = 16))+
  theme(legend.key.size = unit(0.9, "cm"))+
  theme(legend.text = element_text(size = 18))+
  theme(legend.title = element_text(size=18, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0)
x11()
B_e1

x11()
p_Plot1_prop=grid.arrange(A_e1,B_e1,ncol=2, nrow=1, top=textGrob("Plot 1", gp=gpar(fontsize=18)))

#b) North plot
#b.1) 2022
totales_M_A_e2=nrow(A_e2_M_def2)
transferidos_A_e2=nrow(especif_A2_endo)
prop_transferidos_M_A_e2=(transferidos_A_e2 / totales_M_A_e2) *100
prop_perdidos_M_A_e2=((totales_M_A_e2-transferidos_A_e2) / totales_M_A_e2) *100

totales_I_A_e2=nrow(A_e2_I_def2)
prop_transferidos_I_A_e2=(transferidos_A_e2 / totales_I_A_e2) *100
prop_adquiridos_I_A_e2=((totales_I_A_e2-transferidos_A_e2) / totales_I_A_e2) *100

barras_A_e2=as.data.frame(rbind(prop_transferidos_M_A_e2,prop_perdidos_M_A_e2,
                                prop_transferidos_I_A_e2,prop_adquiridos_I_A_e2))
barras_A_e2$Year="A"
barras_A_e2$Plant=c("Mother", "Mother", "Sucker", "Sucker")
barras_A_e2$Condition=as.factor(c("Transmitted","Lost", "Transmitted", "Acquired"))
colnames(barras_A_e2)[1]="Proportion"

#b.2) 2023
totales_M_B_e2=nrow(B_e2_M_def2)
transferidos_B_e2=nrow(especif_B2_endo)
prop_transferidos_M_B_e2=(transferidos_B_e2 / totales_M_B_e2) *100
prop_perdidos_M_B_e2=((totales_M_B_e2-transferidos_B_e2) / totales_M_B_e2) *100

totales_I_B_e2=nrow(B_e2_I_def2)
prop_transferidos_I_B_e2=(transferidos_B_e2 / totales_I_B_e2) *100
prop_adquiridos_I_B_e2=((totales_I_B_e2-transferidos_B_e2) / totales_I_B_e2) *100

barras_B_e2=as.data.frame(rbind(prop_transferidos_M_B_e2,prop_perdidos_M_B_e2,
                                prop_transferidos_I_B_e2,prop_adquiridos_I_B_e2))
barras_B_e2$Year="B"
barras_B_e2$Plant=c("Mother", "Mother", "Sucker", "Sucker")
barras_B_e2$Condition=as.factor(c("Transmitted","Lost", "Transmitted", "Acquired"))
colnames(barras_B_e2)[1]="Proportion"

A_e2=ggplot(barras_A_e2, aes(x=Plant, y=Proportion, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
                    labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
                    breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
  labs(y="Proportion of ASV (%)", x=NULL, fill="Condition",title="Plot 2 (2022)")+
  guides(fill = guide_legend(reverse = TRUE))+
  scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
                   breaks=c("Mother","Sucker"))+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 101))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
  theme(axis.text = element_text(size = 14))+
  theme(axis.text.x = element_text(face="bold", size=16))+
  theme(axis.title.y = element_text(size = 16))+
  theme(legend.key.size = unit(0.9, "cm"))+
  theme(legend.text = element_text(size = 18))+
  theme(legend.title = element_text(size=18, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0)
x11()
A_e2

B_e2=ggplot(barras_B_e2, aes(x=Plant, y=Proportion, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
                    labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
                    breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
  labs(y="Proportion of ASV (%)", x=NULL, fill="Condition",title="Plot 2 (2023)")+
  guides(fill = guide_legend(reverse = TRUE))+
  scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
                   breaks=c("Mother","Sucker"))+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 101))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
  theme(axis.text = element_text(size = 14))+
  theme(axis.text.x = element_text(face="bold", size=16))+
  theme(axis.title.y = element_text(size = 16))+
  theme(legend.key.size = unit(0.9, "cm"))+
  theme(legend.text = element_text(size = 18))+
  theme(legend.title = element_text(size=18, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0)
x11()
B_e2

x11()
p_Plot2_prop=grid.arrange(A_e2,B_e2,ncol=2, nrow=1, top=textGrob("Plot 2", gp=gpar(fontsize=18)))

With this code a bar plot is created (see Figure 7)

Figure 7. Bar plot indicating the transferred ASV, ASVs lost by the mothers, and ASVs acquired from the environment by the suckers in the first plot (south plot)

We are going to obtain the same plots but instead of plotting the proportion of ASVs lost, acquired and transferred, the relative abundance of each of them will be visualized.

#a) South plot
#a.1) 2022
perdidos_M_A_e1=subset(A_e1_M_def2, !(ASV_names %in% especif_A1_endo$ASV_names))#these are the ASVs that mothers lost
perdidos_M_A_e1$Promedio=rowMeans(subset(perdidos_M_A_e1, select =c(9:ncol(perdidos_M_A_e1))))#mean relative abundance of each of them
perdidos_M_A_e1_sum=sum(perdidos_M_A_e1$Promedio)#sum the relative abundance of all the ASVs lost by the mothers
adquiridos_I_A_e1=subset(A_e1_I_def2, !(ASV_names %in% especif_A1_endo$ASV_names))#these are the ASVs that suckers retain from the environment
adquiridos_I_A_e1$Promedio=rowMeans(subset(adquiridos_I_A_e1, select =c(9:ncol(adquiridos_I_A_e1))))#mean
adquiridos_I_A_e1_sum=sum(adquiridos_I_A_e1$Promedio)

especif_A1_endo$Promedio_Madre=rowMeans(subset(especif_A1_endo, select =c(9:19)))#mean relative abundance of transferred ASVs (in mothers samples)
especif_A1_endo_M_sum=sum(especif_A1_endo$Promedio_Madre)  
especif_A1_endo$Promedio_Hijas=rowMeans(subset(especif_A1_endo, select =c(20:29)))#mean relative abundance of acquired ASV from mothers (in suckers samples)
especif_A1_endo_I_sum=sum(especif_A1_endo$Promedio_Hijas)

Abund_A1_endo=c(especif_A1_endo_M_sum,perdidos_M_A_e1_sum,especif_A1_endo_I_sum,adquiridos_I_A_e1_sum)
barras_abund_A_e1=as.data.frame(cbind(barras_A_e1,Abund_A1_endo))#create a new dataframe with all the data
colnames(barras_abund_A_e1)[5]="Abundance"

write.table(data.frame("X"=rownames(perdidos_M_A_e1),perdidos_M_A_e1),"AbunRel_perdidos_Madre_A_e1.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(adquiridos_I_A_e1),adquiridos_I_A_e1),"AbunRel_AdquiridosAmbiente_A_e1.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(especif_A1_endo),especif_A1_endo),"AbunRel_TransferidosMadreHija_A_e1.txt",sep="\t",row.names = F)

#a.2) 2023
perdidos_M_B_e1=subset(B_e1_M_def2, !(ASV_names %in% especif_B1_endo$ASV_names))
perdidos_M_B_e1$Promedio=rowMeans(subset(perdidos_M_B_e1, select =c(9:ncol(perdidos_M_B_e1))))
perdidos_M_B_e1_sum=sum(perdidos_M_B_e1$Promedio)
adquiridos_I_B_e1=subset(B_e1_I_def2, !(ASV_names %in% especif_B1_endo$ASV_names))
adquiridos_I_B_e1$Promedio=rowMeans(subset(adquiridos_I_B_e1, select =c(9:ncol(adquiridos_I_B_e1))))
adquiridos_I_B_e1_sum=sum(adquiridos_I_B_e1$Promedio)

especif_B1_endo$Promedio_Madre=rowMeans(subset(especif_B1_endo, select =c(9:17)))
especif_B1_endo_M_sum=sum(especif_B1_endo$Promedio_Madre)  
especif_B1_endo$Promedio_Hijas=rowMeans(subset(especif_B1_endo, select =c(18:25)))
especif_B1_endo_I_sum=sum(especif_B1_endo$Promedio_Hijas)

Abund_B1_endo=c(especif_B1_endo_M_sum,perdidos_M_B_e1_sum,especif_B1_endo_I_sum,adquiridos_I_B_e1_sum)
barras_abund_B_e1=as.data.frame(cbind(barras_B_e1,Abund_B1_endo))
colnames(barras_abund_B_e1)[5]="Abundance"

write.table(data.frame("X"=rownames(perdidos_M_B_e1),perdidos_M_B_e1),"AbunRel_perdidos_Madre_B_e1.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(adquiridos_I_B_e1),adquiridos_I_B_e1),"AbunRel_AdquiridosAmbienteHija_B_e1.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(especif_B1_endo),especif_B1_endo),"AbunRel_TransferidosMadreHija_B_e1.txt",sep="\t",row.names = F)

#b) North plot
#b.1) 2022
perdidos_M_A_e2=subset(A_e2_M_def2, !(ASV_names %in% especif_A2_endo$ASV_names))
perdidos_M_A_e2$Promedio=rowMeans(subset(perdidos_M_A_e2, select =c(9:ncol(perdidos_M_A_e2))))
perdidos_M_A_e2_sum=sum(perdidos_M_A_e2$Promedio)
adquiridos_I_A_e2=subset(A_e2_I_def2, !(ASV_names %in% especif_A2_endo$ASV_names))
adquiridos_I_A_e2$Promedio=rowMeans(subset(adquiridos_I_A_e2, select =c(9:ncol(adquiridos_I_A_e2))))
adquiridos_I_A_e2_sum=sum(adquiridos_I_A_e2$Promedio)

especif_A2_endo$Promedio_Madre=rowMeans(subset(especif_A2_endo, select =c(9:20)))
especif_A2_endo_M_sum=sum(especif_A2_endo$Promedio_Madre)  
especif_A2_endo$Promedio_Hijas=rowMeans(subset(especif_A2_endo, select =c(21:28)))
especif_A2_endo_I_sum=sum(especif_A2_endo$Promedio_Hijas)

Abund_A2_endo=c(especif_A2_endo_M_sum,perdidos_M_A_e2_sum,especif_A2_endo_I_sum,adquiridos_I_A_e2_sum)
barras_abund_A_e2=as.data.frame(cbind(barras_A_e2,Abund_A2_endo))
colnames(barras_abund_A_e2)[5]="Abundance"

write.table(data.frame("X"=rownames(perdidos_M_A_e2),perdidos_M_A_e2),"AbunRel_perdidos_Madre_A_e2.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(adquiridos_I_A_e2),adquiridos_I_A_e2),"AbunRel_AdquiridosAmbienteHija_A_e2.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(especif_A2_endo),especif_A2_endo),"AbunRel_TransferidosMadreHija_A_e2.txt",sep="\t",row.names = F)

#b.2) 2023
perdidos_M_B_e2=subset(B_e2_M_def2, !(ASV_names %in% especif_B2_endo$ASV_names))
perdidos_M_B_e2$Promedio=rowMeans(subset(perdidos_M_B_e2, select =c(9:ncol(perdidos_M_B_e2))))
perdidos_M_B_e2_sum=sum(perdidos_M_B_e2$Promedio)
adquiridos_I_B_e2=subset(B_e2_I_def2, !(ASV_names %in% especif_B2_endo$ASV_names))
adquiridos_I_B_e2$Promedio=rowMeans(subset(adquiridos_I_B_e2, select =c(9:ncol(adquiridos_I_B_e2))))
adquiridos_I_B_e2_sum=sum(adquiridos_I_B_e2$Promedio)

especif_B2_endo$Promedio_Madre=rowMeans(subset(especif_B2_endo, select =c(9:18)))
especif_B2_endo_M_sum=sum(especif_B2_endo$Promedio_Madre)  
especif_B2_endo$Promedio_Hijas=rowMeans(subset(especif_B2_endo, select =c(19:26)))
especif_B2_endo_I_sum=sum(especif_B2_endo$Promedio_Hijas)

Abund_B2_endo=c(especif_B2_endo_M_sum,perdidos_M_B_e2_sum,especif_B2_endo_I_sum,adquiridos_I_B_e2_sum)
barras_abund_B_e2=as.data.frame(cbind(barras_B_e2,Abund_B2_endo))
colnames(barras_abund_B_e2)[5]="Abundance"

write.table(data.frame("X"=rownames(perdidos_M_B_e2),perdidos_M_B_e2),"AbunRel_perdidos_Madre_B_e2.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(adquiridos_I_B_e2),adquiridos_I_B_e2),"AbunRel_AdquiridosAmbienteHijas_B_e2.txt",sep="\t",row.names = F)
write.table(data.frame("X"=rownames(especif_B2_endo),especif_B2_endo),"AbunRel_TransferidosMadreHija_B_e2.txt",sep="\t",row.names = F)

A1_abun=ggplot(barras_abund_A_e1, aes(x=Plant, y=Abundance, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
                    labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
                    breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
  labs(y="Relative abundance of ASV (%)", x=NULL, fill="Condition",title="Plot 1 (2022)")+
  guides(fill = guide_legend(reverse = TRUE))+
  scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
                   breaks=c("Mother","Sucker"))+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 101))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
  theme(axis.text = element_text(size = 14))+
  theme(axis.text.x = element_text(face="bold", size=16))+
  theme(axis.title.y = element_text(size = 16))+
  theme(legend.key.size = unit(0.9, "cm"))+
  theme(legend.text = element_text(size = 18))+
  theme(legend.title = element_text(size=18, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0)
x11()
A1_abun

B1_abun=ggplot(barras_abund_B_e1, aes(x=Plant, y=Abundance, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
                    labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
                    breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
  labs(y="Relative abundance of ASV (%)", x=NULL, fill="Condition",title="Plot 1 (2023)")+
  guides(fill = guide_legend(reverse = TRUE))+
  scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
                   breaks=c("Mother","Sucker"))+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 101))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
  theme(axis.text = element_text(size = 14))+
  theme(axis.text.x = element_text(face="bold", size=16))+
  theme(axis.title.y = element_text(size = 16))+
  theme(legend.key.size = unit(0.9, "cm"))+
  theme(legend.text = element_text(size = 18))+
  theme(legend.title = element_text(size=18, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0)
x11()
B1_abun

A2_abun=ggplot(barras_abund_A_e2, aes(x=Plant, y=Abundance, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
                    labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
                    breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
  labs(y="Relative abundance of ASV (%)", x=NULL, fill="Condition",title="Plot 2 (2022)")+
  guides(fill = guide_legend(reverse = TRUE))+
  scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
                   breaks=c("Mother","Sucker"))+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 101))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
  theme(axis.text = element_text(size = 14))+
  theme(axis.text.x = element_text(face="bold", size=16))+
  theme(axis.title.y = element_text(size = 16))+
  theme(legend.key.size = unit(0.9, "cm"))+
  theme(legend.text = element_text(size = 18))+
  theme(legend.title = element_text(size=18, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0)
x11()
A2_abun

B2_abun=ggplot(barras_abund_B_e2, aes(x=Plant, y=Abundance, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+
  scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"),
                    labels=c("Transmitted","Lost", "Transmitted", "Acquired"),
                    breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+
  labs(y="Relative abundance of ASV (%)", x=NULL, fill="Condition",title="Plot 2 (2023)")+
  guides(fill = guide_legend(reverse = TRUE))+
  scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"),
                   breaks=c("Mother","Sucker"))+
  scale_y_continuous(expand=c(0.01,0.01),
                     breaks=c(0,10,20,30,40,50,60,70,80,90,100),
                     labels=c("0","10", "20","30","40","50","60","70","80","90","100"),
                     limits = c(NA, 101))+
  theme_bw()+
  theme(panel.border = element_rect(colour="black"))+
  theme(axis.title.x=element_blank())+
  theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+
  theme(axis.text = element_text(size = 14))+
  theme(axis.text.x = element_text(face="bold", size=16))+
  theme(axis.title.y = element_text(size = 16))+
  theme(legend.key.size = unit(0.9, "cm"))+
  theme(legend.text = element_text(size = 18))+
  theme(legend.title = element_text(size=18, face="bold"))+
  theme(legend.title.align=0)+
  theme(legend.text.align = 0)
x11()
B2_abun

x11()
p_abund_p1=grid.arrange(A1_abun,B1_abun,ncol=2, nrow=1, top=textGrob("Plot 1", gp=gpar(fontsize=18)))

x11()
p_abund_p2=grid.arrange(A2_abun,B2_abun,ncol=2, nrow=1, top=textGrob("Plot 2", gp=gpar(fontsize=18)))

8. Differentially abundant taxa

It is interesting to decipher which taxa show statistically significant differences between two groups of samples (i.e., differences in the abundance between mother and suckers plants). For that purpose, we will apply ANCOM-BC approach, especially designed for compositional data. Furthermore, we will use the function ancomloop of the package micro4all in order to compare all groups of samples among them (ANCOM-BC just compares the first group of samples in the dataset with all other groups)

We will apply this statistical test at both phylum and genus levels.

ANCOM-BC makes a data transformation and applies logarithms, so in this case, we shouldn’t use the normalized abundance. We should use raw phyloseq objects (in which the ASV counts is expressed in absolute abundance)

8.1 Phylum

ANCOM_phylum_rizo=ancomloop(input_object_phyloseq = rizo, grouping = "Fusion", 
                            ancom.options = list(global=FALSE, struc_zero=TRUE, n_cl=8),out.unclassified = TRUE, tax.level="Phylum")

###options:
#grouping: indicate your grouping variable
#n_cl: the number of cores
#global: apply the global test (for more information, read this paper: https://www.nature.com/articles/s41467-020-17041-7 and documentation of the function ancombc)
#struc_zero: remove "structural zeros" (not real zeros but zeros resulting from a wrong sampling procedure)
#out.unclassified: apply or not the test to unclassified taxa
#tax.level: indicate the taxonomical level that you want to analyze

write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[1]]),ANCOM_phylum_rizo[[1]]),file="ANCOM_Phylum_Rizo_2022_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[2]]),ANCOM_phylum_rizo[[2]]),file="ANCOM_Phylum_Rizo_2022_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[3]]),ANCOM_phylum_rizo[[3]]),file="ANCOM_Phylum_Rizo_2022_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[4]]),ANCOM_phylum_rizo[[4]]),file="ANCOM_Phylum_Rizo_2022_Plot2_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[5]]),ANCOM_phylum_rizo[[5]]),file="ANCOM_Phylum_Rizo_2023_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[6]]),ANCOM_phylum_rizo[[6]]),file="ANCOM_Phylum_Rizo_2023_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[7]]),ANCOM_phylum_rizo[[7]]),file="ANCOM_Phylum_Rizo_2023_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[8]]),ANCOM_phylum_rizo[[8]]),file="ANCOM_Phylum_Rizo_2023_Plot2_M.txt",sep="\t", row.names = F)

#b) ENDOSPHERE
ANCOM_phylum_endo=ancomloop(input_object_phyloseq = endo, grouping = "Fusion", 
                            ancom.options = list(global=FALSE, struc_zero=TRUE, n_cl=8),out.unclassified = TRUE, tax.level="Phylum")

write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[1]]),ANCOM_phylum_endo[[1]]),file="ANCOM_Phylum_endo_2022_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[2]]),ANCOM_phylum_endo[[2]]),file="ANCOM_Phylum_endo_2022_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[3]]),ANCOM_phylum_endo[[3]]),file="ANCOM_Phylum_endo_2022_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[4]]),ANCOM_phylum_endo[[4]]),file="ANCOM_Phylum_endo_2022_Plot2_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[5]]),ANCOM_phylum_endo[[5]]),file="ANCOM_Phylum_endo_2023_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[6]]),ANCOM_phylum_endo[[6]]),file="ANCOM_Phylum_endo_2023_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[7]]),ANCOM_phylum_endo[[7]]),file="ANCOM_Phylum_endo_2023_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[8]]),ANCOM_phylum_endo[[8]]),file="ANCOM_Phylum_endo_2023_Plot2_M.txt",sep="\t", row.names = F)

In this work, we analyzed exhaustively the results in Excel. Pay attention to the column entitled “diff_abbn”, which shows the phyla that were differentially abundant.

8.2 Genus

#a) RHIZOSPHERE
ANCOM_genus_rizo=ancomloop(input_object_phyloseq = rizo, grouping = "Fusion", 
                            ancom.options = list(global=FALSE, struc_zero=TRUE, n_cl=8),out.unclassified = TRUE, tax.level="Genus")


write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[1]]),ANCOM_genus_rizo[[1]]),file="ANCOM_Genus_Rizo_2022_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[2]]),ANCOM_genus_rizo[[2]]),file="ANCOM_Genus_Rizo_2022_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[3]]),ANCOM_genus_rizo[[3]]),file="ANCOM_Genus_Rizo_2022_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[4]]),ANCOM_genus_rizo[[4]]),file="ANCOM_Genus_Rizo_2022_Plot2_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[5]]),ANCOM_genus_rizo[[5]]),file="ANCOM_Genus_Rizo_2023_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[6]]),ANCOM_genus_rizo[[6]]),file="ANCOM_Genus_Rizo_2023_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[7]]),ANCOM_genus_rizo[[7]]),file="ANCOM_Genus_Rizo_2023_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[8]]),ANCOM_genus_rizo[[8]]),file="ANCOM_Genus_Rizo_2023_Plot2_M.txt",sep="\t", row.names = F)

#a) ENDOSPHERE
ANCOM_genus_endo=ancomloop(input_object_phyloseq = endo, grouping = "Fusion", 
                            ancom.options = list(global=FALSE, struc_zero=TRUE, n_cl=8),out.unclassified = TRUE, tax.level="Genus")

names(ANCOM)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[1]]),ANCOM_genus_endo[[1]]),file="ANCOM_Genus_Endo_2022_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[2]]),ANCOM_genus_endo[[2]]),file="ANCOM_Genus_Endo_2022_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[3]]),ANCOM_genus_endo[[3]]),file="ANCOM_Genus_Endo_2022_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[4]]),ANCOM_genus_endo[[4]]),file="ANCOM_Genus_Endo_2022_Plot2_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[5]]),ANCOM_genus_endo[[5]]),file="ANCOM_Genus_Endo_2023_Plot1_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[6]]),ANCOM_genus_endo[[6]]),file="ANCOM_Genus_Endo_2023_Plot1_M.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[7]]),ANCOM_genus_endo[[7]]),file="ANCOM_Genus_Endo_2023_Plot2_I.txt",sep="\t", row.names = F)
write.table(data.frame("_"=rownames(ANCOM_genus_endo[[8]]),ANCOM_genus_endo[[8]]),file="ANCOM_Genus_Endo_2023_Plot2_M.txt",sep="\t", row.names = F)

In this work, we analyzed exhaustively the results in Excel. Pay attention to the column entitled “diff_abbn”, which shows the genera that were differentially abundant.

--- title: "Ecological analyses of fungal dataset" format: html: toc: true execute: eval: false editor_options: chunk_output_type: console --- Firstly, we have to install and load many packages needed to analyze our fungal data: ```{r} if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("ANCOMBC") BiocManager::install("phyloseq") if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("microbiome") install.packages("dplyr") install.packages("tidyverse") install.packages("ggplot2") install.packages("GUniFrac") install.packages("phangorn") install.packages("vegan") install.packages("gdata") install.packages("purrr") install.packages("devtools") install.packages("rstatix") install.packages("ggstasplot") install.packages("DescTools") install.packages("tibble") install.packages("WRS2") install.packages("tibble") install.packages("ARTool") if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("edgeR") remotes::install_github("wilkelab/ggtext") install_github("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis") install_github("nuriamw/micro4all") library(ANCOMBC) library(phyloseq) library(microbiome) library(dplyr) library(tidyverse) library(ggplot2) library(GUniFrac) library(vegan) library(gdata) library(purrr) library(devtools) library(rstatix) library(ggstatsplot) library(DescTools) library(tibble) library(WRS2) library(ARTool) library(edgeR) library(pairwiseAdonis) library(micro4all) ``` Our analyses will based on phyloseq objects. That is to say, we are going to use [phyloseq](http://dx.plos.org/10.1371/journal.pone.0061217 "Paper describing phyloseq package") package # 1. Create a phyloseq object phyloseq object are very useful since they harbor several elements of interest: an ASV (or OTU) table in which the absolute abundance of each ASV is registered, the taxonomy of each ASV (from Domain or Kingdom to ASV level), a metadata table in which a full description of each sample is given, and finally, a phylogenetic tree at ASV level. Thus, lets load all the input files and create the phyloseq object. So, all the above mentioned files should be in our working directory: ```{r} data=read.table("ASV_Hongos_FINAL.txt", header = T, sep="\t") data$Kingdom=gsub("k__","",data$Kingdom)#replace the name of the taxa, bause Unite databse adds a code for each taxonomic rank data$Phylum=gsub("p__","",data$Phylum) data$Class=gsub("c__","",data$Class) data$Order=gsub("o__","",data$Order) data$Family=gsub("f__","",data$Family) data$Genus=gsub("g__","",data$Genus) colnames_data=colnames(data) mt=read.table("metadata_fungi.txt",sep="\t", header=T)#load the metadata table mt$Year=gsub("2022", "A", mt$Year)#we replaced the number of the years by a character mt$Year=gsub("2023", "B", mt$Year) mt$Replicate=as.character(mt$Replicate)#convert into characters mt$Fusion=paste0(mt$Year, sep="_",mt$Plot, sep="_", mt$Condition,sep="_", mt$Compartment)#here we create a joined artificial variable in order to plot better our samples mt$Condition_Year=paste0(mt$Condition,sep="_", mt$Year) row.names(mt)=mt$Sample#the rownames of the metadata table should be identical to the name of the samples ``` ::: collout-important The order of the sample is very important! In particular, the order of the samples in the metadata file must be the same as the order of the samples in the ASV table. So, check (and correct it) previously. ::: ```{r} tax =data[,2:8]#indicate the taxonomy of each ASV: from Domain/Kingdom to ASV level ASV=data[,9:ncol(data)]#indicate the ASV counts dna=Biostrings::DNAStringSet(data$ASV_seqs) #save the sequence of each ASV into a variable names(dna)= data$ASV_names row.names(tax)=data$ASV_names#the name of each row should be the name of each ASV (i.e., "ASV02156") row.names(ASV)=data$ASV_names identical(rownames(ASV), rownames(tax))#this is very important and should give "TRUE". Otherwise, review the lines above because there is a mistake elsewhere #Convert each element into suitable objects for the construction of a phyloseq object phy_OTUtable=otu_table(ASV, taxa_are_rows = T) phy_taxonomy=tax_table(as.matrix(tax)) phy_metadata=sample_data(mt) phy_data_total=phyloseq(phy_OTUtable,phy_taxonomy,phy_metadata)#bind all the elements into the phyloseq object nsamples(phy_OTUtable); sum(sample_names(phy_metadata) %in% sample_names(phy_OTUtable)); nsamples(phy_metadata)#check all the elements have the same number of samples ``` # 2. Data curation The phyloseq object is already created, however, we still have to check the quality of the data. For instance, we have to check if all the samples are comprised by enough sequences, and if the sampling and sequencing efforts are enough. For that purpose, we have to check the number of total sequences. ```{r} numsec=as.data.frame(colSums(otu_table(phy_data_total))) View(numsec)#order the table in ascending order to check the lowest library size (the sample with the lowest number of sequences) ``` We have to remove some samples that were not well sterilized, as we detected in the [bacterial dataset](Bacteria_data_analysis.qmd). ```{r} phy_total=subset_samples(phy_data_total, Sample != "A1S04E" & Sample != "A1S05E" & Sample!= "A1M01E"& Sample != "A2S09E" & Sample != "A2S12E"& Sample != "A2S08E" & Sample!="A2S01E"& Sample !="B1S03E"& Sample != "B1S06E"& Sample!="B1S09E"& Sample!="B1S10E"& Sample !="B1M12E"& Sample != "B2S06E" & Sample !="B2S11E" & Sample !="B2S07E" & Sample !="B2S02E" & Sample !="B2M08E" & Sample !="B2M03E") numsec_pruned=as.data.frame(colSums(otu_table(phy_total))) ``` Now, it would be interesting to check the quality of the sterilization process. We want to visualize in an ordination plot if rhizosphere and root endosphere samples are separated in the multivariate space. As indicated in the step 5, we have to normalize the ASV counts and then, calculate the ordination of the samples and plot them into a multivariate ordination plot: ```{r} #Let's sub-sample the phyloseq objects AA=subset_samples(phy_total, Year=="A") AA=prune_taxa(taxa_sums(AA)>0.0, AA) BB=subset_samples(phy_total, Year=="B") BB=prune_taxa(taxa_sums(BB)>0.0, BB) #normalization of the data. For instance, the data corresponding to year 2023 todas_B_ASV=as.data.frame(otu_table(BB,taxa_are_rows = T))#obtain the ASV table from the phyloseq object todas_B_mt=as.data.frame(sample_data(BB))#obtain the metadata table from the phyloseq object todas_B_tax=as.data.frame(tax_table(BB))#obtain the taxonomy table from the phyloseq objec #all the previous steps are required to create a new object suitable for edgeR functions todas_B_edgeR = DGEList(counts = todas_B_ASV, samples = todas_B_mt, genes = todas_B_tax)#Create a DGEList object need for the TMM normalization todas_B_edgeR = calcNormFactors(todas_B_edgeR)#calculate all the normalization factor so that we can correct potential biases associated to the different librazy sizes (different number of sequences per sample) todas_B_ASV_norm = cpm(todas_B_edgeR, normalized.lib.sizes=T, log=F)#extracting the normalized abundance todas_B_phy_OTU_norm=otu_table(as.data.frame(todas_B_ASV_norm,row.names=F), taxa_are_rows = T)#create the normalized phyloseq elements todas_B_phy_taxonomy_norm=tax_table(as.matrix(todas_B_tax)) todas_B_phy_metadata_norm=sample_data(todas_B_mt) taxa_names(todas_B_phy_OTU_norm)= taxa_names(todas_B_phy_taxonomy_norm)#if everything is well calculated, it should give "TRUE" identical(rownames(todas_B_ASV_norm), rownames(todas_B_tax)) todas_B_normalized_phyloseq<-phyloseq(todas_B_phy_OTU_norm, todas_B_phy_taxonomy_norm, todas_B_phy_metadata_norm)#Create the new phyloseq object, in which the abundance of all the taxa is normalized ``` One the normalization is done, the plot is made as follows: ```{r} PCOA_bray_B = ordinate(todas_B_normalized_phyloseq, "PCoA", "bray")#here we create the ordination plot (PCoA) based on Bray-Curtis dissimilarities peso2=PCOA_bray_B$values$Relative_eig[1:2] #save the percentage of the variance explained by each axis into a new variable peso2 #check the variance explained p= plot_ordination(todas_B_normalized_phyloseq, PCOA_bray_B,type= "samples", color= "Compartment",shape = "Year")+ geom_point(alpha = 4, size = 4.5)+ labs(x=paste0("Axis 1 (",round(peso2[1]*100,digits = 2),"%)"), y=paste0("Axis 2 (",round(peso2[2]*100,digits = 2),"%)"))+ geom_text(aes(label=Sample, fontface="bold"), hjust=0.15, vjust=0.15, nudge_y = 0.0075,size=4, show.legend =F) + scale_shape_manual(values=c("2022"=16, "2023"=17))+ scale_color_manual(values=c("Rhizosphere"="blue", "Endosphere"="green"))+ theme_bw()+ theme(legend.key=element_blank(), legend.title.align = 0, legend.title = element_text(face="bold",size=18), legend.text.align = 0, axis.text = element_text(size=18), axis.title = element_text(size = 20), plot.title = element_text(hjust=0.5, face="bold",size=18), legend.text = element_text(size = 18))+ geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+ geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+ ggtitle("Year 2023\nPCoA on Bray Curtis distance") x11() p ``` As shown in Figure 1, there are still some root endosphere samples (**B1M10E** and **B1M09E**) that are closer than rhizosphere samples than other root endosphere samples. Thus, we consider that these roots were not properly sterilized. We should remove them because we are not aware of their origin ![Figure1. PCoA based on Bray-Curtis dissimilarities including the rhizosphere and root endosphere samples of plants (2023)](images/PCoA_Fungi.jpg){fig-align="center"} This, we have to curate our dataset, and remove all these samples: ```{r} phy_total_TOTAL=subset_samples(phy_total, Sample != "B1M10E" & Sample != "B1M09E") numsec_pruned_TOTAL=as.data.frame(colSums(otu_table(phy_total_TOTAL))) write.table(data.frame(" "=rownames(numsec_pruned_TOTAL),numsec_pruned_TOTAL),file="Numero_secuencias_deTrabajo.txt", sep="\t",row.names =F)#save the table with the number of total sequences per sample ``` # 3. Rarefaction curves We still have to check whether the sequencing effort has been enough, so let’s have a look at rarefaction curves. They are the best way to visualize the quality of the sequencing and sampling. In our case, we should visualize the rarefaction curves by plant compartment, to check whether the data curation was made well. We will split according to the orchards, otherwise it will be really difficult to understand the graphs. So, let’s subset the data according to our experimental design. We will create new phyloseq objects: two per plant compartment, corresponding to each orchard or plot. ```{r} rizo1=subset_samples(phy_total_TOTAL, Compartment=="Rhizosphere" & Plot=="Plot1") rizo1=prune_taxa(taxa_sums(rizo1)>0.0, rizo1) rizo2=subset_samples(phy_total_TOTAL, Compartment=="Rhizosphere" & Plot== "Plot2") rizo2=prune_taxa(taxa_sums(rizo2)>0.0, rizo2) endo1=subset_samples(phy_total_TOTAL, Compartment=="Endosphere" & Plot=="Plot1") endo1=prune_taxa(taxa_sums(endo1)>0.0, endo1) endo2=subset_samples(phy_total_TOTAL, Compartment=="Endosphere" & Plot== "Plot2") endo2=prune_taxa(taxa_sums(endo2)>0.0, endo2) ``` Now, calculate and plot the rarefaction curves: ```{r} #a) Rhizosphere #a.1) South plot min(sample_sums(rizo1)) #get the minimum library size (the number of sequences) colSums(otu_table(rizo1))[which.min(colSums(otu_table(rizo1)))] mt=as.data.frame(sample_data(rizo1))[order(as.character(rownames(as.data.frame(sample_data(rizo1)))),decreasing=F),] otu = otu_table(rizo1) otu = as.data.frame(t(otu)) otu=otu[order(as.character(rownames(otu)),decreasing=FALSE),] rownames(otu)==rownames(mt) #we want to color in different colors mothers and suckers, and also the samples taken in each year rownames(otu)=paste0(rownames(otu),"/",mt$Condition_Year) sample_names = rownames(otu) out <- rarecurve(otu, step = 100, label = F) #calculate the rarefaction rare <- lapply(out, function(x){ b <- as.data.frame(x) b <- data.frame(ASV = b[,1], raw.read = rownames(b)) b$raw.read <- as.numeric(gsub("N", "", b$raw.read)) return(b) })#this function is to create a table from the rarefaction variable names(rare) <- sample_names rare <- map_dfr(rare, function(x){ z <- data.frame(x) return(z) }, .id = "Sample") rare$Condition_Year=rare$Sample #these lines aimed at creating a new column in which the information needed for the plotting is included. You should include here the factor that determines the color of the curves rare$Condition_Year=gsub(".*/", "", rare$Condition_Year)#replace all the text before "/" rare$raw.read=as.numeric(rare$raw.read) p_rizo1=ggplot(rare, aes(x=raw.read, y=ASV, colour=Condition_Year, group=Sample)) + theme_bw()+ geom_point(aes(colour=Condition_Year), size=0.85)+ geom_line(aes(colour=Condition_Year),size=1.2)+ geom_vline(aes(xintercept = min(sample_sums(rizo1))), lty=1, colour="black")+ scale_fill_manual(values = c("Immature_A"="cornflowerblue", "Mature_A"="darkgreen", "Immature_B"="#6600CC" ,"Mature_B"="magenta"))+ scale_color_manual(values = c("Immature_A"="cornflowerblue", "Mature_A"="darkgreen", "Immature_B"="#6600CC" ,"Mature_B"="magenta"), name="Condition (sampling year)", breaks=c("Immature_A", "Mature_A", "Immature_B" ,"Mature_B"), labels=c("Immature (2022)", "Mature (2022)", "Immature (2023)","Mature (2023)"))+ labs(title= "Rhizosphere; Plot 1", x="Number of sequences", y="Number of ASV")+ guides(alpha=FALSE)+ theme(legend.key=element_blank(), legend.title.align = 0.85, legend.title = element_text(face="bold",size=14), axis.text = element_text(size=14), axis.title = element_text(size = 16), plot.title = element_text(hjust=0.5, face="bold", size=16), legend.text = element_text(size = 16)) x11() p_rizo1 #a.2) North plot min(sample_sums(rizo2)) colSums(otu_table(rizo2))[which.min(colSums(otu_table(rizo2)))] mt=as.data.frame(sample_data(rizo2))[order(as.character(rownames(as.data.frame(sample_data(rizo2)))),decreasing=F),] otu = otu_table(rizo2) otu = as.data.frame(t(otu)) otu=otu[order(as.character(rownames(otu)),decreasing=FALSE),] rownames(otu)==rownames(mt) rownames(otu)=paste0(rownames(otu),"/",mt$Condition_Year) sample_names = rownames(otu) out <- rarecurve(otu, step = 100, label = F) rare <- lapply(out, function(x){ b <- as.data.frame(x) b <- data.frame(ASV = b[,1], raw.read = rownames(b)) b$raw.read <- as.numeric(gsub("N", "", b$raw.read)) return(b) }) names(rare) <- sample_names rare <- map_dfr(rare, function(x){ z <- data.frame(x) return(z) }, .id = "Sample") rare$Condition_Year=rare$Sample rare$Condition_Year=gsub(".*/", "", rare$Condition_Year) rare$raw.read=as.numeric(rare$raw.read) p_rizo2=ggplot(rare, aes(x=raw.read, y=ASV, colour=Condition_Year, group=Sample)) + theme_bw()+ geom_point(aes(colour=Condition_Year), size=0.85)+ geom_line(aes(colour=Condition_Year),size=1.2)+ geom_vline(aes(xintercept = min(sample_sums(rizo2))), lty=1, colour="black")+ scale_fill_manual(values = c("Immature_A"="magenta", "Mature_A"="darkolivegreen2", "Immature_B"="#993300" ,"Mature_B"="black"))+ scale_color_manual(values = c("Immature_A"="magenta", "Mature_A"="darkolivegreen2", "Immature_B"="#993300" ,"Mature_B"="black"), name="Condition (sampling year)", breaks=c("Immature_A", "Mature_A", "Immature_B" ,"Mature_B"), labels=c("Immature (2022)", "Mature (2022)", "Immature (2023)","Mature (2023)"))+ labs(title= "Rhizosphere; Plot 2", x="Number of sequences", y="Number of ASV")+ guides(alpha=FALSE)+ theme(legend.key=element_blank(), legend.title.align = 0.85, legend.title = element_text(face="bold",size=14), axis.text = element_text(size=14), axis.title = element_text(size = 16), plot.title = element_text(hjust=0.5, face="bold", size=16), legend.text = element_text(size = 16)) x11() p_rizo2 #b) ENDOSPHERE #b.1) South plot min(sample_sums(endo1)) colSums(otu_table(endo1))[which.min(colSums(otu_table(endo1)))] mt=as.data.frame(sample_data(endo1))[order(as.character(rownames(as.data.frame(sample_data(endo1)))),decreasing=F),] otu = otu_table(endo1) otu = as.data.frame(t(otu)) otu=otu[order(as.character(rownames(otu)),decreasing=FALSE),] rownames(otu)==rownames(mt) rownames(otu)=paste0(rownames(otu),"/",mt$Condition_Year) sample_names = rownames(otu) out <- rarecurve(otu, step = 100, label = F) rare <- lapply(out, function(x){ b <- as.data.frame(x) b <- data.frame(ASV = b[,1], raw.read = rownames(b)) b$raw.read <- as.numeric(gsub("N", "", b$raw.read)) return(b) }) names(rare) <- sample_names rare <- map_dfr(rare, function(x){ z <- data.frame(x) return(z) }, .id = "Sample") rare$Condition_Year=rare$Sample rare$Condition_Year=gsub(".*/", "", rare$Condition_Year) rare$raw.read=as.numeric(rare$raw.read) p_endo1=ggplot(rare, aes(x=raw.read, y=ASV, colour=Condition_Year, group=Sample)) + theme_bw()+ geom_point(aes(colour=Condition_Year), size=0.85)+ geom_line(aes(colour=Condition_Year),size=1.2)+ geom_vline(aes(xintercept = min(sample_sums(endo1))), lty=1, colour="black")+ scale_fill_manual(values = c("Immature_A"="#00CCFF", "Mature_A"="green", "Immature_B"="#6600CC" ,"Mature_B"="magenta"))+ scale_color_manual(values = c("Immature_A"="#00CCFF", "Mature_A"="green", "Immature_B"="#6600CC" ,"Mature_B"="magenta"), name="Condition (sampling year)", breaks=c("Immature_A", "Mature_A", "Immature_B" ,"Mature_B"), labels=c("Immature (2022)", "Mature (2022)", "Immature (2023)","Mature (2023)"))+ labs(title= "Endosphere; Plot 1", x="Number of sequences", y="Number of ASV")+ guides(alpha=FALSE)+ theme(legend.key=element_blank(), legend.title.align = 0.85, legend.title = element_text(face="bold",size=14), axis.text = element_text(size=14), axis.title = element_text(size = 16), plot.title = element_text(hjust=0.5, face="bold", size=16), legend.text = element_text(size = 16)) x11() p_endo1 #b.2) North plot min(sample_sums(endo2)) colSums(otu_table(endo2))[which.min(colSums(otu_table(endo2)))] mt=as.data.frame(sample_data(endo2))[order(as.character(rownames(as.data.frame(sample_data(endo2)))),decreasing=F),] otu = otu_table(endo2) otu = as.data.frame(t(otu)) otu=otu[order(as.character(rownames(otu)),decreasing=FALSE),] rownames(otu)==rownames(mt) rownames(otu)=paste0(rownames(otu),"/",mt$Condition_Year) sample_names = rownames(otu) out <- rarecurve(otu, step = 100, label = F) rare <- lapply(out, function(x){ b <- as.data.frame(x) b <- data.frame(ASV = b[,1], raw.read = rownames(b)) b$raw.read <- as.numeric(gsub("N", "", b$raw.read)) return(b) }) names(rare) <- sample_names rare <- map_dfr(rare, function(x){ z <- data.frame(x) return(z) }, .id = "Sample") rare$Condition_Year=rare$Sample rare$Condition_Year=gsub(".*/", "", rare$Condition_Year) rare$raw.read=as.numeric(rare$raw.read) p_endo2=ggplot(rare, aes(x=raw.read, y=ASV, colour=Condition_Year, group=Sample)) + theme_bw()+ geom_point(aes(colour=Condition_Year), size=0.85)+ geom_line(aes(colour=Condition_Year),size=1.2)+ geom_vline(aes(xintercept = min(sample_sums(endo2))), lty=1, colour="black")+ scale_fill_manual(values = c("Immature_A"="blueviolet", "Mature_A"="aquamarine", "Immature_B"="#993300" ,"Mature_B"="black"))+ scale_color_manual(values = c("Immature_A"="blueviolet", "Mature_A"="aquamarine", "Immature_B"="#993300" ,"Mature_B"="black"), name="Condition (sampling year)", breaks=c("Immature_A", "Mature_A", "Immature_B" ,"Mature_B"), labels=c("Immature (2022)", "Mature (2022)", "Immature (2023)","Mature (2023)"))+ labs(title= "Endosphere; Plot 2", x="Number of sequences", y="Number of ASV")+ guides(alpha=FALSE)+ theme(legend.key=element_blank(), legend.title.align = 0.85, legend.title = element_text(face="bold",size=14), axis.text = element_text(size=14), axis.title = element_text(size = 16), plot.title = element_text(hjust=0.5, face="bold", size=16), legend.text = element_text(size = 16)) x11() p_endo2 ``` As shown in Figures 2-5, at the same rarefaction levels, most of the samples reached to the asymptote, so the sequencing effort can be considered enough. Furthermore, no clear patterns of richnes can be deduced from the rarefaction curves. ![Figure 2. Rarefaction curves corresponding to rhizosphere samples of the south plot (also named "Plot 1").](images/Rizo_Plot1.jpg){fig-align="center" width="700"} ![Figure 3. Rarefaction curves corresponding to rhizosphere samples of the north plot (also named "Plot 2").](images/Rizo_Plot2.jpg){fig-align="center" width="700"} ![Figure 4. Rarefaction curves corresponding to root endosphere samples of the south plot (also named "Plot 1").](images/Endo_Plot1.jpg){fig-align="center" width="700"} ![Figure 5. Rarefaction curves corresponding to root endosphere samples of the north plot (also named "Plot 2").](images/Endo_Plot2.jpg){fig-align="center" width="700"} # 4. Alpha diversity analysis We are going to estimate the richness, diversity and evenness of each sample by calculating the number of observed ASVs, and Shannon’s and Inverse of Simpson and Pielou’s indices, respectively. Some alpha diversity indices are sensitive to different library sizes, so first, we will rarefy all the samples to the smallest library size to avoid potential biases associated to different number of sequences. ::: callout-note It should be made a rarefaction per groups of samples to be compared. For instance, if we want to compare the diversity of rhizosphere bacterial communitites of Mother and Suckers plants located in the south plot, we should calculate the rarefaction level corresponding to the subgroup of Rhizosphere samples of the south plot (namely, object *rizo1*). And then, rarefy to the smallest library size of the samples included in the object *rizo1*. ::: ```{r} #a) Endosphere #a.1) South plot rarefac_endo1=rarefy_even_depth(endo1, sample.size = min(sample_sums(endo1)),rngseed=T)#rarefaction. You can indicate manually the minimum sample size or by using the function "min" sample_sums(rarefac_endo1)#check that now all the samples have the same number of sequences colSums(otu_table(rarefac_endo1))[which.min(colSums(otu_table(rarefac_endo1)))] indices_endo1=estimate_richness(rarefac_endo1, measures=c("Observed", "InvSimpson", "Shannon"))#calculate the indices indices_endo1$Pielou=indices_endo1$Shannon/log(indices_endo1$Observed)#calculated manually the Pielou index rownames(sample_data(rarefac_endo1))==row.names(indices_endo1) mt_indices_endo1=data.frame(sample_data(rarefac_endo1))#extract the metadata indices_endo1=add_column(indices_endo1, mt_indices_endo1[1:ncol(mt_indices_endo1)], .before = "Observed")#add the metadata to the indices table, before the column named "Observed" write.table(data.frame(" "=rownames(indices_endo1),indices_endo1),file="Indices_Endo_Plot1_porReplicas.txt", sep="\t",row.names =F) #We have already calculated the indices per replicates but we are interested in the mean and standard values per groups of samples, so let's calculate them: media_endo1=aggregate(indices_endo1[,11:14], list(grouping=indices_endo1$Condition_Year), mean)%>% mutate_if(is.numeric, round, digits=2)#be careful. Here you have to indicate the position of the columns where the values of the indices are placed sd_endo1=aggregate(indices_endo1[,11:14], list(grouping=indices_endo1$Condition_Year), sd)%>% mutate_if(is.numeric, round, digits=2) mean_sd = NULL for (i in 2:5){ #be careful with the position of the numeric values (alpha indices) mean_sd <- cbind(mean_sd,paste0(media_endo1[,i], " +/- ", sd_endo1[,i]))}#then in Excel you can replace "+/-" by the corresponding symbol tabla_publicaciones_endo1 = cbind(media_endo1$grouping, mean_sd) colnames(tabla_publicaciones_endo1) = c("Group","Observed", "Shannon", "InvSimpson", "Pielou") as.data.frame(tabla_publicaciones_endo1) write.table(tabla_publicaciones_endo1,file="Indices_media_sd_Endo_Plot1.txt", sep="\t",row.names =F) #a.2) North plot rarefac_endo2=rarefy_even_depth(endo2, sample.size = min(sample_sums(endo2)),rngseed=T) sample_sums(rarefac_endo2) colSums(otu_table(rarefac_endo2))[which.min(colSums(otu_table(rarefac_endo2)))] indices_endo2=estimate_richness(rarefac_endo2, measures=c("Observed", "InvSimpson", "Shannon")) indices_endo2$Pielou=indices_endo2$Shannon/log(indices_endo2$Observed) rownames(sample_data(rarefac_endo2))==row.names(indices_endo2) mt_indices_endo2=data.frame(sample_data(rarefac_endo2)) indices_endo2=add_column(indices_endo2, mt_indices_endo2[1:ncol(mt_indices_endo2)], .before = "Observed") write.table(data.frame(" "=rownames(indices_endo2),indices_endo2),file="Indices_Endo_Plot2_porReplicas.txt", sep="\t",row.names =F) media_endo2=aggregate(indices_endo2[,11:14], list(grouping=indices_endo2$Condition_Year), mean)%>% mutate_if(is.numeric, round, digits=2) sd_endo2=aggregate(indices_endo2[,11:14], list(grouping=indices_endo2$Condition_Year), sd)%>% mutate_if(is.numeric, round, digits=2) mean_sd = NULL for (i in 2:5){ mean_sd <- cbind(mean_sd,paste0(media_endo2[,i], " +/- ", sd_endo2[,i]))} tabla_publicaciones_endo2 = cbind(media_endo2$grouping, mean_sd) colnames(tabla_publicaciones_endo2) = c("Group","Observed", "Shannon", "InvSimpson", "Pielou") as.data.frame(tabla_publicaciones_endo2) write.table(tabla_publicaciones_endo2,file="Indices_media_sd_Endo_Plot2.txt", sep="\t",row.names =F) #b) Rhizosphere #b.1) South plot rarefac_rizo1=rarefy_even_depth(rizo1, sample.size = min(sample_sums(rizo1)),rngseed=T) sample_sums(rarefac_rizo1) colSums(otu_table(rarefac_rizo1))[which.min(colSums(otu_table(rarefac_rizo1)))] indices_rizo1=estimate_richness(rarefac_rizo1, measures=c("Observed", "InvSimpson", "Shannon")) indices_rizo1$Pielou=indices_rizo1$Shannon/log(indices_rizo1$Observed) rownames(sample_data(rarefac_rizo1))==row.names(indices_rizo1) mt_indices_rizo1=data.frame(sample_data(rarefac_rizo1)) indices_rizo1=add_column(indices_rizo1, mt_indices_rizo1[1:ncol(mt_indices_rizo1)], .before = "Observed") write.table(data.frame(" "=rownames(indices_rizo1),indices_rizo1),file="Indices_rizo_Plot1_porReplicas.txt", sep="\t",row.names =F) media_rizo1=aggregate(indices_rizo1[,11:14], list(grouping=indices_rizo1$Condition_Year), mean)%>% mutate_if(is.numeric, round, digits=2) sd_rizo1=aggregate(indices_rizo1[,11:14], list(grouping=indices_rizo1$Condition_Year), sd)%>% mutate_if(is.numeric, round, digits=2) mean_sd = NULL for (i in 2:5){ mean_sd <- cbind(mean_sd,paste0(media_rizo1[,i], " +/- ", sd_rizo1[,i]))} tabla_publicaciones_rizo1 = cbind(media_rizo1$grouping, mean_sd) colnames(tabla_publicaciones_rizo1) = c("Group","Observed", "Shannon", "InvSimpson", "Pielou") as.data.frame(tabla_publicaciones_rizo1) write.table(tabla_publicaciones_rizo1,file="Indices_media_sd_rizo_Plot1.txt", sep="\t",row.names =F) #Pb.2) North plot rarefac_rizo2=rarefy_even_depth(rizo2, sample.size = min(sample_sums(rizo2)),rngseed=T) sample_sums(rarefac_rizo2) colSums(otu_table(rarefac_rizo2))[which.min(colSums(otu_table(rarefac_rizo2)))] indices_rizo2=estimate_richness(rarefac_rizo2, measures=c("Observed", "InvSimpson", "Shannon")) indices_rizo2$Pielou=indices_rizo2$Shannon/log(indices_rizo2$Observed) rownames(sample_data(rarefac_rizo2))==row.names(indices_rizo2) mt_indices_rizo2=data.frame(sample_data(rarefac_rizo2)) indices_rizo2=add_column(indices_rizo2, mt_indices_rizo2[1:ncol(mt_indices_rizo2)], .before = "Observed") write.table(data.frame(" "=rownames(indices_rizo2),indices_rizo2),file="Indices_rizo_Plot2_porReplicas.txt", sep="\t",row.names =F) media_rizo2=aggregate(indices_rizo2[,11:14], list(grouping=indices_rizo2$Condition_Year), mean)%>% mutate_if(is.numeric, round, digits=2) sd_rizo2=aggregate(indices_rizo2[,11:14], list(grouping=indices_rizo2$Condition_Year), sd)%>% mutate_if(is.numeric, round, digits=2) mean_sd = NULL for (i in 2:5){ mean_sd <- cbind(mean_sd,paste0(media_rizo2[,i], " +/- ", sd_rizo2[,i]))} tabla_publicaciones_rizo2 = cbind(media_rizo2$grouping, mean_sd) colnames(tabla_publicaciones_rizo2) = c("Group","Observed", "Shannon", "InvSimpson", "Pielou") as.data.frame(tabla_publicaciones_rizo2) write.table(tabla_publicaciones_rizo2,file="Indices_media_sd_rizo_Plot2.txt", sep="\t",row.names =F) ``` ## 4.1 Statistical analysis We have just calculated the alpha indices per sample and per group of samples, but we have to compare them among group of samples. For that purpose, we will apply univariate statistic test. We will go into different steps, which briefly consist of: 1. Check the normality of the data: we have to confirm whether our variables follow a normal distribution. If they are not normally distributed, we will also check if they are very far from the normal distribution. 2. Check the homoscedasticity of the data: confirm if dispersion of the variable is the same in all the groups of samples to compare. If the distribution of the variables is normal (or almost normal) and there is homogeneity of variances, we will apply parametric tests. Otherwise, we will use non-parametric tests. 3. Check the presence of the outliers. Outliers are not a problem, but we have to check if they are extreme outliers. In that case, we will apply robust tests. 4. Apply the proper statistical test. Depending on the characteristics of the variables and the experimental design (one factor to be studied: one-way statistics; two factors: two-way statistical tests). 5. Calculate the size of the effect. In case we find statistical differences among groups of samples in the studied variables, we have to measure the magnitude of the effect. We have to study separately each individual variable (in this case, each alpha diversity index). In this example, we are interested in two factors: the effect of the sampling year, and the effect of the plants’ developmental stage, also named Condition in this web. ```{r} #a) ENDOSPHERE #a.1) North plot #Normality indices_endo1%>% group_by(Condition, Year)%>% shapiro_test(Observed) indices_endo1%>% group_by(Condition, Year)%>% shapiro_test(Shannon) indices_endo1%>% group_by(Condition, Year)%>% shapiro_test(InvSimpson) indices_endo1%>% group_by(Condition, Year)%>% shapiro_test(Pielou) x11()#let's see how far are our data from the normal distribution into a qq-plot. ggqqplot(indices_endo1, "InvSimpson", ggtheme = theme_bw()) + facet_grid(Condition ~ Year) #Homoscedasticity indices_endo1 %>% levene_test(Observed ~ Condition*Year) indices_endo1 %>% levene_test(Shannon ~ Condition*Year) indices_endo1 %>% levene_test(InvSimpson ~ Condition*Year) indices_endo1 %>% levene_test(Pielou ~ Condition*Year) #Outliers a=indices_endo1 %>% group_by(Condition,Year)%>% identify_outliers(Observed); View(a) b=indices_endo1 %>% group_by(Condition, Year)%>% identify_outliers(Shannon) ; View(b) c=indices_endo1 %>% group_by(Condition, Year)%>% identify_outliers(InvSimpson); View(c) d=indices_endo1 %>% group_by(Condition, Year)%>% identify_outliers(Pielou); View(d) #Hypothesis contrasting tests indices_endo1 %>% anova_test(Shannon ~ Condition * Year)#two-way ANOVA indices_endo1 %>% anova_test(InvSimpson ~ Condition * Year) indices_endo1 %>% anova_test(Pielou ~ Condition * Year) indices_endo1$Condition=as.factor(indices_endo1$Condition) indices_endo1$Year=as.factor(indices_endo1$Year) t2way(Observed ~ Year*Condition, data=indices_endo1)#two-way robust test #We did not find the interaction between Year and plant developmental Condition significant in any of the variables studied, but the effect of Year or Condition (or both) was statistaclly significant on some of the indices. In that case, we have to study the effect of each factor individually: #Post-hoc tests indices_endo1 %>%pairwise_t_test(Observed~Year,p.adjust.method = "holm") test=mcp2atm(Observed~Year*Condition, data=indices_endo1) #Effect size z=lm(Observed~Condition*Year, data=indices_endo1) eta_squared(z) #a.2) North plot #Normality indices_endo2%>% group_by(Condition, Year)%>% shapiro_test(Observed) indices_endo2%>% group_by(Condition, Year)%>% shapiro_test(Shannon) indices_endo2%>% group_by(Condition, Year)%>% shapiro_test(InvSimpson) indices_endo2%>% group_by(Condition, Year)%>% shapiro_test(Pielou) x11() ggqqplot(indices_endo2, "Observed", ggtheme = theme_bw()) + facet_grid(Condition ~ Year) #Homoscedasticity indices_endo2 %>% levene_test(Observed ~ Condition*Year)#heterocedastic indices_endo2 %>% levene_test(Shannon ~ Condition*Year) indices_endo2 %>% levene_test(InvSimpson ~ Condition*Year) indices_endo2 %>% levene_test(Pielou ~ Condition*Year) #Outliers a=indices_endo2 %>% group_by(Condition,Year)%>% identify_outliers(Observed); View(a) b=indices_endo2 %>% group_by(Condition, Year)%>% identify_outliers(Shannon); View(b) c=indices_endo2 %>% group_by(Condition, Year)%>% identify_outliers(InvSimpson); View(c) d=indices_endo2 %>% group_by(Condition, Year)%>% identify_outliers(Pielou); View(d) #Check wether with robust test the homoscedasticity is maintained indices_endo2%>% filter(between(InvSimpson, quantile(InvSimpson,0.1), quantile(InvSimpson, 0.9)))%>% levene_test(InvSimpson ~ Year*Condition) #Hypothesis contrasting tests #Observed ASVs is not homoscedastic, thus, we should apply non-parametric tests. However, there is not a suitable option for two-way tests, so let's transformate the data and then apply the corresponding test. Check this link: http://depts.washington.edu/acelab/proj/art/index.html indices_endo2$Condition=as.factor(indices_endo2$Condition) indices_endo2$Year=as.factor(indices_endo2$Year) m = art(Observed ~ Year * Condition, data=indices_endo2) # linear model syntax; see lm() anova(m) eta_squared(m) indices_endo2 %>% anova_test(Shannon ~ Condition * Year) t2way(InvSimpson ~ Condition*Year, data=indices_endo2) indices_endo2 %>% anova_test(Pielou ~ Condition * Year) #Post-hoc tests art.con(m, "Year:Condition", adjust="holm") %>% summary() %>% # add significance stars to the output mutate(sig. = symnum(p.value, corr=FALSE, na=FALSE, cutpoints = c(0, .001, .01, .05, .10, 1), symbols = c("***", "**", "*", ".", " "))) t_test(Observed~Year, data=indices_endo2, var.equal=F) indices_endo2 %>%pairwise_t_test(Shannon~Year) indices_endo2 %>%pairwise_t_test(Shannon~Condition) #b) RHIZOSPHERE #b.1) South plot #Normality indices_rizo1%>% group_by(Condition, Year)%>% shapiro_test(Observed) indices_rizo1%>% group_by(Condition, Year)%>% shapiro_test(Shannon) indices_rizo1%>% group_by(Condition, Year)%>% shapiro_test(InvSimpson) indices_rizo1%>% group_by(Condition, Year)%>% shapiro_test(Pielou) x11() ggqqplot(indices_rizo1, "Shannon", ggtheme = theme_bw()) + facet_grid(Condition ~ Year) x11() ggqqplot(indices_rizo1, "Pielou", ggtheme = theme_bw()) + facet_grid(Condition ~ Year) #Homoscedasticity indices_rizo1 %>% levene_test(Observed ~ Condition*Year) indices_rizo1 %>% levene_test(Shannon ~ Condition*Year) indices_rizo1 %>% levene_test(InvSimpson ~ Condition*Year) indices_rizo1 %>% levene_test(Pielou ~ Condition*Year) #Outliers a=indices_rizo1 %>% group_by(Condition,Year)%>% identify_outliers(Observed); View(a) b=indices_rizo1 %>% group_by(Condition, Year)%>% identify_outliers(Shannon); View(b) c=indices_rizo1 %>% group_by(Condition, Year)%>% identify_outliers(InvSimpson); View(c) d=indices_rizo1 %>% group_by(Condition, Year)%>% identify_outliers(Pielou); View(d) #check wether robust test mantain the homoscedasticity indices_rizo1%>% filter(between(Shannon,quantile(Shannon,0.1), quantile(Shannon, 0.9)))%>% levene_test(Shannon ~ Year*Condition) indices_rizo1%>% filter(between(Pielou,quantile(Pielou,0.1), quantile(Pielou, 0.9)))%>%levene_test(Pielou ~ Year*Condition) #Hypothesis contrasting tests indices_rizo1$Condition=as.factor(indices_rizo1$Condition) indices_rizo1$Year=as.factor(indices_rizo1$Year) indices_rizo1 %>% anova_test(Observed ~ Condition * Year) indices_rizo1$Year=as.factor(indices_rizo1$Year); indices_rizo1$Condition=as.factor(indices_rizo1$Condition) t2way(Shannon ~ Condition*Year, data=indices_rizo1) indices_rizo1 %>% anova_test(InvSimpson ~ Condition * Year) indices_rizo1 %>% anova_test(Pielou ~ Condition * Year) t2way(Pielou ~ Condition*Year, data=indices_rizo1) #Post-hoc tests indices_rizo1 %>%pairwise_t_test(Observed~Year,p.adjust.method = "holm") indices_rizo1 %>%pairwise_t_test(Observed~Condition,p.adjust.method = "holm") mcp2atm(InvSimpson~Year*Condition, data=indices_rizo1) mcp2atm(Pielou~Year*Condition, data=indices_rizo1) #Effect size: y=lm(Pielou~Year*Condition, data=indices_rizo1) eta_squared(y) #b.2) North plot #Normality indices_rizo2%>% group_by(Condition, Year)%>% shapiro_test(Observed) indices_rizo2%>% group_by(Condition, Year)%>% shapiro_test(Shannon) indices_rizo2%>% group_by(Condition, Year)%>% shapiro_test(InvSimpson) indices_rizo2%>% group_by(Condition, Year)%>% shapiro_test(Pielou) x11() ggqqplot(indices_rizo2, "Shannon", ggtheme = theme_bw()) + facet_grid(Condition ~ Year) x11() ggqqplot(indices_rizo2, "Pielou", ggtheme = theme_bw()) + facet_grid(Condition ~ Year) #Homoscedasticity indices_rizo2 %>% levene_test(Observed ~ Condition*Year) indices_rizo2 %>% levene_test(Shannon ~ Condition*Year) indices_rizo2 %>% levene_test(InvSimpson ~ Condition*Year) indices_rizo2 %>% levene_test(Pielou ~ Condition*Year) #Outliers a=indices_rizo2 %>% group_by(Condition,Year)%>% identify_outliers(Observed);View(a) b=indices_rizo2 %>% group_by(Condition, Year)%>% identify_outliers(Shannon);View(b) c=indices_rizo2 %>% group_by(Condition, Year)%>% identify_outliers(InvSimpson); View(c) d=indices_rizo2 %>% group_by(Condition, Year)%>% identify_outliers(Pielou); View(d) indices_rizo2%>% filter(between(Shannon, quantile(Shannon,0.1), quantile(Shannon, 0.9)))%>% levene_test(Shannon ~ Year*Condition) indices_rizo2%>% filter(between(Pielou, quantile(Pielou,0.1), quantile(Pielou, 0.9)))%>% levene_test(Pielou ~ Year*Condition) #Hypothesis contrasting tests indices_rizo2 %>% anova_test(Observed ~ Condition * Year) indices_rizo2$Year=as.factor(indices_rizo2$Year) indices_rizo2$Condition=as.factor(indices_rizo2$Condition) t2way(Shannon ~ Condition*Year, data=indices_rizo2) indices_rizo2 %>% anova_test(InvSimpson ~ Condition * Year) t2way(Pielou ~ Condition*Year, data=indices_rizo2) #Post-hoc tests indices_rizo2 %>%pairwise_t_test(Observed~Year) ``` We can also compare if the alpha diversity indices of fungal communities associated to mothers, first and second suckers are different each other. In the [index of this website](index.qmd "Click if you want more information"), you can find more details about the concepts of Mothers, first suckers and second suckers. These concepts are very similar to the concept of “Generation” in sexually reproducing plants, and that’s exactly the name of the factor employed here. So, we will compare the indices of these groups, by the factor *Generation*. ```{r} #a) Rhizosphere #a.1) South plot #Normality indices_rizo1%>% group_by(Generation)%>% shapiro_test(Observed) indices_rizo1%>% group_by(Generation)%>% shapiro_test(Shannon) indices_rizo1%>% group_by(Generation)%>% shapiro_test(InvSimpson) indices_rizo1%>% group_by(Generation)%>% shapiro_test(Pielou) x11() ggqqplot(indices_rizo1, "Shannon", ggtheme = theme_bw()) + facet_grid(Generation~Plot) x11() ggqqplot(indices_rizo1, "Pielou", ggtheme = theme_bw()) + facet_grid(Generation~ Plot) #Homoscedasticity indices_rizo1 %>% levene_test(Observed ~ Generation) indices_rizo1 %>% levene_test(Shannon ~ Generation) indices_rizo1 %>% levene_test(InvSimpson ~ Generation) indices_rizo1 %>% levene_test(Pielou ~ Generation) #outliers a=indices_rizo1 %>% group_by(Generation)%>% identify_outliers(Observed); View(a) b=indices_rizo1 %>% group_by(Generation)%>% identify_outliers(Shannon) ;View(b) c=indices_rizo1 %>% group_by(Generation)%>% identify_outliers(InvSimpson);View(c) d=indices_rizo1 %>% group_by(Generation)%>% identify_outliers(Pielou);View(d) #Check whether with robust methods the data are still homocedastic indices_rizo1%>% filter(between(Shannon, quantile(Shannon,0.1), quantile(Shannon, 0.9)))%>% levene_test(Shannon ~ Generation) indices_rizo1%>% filter(between(InvSimpson, quantile(InvSimpson,0.1), quantile(InvSimpson, 0.9)))%>% levene_test(InvSimpson ~ Generation) indices_rizo1%>% filter(between(Pielou, quantile(Pielou,0.1), quantile(Pielou, 0.9)))%>% levene_test(Pielou ~ Generation) #Hypothesis contrasting tests indices_rizo1 %>% anova_test(Observed ~ Generation) t1way(Shannon~Generation, data=indices_rizo1) t1way(InvSimpson~Generation, data=indices_rizo1) t1way(Pielou~Generation, data=indices_rizo1) #Post-hoc test indices_rizo1%>% pairwise_t_test(Observed~Generation) #a.2) North plots #Normality indices_rizo2%>% group_by(Generation)%>% shapiro_test(Observed) indices_rizo2%>% group_by(Generation)%>% shapiro_test(Shannon) indices_rizo2%>% group_by(Generation)%>% shapiro_test(InvSimpson) indices_rizo2%>% group_by(Generation)%>% shapiro_test(Pielou) x11() ggqqplot(indices_rizo2, "Shannon", ggtheme = theme_bw()) + facet_grid(Generation~Plot) x11() ggqqplot(indices_rizo2, "Pielou", ggtheme = theme_bw()) + facet_grid(Generation ~ Plot) #Homoscedasticity indices_rizo2 %>% levene_test(Observed ~ Generation) indices_rizo2 %>% levene_test(Shannon ~ Generation) indices_rizo2 %>% levene_test(InvSimpson ~ Generation) indices_rizo2 %>% levene_test(Pielou ~ Generation) #outliers a=indices_rizo2 %>% group_by(Generation)%>% identify_outliers(Observed); View(a) b=indices_rizo2 %>% group_by(Generation)%>% identify_outliers(Shannon) ;View(b) c=indices_rizo2 %>% group_by(Generation)%>% identify_outliers(InvSimpson);View(c) d=indices_rizo2 %>% group_by(Generation)%>% identify_outliers(Pielou);View(d) #Check whether with robust tests the data are still homocedastic indices_rizo2%>% filter(between(Shannon, quantile(Shannon,0.1), quantile(Shannon, 0.9)))%>% levene_test(Shannon ~ Generation) indices_rizo2%>% filter(between(InvSimpson, quantile(InvSimpson,0.1), quantile(InvSimpson, 0.9)))%\>% levene_test(InvSimpson ~ Generation) indices_rizo2%>% filter(between(Pielou, quantile(Pielou,0.1), quantile(Pielou, 0.9)))%>% levene_test(Pielou~ Generation) #Hypothesis contrasting tests indices_rizo2 %>% welch_anova_test(Observed~Generation)#ANOVA test that does no assume that the data are homocedastic t1way(Shannon~Generation, data=indices_rizo2) t1way(InvSimpson~Generation, data=indices_rizo2) t1way(Pielou~Generation, data=indices_rizo2) q=lm(Observed~Generation, data=indices_rizo2) eta_squared(q) #Post-hoc tests indices_rizo2%>% pairwise_t_test(Observed~Generation,p.adjust.method = "holm", pool.sd = F) #b) Endosphere #b.1) South plot #Normality indices_endo1%>% group_by(Generation)%>% shapiro_test(Observed) indices_endo1%>% group_by(Generation)%>% shapiro_test(Shannon) indices_endo1%>% group_by(Generation)%>% shapiro_test(InvSimpson) indices_endo1%>% group_by(Generation)%\>% shapiro_test(Pielou) x11() ggqqplot(indices_endo1, "InvSimpson", ggtheme = theme_bw()) + facet_grid(Generation~Plot) #Homoscedasticity indices_endo1 %>% levene_test(Observed ~ Generation) indices_endo1 %>% levene_test(Shannon ~ Generation) indices_endo1 %>% levene_test(InvSimpson ~ Generation) indices_endo1 %>% levene_test(Pielou ~ Generation) #outliers a=indices_endo1 %>% group_by(Generation)%>% identify_outliers(Observed);View(a) b=indices_endo1 %>% group_by(Generation)%>% identify_outliers(Shannon);View(b) c=indices_endo1 %>% group_by(Generation)%>% identify_outliers(InvSimpson);View(c) d=indices_endo1 %>% group_by(Generation)%>% identify_outliers(Pielou);View(d) #Check whether with robust test the data are still homocedastic indices_endo1%>% filter(between(InvSimpson, quantile(InvSimpson,0.1), quantile(InvSimpson, 0.9)))%\>% levene_test(InvSimpson ~ Generation) #Hypothesis contrasting tests indices_endo1 %>% anova_test(Observed ~ Generation) indices_endo1 %>% anova_test(Shannon ~ Generation) t1way(InvSimpson~Generation, data=indices_endo1) indices_endo1 %>% anova_test(Pielou ~ Generation) #Post-hoc indices_endo1%>% pairwise_t_test(Observed~Generation) #b.2) North plot #Normality indices_endo2%>% group_by(Generation)%>% shapiro_test(Observed) indices_endo2%>% group_by(Generation)%>% shapiro_test(Shannon) indices_endo2%>% group_by(Generation)%>% shapiro_test(InvSimpson) indices_endo2%>% group_by(Generation)%>% shapiro_test(Pielou) x11() ggqqplot(indices_endo2, "Observed", ggtheme = theme_bw()) + facet_grid(Generation~Plot) x11() ggqqplot(indices_endo2, "InvSimpson", ggtheme = theme_bw()) + facet_grid(Generation ~ Plot) #Homoscedasticity indices_endo2 %>% levene_test(Observed ~ Generation) indices_endo2 %>% levene_test(Shannon ~ Generation) indices_endo2 %>% levene_test(InvSimpson ~ Generation) indices_endo2 %>% levene_test(Pielou ~ Generation) #outliers a=indices_endo2 %>% group_by(Generation)%>% identify_outliers(Observed); View(a) b=indices_endo2 %>% group_by(Generation)%>% identify_outliers(Shannon);View(b) c=indices_endo2 %>% group_by(Generation)%>% identify_outliers(InvSimpson);View(c) d=indices_endo2 %>% group_by(Generation)%>% identify_outliers(Pielou);View(d) #check whether with robust tests data are still homocedastic indices_endo2%>% filter(between(Shannon, quantile(Shannon,0.1), quantile(Shannon, 0.9)))%>% levene_test(Shannon ~ Generation) indices_endo2%>% filter(between(InvSimpson, quantile(InvSimpson,0.1), quantile(InvSimpson, 0.9)))%>% levene_test(InvSimpson ~ Generation) #Hypothesis contrasting tests indices_endo2 %>% anova_test(Observed ~ Generation) indices_endo2 %>% anova_test(Shannon ~ Generation) t1way(InvSimpson~Generation, data=indices_endo2) indices_endo2 %>% anova_test(Pielou ~ Generation) ``` # 5. Beta diversity analyses It's time to analyze the differences in the diversity of the fungal communities between samples. For that purpose, we will navigate into the multivariate statistics. Firstly, we have to normalize the data to avoid potential biases due to different library sizes (different number of sequences in the samples). We prefer the edgeR method, which is based on the TMM (*Trimmed Mean of the M-values*). This normalization is needed when we are working also with compositional data; that is to say, with data that expressed in percentages, such as microbiota counts. More information can be found [here](https://doi.org/10.1186/gb-2010-11-3-r25). ## 5.1 TMM Normalization We are going to use this normalization for both multivariate plots and for multivariate statistics. ```{r} #a)RHIZOSPHERE #a.1) South plot rizo1_ASV=as.data.frame(otu_table(rizo1,taxa_are_rows = T))#extract the ASV table from the phyloseq object rizo1_mt=as.data.frame(sample_data(rizo1))#extract the metadata table from the phyloseq object rizo1_tax=as.data.frame(tax_table(rizo1))#extract the taxonomy table from the phyloseq object #Create a new object suitbale for the normalization by the package edgeR rizo1_edgeR = DGEList(counts = rizo1_ASV, samples = rizo1_mt, genes = rizo1_tax) #Calculate the normalization factors to correct the differences in sequencing depth rizo1_edgeR = calcNormFactors(rizo1_edgeR) #Extract the normalized abundance rizo1_ASV_norm = cpm(rizo1_edgeR, normalized.lib.sizes=T, log=F) #Create the phyloseq elements with the ASV counts normalized rizo1_phy_OTU_norm=otu_table(as.data.frame(rizo1_ASV_norm,row.names=F), taxa_are_rows = T) rizo1_phy_taxonomy_norm=tax_table(as.matrix(rizo1_tax)) rizo1_phy_metadata_norm=sample_data(rizo1_mt) taxa_names(rizo1_phy_OTU_norm)= taxa_names(rizo1_phy_taxonomy_norm) identical(rownames(rizo1_ASV_norm), rownames(rizo1_tax))#it should give "TRUE" #Join all the elements into a new phyloseq object rizo1_normalized_phyloseq<-phyloseq(rizo1_phy_OTU_norm, rizo1_phy_taxonomy_norm, rizo1_phy_metadata_norm) #a.2) North plot rizo2_ASV=as.data.frame(otu_table(rizo2,taxa_are_rows = T)) rizo2_mt=as.data.frame(sample_data(rizo2)) rizo2_tax=as.data.frame(tax_table(rizo2)) rizo2_edgeR = DGEList(counts = rizo2_ASV, samples = rizo2_mt, genes = rizo2_tax) rizo2_edgeR = calcNormFactors(rizo2_edgeR) rizo2_ASV_norm = cpm(rizo2_edgeR, normalized.lib.sizes=T, log=F) rizo2_phy_OTU_norm=otu_table(as.data.frame(rizo2_ASV_norm,row.names=F), taxa_are_rows = T) rizo2_phy_taxonomy_norm=tax_table(as.matrix(rizo2_tax)) rizo2_phy_metadata_norm=sample_data(rizo2_mt) taxa_names(rizo2_phy_OTU_norm)= taxa_names(rizo2_phy_taxonomy_norm) identical(rownames(rizo2_ASV_norm), rownames(rizo2_tax)) rizo2_normalized_phyloseq<-phyloseq(rizo2_phy_OTU_norm, rizo2_phy_taxonomy_norm, rizo2_phy_metadata_norm) #b) ENDOSPHERE #b.1) South plot endo1_ASV=as.data.frame(otu_table(endo1,taxa_are_rows = T)) endo1_mt=as.data.frame(sample_data(endo1)) endo1_tax=as.data.frame(tax_table(endo1)) endo1_edgeR = DGEList(counts = endo1_ASV, samples = endo1_mt, genes = endo1_tax) endo1_edgeR = calcNormFactors(endo1_edgeR) endo1_ASV_norm = cpm(endo1_edgeR, normalized.lib.sizes=T, log=F) endo1_phy_OTU_norm=otu_table(as.data.frame(endo1_ASV_norm,row.names=F), taxa_are_rows = T) endo1_phy_taxonomy_norm=tax_table(as.matrix(endo1_tax)) endo1_phy_metadata_norm=sample_data(endo1_mt) taxa_names(endo1_phy_OTU_norm)= taxa_names(endo1_phy_taxonomy_norm) identical(rownames(endo1_ASV_norm), rownames(endo1_tax)) endo1_normalized_phyloseq<-phyloseq(endo1_phy_OTU_norm, endo1_phy_taxonomy_norm, endo1_phy_metadata_norm) #b.2) North plot endo2_ASV=as.data.frame(otu_table(endo2,taxa_are_rows = T)) endo2_mt=as.data.frame(sample_data(endo2)) endo2_tax=as.data.frame(tax_table(endo2)) endo2_edgeR = DGEList(counts = endo2_ASV, samples = endo2_mt, genes = endo2_tax) endo2_edgeR = calcNormFactors(endo2_edgeR) endo2_ASV_norm = cpm(endo2_edgeR, normalized.lib.sizes=T, log=F) endo2_phy_OTU_norm=otu_table(as.data.frame(endo2_ASV_norm,row.names=F), taxa_are_rows = T) endo2_phy_taxonomy_norm=tax_table(as.matrix(endo2_tax)) endo2_phy_metadata_norm=sample_data(endo2_mt) #Anyadimos el nombre de los taxones taxa_names(endo2_phy_OTU_norm)= taxa_names(endo2_phy_taxonomy_norm) identical(rownames(endo2_ASV_norm), rownames(endo2_tax)) endo2_normalized_phyloseq<-phyloseq(endo2_phy_OTU_norm, endo2_phy_taxonomy_norm, endo2_phy_metadata_norm) ``` ## 5.2 Multivariate statistical tests The best options to test whether the groups of samples have the same dispersion is to apply the test PERMDISP2, while PERMANOVA (Permutational Multivariate Analysis of Variance Using Distance Matrices) is a test suitable to check whether the centroid (concept similar to the mean of groups of samples) of all the groups of samples to be compared is homogenus. Thus, these tests are the multivariate version of Levene's and ANOVA tests. However, both of them are based on distance matrices. That is to say, we have to calculate the distance between all sample pairs. There are many different distance or dissimilarity measures, but here we are going to use Bray-Curtis dissimilarity. Null hypotheses of both tests: PERMANOVA H~0~: no significant differences in the centroids (mean values) of the groups of samples PERMDISPD2 H~0~: no significant differences in the dispersion of the groups of samples ::: callout-important Be careful with unbalanced experiments. As stated [Anderson and Walsh (2013)](https://doi.org/10.1890/12-2010.1), when the groups of samples to be compared have different number of replicates (unbalanced design), heterocedasticity becomes a problem. In that cases, PERMANOVA cannot distinguish between the differences between groups of samples are due to the different dispersion or due to differences in the centroids. So, in case of unbalances designs, it is superimportant to check the dispersion of the data. ::: ::: callout-note As you know, the ITS2 fungal region is quite variable in length, thus, we do not calculate a phylogenetic tree including all the ASVs. For that reason, we cannot compute the (un)weighted UniFrac distances. We are going to perform all the beta diversity analyses based just on Bray-Curtis dissimilarity measure. ::: ```{r} #a) RHIZOSPHERE #a.1) South plot df_r1=data.frame(sample_data(rizo1_normalized_phyloseq))#extract the metadata table from the phyloseq object (NORMALIZED phyloseq!) b_r1=distance(rizo1_normalized_phyloseq, "bray")#calculate Bray-Curtis dissimilarity adonis_rizo1=adonis2(b_r1~Condition*Year, data=df_r1, permutations = 9999)#apply the PERMANOVA test. Here we test the model Community structure~Plant type*Sampling year adonis_rizo1#check the results pairwise.adonis(b_r1, phyloseq::sample_data(rizo1_normalized_phyloseq)$Year, p.adjust.m = "holm")#pairwise PERMANOVAs as post-hoc tests betadisper_r1=betadisper(b_r1, df_r1$Condition_Year)#check the betadispersion of each factor separately permutest(betadisper_r1) betadisper_r1_condition=betadisper(b_r1, df_r1$Condition)#Check the betadispersion of the factor "Condition" permutest(betadisper_r1_condition) betadisper_r1_year=betadisper(b_r1, df_r1$Year)#Check the betadispersion of the factor "sampling year" permutest(betadisper_r1_year) #a.2) North plot df_r2=data.frame(sample_data(rizo2_normalized_phyloseq)) b_r2=distance(rizo2_normalized_phyloseq,"bray") adonis_rizo2=adonis2(b_r2~Condition*Year, data=df_r2, permutations = 9999) adonis_rizo2 pairwise.adonis(b_r2, phyloseq::sample_data(rizo2_normalized_phyloseq)$Year, p.adjust.m = "holm") betadisper_r2=betadisper(b_r2, df_r2$Condition_Year) permutest(betadisper_r2) betadisper_r2_condition=betadisper(b_r2, df_r2$Condition) permutest(betadisper_r2_condition) betadisper_r2_year=betadisper(b_r2, df_r2$Year) permutest(betadisper_r2_year) #b) ENDOSPHERE #b.1) South plot df_e1=data.frame(sample_data(endo1_normalized_phyloseq)) b_e1=distance(endo1_normalized_phyloseq, "bray") adonis_endo1=adonis2(b_e1~Condition*Year, data=df_e1, permutations = 9999) adonis_endo1 pairwise.adonis(b_e1, phyloseq::sample_data(endo1_normalized_phyloseq)$Year, p.adjust.m = "holm") pairwise.adonis(b_e1, phyloseq::sample_data(endo1_normalized_phyloseq)$Condition, p.adjust.m = "holm") pairwise.adonis(b_e1, phyloseq::sample_data(endo1_normalized_phyloseq)$Fusion, p.adjust.m = "holm") betadisper_e1=betadisper(b_e1, df_e1$Condition_Year) permutest(betadisper_e1) betadisper_e1_condition=betadisper(b_e1, df_e1$Condition) permutest(betadisper_e1_condition) betadisper_e1_year=betadisper(b_e1, df_e1$Year) permutest(betadisper_e1_year) #b.2) North plot df_e2=data.frame(sample_data(endo2_normalized_phyloseq)) b_e2=distance(endo2_normalized_phyloseq, "bray") adonis_endo2=adonis2(b_e2~Condition*Year, data=df_e2, permutations = 9999) adonis_endo2 pairwise.adonis(b_e2, phyloseq::sample_data(endo2_normalized_phyloseq)$Fusion, p.adjust.m = "holm") betadisper_e2=betadisper(b_e2, df_e2$Condition_Year) permutest(betadisper_e2) betadisper_e2_condition=betadisper(b_e2, df_e2$Condition) permutest(betadisper_e2_condition) betadisper_e2_year=betadisper(b_e2, df_e2$Year) permutest(betadisper_e2_year) #Let's calculate the size of the effects. But first, we have to create the function that calculates the size of the effect: adonis_OmegaSq <- function(adonisOutput, partial = TRUE){ if(!(is(adonisOutput, "adonis") || is(adonisOutput, "anova.cca"))) stop("Input should be an adonis object") if (is(adonisOutput, "anova.cca")) { aov_tab <- adonisOutput aov_tab$MeanSqs <- aov_tab$SumOfSqs / aov_tab$Df aov_tab$MeanSqs[length(aov_tab$Df)] <- NA } else { aov_tab <- adonisOutput$aov.tab } heading <- attr(aov_tab, "heading") MS_res <- aov_tab[pmatch("Residual", rownames(aov_tab)), "MeanSqs"] SS_tot <- aov_tab[rownames(aov_tab) == "Total", "SumsOfSqs"] N <- aov_tab[rownames(aov_tab) == "Total", "Df"] + 1 if(partial){ omega <- apply(aov_tab, 1, function(x) (x["Df"]*(x["MeanSqs"]-MS_res))/(x["Df"]*x["MeanSqs"]+(N-x["Df"])*MS_res)) aov_tab$parOmegaSq <- c(omega[1:(length(omega)-2)], NA, NA) } else { omega <- apply(aov_tab, 1, function(x) (x["SumsOfSqs"]-x["Df"]*MS_res)/(SS_tot+MS_res)) aov_tab$OmegaSq <- c(omega[1:(length(omega)-2)], NA, NA) } if (is(adonisOutput, "adonis")) cn_order <- c("Df", "SumsOfSqs", "MeanSqs", "F.Model", "R2", if (partial) "parOmegaSq" else "OmegaSq", "Pr(>F)") else cn_order <- c("Df", "SumOfSqs", "F", if (partial) "parOmegaSq" else "OmegaSq", "Pr(>F)") aov_tab <- aov_tab[, cn_order] attr(aov_tab, "names") <- cn_order attr(aov_tab, "heading") <- heading if (is(adonisOutput, "adonis")) adonisOutput$aov.tab <- aov_tab else adonisOutput <- aov_tab return(adonisOutput) } #apply just in that cases in which PERMANOVA gives significant results: adonis_OmegaSq(adonis_rizo1) adonis_OmegaSq(adonis_rizo2) adonis_OmegaSq(adonis_endo1) adonis_OmegaSq(adonis_endo2) ``` Now, let’s compare the structure of bacterial communities of mothers, first and second suckers: ```{r} #a) RHIZOSPHERE #a.1) South plot adonis_rizo1_generation=adonis2(b_r1~Generation, data=df_rizo1, permutations = 9999) adonis_rizo1_generation betadisper_rizo1_generation=betadisper(b_r1, df_rizo1$Generation) permutest(betadisper_rizo1_generation) adonis_OmegaSq(adonis_rizo1_generation) pairwise.adonis(b_r1, phyloseq::sample_data(rizo1)$Generation, p.adjust.m = "holm") pa=pairwise.adonis(b_r1, phyloseq::sample_data(rizo1)$Generation, p.adjust.m = "holm") #a.2) North plot adonis_rizo2_generation=adonis2(b_r2~Generation, data=df_rizo2, permutations = 9999) adonis_rizo2_generation betadisper_rizo2_generation=betadisper(b_r2, df_rizo2$Generation) permutest(betadisper_rizo2_generation) adonis_OmegaSq(adonis_rizo2_generation) pairwise.adonis(b_r2, phyloseq::sample_data(rizo2)$Generation, p.adjust.m = "holm") #b) ENDOSPHERE #b.1) South plot adonis_endo1_generation=adonis2(b_e1~Generation, data=df_endo1, permutations = 9999) adonis_endo1_generation betadisper_endo1_generation=betadisper(b_e1, df_endo1$Generation) permutest(betadisper_endo1_generation) #b.2) North plot adonis_endo2_generation=adonis2(b_e2~Generation, data=df_endo2, permutations = 9999) adonis_endo2_generation betadisper_endo2_generation=betadisper(b_e2, df_endo2$Generation) permutest(betadisper_endo2_generation) adonis_OmegaSq(adonis_endo2_generation) pairwise.adonis(b_e2, phyloseq::sample_data(endo2)$Generation, p.adjust.m = "holm") ``` # 5.3 Ordination plots There are several options to visualize the results of PERMANOVA tests, for instance, by ordination plots such as PCoA (Principal Coordinate Analysis), NMDS (Non-metric MultiDimenstional Scaling), PCA (Principal Component Analysis), among others. Theses graphs are very useful because help us to detect the differences in the structure of the microbiota among groups of samples. Both PCoA and NMDS are based on non-euclidean distances or dissimilarity measures, so we consider that they are the best options for microbiota analyses. One the one hand, PCoA plots represent in two dimensions the arrangement of the samples in the multivariate space, although the percentage of the total variance is commonly explained by many dimensions. On the other hand, NMDS approach “forces” the samples to be ordinated in just two dimensions, although this forced arrangement has an associated stress. We tolerate stress values lower than 0.2. We recommend calculating both ordination plots with the aim of selecting the option that best represents the results of PERMANOVA analyses. ```{r} #a) RHIZOSPHERE #a.1) South plot NMDS_bray_rizo1= ordinate(rizo1_normalized_phyloseq, "NMDS", "bray")#indicate the ordination plot and the distance print(paste("The stress of the NMDS based on BrayCurtis is:", NMDS_bray_rizo1$stress))#this shows the stress of the NMDS PCOA_bray_rizo1 = ordinate(rizo1_normalized_phyloseq, "PCoA", "bray") peso_r1b=PCOA_bray_rizo1$values$Relative_eig[1:2] peso_r1b#This shows the percentage of the total variance explained by just the first and second axes #a.2) North plot NMDS_bray_rizo2= ordinate(rizo2_normalized_phyloseq, "NMDS", "bray") print(paste("The stress of the NMDS based on BrayCurtis is:", NMDS_bray_rizo2$stress)) PCOA_bray_rizo2 = ordinate(rizo2_normalized_phyloseq, "PCoA", "bray") peso_r2b=PCOA_bray_rizo2$values$Relative_eig[1:2] peso_r2b #b) ENDOSPHERE #b.1) South plot NMDS_bray_endo1= ordinate(endo1_normalized_phyloseq, "NMDS", "bray") print(paste("The stress of the NMDS based on BrayCurtis is:", NMDS_bray_endo1$stress)) PCOA_bray_endo1 = ordinate(endo1_normalized_phyloseq, "PCoA", "bray") peso_e1b=PCOA_bray_endo1$values$Relative_eig[1:2] peso_e1b #b.2) North plot NMDS_bray_endo2= ordinate(endo2_normalized_phyloseq, "NMDS", "bray") print(paste("The stress of the NMDS based on BrayCurtis is:", NMDS_bray_endo2$stress)) PCOA_bray_endo2 = ordinate(endo2_normalized_phyloseq, "PCoA", "bray") peso_e2b=PCOA_bray_endo2$values$Relative_eig[1:2] peso_e2b ``` Once these values of stress and the weight of the axes are obtained, we should choose which option is the most suitable for our dataset. Although you can also use both method in your research paper. In this case, we selected PCoA plots because we obtained relatively high values of NMDS stress. It is easy to understand: our data are probably multidimensional are complex due to the high diversity and richness of the data, so it is easy to have high stress when forcing the data to fit just into two dimensions. Let’s obtain the plots: ```{r} #a) RHIZOSPHERE #a.1) South plot p_rizo1_pcoa= plot_ordination(rizo1_normalized_phyloseq, PCOA_bray_rizo1,type= "samples", color= "Condition", shape = "Year")+#to color by "Condition" and shape by "Year" geom_point(alpha = 4, size = 4.5)+ labs(x=paste0("Axis 1 (",round(peso_r1b[1]*100,digits = 2),"%)"), y=paste0("Axis 2 (",round(peso_r1b[2]*100,digits = 2),"%)"))+#write the weight of each axis with two decimals scale_color_manual(values = c("Immature"="cornflowerblue","Mature"="darkgreen"), breaks=c("Immature", "Mature"), labels=c("Immature", "Mature"))+ scale_shape_manual(values=c("A"=16, "B"=17), breaks=c("A", "B"), labels=c("2022", "2023"))+ theme_bw()+ theme(legend.key=element_blank(), legend.title.align = 0, legend.title = element_text(face="bold",size=18), legend.text.align = 0, axis.text = element_text(size=18), axis.title = element_text(size = 20), plot.title = element_text(hjust=0.5, face="bold",size=18), legend.text = element_text(size = 18))+ geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+ geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+ ggtitle("Rhizosphere (Plot 1)\nPCoA on Bray Curtis dissimilarities") x11() p_rizo1_pcoa #a.2) North plot p_rizo2_pcoa= plot_ordination(rizo2_normalized_phyloseq, PCOA_bray_rizo2,type= "samples", color= "Condition", shape = "Year")+ geom_point(alpha = 4, size = 4.5)+ labs(x=paste0("Axis 1 (",round(peso_r2b[1]*100,digits = 2),"%)"), y=paste0("Axis 2 (",round(peso_r2b[2]*100,digits = 2),"%)"))+ scale_color_manual(values = c("Immature"="magenta","Mature"="darkolivegreen2"), breaks=c("Immature", "Mature"), labels=c("Immature", "Mature"))+ scale_shape_manual(values=c("A"=16, "B"=17), breaks=c("A", "B"), labels=c("2022", "2023"))+ theme_bw()+ theme(legend.key=element_blank(), legend.title.align = 0, legend.title = element_text(face="bold",size=18), legend.text.align = 0, axis.text = element_text(size=18), axis.title = element_text(size = 20), plot.title = element_text(hjust=0.5, face="bold",size=18), legend.text = element_text(size = 18))+ geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+ geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+ ggtitle("Rhizosphere (Plot 2)\nPCoA on Bray-Curtis dissimilarities") x11() p_rizo1_pcoa #b)ENDOSPHERE #b.1) South plot p_endo1_pcoa= plot_ordination(endo1_normalized_phyloseq, PCOA_bray_endo1,type= "samples", color= "Condition", shape = "Year")+ geom_point(alpha = 4, size = 4.5)+ labs(x=paste0("Axis 1 (",round(peso_e1b[1]*100,digits = 2),"%)"), y=paste0("Axis 2 (",round(peso_e1b[2]*100,digits = 2),"%)"))+ scale_color_manual(values = c("Immature"="#00CCFF","Mature"="green"), breaks=c("Immature", "Mature"), labels=c("Immature", "Mature"))+ scale_shape_manual(values=c("A"=16, "B"=17), breaks=c("A", "B"), labels=c("2022", "2023"))+ theme_bw()+ theme(legend.key=element_blank(), legend.title.align = 0, legend.title = element_text(face="bold",size=18), legend.text.align = 0, axis.text = element_text(size=18), axis.title = element_text(size = 20), plot.title = element_text(hjust=0.5, face="bold",size=18), legend.text = element_text(size = 18))+ geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+ geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+ ggtitle("Endosphere (Plot 1)\nPCoA on Bray-Curtis dissimilarities") x11() p_endo1_pcoa #b.2) North plot p_endo2_pcoa= plot_ordination(endo2_normalized_phyloseq, PCOA_bray_endo2,type= "samples", color= "Condition", shape = "Year")+ geom_point(alpha = 4, size = 4.5)+ labs(x=paste0("Axis 1 (",round(peso_e2b[1]*100,digits = 2),"%)"), y=paste0("Axis 2 (",round(peso_e2b[2]*100,digits = 2),"%)"))+ scale_color_manual(values = c("Immature"="blueviolet","Mature"="aquamarine"), breaks=c("Immature", "Mature"), labels=c("Immature", "Mature"))+ scale_shape_manual(values=c("A"=16, "B"=17), breaks=c("A", "B"), labels=c("2022", "2023"))+ theme_bw()+ theme(legend.key=element_blank(), legend.title.align = 0, legend.title = element_text(face="bold",size=18), legend.text.align = 0, axis.text = element_text(size=18), axis.title = element_text(size = 20), plot.title = element_text(hjust=0.5, face="bold",size=18), legend.text = element_text(size = 18))+ geom_hline(aes(yintercept = c(0.00)), lty=2, colour="grey")+ geom_vline(aes(xintercept = c(0.00)), lty=2, colour="grey")+ ggtitle("Endosphere (Plot 2)\nPCoA on Bray Curtis dissimilarities") x11() p_endo2_pcoa ``` # 6. Taxonomical profiles We already know that there are differences in the structure of bacterial communities in some case, however, we still do not know which taxa are the responsible of that differences. So, let’s analyze the taxonomical profiles. Fistly, we have to obtain the relative abundance of each ASV: ```{r} phy_data_relabun=transform_sample_counts(phy_total_TOTAL, function(x){x/sum(x)}*100) colSums(otu_table(phy_data_relabun)) #check that all sum 100% ``` ## 6.1 Phylum level ### 6.1.1 Obtaining tables with the relative abundance and standard deviation values We are going to obtain the relative abundance of each phylum detected in our samples. ```{r} phylum_relabun=tax_glom(phy_data_relabun, taxrank = "Phylum") #aglomerate at Phylum level colSums(otu_table(phylum_relabun)) #check that the relative abundance sums 100% in all cases #Now we are going to create a table with the relative abundance of each taxa in each replicate so that we can work on it, for instance, in Excel a=as.data.frame(otu_table(phylum_relabun)) aa=cbind(tax_table(phylum_relabun),a) identical(rownames(a),rownames(aa))#comprobamos que sean identicos (tiene uqe salir por consola "TRUE") write.table(data.frame("TAXA"=rownames(aa),aa),file="abundrel_phylum_porReplicas.txt", sep="\t",row.names =F) #Another table, with the mean and standard deviation of the relative abundance of each phylum: table_phylum = otu_table(phylum_relabun)[,] %>% t() %>% as.data.frame() tax_phylum=tax_table(phylum_relabun) phylum_media=aggregate(table_phylum, by=list(as.data.frame(sample_data(phylum_relabun))$Fusion), FUN=mean)%>% column_to_rownames("Group.1") %>% t() #calculate the mean values of the relative abundance of each phylum in each group of samples phylum_sd = aggregate(table_phylum, by=list(as.data.frame(sample_data(phylum_relabun))$Fusion), FUN=sd)%>% column_to_rownames("Group.1") %>% t() %>% as.data.frame() %>% rename_with(.fn= ~paste0(colnames(phylum_media), "_SD"))#calculate the standard deviation of the relative abundance of each phylum in each group of samples phylum_media_sd=merge(tax_phylum, phylum_media, by=0) %>%column_to_rownames("Row.names") %>% #bind mean and sd values into one table merge(phylum_sd, by=0) %>% column_to_rownames("Row.names") colSums(phylum_media_sd[,9:ncol(phylum_media_sd)]) write.table(data.frame("TAXA"=rownames(phylum_media_sd),phylum_media_sd), file="abundrel_media_sd_phylum.txt", sep="\t",row.names =F) ``` ### 6.1.2 Plotting We are going to make a stacked bars plot, just indicating the most abundant phyla Firstly, we have to prepare the data and modify the table so that we can obtain tables suitable for the plotting with the package ggplot2. ```{r} df_melt_phylum = psmelt(physeq = phylum_relabun) #transform the phyloseq object into a dataframe in which the abundance of each phylum is in rows colnames(df_melt_phylum)#check the content of the dataframe #Here we aggregate the data by the factor we indicate in the argument "by" df_melt_aggreg1 = aggregate(df_melt_phylum$Abundance, by=list(Group=df_melt_phylum$Fusion, Sample=df_melt_phylum$Sample,Phylum=df_melt_phylum$Phylum, Compartment=df_melt_phylum$Compartment, Plot=df_melt_phylum$Plot, Year=df_melt_phylum$Year, Condition=df_melt_phylum$Condition), FUN=sum)#it sums the abundance of each phylum in all the replicates corresponding to the same group of samples colnames(df_melt_aggreg1)=c("Group", "Sample", "Phylum","Compartment","Plot","Year","Condition","Abundance")#rename the columns colnames(df_melt_aggreg1) df_melt_aggreg_mean = aggregate(df_melt_aggreg1$Abundance, by=list(Group=df_melt_aggreg1$Group, Phylum=df_melt_aggreg1$Phylum, Compartment=df_melt_aggreg1$Compartment, Plot=df_melt_aggreg1$Plot, Year=df_melt_aggreg1$Year,Condition=df_melt_aggreg1$Condition), FUN=mean)#now, it makes the mean relative abundance of each phylum in each group of samples colnames(df_melt_aggreg_mean)<-c("Group", "Phylum","Compartment", "Plot","Year","Condition","Abundance") colnames(df_melt_aggreg_mean)<-c("Group", "Phylum","Compartment", "Plot","Year","Condition","Abundance") colnames(df_melt_aggreg_mean) #repeat the same with the standard deviation df_melt_aggreg_sd=aggregate(df_melt_aggreg1$Abundance, by=list(Group=df_melt_aggreg1$Group, Phylum=df_melt_aggreg1$Phylum, Compartment=df_melt_aggreg1$Compartment, Plot=df_melt_aggreg1$Plot, Year=df_melt_aggreg1$Year,Condition=df_melt_aggreg1$Condition), FUN=sd) colnames(df_melt_aggreg_sd)=c("Group", "Phylum","Compartment","Plot","Year","Condition","sd") all(df_melt_aggreg_mean$Group==df_melt_aggreg_sd$Group) all(df_melt_aggreg_mean$Phylum==df_melt_aggreg_sd$Phylum) df_melt_aggreg_mean$sd=df_melt_aggreg_sd$sd media_sd_phylum_todos=df_melt_aggreg_mean ``` Now, we are ready to plot the mean relative abundances. We want to order the phyla by ascending relative abundance, keeping the unclassified and minor phyla in the upper part of the plot. ```{r} #a) RHIZOSPHERE #a.1) South plot media_sd_phylum_Rizo=media_sd_phylum_todos[media_sd_phylum_todos[,"Compartment"] == "Rhizosphere",]#subset the dataset media_sd_phylum_Rizo1=media_sd_phylum_Rizo[media_sd_phylum_Rizo[,"Plot"] == "Plot1",] #aggregate all the phyla that were not classified media_sd_phylum_Rizo1$Phylum=as.character(media_sd_phylum_Rizo1$Phylum) unclassified_rizo1=media_sd_phylum_Rizo1[media_sd_phylum_Rizo1[,"Phylum"] == "unclassified",] #save them into a new variable media_sd_phylum_Rizo1=media_sd_phylum_Rizo1[media_sd_phylum_Rizo1[,"Phylum"] != "unclassified",] #remove the unclassified phyla from the original dataset #aggregate all the phyla that account less than 1% into an artificial group named "Other phyla <1%" media_sd_phylum_Rizo1$Phylum[media_sd_phylum_Rizo1$Abundance <= 1.0] = "Other phyla (<1%)" media_sd_phylum_Rizo1$Phylum=as.factor(media_sd_phylum_Rizo1$Phylum) others_rizo1=media_sd_phylum_Rizo1[media_sd_phylum_Rizo1[,"Phylum"] == "Other phyla (<1%)",]#save the "other phyla" into a new variable media_sd_phylum_Rizo1=media_sd_phylum_Rizo1[media_sd_phylum_Rizo1[,"Phylum"] != "Other phyla (<1%)",]#remove them from the original dataset #Now, the minor and unclassified phyla are not included in the original dataset, so let's order by abundance the remaining phyla ordenado_rizo1=media_sd_phylum_Rizo1[order(media_sd_phylum_Rizo1$Abundance, decreasing=T),]#order by relative abundance ordenado_rizo1=rbind(ordenado_rizo1,others_rizo1,unclassified_rizo1)#now add the minor phyla and the unclassified group_label_rizo1=c("Mature","Immature", "Mature","Immature")#set the labels levels_rizo1=c("A_Plot1_Mature_Rhizosphere","A_Plot1_Immature_Rhizosphere","B_Plot1_Mature_Rhizosphere", "B_Plot1_Immature_Rhizosphere")#set the levels of the graph limits_rizo1=c("A_Plot1_Mature_Rhizosphere","A_Plot1_Immature_Rhizosphere","B_Plot1_Mature_Rhizosphere", "B_Plot1_Immature_Rhizosphere")#set the limits of the graph phyl_name_ordered_rizo1=as.vector(ordenado_rizo1$Phylum) nombres_unicos_rizo1=unique(phyl_name_ordered_rizo1)#get the levels (=the names of the phyla )of the factor "Phylum", and show them just once regardless of their abundance and prevalence) ordenado_rizo1$Phylum=reorder.factor(ordenado_rizo1$Phylum,new.order=rev(nombres_unicos_rizo1)) #order the name of the phyla lab_unicos_rizo1=nombres_unicos_rizo1 sorted_labels_ggplot_rizo1 <- sapply(lab_unicos_rizo1, function(x) if (x == "Other phyla (<1%)"|x == "unclassified"|x == "Other genera (<1%)") {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))}) #this function helps us to write the name of the phyla (and genera) in italics and the name of the minor phyla and unclassified without italics colores_rizo1 = c("#0099FF", "#00CC00","#9933FF","yellow", "#FF00FF","grey", "black")#choose the colors of specific phyla length(lab_unicos_rizo1)==length(colores_rizo1) #si FALSE, anyadir/quitar colores length(lab_unicos_rizo1) length(colores_rizo1) titulo_plot_rizo1="Plot 1" p3=ggplot(ordenado_rizo1, aes(x=Group, y=Abundance, fill=Phylum, order=Phylum)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=colores_rizo1, labels=sorted_labels_ggplot_rizo1, breaks=nombres_unicos_rizo1)+ labs(y="Mean relative abundance (%)", x=NULL, fill="Phylum",title=titulo_plot_rizo1)+ guides(fill = guide_legend(reverse = TRUE))+ scale_x_discrete(limits=limits_rizo1,labels=group_label_rizo1, breaks=levels_rizo1)+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 100))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+ theme(axis.text = element_text(size = 14))+ theme(axis.text.x = element_text(face="bold", size=16))+ theme(axis.title.y = element_text(size = 16))+ theme(legend.key.size = unit(0.9, "cm"))+ theme(legend.text = element_text(size = 18))+ theme(legend.title = element_text(size=18, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0) x11() p3 # a.2) North plot media_sd_phylum_Rizo=media_sd_phylum_todos[media_sd_phylum_todos[,"Compartment"] == "Rhizosphere",] media_sd_phylum_Rizo2=media_sd_phylum_Rizo[media_sd_phylum_Rizo[,"Plot"] == "Plot2",] media_sd_phylum_Rizo2$Phylum=as.character(media_sd_phylum_Rizo2$Phylum) unclassified_rizo2=media_sd_phylum_Rizo2[media_sd_phylum_Rizo2[,"Phylum"] == "unclassified",] media_sd_phylum_Rizo2=media_sd_phylum_Rizo2[media_sd_phylum_Rizo2[,"Phylum"] != "unclassified",] media_sd_phylum_Rizo2$Phylum[media_sd_phylum_Rizo2$Abundance <= 1.0] = "Other phyla (<1%)" media_sd_phylum_Rizo2$Phylum=as.factor(media_sd_phylum_Rizo2$Phylum) others_rizo2=media_sd_phylum_Rizo2[media_sd_phylum_Rizo2[,"Phylum"] == "Other phyla (<1%)",] media_sd_phylum_Rizo2=media_sd_phylum_Rizo2[media_sd_phylum_Rizo2[,"Phylum"] != "Other phyla (<1%)",] ordenado_rizo2=media_sd_phylum_Rizo2[order(media_sd_phylum_Rizo2$Abundance, decreasing=T),] ordenado_rizo2=rbind(ordenado_rizo2,others_rizo2,unclassified_rizo2) group_label_rizo2=c("Mature","Immature", "Mature","Immature") levels_rizo2=c("A_Plot2_Mature_Rhizosphere","A_Plot2_Immature_Rhizosphere","B_Plot2_Mature_Rhizosphere", "B_Plot2_Immature_Rhizosphere") limits_rizo2=c("A_Plot2_Mature_Rhizosphere","A_Plot2_Immature_Rhizosphere","B_Plot2_Mature_Rhizosphere", "B_Plot2_Immature_Rhizosphere") phyl_name_ordered_rizo2=as.vector(ordenado_rizo2$Phylum) nombres_unicos_rizo2=unique(phyl_name_ordered_rizo2) ordenado_rizo2$Phylum=reorder.factor(ordenado_rizo2$Phylum,new.order=rev(nombres_unicos_rizo2)) lab_unicos_rizo2=nombres_unicos_rizo2 sorted_labels_ggplot_rizo2 <- sapply(lab_unicos_rizo2, function(x) if (x == "Other phyla (<1%)"|x == "unclassified"|x == "Other genera (<1%)") {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))}) colores_rizo2 = c("#0099FF", "#00CC00", "#9933FF","#FF00FF", "brown","grey", "black") length(lab_unicos_rizo2)==length(colores_rizo2) length(lab_unicos_rizo2) length(colores_rizo2) titulo_plot_rizo2="Plot 2" #b) ENDOSPHERE #b.1) South plot media_sd_phylum_Endo1=media_sd_phylum_Endo[media_sd_phylum_Endo[,"Plot"] == "Plot1",] media_sd_phylum_Endo1$Phylum=as.character(media_sd_phylum_Endo1$Phylum) unclassified_endo1=media_sd_phylum_Endo1[media_sd_phylum_Endo1[,"Phylum"] == "unclassified",] media_sd_phylum_Endo1=media_sd_phylum_Endo1[media_sd_phylum_Endo1[,"Phylum"] != "unclassified",] media_sd_phylum_Endo1$Phylum[media_sd_phylum_Endo1$Abundance <= 1.0] = "Other phyla (<1%)" media_sd_phylum_Endo1$Phylum=as.factor(media_sd_phylum_Endo1$Phylum) others_endo1=media_sd_phylum_Endo1[media_sd_phylum_Endo1[,"Phylum"] == "Other phyla (<1%)",] media_sd_phylum_Endo1=media_sd_phylum_Endo1[media_sd_phylum_Endo1[,"Phylum"] != "Other phyla (<1%)",] ordenado_endo1=media_sd_phylum_Endo1[order(media_sd_phylum_Endo1$Abundance, decreasing=T),] ordenado_endo1=rbind(ordenado_endo1,others_endo1,unclassified_endo1) group_label_endo1=c("Mothers","Suckers", "Mothers","Suckers") levels_endo1=c("A_Plot1_Mature_Endosphere","A_Plot1_Immature_Endosphere","B_Plot1_Mature_Endosphere","B_Plot1_Immature_Endosphere") limits_endo1=c("A_Plot1_Mature_Endosphere","A_Plot1_Immature_Endosphere","B_Plot1_Mature_Endosphere","B_Plot1_Immature_Endosphere") phyl_name_ordered_endo1=as.vector(ordenado_endo1$Phylum) nombres_unicos_endo1=unique(phyl_name_ordered_endo1) ordenado_endo1$Phylum=reorder.factor(ordenado_endo1$Phylum,new.order=rev(nombres_unicos_endo1)) lab_unicos_endo1=nombres_unicos_endo1 sorted_labels_ggplot_endo1 <- sapply(lab_unicos_endo1, function(x) if (x == "Other phyla (<1%)"|x == "unclassified"|x == "Other genera (<1%)") {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))}) colores_endo1 = c("#0099FF","#00CC00", "grey","black") length(lab_unicos_endo1)==length(colores_endo1) length(lab_unicos_endo1) length(colores_endo1) titulo_plot_endo1="South plot" p1=ggplot(ordenado_endo1, aes(x=Group, y=Abundance, fill=Phylum, order=Phylum)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=colores_endo1, labels=sorted_labels_ggplot_endo1, breaks=nombres_unicos_endo1)+ labs(y="Mean relative abundance (%)", x=NULL, fill="Phylum",title=titulo_plot_endo1)+ guides(fill = guide_legend(reverse = TRUE))+#sirve para cambiar el orden de la leyenda scale_x_discrete(limits=limits_endo1,labels=group_label_endo1, breaks=levels_endo1)+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 100))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+ theme(axis.text = element_text(size = 5))+ theme(axis.text.x = element_text(face="bold", size=5))+ theme(axis.title.y = element_text(size = 7))+ theme(legend.key.size = unit(0.9, "cm"))+ theme(legend.text = element_text(size = 5))+ theme(legend.title = element_text(size=7, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0)+ theme(legend.position = "none") x11() p1 #b.2) North plot media_sd_phylum_Endo2=media_sd_phylum_Endo[media_sd_phylum_Endo[,"Plot"] == "Plot2",] media_sd_phylum_Endo2$Phylum=as.character(media_sd_phylum_Endo2$Phylum) unclassified_endo2=media_sd_phylum_Endo2[media_sd_phylum_Endo2[,"Phylum"] == "unclassified",] media_sd_phylum_Endo2=media_sd_phylum_Endo2[media_sd_phylum_Endo2[,"Phylum"] != "unclassified",] media_sd_phylum_Endo2$Phylum[media_sd_phylum_Endo2$Abundance <= 1.0] = "Other phyla (<1%)" media_sd_phylum_Endo2$Phylum=as.factor(media_sd_phylum_Endo2$Phylum) others_endo2=media_sd_phylum_Endo2[media_sd_phylum_Endo2[,"Phylum"] == "Other phyla (<1%)",] media_sd_phylum_Endo2=media_sd_phylum_Endo2[media_sd_phylum_Endo2[,"Phylum"] != "Other phyla (<1%)",] ordenado_endo2=media_sd_phylum_Endo2[order(media_sd_phylum_Endo2$Abundance, decreasing=T),] ordenado_endo2=rbind(ordenado_endo2,others_endo2,unclassified_endo2) group_label_endo2=c("Mature","Immature", "Mature","Immature") levels_endo2=c("A_Plot2_Mature_Endosphere","A_Plot2_Immature_Endosphere","B_Plot2_Mature_Endosphere","B_Plot2_Immature_Endosphere") limits_endo2=c("A_Plot2_Mature_Endosphere","A_Plot2_Immature_Endosphere","B_Plot2_Mature_Endosphere","B_Plot2_Immature_Endosphere") phyl_name_ordered_endo2=as.vector(ordenado_endo2$Phylum) nombres_unicos_endo2=unique(phyl_name_ordered_endo2) ordenado_endo2$Phylum=reorder.factor(ordenado_endo2$Phylum,new.order=rev(nombres_unicos_endo2)) sorted_labels_ggplot_endo2 <- sapply(lab_unicos_endo2, function(x) if (x == "Other phyla (<1%)"|x == "unclassified"|x == "Other genera (<1%)") {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))}) colores_endo2 = c("#0099FF","#00CC00","grey","black") length(lab_unicos_endo2)==length(colores_endo2) length(lab_unicos_endo2) length(colores_endo2) titulo_plot_endo2="Plot 2" p2=ggplot(ordenado_endo2, aes(x=Group, y=Abundance, fill=Phylum, order=Phylum)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=colores_endo2, labels=sorted_labels_ggplot_endo2, breaks=nombres_unicos_endo2)+ labs(y="Mean relative abundance (%)", x=NULL, fill="Phylum",title=titulo_plot_endo2)+ guides(fill = guide_legend(reverse = TRUE))+ scale_x_discrete(limits=limits_endo2,labels=group_label_endo2, breaks=levels_endo2)+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 100))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+ theme(axis.text = element_text(size = 14))+ theme(axis.text.x = element_text(face="bold", size=16))+ theme(axis.title.y = element_text(size = 16))+ theme(legend.key.size = unit(0.9, "cm"))+ theme(legend.text = element_text(size = 18))+ theme(legend.title = element_text(size=18, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0) x11() p2 ``` ## 6.2 Genus ### 6.2.1 Obtain tables with the relative abundance and standard deviation values We are going to obtain the relative abundance of each phylum detected in our samples. ```{r} genus_relabun=tax_glom(phy_data_relabun, taxrank = "Genus") colSums(otu_table(genus_relabun)) b=as.data.frame(otu_table(genus_relabun)) bb=cbind(tax_table(genus_relabun),b) identical(rownames(b),rownames(bb)) write.table(data.frame("TAXA"=rownames(bb),bb),file="abundrel_Genero_porReplicas.txt", sep="\t",row.names =F) table_genus = otu_table(genus_relabun)[,] %>% t() %>% as.data.frame() tax_genus=tax_table(genus_relabun) genus_media=aggregate(table_genus, by=list(as.data.frame(sample_data(genus_relabun))$Fusion), FUN=mean)%>% column_to_rownames("Group.1") %>% t() genus_sd= aggregate(table_genus, by=list(as.data.frame(sample_data(genus_relabun))$Fusion), FUN=sd)%>% column_to_rownames("Group.1") %>% t() %>% as.data.frame() %>% rename_with(.fn= ~paste0(colnames(genus_media), "_SD")) genus_media_sd=merge(tax_genus, genus_media, by=0) %>%column_to_rownames("Row.names") %>% merge(genus_sd, by=0) %>% column_to_rownames("Row.names") colSums(genus_media_sd[,8:ncol(genus_media_sd)]) genus_media_sd$ASV_names=row.names(genus_media_sd) write.table(data.frame("TAXA"=rownames(genus_media_sd),genus_media_sd), file="abundrel_media_sd_genus.txt", sep="\t",row.names =F) ``` ### 6.2.2. Plotting We are going to make a stacked bars plot, just indicating the most abundant genera Firstly, we have to prepare the data and modify the table so that we can obtain tables suitable for the plotting with the package ggplot2. ```{r} df_melt_genus = psmelt(physeq = genus_relabun) colnames(df_melt_genus) df_melt_aggreg2 = aggregate(df_melt_genus$Abundance, by=list(Group=df_melt_genus$Fusion, Sample=df_melt_genus$Sample,Genus=df_melt_genus$Genus, Compartment=df_melt_genus$Compartment, Plot=df_melt_genus$Plot, Year=df_melt_genus$Year, Condition=df_melt_genus$Condition), FUN=sum) colnames(df_melt_aggreg2)=c("Group", "Sample", "Genus","Compartment","Plot","Year","Condition","Abundance") colnames(df_melt_aggreg2) df_melt_aggreg_mean = aggregate(df_melt_aggreg2$Abundance, by=list(Group=df_melt_aggreg2$Group, Genus=df_melt_aggreg2$Genus, Compartment=df_melt_aggreg2$Compartment, Plot=df_melt_aggreg2$Plot, Year=df_melt_aggreg2$Year,Condition=df_melt_aggreg2$Condition), FUN=mean) colnames(df_melt_aggreg_mean)<-c("Group", "Genus","Compartment", "Plot","Year","Condition","Abundance") colnames(df_melt_aggreg_mean) df_melt_aggreg_sd=aggregate(df_melt_aggreg2$Abundance, by=list(Group=df_melt_aggreg2$Group, Genus=df_melt_aggreg2$Genus, Compartment=df_melt_aggreg2$Compartment, Plot=df_melt_aggreg2$Plot, Year=df_melt_aggreg2$Year,Condition=df_melt_aggreg2$Condition), FUN=sd) colnames(df_melt_aggreg_sd)=c("Group", "Genus","Compartment","Plot","Year","Condition","sd") all(df_melt_aggreg_mean$Group==df_melt_aggreg_sd$Group) all(df_melt_aggreg_mean$Genus==df_melt_aggreg_sd$Genus) df_melt_aggreg_mean$sd=df_melt_aggreg_sd$sd media_sd_genus_todos=df_melt_aggreg_mean ``` Now, we are ready to plot the mean relative abundances. We want to order the genera by ascending relative abundance, keeping the unclassified and minor genera in the upper part of the plot. ```{r} #a) RHIZOSPHERE media_sd_genus_rizo=media_sd_genus_todos[media_sd_genus_todos[,"Compartment"] == "Rhizosphere",] #a.1) South plot media_sd_genus_rizo1=media_sd_genus_rizo[media_sd_genus_rizo[,"Plot"] == "Plot1",] media_sd_genus_rizo1$Genus=as.character(media_sd_genus_rizo1$Genus) unclassified_r1=media_sd_genus_rizo1[media_sd_genus_rizo1[,"Genus"] == "unclassified",] media_sd_genus_rizo1=media_sd_genus_rizo1[media_sd_genus_rizo1[,"Genus"] != "unclassified",] media_sd_genus_rizo1$Genus[media_sd_genus_rizo1$Abundance <= 2] = "Other genera (<2%)" media_sd_genus_rizo1$Genus=as.factor(media_sd_genus_rizo1$Genus) others_r1=media_sd_genus_rizo1[media_sd_genus_rizo1[,"Genus"] == "Other genera (<2%)",] media_sd_genus_rizo1=media_sd_genus_rizo1[media_sd_genus_rizo1[,"Genus"] != "Other genera (<2%)",] ordenado_r1=media_sd_genus_rizo1[order(media_sd_genus_rizo1$Abundance, decreasing=T),] ordenado_r1=rbind(ordenado_r1,others_r1,unclassified_r1) group_label_r1=c("M","S", "M","S") levels_r1=c("A_Plot1_Mature_Rhizosphere","A_Plot1_Immature_Rhizosphere", "B_Plot1_Mature_Rhizosphere","B_Plot1_Immature_Rhizosphere") limits_r1=c("A_Plot1_Mature_Rhizosphere","A_Plot1_Immature_Rhizosphere", "B_Plot1_Mature_Rhizosphere","B_Plot1_Immature_Rhizosphere") genus_name_ordered_r1=as.vector(ordenado_r1$Genus) nombres_unicos_r1=unique(genus_name_ordered_r1) ordenado_r1$Genus=reorder.factor(ordenado_r1$Genus,new.order=rev(nombres_unicos_r1)) lab_unicos_r1=nombres_unicos_r1 sorted_labels_ggplot_r1 = sapply(lab_unicos_r1, function(x) if (x == "Other genera (<2%)"|x == "unclassified"|x == "Other genera (<2%)") {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))}) lab_unicos_r1=as.data.frame(lab_unicos_r1) colores_r1 = c("#0099FF","#00CC00","#9933FF","brown","yellow","orange","#00FFFF","red", "pink", "magenta","blue","khaki2","green4","#6A3D9A","gold1","purple","green" , "grey","black") length(lab_unicos_r1)==length(colores_r1) length(lab_unicos_r1) length(colores_r1) titulo_plot_r1="South plot" pgenus_r1=ggplot(ordenado_r1, aes(x=Group, y=Abundance, fill=Genus, order=Genus)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=colores_r1, labels=sorted_labels_ggplot_r1, breaks=nombres_unicos_r1)+ labs(y="Mean relative abundance (%)", x=NULL, fill="Genus",title=titulo_plot_r1)+ guides(fill = guide_legend(reverse = TRUE))+ scale_x_discrete(limits=limits_r1,labels=group_label_r1, breaks=levels_r1)+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 100))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+ theme(axis.text = element_text(size = 7))+ theme(axis.text.x = element_text(face="bold", size=7))+ theme(axis.title.y = element_text(size = 7))+ theme(legend.key.size = unit(0.2, "cm"))+ theme(legend.text = element_text(size = 5))+ theme(legend.title = element_text(size=5, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0)+ theme(legend.position = "none") x11() pgenus_r1 #a.2) North plot media_sd_genus_rizo2=media_sd_genus_rizo[media_sd_genus_rizo[,"Plot"] == "Plot2",] media_sd_genus_rizo2$Genus=as.character(media_sd_genus_rizo2$Genus) unclassified_r2=media_sd_genus_rizo2[media_sd_genus_rizo2[,"Genus"] == "unclassified",] media_sd_genus_rizo2=media_sd_genus_rizo2[media_sd_genus_rizo2[,"Genus"] != "unclassified",] media_sd_genus_rizo2$Genus[media_sd_genus_rizo2$Abundance <= 2] = "Other genera (<2%)" media_sd_genus_rizo2$Genus=as.factor(media_sd_genus_rizo2$Genus) others_r2=media_sd_genus_rizo2[media_sd_genus_rizo2[,"Genus"] == "Other genera (<2%)",] media_sd_genus_rizo2=media_sd_genus_rizo2[media_sd_genus_rizo2[,"Genus"] != "Other genera (<2%)",] ordenado_r2=media_sd_genus_rizo2[order(media_sd_genus_rizo2$Abundance, decreasing=T),] ordenado_r2=rbind(ordenado_r2,others_r2,unclassified_r2) group_label_r2=c("M","S", "M","S") levels_r2=c("A_Plot2_Mature_Rhizosphere","A_Plot2_Immature_Rhizosphere", "B_Plot2_Mature_Rhizosphere","B_Plot2_Immature_Rhizosphere") limits_r2=c("A_Plot2_Mature_Rhizosphere","A_Plot2_Immature_Rhizosphere", "B_Plot2_Mature_Rhizosphere","B_Plot2_Immature_Rhizosphere") genus_name_ordered_r2=as.vector(ordenado_r2$Genus) nombres_unicos_r2=unique(genus_name_ordered_r2) ordenado_r2$Genus=reorder.factor(ordenado_r2$Genus,new.order=rev(nombres_unicos_r2)) lab_unicos_r2=nombres_unicos_r2 sorted_labels_ggplot_r2 <- sapply(lab_unicos_r2, function(x) if (x == "Other genera (<2%)"|x == "unclassified"|x == "Other genera (<2%)") {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))}) colores_r2 = c("green4","brown","darkturquoise","steelblue4","#00FFFF", "#9933FF","maroon","pink","gold1", "green","slategray2","plum2","yellow","red","darkolivegreen2","grey","black") length(lab_unicos_r2)==length(colores_r2) length(lab_unicos_r2) length(colores_r2) titulo_plot_r2="North plot" pgenus_r2=ggplot(ordenado_r2, aes(x=Group, y=Abundance, fill=Genus, order=Genus)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=colores_r2, labels=sorted_labels_ggplot_r2, breaks=nombres_unicos_r2)+ labs(y="Mean relative abundance (%)", x=NULL, fill="Genus",title=titulo_plot_r2)+ guides(fill = guide_legend(reverse = TRUE))+ scale_x_discrete(limits=limits_r2,labels=group_label_r2, breaks=levels_r2)+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 100))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+ theme(axis.text = element_text(size = 7))+ theme(axis.text.x = element_text(face="bold", size=7))+ theme(axis.title.y = element_text(size = 7))+ theme(legend.key.size = unit(0.2, "cm"))+ theme(legend.text = element_text(size = 5))+ theme(legend.title = element_text(size=, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0)+ theme(legend.position="none") x11() pgenus_r2 #b) ENDOSPHERE media_sd_genus_endo=media_sd_genus_todos[media_sd_genus_todos[,"Compartment"] == "Endosphere",] #b.1) South plot media_sd_genus_endo1=media_sd_genus_endo[media_sd_genus_endo[,"Plot"] == "Plot1",] media_sd_genus_endo1$Genus=as.character(media_sd_genus_endo1$Genus) unclassified_e1=media_sd_genus_endo1[media_sd_genus_endo1[,"Genus"] == "unclassified",] media_sd_genus_endo1=media_sd_genus_endo1[media_sd_genus_endo1[,"Genus"] != "unclassified",] media_sd_genus_endo1$Genus[media_sd_genus_endo1$Abundance <= 2] = "Other genera (<2%)" media_sd_genus_endo1$Genus=as.factor(media_sd_genus_endo1$Genus) others_e1=media_sd_genus_endo1[media_sd_genus_endo1[,"Genus"] == "Other genera (<2%)",] media_sd_genus_endo1=media_sd_genus_endo1[media_sd_genus_endo1[,"Genus"] != "Other genera (<2%)",] ordenado_e1=media_sd_genus_endo1[order(media_sd_genus_endo1$Abundance, decreasing=T),] ordenado_e1=rbind(ordenado_e1,others_e1,unclassified_e1) group_label_e1=c("M","S", "M","S") levels_e1=c("A_Plot1_Mature_Endosphere","A_Plot1_Immature_Endosphere", "B_Plot1_Mature_Endosphere","B_Plot1_Immature_Endosphere") limits_e1=c("A_Plot1_Mature_Endosphere","A_Plot1_Immature_Endosphere", "B_Plot1_Mature_Endosphere","B_Plot1_Immature_Endosphere") genus_name_ordered_e1=as.vector(ordenado_e1$Genus) nombres_unicos_e1=unique(genus_name_ordered_e1) ordenado_e1$Genus=reorder.factor(ordenado_e1$Genus,new.order=rev(nombres_unicos_e1)) lab_unicos_e1=nombres_unicos_e1 sorted_labels_ggplot_e1=sapply(lab_unicos_e1, function(x) if (x == "Other genera (<2%)"|x == "unclassified"|x == "Other genera (<2%)") {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))}) colores_e1 = c("blue","violet","brown","magenta","#FDBF6F","#FB9A99","springgreen3","yellow","coral2", "yellow4","green1","#6A3D9A","mediumorchid2","lightgoldenrod2","darkgreen","green4","firebrick", "grey","black") length(lab_unicos_e1)==length(colores_e1) length(lab_unicos_e1) length(colores_e1) titulo_plot_e1="South plot" pgenus_e1=ggplot(ordenado_e1, aes(x=Group, y=Abundance, fill=Genus, order=Genus)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=colores_e1, labels=sorted_labels_ggplot_e1, breaks=nombres_unicos_e1)+ labs(y="Mean relative abundance (%)", x=NULL, fill="Genus",title=titulo_plot_e1)+ guides(fill = guide_legend(reverse = TRUE))+ scale_x_discrete(limits=limits_e1,labels=group_label_e1, breaks=levels_e1)+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 100))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+ theme(axis.text = element_text(size = 7))+ theme(axis.text.x = element_text(face="bold", size=7))+ theme(axis.title.y = element_text(size = 7))+ theme(legend.key.size = unit(0.2, "cm"))+ theme(legend.text = element_text(size = 5))+ theme(legend.title = element_text(size=5, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0)+ theme(legend.position = "right") x11() pgenus_e1 #b.2) North plot media_sd_genus_endo2=media_sd_genus_endo[media_sd_genus_endo[,"Plot"] == "Plot2",] media_sd_genus_endo2$Genus=as.character(media_sd_genus_endo2$Genus) unclassified_e2=media_sd_genus_endo2[media_sd_genus_endo2[,"Genus"] == "unclassified",] media_sd_genus_endo2=media_sd_genus_endo2[media_sd_genus_endo2[,"Genus"] != "unclassified",] media_sd_genus_endo2$Genus[media_sd_genus_endo2$Abundance <= 2] = "Other genera (<2%)" media_sd_genus_endo2$Genus=as.factor(media_sd_genus_endo2$Genus) others_e2=media_sd_genus_endo2[media_sd_genus_endo2[,"Genus"] == "Other genera (<2%)",] media_sd_genus_endo2=media_sd_genus_endo2[media_sd_genus_endo2[,"Genus"] != "Other genera (<2%)",] ordenado_e2=media_sd_genus_endo2[order(media_sd_genus_endo2$Abundance, decreasing=T),] ordenado_e2=rbind(ordenado_e2,others_e2,unclassified_e2) group_label_e2=c("M","S", "M","S") levels_e2=c("A_Plot2_Mature_Endosphere","A_Plot2_Immature_Endosphere", "B_Plot2_Mature_Endosphere","B_Plot2_Immature_Endosphere") limits_e2=c("A_Plot2_Mature_Endosphere","A_Plot2_Immature_Endosphere", "B_Plot2_Mature_Endosphere","B_Plot2_Immature_Endosphere") genus_name_ordered_e2=as.vector(ordenado_e2$Genus) nombres_unicos_e2=unique(genus_name_ordered_e2) ordenado_e2$Genus=reorder.factor(ordenado_e2$Genus,new.order=rev(nombres_unicos_e2)) lab_unicos_e2=nombres_unicos_e2 sorted_labels_ggplot_e2 = sapply(lab_unicos_e2, function(x) if (x == "Other genera (<2%)"|x == "unclassified"|x == "Other genera (<2%)") {parse(text=paste0("'", as.character(x), "'"))} else {parse(text = paste0("italic('",as.character(x), "')"))}) colores_e2 = c("brown", "violet", "blue", "grey90","orangered2","darkseagreen2","burlywood", "yellow4","darkmagenta","powderblue","sienna2","magenta","springgreen3","mediumslateblue", "gold2", "grey","black") length(lab_unicos_e2)==length(colores_e2) length(lab_unicos_e2) length(colores_e2) titulo_plot_e2="North plot" pgenus_e2=ggplot(ordenado_e2, aes(x=Group, y=Abundance, fill=Genus, order=Genus)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=colores_e2, labels=sorted_labels_ggplot_e2, breaks=nombres_unicos_e2)+ labs(y="Mean relative abundance (%)", x=NULL, fill="Genus",title=titulo_plot_e2)+ guides(fill = guide_legend(reverse = TRUE))+ scale_x_discrete(limits=limits_e2,labels=group_label_e2, breaks=levels_e2)+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 100))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=7))+ theme(axis.text = element_text(size = 7))+ theme(axis.text.x = element_text(face="bold", size=7))+ theme(axis.title.y = element_text(size = 7))+ theme(legend.key.size = unit(0.2, "cm"))+ theme(legend.text = element_text(size = 5))+ theme(legend.title = element_text(size=5, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0)+ theme(legend.position = "none") x11() pgenus_e2 ``` In Figure 6 you can observe an example of the stacked bar plot for the main genera. This kind of plot can be calculated at the taxonomical rank you prefer. Please, try to open the plot in a new window if you want to view it at full size. ![Figure 6. Distribution of the main classified genera in the rhizosphere of mother ("Mature") and sucker plants ("Inmature") in 2022 (left) and 2023 (right)](images/Rizo_Genus_Plot1.jpg){fig-align="center"} # 7. Fungal transmission analyses Our experimental design allows us to study the potential transmission of microorganisms from mothers to first and second suckers. For that purpose, we firstly compared the root microbiota of mothers and first suckers, and then, these shared microorganisms were further compared against the microbiota of rhizosphere of sucker plants. It is expected that the microorganisms that are found in the root endosphere of suckers (and not found in the rhizosphere) and also in the root endosphere of mother plants, are transferred from mothers to suckers through the root system (corm), but not acquired from the surrounding soil of from the environment. We will perform this analysis just at ASV level ## 7.1 Prepare the data ```{r} mt1=as.data.frame(sample_data(phy_data_relabun)) names = unique(mt1$Fusion)#get the name of each level of the variable "Fusion", which in this example corresponds to each individual group of samples. For instance, the first level of the variable "Fusion" is "A_Plot1_Mature_Rhizosphere" (all rhizosphere samples of mother plants sampled in 2022 located in the south plot) #in the following for loop we are creating a list in which we are saving one each phyloseq object per group of samples. That is to say, in the first element of the list, the phyloseq object corresponding to the samples of the rhizosphere of mother plants sampled in 2022 located in the south plot is stored. pruned_data = list() for(i in 1:length(names)){ samples_group <- mt1[which(mt1$Fusion==names[i]),]$Sample pruned_data[[i]] <- prune_samples(samples_group, phy_data_relabun) } #this loop stored the taxonony and ASV table of each phyloseq element of the previously created list otu_tables = list() tax_tables = list() for (i in 1:length(pruned_data)){ otu_tables[[i]] <- as.data.frame(otu_table(pruned_data[[i]])) tax_tables[[i]] <- as.data.frame(tax_table(pruned_data[[i]])) }#it gives the list "otu_tables", whose element [[1]] corresponds to the ASV table of the group of sample "A_Plot1_Mature_Rhizosphere". While "tax_tables" list stores the taxonomy tables #this list merges each ASV table with each tax table by rownames (by=0) whole_tables = list() for (i in 1:length(pruned_data)){ whole_tables[[i]] = merge(tax_tables[[i]],otu_tables[[i]], by=0) } for(i in 1:length(whole_tables)){ whole_tables[[i]]$ASV_names=whole_tables[[i]]$Row.names }#it creates a new column in the list "wholes_tables" that corresponds to the rownames for (i in 1:length(whole_tables)){ assign(paste0(names[i],"_df"),as#create one dataframe per element from the list "whole_tables" #the previous loop returns complex dataframe names, so let's rename them and calculate the total number of sequences registered for each ASV in all the corresponding dataset A_r1_M=A_Plot1_Mature_Rhizosphere_df; A_r1_M$Sum=rowSums(A_r1_M[,9:ncol(A_r1_M)]); A_r1_M_def=A_r1_M[which(A_r1_M$Sum>0,arr.ind=TRUE),];A_r1_M_def2= subset(A_r1_M_def, select = -c(Sum))#we remove those ASV that account for 0 sequences in sum in each dataset A_e1_M=A_Plot1_Mature_Endosphere_df; A_e1_M$Sum=rowSums(A_e1_M[,9:ncol(A_e1_M)]); A_e1_M_def=A_e1_M[which(A_e1_M$Sum>0,arr.ind=TRUE),];A_e1_M_def2= subset(A_e1_M_def, select = -c(Sum)) A_e1_I=A_Plot1_Immature_Endosphere_df; A_e1_I$Sum=rowSums(A_e1_I[,9:ncol(A_e1_I)]); A_e1_I_def=A_e1_I[which(A_e1_I$Sum>0,arr.ind=TRUE),];A_e1_I_def2= subset(A_e1_I_def, select = -c(Sum)) A_r1_I=A_Plot1_Immature_Rhizosphere_df; A_r1_I$Sum=rowSums(A_r1_I[,9:ncol(A_r1_I)]); A_r1_I_def=A_r1_I[which(A_r1_I$Sum>0,arr.ind=TRUE),];A_r1_I_def2= subset(A_r1_I_def, select = -c(Sum)) A_e2_M=A_Plot2_Mature_Endosphere_df; A_e2_M$Sum=rowSums(A_e2_M[,9:ncol(A_e2_M)]); A_e2_M_def=A_e2_M[which(A_e2_M$Sum>0,arr.ind=TRUE),];A_e2_M_def2= subset(A_e2_M_def, select = -c(Sum)) A_r2_M=A_Plot2_Mature_Rhizosphere_df; A_r2_M$Sum=rowSums(A_r2_M[,9:ncol(A_r2_M)]); A_r2_M_def=A_r2_M[which(A_r2_M$Sum>0,arr.ind=TRUE),];A_r2_M_def2= subset(A_r2_M_def, select = -c(Sum)) A_r2_I=A_Plot2_Immature_Rhizosphere_df; A_r2_I$Sum=rowSums(A_r2_I[,9:ncol(A_r2_I)]); A_r2_I_def=A_r2_I[which(A_r2_I$Sum>0,arr.ind=TRUE),];A_r2_I_def2= subset(A_r2_I_def, select = -c(Sum)) A_e2_I=A_Plot2_Immature_Endosphere_df; A_e2_I$Sum=rowSums(A_e2_I[,9:ncol(A_e2_I)]);A_e2_I_def=A_e2_I[which(A_e2_I$Sum>0,arr.ind=TRUE),];A_e2_I_def2= subset(A_e2_I_def, select = -c(Sum)) B_r1_M=B_Plot1_Mature_Rhizosphere_df; B_r1_M$Sum=rowSums(B_r1_M[,9:ncol(B_r1_M)]); B_r1_M_def=B_r1_M[which(B_r1_M$Sum>0,arr.ind=TRUE),];B_r1_M_def2= subset(B_r1_M_def, select = -c(Sum)) B_r1_I=B_Plot1_Immature_Rhizosphere_df; B_r1_I$Sum=rowSums(B_r1_I[,9:ncol(B_r1_I)]); B_r1_I_def=B_r1_I[which(B_r1_I$Sum>0,arr.ind=TRUE),];B_r1_I_def2= subset(B_r1_I_def, select = -c(Sum)) B_r2_M=B_Plot2_Mature_Rhizosphere_df; B_r2_M$Sum=rowSums(B_r2_M[,9:ncol(B_r2_M)]); B_r2_M_def=B_r2_M[which(B_r2_M$Sum>0,arr.ind=TRUE),];B_r2_M_def2= subset(B_r2_M_def, select = -c(Sum)) B_r2_I=B_Plot2_Immature_Rhizosphere_df; B_r2_I$Sum=rowSums(B_r2_I[,9:ncol(B_r2_I)]); B_r2_I_def=B_r2_I[which(B_r2_I$Sum>0,arr.ind=TRUE),];B_r2_I_def2= subset(B_r2_I_def, select = -c(Sum)) B_e1_I=B_Plot1_Immature_Endosphere_df; B_e1_I$Sum=rowSums(B_e1_I[,9:ncol(B_e1_I)]); B_e1_I_def=B_e1_I[which(B_e1_I$Sum>0,arr.ind=TRUE),];B_e1_I_def2= subset(B_e1_I_def, select = -c(Sum)) B_e1_M=B_Plot1_Mature_Endosphere_df; B_e1_M$Sum=rowSums(B_e1_M[,9:ncol(B_e1_M)]); B_e1_M_def=B_e1_M[which(B_e1_M$Sum>0,arr.ind=TRUE),];B_e1_M_def2= subset(B_e1_M_def, select = -c(Sum)) B_e2_M=B_Plot2_Mature_Endosphere_df; B_e2_M$Sum=rowSums(B_e2_M[,9:ncol(B_e2_M)]); B_e2_M_def=B_e2_M[which(B_e2_M$Sum>0,arr.ind=TRUE),];B_e2_M_def2= subset(B_e2_M_def, select = -c(Sum)) B_e2_I=B_Plot2_Immature_Endosphere_df; B_e2_I$Sum=rowSums(B_e2_I[,9:ncol(B_e2_I)]); B_e2_I_def=B_e2_I[which(B_e2_I$Sum>0,arr.ind=TRUE),];B_e2_I_def2= subset(B_e2_I_def, select = -c(Sum)) ``` ## 7.2 Compare ASVs between mother and sucker plants Remember that we want to retain the ASVs that are shared between mother and sucker plants (just in the root endosphere). They could be both transferred by mother plants or acquired from the rhizosphere soil. ```{r} #a) South plot #a.1) 2022 compartidos_A_e1=cbind(A_e1_M_def2[A_e1_M_def2$ASV_names %in% A_e1_I_def2$ASV_names,], A_e1_I_def2[A_e1_I_def2$ASV_names %in% A_e1_M_def2$ASV_names,])#retain those ASVs that are in the dataframe of mothers and suckers (root endosphere) compartidos_A_e1=compartidos_A_e1[,-(20:27)]#removing extra information write.table(data.frame("_"=rownames(compartidos_A_e1),compartidos_A_e1),file="Core_A_e1.txt",sep="\t", row.names = F) #a.2) 2023 compartidos_B_e1=cbind(B_e1_M_def2[B_e1_M_def2$ASV_names %in% B_e1_I_def2$ASV_names,], B_e1_I_def2[B_e1_I_def2$ASV_names %in% B_e1_M_def2$ASV_names,]) compartidos_B_e1=compartidos_B_e1[,-(18:25)] write.table(data.frame("_"=rownames(compartidos_B_e1),compartidos_B_e1),file="Core_B_e1.txt",sep="\t", row.names = F) #b) North plot #b.1) 2022 compartidos_A_e2=cbind(A_e2_M_def2[A_e2_M_def2$ASV_names %in% A_e2_I_def2$ASV_names,], A_e2_I_def2[A_e2_I_def2$ASV_names %in% A_e2_M_def2$ASV_names,]) compartidos_A_e2=compartidos_A_e2[,-(21:28)] write.table(data.frame("_"=rownames(compartidos_A_e2),compartidos_A_e2),file="Core_A_e2.txt",sep="\t", row.names = F) #b.2) 2023 compartidos_B_e2=cbind(B_e2_M_def2[B_e2_M_def2$ASV_names %in% B_e2_I_def2$ASV_names,], B_e2_I_def2[B_e2_I_def2$ASV_names %in% B_e2_M_def2$ASV_names,]) compartidos_B_e2=compartidos_B_e2[,-(19:26)] write.table(data.frame("_"=rownames(compartidos_B_e2),compartidos_B_e2),file="Core_B_e2.txt",sep="\t", row.names = F) ``` Now, we are going to compare the shared fungal communities against the gunal communities dwelling in the rhizosphere soil of the suckers. We will retain fungal members that are in the root endosphere but **NO** in the rhizosphere soil ```{r} #a) South plot #a.1) 2022 compartidos_A1_endo_rizo=cbind(compartidos_A_e1[compartidos_A_e1$ASV_names %in% A_r1_I_def2$ASV_names,], A_r1_I_def2[A_r1_I_def2$ASV_names %in% compartidos_A_e1$ASV_names,])#remove extra information from the data (additional columns indicating, again, the taxonomy of the ASVs) compartidos_A1_endo_rizo=compartidos_A1_endo_rizo[,-(30:37)]#remove extra information write.table(data.frame("_"=rownames(compartidos_A1_endo_rizo),compartidos_A1_endo_rizo),file="Compartidos_A1_endo_rizo.txt",sep="\t", row.names = F) #a.2) 2023 compartidos_B1_endo_rizo=cbind(compartidos_B_e1[compartidos_B_e1$ASV_names %in% B_r1_I_def2$ASV_names,], B_r1_I_def2[B_r1_I_def2$ASV_names %in% compartidos_B_e1$ASV_names,]) compartidos_B1_endo_rizo=compartidos_B1_endo_rizo[,-(26:33)] write.table(data.frame("_"=rownames(compartidos_B1_endo_rizo),compartidos_B1_endo_rizo),file="Compartidos_B1_endo_rizo.txt",sep="\t", row.names = F) #b) North plot #b.1) 2022 compartidos_A2_endo_rizo=cbind(compartidos_A_e2[compartidos_A_e2$ASV_names %in% A_r2_I_def2$ASV_names,], A_r2_I_def2[A_r2_I_def2$ASV_names %in% compartidos_A_e2$ASV_names,]) compartidos_A2_endo_rizo=compartidos_A2_endo_rizo[,-(29:36)] write.table(data.frame("_"=rownames(compartidos_A2_endo_rizo),compartidos_A2_endo_rizo),file="Compartidos_A2_endo_rizo.txt",sep="\t", row.names = F) #b.2) 2023 compartidos_B2_endo_rizo=cbind(compartidos_B_e2[compartidos_B_e2$ASV_names %in% B_r2_I_def2$ASV_names,], B_r2_I_def2[B_r2_I_def2$ASV_names %in% compartidos_B_e2$ASV_names,]) compartidos_B2_endo_rizo=compartidos_B2_endo_rizo[,-(27:34)] write.table(data.frame("_"=rownames(compartidos_B2_endo_rizo),compartidos_B2_endo_rizo),file="Compartidos_B2_endo_rizo.txt",sep="\t", row.names = F) #Remove those ASV detected both in the rhizosphere and in the root endosphere #a) South plot #a.1) 2022 especif_A1_endo=subset(compartidos_A_e1, !(ASV_names %in% compartidos_A1_endo_rizo$ASV_names)) write.table(data.frame("_"=rownames(especif_A1_endo),especif_A1_endo),file="A1_Endo_Transmitidos_Madreahija.txt",sep="\t", row.names = F) #a.2) 2023 especif_B1_endo=subset(compartidos_B_e1, !(ASV_names %in% compartidos_B1_endo_rizo$ASV_names)) write.table(data.frame("_"=rownames(especif_B1_endo),especif_B1_endo),file="B1_Endo_Transmitidos_Madreahija.txt",sep="\t", row.names = F) #b) North plot #b.1) 2022 especif_A2_endo=subset(compartidos_A_e2, !(ASV_names %in% compartidos_A2_endo_rizo$ASV_names)) write.table(data.frame("_"=rownames(especif_A2_endo),especif_A2_endo),file="A2_Endo_Transmitidos_Madreahija.txt",sep="\t", row.names = F) #b.2) 2023 especif_B2_endo=subset(compartidos_B_e2, !(ASV_names %in% compartidos_B2_endo_rizo$ASV_names)) write.table(data.frame("_"=rownames(especif_B2_endo),especif_B2_endo),file="B2_Endo_Transmitidos_Madreahija.txt",sep="\t", row.names = F) ``` In this work, we analyzed carefully the number and proportions of the transferred ASVs in Excel ## 7.3 Plotting In this section, we are going to make different graph representing the microbial transmission process. We will use barplots ```{r} #a) South plot #a.1) 2022 totales_M_A_e1=nrow(A_e1_M_def2)#get the total number of ASV in mothers transferidos_A_e1=nrow(especif_A1_endo)#get the number of shared ASVs between mothers and suckers prop_transferidos_M_A_e1=(transferidos_A_e1 / totales_M_A_e1) *100#calculate the proportion of transferred ASVs prop_perdidos_M_A_e1=((totales_M_A_e1-transferidos_A_e1) / totales_M_A_e1) *100#calculate the proportion of ASVs lost by the mothers totales_I_A_e1=nrow(A_e1_I_def2)#get the total number of ASVs in suckers prop_transferidos_I_A_e1=(transferidos_A_e1 / totales_I_A_e1) *100#get the number of shared ASVs between mothers and suckers prop_adquiridos_I_A_e1=((totales_I_A_e1-transferidos_A_e1) / totales_I_A_e1) *100#calculate the proportion of ASVs acquired by the suckers barras_A_e1=as.data.frame(rbind(prop_transferidos_M_A_e1,prop_perdidos_M_A_e1,prop_transferidos_I_A_e1,prop_adquiridos_I_A_e1))#create a dataframe with all the values in order to plot them barras_A_e1$Year="A" barras_A_e1$Plant=c("Mother", "Mother", "Sucker", "Sucker") barras_A_e1$Condition=as.factor(c("Transmitted","Lost", "Transmitted", "Acquired")) colnames(barras_A_e1)[1]="Proportion" #a.2) 2023 totales_M_B_e1=nrow(B_e1_M_def2) transferidos_B_e1=nrow(especif_B1_endo) prop_transferidos_M_B_e1=(transferidos_B_e1 / totales_M_B_e1) *100 prop_perdidos_M_B_e1=((totales_M_B_e1-transferidos_B_e1) / totales_M_B_e1) *100 totales_I_B_e1=nrow(B_e1_I_def2) prop_transferidos_I_B_e1=(transferidos_B_e1 / totales_I_B_e1) *100 prop_adquiridos_I_B_e1=((totales_I_B_e1-transferidos_B_e1) / totales_I_B_e1) *100 barras_B_e1=as.data.frame(rbind(prop_transferidos_M_B_e1,prop_perdidos_M_B_e1, prop_transferidos_I_B_e1,prop_adquiridos_I_B_e1)) barras_B_e1$Year="B" barras_B_e1$Plant=c("Mother", "Mother", "Sucker", "Sucker") barras_B_e1$Condition=as.factor(c("Transmitted","Lost", "Transmitted", "Acquired")) colnames(barras_B_e1)[1]="Proportion" A_e1=ggplot(barras_A_e1, aes(x=Plant, y=Proportion, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"), labels=c("Transmitted","Lost", "Transmitted", "Acquired"), breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+ labs(y="Proportion of ASV (%)", x=NULL, fill="Condition",title="Plot 1 (2022)")+ guides(fill = guide_legend(reverse = TRUE))+ scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"), breaks=c("Mother","Sucker"))+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 101))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+ theme(axis.text = element_text(size = 14))+ theme(axis.text.x = element_text(face="bold", size=16))+ theme(axis.title.y = element_text(size = 16))+ theme(legend.key.size = unit(0.9, "cm"))+ theme(legend.text = element_text(size = 18))+ theme(legend.title = element_text(size=18, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0) x11() A_e1 B_e1=ggplot(barras_B_e1, aes(x=Plant, y=Proportion, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"), labels=c("Transmitted","Lost", "Transmitted", "Acquired"), breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+ labs(y="Proportion of ASV (%)", x=NULL, fill="Condition",title="Plot 1 (2023)")+ guides(fill = guide_legend(reverse = TRUE))+ scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"), breaks=c("Mother","Sucker"))+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 101))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+ theme(axis.text = element_text(size = 14))+ theme(axis.text.x = element_text(face="bold", size=16))+ theme(axis.title.y = element_text(size = 16))+ theme(legend.key.size = unit(0.9, "cm"))+ theme(legend.text = element_text(size = 18))+ theme(legend.title = element_text(size=18, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0) x11() B_e1 x11() p_Plot1_prop=grid.arrange(A_e1,B_e1,ncol=2, nrow=1, top=textGrob("Plot 1", gp=gpar(fontsize=18))) #b) North plot #b.1) 2022 totales_M_A_e2=nrow(A_e2_M_def2) transferidos_A_e2=nrow(especif_A2_endo) prop_transferidos_M_A_e2=(transferidos_A_e2 / totales_M_A_e2) *100 prop_perdidos_M_A_e2=((totales_M_A_e2-transferidos_A_e2) / totales_M_A_e2) *100 totales_I_A_e2=nrow(A_e2_I_def2) prop_transferidos_I_A_e2=(transferidos_A_e2 / totales_I_A_e2) *100 prop_adquiridos_I_A_e2=((totales_I_A_e2-transferidos_A_e2) / totales_I_A_e2) *100 barras_A_e2=as.data.frame(rbind(prop_transferidos_M_A_e2,prop_perdidos_M_A_e2, prop_transferidos_I_A_e2,prop_adquiridos_I_A_e2)) barras_A_e2$Year="A" barras_A_e2$Plant=c("Mother", "Mother", "Sucker", "Sucker") barras_A_e2$Condition=as.factor(c("Transmitted","Lost", "Transmitted", "Acquired")) colnames(barras_A_e2)[1]="Proportion" #b.2) 2023 totales_M_B_e2=nrow(B_e2_M_def2) transferidos_B_e2=nrow(especif_B2_endo) prop_transferidos_M_B_e2=(transferidos_B_e2 / totales_M_B_e2) *100 prop_perdidos_M_B_e2=((totales_M_B_e2-transferidos_B_e2) / totales_M_B_e2) *100 totales_I_B_e2=nrow(B_e2_I_def2) prop_transferidos_I_B_e2=(transferidos_B_e2 / totales_I_B_e2) *100 prop_adquiridos_I_B_e2=((totales_I_B_e2-transferidos_B_e2) / totales_I_B_e2) *100 barras_B_e2=as.data.frame(rbind(prop_transferidos_M_B_e2,prop_perdidos_M_B_e2, prop_transferidos_I_B_e2,prop_adquiridos_I_B_e2)) barras_B_e2$Year="B" barras_B_e2$Plant=c("Mother", "Mother", "Sucker", "Sucker") barras_B_e2$Condition=as.factor(c("Transmitted","Lost", "Transmitted", "Acquired")) colnames(barras_B_e2)[1]="Proportion" A_e2=ggplot(barras_A_e2, aes(x=Plant, y=Proportion, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"), labels=c("Transmitted","Lost", "Transmitted", "Acquired"), breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+ labs(y="Proportion of ASV (%)", x=NULL, fill="Condition",title="Plot 2 (2022)")+ guides(fill = guide_legend(reverse = TRUE))+ scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"), breaks=c("Mother","Sucker"))+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 101))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+ theme(axis.text = element_text(size = 14))+ theme(axis.text.x = element_text(face="bold", size=16))+ theme(axis.title.y = element_text(size = 16))+ theme(legend.key.size = unit(0.9, "cm"))+ theme(legend.text = element_text(size = 18))+ theme(legend.title = element_text(size=18, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0) x11() A_e2 B_e2=ggplot(barras_B_e2, aes(x=Plant, y=Proportion, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"), labels=c("Transmitted","Lost", "Transmitted", "Acquired"), breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+ labs(y="Proportion of ASV (%)", x=NULL, fill="Condition",title="Plot 2 (2023)")+ guides(fill = guide_legend(reverse = TRUE))+ scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"), breaks=c("Mother","Sucker"))+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 101))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+ theme(axis.text = element_text(size = 14))+ theme(axis.text.x = element_text(face="bold", size=16))+ theme(axis.title.y = element_text(size = 16))+ theme(legend.key.size = unit(0.9, "cm"))+ theme(legend.text = element_text(size = 18))+ theme(legend.title = element_text(size=18, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0) x11() B_e2 x11() p_Plot2_prop=grid.arrange(A_e2,B_e2,ncol=2, nrow=1, top=textGrob("Plot 2", gp=gpar(fontsize=18))) ``` With this code a bar plot is created (see Figure 7) ![Figure 7. Bar plot indicating the transferred ASV, ASVs lost by the mothers, and ASVs acquired from the environment by the suckers in the first plot (south plot)](images/Inheritance_HONGOS.jpg){fig-align="center"} We are going to obtain the same plots but instead of plotting the proportion of ASVs lost, acquired and transferred, the relative abundance of each of them will be visualized. ```{r} #a) South plot #a.1) 2022 perdidos_M_A_e1=subset(A_e1_M_def2, !(ASV_names %in% especif_A1_endo$ASV_names))#these are the ASVs that mothers lost perdidos_M_A_e1$Promedio=rowMeans(subset(perdidos_M_A_e1, select =c(9:ncol(perdidos_M_A_e1))))#mean relative abundance of each of them perdidos_M_A_e1_sum=sum(perdidos_M_A_e1$Promedio)#sum the relative abundance of all the ASVs lost by the mothers adquiridos_I_A_e1=subset(A_e1_I_def2, !(ASV_names %in% especif_A1_endo$ASV_names))#these are the ASVs that suckers retain from the environment adquiridos_I_A_e1$Promedio=rowMeans(subset(adquiridos_I_A_e1, select =c(9:ncol(adquiridos_I_A_e1))))#mean adquiridos_I_A_e1_sum=sum(adquiridos_I_A_e1$Promedio) especif_A1_endo$Promedio_Madre=rowMeans(subset(especif_A1_endo, select =c(9:19)))#mean relative abundance of transferred ASVs (in mothers samples) especif_A1_endo_M_sum=sum(especif_A1_endo$Promedio_Madre) especif_A1_endo$Promedio_Hijas=rowMeans(subset(especif_A1_endo, select =c(20:29)))#mean relative abundance of acquired ASV from mothers (in suckers samples) especif_A1_endo_I_sum=sum(especif_A1_endo$Promedio_Hijas) Abund_A1_endo=c(especif_A1_endo_M_sum,perdidos_M_A_e1_sum,especif_A1_endo_I_sum,adquiridos_I_A_e1_sum) barras_abund_A_e1=as.data.frame(cbind(barras_A_e1,Abund_A1_endo))#create a new dataframe with all the data colnames(barras_abund_A_e1)[5]="Abundance" write.table(data.frame("X"=rownames(perdidos_M_A_e1),perdidos_M_A_e1),"AbunRel_perdidos_Madre_A_e1.txt",sep="\t",row.names = F) write.table(data.frame("X"=rownames(adquiridos_I_A_e1),adquiridos_I_A_e1),"AbunRel_AdquiridosAmbiente_A_e1.txt",sep="\t",row.names = F) write.table(data.frame("X"=rownames(especif_A1_endo),especif_A1_endo),"AbunRel_TransferidosMadreHija_A_e1.txt",sep="\t",row.names = F) #a.2) 2023 perdidos_M_B_e1=subset(B_e1_M_def2, !(ASV_names %in% especif_B1_endo$ASV_names)) perdidos_M_B_e1$Promedio=rowMeans(subset(perdidos_M_B_e1, select =c(9:ncol(perdidos_M_B_e1)))) perdidos_M_B_e1_sum=sum(perdidos_M_B_e1$Promedio) adquiridos_I_B_e1=subset(B_e1_I_def2, !(ASV_names %in% especif_B1_endo$ASV_names)) adquiridos_I_B_e1$Promedio=rowMeans(subset(adquiridos_I_B_e1, select =c(9:ncol(adquiridos_I_B_e1)))) adquiridos_I_B_e1_sum=sum(adquiridos_I_B_e1$Promedio) especif_B1_endo$Promedio_Madre=rowMeans(subset(especif_B1_endo, select =c(9:17))) especif_B1_endo_M_sum=sum(especif_B1_endo$Promedio_Madre) especif_B1_endo$Promedio_Hijas=rowMeans(subset(especif_B1_endo, select =c(18:25))) especif_B1_endo_I_sum=sum(especif_B1_endo$Promedio_Hijas) Abund_B1_endo=c(especif_B1_endo_M_sum,perdidos_M_B_e1_sum,especif_B1_endo_I_sum,adquiridos_I_B_e1_sum) barras_abund_B_e1=as.data.frame(cbind(barras_B_e1,Abund_B1_endo)) colnames(barras_abund_B_e1)[5]="Abundance" write.table(data.frame("X"=rownames(perdidos_M_B_e1),perdidos_M_B_e1),"AbunRel_perdidos_Madre_B_e1.txt",sep="\t",row.names = F) write.table(data.frame("X"=rownames(adquiridos_I_B_e1),adquiridos_I_B_e1),"AbunRel_AdquiridosAmbienteHija_B_e1.txt",sep="\t",row.names = F) write.table(data.frame("X"=rownames(especif_B1_endo),especif_B1_endo),"AbunRel_TransferidosMadreHija_B_e1.txt",sep="\t",row.names = F) #b) North plot #b.1) 2022 perdidos_M_A_e2=subset(A_e2_M_def2, !(ASV_names %in% especif_A2_endo$ASV_names)) perdidos_M_A_e2$Promedio=rowMeans(subset(perdidos_M_A_e2, select =c(9:ncol(perdidos_M_A_e2)))) perdidos_M_A_e2_sum=sum(perdidos_M_A_e2$Promedio) adquiridos_I_A_e2=subset(A_e2_I_def2, !(ASV_names %in% especif_A2_endo$ASV_names)) adquiridos_I_A_e2$Promedio=rowMeans(subset(adquiridos_I_A_e2, select =c(9:ncol(adquiridos_I_A_e2)))) adquiridos_I_A_e2_sum=sum(adquiridos_I_A_e2$Promedio) especif_A2_endo$Promedio_Madre=rowMeans(subset(especif_A2_endo, select =c(9:20))) especif_A2_endo_M_sum=sum(especif_A2_endo$Promedio_Madre) especif_A2_endo$Promedio_Hijas=rowMeans(subset(especif_A2_endo, select =c(21:28))) especif_A2_endo_I_sum=sum(especif_A2_endo$Promedio_Hijas) Abund_A2_endo=c(especif_A2_endo_M_sum,perdidos_M_A_e2_sum,especif_A2_endo_I_sum,adquiridos_I_A_e2_sum) barras_abund_A_e2=as.data.frame(cbind(barras_A_e2,Abund_A2_endo)) colnames(barras_abund_A_e2)[5]="Abundance" write.table(data.frame("X"=rownames(perdidos_M_A_e2),perdidos_M_A_e2),"AbunRel_perdidos_Madre_A_e2.txt",sep="\t",row.names = F) write.table(data.frame("X"=rownames(adquiridos_I_A_e2),adquiridos_I_A_e2),"AbunRel_AdquiridosAmbienteHija_A_e2.txt",sep="\t",row.names = F) write.table(data.frame("X"=rownames(especif_A2_endo),especif_A2_endo),"AbunRel_TransferidosMadreHija_A_e2.txt",sep="\t",row.names = F) #b.2) 2023 perdidos_M_B_e2=subset(B_e2_M_def2, !(ASV_names %in% especif_B2_endo$ASV_names)) perdidos_M_B_e2$Promedio=rowMeans(subset(perdidos_M_B_e2, select =c(9:ncol(perdidos_M_B_e2)))) perdidos_M_B_e2_sum=sum(perdidos_M_B_e2$Promedio) adquiridos_I_B_e2=subset(B_e2_I_def2, !(ASV_names %in% especif_B2_endo$ASV_names)) adquiridos_I_B_e2$Promedio=rowMeans(subset(adquiridos_I_B_e2, select =c(9:ncol(adquiridos_I_B_e2)))) adquiridos_I_B_e2_sum=sum(adquiridos_I_B_e2$Promedio) especif_B2_endo$Promedio_Madre=rowMeans(subset(especif_B2_endo, select =c(9:18))) especif_B2_endo_M_sum=sum(especif_B2_endo$Promedio_Madre) especif_B2_endo$Promedio_Hijas=rowMeans(subset(especif_B2_endo, select =c(19:26))) especif_B2_endo_I_sum=sum(especif_B2_endo$Promedio_Hijas) Abund_B2_endo=c(especif_B2_endo_M_sum,perdidos_M_B_e2_sum,especif_B2_endo_I_sum,adquiridos_I_B_e2_sum) barras_abund_B_e2=as.data.frame(cbind(barras_B_e2,Abund_B2_endo)) colnames(barras_abund_B_e2)[5]="Abundance" write.table(data.frame("X"=rownames(perdidos_M_B_e2),perdidos_M_B_e2),"AbunRel_perdidos_Madre_B_e2.txt",sep="\t",row.names = F) write.table(data.frame("X"=rownames(adquiridos_I_B_e2),adquiridos_I_B_e2),"AbunRel_AdquiridosAmbienteHijas_B_e2.txt",sep="\t",row.names = F) write.table(data.frame("X"=rownames(especif_B2_endo),especif_B2_endo),"AbunRel_TransferidosMadreHija_B_e2.txt",sep="\t",row.names = F) A1_abun=ggplot(barras_abund_A_e1, aes(x=Plant, y=Abundance, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"), labels=c("Transmitted","Lost", "Transmitted", "Acquired"), breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+ labs(y="Relative abundance of ASV (%)", x=NULL, fill="Condition",title="Plot 1 (2022)")+ guides(fill = guide_legend(reverse = TRUE))+ scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"), breaks=c("Mother","Sucker"))+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 101))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+ theme(axis.text = element_text(size = 14))+ theme(axis.text.x = element_text(face="bold", size=16))+ theme(axis.title.y = element_text(size = 16))+ theme(legend.key.size = unit(0.9, "cm"))+ theme(legend.text = element_text(size = 18))+ theme(legend.title = element_text(size=18, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0) x11() A1_abun B1_abun=ggplot(barras_abund_B_e1, aes(x=Plant, y=Abundance, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"), labels=c("Transmitted","Lost", "Transmitted", "Acquired"), breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+ labs(y="Relative abundance of ASV (%)", x=NULL, fill="Condition",title="Plot 1 (2023)")+ guides(fill = guide_legend(reverse = TRUE))+ scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"), breaks=c("Mother","Sucker"))+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 101))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+ theme(axis.text = element_text(size = 14))+ theme(axis.text.x = element_text(face="bold", size=16))+ theme(axis.title.y = element_text(size = 16))+ theme(legend.key.size = unit(0.9, "cm"))+ theme(legend.text = element_text(size = 18))+ theme(legend.title = element_text(size=18, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0) x11() B1_abun A2_abun=ggplot(barras_abund_A_e2, aes(x=Plant, y=Abundance, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"), labels=c("Transmitted","Lost", "Transmitted", "Acquired"), breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+ labs(y="Relative abundance of ASV (%)", x=NULL, fill="Condition",title="Plot 2 (2022)")+ guides(fill = guide_legend(reverse = TRUE))+ scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"), breaks=c("Mother","Sucker"))+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 101))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+ theme(axis.text = element_text(size = 14))+ theme(axis.text.x = element_text(face="bold", size=16))+ theme(axis.title.y = element_text(size = 16))+ theme(legend.key.size = unit(0.9, "cm"))+ theme(legend.text = element_text(size = 18))+ theme(legend.title = element_text(size=18, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0) x11() A2_abun B2_abun=ggplot(barras_abund_B_e2, aes(x=Plant, y=Abundance, fill=Condition, order=Condition)) + geom_bar(stat="identity", position="stack")+ scale_fill_manual(values=c("magenta", "deepskyblue", "magenta","green"), labels=c("Transmitted","Lost", "Transmitted", "Acquired"), breaks=c("Transmimtted","Lost", "Transmitted", "Acquired"))+ labs(y="Relative abundance of ASV (%)", x=NULL, fill="Condition",title="Plot 2 (2023)")+ guides(fill = guide_legend(reverse = TRUE))+ scale_x_discrete(limits=c("Mother","Sucker"),labels=c("Mother","Sucker"), breaks=c("Mother","Sucker"))+ scale_y_continuous(expand=c(0.01,0.01), breaks=c(0,10,20,30,40,50,60,70,80,90,100), labels=c("0","10", "20","30","40","50","60","70","80","90","100"), limits = c(NA, 101))+ theme_bw()+ theme(panel.border = element_rect(colour="black"))+ theme(axis.title.x=element_blank())+ theme(plot.title = element_text(face="bold", hjust = 0.5, size=16))+ theme(axis.text = element_text(size = 14))+ theme(axis.text.x = element_text(face="bold", size=16))+ theme(axis.title.y = element_text(size = 16))+ theme(legend.key.size = unit(0.9, "cm"))+ theme(legend.text = element_text(size = 18))+ theme(legend.title = element_text(size=18, face="bold"))+ theme(legend.title.align=0)+ theme(legend.text.align = 0) x11() B2_abun x11() p_abund_p1=grid.arrange(A1_abun,B1_abun,ncol=2, nrow=1, top=textGrob("Plot 1", gp=gpar(fontsize=18))) x11() p_abund_p2=grid.arrange(A2_abun,B2_abun,ncol=2, nrow=1, top=textGrob("Plot 2", gp=gpar(fontsize=18))) ``` # 8. Differentially abundant taxa It is interesting to decipher which taxa show statistically significant differences between two groups of samples (i.e., differences in the abundance between mother and suckers plants). For that purpose, we will apply [ANCOM-BC approach](https://www.nature.com/articles/s41467-020-17041-7 "ANCOM-BC describing paper"), especially designed for compositional data. Furthermore, we will use the function *ancomloop* of the package *micro4all* in order to compare all groups of samples among them (ANCOM-BC just compares the first group of samples in the dataset with all other groups) We will apply this statistical test at both phylum and genus levels. ::: collout-important ANCOM-BC makes a data transformation and applies logarithms, so in this case, we shouldn't use the normalized abundance. We should use raw phyloseq objects (in which the ASV counts is expressed in absolute abundance) ::: ## 8.1 Phylum ```{r} ANCOM_phylum_rizo=ancomloop(input_object_phyloseq = rizo, grouping = "Fusion", ancom.options = list(global=FALSE, struc_zero=TRUE, n_cl=8),out.unclassified = TRUE, tax.level="Phylum") ###options: #grouping: indicate your grouping variable #n_cl: the number of cores #global: apply the global test (for more information, read this paper: https://www.nature.com/articles/s41467-020-17041-7 and documentation of the function ancombc) #struc_zero: remove "structural zeros" (not real zeros but zeros resulting from a wrong sampling procedure) #out.unclassified: apply or not the test to unclassified taxa #tax.level: indicate the taxonomical level that you want to analyze write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[1]]),ANCOM_phylum_rizo[[1]]),file="ANCOM_Phylum_Rizo_2022_Plot1_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[2]]),ANCOM_phylum_rizo[[2]]),file="ANCOM_Phylum_Rizo_2022_Plot1_M.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[3]]),ANCOM_phylum_rizo[[3]]),file="ANCOM_Phylum_Rizo_2022_Plot2_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[4]]),ANCOM_phylum_rizo[[4]]),file="ANCOM_Phylum_Rizo_2022_Plot2_M.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[5]]),ANCOM_phylum_rizo[[5]]),file="ANCOM_Phylum_Rizo_2023_Plot1_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[6]]),ANCOM_phylum_rizo[[6]]),file="ANCOM_Phylum_Rizo_2023_Plot1_M.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[7]]),ANCOM_phylum_rizo[[7]]),file="ANCOM_Phylum_Rizo_2023_Plot2_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_phylum_rizo[[8]]),ANCOM_phylum_rizo[[8]]),file="ANCOM_Phylum_Rizo_2023_Plot2_M.txt",sep="\t", row.names = F) #b) ENDOSPHERE ANCOM_phylum_endo=ancomloop(input_object_phyloseq = endo, grouping = "Fusion", ancom.options = list(global=FALSE, struc_zero=TRUE, n_cl=8),out.unclassified = TRUE, tax.level="Phylum") write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[1]]),ANCOM_phylum_endo[[1]]),file="ANCOM_Phylum_endo_2022_Plot1_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[2]]),ANCOM_phylum_endo[[2]]),file="ANCOM_Phylum_endo_2022_Plot1_M.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[3]]),ANCOM_phylum_endo[[3]]),file="ANCOM_Phylum_endo_2022_Plot2_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[4]]),ANCOM_phylum_endo[[4]]),file="ANCOM_Phylum_endo_2022_Plot2_M.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[5]]),ANCOM_phylum_endo[[5]]),file="ANCOM_Phylum_endo_2023_Plot1_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[6]]),ANCOM_phylum_endo[[6]]),file="ANCOM_Phylum_endo_2023_Plot1_M.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[7]]),ANCOM_phylum_endo[[7]]),file="ANCOM_Phylum_endo_2023_Plot2_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_phylum_endo[[8]]),ANCOM_phylum_endo[[8]]),file="ANCOM_Phylum_endo_2023_Plot2_M.txt",sep="\t", row.names = F) ``` In this work, we analyzed exhaustively the results in Excel. Pay attention to the column entitled “diff_abbn”, which shows the phyla that were differentially abundant. ## 8.2 Genus ```{r} #a) RHIZOSPHERE ANCOM_genus_rizo=ancomloop(input_object_phyloseq = rizo, grouping = "Fusion", ancom.options = list(global=FALSE, struc_zero=TRUE, n_cl=8),out.unclassified = TRUE, tax.level="Genus") write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[1]]),ANCOM_genus_rizo[[1]]),file="ANCOM_Genus_Rizo_2022_Plot1_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[2]]),ANCOM_genus_rizo[[2]]),file="ANCOM_Genus_Rizo_2022_Plot1_M.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[3]]),ANCOM_genus_rizo[[3]]),file="ANCOM_Genus_Rizo_2022_Plot2_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[4]]),ANCOM_genus_rizo[[4]]),file="ANCOM_Genus_Rizo_2022_Plot2_M.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[5]]),ANCOM_genus_rizo[[5]]),file="ANCOM_Genus_Rizo_2023_Plot1_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[6]]),ANCOM_genus_rizo[[6]]),file="ANCOM_Genus_Rizo_2023_Plot1_M.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[7]]),ANCOM_genus_rizo[[7]]),file="ANCOM_Genus_Rizo_2023_Plot2_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_genus_rizo[[8]]),ANCOM_genus_rizo[[8]]),file="ANCOM_Genus_Rizo_2023_Plot2_M.txt",sep="\t", row.names = F) #a) ENDOSPHERE ANCOM_genus_endo=ancomloop(input_object_phyloseq = endo, grouping = "Fusion", ancom.options = list(global=FALSE, struc_zero=TRUE, n_cl=8),out.unclassified = TRUE, tax.level="Genus") names(ANCOM) write.table(data.frame("_"=rownames(ANCOM_genus_endo[[1]]),ANCOM_genus_endo[[1]]),file="ANCOM_Genus_Endo_2022_Plot1_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_genus_endo[[2]]),ANCOM_genus_endo[[2]]),file="ANCOM_Genus_Endo_2022_Plot1_M.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_genus_endo[[3]]),ANCOM_genus_endo[[3]]),file="ANCOM_Genus_Endo_2022_Plot2_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_genus_endo[[4]]),ANCOM_genus_endo[[4]]),file="ANCOM_Genus_Endo_2022_Plot2_M.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_genus_endo[[5]]),ANCOM_genus_endo[[5]]),file="ANCOM_Genus_Endo_2023_Plot1_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_genus_endo[[6]]),ANCOM_genus_endo[[6]]),file="ANCOM_Genus_Endo_2023_Plot1_M.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_genus_endo[[7]]),ANCOM_genus_endo[[7]]),file="ANCOM_Genus_Endo_2023_Plot2_I.txt",sep="\t", row.names = F) write.table(data.frame("_"=rownames(ANCOM_genus_endo[[8]]),ANCOM_genus_endo[[8]]),file="ANCOM_Genus_Endo_2023_Plot2_M.txt",sep="\t", row.names = F) ``` In this work, we analyzed exhaustively the results in Excel. Pay attention to the column entitled “diff_abbn”, which shows the genera that were differentially abundant.