Reliability of a novel approach for reference-based cell type estimation in human placental DNA methylation studies - Script 2, Main_Analyses

Linda Dieckmann 02-07/2021

preparation
loading data
Descriptives
Correlation reference-free and reference-based estimated cell types
Correlation reference-free estimated cell types and phenotypes
Check if methylation can be predicted using cell types (using CV)
Check how methylation of single CpGs can be predicted using cell types
CpGs most influenced by cell types (reference-based)
CpGs most influenced by cell types (reference-free)
save Tissue Enrichment Plot
save Cell Enrichment Plot
Plot Cell types (RPC, reference-based)
Compare data sets in RPC cell type proportions
- Placenta: CVS vs. term Placenta in ITU
- Term Placentas
Phenotype relationships

{#top}

preparation

loading packages

library(plyr)
library(dplyr)
library(devtools)
library(ggplot2)
library(corrplot)
library(gridExtra)
library(Hmisc)
library(tidyr)
library(viridis)
library(psych)
library(ggpubr)
library(ggdendro)
library(reshape2)
library(planet)
library(GGally)
library(xvalglms)
library(cowplot)
library(rcompanion)
library(ggcorrplot)
library(TissueEnrich)
library(readxl)
library(tidyr)
library(factoextra)
library(npmv)
library(plyr)
library(psych)
library(matrixStats)
library(rcompanion)
library(planet)
library(Biobase)
library(rstatix)
library(gridGraphics)
library(rsq)
library(xlsx)
library(scales)

define function(s)

scaleFUN <- function(x) sprintf("%.2f", x)

boxplot_fun = function(x, id, dat) {
  dat[ ,c(x, id)] %>%   
  pivot_longer(cols=-id) %>% 
  rename_at(1, ~"id") %>%
  ggplot(aes(x = .data[["name"]], y = .data[["value"]])) +
    geom_boxplot() +
    theme_bw()+
    labs(x="", y="")+
    theme(axis.text.x = element_text(size=6, angle=30, hjust=1), axis.title.x=element_text(size=6), axis.title.y = element_text(size=6), axis.text.y = element_text(size=6))
}

R setup

save packages info

writeLines(capture.output(sessionInfo()), "sessionInfo2.txt")

loading data

# data with filtered CpGs & without outliers for ITU placenta
load("Input_Data_prepared/Beta_Placenta_FullInfo_ExprSet_ITU_reduced_filtered.rda")
load("Input_Data_prepared/Beta_Placenta_FullInfo_ExprSet_ITU_filtered.rda")
load("Input_Data_prepared/Beta_CVS_FullInfo_ExprSet_filtered.rda")
load("Input_Data_prepared/Beta_Placenta_FullInfo_ExprSet_PREDO_filtered.rda")
load("Input_Data_prepared/Beta_Placenta_FullInfo_ExprSet_BET_filtered.rda")

Beta_Placenta_ITU_reduced_filtered <-exprs(Beta_Placenta_FullInfo_ExprSet_ITU_reduced_filtered)
dim(Beta_Placenta_ITU_reduced_filtered)
Pheno_Placenta_ITU_reduced_filtered <-pData(Beta_Placenta_FullInfo_ExprSet_ITU_reduced_filtered)

Beta_Placenta_ITU_filtered <-exprs(Beta_Placenta_FullInfo_ExprSet_ITU_filtered)
dim(Beta_Placenta_ITU_filtered)
Pheno_Placenta_ITU_filtered <-pData(Beta_Placenta_FullInfo_ExprSet_ITU_filtered)

Beta_CVS_ITU_filtered <-exprs(Beta_CVS_FullInfo_ExprSet_filtered)
dim(Beta_CVS_ITU_filtered)
Pheno_CVS_ITU_filtered <-pData(Beta_CVS_FullInfo_ExprSet_filtered)

Beta_Placenta_PREDO_filtered <-exprs(Beta_Placenta_FullInfo_ExprSet_PREDO_filtered)
dim(Beta_Placenta_PREDO_filtered)
Pheno_Placenta_PREDO_filtered <-pData(Beta_Placenta_FullInfo_ExprSet_PREDO_filtered)

Beta_Placenta_BET_filtered <-exprs(Beta_Placenta_FullInfo_ExprSet_BET_filtered)
dim(Beta_Placenta_BET_filtered)
Pheno_Placenta_BET_filtered <-pData(Beta_Placenta_FullInfo_ExprSet_BET_filtered)

names_refbased_cells <- c("Trophoblasts", "Stromal", "Hofbauer", "Endothelial", "nRBC", "Syncytiotrophoblast")

data("plColors") # get color from publication from Yuan et al., to make it easier comparable

# load 600 reference CpGs from Yuan et al.
data("plCellCpGsFirst")
cpgs_1yuan <- rownames(plCellCpGsFirst)
data("plCellCpGsThird")
cpgs_3yuan <- rownames(plCellCpGsThird)

load 16 samples different in ITU placenta (names)

load("Input_Data_prepared/samples_different_placenta_itu_meth.RData")
# sample names of those that are extreme outliers in PC1 methylation

load list of non-variable CpGs

load("Results/RData/Placenta_nonvariable_cpgs_all.Rdata")

Descriptives

methylation

1st PC

load("Input_Data_prepared/pc_m_cvs_itu_filtered.RData")
load("Input_Data_prepared/pc_m_placenta_itu_reduced_filtered.RData")
load("Input_Data_prepared/pc_m_placenta_itu_filtered.RData")
load("Input_Data_prepared/pc_m_placenta_predo_filtered.RData")
load("Input_Data_prepared/pc_m_placenta_BET_filtered.RData")

PC_plot_cvs <- fviz_eig(pc_m_cvs_itu_filtered, barfill = "grey", barcolor = "grey", main="")+ 
  theme(axis.title.y = element_text(size = 12),
        axis.title.x = element_blank(),
        axis.title=element_text(size=10),
        axis.text = element_text(size = 10), 
        plot.margin= margin(t = 5.5, r = 10.5, b = 5.5, l = 5.5))+ 
  labs(title="CVS (ITU)", size=10)+
  scale_y_continuous(breaks = seq(0, 20, by = 5), limits=c(0,20), labels=scaleFUN) 

PC_plot_placenta_itu <- fviz_eig(pc_m_placenta_itu_reduced_filtered, barfill = "grey",barcolor = "grey", main="")+ 
  theme(axis.title.x = element_blank(),
        axis.text = element_text(size = 10), 
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title.y = element_blank(), 
        plot.margin= margin(t = 5.5, r = 10.5, b = 5.5, l = 20.5))+
  labs(title="Placenta (ITU)", size=10)+
  scale_y_continuous(breaks = seq(0, 20, by = 5), limits=c(0,20), labels=scaleFUN) 

PC_plot_placenta_predo <- fviz_eig(pc_m_placenta_predo_filtered, barfill = "grey",barcolor = "grey", main="")+ 
  theme(axis.title.x = element_blank(),
        axis.text = element_text(size = 10), 
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title.y = element_blank(), 
        plot.margin= margin(t = 5.5, r = 15.5, b = 5.5, l = 20.5))+ 
  labs(title="Placenta (PREDO)", size=10)+
  scale_y_continuous(breaks = seq(0, 20, by = 5), limits=c(0,20), labels=scaleFUN)

PC_plot_placenta_BET <-fviz_eig(pc_m_placenta_BET_filtered, barfill = "grey",barcolor = "grey", main="")+ 
  theme(axis.title.x = element_blank(),
        axis.text = element_text(size = 10), 
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title.y = element_blank(), 
        plot.margin= margin(t = 5.5, r = 5.5, b = 5.5, l = 20.5))+ 
  labs(title="Placenta (BET)", size=10)+
  scale_y_continuous(breaks = seq(0, 20, by = 5), limits=c(0,20), labels=scaleFUN) 

# See how it looks including our outliers in ITU placenta
pc1_placenta_itu_allpersons <- Pheno_Placenta_ITU_filtered$PC_methB1
group_placenta_itu_allpersons <- replicate(486, "Placenta (ITU)")
outlier_placenta_itu_allpersons <- ifelse(Pheno_Placenta_ITU_filtered$Sample_Name %in% Pheno_Placenta_ITU_reduced_filtered$Sample_Name, "no", "yes")
PC1_Placenta_ITU_allpersons <- data.frame(pc1_placenta_itu_allpersons, group_placenta_itu_allpersons, outlier_placenta_itu_allpersons, stringsAsFactors = FALSE)
colnames(PC1_Placenta_ITU_allpersons) <- c("PC1", "group", "outlier")

#PC1_Placenta_ITU_allpersons
PC1_Placenta_ITU_allpersons_plot <- ggplot(PC1_Placenta_ITU_allpersons, aes(x=group, y=PC1, fill=outlier))+
  geom_boxplot(fill='#A4A4A4', color="black")+
  geom_point(data=PC1_Placenta_ITU_allpersons[PC1_Placenta_ITU_allpersons$outlier =="yes",], aes(x=group, y=PC1, color="red"))+
  theme_minimal()+
  labs(y="PC1 (DNAm)", title="Placenta (ITU)", x="")+
  theme(axis.text.x = element_blank(), axis.text.y=element_text(size=10), axis.title.y = element_text(size=10), axis.title.x=element_blank(), legend.position = "none", plot.title = element_text(size=10, hjust = 0.5))

M_Scree <- ggarrange(
          PC_plot_cvs +
           theme(legend.position="none", plot.title=element_text(size=10)), 
          PC_plot_placenta_itu +
               theme(legend.position="none",axis.title.y = element_blank(), axis.text.y = element_blank(), plot.title=element_text(size=10)), #, plot.margin = margin(r = 0.2, l = 0.2)
          #, axis.text.y = element_blank(),axis.ticks.y = element_blank(),axis.title.y = element_blank()
          PC_plot_placenta_predo +
               theme(legend.position="none",axis.title.y = element_blank(), axis.text.y = element_blank(), plot.title=element_text(size=10)), #, plot.margin = margin(l = 0.2)
          PC_plot_placenta_BET +
               theme(legend.position="none",axis.title.y = element_blank(), axis.text.y = element_blank(), plot.title=element_text(size=10)), #, plot.margin = margin(l = 0.2)
          nrow = 1,
          labels = c("a", "b", "c","d"),
          align = "h", widths = c(1.3, 1, 1, 1))

annotate_figure(M_Scree,bottom = text_grob("Principal Component",size = 12))

PC1 CVS: 10.9% (2: 4.4%, 3: 2.8%) PC1 Placenta ITU: 12.7% (2: 3.9%, 3: 2.6%) PC1 Placenta PREDO: 11.6% (2: 3.6%, 3 2.7%) PC1 Placenta BET: 8.5% (2: 3.7%, 3: 3.2%)

ggsave("Results/Methylation_PC_Screeplots.pdf",
annotate_figure(M_Scree,bottom = text_grob("Principal Component",size = 12)), width=84, height=50, units="mm", dpi=600, scale=2, device = cairo_pdf)

take a look at the outlier samples from ITU placenta

load("Results/RData/samplesample_cor_plot.Rdata")

PC1_Placenta_ITU_allpersons_plot
samplesample_cor_plot

ggsave("Results/Methylation_PC1_PlacentaITU_allpersons_boxplot.pdf",
PC1_Placenta_ITU_allpersons_plot, width=84, height=50, units="mm", dpi=600, scale=2, device = cairo_pdf)

to the top

Cell Types

desc_placenta_itu_rpc <- psych::describe(Pheno_Placenta_ITU_reduced_filtered[,names_refbased_cells])
desc_cvs_itu_rpc <- psych::describe(Pheno_CVS_ITU_filtered[,names_refbased_cells])
desc_placenta_predo_rpc <- psych::describe(Pheno_Placenta_PREDO_filtered[,names_refbased_cells])
desc_placenta_BET_rpc <- psych::describe(Pheno_Placenta_BET_filtered[,names_refbased_cells])

desc_cvs_itu_rpc
desc_placenta_itu_rpc
desc_placenta_predo_rpc
desc_placenta_BET_rpc

desc_placenta_itu_rf <- psych::describe(Pheno_Placenta_ITU_reduced_filtered[,c("C1", "C2", "C3", "C4", "C5","C6","C7", "C8")])
desc_cvs_itu_rf <- psych::describe(Pheno_CVS_ITU_filtered[,c("C1", "C2", "C3", "C4", "C5")])
desc_placenta_predo_rf <- psych::describe(Pheno_Placenta_PREDO_filtered[,c("C1", "C2")])
desc_placenta_BET_rf <- psych::describe(Pheno_Placenta_BET_filtered[,c("C1", "C2", "C3")])

desc_cvs_itu_rf
desc_placenta_itu_rf
desc_placenta_predo_rf
desc_placenta_BET_rf

plots reference-based cell types

barplots

barplot_cells_placenta_itu <- 
  ggplot(gather(Pheno_Placenta_ITU_reduced_filtered[,names_refbased_cells]), aes(value)) + 
  geom_histogram(bins = 10) + 
  facet_wrap(~key, scales = 'free_x', ncol=6)+
  scale_x_continuous(labels=scaleFUN)+
  theme_minimal()

barplot_cells_cvs_itu <- 
  ggplot(gather(Pheno_CVS_ITU_filtered[,names_refbased_cells]), aes(value)) + 
  geom_histogram(bins = 10) + 
  facet_wrap(~key, scales = 'free_x', ncol=6)+
  scale_x_continuous(labels=scaleFUN)+
  theme_minimal()

barplot_cells_placenta_predo <- 
  ggplot(gather(Pheno_Placenta_PREDO_filtered[,names_refbased_cells]), aes(value)) + 
  geom_histogram(bins = 10) + 
  facet_wrap(~key, scales = 'free_x', ncol=6)+
  scale_x_continuous(labels=scaleFUN)+
  theme_minimal()

barplot_cells_placenta_BET <- 
  ggplot(gather(Pheno_Placenta_BET_filtered[,names_refbased_cells]), aes(value)) +
  geom_histogram(bins = 10) + 
  facet_wrap(~key, scales = 'free_x', ncol=6)+
  scale_x_continuous(labels=scaleFUN)+
  theme_minimal()

barplot_cells_cvs_itu

barplot_cells_placenta_itu

barplot_cells_placenta_predo

barplot_cells_placenta_BET

save barplots

png(file="Results/barplot_rpc_placenta.png", width= 6000, height=2000, res=600)
barplot_cells_placenta_itu
dev.off()

png(file="Results/barplot_rpc_cvs.png", width= 6000, height=2000, res=600)
barplot_cells_cvs_itu
dev.off()

png(file="Results/barplot_rpc_placenta.png", width= 6000, height=2000, res=600)
barplot_cells_placenta_predo
dev.off()

png(file="Results/barplot_rpc_placenta.png", width= 6000, height=2000, res=600)
barplot_cells_placenta_BET
dev.off()

boxplots

ggplot(gather(Pheno_CVS_ITU_filtered[,names_refbased_cells]), aes(x=key, y= value)) + 
  geom_boxplot(fill='#A4A4A4', color="black") +
   ylab("proportion (%)")+
  theme_minimal() +
  theme(axis.text.x = element_text(size=10), axis.text.y=element_text(size=10), axis.title.y = element_text(size=10), axis.title.x=element_blank()) 

ggplot(gather(Pheno_Placenta_ITU_reduced_filtered[,names_refbased_cells]), aes(x=key, y= value)) + 
  geom_boxplot(fill='#A4A4A4', color="black") +
   ylab("proportion (%)")+
  theme_minimal() +
  theme(axis.text.x = element_text(size=10), axis.text.y=element_text(size=10), axis.title.y = element_text(size=10), axis.title.x=element_blank()) 

ggplot(gather(Pheno_Placenta_PREDO_filtered[,names_refbased_cells]), aes(x=key, y= value)) + 
  geom_boxplot(fill='#A4A4A4', color="black") +
   ylab("proportion (%)")+
  theme_minimal() +
  theme(axis.text.x = element_text(size=10), axis.text.y=element_text(size=10), axis.title.y = element_text(size=10), axis.title.x=element_blank()) 

ggplot(gather(Pheno_Placenta_BET_filtered[,names_refbased_cells]), aes(x=key, y= value)) + 
  geom_boxplot(fill='#A4A4A4', color="black") +
   ylab("proportion (%)")+
  theme_minimal() +
  theme(axis.text.x = element_text(size=10), axis.text.y=element_text(size=10), axis.title.y = element_text(size=10), axis.title.x=element_blank())

outlier plots

Pheno_Placenta_ITU_filtered$outlier <- ifelse(Pheno_Placenta_ITU_filtered$Sample_Name %in% samples_different_placenta_itu_meth, "yes", "no")

Placenta_refbasedCells_allpersons_plot <- ggplot(gather(Pheno_Placenta_ITU_filtered[,names_refbased_cells]), aes(x=key, y= value)) + 
  geom_boxplot(fill='#A4A4A4', color="black") +
  geom_point(data=gather(Pheno_Placenta_ITU_filtered[Pheno_Placenta_ITU_filtered$outlier =="yes",names_refbased_cells]), aes(x=key, y=value), color="red")+
  ylab("cell types proportion (%)")+
  labs(title="Placenta (ITU)")+
  theme_minimal() +
  theme(axis.text.x = element_text(size=10, angle = 90, vjust = 0.5, hjust=1), axis.text.y=element_text(size=10), axis.title.y = element_text(size=10), axis.title.x=element_blank(), legend.position="none", plot.title = element_text(size=10, hjust = 0.5)) 
   
Placenta_refbasedCells_allpersons_plot

group and save the plots that show how placenta methylation outliers are different

ITU_Placenta_persons_different_plot <- ggarrange(
          PC1_Placenta_ITU_allpersons_plot,
          samplesample_cor_plot,
          Placenta_refbasedCells_allpersons_plot,
          nrow = 1,
          labels = c("a", "b", "c"),
          align = "hv")

ITU_Placenta_persons_different_plot

ggsave("Results/Methylation_ITU_Placenta_Samples_different.pdf",
ITU_Placenta_persons_different_plot, width=174, height=70, units="mm", dpi=600, scale=2, device = cairo_pdf)

Phenos

desc_placenta_itu_pheno <- psych::describe(Pheno_Placenta_ITU_reduced_filtered[,c("Gestational_Age_Weeks", "PC1_ethnicity", "PC2_ethnicity")])
desc_cvs_itu_pheno <- psych::describe(Pheno_CVS_ITU_filtered[,c("gestage_at_CVS_weeks", "PC1_ethnicity", "PC2_ethnicity")])
desc_placenta_predo_pheno <- psych::describe(Pheno_Placenta_PREDO_filtered[,c("Gestational_Age", "PC1_ethnicity", "PC2_ethnicity")])
desc_placenta_BET_pheno <- psych::describe(Pheno_Placenta_BET_filtered[,c("gestage_weeks", "PC1_ethnicity", "PC2_ethnicity", "PC3_ethnicity", "PC4_ethnicity")])

desc_cvs_itu_pheno
prop.table(table(Pheno_CVS_ITU_filtered$Child_Sex))
desc_placenta_itu_pheno
prop.table(table(Pheno_Placenta_ITU_reduced_filtered$Child_Sex))
desc_placenta_predo_pheno
prop.table(table(Pheno_Placenta_PREDO_filtered$Child_Sex))
desc_placenta_BET_pheno
prop.table(table(Pheno_Placenta_BET_filtered$Sex))
prop.table(table(Pheno_Placenta_BET_filtered$group))

Correlation reference-free and reference-based estimated cell types

Fig. 1

To make publication-ready plot for corrplot need to adjust the function to have significance stars:

change the place_points function within the corrplot function:

#trace(corrplot, edit=TRUE)

Then replace on line 443

# place_points = function(sig.locs, point) {
#   text(pos.pNew[, 1][sig.locs], pos.pNew[, 2][sig.locs], 
#        labels = point, col = pch.col, cex = pch.cex, 
#        lwd = 2)

with:

# place_points = function(sig.locs, point) {
#       text(pos.pNew[, 1][sig.locs]+0.40, (pos.pNew[, 2][sig.locs]), 
#            labels = point, col = pch.col, cex = pch.cex, 
#            lwd = 2)

here 0.3 0.4 0.2 0.25 is added to the x-position, depending on the plot

data prepared for correlation

reffree_rpc_placenta_itu <- Pheno_Placenta_ITU_reduced_filtered[,c("C1","C2","C3","C4","C5","C6","C7","C8", names_refbased_cells)]
reffree_rpc_cvs_itu <- Pheno_CVS_ITU_filtered[,c("C1","C2","C3","C4","C5", names_refbased_cells)]
reffree_rpc_placenta_predo <- Pheno_Placenta_PREDO_filtered[,c("C1","C2",names_refbased_cells)]
reffree_rpc_placenta_BET <- Pheno_Placenta_BET_filtered[,c("C1","C2","C3",names_refbased_cells)]

phenor_reffree_placenta_itu <- reffree_rpc_placenta_itu[,c("C1","C2","C3","C4","C5","C6","C7","C8")]
phenor_reffree_cvs_itu <- reffree_rpc_cvs_itu[,c("C1","C2","C3","C4","C5")]
phenor_reffree_placenta_predo <- reffree_rpc_placenta_predo[,c("C1","C2")]
phenor_reffree_placenta_BET <- reffree_rpc_placenta_BET[,c("C1","C2","C3")]

phenor_rpc_placenta_itu <- reffree_rpc_placenta_itu[,c(names_refbased_cells)]
phenor_rpc_cvs_itu <- reffree_rpc_cvs_itu[,c(names_refbased_cells)]
phenor_rpc_placenta_predo <- reffree_rpc_placenta_predo[,c(names_refbased_cells[ names_refbased_cells != "Hofbauer"])]
phenor_rpc_placenta_BET <- reffree_rpc_placenta_BET[,c(names_refbased_cells)]

colnames(phenor_rpc_cvs_itu) <- c("Trophoblasts        ", "Stromal       ", "Hofbauer       ", "Endothelial       ", "nRBC       ", "Synctio-\n trophoblasts        ")
colnames(phenor_rpc_placenta_itu) <- c("Trophoblasts", "Stromal", "Hofbauer", "Endothelial", "nRBC", "Synctio-\n trophoblasts")
colnames(phenor_rpc_placenta_predo) <- c("Trophoblasts      ", "Stromal      ", "Endothelial      ", "nRBC      ", "Synctio-\n trophoblasts        \n")
colnames(phenor_rpc_placenta_BET) <- c("Trophoblasts       ", "Stromal       ", "Hofbauer       ", "Endothelial       ", "nRBC       ", "Synctio-\n trophoblasts               ")

CVS

r_cor_reffree_rpc_cvs_itu <- psych::corr.test(phenor_reffree_cvs_itu, phenor_rpc_cvs_itu, method="spearman", adjust = "bonferroni")

par(xpd=TRUE)
plot_r_cor_reffree_rpc_cvs_itu <- corrplot(r_cor_reffree_rpc_cvs_itu$r,
     method="color", number.digits = 1,
     mar=c(1,1,1,1),
     addCoef.col = "black",
     p.mat = r_cor_reffree_rpc_cvs_itu$p,
     insig = "label_sig",
     sig.level = c(.001, .01),
     pch.cex = 0.7,
     pch.col = "black",
     tl.col="black",
     tl.cex=0.75,
     tl.srt = 45,
     number.cex=0.7,
     cl.pos='n')

grid.echo()
P1_corr_refbased_reffree <- grid.grab()

matrix.colors <- getGrob(P1_corr_refbased_reffree, gPath("square"), grep = TRUE)[["gp"]][["fill"]]
P1_corr_refbased_reffree <- editGrob(P1_corr_refbased_reffree,
               gPath("square"), grep = TRUE,
               gp = gpar(col = NA,
                         fill = NA))

# apply the saved colours to the underlying matrix grob
P1_corr_refbased_reffree<- editGrob(P1_corr_refbased_reffree,
               gPath("symbols-rect-1"), grep = TRUE,
               gp = gpar(fill = matrix.colors))

# convert the background fill from white to transparent, while we are at it
P1_corr_refbased_reffree <- editGrob(P1_corr_refbased_reffree,
               gPath("background"), grep = TRUE,
               gp = gpar(fill = NA))

r_cor_reffree_rpc_placenta_itu <- psych::corr.test(phenor_reffree_placenta_itu, phenor_rpc_placenta_itu, method="spearman", adjust = "bonferroni")

plot_r_cor_reffree_rpc_placenta_itu <- corrplot(r_cor_reffree_rpc_placenta_itu$r,
     method="color", number.digits=1,
     mar=c(1,0,1,0),
     addCoef.col = "black",
     p.mat = r_cor_reffree_rpc_placenta_itu$p,
     insig = "label_sig",
     sig.level = c(.001, .01),
     pch.cex = 0.7,
     pch.col = "black",
     tl.col="black",
     tl.cex=0.75,
     tl.srt = 45,
     number.cex=0.7,
     cl.pos='n')

grid.echo()
P2_corr_refbased_reffree <- grid.grab()

matrix.colors <- getGrob(P2_corr_refbased_reffree, gPath("square"), grep = TRUE)[["gp"]][["fill"]]
P2_corr_refbased_reffree <- editGrob(P2_corr_refbased_reffree,
               gPath("square"), grep = TRUE,
               gp = gpar(col = NA,
                         fill = NA))

P2_corr_refbased_reffree<- editGrob(P2_corr_refbased_reffree,
               gPath("symbols-rect-1"), grep = TRUE,
               gp = gpar(fill = matrix.colors))

P2_corr_refbased_reffree <- editGrob(P2_corr_refbased_reffree,
               gPath("background"), grep = TRUE,
               gp = gpar(fill = NA))

r_cor_reffree_rpc_placenta_predo <- psych::corr.test(phenor_reffree_placenta_predo, phenor_rpc_placenta_predo, method="spearman", adjust = "bonferroni")

plot_r_cor_reffree_rpc_placenta_predo <- corrplot(r_cor_reffree_rpc_placenta_predo$r,
     method="color", number.digits=1,
     mar=c(1,6,1,6),
     addCoef.col = "black",
     p.mat = r_cor_reffree_rpc_placenta_predo$p,
     insig = "label_sig",
     sig.level = c(.001, .01),
     pch.cex = 0.75,
     pch.col = "black",
     tl.col="black",
     tl.cex=0.7,
     tl.srt = 45,
     number.cex=0.7,
     cl.pos='n')

grid.echo()
P3_corr_refbased_reffree <- grid.grab()

matrix.colors <- getGrob(P3_corr_refbased_reffree, gPath("square"), grep = TRUE)[["gp"]][["fill"]]
P3_corr_refbased_reffree <- editGrob(P3_corr_refbased_reffree,
               gPath("square"), grep = TRUE,
               gp = gpar(col = NA,
                         fill = NA))

P3_corr_refbased_reffree<- editGrob(P3_corr_refbased_reffree,
               gPath("symbols-rect-1"), grep = TRUE,
               gp = gpar(fill = matrix.colors))

P3_corr_refbased_reffree <- editGrob(P3_corr_refbased_reffree,
               gPath("background"), grep = TRUE,
               gp = gpar(fill = NA))

r_cor_reffree_rpc_placenta_bet <- psych::corr.test(phenor_reffree_placenta_BET, phenor_rpc_placenta_BET, method="spearman", adjust = "bonferroni")

par(xpd=TRUE)
plot_r_cor_reffree_rpc_placenta_bet <- corrplot(r_cor_reffree_rpc_placenta_bet$r,
     method="color", number.digits=1,
     mar=c(1,6,0,6),
     addCoef.col = "black",
     p.mat = r_cor_reffree_rpc_placenta_bet$p,
     insig = "label_sig",
     sig.level = c(.001, .01),
     pch.cex = 0.75,
     pch.col = "black",
     tl.col="black",
     tl.cex=0.7,
     tl.srt = 45,
     number.cex=0.7,
     cl.pos='n')

grid.echo()
P4_corr_refbased_reffree <- grid.grab()

matrix.colors <- getGrob(P4_corr_refbased_reffree, gPath("square"), grep = TRUE)[["gp"]][["fill"]]
P4_corr_refbased_reffree <- editGrob(P4_corr_refbased_reffree,
               gPath("square"), grep = TRUE,
               gp = gpar(col = NA,
                         fill = NA))

P4_corr_refbased_reffree<- editGrob(P4_corr_refbased_reffree,
               gPath("symbols-rect-1"), grep = TRUE,
               gp = gpar(fill = matrix.colors))

P4_corr_refbased_reffree <- editGrob(P4_corr_refbased_reffree,
               gPath("background"), grep = TRUE,
               gp = gpar(fill = NA))

ggsave("Results/Cor_Refbased_Reffree_combined.pdf",
       ggarrange(P1_corr_refbased_reffree, P2_corr_refbased_reffree, P3_corr_refbased_reffree, P4_corr_refbased_reffree, ncol=1, labels = c("a", "b", "c", "d"), heights = c(2,4,0.8,1), widths=c(2,4,0.8,1)), width=84, height=160, units="mm", dpi=600, scale=2)

to the top

Correlation reference-free estimated cell types and phenotypes

Fig. 2

again need to adapt corrplot function by adding +0.25 / +0.35 to the x-position

CVS

CVS_ITU_filtered_corPhReffree <- na.omit(Pheno_CVS_ITU_filtered[ ,c("gestage_at_CVS_weeks", "PC1_ethnicity", "PC2_ethnicity", "Child_Sex", "C1", "C2", "C3", "C4", "C5")])
# n = 200
names(CVS_ITU_filtered_corPhReffree) <- c("gestational age\n(weeks)", "PC1 ethnicity", "PC2 ethnicity", "child sex", "C1  ", "C2  ", "C3  ", "C4  ", "C5  ")
cells_CVS_ITU_filtered_corPhReffree <- CVS_ITU_filtered_corPhReffree[ ,c("C1  ", "C2  ", "C3  ", "C4  ", "C5  ")]
phenos_CVS_ITU_filtered_corPhReffree <- CVS_ITU_filtered_corPhReffree[ ,c("gestational age\n(weeks)", "child sex", "PC1 ethnicity", "PC2 ethnicity")]
phenos_CVS_ITU_filtered_corPhReffree$`child sex` = as.numeric(phenos_CVS_ITU_filtered_corPhReffree$`child sex`)

r_CVS_ITU_filtered_corPhReffree <- psych::corr.test(phenos_CVS_ITU_filtered_corPhReffree, cells_CVS_ITU_filtered_corPhReffree, method="spearman", adjust = "bonferroni")

corrplot(r_CVS_ITU_filtered_corPhReffree$r,
     method="color", number.digits=1,
     mar=c(1,3,2,2),
     addCoef.col = "black",
     p.mat = r_CVS_ITU_filtered_corPhReffree$p,
     insig = "label_sig",
     sig.level = c(.001, .01),
     pch.cex = 0.7,
     pch.col = "black",
     tl.col="black",
     tl.cex=0.75,
     number.cex= 0.7,
     outline=TRUE,
     cl.pos='n')

grid.echo()
P1_corr_pheno_reffree <- grid.grab()

matrix.colors <- getGrob(P1_corr_pheno_reffree, gPath("square"), grep = TRUE)[["gp"]][["fill"]]
P1_corr_pheno_reffree <- editGrob(P1_corr_pheno_reffree,
               gPath("square"), grep = TRUE,
               gp = gpar(col = NA,
                         fill = NA))

# apply the saved colours to the underlying matrix grob
P1_corr_pheno_reffree <- editGrob(P1_corr_pheno_reffree,
               gPath("symbols-rect-1"), grep = TRUE,
               gp = gpar(fill = matrix.colors))

# convert the background fill from white to transparent, while we are at it
P1_corr_pheno_reffree <- editGrob(P1_corr_pheno_reffree,
               gPath("background"), grep = TRUE,
               gp = gpar(fill = NA))

Placenta ITU

Placenta_ITU_reduced_filtered_corPhReffree <- na.omit(Pheno_Placenta_ITU_reduced_filtered[ ,c("Gestational_Age_Weeks", "PC1_ethnicity", "PC2_ethnicity", "Child_Sex", "caseVScontrol", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8")])
# n = 425
names(Placenta_ITU_reduced_filtered_corPhReffree) <- c("    gestational age\n(weeks)", "PC1 ethnicity", "PC2 ethnicity", "child sex", "    fetal chromosomal\n testing (yes/no)", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8")
cells_Placenta_ITU_reduced_filtered_corPhReffree <- Placenta_ITU_reduced_filtered_corPhReffree[ ,c("C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8")]
phenos_Placenta_ITU_reduced_filtered_corPhReffree <- Placenta_ITU_reduced_filtered_corPhReffree[ ,c("    gestational age\n(weeks)", "child sex", "PC1 ethnicity", "PC2 ethnicity", "    fetal chromosomal\n testing (yes/no)")]
phenos_Placenta_ITU_reduced_filtered_corPhReffree$`child sex` = as.numeric(Placenta_ITU_reduced_filtered_corPhReffree$`child sex`)
phenos_Placenta_ITU_reduced_filtered_corPhReffree$`    fetal chromosomal\n testing (yes/no)` = as.numeric(Placenta_ITU_reduced_filtered_corPhReffree$`    fetal chromosomal\n testing (yes/no)`)

r_Placenta_ITU_reduced_filtered_corPhReffree <- psych::corr.test(phenos_Placenta_ITU_reduced_filtered_corPhReffree, cells_Placenta_ITU_reduced_filtered_corPhReffree, method="spearman", adjust = "bonferroni")

corrplot(r_Placenta_ITU_reduced_filtered_corPhReffree$r,
     method="color", number.digits=1,
     mar=c(1,3,1,3),
     addCoef.col = "black",
     p.mat = r_Placenta_ITU_reduced_filtered_corPhReffree$p,
     insig = "label_sig",
     sig.level = c(.001, .01),
     pch.cex = 0.7,
     pch.col = "black",
     tl.col="black",
     tl.cex=0.75,
     number.cex= 0.7,
     outline=TRUE,
     cl.pos='n')

grid.echo()
P2_corr_pheno_reffree <- grid.grab()

matrix.colors <- getGrob(P2_corr_pheno_reffree, gPath("square"), grep = TRUE)[["gp"]][["fill"]]
P2_corr_pheno_reffree <- editGrob(P2_corr_pheno_reffree,
               gPath("square"), grep = TRUE,
               gp = gpar(col = NA,
                         fill = NA))

# apply the saved colours to the underlying matrix grob
P2_corr_pheno_reffree <- editGrob(P2_corr_pheno_reffree,
               gPath("symbols-rect-1"), grep = TRUE,
               gp = gpar(fill = matrix.colors))

# convert the background fill from white to transparent, while we are at it
P2_corr_pheno_reffree <- editGrob(P2_corr_pheno_reffree,
               gPath("background"), grep = TRUE,
               gp = gpar(fill = NA))

Placenta PREDO

Placenta_PREDO_filtered_corPhReffree <- na.omit(Pheno_Placenta_PREDO_filtered[ ,c("Gestational_Age", "PC1_ethnicity", "PC2_ethnicity", "Child_Sex", "C1", "C2")])
# n = 118
names(Placenta_PREDO_filtered_corPhReffree) <- c("gestational age\n(weeks)", "PC1 ethnicity", "PC2 ethnicity", "child sex", "C1 ", "C2 ")
cells_Placenta_PREDO_filtered_corPhReffree <- Placenta_PREDO_filtered_corPhReffree[ ,c("C1 ", "C2 ")]
phenos_Placenta_PREDO_filtered_corPhReffree <- Placenta_PREDO_filtered_corPhReffree[ ,c("gestational age\n(weeks)", "child sex", "PC1 ethnicity", "PC2 ethnicity")]
phenos_Placenta_PREDO_filtered_corPhReffree$`child sex` = as.numeric(Placenta_PREDO_filtered_corPhReffree$`child sex`)

r_Placenta_PREDO_filtered_corPhReffree <- psych::corr.test(phenos_Placenta_PREDO_filtered_corPhReffree, cells_Placenta_PREDO_filtered_corPhReffree, method="spearman", adjust = "bonferroni")

corrplot(r_Placenta_PREDO_filtered_corPhReffree$r,
     method="color", number.digits=1,
     mar=c(1,0,1,1),
     addCoef.col = "black",
     p.mat = r_Placenta_PREDO_filtered_corPhReffree$p,
     insig = "label_sig",
     sig.level = c(.001, .01),
     pch.cex = 0.7,
     pch.col = "black",
     tl.col="black",
     tl.cex=0.75,
     number.cex=0.7,
     outline=TRUE,
     cl.pos='n')

grid.echo()
P3_corr_pheno_reffree <- grid.grab()

matrix.colors <- getGrob(P3_corr_pheno_reffree, gPath("square"), grep = TRUE)[["gp"]][["fill"]]
P3_corr_pheno_reffree <- editGrob(P3_corr_pheno_reffree,
               gPath("square"), grep = TRUE,
               gp = gpar(col = NA,
                         fill = NA))

# apply the saved colours to the underlying matrix grob
P3_corr_pheno_reffree <- editGrob(P3_corr_pheno_reffree,
               gPath("symbols-rect-1"), grep = TRUE,
               gp = gpar(fill = matrix.colors))

# convert the background fill from white to transparent, while we are at it
P3_corr_pheno_reffree <- editGrob(P3_corr_pheno_reffree,
               gPath("background"), grep = TRUE,
               gp = gpar(fill = NA))

Placenta BET

Placenta_BET_filtered_corPhReffree <- na.omit(Pheno_Placenta_BET_filtered[ ,c("gestage_weeks", "PC1_ethnicity", "PC2_ethnicity","PC3_ethnicity", "PC4_ethnicity", "Sex", "group", "C1", "C2", "C3")])
# n = 136
names(Placenta_BET_filtered_corPhReffree) <- c("gestational age\n(weeks)", "PC1 ethnicity", "PC2 ethnicity", "PC3 ethnicity", "PC4 ethnicity", "child sex", "BET status", "C1", "C2", "C3")
cells_Placenta_BET_filtered_corPhReffree <- Placenta_BET_filtered_corPhReffree[ ,c("C1", "C2", "C3")]
phenos_Placenta_BET_filtered_corPhReffree <- Placenta_BET_filtered_corPhReffree[ ,c("gestational age\n(weeks)", "child sex", "BET status", "PC1 ethnicity", "PC2 ethnicity", "PC3 ethnicity", "PC4 ethnicity")]
phenos_Placenta_BET_filtered_corPhReffree$`child sex` = as.numeric(Placenta_BET_filtered_corPhReffree$`child sex`)
phenos_Placenta_BET_filtered_corPhReffree$`BET status` = as.numeric(Placenta_BET_filtered_corPhReffree$`BET status`)

r_Placenta_BET_filtered_corPhReffree <- psych::corr.test(phenos_Placenta_BET_filtered_corPhReffree, cells_Placenta_BET_filtered_corPhReffree, method="spearman", adjust = "bonferroni")

corrplot(r_Placenta_BET_filtered_corPhReffree$r,
     method="color", number.digits=1,
     mar=c(1,0,1,1),
     addCoef.col = "black",
     p.mat = r_Placenta_BET_filtered_corPhReffree$p,
     insig = "label_sig",
     sig.level = c(.001, .01),
     pch.cex = 0.7,
     pch.col = "black",
     tl.col="black",
     tl.cex=0.75,
     number.cex=0.7,
     outline=TRUE,
     cl.pos='n')

grid.echo()
P4_corr_pheno_reffree <- grid.grab()

matrix.colors <- getGrob(P4_corr_pheno_reffree, gPath("square"), grep = TRUE)[["gp"]][["fill"]]
P4_corr_pheno_reffree <- editGrob(P4_corr_pheno_reffree,
               gPath("square"), grep = TRUE,
               gp = gpar(col = NA,
                         fill = NA))

# apply the saved colours to the underlying matrix grob
P4_corr_pheno_reffree <- editGrob(P4_corr_pheno_reffree,
               gPath("symbols-rect-1"), grep = TRUE,
               gp = gpar(fill = matrix.colors))

# convert the background fill from white to transparent, while we are at it
P4_corr_pheno_reffree <- editGrob(P4_corr_pheno_reffree,
               gPath("background"), grep = TRUE,
               gp = gpar(fill = NA))

ggsave("Results/Cor_Pheno_Reffree_combined.pdf",
       ggarrange(P1_corr_pheno_reffree, P2_corr_pheno_reffree, P3_corr_pheno_reffree, P4_corr_pheno_reffree, ncol=1, labels = c("a", "b", "c", "d"), heights = c(1.5,2,1.5,2), widths = c(2,2,1,1), align='v'), width=84, height=180, units="mm", dpi=600, scale=2)

to the top

Check if methylation can be predicted using cell types (using CV)

CVS

Reg_Pheno_CVS_ITU_filtered <- na.omit(Pheno_CVS_ITU_filtered[ ,c("gestage_at_CVS_weeks", "PC1_ethnicity", "PC2_ethnicity", "Child_Sex", "Trophoblasts", "Stromal", "Hofbauer", "Endothelial", "Syncytiotrophoblast", "nRBC","PC_cell1", "PC_cell2", "PC_cell3","PC_cell4", "C1", "C2", "C3", "C4", "C5", "PC_methB1", "Sample_Name")])
# n = 200
dim(Reg_Pheno_CVS_ITU_filtered)

save(Reg_Pheno_CVS_ITU_filtered, file="Input_Data_prepared/Reg_Pheno_CVS_ITU_filtered.Rdata")

predict PC1 methylation

plot cell & PC 1

cell1_reg <- Reg_Pheno_CVS_ITU_filtered[,c(5:10,20)]
cell1_reg %>%
  gather(-PC_methB1, key = "var", value = "value") %>% 
  ggplot(aes(x = value, y =PC_methB1)) +
  facet_wrap(~ var, scales = "free") +
  geom_point() +
  theme_bw()+
  stat_smooth()

cell2_reg <- Reg_Pheno_CVS_ITU_filtered[,c(15:19,20)]
cell2_reg %>%
  gather(-PC_methB1, key = "var", value = "value") %>% 
  ggplot(aes(x = value, y =PC_methB1)) +
  facet_wrap(~ var, scales = "free") +
  geom_point() +
  theme_bw()+
  stat_smooth()

-RMSE-

models = vector(mode = "list", length = 6) 
models[[1]] = PC_methB1 ~ 1
models[[2]] = PC_methB1 ~ gestage_at_CVS_weeks + Child_Sex + PC1_ethnicity + PC2_ethnicity

models[[3]] = PC_methB1 ~  Trophoblasts +Stromal + Hofbauer + Endothelial + Syncytiotrophoblast + nRBC
models[[4]] = PC_methB1 ~  Trophoblasts +Stromal + Hofbauer + Endothelial + Syncytiotrophoblast + nRBC+ gestage_at_CVS_weeks + Child_Sex + PC1_ethnicity + PC2_ethnicity

models[[5]] = PC_methB1 ~ C1 + C2 + C3 + C4 + C5
models[[6]] = PC_methB1 ~ C1 + C2 + C3 + C4 + C5+ gestage_at_CVS_weeks + Child_Sex+ PC1_ethnicity + PC2_ethnicity

CV_output_CVS_Meth = xval.glm(data = Reg_Pheno_CVS_ITU_filtered, models, folds = 10, repeats = 500, seed=200)

save(CV_output_CVS_Meth, file="Results/RData/CV_output_CVS_Meth.Rdata")

get adjusted R2 from winning model

rsq(glm(models[[3]], data= Reg_Pheno_CVS_ITU_filtered),adj=TRUE,type='v')

summary(glm(models[[3]], data= Reg_Pheno_CVS_ITU_filtered))

# difference null and residual variance
12220566-1177114
199-194
#p-value = 1 - pchisq(deviance, degrees of freedom)
1-pchisq(11043452,5)
# -> significant

Plots

load("Results/RData/CV_output_CVS_Meth.Rdata")

# this is how plots were defined in original package function
p2c <- CV_output_CVS_Meth[["den.plot"]]
p1c <- CV_output_CVS_Meth[["stab.plot"]]
pc <- CV_output_CVS_Meth[["box.plot"]]

p1c <- p1c +
  theme_minimal()+
  theme(axis.text.x = element_text(size=8), axis.title.y = element_text(size=8), axis.text.y = element_text(size=5), legend.text=element_text(size=8), legend.title=element_text(size=8), legend.key.size = unit(0.2, 'cm'))+
  labs(tag = "a ", title="CVS (ITU)")+
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.25))+
  scale_color_discrete(name="Models",labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"))

pc <- pc +
  theme_minimal()+
  theme(axis.text.x = element_text(size=10), axis.title.x=element_text(size=10), axis.title.y = element_text(size=10), axis.text.y = element_text(size=10), legend.position="none", axis.text.x.top = element_text(size=10))+
  labs(tag = "    ")

p2c <- p2c +
  theme_minimal()+
  theme(legend.position="none", axis.text.x = element_text(size=10))

# define parameters we need for plot
my.ylab <- "RMSE" 
K <- 10
repeats <- 500
wins <- CV_output_CVS_Meth$wins

# titleplot
titleplot <- ggplot() + geom_point(aes(1,1), colour="white") +
  theme(plot.background = element_blank(),panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),panel.border = element_blank(),
        panel.background = element_blank(),axis.title.x = element_blank(),
        axis.title.y = element_blank(),axis.text.x = element_blank(),
        axis.text.y = element_blank(),axis.ticks = element_blank()) + annotate("text", x=1, y=1,
                                                                               label=paste0(my.ylab,'\n (',K,'-fold, ',repeats,' repeats) \nModel ',which.max(wins),' wins.'))

ggCV_CVS_M <- grid.arrange(p1c, titleplot, pc, p2c, ncol=2, nrow=2, widths=c(5, 2), heights=c(2.5, 5))

ggsave("Results/CV_CVS_M.pdf",
       grid.arrange(p1c, titleplot, pc, p2c, ncol=2, nrow=2, widths=c(5, 2), heights=c(2, 5)),
       width=84, height=50, units="mm", dpi=600, scale=2, device = cairo_pdf)

to the top

Placenta ITU

Reg_Pheno_Placenta_ITU_reduced_filtered <- na.omit(Pheno_Placenta_ITU_reduced_filtered[ ,c("Gestational_Age_Weeks", "PC1_ethnicity", "PC2_ethnicity", "Child_Sex", "Trophoblasts", "Stromal", "Hofbauer", "Endothelial", "Syncytiotrophoblast", "nRBC", "PC_cell1", "PC_cell2", "PC_cell3", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "PC_methB1", "Sample_Name", "caseVScontrol")])

dim(Reg_Pheno_Placenta_ITU_reduced_filtered)
save(Reg_Pheno_Placenta_ITU_reduced_filtered, file="Input_Data_prepared/Reg_Pheno_Placenta_ITU_reduced_filtered.Rdata")

predict PC1 methylation

plot PC and cell types

cell1_reg <- Reg_Pheno_Placenta_ITU_reduced_filtered[,c(5:10,22)]
cell1_reg %>%
  gather(-PC_methB1, key = "var", value = "value") %>% 
  ggplot(aes(x = value, y =PC_methB1)) +
  facet_wrap(~ var, scales = "free") +
  geom_point() +
  theme_bw()+
  stat_smooth()

cell2_reg <- Reg_Pheno_Placenta_ITU_reduced_filtered[,c(14:21, 22)]
cell2_reg %>%
  gather(-PC_methB1, key = "var", value = "value") %>% 
  ggplot(aes(x = value, y =PC_methB1)) +
  facet_wrap(~ var, scales = "free") +
  geom_point() +
  theme_bw()+
  stat_smooth()

-RMSE-

models = vector(mode = "list", length = 6) 
models[[1]] = PC_methB1 ~ 1
models[[2]] = PC_methB1 ~ Gestational_Age_Weeks + Child_Sex + PC1_ethnicity + PC2_ethnicity

models[[3]] = PC_methB1 ~ Trophoblasts + Stromal + Hofbauer + Endothelial + Syncytiotrophoblast + nRBC
models[[4]] = PC_methB1 ~  Trophoblasts + Stromal + Hofbauer + Endothelial + Syncytiotrophoblast + nRBC + Gestational_Age_Weeks + Child_Sex + PC1_ethnicity + PC2_ethnicity

models[[5]] = PC_methB1 ~ C1 + C2 + C3 + C4 + C5 + C6 + C7 + C8
models[[6]] = PC_methB1 ~ C1 + C2 + C3 + C4 + C5+ C6 + C7 + C8+ Gestational_Age_Weeks + Child_Sex + PC1_ethnicity + PC2_ethnicity

CV_output_Placenta_Meth = xval.glm(data = Reg_Pheno_Placenta_ITU_reduced_filtered, models, folds = 10, repeats = 500, seed=200)

save(CV_output_Placenta_Meth, file="Results/RData/CV_output_Placenta_Meth.Rdata")

get adjusted R2 from winning model

rsq(glm(models[[6]], data= Reg_Pheno_Placenta_ITU_reduced_filtered),adj=TRUE,type='v')

summary(glm(models[[6]], data= Reg_Pheno_Placenta_ITU_reduced_filtered))

# difference null and residual variance
28342062-2080515
424-411
#p-value = 1 - pchisq(deviance, degrees of freedom)
1-pchisq(26261547, 13)
# -> significant

-RMSE- see if inclusion of case/control changes something

models = vector(mode = "list", length = 6) 
models[[1]] = PC_methB1 ~ 1
models[[2]] = PC_methB1 ~ Gestational_Age_Weeks + Child_Sex + PC1_ethnicity + PC2_ethnicity + caseVScontrol

models[[3]] = PC_methB1 ~ Trophoblasts + Stromal + Hofbauer + Endothelial + Syncytiotrophoblast + nRBC
models[[4]] = PC_methB1 ~  Trophoblasts + Stromal + Hofbauer + Endothelial + Syncytiotrophoblast + nRBC + Gestational_Age_Weeks + Child_Sex + PC1_ethnicity + PC2_ethnicity + caseVScontrol

models[[5]] = PC_methB1 ~ C1 + C2 + C3 + C4 + C5 + C6 + C7 + C8
models[[6]] = PC_methB1 ~ C1 + C2 + C3 + C4 + C5+ C6 + C7 + C8+ Gestational_Age_Weeks + Child_Sex + PC1_ethnicity + PC2_ethnicity + caseVScontrol

CV_output_Placenta_Meth_casecontrol = xval.glm(data = Reg_Pheno_Placenta_ITU_reduced_filtered, models, folds = 10, repeats = 500, seed=200)

save(CV_output_Placenta_Meth_casecontrol, file="Results/RData/CV_output_Placenta_Meth_casecontrol.Rdata")

Plots

load("Results/RData/CV_output_Placenta_Meth.Rdata")

# this is how plots were defined in original package function
pp2 <- CV_output_Placenta_Meth[["den.plot"]]
pp1 <- CV_output_Placenta_Meth[["stab.plot"]]
pp <- CV_output_Placenta_Meth[["box.plot"]]

pp1 <- pp1 +
  theme_minimal()+
  theme(axis.text.x = element_text(size=8), axis.title.y = element_text(size=8), axis.text.y = element_text(size=5), legend.text=element_text(size=8), legend.title=element_text(size=8), legend.key.size = unit(0.2, 'cm'))+
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.25))+
  labs(tag = "b", title="Placenta (ITU)")+
  scale_color_discrete(name="Models",labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"))

pp <- pp +
  theme_minimal()+
  theme(axis.text.x = element_text(size=10), axis.title.x=element_text(size=10), axis.title.y = element_text(size=10), axis.text.y = element_text(size=10), legend.position="none", axis.text.x.top = element_text(size=10))+
  labs(tag= "    ")

pp2 <- pp2 +
  theme_minimal()+
  theme(legend.position="none", axis.text.x = element_text(size=10))

# define parameters we need for plot
my.ylab <- "RMSE" 
K <- 10
repeats <- 500
wins <- CV_output_Placenta_Meth$wins

# titleplot
titleplot <- ggplot() + geom_point(aes(1,1), colour="white") +
  theme(plot.background = element_blank(),panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),panel.border = element_blank(),
        panel.background = element_blank(),axis.title.x = element_blank(),
        axis.title.y = element_blank(),axis.text.x = element_blank(),
        axis.text.y = element_blank(),axis.ticks = element_blank()) + annotate("text", x=1, y=1,
                                                                               label=paste0(my.ylab,'\n (',K,'-fold, ',repeats,' repeats) \nModel ',which.max(wins),' wins.'))

ggCV_PlacentaITU_M <- grid.arrange(pp1, titleplot, pp, pp2, ncol=2, nrow=2, widths=c(5, 2), heights=c(2.5, 5))

ggsave("Results/CV_PlacentaITU_M.pdf",
       ggCV_PlacentaITU_M,
       width=100, height=80, units="mm", dpi=600, scale=2)

Placenta PREDO

predict PC1 methylation

Reg_Pheno_Placenta_PREDO <- na.omit(Pheno_Placenta_PREDO_filtered[ ,c("Gestational_Age", "PC1_ethnicity", "PC2_ethnicity", "Child_Sex", "Trophoblasts", "Stromal", "Hofbauer", "Endothelial", "Syncytiotrophoblast", "nRBC", "PC_cell1", "PC_cell2", "PC_cell3", "C1", "C2", "PC_methB1", "ID")])
dim(Reg_Pheno_Placenta_PREDO)

save(Reg_Pheno_Placenta_PREDO, file="Input_Data_prepared/Reg_Pheno_Placenta_PREDO.Rdata")

1st PC methylation and cell types

cell1_reg <- Reg_Pheno_Placenta_PREDO[,c(5:10,16)]
cell1_reg %>%
  gather(-PC_methB1, key = "var", value = "value") %>% 
  ggplot(aes(x = value, y =PC_methB1)) +
  facet_wrap(~ var, scales = "free") +
  geom_point() +
  theme_bw()+
  stat_smooth()

cell2_reg <- Reg_Pheno_Placenta_PREDO[,c(14:15,16)]
cell2_reg %>%
  gather(-PC_methB1, key = "var", value = "value") %>% 
  ggplot(aes(x = value, y =PC_methB1)) +
  facet_wrap(~ var, scales = "free") +
  geom_point() +
  theme_bw()+
  stat_smooth()

-RMSE-

# Cross-validation
models = vector(mode = "list", length = 6) 
models[[1]] = PC_methB1 ~ 1
models[[2]] = PC_methB1 ~ Gestational_Age + Child_Sex +PC1_ethnicity + PC2_ethnicity

models[[3]] = PC_methB1 ~ Trophoblasts + Stromal + Hofbauer + Endothelial + Syncytiotrophoblast+ nRBC
models[[4]] = PC_methB1 ~ Trophoblasts + Stromal + Hofbauer + Endothelial + Syncytiotrophoblast+ nRBC+ Gestational_Age + Child_Sex + PC1_ethnicity + PC2_ethnicity 

models[[5]] = PC_methB1 ~ C1 + C2 
models[[6]] = PC_methB1 ~ C1 + C2 + Gestational_Age + Child_Sex + PC1_ethnicity + PC2_ethnicity 

CV_output_Placenta_predo_meth = xval.glm(data = Reg_Pheno_Placenta_PREDO, models, folds = 10, repeats = 500, seed = 200)

get adjusted R2 from winning model

rsq(glm(models[[4]], data= Reg_Pheno_Placenta_PREDO),adj=TRUE,type='v')

summary(glm(models[[4]], data= Reg_Pheno_Placenta_PREDO))

# difference null and residual variance
9079814-1190407
117-109
#p-value = 1 - pchisq(deviance, degrees of freedom)
1-pchisq(7889407, 8)
# -> significant

save(CV_output_Placenta_predo_meth, file="Results/RData/CV_output_Placenta_predo_meth.Rdata")

Plots

load("Results/RData/CV_output_Placenta_predo_meth.Rdata")

# this is how plots were defined in original package function
p2 <- CV_output_Placenta_predo_meth[["den.plot"]]
p1 <- CV_output_Placenta_predo_meth[["stab.plot"]]
p <- CV_output_Placenta_predo_meth[["box.plot"]]

p1 <- p1 +
  theme_minimal()+
  theme(axis.text.x = element_text(size=8), axis.title.y = element_text(size=8), axis.text.y = element_text(size=5), legend.text=element_text(size=8), legend.title=element_text(size=8), legend.key.size = unit(0.2, 'cm'))+
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.25))+
  labs(tag = "c ", title="Placenta (PREDO)")+
    scale_color_discrete(name="Models",labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"))

p <- p +
  theme_minimal()+
  theme(axis.text.x = element_text(size=10), axis.title.x=element_text(size=10), axis.title.y = element_text(size=10), axis.text.y = element_text(size=10), legend.position="none", axis.text.x.top = element_text(size=10))+
  labs(tag= "    ")

p2 <- p2 +
  theme_minimal()+
  theme(legend.position="none", axis.text.x = element_text(size=10))

# define parameters we need for plot
my.ylab <- "RMSE" 
K <- 10
repeats <- 500
wins <- CV_output_Placenta_predo_meth$wins

# titleplot
titleplot <- ggplot() + geom_point(aes(1,1), colour="white") +
  theme(plot.background = element_blank(),panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),panel.border = element_blank(),
        panel.background = element_blank(),axis.title.x = element_blank(),
        axis.title.y = element_blank(),axis.text.x = element_blank(),
        axis.text.y = element_blank(),axis.ticks = element_blank()) + annotate("text", x=1, y=1,
                                                                               label=paste0(my.ylab,'\n (',K,'-fold, ',repeats,' repeats) \nModel ',which.max(wins),' wins.'))

ggCV_PlacentaPREDO_M <- grid.arrange(p1, titleplot, p, p2, ncol=2, nrow=2, widths=c(5, 2), heights=c(2.5, 5))

ggsave("Results/CV_PlacentaPREDO_M.pdf",
       ggCV_PlacentaPREDO_M,
       width=84, height=50, units="mm", dpi=600, scale=2)

Placenta BET

predict PC1 methylation

Reg_Pheno_Placenta_BET <- na.omit(Pheno_Placenta_BET_filtered[ ,c("Trophoblasts", "Stromal", "Hofbauer", "Endothelial", "Syncytiotrophoblast", "nRBC", "C1", "C2","C3", "PC_methB1", "PC_methB2", "PC_methB3", "Sample_Name", "Sex", "PC1_ethnicity", "PC2_ethnicity", "PC3_ethnicity", "PC4_ethnicity", "gestage_weeks", "group")])

dim(Reg_Pheno_Placenta_BET)

save(Reg_Pheno_Placenta_BET, file="Input_Data_prepared/Reg_Pheno_Placenta_BET.Rdata")

1st PC methylation and cell types

cell1_reg <- Reg_Pheno_Placenta_BET[,c(1:6,10)]
cell1_reg %>%
  gather(-PC_methB1, key = "var", value = "value") %>% 
  ggplot(aes(x = value, y =PC_methB1)) +
  facet_wrap(~ var, scales = "free") +
  geom_point() +
  stat_smooth()+
  theme_bw()

cell2_reg <- Reg_Pheno_Placenta_BET[,c(7:9,10)]
cell2_reg %>%
  gather(-PC_methB1, key = "var", value = "value") %>% 
  ggplot(aes(x = value, y =PC_methB1)) +
  facet_wrap(~ var, scales = "free") +
  geom_point() +
  stat_smooth()+
  theme_bw()

-RMSE, model with all samples-

# Cross-validation
models = vector(mode = "list", length = 6) 
models[[1]] = PC_methB1 ~ 1
models[[2]] = PC_methB1 ~ gestage_weeks + Sex +PC1_ethnicity + PC2_ethnicity + PC3_ethnicity + PC4_ethnicity 

models[[3]] = PC_methB1 ~ Trophoblasts + Stromal + Hofbauer + Endothelial + Syncytiotrophoblast+ nRBC
models[[4]] = PC_methB1 ~ Trophoblasts + Stromal + Hofbauer + Endothelial + Syncytiotrophoblast+ nRBC+ gestage_weeks + Sex +PC1_ethnicity + PC2_ethnicity + PC3_ethnicity + PC4_ethnicity 

models[[5]] = PC_methB1 ~ C1 + C2 + C3
models[[6]] = PC_methB1 ~ C1 + C2+ C3 + gestage_weeks + Sex +PC1_ethnicity + PC2_ethnicity + PC3_ethnicity + PC4_ethnicity

CV_output_Placenta_BET_meth = xval.glm(data = Reg_Pheno_Placenta_BET, models, folds = 10, repeats = 500, seed = 200)

save(CV_output_Placenta_BET_meth, file="Results/RData/CV_output_Placenta_BET_meth.Rdata")

identify conspicious samples extract ref-based cell types

refbased_cells_Pheno_BET_filtered <- Pheno_Placenta_BET_filtered[ ,c(names_refbased_cells, "Sample_Name")]

IQR > 3

IQR_outliers3_refbased_cells_Pheno_BET_filtered <- cbind(data.frame(sapply(refbased_cells_Pheno_BET_filtered[,!names(refbased_cells_Pheno_BET_filtered) %in% "Sample_Name"], function(x) is_outlier(x, coef=3))), refbased_cells_Pheno_BET_filtered$Sample_Name)

names(IQR_outliers3_refbased_cells_Pheno_BET_filtered)[names(IQR_outliers3_refbased_cells_Pheno_BET_filtered) == "refbased_cells_Pheno_BET_filtered$Sample_Name"] <- "Sample_Name"

IQR_outliers3_names <- unique(c(IQR_outliers3_refbased_cells_Pheno_BET_filtered[IQR_outliers3_refbased_cells_Pheno_BET_filtered$nRBC == TRUE, "Sample_Name"], IQR_outliers3_refbased_cells_Pheno_BET_filtered[IQR_outliers3_refbased_cells_Pheno_BET_filtered$Hofbauer == TRUE, "Sample_Name"]))

exclude the 5 outliers in Hofbauer & nRBC

Reg_Pheno_Placenta_BET_cleaned <- Reg_Pheno_Placenta_BET[! Reg_Pheno_Placenta_BET$Sample_Name %in% IQR_outliers3_names, ]

-RMSE- with ‘cleaned’ model

# Cross-validation
models = vector(mode = "list", length = 6) 
models[[1]] = PC_methB1 ~ 1
models[[2]] = PC_methB1 ~ gestage_weeks + Sex +PC1_ethnicity + PC2_ethnicity + PC3_ethnicity + PC4_ethnicity 

models[[3]] = PC_methB1 ~ Trophoblasts + Stromal + Hofbauer + Endothelial + Syncytiotrophoblast+ nRBC
models[[4]] = PC_methB1 ~ Trophoblasts + Stromal + Hofbauer + Endothelial + Syncytiotrophoblast+ nRBC+ gestage_weeks + Sex +PC1_ethnicity + PC2_ethnicity + PC3_ethnicity + PC4_ethnicity 

models[[5]] = PC_methB1 ~ C1 + C2 + C3
models[[6]] = PC_methB1 ~ C1 + C2+ C3 + gestage_weeks + Sex +PC1_ethnicity + PC2_ethnicity + PC3_ethnicity + PC4_ethnicity

CV_output_Placenta_BET_meth_cleaned = xval.glm(data = Reg_Pheno_Placenta_BET_cleaned, models, folds = 10, repeats = 500, seed = 200)

save(CV_output_Placenta_BET_meth_cleaned, file="Results/RData/CV_output_Placenta_BET_meth_cleaned.Rdata")

-RMSE- with ‘cleaned model’ and BET status included

# Cross-validation
models = vector(mode = "list", length = 6) 
models[[1]] = PC_methB1 ~ 1
models[[2]] = PC_methB1 ~ gestage_weeks + Sex +PC1_ethnicity + PC2_ethnicity + PC3_ethnicity + PC4_ethnicity + group

models[[3]] = PC_methB1 ~ Trophoblasts + Stromal + Hofbauer + Endothelial + Syncytiotrophoblast+ nRBC
models[[4]] = PC_methB1 ~ Trophoblasts + Stromal + Hofbauer + Endothelial + Syncytiotrophoblast+ nRBC+ gestage_weeks + Sex +PC1_ethnicity + PC2_ethnicity + PC3_ethnicity + PC4_ethnicity + group

models[[5]] = PC_methB1 ~ C1 + C2 + C3
models[[6]] = PC_methB1 ~ C1 + C2+ C3 + gestage_weeks + Sex +PC1_ethnicity + PC2_ethnicity + PC3_ethnicity + PC4_ethnicity+ group

CV_output_Placenta_BET_meth_group_cleaned = xval.glm(data = Reg_Pheno_Placenta_BET_cleaned, models, folds = 10, repeats = 500, seed = 200)

get adjusted R2 from winning model

rsq(glm(models[[3]], data= Reg_Pheno_Placenta_BET_cleaned),adj=TRUE,type='v')

summary(glm(models[[3]], data= Reg_Pheno_Placenta_BET_cleaned))

# difference null and residual variance
6621201-932287
130-126
#p-value = 1 - pchisq(deviance, degrees of freedom)
1-pchisq(5688914, 4)
# -> significant

Plots

load("Results/RData/CV_output_Placenta_BET_meth_cleaned.Rdata")

# this is how plots were defined in original package function
pe2 <- CV_output_Placenta_BET_meth_cleaned[["den.plot"]]
pe1 <- CV_output_Placenta_BET_meth_cleaned[["stab.plot"]]
pe <- CV_output_Placenta_BET_meth_cleaned[["box.plot"]]

pe1 <- pe1 +
  theme_minimal()+
  theme(axis.text.x = element_text(size=8), axis.title.y = element_text(size=8), axis.text.y = element_text(size=5),legend.text=element_text(size=8), legend.title=element_text(size=8), legend.key.size = unit(0.2, 'cm'))+
  labs(tag = "d ", title="Placenta (BET)")+
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.25))+
  scale_color_discrete(name="Models",labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"))

pe <- pe +
  theme_minimal()+
  theme(axis.text.x = element_text(size=10), axis.title.x=element_text(size=10), axis.title.y = element_text(size=10), axis.text.y = element_text(size=10), legend.position="none", axis.text.x.top = element_text(size=10))+
  labs(tag= "    ")

pe2 <- pe2 +
  theme_minimal()+
  theme(legend.position="none", axis.text.x = element_text(size=10))

# define parameters we need for plot
my.ylab <- "RMSE" 
K <- 10
repeats <- 500
wins <- CV_output_Placenta_BET_meth_cleaned$wins

# titleplot
titleplot <- ggplot() + geom_point(aes(1,1), colour="white") +
  theme(plot.background = element_blank(),panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),panel.border = element_blank(),
        panel.background = element_blank(),axis.title.x = element_blank(),
        axis.title.y = element_blank(),axis.text.x = element_blank(),
        axis.text.y = element_blank(),axis.ticks = element_blank()) + annotate("text", x=1, y=1,
                                                                               label=paste0(my.ylab,'\n (',K,'-fold, ',repeats,' repeats) \nModel ',which.max(wins),' wins.'))

ggCV_PlacentaBET_M <- grid.arrange(pe1, titleplot, pe, pe2, ncol=2, nrow=2, widths=c(5, 2), heights=c(2.5, 5))

ggsave("Results/CV_PlacentaBET_M.pdf",
       ggCV_PlacentaBET_M,
       width=84, height=50, units="mm", dpi=600, scale=2)

Make outlier plot for BET CV Model

this is an adaption to the plotting with facets I need only for this plot:

Facet <- ggproto(
  init_scales = function(layout, x_scale = NULL, y_scale = NULL, params) {
    scales <- list()
    if (!is.null(x_scale)) {
      scales$x <- plyr::rlply(max(layout$SCALE_X), x_scale$clone())
    }
    if (!is.null(y_scale)) {
      scales$y <- plyr::rlply(max(layout$SCALE_Y), y_scale$clone())
    }
    scales
  },
)

scale_override <- function(which, scale) {
  if(!is.numeric(which) || (length(which) != 1) || (which %% 1 != 0)) {
    stop("which must be an integer of length 1")
  }
  
  if(is.null(scale$aesthetics) || !any(c("x", "y") %in% scale$aesthetics)) {
    stop("scale must be an x or y position scale")
  }
  
  structure(list(which = which, scale = scale), class = "scale_override")
}

CustomFacetWrap <- ggproto(
  "CustomFacetWrap", FacetWrap,
  init_scales = function(self, layout, x_scale = NULL, y_scale = NULL, params) {
    # make the initial x, y scales list
    scales <- ggproto_parent(FacetWrap, self)$init_scales(layout, x_scale, y_scale, params)
    
    if(is.null(params$scale_overrides)) return(scales)
    
    max_scale_x <- length(scales$x)
    max_scale_y <- length(scales$y)
    
    # ... do some modification of the scales$x and scales$y here based on params$scale_overrides
    for(scale_override in params$scale_overrides) {
      which <- scale_override$which
      scale <- scale_override$scale
      
      if("x" %in% scale$aesthetics) {
        if(!is.null(scales$x)) {
          if(which < 0 || which > max_scale_x) stop("Invalid index of x scale: ", which)
          scales$x[[which]] <- scale$clone()
        }
      } else if("y" %in% scale$aesthetics) {
        if(!is.null(scales$y)) {
          if(which < 0 || which > max_scale_y) stop("Invalid index of y scale: ", which)
          scales$y[[which]] <- scale$clone()
        }
      } else {
        stop("Invalid scale")
      }
    }
    
    # return scales
    scales
  }
)

facet_wrap_custom <- function(..., scale_overrides = NULL) {
  # take advantage of the sanitizing that happens in facet_wrap
  facet_super <- facet_wrap(...)
  
  # sanitize scale overrides
  if(inherits(scale_overrides, "scale_override")) {
    scale_overrides <- list(scale_overrides)
  } else if(!is.list(scale_overrides) || 
            !all(vapply(scale_overrides, inherits, "scale_override", FUN.VALUE = logical(1)))) {
    stop("scale_overrides must be a scale_override object or a list of scale_override objects")
  }
  
  facet_super$params$scale_overrides <- scale_overrides
  
  ggproto(NULL, CustomFacetWrap,
    shrink = facet_super$shrink,
    params = facet_super$params
  )
}

plot_scat_refbased_bet <- Reg_Pheno_Placenta_BET[,c("PC_methB1", names_refbased_cells)] %>%
  gather(-PC_methB1, key = "var", value = "value") %>% 
  ggplot(aes(x = value, y =PC_methB1)) +
  facet_wrap(~ var, scales = "free_x") +
  geom_point() +
  theme_bw()+
  labs(x="", y="PC1 methylation")+
    theme(axis.text.x = element_text(size=6, angle=30, hjust=1), axis.title.x=element_text(size=6), axis.title.y = element_text(size=6), axis.text.y = element_text(size=6), plot.margin=unit(c(8,8,8,8),"points"), strip.text = element_text(size = 6))

pe_o <- CV_output_Placenta_BET_meth[["box.plot"]]

pe_o <- pe_o +
  theme_minimal()+
  theme(axis.text.x = element_text(size=8), axis.title.x=element_text(size=8), axis.title.y = element_text(size=8), axis.text.y = element_text(size=8), legend.position="none", axis.text.x.top = element_text(size=8))

plot_box_refbased_bet <- boxplot_fun(x = names_refbased_cells, id = "Sample_Name", dat= Reg_Pheno_Placenta_BET)

plot_scat_refbased_bet_custom <- plot_scat_refbased_bet +
  facet_wrap_custom(~var, scales = "free", ncol = 3, scale_overrides = list(
    scale_override(1, scale_x_continuous(breaks = c(0,0.05,0.10,0.15,0.20), limits=c(0,0.20), labels = label_number(accuracy = .01))),
    scale_override(2, scale_x_continuous(breaks = c(0,0.01,0.02,0.03,0.04), limits=c(0,0.045), labels = label_number(accuracy = .01))),
    scale_override(3, scale_x_continuous(breaks = c(0,0.01,0.02,0.03,0.04), limits=c(0,0.045), labels = label_number(accuracy = .01))),
    scale_override(4, scale_x_continuous(breaks = c(0,0.05,0.10,0.15,0.20), limits=c(0,0.21), labels = label_number(accuracy = .01))),
    scale_override(5, scale_x_continuous(breaks = c(0,0.20,0.40,0.60,0.80), limits=c(0,0.85), labels = label_number(accuracy = .01))),
    scale_override(6, scale_x_continuous(breaks = c(0,0.10,0.20,0.30,0.40), limits=c(0,0.4), labels = label_number(accuracy = .01)))
  ))

ggsave("Results/BET_CV_Model_Outlier_Plots.pdf",
       ggarrange(pe_o, plot_scat_refbased_bet_custom, plot_box_refbased_bet, ncol=2, nrow=2, labels = c("a", "b", "c"), heights = c(1,1,1), widths=c(1,1.2,1)), width=84, height=100, units="mm", dpi=600, scale=2)

arrange plots together

Fig. 3 Cross-Validation Models

ggA_M <- grid.arrange(arrangeGrob(ggCV_CVS_M, ggCV_PlacentaITU_M, ggCV_PlacentaPREDO_M, ggCV_PlacentaBET_M, nrow=4))

ggsave("Results/Models_Meth_combined.pdf",
       grid.arrange(arrangeGrob(ggCV_CVS_M, ggCV_PlacentaITU_M, ggCV_PlacentaPREDO_M, ggCV_PlacentaBET_M, nrow=4)), width=84, height=180, units="mm", dpi=600, scale=2)

to the top

Check how methylation of single CpGs can be predicted using cell types

CVS

results_cvs_itu_lm_cpg_rb_filtered <-matrix(ncol=1,nrow=dim(Beta_CVS_ITU_filtered)[1])
results_cvs_itu_lm_cpg_rb_filtered[,1]<-rownames(Beta_CVS_ITU_filtered)

results_cvs_itu_lm_cpg_rf_filtered <-matrix(ncol=1,nrow=dim(Beta_CVS_ITU_filtered)[1])
results_cvs_itu_lm_cpg_rf_filtered[,1]<-rownames(Beta_CVS_ITU_filtered)

# ref-based
for (i in 1:dim(Beta_CVS_ITU_filtered)[1]) {
results_cvs_itu_lm_cpg_rb_filtered[i, 1] <- summary(lm(Beta_CVS_ITU_filtered[i,]~ Pheno_CVS_ITU_filtered$Trophoblasts + Pheno_CVS_ITU_filtered$Syncytiotrophoblast + Pheno_CVS_ITU_filtered$nRBC + Pheno_CVS_ITU_filtered$Endothelial + Pheno_CVS_ITU_filtered$Stromal + Pheno_CVS_ITU_filtered$Hofbauer))$adj.r.squared
}

# ref-free
for (i in 1:dim(Beta_CVS_ITU_filtered)[1]) {
  results_cvs_itu_lm_cpg_rf_filtered[i, 1] <- summary(lm(Beta_CVS_ITU_filtered[i,]~ Pheno_CVS_ITU_filtered$C1 + Pheno_CVS_ITU_filtered$C2 + Pheno_CVS_ITU_filtered$C3 + Pheno_CVS_ITU_filtered$C4 + Pheno_CVS_ITU_filtered$C5))$adj.r.squared
}

colnames(results_cvs_itu_lm_cpg_rb_filtered) <- c("adj.Rsquared")
results_cvs_itu_lm_cpg_rb_filtered <- data.frame(results_cvs_itu_lm_cpg_rb_filtered)
rownames(results_cvs_itu_lm_cpg_rb_filtered) <- rownames(Beta_CVS_ITU_filtered)

colnames(results_cvs_itu_lm_cpg_rf_filtered) <- c("adj.Rsquared")
results_cvs_itu_lm_cpg_rf_filtered <- data.frame(results_cvs_itu_lm_cpg_rf_filtered)
rownames(results_cvs_itu_lm_cpg_rf_filtered) <- rownames(Beta_CVS_ITU_filtered)

results_cvs_itu_lm_cpg_rb_filtered$adj.Rsquared[results_cvs_itu_lm_cpg_rb_filtered$adj.Rsquared <0] <- 0
results_cvs_itu_lm_cpg_rb_filtered$adj.Rsquared <- as.numeric(results_cvs_itu_lm_cpg_rb_filtered$adj.Rsquared)

results_cvs_itu_lm_cpg_rf_filtered$adj.Rsquared[results_cvs_itu_lm_cpg_rf_filtered$adj.Rsquared <0] <- 0
results_cvs_itu_lm_cpg_rf_filtered$adj.Rsquared <- as.numeric(results_cvs_itu_lm_cpg_rf_filtered$adj.Rsquared)

save(results_cvs_itu_lm_cpg_rb_filtered, file="Results/RData/results_cvs_itu_lm_cpg_rb_filtered.Rdata")
save(results_cvs_itu_lm_cpg_rf_filtered, file="Results/RData/results_cvs_itu_lm_cpg_rf_filtered.Rdata")

check mean and sd in r2

load("Results/RData/results_cvs_itu_lm_cpg_rb_filtered.Rdata")
load("Results/RData/results_cvs_itu_lm_cpg_rf_filtered.Rdata")

# mean and sd reference-based
mean(results_cvs_itu_lm_cpg_rb_filtered$adj.Rsquared)
sd(results_cvs_itu_lm_cpg_rb_filtered$adj.Rsquared)

cpgs_hist_cvs_rb_filtered <- ggplot(data=results_cvs_itu_lm_cpg_rb_filtered, aes(adj.Rsquared)) + 
  geom_histogram()+
  geom_vline(xintercept = 0.3, color="red", linetype="dotted")+
  xlab(as.expression(bquote("adjusted" ~ R^2)))+ ylab("count (CpGs)")+
  theme_bw()+ scale_x_continuous(breaks = seq(0, 0.8, 0.2))+
  theme(axis.text.x=element_text(size=10), axis.text.y=element_text(size=10), axis.title = element_text(size=10), axis.title.y=element_text(size=10))+
  labs(title="reference-based")

cpgs_hist_cvs_rb_filtered

# mean and sd reference-free
mean(results_cvs_itu_lm_cpg_rf_filtered$adj.Rsquared)
sd(results_cvs_itu_lm_cpg_rf_filtered$adj.Rsquared)

cpgs_hist_cvs_rf_filtered <- ggplot(data=results_cvs_itu_lm_cpg_rf_filtered, aes(adj.Rsquared)) + 
  geom_histogram()+
  geom_vline(xintercept = 0.3, color="red", linetype="dotted")+
  xlab(as.expression(bquote("adjusted" ~ R^2)))+ ylab("count (CpGs)")+
  theme_bw()+ scale_x_continuous(breaks = seq(0, 0.8, 0.2))+
  theme(axis.text.x=element_text(size=10), axis.text.y=element_text(size=10), axis.title = element_text(size=10), axis.title.y=element_text(size=10))+
  labs(title="reference-free")

cpgs_hist_cvs_rf_filtered

M_CpGs_CVS <- ggarrange(cpgs_hist_cvs_rb_filtered, cpgs_hist_cvs_rf_filtered, 
                        nrow = 1,
                        labels = c("a"),
                        align = "hv")

M_CpGs_CVS

to the top

Placenta ITU

results_placenta_itu_lm_cpg_rb_filtered <-matrix(ncol=1,nrow=dim(Beta_Placenta_ITU_reduced_filtered)[1])
results_placenta_itu_lm_cpg_rb_filtered[,1]<-rownames(Beta_Placenta_ITU_reduced_filtered)

results_placenta_itu_lm_cpg_rf_filtered <-matrix(ncol=1,nrow=dim(Beta_Placenta_ITU_reduced_filtered)[1])
results_placenta_itu_lm_cpg_rf_filtered[,1]<-rownames(Beta_Placenta_ITU_reduced_filtered)

# ref-based
for (i in 1:dim(Beta_Placenta_ITU_reduced_filtered)[1]) {
results_placenta_itu_lm_cpg_rb_filtered[i, 1] <- summary(lm(Beta_Placenta_ITU_reduced_filtered[i,]~ Pheno_Placenta_ITU_reduced_filtered$Trophoblasts + Pheno_Placenta_ITU_reduced_filtered$Syncytiotrophoblast + Pheno_Placenta_ITU_reduced_filtered$nRBC + Pheno_Placenta_ITU_reduced_filtered$Endothelial + Pheno_Placenta_ITU_reduced_filtered$Stromal + Pheno_Placenta_ITU_reduced_filtered$Hofbauer))$adj.r.squared
}
# note: without the outlier samples

# ref-free
for (i in 1:dim(Beta_Placenta_ITU_reduced_filtered)[1]) {
results_placenta_itu_lm_cpg_rf_filtered[i, 1] <- summary(lm(Beta_Placenta_ITU_reduced_filtered[i,]~ Pheno_Placenta_ITU_reduced_filtered$C1 + Pheno_Placenta_ITU_reduced_filtered$C2 + Pheno_Placenta_ITU_reduced_filtered$C3 + Pheno_Placenta_ITU_reduced_filtered$C4 + Pheno_Placenta_ITU_reduced_filtered$C5 + Pheno_Placenta_ITU_reduced_filtered$C6 + Pheno_Placenta_ITU_reduced_filtered$C7 + Pheno_Placenta_ITU_reduced_filtered$C8))$adj.r.squared
}

colnames(results_placenta_itu_lm_cpg_rb_filtered) <- c("adj.Rsquared")
results_placenta_itu_lm_cpg_rb_filtered <- data.frame(results_placenta_itu_lm_cpg_rb_filtered)
rownames(results_placenta_itu_lm_cpg_rb_filtered) <- rownames(Beta_Placenta_ITU_reduced_filtered)

colnames(results_placenta_itu_lm_cpg_rf_filtered) <- c("adj.Rsquared")
results_placenta_itu_lm_cpg_rf_filtered <- data.frame(results_placenta_itu_lm_cpg_rf_filtered)
rownames(results_placenta_itu_lm_cpg_rf_filtered) <- rownames(Beta_Placenta_ITU_reduced_filtered)

results_placenta_itu_lm_cpg_rb_filtered$adj.Rsquared[results_placenta_itu_lm_cpg_rb_filtered$adj.Rsquared <0] <- 0
results_placenta_itu_lm_cpg_rb_filtered$adj.Rsquared <- as.numeric(results_placenta_itu_lm_cpg_rb_filtered$adj.Rsquared)

results_placenta_itu_lm_cpg_rf_filtered$adj.Rsquared[results_placenta_itu_lm_cpg_rf_filtered$adj.Rsquared <0] <- 0
results_placenta_itu_lm_cpg_rf_filtered$adj.Rsquared <- as.numeric(results_placenta_itu_lm_cpg_rf_filtered$adj.Rsquared)

save(results_placenta_itu_lm_cpg_rb_filtered, file="Results/RData/results_placenta_itu_lm_cpg_rb_filtered.Rdata")
save(results_placenta_itu_lm_cpg_rf_filtered, file="Results/RData/results_placenta_itu_lm_cpg_rf_filtered.Rdata")

check mean and sd in r2

load("Results/RData/results_placenta_itu_lm_cpg_rb_filtered.Rdata")
load("Results/RData/results_placenta_itu_lm_cpg_rf_filtered.Rdata")

# mean and sd referenbece based
mean(results_placenta_itu_lm_cpg_rb_filtered$adj.Rsquared)
sd(results_placenta_itu_lm_cpg_rb_filtered$adj.Rsquared)

cpgs_hist_placenta_rb_filtered <- ggplot(data=results_placenta_itu_lm_cpg_rb_filtered, aes(adj.Rsquared)) + 
  geom_histogram()+
  geom_vline(xintercept = 0.3, color="red", linetype="dotted")+
  xlab(as.expression(bquote("adjusted" ~ R^2)))+ ylab("count (CpGs)")+
  theme_bw()+ scale_x_continuous(breaks = seq(0, 0.8, 0.2))+
  theme(axis.text.x=element_text(size=10), axis.text.y=element_text(size=10), axis.title = element_text(size=10), axis.title.y=element_text(size=10))+
  labs(title="reference-based")

cpgs_hist_placenta_rb_filtered

# mean and sd reference-free
mean(results_placenta_itu_lm_cpg_rf_filtered$adj.Rsquared)
sd(results_placenta_itu_lm_cpg_rf_filtered$adj.Rsquared)

cpgs_hist_placenta_rf_filtered <- ggplot(data=results_placenta_itu_lm_cpg_rf_filtered, aes(adj.Rsquared)) + 
  geom_histogram()+
  geom_vline(xintercept = 0.3, color="red", linetype="dotted")+
  xlab(as.expression(bquote("adjusted" ~ R^2)))+ ylab("count (CpGs)")+
  theme_bw()+ scale_x_continuous(breaks = seq(0, 0.8, 0.2))+
  theme(axis.text.x=element_text(size=10), axis.text.y=element_text(size=10), axis.title = element_text(size=10), axis.title.y=element_text(size=10))+
  labs(title="reference-free")

cpgs_hist_placenta_rf_filtered

M_CpGs_Placenta_ITU <- ggarrange(cpgs_hist_placenta_rb_filtered, cpgs_hist_placenta_rf_filtered, 
                                 nrow = 1,
                                 labels = c("b"),
                                 align = "hv")

M_CpGs_Placenta_ITU

to the top

Placenta PREDO

results_placenta_predo_lm_cpg_rb_filtered <-matrix(ncol=1,nrow=dim(Beta_Placenta_PREDO_filtered)[1])
results_placenta_predo_lm_cpg_rb_filtered[,1]<-rownames(Beta_Placenta_PREDO_filtered)

results_placenta_predo_lm_cpg_rf_filtered <-matrix(ncol=1,nrow=dim(Beta_Placenta_PREDO_filtered)[1])
results_placenta_predo_lm_cpg_rf_filtered[,1]<-rownames(Beta_Placenta_PREDO_filtered)

# ref-based
for (i in 1:dim(Beta_Placenta_PREDO_filtered)[1]) {
  results_placenta_predo_lm_cpg_rb_filtered[i, 1] <- summary(lm(Beta_Placenta_PREDO_filtered[i,]~ Pheno_Placenta_PREDO_filtered$Trophoblasts + Pheno_Placenta_PREDO_filtered$Syncytiotrophoblast + Pheno_Placenta_PREDO_filtered$nRBC + Pheno_Placenta_PREDO_filtered$Endothelial + Pheno_Placenta_PREDO_filtered$Stromal + Pheno_Placenta_PREDO_filtered$Hofbauer))$adj.r.squared
}

# ref-free
for (i in 1:dim(Beta_Placenta_PREDO_filtered)[1]) {
  results_placenta_predo_lm_cpg_rf_filtered[i, 1] <- summary(lm(Beta_Placenta_PREDO_filtered[i,]~ Pheno_Placenta_PREDO_filtered$C1 + Pheno_Placenta_PREDO_filtered$C2))$adj.r.squared
}

colnames(results_placenta_predo_lm_cpg_rb_filtered) <- c("adj.Rsquared")
results_placenta_predo_lm_cpg_rb_filtered <- data.frame(results_placenta_predo_lm_cpg_rb_filtered)
rownames(results_placenta_predo_lm_cpg_rb_filtered) <- rownames(Beta_Placenta_PREDO_filtered)

colnames(results_placenta_predo_lm_cpg_rf_filtered) <- c("adj.Rsquared")
results_placenta_predo_lm_cpg_rf_filtered <- data.frame(results_placenta_predo_lm_cpg_rf_filtered)
rownames(results_placenta_predo_lm_cpg_rf_filtered) <- rownames(Beta_Placenta_PREDO_filtered)

results_placenta_predo_lm_cpg_rb_filtered$adj.Rsquared[results_placenta_predo_lm_cpg_rb_filtered$adj.Rsquared <0] <- 0
results_placenta_predo_lm_cpg_rb_filtered$adj.Rsquared <- as.numeric(results_placenta_predo_lm_cpg_rb_filtered$adj.Rsquared)

results_placenta_predo_lm_cpg_rf_filtered$adj.Rsquared[results_placenta_predo_lm_cpg_rf_filtered$adj.Rsquared <0] <- 0
results_placenta_predo_lm_cpg_rf_filtered$adj.Rsquared <- as.numeric(results_placenta_predo_lm_cpg_rf_filtered$adj.Rsquared)

save(results_placenta_predo_lm_cpg_rb_filtered, file="Results/RData/results_placenta_predo_lm_cpg_rb_filtered.Rdata")
save(results_placenta_predo_lm_cpg_rf_filtered, file="Results/RData/results_placenta_predo_lm_cpg_rf_filtered.Rdata")

check mean and sd in r2

load("Results/RData/results_placenta_predo_lm_cpg_rb_filtered.Rdata")
load("Results/RData/results_placenta_predo_lm_cpg_rf_filtered.Rdata")

# mean and sd reference-based
mean(results_placenta_predo_lm_cpg_rb_filtered$adj.Rsquared)
sd(results_placenta_predo_lm_cpg_rb_filtered$adj.Rsquared)

cpgs_hist_placenta_predo_rb_filtered <- ggplot(data=results_placenta_predo_lm_cpg_rb_filtered, aes(adj.Rsquared)) + 
  geom_histogram()+
  geom_vline(xintercept = 0.3, color="red", linetype="dotted")+
  xlab(as.expression(bquote("adjusted" ~ R^2)))+ ylab("count (CpGs)")+
  theme_bw()+ scale_x_continuous(breaks = seq(0, 0.8, 0.2))+
  theme(axis.text.x=element_text(size=10), axis.text.y=element_text(size=10), axis.title = element_text(size=10), axis.title.y=element_text(size=10))+
  labs(title="reference-based")

cpgs_hist_placenta_predo_rb_filtered

# mean and sd reference-free
mean(results_placenta_predo_lm_cpg_rf_filtered$adj.Rsquared)
sd(results_placenta_predo_lm_cpg_rf_filtered$adj.Rsquared)

cpgs_hist_placenta_predo_rf_filtered <- ggplot(data=results_placenta_predo_lm_cpg_rf_filtered, aes(adj.Rsquared)) + 
  geom_histogram()+
  geom_vline(xintercept = 0.3, color="red", linetype="dotted")+
  xlab(as.expression(bquote("adjusted" ~ R^2)))+ ylab("count (CpGs)")+
  theme_bw()+ scale_x_continuous(breaks = seq(0, 0.8, 0.2))+
  theme(axis.text.x=element_text(size=10), axis.text.y=element_text(size=10), axis.title = element_text(size=10), axis.title.y=element_text(size=10))+
  labs(title="reference-free")

cpgs_hist_placenta_predo_rf_filtered

M_CpGs_Placenta_PREDO <- ggarrange(cpgs_hist_placenta_predo_rb_filtered, cpgs_hist_placenta_predo_rf_filtered, 
                                   nrow = 1,
                                   labels = c("c"),
                                   align = "hv")

M_CpGs_Placenta_PREDO

to the top

Placenta BET

results_placenta_BET_lm_cpg_rb_filtered <-matrix(ncol=1,nrow=dim(Beta_Placenta_BET_filtered)[1])
results_placenta_BET_lm_cpg_rb_filtered[,1]<-rownames(Beta_Placenta_BET_filtered)

results_placenta_BET_lm_cpg_rf_filtered <-matrix(ncol=1,nrow=dim(Beta_Placenta_BET_filtered)[1])
results_placenta_BET_lm_cpg_rf_filtered[,1]<-rownames(Beta_Placenta_BET_filtered)

for (i in 1:dim(Beta_Placenta_BET_filtered)[1]) {
  results_placenta_BET_lm_cpg_rb_filtered[i, 1] <- summary(lm(Beta_Placenta_BET_filtered[i,]~ Pheno_Placenta_BET_filtered$Trophoblasts + Pheno_Placenta_BET_filtered$Syncytiotrophoblast + Pheno_Placenta_BET_filtered$nRBC + Pheno_Placenta_BET_filtered$Endothelial + Pheno_Placenta_BET_filtered$Stromal + Pheno_Placenta_BET_filtered$Hofbauer))$adj.r.squared
}

for (i in 1:dim(Beta_Placenta_BET_filtered)[1]) {
  results_placenta_BET_lm_cpg_rf_filtered[i, 1] <- summary(lm(Beta_Placenta_BET_filtered[i,]~ Pheno_Placenta_BET_filtered$C1 + Pheno_Placenta_BET_filtered$C2 + Pheno_Placenta_BET_filtered$C3))$adj.r.squared
}

colnames(results_placenta_BET_lm_cpg_rb_filtered) <- c("adj.Rsquared")
results_placenta_BET_lm_cpg_rb_filtered <- data.frame(results_placenta_BET_lm_cpg_rb_filtered)
rownames(results_placenta_BET_lm_cpg_rb_filtered) <- rownames(Beta_Placenta_BET_filtered)

colnames(results_placenta_BET_lm_cpg_rf_filtered) <- c("adj.Rsquared")
results_placenta_BET_lm_cpg_rf_filtered <- data.frame(results_placenta_BET_lm_cpg_rf_filtered)
rownames(results_placenta_BET_lm_cpg_rf_filtered) <- rownames(Beta_Placenta_BET_filtered)

results_placenta_BET_lm_cpg_rb_filtered$adj.Rsquared[results_placenta_BET_lm_cpg_rb_filtered$adj.Rsquared <0] <- 0
results_placenta_BET_lm_cpg_rb_filtered$adj.Rsquared <- as.numeric(results_placenta_BET_lm_cpg_rb_filtered$adj.Rsquared)

results_placenta_BET_lm_cpg_rf_filtered$adj.Rsquared[results_placenta_BET_lm_cpg_rf_filtered$adj.Rsquared <0] <- 0
results_placenta_BET_lm_cpg_rf_filtered$adj.Rsquared <- as.numeric(results_placenta_BET_lm_cpg_rf_filtered$adj.Rsquared)

save(results_placenta_BET_lm_cpg_rb_filtered, file="Results/RData/results_placenta_BET_lm_cpg_rb_filtered.Rdata")
save(results_placenta_BET_lm_cpg_rf_filtered, file="Results/RData/results_placenta_BET_lm_cpg_rf_filtered.Rdata")

check mean and sd in r2

load("Results/RData/results_placenta_BET_lm_cpg_rb_filtered.Rdata")
load("Results/RData/results_placenta_BET_lm_cpg_rf_filtered.Rdata")

# mean and sd reference-based
mean(results_placenta_BET_lm_cpg_rb_filtered$adj.Rsquared)
sd(results_placenta_BET_lm_cpg_rb_filtered$adj.Rsquared)

cpgs_hist_placenta_BET_rb_filtered <- ggplot(data=results_placenta_BET_lm_cpg_rb_filtered, aes(adj.Rsquared)) + 
  geom_histogram()+
  geom_vline(xintercept = 0.3, color="red", linetype="dotted")+
  xlab(as.expression(bquote("adjusted" ~ R^2)))+ ylab("count (CpGs)")+
  theme_bw()+ scale_x_continuous(breaks = seq(0, 0.8, 0.2))+
  theme(axis.text.x=element_text(size=10), axis.text.y=element_text(size=10), axis.title = element_text(size=10), axis.title.y=element_text(size=10))+
  labs(title="reference-based")

cpgs_hist_placenta_BET_rb_filtered

# mean and sd reference-free
mean(results_placenta_BET_lm_cpg_rf_filtered$adj.Rsquared)
sd(results_placenta_BET_lm_cpg_rf_filtered$adj.Rsquared)

cpgs_hist_placenta_BET_rf_filtered <- ggplot(data=results_placenta_BET_lm_cpg_rf_filtered, aes(adj.Rsquared)) + 
  geom_histogram()+
  geom_vline(xintercept = 0.3, color="red", linetype="dotted")+
  xlab(as.expression(bquote("adjusted" ~ R^2)))+ ylab("count (CpGs)")+
  theme_bw()+ scale_x_continuous(breaks = seq(0, 0.8, 0.2))+
  theme(axis.text.x=element_text(size=10), axis.text.y=element_text(size=10), axis.title = element_text(size=10), axis.title.y=element_text(size=10))+
  labs(title="reference-free")

cpgs_hist_placenta_BET_rf_filtered

reference-based explains more

M_CpGs_Placenta_BET <- ggarrange(cpgs_hist_placenta_BET_rb_filtered, cpgs_hist_placenta_BET_rf_filtered, 
                                       nrow = 1,
                                       labels = c("d"),
                                       align = "hv")

M_CpGs_Placenta_BET

to the top

arrange Model figures

single CpG prediction models

M_CpGs_R2 <- ggarrange(M_CpGs_CVS, M_CpGs_Placenta_ITU, M_CpGs_Placenta_PREDO, M_CpGs_Placenta_BET, nrow = 4, align="hv")
M_CpGs_R2

ggsave("Results/CpGs_R2_combined.pdf",
       ggarrange(M_CpGs_CVS, M_CpGs_Placenta_ITU, M_CpGs_Placenta_PREDO, M_CpGs_Placenta_BET, nrow = 4, align="hv"), width=84, height=180, units="mm", dpi=600, scale=2)

to the top

CpGs most influenced by cell types (reference-based)

extract CpGs

load R2 (predicted by RPC cell types)

# reference-based
load("Results/RData/results_cvs_itu_lm_cpg_rb_filtered.Rdata")
load("Results/RData/results_placenta_itu_lm_cpg_rb_filtered.Rdata")
load("Results/RData/results_placenta_predo_lm_cpg_rb_filtered.Rdata")
load("Results/RData/results_placenta_BET_lm_cpg_rb_filtered.Rdata")

q9_cvs_rb <- quantile(results_cvs_itu_lm_cpg_rb_filtered$adj.Rsquared, probs=0.9)
q9_placenta_itu_rb <- quantile(results_placenta_itu_lm_cpg_rb_filtered$adj.Rsquared, probs=0.9)
q9_placenta_predo_rb <- quantile(results_placenta_predo_lm_cpg_rb_filtered$adj.Rsquared, probs=0.9)
q9_placenta_bet_rb <- quantile(results_placenta_BET_lm_cpg_rb_filtered$adj.Rsquared, probs=0.9)

genes of all CpGs that overlap between data sets

mapping CpGs to genes

# load matching EPIC CpGs Genes
genes_cpgs_map <- read.table("Input_Data_prepared/genes_CpGs_EPIC.txt", header=T)
# load gene information
load("Input_Data_prepared/genes_CpGs_EPIC.Rdata") #epic_genes

identical(genes_cpgs_map$gene, as.character(epic_genes$name))
genes_cpgs_fullinfo <- cbind(genes_cpgs_map, epic_genes)

write.table(genes_cpgs_fullinfo, file="Input_Data_prepared/genes_cpgs_fullinfo.txt")
save(genes_cpgs_fullinfo, file="Input_Data_prepared/genes_cpgs_fullinfo.Rdata")

cpgs_all_filtered <- list(rownames(results_cvs_itu_lm_cpg_rb_filtered), rownames(results_placenta_itu_lm_cpg_rb_filtered), rownames(results_placenta_predo_lm_cpg_rb_filtered), rownames(results_placenta_predo_lm_cpg_rb_filtered))
# note: although I use the reference-based results here, I only extract CpG names which are the same for ref-based and ref-free - so this is the same for ref-based and ref-free and only for simplicity and object loading

# which are the cpgs that overlap between the data sets
overlap_cpgs_all_filtered <- Reduce(intersect,cpgs_all_filtered)
length(overlap_cpgs_all_filtered)

overlap_cpgs_genes_all_filtered <- genes_cpgs_fullinfo[genes_cpgs_fullinfo$CpG %in% overlap_cpgs_all_filtered, ]
dim(overlap_cpgs_genes_all_filtered)

genes_all_Symbol_filtered <- as.character(unique(overlap_cpgs_genes_all_filtered$gene)) #

length(genes_all_Symbol_filtered)

write.table(genes_all_Symbol_filtered, file="Results/RData/genes_all_Symbol_filtered.txt", row.names = FALSE,col.names = FALSE)
save(genes_all_Symbol_filtered, file="Results/RData/genes_all_Symbol_filtered.Rdata")

extract CpGs with R2 > 30% predicted by reference-based cell types that are in all data sets

cpgs_cvs_itu_lmrb_filtered <- subset(results_cvs_itu_lm_cpg_rb_filtered, adj.Rsquared > 0.3)
dim(cpgs_cvs_itu_lmrb_filtered)
cpgs_placenta_itu_lmrb_filtered <- subset(results_placenta_itu_lm_cpg_rb_filtered, adj.Rsquared > 0.3)
dim(cpgs_placenta_itu_lmrb_filtered)
cpgs_placenta_predo_lmrb_filtered <- subset(results_placenta_predo_lm_cpg_rb_filtered, adj.Rsquared > 0.3)
dim(cpgs_placenta_predo_lmrb_filtered)
cpgs_placenta_BET_lmrb_filtered <- subset(results_placenta_BET_lm_cpg_rb_filtered, adj.Rsquared > 0.3)
dim(cpgs_placenta_BET_lmrb_filtered)


cpgs_list_filtered <- list(rownames(cpgs_cvs_itu_lmrb_filtered), rownames(cpgs_placenta_itu_lmrb_filtered), rownames(cpgs_placenta_predo_lmrb_filtered), rownames(cpgs_placenta_BET_lmrb_filtered))

# which are the cpgs that overlap between the data sets with R2 > .30
overlap_cpgs_filtered <- Reduce(intersect,cpgs_list_filtered)
length(overlap_cpgs_filtered)
# 26.092

overlap_cpgs_genes_filtered <- genes_cpgs_fullinfo[genes_cpgs_fullinfo$CpG %in% overlap_cpgs_filtered, ]
dim(overlap_cpgs_genes_filtered)

write.table(overlap_cpgs_genes_filtered, file="Results/RData/overlap_cpgs_genes_rb_filtered.txt", row.names = FALSE,col.names = FALSE)
save(overlap_cpgs_genes_filtered, file="Results/RData/overlap_cpgs_genes_rb_filtered.Rdata")
# -> CpGs mapped to genes with R2 > .30 and in all data sets

genes_overlap_Symbol_filtered <- as.character(unique(overlap_cpgs_genes_filtered$gene)) 
length(genes_overlap_Symbol_filtered)

write.table(genes_overlap_Symbol_filtered, file="Results/RData/genes_overlap_Symbol_filtered.txt", row.names = FALSE,col.names = FALSE)
save(genes_overlap_Symbol_filtered, file="Results/RData/genes_overlap_Symbol_filtered.Rdata")

extract the CpGs with gene info etc. that are used as input for enrichment (CpGs influenced by cell types)

identical(overlap_cpgs_genes_filtered$gene, as.character(overlap_cpgs_genes_filtered$name))
overlap_cpgs_genes_filtered$GeneSymbol <- overlap_cpgs_genes_filtered$gene
overlap_cpgs_genes_filtered$gene <- NULL
overlap_cpgs_genes_filtered$name <- NULL

CpGs_R30_Genes_refbased <- overlap_cpgs_genes_filtered[ ,c("CpG", "GeneSymbol")]

Table S2 - reference-based

write.xlsx2(CpGs_R30_Genes_refbased, "Results/List_CpGs_R30_Genes_refbased.xlsx", sheetName = "List", col.names = TRUE, row.names = FALSE, append = FALSE)

to the top

Tissue-specific Gene Enrichment

The TissueEnrich R package is used to calculate enrichment of tissue-specific genes in a set of input genes.

The user can input genes to determine which tissue-specific genes are enriched in those datasets.

The hypergeometric test is being used to determine if the tissue-specific genes are enriched among the input genes. Function teEnrichment: Given a gene list as input, this function calculates the tissue-specific gene enrichment using tissue-specific genes from either human or mouse RNA-Seq datasets.

genes with R2 > .30 that overlap between the data sets against all genes

load("Results/RData/genes_all_Symbol_filtered.Rdata") 
length(genes_all_Symbol_filtered)

genes_overlap_input_filtered <-GeneSet(geneIds=genes_overlap_Symbol_filtered,organism="Homo Sapiens",geneIdType=SymbolIdentifier()) 

genes_overlap_background_filtered <-GeneSet(geneIds=genes_all_Symbol_filtered,organism="Homo Sapiens",geneIdType=SymbolIdentifier())

When using teEnrichment, the user can specify the RNA-Seq dataset (rnaSeqDataset) to be used for the tissue-specific gene enrichment analysis. 1 for “Human Protein Atlas” (default) 2 for “GTEx” 3 for “Mouse ENCODE”

output_GEnrich_filtered <- teEnrichment(inputGenes = genes_overlap_input_filtered, backgroundGenes = genes_overlap_background_filtered, rnaSeqDataset = 1)

The first object is the SummarizedExperiment object containing the enrichment results, the second and the third object contains the expression values and tissue-specificity information of the tissue-specific genes for genes from the input gene set, and the fourth is a GeneSet object containing genes that were not identified in the tissue-specific gene data.

seEnrichmentOutput_filtered <-output_GEnrich_filtered[[1]]

enrichmentOutput_filtered<-setNames(data.frame(assay(seEnrichmentOutput_filtered),row.names = rowData(seEnrichmentOutput_filtered)[,1]), colData(seEnrichmentOutput_filtered)[,1])
enrichmentOutput_filtered$Tissue<-row.names(enrichmentOutput_filtered)

save(output_GEnrich_filtered, file="Results/RData/output_GEnrich_filtered.Rdata")
save(enrichmentOutput_filtered, file="Results/RData/enrichmentOutput_filtered.Rdata")

load("Results/RData/output_GEnrich_filtered.Rdata") 
# list object containing the enrichment results
load("Results/RData/enrichmentOutput_filtered.Rdata")
# containing the −Log10(P−Value) and fold-change, corresponding to the tissue-specific gene enrichment, along with the number of tissue-specific genes in the input gene set. This object can be used to visualize tissue-specific gene enrichment in the form of a bar chart using the −Log10(P−Value) values.

all genes mapped:

sum(enrichmentOutput_filtered$Tissue.Specific.Genes)

genes not identified in tissue-specific gene data

genes_not_ident_TissueEnrich_filtered <- geneIds(output_GEnrich_filtered[[4]]) #865

In TissueEnrich, they are dividing the genes into six different groups which are specified in their paper (http://doi.org/10.1093/bioinformatics/bty890). The missing genes could be in the other three non-tissue specific gene groups (Not Expressed, Expressed In all, or Mixed).

Tissue-specific genes are defined using the algorithm from the HPA (Uhlén et al. 2015), and can be grouped as follows: Tissue Enriched: Genes with an expression level greater than 1 (TPM or FPKM) that also have at least five-fold higher expression levels in a particular tissue compared to all other tissues. Group Enriched: Genes with an expression level greater than 1 (TPM or FPKM) that also have at least five-fold higher expression levels in a group of 2-7 tissues compared to all other tissues, and that are not considered Tissue Enriched. Tissue Enhanced: Genes with an expression level greater than 1 (TPM or FPKM) that also have at least five-fold higher expression levels in a particular tissue compared to the average levels in all other tissues, and that are not considered Tissue Enriched or Group Enriched.

tissue_enrich_main_plot_refbased <- ggplot(enrichmentOutput_filtered,aes(x=reorder(Tissue,-Log10PValue),y=Log10PValue,label = Tissue.Specific.Genes,fill = Tissue))+
  geom_bar(stat = 'identity')+
  labs(x='', y = '-log10 (p-adjusted)')+
  scale_y_continuous(breaks = c(0, 2, 4, 6), limits = c(0,8), expand = c(0,0))+
  theme_bw()+
  theme(legend.position="none")+
  theme(plot.title = element_text(hjust = 0.5,size = 10),axis.title = element_text(size=10))+
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),panel.grid.major= element_blank(),panel.grid.minor = element_blank(), plot.margin=unit(c(1,1,1,2),"cm"))+
  geom_hline(yintercept=2, linetype="dashed", color = "black")

ggsave("Results/TissueEnrichment_CpGGenes.pdf",
       tissue_enrich_main_plot_refbased, width=84, height=50, units="mm", dpi=600, scale=2, device = cairo_pdf)

ggsave(tissue_enrich_main_plot_refbased, filename = "Results/TissueEnrichment_CpGGenes.png", dpi = 300, type = "cairo",
       width = 8, height = 5, units = "in")

generate a heatmap showing the expression of the placenta specific genes across all the tissues.

seExp_filtered<-output_GEnrich_filtered[[2]][["Placenta"]]
exp_filtered<-setNames(data.frame(assay(seExp_filtered), row.names = rowData(seExp_filtered)[,1]), colData(seExp_filtered)[,1])
exp_filtered$Gene<-row.names(exp_filtered)
exp_filtered<-exp_filtered %>% gather(key = "Tissue", value = "expression",1:(ncol(exp_filtered)-1))

ggplot(exp_filtered, aes(Tissue, Gene)) + geom_tile(aes(fill = expression),
                                                    colour = "white") + scale_fill_gradient(low = "white",
                                                                                            high = "steelblue")+
  labs(x='', y = '')+
  theme_bw()+
  guides(fill = guide_legend(title = "Log2(TPM)"))+
  #theme(legend.position="none")+
  theme(plot.title = element_text(hjust = 0.5,size = 10),axis.title = element_text(size=10))+
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),axis.text.y = element_text(angle = 45, vjust = 1, hjust = 1, size=5), panel.grid.major= element_blank(),panel.grid.minor = element_blank())

get p value for placenta

enrichmentOutput_filtered["Placenta", ]
options(scipen=999)
log10p_placenta_enrich_filtered <- enrichmentOutput_filtered["Placenta", "Log10PValue"]
p_placenta_enrich_filtered <- 10^-log10p_placenta_enrich_filtered

log10p_placenta_enrich_filtered
p_placenta_enrich_filtered

#list containing the tissue-specificity information for the input genes. The code below retrieves the tissue-specific genes along with the type of tissue-specificity in placenta tissue.
seGroupInf_Placenta_filtered <-output_GEnrich_filtered[[3]][["Placenta"]]
groupInf_Placenta_filtered <-data.frame(assay(seGroupInf_Placenta_filtered)) # 186
groupInf_Placenta_filtered

save(groupInf_Placenta_filtered, file="Results/RData/groupInf_Placenta_filtered.Rdata")
write.table(groupInf_Placenta_filtered$Gene, file="Results/RData/genes_cpgs_placenta_enriched_filtered.txt", row.names = F, col.names = F)
# 186 genes highly important / specific for placenta

export the 186 placenta-specific genes in our input data to excel

Table S3

write.xlsx(groupInf_Placenta_filtered,"Results/genes_cpgs_placenta_enriched_filtered.xlsx", row.names = F, col.names = F)
# 186 genes highly important / specific for placenta

to the top

Placenta cell enrichment

PlacentaCellEnrich enables users to provide background genes for carrying out cell-specific gene enrichment. In this case, instead of using all the genes in the dataset, a background gene set is being used to carry out the enrichment analysis. It should be noted that the background gene set must have all the genes of the input gene set. The p-values are corrected for multiple hypothesis testing using the Benjamini & Hochberg correction. If the background gene set is not provided all the genes will be used as background.

How should I interpret my results? The recommended threshold value of the adjusted p-values and fold-change for selecting the enriched cells is 0.01 and 2 respectively. It is also recommended that the users should look at cell-specific gene enrichment from all the sources and have the highest confidence in results that are consistent across datasets.

INPUT:

gene set: genes with R2 > .30 that overlap between data sets (genes_overlap_Sympbol_filtered)
background: genes that overlap between data sets (genes_all_Symbol_filtered)

cell_enrichment_results_refbased <- read_excel("Results/PlacentaCellEnrich_Tool/refbased/CellSpecificGeneEnrichment-VentoTormo.xlsx", col_names = T)
cell_enrichment_results_refbased$`-Log10PValue` <- as.numeric(cell_enrichment_results_refbased$`-Log10PValue`)

cell_enrichment_colors_cells <- cell_enrichment_results_refbased[,"Cell"]

cell_enrichment_results_refbased <- merge(cell_enrichment_results_refbased, cell_enrichment_colors_cells, by="Cell")

plot_cell_enrichment_refbased <- ggplot2::ggplot(cell_enrichment_results_refbased, aes(x = reorder(Cell, -`-Log10PValue`, sum), y= `-Log10PValue`, fill=Cell)) +
geom_bar(stat = 'identity')+ 
xlab("") + ylab("-log10 (p-adjusted)")+
scale_y_continuous(breaks = round(seq(0, 14, by = 2),1),limits = c(0,16), expand = c(0,0))+
theme_bw()+
theme(legend.position="none")+
theme(plot.title = element_text(hjust = 0.5,size = 10),axis.title = element_text(size=10))+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),panel.grid.major= element_blank(),panel.grid.minor = element_blank(), plot.margin=unit(c(1,1,1,2),"cm"))+
geom_hline(yintercept=2, linetype="dashed", color = "black")

to the top

CpGs most influenced by cell types (reference-free)

extract CpGs

load R2 (predicted by RPC cell types)

load("Results/RData/results_cvs_itu_lm_cpg_rf_filtered.Rdata")
load("Results/RData/results_placenta_itu_lm_cpg_rf_filtered.Rdata")
load("Results/RData/results_placenta_predo_lm_cpg_rf_filtered.Rdata")
load("Results/RData/results_placenta_BET_lm_cpg_rf_filtered.Rdata")

load("Input_Data_prepared/genes_cpgs_fullinfo.Rdata")

q9_cvs_rf <- quantile(results_cvs_itu_lm_cpg_rf_filtered$adj.Rsquared, probs=0.9)
q9_placenta_itu_rf <- quantile(results_placenta_itu_lm_cpg_rf_filtered$adj.Rsquared, probs=0.9)
q9_placenta_predo_rf <- quantile(results_placenta_predo_lm_cpg_rf_filtered$adj.Rsquared, probs=0.9)
q9_placenta_bet_rf <- quantile(results_placenta_BET_lm_cpg_rf_filtered$adj.Rsquared, probs=0.9)

mean of 90% quantiles in adjusted R2 of CpGs (refbased & reffree)

c9_rb_rf_r <- as.numeric(c(q9_cvs_rb, q9_placenta_itu_rb, q9_placenta_predo_rb, q9_placenta_bet_rb, q9_cvs_rf, q9_placenta_itu_rf, q9_placenta_predo_rf, q9_placenta_bet_rf))
mean(c9_rb_rf_r)

extract CpGs with R2 > 30% that are in all data sets (ref-free)

cpgs_cvs_itu_lmrf_filtered <- subset(results_cvs_itu_lm_cpg_rf_filtered, adj.Rsquared > 0.3)
dim(cpgs_cvs_itu_lmrf_filtered)
cpgs_placenta_itu_lmrf_filtered <- subset(results_placenta_itu_lm_cpg_rf_filtered, adj.Rsquared > 0.3)
dim(cpgs_placenta_itu_lmrf_filtered)
cpgs_placenta_predo_lmrf_filtered <- subset(results_placenta_predo_lm_cpg_rf_filtered, adj.Rsquared > 0.3)
dim(cpgs_placenta_predo_lmrf_filtered)
cpgs_placenta_BET_lmrf_filtered <- subset(results_placenta_BET_lm_cpg_rf_filtered, adj.Rsquared > 0.3)
dim(cpgs_placenta_BET_lmrf_filtered)
    
cpgs_list_rf_filtered <- list(rownames(cpgs_cvs_itu_lmrf_filtered), rownames(cpgs_placenta_itu_lmrf_filtered), rownames(cpgs_placenta_predo_lmrf_filtered), rownames(cpgs_placenta_BET_lmrf_filtered))

# which are the cpgs that overlap between the data sets
overlap_cpgs_rf_filtered <- Reduce(intersect,cpgs_list_rf_filtered)
length(overlap_cpgs_rf_filtered)
# 531

overlap_cpgs_genes_rf_filtered <- genes_cpgs_fullinfo[genes_cpgs_fullinfo$CpG %in% overlap_cpgs_rf_filtered, ]
dim(overlap_cpgs_genes_rf_filtered)

write.table(overlap_cpgs_genes_rf_filtered, file="Results/RData/overlap_cpgs_genes_rf_filtered.txt", row.names = FALSE,col.names = FALSE)
save(overlap_cpgs_genes_rf_filtered, file="Results/RData/overlap_cpgs_genes_rf_filtered.Rdata")
    # -> CpGs mapped to genes with R2 > .30 and in all data sets

genes_overlap_Symbol_rf_filtered <- as.character(unique(overlap_cpgs_genes_rf_filtered$gene)) 
length(genes_overlap_Symbol_rf_filtered)

write.table(genes_overlap_Symbol_rf_filtered, file="Results/RData/genes_overlap_Symbol_rf_filtered.txt", row.names = FALSE,col.names = FALSE)
    
save(genes_overlap_Symbol_rf_filtered, file="Results/RData/genes_overlap_Symbol_rf_filtered.Rdata")

to the top

extract the CpGs with gene info etc. that are used as input for enrichment (CpGs influenced by cell types)

identical(overlap_cpgs_genes_rf_filtered$gene, as.character(overlap_cpgs_genes_rf_filtered$name))
overlap_cpgs_genes_rf_filtered$GeneSymbol <- overlap_cpgs_genes_rf_filtered$gene
overlap_cpgs_genes_rf_filtered$gene <- NULL
overlap_cpgs_genes_rf_filtered$name <- NULL

CpGs_R30_Genes_reffree <- overlap_cpgs_genes_rf_filtered[ ,c("CpG", "GeneSymbol")]

write.xlsx2(CpGs_R30_Genes_reffree, "Results/List_CpGs_R30_Genes_reffree.xlsx", sheetName = "List", col.names = TRUE, row.names = FALSE, append = FALSE)

Tissue-specific Gene Enrichment

genes with R2 > .30 that overlap between the data sets against all genes

load("Results/RData/genes_all_Symbol_filtered.Rdata")
load("Results/RData/genes_overlap_Symbol_rf_filtered.Rdata")

genes_overlap_input_rf_filtered <-GeneSet(geneIds=genes_overlap_Symbol_rf_filtered,organism="Homo Sapiens",geneIdType=SymbolIdentifier()) 
genes_overlap_background_filtered <-GeneSet(geneIds=genes_all_Symbol_filtered,organism="Homo Sapiens",geneIdType=SymbolIdentifier())

output_GEnrich_rf_filtered <- teEnrichment(inputGenes = genes_overlap_input_rf_filtered, backgroundGenes = genes_overlap_background_filtered, rnaSeqDataset = 1)

seEnrichmentOutput_rf_filtered<-output_GEnrich_rf_filtered[[1]]
enrichmentOutput_rf_filtered<-setNames(data.frame(assay(seEnrichmentOutput_rf_filtered),row.names = rowData(seEnrichmentOutput_rf_filtered)[,1]), colData(seEnrichmentOutput_rf_filtered)[,1])
enrichmentOutput_rf_filtered$Tissue<-row.names(enrichmentOutput_rf_filtered)

save(output_GEnrich_rf_filtered, file="Results/RData/output_GEnrich_rf_filtered.Rdata")
save(enrichmentOutput_rf_filtered, file="Results/RData/enrichmentOutput_rf_filtered.Rdata")

load("Results/RData/output_GEnrich_rf_filtered.Rdata")
# ist object containing the enrichment results
load("Results/RData/enrichmentOutput_rf_filtered.Rdata")
# SummarizedExperiment object containing the −Log10(P−Value) and fold-change, corresponding to the tissue-specific gene enrichment, along with the number of tissue-specific genes in the input gene set. This object can be used to visualize tissue-specific gene enrichment in the form of a bar chart using the −Log10(P−Value) values.

all genes mapped:

sum(enrichmentOutput_rf_filtered$Tissue.Specific.Genes)

genes not identified in tissue-specific gene data

genes_not_ident_TissueEnrich_rf_filtered <- geneIds(output_GEnrich_rf_filtered[[4]])

tissueEnrichment_reffree_filtered <- ggplot(enrichmentOutput_rf_filtered,aes(x=reorder(Tissue,-Log10PValue),y=Log10PValue,label = Tissue.Specific.Genes,fill = Tissue))+
  geom_bar(stat = 'identity')+
  labs(x='', y = '-log10 (p-adjusted)')+
  scale_y_continuous(breaks = c(0, 2, 4, 6), limits = c(0,8), expand = c(0,0))+
  theme_bw()+
  theme(legend.position="none")+
  theme(plot.title = element_text(hjust = 0.5,size = 10),axis.title = element_text(size=10))+
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size=10),panel.grid.major= element_blank(),panel.grid.minor = element_blank(), plot.margin=unit(c(1,1,1,2),"cm"))+
  geom_hline(yintercept=2, linetype="dashed", color = "black")

ggsave("Results/TissueEnrichment_CpGGenes_reffree.pdf",
       tissueEnrichment_reffree_filtered, width=84, height=50, units="mm", dpi=600, scale=2, device = cairo_pdf)

ggsave(tissueEnrichment_reffree_filtered, filename = "Results/TissueEnrichment_CpGGenes_reffree.png", dpi = 300, type = "cairo",
       width = 8, height = 5, units = "in")

get p value for placenta

enrichmentOutput_rf_filtered["Placenta", ]
options(scipen=999)
log10p_placenta_enrich_rf_filtered <- enrichmentOutput_rf_filtered["Placenta", "Log10PValue"]
p_placenta_enrich_rf_filtered <- 10^-log10p_placenta_enrich_rf_filtered

see if tissue is significant (p < .01 / -log10 p >2)

enrichmentOutput_rf_filtered[(enrichmentOutput_rf_filtered[,1]>2),]

#list containing the tissue-specificity information for the input genes. The code below retrieves the tissue-specific genes along with the type of tissue-specificity in placenta tissue.
seGroupInf_Placenta_rf_filtered <-output_GEnrich_rf_filtered[[3]][["Placenta"]]
groupInf_Placenta_rf_filtered <-data.frame(assay(seGroupInf_Placenta_rf_filtered)) # 10
groupInf_Placenta_rf_filtered

save(groupInf_Placenta_rf_filtered, file="Results/RData/genes_groupInf_Placenta_rf_filtered.Rdata")
write.table(groupInf_Placenta_rf_filtered$Gene, file="Results/RData/genes_cpgs_placenta_enriched_rf_filtered.txt", row.names = F, col.names = F)
#10 genes highly important / specific for placenta: genes_cpgs_placenta_enriched_rf_filtered

#list of input genes that were not identified in the tissue-specific gene data.
unidentified_rf_filtered <- geneIds(output_GEnrich_rf_filtered[[4]])

Placenta cell enrichment

INPUT:

gene set: genes with R2 > .30 that overlap between data sets (genes_overlap_Symbol_rf_filtered)
background: genes that overlap between data sets (genes_all_Symbol_filtered)

cell_enrichment_results_reffree <- read_excel("Results/PlacentaCellEnrich_Tool/reffree/CellSpecificGeneEnrichment-VentoTormo.xlsx", col_names = T)
cell_enrichment_results_reffree$`-Log10PValue` <- as.numeric(cell_enrichment_results_reffree$`-Log10PValue`)

cell_enrichment_results_reffree <- merge(cell_enrichment_results_reffree, cell_enrichment_colors_cells, by="Cell")

plot_cell_enrichment_reffree <-
ggplot2::ggplot(cell_enrichment_results_reffree, aes(x = reorder(Cell, -`-Log10PValue`, sum), y= `-Log10PValue`, fill=Cell)) +
geom_bar(stat = 'identity')+ 
xlab("") + ylab("-log10 (p-adjusted)")+
scale_y_continuous(breaks = round(seq(0, 14, by = 2),1),limits = c(0,16), expand = c(0,0))+
theme_bw()+
theme(legend.position="none")+
theme(plot.title = element_text(hjust = 0.5,size = 10),axis.title = element_text(size=10))+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),panel.grid.major= element_blank(),panel.grid.minor = element_blank(), plot.margin=unit(c(1,1,1,2),"cm"))+
geom_hline(yintercept=2, linetype="dashed", color = "black")

to the top

save Tissue Enrichment Plot

Fig. 4

ggsave("Results/Plots_tissue_Enrichment_combined.pdf",
       ggarrange(tissue_enrich_main_plot_refbased, tissueEnrichment_reffree_filtered, ncol=1, labels = c("a", "b")), width=84, height=160, units="mm", dpi=600, scale=2)

save Cell Enrichment Plot

Fig. 5

ggsave("Results/Plots_Cell_Enrichment_combined.pdf",
       ggarrange(plot_cell_enrichment_refbased, plot_cell_enrichment_reffree, ncol=1, labels = c("a", "b")), width=84, height=160, units="mm", dpi=600, scale=2)

to the top

Plot Cell types (RPC, reference-based)

Fig. 6

RPC reference-based

CVS_ITU_RPC_Plot <- 
    as_tibble(Pheno_CVS_ITU_filtered) %>%
  pivot_longer(cols = all_of(names_refbased_cells),
               names_to = 'component',
               values_to = 'estimate') %>%
  ggplot(aes(x = Sample_Name, y = estimate, fill = component)) +
  geom_bar(stat = 'identity', width=1) +
  scale_fill_manual(values = plColors)+
  theme_minimal() +
  scale_y_continuous(limits = c(-0.1,1.1), breaks = c(0, 0.5, 1), labels = scales::percent) +
  theme(axis.text.x = element_blank(), 
        axis.text.y=element_text(size=10), 
        axis.title.y = element_text(size=10), 
        axis.title.x=element_blank(), 
        legend.text = element_text(size=10), 
        legend.key.size = unit(0.3, "cm"), 
        legend.margin=margin(t = 0, unit='cm'), 
        legend.title = element_blank(), 
        panel.border = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()) +
  coord_cartesian(ylim = c(0,1))

Placenta_ITU_RPC_Plot <-
    as_tibble(Pheno_Placenta_ITU_reduced_filtered) %>%
  pivot_longer(cols = all_of(names_refbased_cells),
               names_to = 'component',
               values_to = 'estimate') %>%
  ggplot(aes(x = Sample_Name, y = estimate, fill = component)) +
  geom_bar(stat = 'identity', width=1) +
  scale_fill_manual(values = plColors)+
  theme_minimal() +
  scale_y_continuous(limits = c(-0.1,1.1), breaks = c(0, 0.5, 1), labels = scales::percent) +
  theme(axis.text.x = element_blank(), 
        axis.text.y=element_text(size=10), 
        axis.title.y = element_text(size=10), 
        axis.title.x=element_blank(), 
        legend.text = element_text(size=5), 
        legend.key.size = unit(0.3, "cm"), 
        legend.margin=margin(t = 0, unit='cm'), 
        legend.title = element_blank(), 
        panel.border = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()) +
  coord_cartesian(ylim = c(0,1))

Placenta_PREDO_RPC_Plot <- 
    as_tibble(Pheno_Placenta_PREDO_filtered) %>%
  pivot_longer(cols = all_of(names_refbased_cells),
               names_to = 'component',
               values_to = 'estimate') %>%
  ggplot(aes(x = ID, y = estimate, fill = component)) +
  geom_bar(stat = 'identity', width=1) +
  scale_fill_manual(values = plColors)+
  theme_minimal() +
  scale_y_continuous(limits = c(-0.1,1.1), breaks = c(0, 0.5, 1), labels = scales::percent) +
  theme(axis.text.x = element_blank(), 
        axis.text.y=element_text(size=5), 
        axis.title.y = element_text(size=10), 
        axis.title.x=element_blank(), 
        legend.text = element_text(size=10), 
        legend.key.size = unit(0.3, "cm"), 
        legend.margin=margin(t = 0, unit='cm'), 
        legend.title = element_blank(), 
        panel.border = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()) +
  coord_cartesian(ylim = c(0,1))

Placenta_BET_RPC_Plot <- 
    as_tibble(Pheno_Placenta_BET_filtered) %>%
  pivot_longer(cols = all_of(names_refbased_cells),
               names_to = 'component',
               values_to = 'estimate') %>%
  ggplot(aes(x = Sample_Name, y = estimate, fill = component)) +
  geom_bar(stat = 'identity', width=1) +
  scale_fill_manual(values = plColors)+
  theme_minimal() +
  scale_y_continuous(limits = c(-0.1,1.1), breaks = c(0, 0.5, 1), labels = scales::percent) +
  theme(axis.text.x = element_blank(), 
        axis.text.y=element_text(size=5), 
        axis.title.y = element_text(size=10), 
        axis.title.x=element_blank(), 
        legend.text = element_text(size=10), 
        legend.key.size = unit(0.3, "cm"), 
        legend.margin=margin(t = 0, unit='cm'), 
        legend.title = element_blank(), 
        panel.border = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()) +
  coord_cartesian(ylim = c(0,1))

plot

CVS_ITU_RPC_Plot
Placenta_ITU_RPC_Plot
Placenta_PREDO_RPC_Plot
Placenta_BET_RPC_Plot

CVS_ITU_RPC_Plot_arrange <- 
    as_tibble(Pheno_CVS_ITU_filtered) %>%
  pivot_longer(cols = all_of(names_refbased_cells),
               names_to = 'component',
               values_to = 'estimate') %>%
  ggplot(aes(x = Sample_Name, y = estimate, fill = component)) +
  labs(tag = "")+
  geom_bar(stat = 'identity', width=1) +
  scale_fill_manual(values = plColors)+
  theme_minimal() +
  scale_y_continuous(limits = c(0,1.1), breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), labels = scales::percent) +
  theme(axis.text.x = element_blank(), 
        axis.text.y=element_text(size=10), 
        axis.title.y = element_blank(), 
        axis.title.x=element_blank(), 
        legend.text = element_text(size=10), 
        legend.title = element_blank(),legend.position="bottom",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(), 
  plot.title=element_text(size=10, hjust=0.5)) +
  coord_cartesian(ylim = c(0,1.1))


Placenta_ITU_RPC_Plot_arrange <-
    as_tibble(Pheno_Placenta_ITU_reduced_filtered) %>%
  pivot_longer(cols = all_of(names_refbased_cells),
               names_to = 'component',
               values_to = 'estimate') %>%
  ggplot(aes(x = Sample_Name, y = estimate, fill = component)) +
  labs(tag = "")+
  geom_bar(stat = 'identity', width=1) +
  scale_fill_manual(values = plColors)+
  theme_minimal() +
  scale_y_continuous(limits = c(0,1.1), breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), labels = scales::percent) +
  theme(axis.text.x = element_blank(), 
        axis.text.y=element_text(size=9), 
        axis.title.y = element_blank(), 
        axis.title.x=element_blank(), 
        legend.text = element_text(size=10), 
        legend.title = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(), plot.title=element_text(size=10, hjust=0.5)) +
  coord_cartesian(ylim = c(0,1.1))

Placenta_PREDO_RPC_Plot_arrange <- 
    as_tibble(Pheno_Placenta_PREDO_filtered) %>%
  pivot_longer(cols = all_of(names_refbased_cells),
               names_to = 'component',
               values_to = 'estimate') %>%
  ggplot(aes(x = ID, y = estimate, fill = component)) +
  labs(tag = "")+
  geom_bar(stat = 'identity', width=1) +
  scale_fill_manual(values = plColors)+
  theme_minimal() +
  scale_y_continuous(limits = c(0,1.1), breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), labels = scales::percent) +
  theme(axis.text.x = element_blank(), 
        axis.text.y=element_text(size=10), 
        axis.title.y = element_blank(), 
        axis.title.x=element_blank(), 
        legend.text = element_text(size=10), 
        legend.title = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(), plot.title=element_text(size=10, hjust=0.5)) +
  coord_cartesian(ylim = c(0,1.1))

Placenta_BET_RPC_Plot_arrange <- 
    as_tibble(Pheno_Placenta_BET_filtered) %>%
  pivot_longer(cols =all_of(names_refbased_cells),
               names_to = 'component',
               values_to = 'estimate') %>%
  ggplot(aes(x = Sample_Name, y = estimate, fill = component)) +
  labs(tag = "")+
  geom_bar(stat = 'identity', width=1) +
  scale_fill_manual(values = plColors)+
  theme_minimal() +
  scale_y_continuous(limits = c(0,1.1), breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), labels = scales::percent) +
  theme(axis.text.x = element_blank(), 
        axis.text.y=element_text(size=10), 
        axis.title.y = element_blank(), 
        axis.title.x=element_blank(), 
        legend.text = element_text(size=10), 
        legend.title = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(), plot.title=element_text(size=10, hjust=0.5)) +
  coord_cartesian(ylim = c(0,1.1))

RPC_Plot_Combined <- 
ggarrange(
          CVS_ITU_RPC_Plot_arrange +
           theme(legend.position="none", plot.margin = margin(r = 0.2, t= -0.2) ),
          Placenta_ITU_RPC_Plot_arrange +
               theme(axis.text.y = element_blank(),
                     axis.ticks.y = element_blank(),
                     axis.title.y = element_blank(), plot.margin = margin(r = 0.2, l = 0.2)),
          Placenta_PREDO_RPC_Plot_arrange +
               theme(axis.text.y = element_blank(),
                     axis.ticks.y = element_blank(),
                     axis.title.y = element_blank(), plot.margin = margin(l = 0.2)),
          Placenta_BET_RPC_Plot_arrange +
               theme(axis.text.y = element_blank(),
                     axis.ticks.y = element_blank(),
                     axis.title.y = element_blank(), plot.margin = margin(l = 0.2)),
          common.legend = T,
          labels = c("a", "b", "c","d", size=10),
          legend="bottom",
          nrow = 1,
          align = "hv")

RPC_Plot_Combined

ggsave("Results/RPC_Cells_Combined.pdf",
RPC_Plot_Combined, width=84, height=40, units="mm", dpi=600, scale=2, device = cairo_pdf)

ggsave(RPC_Plot_Combined, filename = "Results/RPC_Cells_Combined.png", dpi = 300, type = "cairo",
       width = 6, height = 3, units = "in")

plots with mean and sd

cells_cvs <- data.frame(psych::describe(Pheno_CVS_ITU_filtered[ ,names_refbased_cells]))
cells_cvs <- cells_cvs[ ,c("mean", "sd")]
rownames(cells_cvs) <- names_refbased_cells

msd_plot_cells_cvs <- ggplot(cells_cvs, aes(x=as.factor(rownames(cells_cvs)), y=mean, fill = as.factor(rownames(cells_cvs)))) +
  geom_bar(position=position_dodge(), stat="identity") +
  scale_fill_manual(values = plColors)+
  theme_minimal()+
  geom_errorbar(aes(ymin=mean, ymax=mean+sd), width=.2,position=position_dodge(.9))+
  labs(tag = "a", title="CVS (ITU)")+
  theme(axis.text.x = element_blank(), 
        axis.title.x = element_blank(), 
        axis.title.y=element_blank(), 
        axis.text.y = element_text(size=10), 
        legend.text = element_text(size=10), 
        legend.title = element_blank(), 
        plot.title=element_text(size=10))+
  scale_y_continuous(limits = c(0,1.1), breaks = c(0, 0.2,0.4, 0.6, 0.8, 1), labels = scales::percent)+
  guides(fill = guide_legend(label.position = "bottom", nrow=1))
  coord_cartesian(ylim = c(0,1.1))

cells_placenta_itu <- data.frame(psych::describe(Pheno_Placenta_ITU_reduced_filtered[ ,names_refbased_cells]))
cells_placenta_itu <- cells_placenta_itu[ ,c("mean", "sd")]
rownames(cells_placenta_itu) <- names_refbased_cells

msd_plot_cells_placenta_itu <- ggplot(cells_placenta_itu, aes(x=as.factor(rownames(cells_placenta_itu)), y=mean, fill = as.factor(rownames(cells_cvs)))) +
  geom_bar(position=position_dodge(), stat="identity") +
  scale_fill_manual(values = plColors)+
  theme_minimal()+
  geom_errorbar(aes(ymin=mean, ymax=mean+sd), width=.2,position=position_dodge(.9))+
  labs(tag = "b", title="Placenta (ITU)")+
  theme(axis.text.x = element_blank(), 
        axis.title.x = element_blank(), 
        axis.title.y=element_blank(), 
        axis.text.y = element_text(size=10), 
        legend.text = element_text(size=10), 
        legend.title = element_blank(), 
        plot.title=element_text(size=10))+
   scale_y_continuous(limits = c(0,1.1), breaks = c(0, 0.2,0.4, 0.6, 0.8, 1), labels = scales::percent)
  coord_cartesian(ylim = c(0,1.1))

cells_placenta_predo <- data.frame(psych::describe(Pheno_Placenta_PREDO_filtered[ ,names_refbased_cells]))
cells_placenta_predo <- cells_placenta_predo[ ,c("mean", "sd")]
rownames(cells_placenta_predo) <- names_refbased_cells

msd_plot_cells_placenta_predo <- ggplot(cells_placenta_predo, aes(x=as.factor(rownames(cells_placenta_predo)), y=mean, fill = as.factor(rownames(cells_cvs)))) +
  geom_bar(position=position_dodge(), stat="identity") +
  scale_fill_manual(values = plColors)+
  theme_minimal()+
  geom_errorbar(aes(ymin=mean, ymax=mean+sd), width=.2,position=position_dodge(.9))+
  labs(tag = "c", title="Placenta (PREDO)")+
  scale_y_continuous(limits = c(0,1.1), breaks = c(0, 0.2,0.4, 0.6, 0.8, 1), labels = scales::percent)+
  theme(axis.text.x = element_blank(), 
        axis.title.x = element_blank(), 
        axis.title.y=element_blank(), 
        axis.text.y = element_text(size=10), 
        legend.text = element_text(size=10), 
        legend.title = element_blank(), 
        plot.title=element_text(size=10))+
  coord_cartesian(ylim = c(0,1.1))

cells_placenta_BET <- data.frame(psych::describe(Pheno_Placenta_BET_filtered[ ,names_refbased_cells]))
cells_placenta_BET <- cells_placenta_BET[ ,c("mean", "sd")]
rownames(cells_placenta_BET) <- names_refbased_cells

msd_plot_cells_placenta_BET <- ggplot(cells_placenta_BET, aes(x=as.factor(rownames(cells_placenta_BET)), y=mean, fill = as.factor(rownames(cells_cvs)))) +
  geom_bar(position=position_dodge(), stat="identity") +
  scale_fill_manual(values = plColors)+
  theme_minimal()+
  geom_errorbar(aes(ymin=mean, ymax=mean+sd), width=.2,position=position_dodge(.9))+
  labs(tag = "d", title="Placenta (BET)")+
  scale_y_continuous(limits = c(0,1.1), breaks = c(0, 0.2,0.4, 0.6, 0.8, 1), labels = scales::percent)+
  theme(axis.text.x = element_blank(), 
        axis.title.x = element_blank(), 
        axis.title.y=element_blank(), 
        axis.text.y = element_text(size=10), 
        legend.text = element_text(size=10), 
        legend.title = element_blank(), 
        plot.title=element_text(size=10))+
  coord_cartesian(ylim = c(0,1.1))

msd_plot_cells_cvs
msd_plot_cells_placenta_itu
msd_plot_cells_placenta_predo
msd_plot_cells_placenta_BET

g_legend<-function(a.gplot){
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)}

gglegend<-g_legend(msd_plot_cells_cvs)

grid.arrange(arrangeGrob(msd_plot_cells_cvs + theme(legend.position="none"),
              CVS_ITU_RPC_Plot_arrange + theme(legend.position="none", axis.text.y = element_blank()),          
              msd_plot_cells_placenta_itu + theme(legend.position="none"),
              Placenta_ITU_RPC_Plot_arrange + theme(legend.position="none", axis.text.y = element_blank()),
              msd_plot_cells_placenta_predo + theme(legend.position="none"),
              Placenta_PREDO_RPC_Plot_arrange + theme(legend.position="none", axis.text.y = element_blank()),
              msd_plot_cells_placenta_BET + theme(legend.position="none"),
              Placenta_BET_RPC_Plot_arrange + theme(legend.position="none", axis.text.y = element_blank()),
              nrow=4),
             gglegend, nrow=2, heights=c(15, 2))

ggsave("Results/RPC_Placenta_combined_cells_estimate.pdf",
grid.arrange(arrangeGrob(msd_plot_cells_cvs + theme(legend.position="none"),
              CVS_ITU_RPC_Plot_arrange + theme(legend.position="none", axis.text.y = element_blank()),          
              msd_plot_cells_placenta_itu + theme(legend.position="none"),
              Placenta_ITU_RPC_Plot_arrange + theme(legend.position="none", axis.text.y = element_blank()),
              msd_plot_cells_placenta_predo + theme(legend.position="none"),
              Placenta_PREDO_RPC_Plot_arrange + theme(legend.position="none", axis.text.y = element_blank()),
              msd_plot_cells_placenta_BET + theme(legend.position="none"),
              Placenta_BET_RPC_Plot_arrange + theme(legend.position="none", axis.text.y = element_blank()),
              nrow=4),
             gglegend, nrow=2, heights=c(15, 2)), width=84, height=150, units="mm", dpi=600, scale=2, device = cairo_pdf)

to the top

Compare data sets in RPC cell type proportions

colwise(mean)(Pheno_CVS_ITU_filtered[c("Trophoblasts")])
colwise(mean)(Pheno_CVS_ITU_filtered[c("Stromal")])
colwise(mean)(Pheno_CVS_ITU_filtered[c("Hofbauer")])
colwise(mean)(Pheno_CVS_ITU_filtered[c("Endothelial")])
colwise(mean)(Pheno_CVS_ITU_filtered[c("nRBC")])
colwise(mean)(Pheno_CVS_ITU_filtered[c("Syncytiotrophoblast")])

colwise(median)(Pheno_CVS_ITU_filtered[c("Trophoblasts")])
colwise(median)(Pheno_CVS_ITU_filtered[c("Stromal")])
colwise(median)(Pheno_CVS_ITU_filtered[c("Hofbauer")])
colwise(median)(Pheno_CVS_ITU_filtered[c("Endothelial")])
colwise(median)(Pheno_CVS_ITU_filtered[c("nRBC")])
colwise(median)(Pheno_CVS_ITU_filtered[c("Syncytiotrophoblast")])

Placenta: CVS vs. term Placenta in ITU

Pheno_Placenta_CVS_ITU_filtered <- merge(Pheno_CVS_ITU_filtered, Pheno_Placenta_ITU_reduced_filtered, by="Sample_Name") # n = 85
n_Placenta_CVS_filtered <- nrow(Pheno_Placenta_CVS_ITU_filtered)

colwise(mean)(Pheno_Placenta_CVS_ITU_filtered[c("Trophoblasts.x", "Trophoblasts.y")])
colwise(mean)(Pheno_Placenta_CVS_ITU_filtered[c("Stromal.x", "Stromal.y")])
colwise(mean)(Pheno_Placenta_CVS_ITU_filtered[c("Hofbauer.x", "Hofbauer.y")])
colwise(mean)(Pheno_Placenta_CVS_ITU_filtered[c("Endothelial.x", "Endothelial.y")])
colwise(mean)(Pheno_Placenta_CVS_ITU_filtered[c("nRBC.x", "nRBC.y")])
colwise(mean)(Pheno_Placenta_CVS_ITU_filtered[c("Syncytiotrophoblast.x", "Syncytiotrophoblast.y")])

colwise(median)(Pheno_Placenta_CVS_ITU_filtered[c("Trophoblasts.x", "Trophoblasts.y")])
colwise(median)(Pheno_Placenta_CVS_ITU_filtered[c("Stromal.x", "Stromal.y")])
colwise(median)(Pheno_Placenta_CVS_ITU_filtered[c("Hofbauer.x", "Hofbauer.y")])
colwise(median)(Pheno_Placenta_CVS_ITU_filtered[c("Endothelial.x", "Endothelial.y")])
colwise(median)(Pheno_Placenta_CVS_ITU_filtered[c("nRBC.x", "nRBC.y")])
colwise(median)(Pheno_Placenta_CVS_ITU_filtered[c("Syncytiotrophoblast.x", "Syncytiotrophoblast.y")])

use Wilcoxon (paired)

# trophoblasts
wilcox.test(Pheno_Placenta_CVS_ITU_filtered$Trophoblasts.x, Pheno_Placenta_CVS_ITU_filtered$Trophoblasts.y, paired=T)
t.test(Pheno_Placenta_CVS_ITU_filtered$Trophoblasts.x, Pheno_Placenta_CVS_ITU_filtered$Trophoblasts.y, paired=T) # with t
pi_T <- wilcox.test(Pheno_Placenta_CVS_ITU_filtered$Trophoblasts.x, Pheno_Placenta_CVS_ITU_filtered$Trophoblasts.y, paired=T)$p.value # p from Wilxocon
wilcoxonZ(Pheno_Placenta_CVS_ITU_filtered$Trophoblasts.x, Pheno_Placenta_CVS_ITU_filtered$Trophoblasts.y, paired=T) # compute z
wilcoxonZ(Pheno_Placenta_CVS_ITU_filtered$Trophoblasts.x, Pheno_Placenta_CVS_ITU_filtered$Trophoblasts.y, paired=T) / sqrt(n_Placenta_CVS_filtered) # r

# stromal
wilcox.test(Pheno_Placenta_CVS_ITU_filtered$Stromal.x, Pheno_Placenta_CVS_ITU_filtered$Stromal.y, paired=T)
t.test(Pheno_Placenta_CVS_ITU_filtered$Stromal.x, Pheno_Placenta_CVS_ITU_filtered$Stromal.y, paired=T)
pi_ST <- wilcox.test(Pheno_Placenta_CVS_ITU_filtered$Stromal.x,Pheno_Placenta_CVS_ITU_filtered$Stromal.y, paired=T)$p.value
wilcoxonZ(Pheno_Placenta_CVS_ITU_filtered$Stromal.x, Pheno_Placenta_CVS_ITU_filtered$Stromal.y, paired=T) # compute z
wilcoxonZ(Pheno_Placenta_CVS_ITU_filtered$Stromal.x, Pheno_Placenta_CVS_ITU_filtered$Stromal.y, paired=T) / sqrt(n_Placenta_CVS_filtered) # r

# Hofbauer
wilcox.test(Pheno_Placenta_CVS_ITU_filtered$Hofbauer.x, Pheno_Placenta_CVS_ITU_filtered$Hofbauer.y, paired=T)
t.test(Pheno_Placenta_CVS_ITU_filtered$Hofbauer.x, Pheno_Placenta_CVS_ITU_filtered$Hofbauer.y, paired=T)
pi_H <- wilcox.test(Pheno_Placenta_CVS_ITU_filtered$Hofbauer.x, Pheno_Placenta_CVS_ITU_filtered$Hofbauer.y, paired=T)$p.value
wilcoxonZ(Pheno_Placenta_CVS_ITU_filtered$Hofbauer.x, Pheno_Placenta_CVS_ITU_filtered$Hofbauer.y, paired=T) # compute z
wilcoxonZ(Pheno_Placenta_CVS_ITU_filtered$Hofbauer.x, Pheno_Placenta_CVS_ITU_filtered$Hofbauer.y, paired=T) / sqrt(n_Placenta_CVS_filtered) # r

# endothelial
wilcox.test(Pheno_Placenta_CVS_ITU_filtered$Endothelial.x, Pheno_Placenta_CVS_ITU_filtered$Endothelial.y, paired=T)
t.test(Pheno_Placenta_CVS_ITU_filtered$Endothelial.x, Pheno_Placenta_CVS_ITU_filtered$Endothelial.y, paired=T)
pi_E<- wilcox.test(Pheno_Placenta_CVS_ITU_filtered$Endothelial.x,Pheno_Placenta_CVS_ITU_filtered$Endothelial.y, paired=T)$p.value
wilcoxonZ(Pheno_Placenta_CVS_ITU_filtered$Endothelial.x, Pheno_Placenta_CVS_ITU_filtered$Endothelial.y, paired=T) # compute z
wilcoxonZ(Pheno_Placenta_CVS_ITU_filtered$Endothelial.x, Pheno_Placenta_CVS_ITU_filtered$Endothelial.y, paired=T) / sqrt(n_Placenta_CVS_filtered) # r

# nRBC
wilcox.test(Pheno_Placenta_CVS_ITU_filtered$nRBC.x, Pheno_Placenta_CVS_ITU_filtered$nRBC.y, paired=T)
t.test(Pheno_Placenta_CVS_ITU_filtered$nRBC.x, Pheno_Placenta_CVS_ITU_filtered$nRBC.y, paired=T)
pi_n<- wilcox.test(Pheno_Placenta_CVS_ITU_filtered$nRBC.x, Pheno_Placenta_CVS_ITU_filtered$nRBC.y, paired=T)$p.value
wilcoxonZ(Pheno_Placenta_CVS_ITU_filtered$nRBC.x, Pheno_Placenta_CVS_ITU_filtered$nRBC.y, paired=T) # compute z
wilcoxonZ(Pheno_Placenta_CVS_ITU_filtered$nRBC.x, Pheno_Placenta_CVS_ITU_filtered$nRBC.y, paired=T) / sqrt(n_Placenta_CVS_filtered) # r

# syncytiotrophoblast
wilcox.test(Pheno_Placenta_CVS_ITU_filtered$Syncytiotrophoblast.x, Pheno_Placenta_CVS_ITU_filtered$Syncytiotrophoblast.y, paired=T)
t.test(Pheno_Placenta_CVS_ITU_filtered$Syncytiotrophoblast.x, Pheno_Placenta_CVS_ITU_filtered$Syncytiotrophoblast.y, paired=T)
pi_S<- wilcox.test(Pheno_Placenta_CVS_ITU_filtered$Syncytiotrophoblast.x, Pheno_Placenta_CVS_ITU_filtered$Syncytiotrophoblast.y, paired=T)$p.value
wilcoxonZ(Pheno_Placenta_CVS_ITU_filtered$Syncytiotrophoblast.x, Pheno_Placenta_CVS_ITU_filtered$Syncytiotrophoblast.y, paired=T) # compute z
wilcoxonZ(Pheno_Placenta_CVS_ITU_filtered$Syncytiotrophoblast.x, Pheno_Placenta_CVS_ITU_filtered$Syncytiotrophoblast.y, paired=T) / sqrt(n_Placenta_CVS_filtered) # r

# 
pci_placenta <- c(pi_T, pi_ST, pi_H, pi_E, pi_n, pi_S) # all p values
p.adjust(pci_placenta, method = "bonferroni", n = length(pci_placenta)) # multiple test correction with Bonferroni

to the top

Term Placentas

make one data frame

RPC_Cells_ITU <- Pheno_Placenta_ITU_reduced_filtered[ ,names_refbased_cells]
RPC_Cells_ITU$group <- rep("ITU", nrow(RPC_Cells_ITU))

RPC_Cells_PREDO <- Pheno_Placenta_PREDO_filtered[ ,names_refbased_cells]
RPC_Cells_PREDO$group <- rep("PREDO", nrow(RPC_Cells_PREDO))

RPC_Cells_BET <- Pheno_Placenta_BET_filtered[ ,names_refbased_cells]
RPC_Cells_BET$group <- rep("BET", nrow(RPC_Cells_BET))

RPC_Cells_term <- rbind(RPC_Cells_ITU, RPC_Cells_PREDO, RPC_Cells_BET)
RPC_Cells_term$group <- as.factor(RPC_Cells_term$group)

levels(RPC_Cells_term$group)
RPC_Cells_term$group <- factor(RPC_Cells_term$group, levels = c("ITU", "PREDO", "BET"))
levels(RPC_Cells_term$group)

save(RPC_Cells_term, file="Input_Data_prepared/RPC_Cells_term.Rdata")

nonparametric inference for multivariate data We use the npmv package Unlike in classical multivariate analysis of variance, multivariate normality is not required for the data. In fact, the different response variables may even be measured on different scales (binary, ordinal, quantitative). p values are calculated for overall tests (permutation tests and F approximations), and, using multiple testing algorithms which control the familywise error rate, significant subsets of response variables and factor levels are identified. The package may be used for low- or high- dimensional data with small or with large sample sizes and many or few factor levels.

Typical global statistical hypotheses in this context are the following. “Are the a samples from the same population (multivariate distribution)?”

nonpartest(Trophoblasts|Stromal|Syncytiotrophoblast|Endothelial|nRBC|Hofbauer ~ group,RPC_Cells_term)

In addition to the F -distribution approximations that are provided, each of the four test statistics is also used as the basis for a multivariate permutation or randomization test. To this end, the N data vectors are permuted, and the multivariate test statistics recalculated each time. For each of the four tests, these resulting values form the respective distribution whose quantiles are used to determine the p value of the corresponding permutation test (if all N! permutations are performed) or randomization test (if a predetermined number of random permutations is performed). The relative effects quantify the tendencies observed in the data in term of probabilities. For example, the samples from BET tend to have larger proportions of trophoblasts compared to the other cohorts. The probability that a randomly chosen sample from BET exhibits a larger proportion of trophoblasts than a randomly chosen sample from the other group is 0.856.

follow-up test:

ssnonpartest(Trophoblasts|Stromal|Syncytiotrophoblast|Endothelial|nRBC|Hofbauer ~group,RPC_Cells_term, alpha = 0.01, factors.and.variables = TRUE)

Regarding the group (factor levels), all pairwise comparisons are significant. Regarding the variables, every one of the six variables alone shows a significant difference between the groups and so does the combination of variables

colwise(median)(Pheno_Placenta_ITU_reduced_filtered[c("Trophoblasts")])
colwise(median)(Pheno_Placenta_ITU_reduced_filtered[c("Stromal")])
colwise(median)(Pheno_Placenta_ITU_reduced_filtered[c("Hofbauer")])
colwise(median)(Pheno_Placenta_ITU_reduced_filtered[c("Endothelial")])
colwise(median)(Pheno_Placenta_ITU_reduced_filtered[c("nRBC")])
colwise(median)(Pheno_Placenta_ITU_reduced_filtered[c("Syncytiotrophoblast")])

colwise(mean)(Pheno_Placenta_ITU_reduced_filtered[c("Trophoblasts")])
colwise(mean)(Pheno_Placenta_ITU_reduced_filtered[c("Stromal")])
colwise(mean)(Pheno_Placenta_ITU_reduced_filtered[c("Hofbauer")])
colwise(mean)(Pheno_Placenta_ITU_reduced_filtered[c("Endothelial")])
colwise(mean)(Pheno_Placenta_ITU_reduced_filtered[c("nRBC")])
colwise(mean)(Pheno_Placenta_ITU_reduced_filtered[c("Syncytiotrophoblast")])

colwise(median)(Pheno_Placenta_PREDO_filtered[c("Trophoblasts")])
colwise(median)(Pheno_Placenta_PREDO_filtered[c("Stromal")])
colwise(median)(Pheno_Placenta_PREDO_filtered[c("Hofbauer")])
colwise(median)(Pheno_Placenta_PREDO_filtered[c("Endothelial")])
colwise(median)(Pheno_Placenta_PREDO_filtered[c("nRBC")])
colwise(median)(Pheno_Placenta_PREDO_filtered[c("Syncytiotrophoblast")])

colwise(mean)(Pheno_Placenta_PREDO_filtered[c("Trophoblasts")])
colwise(mean)(Pheno_Placenta_PREDO_filtered[c("Stromal")])
colwise(mean)(Pheno_Placenta_PREDO_filtered[c("Hofbauer")])
colwise(mean)(Pheno_Placenta_PREDO_filtered[c("Endothelial")])
colwise(mean)(Pheno_Placenta_PREDO_filtered[c("nRBC")])
colwise(mean)(Pheno_Placenta_PREDO_filtered[c("Syncytiotrophoblast")])

colwise(median)(Pheno_Placenta_BET_filtered[c("Trophoblasts")])
colwise(median)(Pheno_Placenta_BET_filtered[c("Stromal")])
colwise(median)(Pheno_Placenta_BET_filtered[c("Hofbauer")])
colwise(median)(Pheno_Placenta_BET_filtered[c("Endothelial")])
colwise(median)(Pheno_Placenta_BET_filtered[c("nRBC")])
colwise(median)(Pheno_Placenta_BET_filtered[c("Syncytiotrophoblast")])

colwise(mean)(Pheno_Placenta_BET_filtered[c("Trophoblasts")])
colwise(mean)(Pheno_Placenta_BET_filtered[c("Stromal")])
colwise(mean)(Pheno_Placenta_BET_filtered[c("Hofbauer")])
colwise(mean)(Pheno_Placenta_BET_filtered[c("Endothelial")])
colwise(mean)(Pheno_Placenta_BET_filtered[c("nRBC")])
colwise(mean)(Pheno_Placenta_BET_filtered[c("Syncytiotrophoblast")])

to the top

Phenotype relationships

CVS (ITU)

png(file="Results/Pheno_cor_CVS_cells.png", width= 12000, height=6000, res=600)
ggpairs(Pheno_CVS_ITU_filtered[,c(9:19, 42, 43,45,46, 66, 108)])
dev.off()

correlation with gestational age

# correlation cell types with gestational age at sampling
r_gestage_cvs_cell_cors <- sapply(Pheno_CVS_ITU_filtered[,names_refbased_cells], FUN=function(x, y) cor.test(x, y, method="spearman", exact=F)$estimate, y=Pheno_CVS_ITU_filtered$gestage_at_CVS_weeks)
p_gestage_cvs_cell_cors <- sapply(Pheno_CVS_ITU_filtered[,names_refbased_cells], FUN=function(x, y) cor.test(x, y, method="spearman", exact=F)$p.value, y=Pheno_CVS_ITU_filtered$gestage_at_CVS_weeks)

r_gestage_cvs_cell_cors
p_gestage_cvs_cell_cors

# correct for multiple tests
p.adjust(p_gestage_cvs_cell_cors,  method="bonferroni", n=6)

GA: Trohpoblasts: r = -0.315 p < 0.001 Synctiotrophoblast: r = 0.362 p < 0.001

cor_GA_trophoblasts_cvs <- ggscatter(Pheno_CVS_ITU_filtered, x = "gestage_at_CVS_weeks", y = "Trophoblasts", 
          add = "reg.line", conf.int = TRUE, 
          xlab = "gestational age (weeks)\n at CVS sampling", ylab = "proportion \n Trophoblast cells")+
          stat_cor(method = "spearman", label.x = 10, label.y = 0.5,r.digits = 2, aes(label = paste("'r = '", ..r..,sep = "~", "'**'")))+
          scale_x_continuous(breaks=c(10,12,14,16))+
          scale_y_continuous(breaks=c(0.2,0.3,0.4,0.5))+
  labs(tag="a", size=10)+
     theme(axis.text.x = element_text(size=10), axis.text.y=element_text(size=10), axis.title.y = element_text(size=10), axis.title.x=element_text(size=10),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank())

cor_GA_syncytiotrophoblasts_cvs <- ggscatter(Pheno_CVS_ITU_filtered, x = "gestage_at_CVS_weeks", y = "Syncytiotrophoblast",
          add = "reg.line", conf.int = TRUE, 
          xlab = "gestational age (weeks)\n at CVS sampling", ylab = "proportion \n Syncytiotrophoblast cells")+
          stat_cor(method = "spearman", label.x = 10, label.y = 0.7,r.digits = 2, aes(label = paste("'r = '", ..r..,sep = "~", "'**'")))+
          scale_x_continuous(breaks=c(10,12,14,16))+
          scale_y_continuous(breaks=c(0.5,0.6,0.7))+
    theme(axis.text.x = element_text(size=10), axis.text.y=element_text(size=10), axis.title.y = element_text(size=10), axis.title.x=element_text(size=10),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank())

GA_CVS <-ggarrange(cor_GA_trophoblasts_cvs, cor_GA_syncytiotrophoblasts_cvs,align="hv", nrow=1)
GA_CVS_a <- annotate_figure(GA_CVS, top = text_grob("CVS (ITU)", size = 10))

GA_CVS_a

png(file="Results/GA_CVS_trophoblasts_cor.png", width= 3000, height=2000, res=600)
cor_GA_trophoblasts_cvs
dev.off()

png(file="Results/GA_CVS_synctiotrophoblast_cor.png", width= 3000, height=2000, res=600)
cor_GA_syncytiotrophoblasts_cvs
dev.off()

differences between sexes

# Trophoblast
wilcox.test(Pheno_CVS_ITU_filtered$Trophoblasts ~ Pheno_CVS_ITU_filtered$Child_Sex)
t.test(Pheno_CVS_ITU_filtered$Trophoblasts ~ Pheno_CVS_ITU_filtered$Child_Sex) # with t
pi_T_sex <- wilcox.test(Pheno_CVS_ITU_filtered$Trophoblasts ~ Pheno_CVS_ITU_filtered$Child_Sex)$p.value # p from Wilxocon

# Hofbauer
wilcox.test(Pheno_CVS_ITU_filtered$Hofbauer ~ Pheno_CVS_ITU_filtered$Child_Sex)
t.test(Pheno_CVS_ITU_filtered$Hofbauer ~ Pheno_CVS_ITU_filtered$Child_Sex) # with t
pi_H_sex <- wilcox.test(Pheno_CVS_ITU_filtered$Hofbauer ~ Pheno_CVS_ITU_filtered$Child_Sex)$p.value # p from Wilxocon

# endothelial
wilcox.test(Pheno_CVS_ITU_filtered$Endothelial ~ Pheno_CVS_ITU_filtered$Child_Sex)
t.test(Pheno_CVS_ITU_filtered$Endothelial ~ Pheno_CVS_ITU_filtered$Child_Sex) # with t
pi_E_sex <- wilcox.test(Pheno_CVS_ITU_filtered$Endothelial ~ Pheno_CVS_ITU_filtered$Child_Sex)$p.value # p from Wilxocon

# stromal
wilcox.test(Pheno_CVS_ITU_filtered$Stromal ~ Pheno_CVS_ITU_filtered$Child_Sex)
t.test(Pheno_CVS_ITU_filtered$Stromal ~ Pheno_CVS_ITU_filtered$Child_Sex) # with t
pi_St_sex <- wilcox.test(Pheno_CVS_ITU_filtered$Stromal ~ Pheno_CVS_ITU_filtered$Child_Sex)$p.value # p from Wilxocon

# syncytiotrophoblast
wilcox.test(Pheno_CVS_ITU_filtered$Syncytiotrophoblast ~ Pheno_CVS_ITU_filtered$Child_Sex)
t.test(Pheno_CVS_ITU_filtered$Syncytiotrophoblast ~ Pheno_CVS_ITU_filtered$Child_Sex) # with t
pi_S_sex <- wilcox.test(Pheno_CVS_ITU_filtered$Syncytiotrophoblast ~ Pheno_CVS_ITU_filtered$Child_Sex)$p.value # p from Wilxocon

# nRBC
wilcox.test(Pheno_CVS_ITU_filtered$nRBC ~ Pheno_CVS_ITU_filtered$Child_Sex)
t.test(Pheno_CVS_ITU_filtered$nRBC ~ Pheno_CVS_ITU_filtered$Child_Sex) # with t
pi_nR_sex <- wilcox.test(Pheno_CVS_ITU_filtered$nRBC ~ Pheno_CVS_ITU_filtered$Child_Sex)$p.value # p from Wilxocon

# correct for multiple tests
p.adjust(c(pi_T_sex, pi_S_sex, pi_H_sex, pi_E_sex, pi_nR_sex, pi_St_sex), method="bonferroni", n=6)

no significant differences between sexes

to the top

Placenta (ITU)

png(file="Results/Pheno_cor_Placenta_ITU_cells.png", width= 12000, height=6000, res=600)
ggpairs(Pheno_Placenta_ITU_reduced_filtered[,c(9:22,45, 46,48,49, 111)])
dev.off()

gestational age

# correlation cell types with gestational age at sampling
r_gestage_feplacenta_cell_cors <- sapply(Pheno_Placenta_ITU_reduced_filtered[,names_refbased_cells], FUN=function(x, y) cor.test(x, y, method="spearman", exact=F)$estimate, y=Pheno_Placenta_ITU_reduced_filtered$Gestational_Age_Weeks)
p_gestage_feplacenta_cell_cors <- sapply(Pheno_Placenta_ITU_reduced_filtered[,names_refbased_cells], FUN=function(x, y) cor.test(x, y, method="spearman", exact=F)$p.value, y=Pheno_Placenta_ITU_reduced_filtered$Gestational_Age_Weeks)

r_gestage_feplacenta_cell_cors
p_gestage_feplacenta_cell_cors

# correct for multiple tests
p.adjust(p_gestage_feplacenta_cell_cors, method="bonferroni", n=6)

GA: nothing significant

cor_GA_trophoblasts_itu <- ggscatter(Pheno_Placenta_ITU_reduced_filtered, x = "Gestational_Age_Weeks", y = "Trophoblasts", 
          add = "reg.line", conf.int = TRUE, 
          xlab = "gestational age (weeks)", ylab = "proportion \n Trophoblast cells")+
          stat_cor(method = "spearman", label.x = 29, label.y = 0.3, r.digits = 2, aes(label = paste("'r = '", ..r..,sep = "~", "''")))+
  labs(tag="b")+
          scale_x_continuous(breaks=c(28,30,32,34,36,38,40,42))+
          scale_y_continuous(breaks=c(0.1,0.2,0.3))+
            theme(axis.text.x = element_text(size=10), axis.text.y=element_text(size=10), axis.title.y = element_text(size=10), axis.title.x=element_text(size=10),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank())

cor_GA_syncytiotrophoblasts_itu <- ggscatter(Pheno_Placenta_ITU_reduced_filtered, x = "Gestational_Age_Weeks", y = "Syncytiotrophoblast", 
          add = "reg.line", conf.int = TRUE, 
          xlab = "gestational age (weeks)", ylab = "proportion \n Syncytiotrophoblast cells")+
          stat_cor(method = "spearman", label.x = 29, label.y = 1, r.digits = 1, aes(label = paste("'r = '", ..r..,sep = "~", "''")))+
          scale_x_continuous(breaks=c(28,30,32,34,36,38,40,42))+
          scale_y_continuous(breaks=c(0.6,0.7,0.8,0.9,1.0))+
        theme(axis.text.x = element_text(size=10), axis.text.y=element_text(size=10), axis.title.y = element_text(size=10), axis.title.x=element_text(size=10),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank())

GA_ITU <-ggarrange(cor_GA_trophoblasts_itu, cor_GA_syncytiotrophoblasts_itu,align="hv", nrow=1)
GA_ITU_a <- annotate_figure(GA_ITU, top = text_grob("Placenta (ITU)", size = 10))

GA_ITU_a

differences between sexes

# Trophoblast
wilcox.test(Pheno_Placenta_ITU_reduced_filtered$Trophoblasts ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex)
t.test(Pheno_Placenta_ITU_reduced_filtered$Trophoblasts ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex) # with t
p_T_sex <- wilcox.test(Pheno_Placenta_ITU_reduced_filtered$Trophoblasts ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex)$p.value # p from Wilxocon

# Hofbauer
wilcox.test(Pheno_Placenta_ITU_reduced_filtered$Hofbauer ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex)
t.test(Pheno_Placenta_ITU_reduced_filtered$Hofbauer ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex) # with t
p_H_sex <- wilcox.test(Pheno_Placenta_ITU_reduced_filtered$Hofbauer ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex)$p.value # p from Wilxocon

# endothelial
wilcox.test(Pheno_Placenta_ITU_reduced_filtered$Endothelial ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex)
t.test(Pheno_Placenta_ITU_reduced_filtered$Endothelial ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex) # with t
p_E_sex <- wilcox.test(Pheno_Placenta_ITU_reduced_filtered$Endothelial ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex)$p.value # p from Wilxocon

# syncytiotrophoblast
wilcox.test(Pheno_Placenta_ITU_reduced_filtered$Syncytiotrophoblast ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex)
t.test(Pheno_Placenta_ITU_reduced_filtered$Syncytiotrophoblast ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex) # with t
p_S_sex <- wilcox.test(Pheno_Placenta_ITU_reduced_filtered$Syncytiotrophoblast ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex)$p.value # p from Wilxocon

# stromal
wilcox.test(Pheno_Placenta_ITU_reduced_filtered$Stromal ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex)
t.test(Pheno_Placenta_ITU_reduced_filtered$Stromal ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex) # with t
p_St_sex <- wilcox.test(Pheno_Placenta_ITU_reduced_filtered$Stromal ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex)$p.value # p from Wilxocon

# nRBC
wilcox.test(Pheno_Placenta_ITU_reduced_filtered$nRBC ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex)
t.test(Pheno_Placenta_ITU_reduced_filtered$nRBC ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex) # with t
p_nR_sex <- wilcox.test(Pheno_Placenta_ITU_reduced_filtered$nRBC ~ Pheno_Placenta_ITU_reduced_filtered$Child_Sex)$p.value # p from Wilxocon

no significant differences

to the top

Placenta (PREDO)

png(file="Results/Pheno_cor_Placenta_PREDO_cells.png", width= 12000, height=6000, res=600)
ggpairs(Pheno_Placenta_PREDO_filtered[,c(13:20, 22,24,25,27, 102)])
dev.off()

# correlation cell types with gestational age at sampling
r_gestage_deplacenta_cell_cors <- sapply(Pheno_Placenta_PREDO_filtered[,names_refbased_cells], FUN=function(x, y) cor.test(x, y, method="spearman", exact=F)$estimate, y=Pheno_Placenta_PREDO_filtered$Gestational_Age)
p_gestage_deplacenta_cell_cors <- sapply(Pheno_Placenta_PREDO_filtered[,names_refbased_cells], FUN=function(x, y) cor.test(x, y, method="spearman", exact=F)$p.value, y=Pheno_Placenta_PREDO_filtered$Gestational_Age)

r_gestage_deplacenta_cell_cors
p_gestage_deplacenta_cell_cors

# correct for multiple tests
p.adjust(p_gestage_deplacenta_cell_cors, method="bonferroni", n=6)

Gestational Age: trophoblasts r = -0.254, p = 0.016 syncytiotrophoblast r = 0.246, p = 0.021

cor_GA_trophoblasts_predo <- ggscatter(Pheno_Placenta_PREDO_filtered, x = "Gestational_Age", y = "Trophoblasts", 
          add = "reg.line", conf.int = TRUE, 
          xlab = "gestational age (weeks)", ylab = "proportion \n Trophoblast cells")+
          stat_cor(method = "spearman", label.x = 32, label.y = 0.3,r.digits = 2, aes(label = paste("'r = '", ..r..,sep = "~", "''")))+
  labs(tag="c", size=10)+
          scale_x_continuous(breaks=c(32,34,36,38,40,42))+
          scale_y_continuous(breaks=c(0.0,0.1,0.2,0.3, 0.3))+
  theme(axis.text.x = element_text(size=10), axis.text.y=element_text(size=10), axis.title.y = element_text(size=10), axis.title.x=element_text(size=10),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank())
  
cor_GA_syncytiotrophoblasts_predo <- ggscatter(Pheno_Placenta_PREDO_filtered, x = "Gestational_Age", y = "Syncytiotrophoblast", 
          add = "reg.line", conf.int = TRUE, 
          xlab = "gestational age (weeks)", ylab = "proportion \n Syncytiotrophoblast cells")+
          stat_cor(method = "spearman", label.x = 32, label.y = 1.0,r.digits = 2, aes(label = paste("'r = '", ..r..,sep = "~", "''")))+
          scale_x_continuous(breaks=c(32,34,36,38,40,42))+
          scale_y_continuous(breaks=c(0.6,0.7,0.8,0.9,1.0))+
  theme(axis.text.x = element_text(size=10), axis.text.y=element_text(size=10), axis.title.y = element_text(size=10), axis.title.x=element_text(size=10),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank())

GA_PREDO <-ggarrange(cor_GA_trophoblasts_predo, cor_GA_syncytiotrophoblasts_predo,align="hv", nrow=1)
GA_PREDO_a <- annotate_figure(GA_PREDO, top = text_grob("Placenta (PREDO)", size = 10))

GA_PREDO_a

png(file="Results/GA_PlacentaPredo_Trophoblasts_cor.png", width= 3000, height=2000, res=600)
cor_GA_trophoblasts_predo
dev.off()

png(file="Results/GA_PlacentaPredo_Synctiotrophoblast_cor.png", width= 3000, height=2000, res=600)
cor_GA_syncytiotrophoblasts_predo
dev.off()

differences between sexes

# Trophoblast
wilcox.test(Pheno_Placenta_PREDO_filtered$Trophoblasts ~ Pheno_Placenta_PREDO_filtered$Child_Sex)
t.test(Pheno_Placenta_PREDO_filtered$Trophoblasts ~ Pheno_Placenta_PREDO_filtered$Child_Sex) # with t
pp_T_sex <- wilcox.test(Pheno_Placenta_PREDO_filtered$Trophoblasts ~ Pheno_Placenta_PREDO_filtered$Child_Sex)$p.value # p from Wilxocon

# Hofbauer
wilcox.test(Pheno_Placenta_PREDO_filtered$Hofbauer ~ Pheno_Placenta_PREDO_filtered$Child_Sex)
t.test(Pheno_Placenta_PREDO_filtered$Hofbauer ~ Pheno_Placenta_PREDO_filtered$Child_Sex) # with t
pp_H_sex <- wilcox.test(Pheno_Placenta_PREDO_filtered$Hofbauer ~ Pheno_Placenta_PREDO_filtered$Child_Sex)$p.value # p from Wilxocon

# endothelial
wilcox.test(Pheno_Placenta_PREDO_filtered$Endothelial ~ Pheno_Placenta_PREDO_filtered$Child_Sex)
t.test(Pheno_Placenta_PREDO_filtered$Endothelial ~ Pheno_Placenta_PREDO_filtered$Child_Sex) # with t
pp_E_sex <- wilcox.test(Pheno_Placenta_PREDO_filtered$Endothelial ~ Pheno_Placenta_PREDO_filtered$Child_Sex)$p.value # p from Wilxocon

# syncytiotrophoblast
wilcox.test(Pheno_Placenta_PREDO_filtered$Syncytiotrophoblast ~ Pheno_Placenta_PREDO_filtered$Child_Sex)
t.test(Pheno_Placenta_PREDO_filtered$Syncytiotrophoblast ~ Pheno_Placenta_PREDO_filtered$Child_Sex) # with t
pp_S_sex <- wilcox.test(Pheno_Placenta_PREDO_filtered$Syncytiotrophoblast ~ Pheno_Placenta_PREDO_filtered$Child_Sex)$p.value # p from Wilxocon

# stromal
wilcox.test(Pheno_Placenta_PREDO_filtered$Stromal ~ Pheno_Placenta_PREDO_filtered$Child_Sex)
t.test(Pheno_Placenta_PREDO_filtered$Stromal ~ Pheno_Placenta_PREDO_filtered$Child_Sex) # with t
pp_St_sex <- wilcox.test(Pheno_Placenta_PREDO_filtered$Stromal ~ Pheno_Placenta_PREDO_filtered$Child_Sex)$p.value # p from Wilxocon

# nRBC
wilcox.test(Pheno_Placenta_PREDO_filtered$nRBC ~ Pheno_Placenta_PREDO_filtered$Child_Sex)
t.test(Pheno_Placenta_PREDO_filtered$nRBC ~ Pheno_Placenta_PREDO_filtered$Child_Sex) # with t
pp_nR_sex <- wilcox.test(Pheno_Placenta_PREDO_filtered$nRBC ~ Pheno_Placenta_PREDO_filtered$Child_Sex)$p.value # p from Wilxocon

small difference in trophoblasts (higher in females), but not after multiple test correction

# correct for multiple tests
p.adjust(c(pi_S_sex, pi_H_sex, pi_T_sex, pi_E_sex, pi_St_sex, pi_nR_sex), method="bonferroni", n=6)

to the top

Placenta (BET)

png(file="Results/Pheno_cor_Placenta_BET_cells.png", width= 12000, height=6000, res=600)
ggpairs(Pheno_Placenta_BET_filtered[,c(35:44,50,51)])
dev.off()

correlation with gestational age

# correlation cell types with gestational age at sampling
r_gestage_Placenta_BET_cell_cors <- sapply(Pheno_Placenta_BET_filtered[,names_refbased_cells], FUN=function(x, y) cor.test(x, y, method="spearman", exact=F)$estimate, y=Pheno_Placenta_BET_filtered$gestage_weeks)
p_gestage_Placenta_BET_cell_cors <- sapply(Pheno_Placenta_BET_filtered[,names_refbased_cells], FUN=function(x, y) cor.test(x, y, method="spearman", exact=F)$p.value, y=Pheno_Placenta_BET_filtered$gestage_weeks)

r_gestage_Placenta_BET_cell_cors
p_gestage_Placenta_BET_cell_cors

# correct for multiple tests
p.adjust(p_gestage_Placenta_BET_cell_cors, method="bonferroni", n=6)

GA: Trohpoblasts: r = -0.415 p < 0.001 Synctiotrophoblast: r = 0.367 p < 0.001

cor_GA_trophoblasts_BET <- ggscatter(Pheno_Placenta_BET_filtered, x = "gestage_weeks", y = "Trophoblasts", 
          add = "reg.line", conf.int = TRUE, 
          xlab = "gestational age (weeks)", ylab = "proportion \n Trophoblast cells")+
          stat_cor(method = "spearman", label.x = 34, label.y = 0.4, r.digits = 2, aes(label = paste("'r = '", ..r..,sep = "~", "'**'")))+
  labs(tag="d")+
          scale_x_continuous(breaks=c(34,36,38,40,42))+
          scale_y_continuous(breaks=c(0.1,0.2,0.3,0.4))+
            theme(axis.text.x = element_text(size=10), axis.text.y=element_text(size=10), axis.title.y = element_text(size=10), axis.title.x=element_text(size=10),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank())

cor_GA_syncytiotrophoblasts_BET <- ggscatter(Pheno_Placenta_BET_filtered, x = "gestage_weeks", y = "Syncytiotrophoblast", 
          add = "reg.line", conf.int = TRUE, 
          xlab = "gestational age (weeks)", ylab = "proportion \n Syncytiotrophoblast cells")+
          stat_cor(method = "spearman", label.x = 34, label.y = 0.8, r.digits = 2, aes(label = paste("'r = '", ..r..,sep = "~", "'**'")))+
          scale_x_continuous(breaks=c(34,36,38,40,42))+
          scale_y_continuous(breaks=c(0.5,0.6,0.7,0.8))+
        theme(axis.text.x = element_text(size=10), axis.text.y=element_text(size=10), axis.title.y = element_text(size=10), axis.title.x=element_text(size=10),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank())

GA_BET <-ggarrange(cor_GA_trophoblasts_BET, cor_GA_syncytiotrophoblasts_BET,align="hv", nrow=1)
GA_BET_a <- annotate_figure(GA_BET, top = text_grob("Placenta (BET)", size = 10))

GA_BET_a

png(file="Results/GA_Placenta_BET_trophoblasts_cor.png", width= 3000, height=2000, res=600)
cor_GA_trophoblasts_BET
dev.off()

png(file="Results/GA_Placenta_BET_synctiotrophoblast_cor.png", width= 3000, height=2000, res=600)
cor_GA_syncytiotrophoblasts_BET
dev.off()

differences between sexes

# Trophoblast
wilcox.test(Pheno_Placenta_BET_filtered$Trophoblasts ~ Pheno_Placenta_BET_filtered$Sex)
t.test(Pheno_Placenta_BET_filtered$Trophoblasts ~ Pheno_Placenta_BET_filtered$Sex) # with t
pie_T_sex <- wilcox.test(Pheno_Placenta_BET_filtered$Trophoblasts ~ Pheno_Placenta_BET_filtered$Sex)$p.value # p from Wilxocon

# Hofbauer
wilcox.test(Pheno_Placenta_BET_filtered$Hofbauer ~ Pheno_Placenta_BET_filtered$Sex)
t.test(Pheno_Placenta_BET_filtered$Hofbauer ~ Pheno_Placenta_BET_filtered$Sex) # with t
pie_H_sex <- wilcox.test(Pheno_Placenta_BET_filtered$Hofbauer ~ Pheno_Placenta_BET_filtered$Sex)$p.value # p from Wilxocon

# endothelial
wilcox.test(Pheno_Placenta_BET_filtered$Endothelial ~ Pheno_Placenta_BET_filtered$Sex)
t.test(Pheno_Placenta_BET_filtered$Endothelial ~ Pheno_Placenta_BET_filtered$Sex) # with t
pie_E_sex <- wilcox.test(Pheno_Placenta_BET_filtered$Endothelial ~ Pheno_Placenta_BET_filtered$Sex)$p.value # p from Wilxocon

# stromal
wilcox.test(Pheno_Placenta_BET_filtered$Stromal ~ Pheno_Placenta_BET_filtered$Sex)
t.test(Pheno_Placenta_BET_filtered$Stromal ~ Pheno_Placenta_BET_filtered$Sex) # with t
pie_St_sex <- wilcox.test(Pheno_Placenta_BET_filtered$Stromal ~ Pheno_Placenta_BET_filtered$Sex)$p.value # p from Wilxocon

# syncytiotrophoblast
wilcox.test(Pheno_Placenta_BET_filtered$Syncytiotrophoblast ~ Pheno_Placenta_BET_filtered$Sex)
t.test(Pheno_Placenta_BET_filtered$Syncytiotrophoblast ~ Pheno_Placenta_BET_filtered$Sex) # with t
pie_S_sex <- wilcox.test(Pheno_Placenta_BET_filtered$Syncytiotrophoblast ~ Pheno_Placenta_BET_filtered$Sex)$p.value # p from Wilxocon

# nRBC
wilcox.test(Pheno_Placenta_BET_filtered$nRBC ~ Pheno_Placenta_BET_filtered$Sex)
t.test(Pheno_Placenta_BET_filtered$nRBC ~ Pheno_Placenta_BET_filtered$Sex) # with t
pie_nR_sex <- wilcox.test(Pheno_Placenta_BET_filtered$nRBC ~ Pheno_Placenta_BET_filtered$Sex)$p.value # p from Wilxocon

# correct for multiple tests
p.adjust(c(pie_T_sex, pie_S_sex, pie_H_sex, pie_E_sex, pie_nR_sex, pie_St_sex), method="bonferroni", n=6)

no significant differences between sexes

to the top

arrange plots together

Fig. 7

ggsave("Results/GA_cor.pdf",
grid.arrange(GA_CVS_a,
             GA_ITU_a,
              GA_PREDO_a,          
              GA_BET_a, nrow=4),
              width=84, height=120, units="mm", dpi=600, scale=2)

to the top

Files

CellTypesPlacentaAnalyses_Script.md

Latest commit

History

CellTypesPlacentaAnalyses_Script.md

File metadata and controls

Reliability of a novel approach for reference-based cell type estimation in human placental DNA methylation studies - Script 2, Main_Analyses

preparation

loading packages

define function(s)

R setup

save packages info

loading data

Descriptives

methylation

1st PC

Cell Types

plots reference-based cell types

Phenos

Correlation reference-free and reference-based estimated cell types

Correlation reference-free estimated cell types and phenotypes

CVS

Placenta ITU

Placenta PREDO

Placenta BET

Check if methylation can be predicted using cell types (using CV)

CVS

predict PC1 methylation

Placenta ITU

predict PC1 methylation

Placenta PREDO

predict PC1 methylation

Placenta BET

predict PC1 methylation

Make outlier plot for BET CV Model

arrange plots together

Check how methylation of single CpGs can be predicted using cell types

CVS

Placenta ITU

Placenta PREDO

Placenta BET

arrange Model figures

CpGs most influenced by cell types (reference-based)

extract CpGs

genes of all CpGs that overlap between data sets

extract CpGs with R2 > 30% predicted by reference-based cell types that are in all data sets

Tissue-specific Gene Enrichment

genes with R2 > .30 that overlap between the data sets against all genes

Placenta cell enrichment

CpGs most influenced by cell types (reference-free)

extract CpGs

extract CpGs with R2 > 30% that are in all data sets (ref-free)

Tissue-specific Gene Enrichment

genes with R2 > .30 that overlap between the data sets against all genes

Placenta cell enrichment

save Tissue Enrichment Plot

save Cell Enrichment Plot

Plot Cell types (RPC, reference-based)

Compare data sets in RPC cell type proportions

Placenta: CVS vs. term Placenta in ITU

Term Placentas

Phenotype relationships

CVS (ITU)

Placenta (ITU)

Placenta (PREDO)

Placenta (BET)

arrange plots together