-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathMicrobiome_Cytokine_Correlations_unBN.R
148 lines (128 loc) · 5.59 KB
/
Microbiome_Cytokine_Correlations_unBN.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#With the Tatiana correlations we did not do the deltas but rather just did the log normalized
#I want to try that as well to see how it changes the results
#This is something Mike suggested
load("/Users/SLancaster/Desktop/Projects/Fiber/Multiomics/RData/Full_Log_Arabinoxylan_cytokine_df.RData")
load("/Users/SLancaster/Desktop/Projects/Fiber/Multiomics/RData/Full_Log_Arabinoxylan_metaphlan_df.RData")
#Generating row names where the the formatting is the same between the two datasets
#This involves only getting rid of some characters from from the pcl row names
log_metaphlan <- t(log_metaphlan)
a <- colnames(log_metaphlan)
tmp <- gsub("_FM.*kneaddata_metaphlan_bugs_list","",a)
tmp <- as.character(tmp)
colnames(log_metaphlan) <- tmp
log_metaphlan <- t(log_metaphlan)
a <- colnames(log_metaphlan)
tmp <- gsub("k__.*g__","",a)
tmp <- as.character(tmp)
colnames(log_metaphlan) <- tmp
a <- rownames(log_cytokine)
tmp <- gsub("WashoutD","Washout_D",a)
tmp <- as.character(tmp)
rownames(log_cytokine) <- tmp
a <- rownames(log_metaphlan)
tmp <- gsub("X0*","",a)
tmp <- as.character(tmp)
rownames(log_metaphlan) <- tmp
a <- rownames(log_cytokine)
tmp <- gsub("X\\.*","",a)
tmp <- as.character(tmp)
rownames(log_cytokine) <- tmp
a <- rownames(log_metaphlan)
tmp <- gsub("LC_Inulin","LCInulin",a)
tmp <- as.character(tmp)
rownames(log_metaphlan) <- tmp
##################################################################
#This will find anyting that matches the strings in x and throw out those entries in y
#A way to get rid of all the long term values
#grepl is supposed to be quicker because it uses boolean functions
y <- rownames(log_cytokine)
x <- c("Final", "Week", "Month")
tmp_cyto <- y[!grepl(paste0(x, collapse = "|"), y)]
##################################################################
# Finding the samples that match
matching_samples <- !is.na( match(rownames(log_cytokine), rownames(log_metaphlan)))
cytokine_matching_df <- log_cytokine[matching_samples,]
log_metaphlan <- cbind(names=rownames(log_metaphlan),log_metaphlan)
matching_samples2 <- !is.na( match(rownames(log_metaphlan),rownames(cytokine_matching_df)))
pcl_matching_df <- log_metaphlan[matching_samples2,]
##################################################################
# The pcl dataframe has extra information in it
# First getting rid of the pathways
# The reducing the dubpicated rows by their average
names <- pcl_matching_df[,1] #Just for formatting concerns I had to split the matrix into the part that was character and the part that was numeric
pcl_matching_df <- pcl_matching_df[,3:ncol(pcl_matching_df)]
class(pcl_matching_df) <- "numeric" #For some reason the code requires this formatting
pcl_matching_df <- cbind(data.frame(names),data.frame(pcl_matching_df)) #Creating a data frame with the correct poritons as numeric or character
deduped.pcl <- aggregate(pcl_matching_df,list(names=pcl_matching_df$names),FUN=mean) #Aggregating to get rid of redundant rownames
rownames(deduped.pcl) <- deduped.pcl$names
deduped.pcl <- deduped.pcl[,-c(1,2)] #Getting rid of extra columns
##################################################################
new_cytokine_matching_df <- cytokine_matching_df[ order(row.names(cytokine_matching_df)), ]
new_deduped.pcl <- deduped.pcl[ order(row.names(deduped.pcl)), ]
match(rownames(new_cytokine_matching_df),rownames(new_deduped.pcl)) #Double checking all the rows match
combined_df <- cbind(cytokine_matching_df,deduped.pcl) #finally creating the combined df
# Now I will run the rcorr on these data
# I believe the correct thing to do is run standardise before correlation
# If the difference in magnitude of the two cytokines is large than the line might be close to 0/1 with little correlation
# even though they change together
# This simple function will run standardise
# Takes in a data.frame object and will return a matrix
standardise_data <- function(data_frame) {
require(Mfuzz)
n <- as.matrix(data_frame)
class(n) <- "numeric"
eset1 <- ExpressionSet(n)
eset1 <- standardise(eset1) #Running standarise
o <- exprs(eset1)
o <- na.omit(o)
return(o)
}
combined_df <- data.frame(t(combined_df))
a <- rownames(combined_df)
tmp <- gsub("k__.*g__","",a)
tmp <- as.character(tmp)
rownames(combined_df) <- tmp
o <- standardise_data(combined_df)
##################################################################
#Now that standardise has run I want to correlate the various data with one another
#This function will take in a matrix
correlations <- function(to_correlate_df) {
require(Hmisc)
cor <- rcorr(format(t(to_correlate_df),digits=20), type="spearman")
cor.data <- cor$r
cor.data[upper.tri(cor.data, diag = T)]<- 0
pval.data <- cor$P
pval.data[upper.tri(pval.data, diag = T)]<- NA
FDR.data <- apply(pval.data,2,p.adjust,method="BH", n = length(pval.data))
pdf("./pval_bonferonni_hist.pdf")
hist(pval.data, breaks = 100, col="darkblue")
dev.off()
pdf("./FDR_bonferonni_hist.pdf")
hist(FDR.data, breaks = 100, col="darkblue")
dev.off()
pdf("./cor_bonferonni_hist.pdf")
hist(cor.data, breaks = 10, col="red")
dev.off()
cor.data[FDR.data > 0.05]=0
return(cor.data)
}
cor.data <- correlations(o)
##################################################################
library("igraph")
network=graph.adjacency(cor.data, weighted=T, mode="undirected", diag=F)
#V(network)$color <- lab[,2]
print(network)
ly <- layout.fruchterman.reingold(network,dim=2,grid="nogrid")
pdf("/Users/SLancaster/Desktop/Correlations.pdf")
par(bg="white", mar=c(0,0,0,0))
set.seed(4)
plot(network,
vertex.size=10,
#vertex.color=V(subgraph)$color,
vertex.label.cex=0.5,
vertex.label.color="black",
vertex.frame.color="black",
#vertex.label = NA,
layout = ly
)
dev.off()