-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy path03-isolated_cell_type_populations.Rmd
159 lines (136 loc) · 5.5 KB
/
03-isolated_cell_type_populations.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
---
title: "Isolated immune cell populations: microarray data"
output:
html_notebook:
toc: true
toc_float: true
---
**J. Taroni 2018**
[E-MTAB-2452](https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-2452/):
Sorted peripheral blood cells (CD4+ T cells, CD14+ monocytes, CD16+ neutrophils)
profiled on microarray from several autoimmune diseases.
This dataset is a good control for further investigating recount2 PLIER model
(cross-platform) transfer learning.
## Functions and set up
```{r}
`%>%` <- dplyr::`%>%`
library(AnnotationDbi)
# custom functions
source(file.path("util", "plier_util.R"))
```
```{r}
# plot and result directory setup for this notebook
plot.dir <- file.path("plots", "03")
dir.create(plot.dir, recursive = TRUE, showWarnings = FALSE)
results.dir <- file.path("results", "03")
dir.create(results.dir, recursive = TRUE, showWarnings = FALSE)
```
## Read in data
### `recount2` PLIER model
```{r}
plier.results <- readRDS(file.path("data", "recount2_PLIER_data",
"recount_PLIER_model.RDS"))
```
### Isolated cell subsets microarray data
```{r}
# read in E-MTAB-2452 expression data that has been processed with SCANfast
exprs.df <- readr::read_tsv(file.path("data", "expression_data",
"E-MTAB-2452_hugene11st_SCANfast.pcl"))
colnames(exprs.df)[1] <- "EntrezID"
```
#### Conversion to gene symbol
```{r}
# remove trailing "_at" (result of using BrainArray) to get EntrezIDs
exprs.df$EntrezID <- gsub("_at", "", exprs.df$EntrezID)
# conversion to gene symbol
symbol.obj <- org.Hs.eg.db::org.Hs.egSYMBOL
mapped.genes <- AnnotationDbi::mappedkeys(symbol.obj)
symbol.list <- as.list(symbol.obj[mapped.genes])
symbol.df <- as.data.frame(cbind(names(symbol.list), unlist(symbol.list)))
colnames(symbol.df) <- c("EntrezID", "GeneSymbol")
# inner join
annot.data <- dplyr::inner_join(symbol.df, exprs.df, by = "EntrezID")
# remove objects only necessary for annotation
rm(mapped.genes, symbol.list, symbol.obj, symbol.df)
# write to file
gs.file <- file.path("data", "expression_data",
"E-MTAB-2452_hugene11st_SCANfast_with_GeneSymbol.pcl")
readr::write_tsv(annot.data, path = gs.file)
# get as a matrix
exprs.mat <- as.matrix(annot.data[, 3:ncol(annot.data)])
rownames(exprs.mat) <- annot.data$GeneSymbol
```
## Apply recount2 model
### B matrix
```{r}
iso.b.matrix <- GetNewDataB(exprs.mat = exprs.mat,
plier.model = plier.results)
head(iso.b.matrix[, 1:10])
```
```{r}
# write to file
iso.b.file <- file.path(results.dir, "E-MTAB-2452_B_matrix_recount2_model.txt")
write.table(iso.b.matrix, file = iso.b.file, sep = "\t", quote = FALSE)
```
### Heatmap
```{r}
# differences in cell type LVs?
indx.relevant.lv <- c(grep("CD4", rownames(iso.b.matrix)),
grep("Monocyte", rownames(iso.b.matrix)),
grep("Neutrophil", rownames(iso.b.matrix)))
# get row side color vector matching relevant cell type LVs
row.colors.vec <- c(rep("#1E90EF", sum(grepl("CD4", rownames(iso.b.matrix)))),
rep("#000000",
sum(grepl("Monocyte", rownames(iso.b.matrix)))),
rep("#32CD32",
sum(grepl("Neutrophil", rownames(iso.b.matrix)))))
# get col side color vector based on what cell type each sample is
col.colors.vec <- rep("white", ncol(iso.b.matrix))
col.colors.vec[grep("CD4", colnames(iso.b.matrix))] <- "#1E90EF"
col.colors.vec[grep("CD14", colnames(iso.b.matrix))] <- "#000000"
col.colors.vec[grep("CD16", colnames(iso.b.matrix))] <- "#32CD32"
# heatmap
gplots::heatmap.2(iso.b.matrix[indx.relevant.lv, ],
trace = "none",
col = colorRampPalette(c("blue", "white", "red")),
labCol = FALSE, cexRow = 0.75,
RowSideColors = row.colors.vec,
ColSideColors = col.colors.vec,
density.info = "none",
margins = c(2.5, 14),
main = "Isolated cell populations\nmicroarray",
ylab = "recount2 LVs",
xlab = "Samples",
colsep = 0:ncol(iso.b.matrix),
rowsep = 0:length(indx.relevant.lv),
sepcolor = "#666666", sepwidth = c(0.001, 0.001))
legend("topright",
legend = c("CD4 T cell", "CD14, monocyte", "CD16, neutrophil"),
col = c("#1E90EF", "#000000", "#32CD32"), cex = 0.5,
lty = 1, lwd = 6)
```
```{r}
# save heatmap as pdf -- can not figure out another way around this, hm just
# returns a list
pdf(file.path(plot.dir, "E-MTAB-2452_recount_PLIER_cell_type_LVs_B.pdf"),
width = 7, height = 5)
gplots::heatmap.2(iso.b.matrix[indx.relevant.lv, ],
trace = "none",
col = colorRampPalette(c("blue", "white", "red")),
labCol = FALSE, cexRow = 0.75,
RowSideColors = row.colors.vec,
ColSideColors = col.colors.vec,
density.info = "none",
margins = c(2.5, 14),
main = "Isolated cell populations\nmicroarray",
ylab = "recount2 LVs",
xlab = "Samples",
colsep = 0:ncol(iso.b.matrix),
rowsep = 0:length(indx.relevant.lv),
sepcolor = "#666666", sepwidth = c(0.001, 0.001))
legend("topright",
legend = c("CD4 T cell", "CD14, monocyte", "CD16, neutrophil"),
col = c("#1E90EF", "#000000", "#32CD32"), cex = 0.5,
lty = 1, lwd = 6)
dev.off()
```