-
Notifications
You must be signed in to change notification settings - Fork 0
/
MEP-LINCS_QASVD.Rmd
252 lines (195 loc) · 11.4 KB
/
MEP-LINCS_QASVD.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
---
output: html_document
---
---
title: "MEP-LINCS `r x[["CellLine"]]` `r x[["StainingSet"]]` `r x[["Signal"]]` SVD Analysis"
date: "`r Sys.Date()`"
---
```{r global_options_Setup, include=FALSE}
#Author: Mark Dane, copyright 2015
knitr::opts_chunk$set(fig.width=12, fig.height=8,
echo=FALSE, warning=FALSE, message=FALSE)
#source("MEPLINCSFunctions.R")
library("ggplot2")
library("reshape2")
library("data.table")
library("limma")
library("MEMA")
library("grid")
library("knitr")
library("gplots")
library("RColorBrewer")
library("RUVnormalize")
library(plotly)
#Read in file based on name in dataframe
l1 <- fread(x[["inputFileName"]], showProgress = FALSE)
```
```{r simulatePoorPin, eval=FALSE}
l1$Spot_PA_SpotCellCount[l1$Block==6] <-as.integer( l1$Spot_PA_SpotCellCount[l1$Block==6]*.25+1)
```
```{r simulateEmptyingWell, eval=FALSE}
l1 <- setkey(l1,Barcode,Well,Spot)
l1$Spot_PA_SpotCellCount[l1$Block==5 & l1$Row==1 & l1$Column==1] <-as.integer( l1$Spot_PA_SpotCellCount[l1$Block==5 & l1$Row==1 & l1$Column==1]*seq(1,0,length.out=length(l1$Spot_PA_SpotCellCount[l1$Block==5 & l1$Row==1 & l1$Column==1])))
```
```{r}
#Keep the control wells separate
l1$Ligand[grepl("FBS",l1$Ligand)] <- paste(l1$Ligand[grepl("FBS",l1$Ligand)],"C",as.numeric(factor(l1$Barcode[grepl("FBS",l1$Ligand)])),sep="")
l1$MEP <- paste(l1$ECMp,l1$Ligand,sep="_")
#Delete the fiducial and blank spots
setkey(l1, "ECMp")
l1 <- l1[!grepl("fiducial|Fiducial|blank|PBS",l1$ECMp)]
#Logit transform DNA2NProportion
#logit(p) = log[p/(1-p)]
DNA2NImpute <- l1$Nuclei_PA_Cycle_DNA2NProportion
DNA2NImpute[DNA2NImpute==0] <- .01
DNA2NImpute[DNA2NImpute==1] <- .99
l1$Nuclei_PA_Cycle_DNA2NProportionLogit <- log2(DNA2NImpute/(1-DNA2NImpute))
#logit transform eccentricity
EccImpute <- l1$Nuclei_CP_AreaShape_Eccentricity
EccImpute[EccImpute==0] <- .01
EccImpute[EccImpute==1] <- .99
l1$Nuclei_PA_AreaShape_EccentricityLogit <- log2(EccImpute/(1-EccImpute))
#Create spot level dataset and
#add Rel, the residual from subtracting the mel from each value
l3 <- l1[,list(Ligand=unique(Ligand), LigandAnnotID=unique(LigandAnnotID),
ECMp=unique(ECMp), ECMpAnnotID=unique(ECMpAnnotID),
MEP=unique(MEP),
Row=unique(Row), Column = unique(Column), Block=unique(Block), ID=unique(ID),
ArrayRow=unique(ArrayRow), ArrayColumn=unique(ArrayColumn),
DNA2Nmel = median(Nuclei_PA_Cycle_DNA2NProportionLogit, na.rm=TRUE),
SCCmel = as.numeric(median(Spot_PA_SpotCellCount, na.rm=TRUE)),
Eccmel = median(Nuclei_PA_AreaShape_EccentricityLogit)
),
by="Barcode,Well,Spot"]
if(grepl("SS2",x[["StainingSet"]])){
l3$EdUmel <- l1[,list(EdUmel = median(Nuclei_PA_Gated_EduPositiveLogit, na.rm=TRUE)),by=.(Barcode,Well,Spot)][,EdUmel]
l3 <- l3[, EdURel := calcResidual(EdUmel), by="MEP"]
} else if (grepl("SS3",x[["StainingSet"]])){
l3$LineageRatioLog2mel <- l1[,list(LineageRatioLog2mel = median(Cytoplasm_PA_Intensity_LineageRatioLog2, na.rm=TRUE)),by=.(Barcode,Well,Spot)][,LineageRatioLog2mel]
l3 <- l3[, LineageRatioLog2Rel := calcResidual(LineageRatioLog2mel), by="MEP"]
}
l3 <- l3[, DNA2NRel := calcResidual(DNA2Nmel), by="MEP"]
l3 <- l3[, SCCRel := calcResidual(SCCmel), by="MEP"]
l3 <- l3[, EccRel := calcResidual(Eccmel), by="MEP"]
```
```{r createMatrices}
#For an array Unit of study there are 64 'samples'of 694 spots with 8 replicates
#Create a spot assignment that recognizes rotated B row arrays
l3$RSpot <- l3$Spot
l3$RSpot[grepl("B",l3$Well)] <- 701-l3$RSpot[grepl("B",l3$Well)]
#Add in ligand and ECMp names so they will carry through the normalization
l3$BWL <- paste(l3$Barcode,l3$Well,l3$Ligand,sep="_")
l3$SE <- paste(l3$RSpot,l3$ECMp,sep="_")
l3$ES <- paste(l3$ECMp,l3$RSpot,sep="_")
l3$ESB <- paste(l3$ECMp,l3$RSpot,l3$Block, sep="_")
l3$BW <- paste(l3$Barcode,l3$Well,sep="_")
l3$WSE <- paste(l3$Well, l3$RSpot,l3$ECMp,sep="_")
#Cast to get mel values by barcode_well_ligand rows and spot columns
#Coerce missing values to have near 0 values
if(x[["Signal"]] %in% c("EdU", "DNA2N", "Ecc")){
fill <- log2(.01/(1-.01))
} else if(unique(x[["Signal"]]) %in% c("LineageRatioLog2", "SCC")){
fill <- 0
} else(stop(paste("Need fill value for",unique(x[["Signal"]])," signal")))
```
```{r addResiduals}
l3RelcA <- data.table(dcast(l3, BWL~ES, value.var=paste0(x[["Signal"]],"Rel"), fill = fill))
#Remove the BWL column and use it as rownames in the matrix
l3Relm <- l3RelcA[,grep("BWL",colnames(l3RelcA), value=TRUE, invert=TRUE), with=FALSE]
#Y is a numeric matrix of the raw transformed responses
#of each spot extracted from the l3 dataset
YRelArray <- matrix(unlist(l3Relm), nrow=nrow(l3Relm), dimnames=list(l3RelcA$BWL, colnames(l3Relm)))
```
###SVD Applied to Array Level
Singular Value Decomposition is applied to the residuals at the array level as follows.
* Median summarize the cell level data to the spot level.
* Take the logit of EdU$^{+}$, DNA2N, Eccentricity (Ecc) and the log2 of the LineageRatio signals.
* Create residuals by subtracting the median value of the replicates from each replicate value
* Create an M matrix of residuals that is `r nrow(YRelArray)` rows by `r ncol(YRelArray)` columns. Each row holds residuals for each spot in a MEMA.
* Apply SVD to the M matrix to get u, d and v matrices.
* Plot the eignvector in the column of the v matrix which corresponds to the largest singular value in the d matrix. Plot as heat map of 35 rows and 20 columns. Missing tiles in the heat map correspond to fiducials and blank spots.
The plot developed from the SVD analysis of the residuals emphasizes the technical signal in the data. When applied to the spot cell count (SCC) values across multiple arrays, the plots show areas of consistently high or low cell counts.
```{r, fig.width=4}
s <- svd(YRelArray)
vDT <- data.table(Value=s$v[,which.max(s$d)], Spot = as.integer(sub(".*_", "",colnames(l3Relm))))
vDT$ArrayRow <- as.integer(ceiling(vDT$Spot/20))
vDT$ArrayColumn <- as.integer((vDT$Spot-1) %% 20 +1)
p <- ggplot(vDT, aes(x=ArrayColumn, y=ArrayRow, fill= Value))+
geom_tile()+
scale_fill_gradient(low="white",high="red",oob = scales::squish)+
ggtitle(bquote(.(x[["Signal"]])~Residual~Maximum~Singular~Value~Eigenvector))+
xlab("")+ylab("")+
guides(fill=FALSE)+
xlim(.5,20.5)+ylim(35.5,.5)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size=rel(.6)), axis.text.y = element_text(angle = 0, vjust = 0.5, hjust=1, size=rel(.5)), plot.title = element_text(size = rel(.8)),legend.text=element_text(size = rel(.3)),legend.title=element_text(size = rel(.7)),panel.grid.major = element_line(linetype = 'blank'),panel.background = element_rect(fill = "lightgrey"))
suppressWarnings(print(p))
```
```{r}
#Cast to get mel values with Barcode rows and well+Spot+ECMp columns
#Use the names to hold the spot contents
#Coerce missing values to have near 0 proliferation signals
l3RelcB <- data.table(dcast(l3, Barcode~WSE, value.var=paste0(x[["Signal"]],"Rel"),fill = fill))
#Remove the Barcode column and use it as rownames in the matrix
l3Relm <- l3RelcB[,grep("Barcode",colnames(l3RelcB), value=TRUE, invert=TRUE), with=FALSE]
YRelPlate <- matrix(unlist(l3Relm), nrow=nrow(l3Relm), dimnames=list(l3RelcB$Barcode, colnames(l3Relm)))
s <- svd(YRelPlate)
splits <- strsplit2(colnames(l3Relm), split="_")
vDT <- data.table(Value=-s$v[,which.max(s$d)], Well=splits[,1], Spot=as.integer(splits[,2]))
vDT$ArrayRow <- as.integer(ceiling(vDT$Spot/20))
vDT$ArrayColumn <- as.integer((vDT$Spot-1) %% 20 +1)
```
###SVD Applied to Plate Level
Singular Value Decomposition is applied to the residuals at the plate level as follows.
* Median summarize the cell level data to the spot level.
* Take the logit of EdU$^{+}$, DNA2N, Eccentricity (Ecc) and the log2 of the LineageRatio signals.
* Create residuals by subtracting the median value of the replicates from each replicate value.
* Create an M matrix of residuals that is `r nrow(YRelPlate)` rows by `r ncol(YRelPlate)` columns. Each row holds residuals for each spot in a two row and four column well plate of MEMAs.
* Apply SVD to the M matrix to get u, d and v matrices.
* Plot the eignvector in the column of the v matrix which corresponds to the largest singular value in the d matrix. Plot as heat maps in two rows and four columns of arrays with 35 rows and 20 columns. Missing tiles in the heat maps correspond to fiducials and blank spots. The B row of heatmaps are displayed in the same orientation as the A row even though they are physically printed rotated 180 degrees.
SVD analysis of the SCC residuals across multiple plates show areas of consistently high or low cell counts at the plate level.
<br>
```{r, fig.width=8, fig.height=8}
p <- ggplot(vDT, aes(x=ArrayColumn, y=ArrayRow, fill= Value))+
geom_tile()+
scale_fill_gradient(low="white",high="red",oob = scales::squish)+
ggtitle(bquote(.(x[["Signal"]])~Residual~Maximum~Singular~Value~Eigenvector))+
xlab("")+ylab("")+
guides(fill=FALSE)+
xlim(.5,20.5)+ylim(35.5,.5)+
facet_wrap(~Well, ncol=4)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size=rel(.6)), axis.text.y = element_text(angle = 0, vjust = 0.5, hjust=1, size=rel(.5)), plot.title = element_text(size = rel(.8)),legend.text=element_text(size = rel(.3)),legend.title=element_text(size = rel(.7)),panel.grid.major = element_line(linetype = 'blank'),panel.background = element_rect(fill = "lightgrey"))
suppressWarnings(print(p))
```
###Analysis of Spot Level Signals over Time
Analysis of Spot Level Signals over Time uses the residuals at the spot level as follows.
* Median summarize the cell level data to the spot level.
* Take the logit of EdU$^{+}$, DNA2N, Eccentricity (Ecc) and the log2 of the LineageRatio signals.
* Create residuals by subtracting the median value of the replicates from each spot value
* Create an M matrix of residuals that is `r nrow(YRelArray)` rows by `r ncol(YRelArray)` columns. Each column holds residuals for one spot in all MEMAs.
* Plot a time course for each spot
The motivation of this plot is to indicate when a well in a source plate is running low of reagent. While the well has reagents, the residual of the SCC value should be near 0. When a well runs dry, fewer cells will grow on the spots made from that well and the track in the plot below will drop down from 0 and remain low.
<br>
```{r SpotTimeCourse}
l3RelcTC <- data.table(dcast(l3, BWL~ESB, value.var=paste0(x[["Signal"]],"Rel"), fill = fill))
#Remove the BWL column and use it as rownames in the matrix
l3Relm <- l3RelcTC[,grep("BWL",colnames(l3RelcTC), value=TRUE, invert=TRUE), with=FALSE]
#Y is a numeric matrix of the raw transformed responses
#of each spot extracted from the l3 dataset
YRelArray <- matrix(unlist(l3Relm), nrow=nrow(l3Relm), dimnames=list(l3RelcTC$BWL, colnames(l3Relm)))
setkey(l3RelcTC, BWL)
l3RelcTC$PrintOrder <- 1:nrow(l3RelcTC)
l3Relmelt <- melt(l3RelcTC, id.vars=c("BWL","PrintOrder"), measure=patterns("_"),variable.name = "ESB", value.name = paste0(x[["Signal"]],"Rel"))
splits <- strsplit2(l3Relmelt$ESB,split="_")
l3Relmelt$Block <- strsplit2(l3Relmelt$ESB,split="_")[,3]
p <- ggplot(l3Relmelt, aes(x=PrintOrder, y=SCCRel1, colour=ESB))+
geom_smooth(size=.1, se = FALSE)+
guides(colour=FALSE)+
ggtitle("SCC Time Course by Spot")
print(p)
p <- ggplot(l3Relmelt, aes(x=PrintOrder, y=SCCRel1, colour=Block))+
geom_smooth(size=.1, se = FALSE)+
guides(colour=FALSE)+
ggtitle("SCC Time Course by Pin")
print(p)
ggplotly(p)
```