-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathEpicapture Target regions_1-make and overlap design GRanges.R
542 lines (416 loc) · 21.7 KB
/
Epicapture Target regions_1-make and overlap design GRanges.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
############################################################################
# Title: Epicapture Target Regions - 1.step
# Description: Script used to import and calculate & visualize overlap for Target capture BED designs across 5 platforms
# Author: Miljana Tanic ([email protected])
# Created: November 2018
# edited: Mart 2019
# Last edited: Dec 2019 - count CpG overlap
#####################################################################################
#=========================================
# Load libraries and set working directory
#=========================================
source("https://bioconductor.org/biocLite.R")
# biocLite("rtracklayer")
library(rtracklayer) # not necessary for importing BED files, can do it manualy crating GRanges object from dataframe library(dplyr)
library(readr)
# biocLite("annotatr")
library(annotatr)
library(BSgenome.Hsapiens.UCSC.hg19) # sequence of the hg19 genome
library(ggplot2)
library(ChIPpeakAnno) # for multiple pairs overlaps
#------------------------------------------
# Set Path to files
#------------------------------------------
# setwd("~/home/[email protected]/EpiCapture/BEDfiles")
setwd("~/RDS_C2c/EpiCapture/BEDfiles")
list.files()
path2Agilent <- "AgilentBEDfiles/S03770311/"
path2Roche <- "RocheBEDfiles/"
path2Illumina <- "IlluminaBEDfiles/"
path2RRBS <- "RRBS_in_silico_BEDfile/"
Epicapture <- "/home/[email protected]/RDS_C2c/EpiCapture/"
list.files(Epicapture)
RESULTS <- paste0(Epicapture,"RESULTS/")
list.files(RESULTS)
#=========================================
# Import Target capture BED files:
#=========================================
#-----------------------------------------
# Roche design BED file:
#-----------------------------------------
list.files(path2Roche, pattern = "bed")
Roche_EpiGiant_design <- read.table(paste0(path2Roche,"130912_HG19_CpGiant_4M_EPI.bed"), header = FALSE, sep="\t")
head(Roche_EpiGiant_design)
# Make GRanges object:
Roche_EpiGiant.gr <- GRanges(seqnames = Roche_EpiGiant_design$V1, IRanges(start=Roche_EpiGiant_design$V2, end = Roche_EpiGiant_design$V3))
Roche_EpiGiant.gr
# check what chromosomes are covered:
seqnames(Roche_EpiGiant.gr)
seqlevels(Roche_EpiGiant.gr)
#extract a vector with chromosome names:
chr.list <- levels(seqnames(Roche_EpiGiant.gr))
# get number of regions covered:
length(Roche_EpiGiant.gr)
# get the total length without double counting anything:
sum(width(reduce(Roche_EpiGiant.gr)))
#-----------------------------------------
# Agilent BED files:
#-----------------------------------------
list.files(path2Agilent, pattern = "bed")
Agilent_regons <- read.table(paste0(path2Agilent, "S03770311_Regions.bed"), skip = 2, header= FALSE, sep= "\t")
head(Agilent_regons)
dim(Agilent_regons)
# make GRanges object:
Agilent_SureSelect.gr <- GRanges(seqnames = Agilent_regons$V1, IRanges(start=Agilent_regons$V2, end = Agilent_regons$V3))
Agilent_SureSelect.gr
genome(Agilent_SureSelect.gr) <- "hg19"
seqnames(Agilent_SureSelect.gr)
seqlevels(Agilent_SureSelect.gr)
#extract a vector with chromosome names:
chr.list.agilent <- levels(seqnames(Agilent_SureSelect.gr))
# get number of regions covered
length(Agilent_SureSelect.gr)
# get the total length without double counting anything
sum(width(reduce(Agilent_SureSelect.gr)))
#-----------------------------------------
# Illumina BED files:
#-----------------------------------------
list.files(path2Illumina, pattern = "bed")
Illumina_EPIC_2016_regions <- read.table(paste0(path2Illumina,"EPIC_target_regions_2016_02_26.bed"), header = FALSE, sep="\t")
Illumina_EPIC_regions <- read.table(paste0(path2Illumina, "truseq-methyl-capture-epic-manifest-file.bed"), header=FALSE, sep="\t")
dim(Illumina_EPIC_regions) # 437792 regions
dim(Illumina_EPIC_2016_regions) # 435423 regions - this one was used for the beta trial
Illumina_EPIC.gr <- GRanges(seqnames=Illumina_EPIC_2016_regions$V1, IRanges(start=Illumina_EPIC_2016_regions$V2, end=Illumina_EPIC_2016_regions$V3))
Illumina_EPIC.gr
genome(Illumina_EPIC.gr) <- "hg19"
seqnames(Illumina_EPIC.gr)
#extract a vector with chromosome names:
chr.list.illumina <- levels(seqnames(Illumina_EPIC.gr))
length(Illumina_EPIC.gr)
sum(width(reduce(Illumina_EPIC.gr)))
Illumina_EPIC.new.gr <- GRanges(seqnames=Illumina_EPIC_regions$V1, IRanges(start=Illumina_EPIC_regions$V2, end=Illumina_EPIC_regions$V3))
length(Illumina_EPIC.new.gr)
sum(width(reduce(Illumina_EPIC.new.gr)))
#-----------------------------------------
# RRBS in silico BED files:
#-----------------------------------------
# In silico digestion og hg19 with MspI restriction enzyme:
source("https://bioconductor.org/biocLite.R")
biocLite("BSgenome.Hsapiens.UCSC.hg19")
library(BSgenome.Hsapiens.UCSC.hg19)
library(plyr)
library(ggplot2)
library(reshape2)
library(scales)
# Identify the MspI recongnition sites for each chromosomal entry
# Generate a dataframe with the length of MspI digested fragments
mdf=data.frame();
for (i in seq_along(Hsapiens)){
#print(paste("Processing ",seqnames(Hsapiens)[i], sep=""))
m<-matchPattern("CCGG", Hsapiens[[i]])
starts<-start(gaps(m))
ends<-end(gaps(m))
temp_df<-data.frame(start=starts-4,end=ends,chr=seqnames(Hsapiens)[i]) #actually end = ends
temp_df$start<-replace(temp_df$start, temp_df$start == -3, 0)
temp_df<-temp_df[c("chr","start","end")]
mdf<-rbind(mdf,temp_df)
}
# Extract the digested fragment length
# methylation insensitive restriction enzyme MspI, which recognizes CCGG.
# As a result of partial fragmentation during bisulfte conversion, PCR, and effciency of cluster generation, only a subset of these fragments, typically under 300 bp in length, is sequenced.
# Note how only a small amount of the digested DNA is represented in the under 300 bp fraction.
# However these smaller fragments are derived from genomic DNA that has a high frequency of MspI sites and therefore a high frequency of potential CpG methylation sites.
# This is how RRBS achieves reduced representation
# Adaptor artifacts typically appear as a peak of approximately 145 bp.
# Keep only the fragments in the range of 40-600 bp
mdf$width=mdf$end-mdf$start
ml<-mdf[mdf$width>39&mdf$width<601,]
dim(ml)
counts<-ddply(ml,.(width), nrow)
# Create a plot of the frequency of the fragment lengths (y-axis is logarithmic)
p<-ggplot(counts,aes(x=width, y=V1))+geom_line()
p+scale_y_continuous(trans=log2_trans())
#My modifications - labels
p<-ggplot(counts,aes(x=width, y=V1))+geom_line() + labs(title="hg19 MspI in silico digest") + labs( x= "Fragment length (bp)", y="Frequency")
p+scale_y_continuous(trans=log2_trans()) + scale_x_continuous(breaks=seq(40,600,50))
# Save as .BED file
RRBS.bed <-ml[,-4]
library(dplyr)
RRBS.bed_sub <- RRBS.bed[!RRBS.bed$chr=="chrUn_gl000249",]
result <- RRBS.bed[ grep("chrUn_", RRBS.bed$chr) , ] # Note need to use `a$` to get at the `x`
# Filter out all but Chr1-22, chrX and chrY
RRBS.bed_sub <- subset(RRBS.bed, !grepl("chrUn_", RRBS.bed$chr))
RRBS.bed_sub <- subset(RRBS.bed_sub, !grepl("*_random", RRBS.bed_sub$chr))
RRBS.bed_sub <- subset(RRBS.bed_sub, !grepl("*_hap?", RRBS.bed_sub$chr))
RRBS.bed_sub <- subset(RRBS.bed_sub, !grepl("chrM", RRBS.bed_sub$chr))
tail(RRBS.bed_sub)
write.table(RRBS.bed_sub, file="RRBS_in_silico.bed", sep="\t",row.name=FALSE, quote=FALSE, col.names = FALSE)
#------------------------------------------------------------------------------------------
# [ Optional - if already generated bed]
# Imorort from bed file
RRBS.bed_sub <- read.table(paste0(path2RRBS,"RRBS_in_silico.bed"), header = FALSE, sep="\t")
RRBS.ir <- IRanges(start=RRBS.bed_sub$V2, end = RRBS.bed_sub$V3, names = RRBS.bed_sub$V1)
#------------------------------------------------------------------------------------------
# make an IRanges object from data frame
RRBS.ir <- IRanges(start=mdf$start, end = mdf$end, names = mdf$chr)
RRBS.ir
names(RRBS.ir)
# make a GRanges object
RRBS.gr <- GRanges(seqnames = names(RRBS.ir), ranges = RRBS.ir)
genome(RRBS.gr) <- "hg19"
RRBS.gr
seqnames(RRBS.gr)
seqlengths(RRBS.gr) # not defined for any platform gr?
sum(width(RRBS.gr)) # gets width of all ranges
# Filter by fragment length!
RRBS.gr.sub.plot <- RRBS.gr[width(RRBS.gr)>39 & width(RRBS.gr)<601] # select only fragments between 40-600bp for plotting frequency
RRBS.gr.sub <- RRBS.gr[width(RRBS.gr)>39 & width(RRBS.gr)<301] # select only fragments between 40-300bp for target annotation
# 104983071 bp
# Select only first 100bp from each end of the restricted fragment:
RRBS.gr.sub.short <- RRBS.gr.sub[width(RRBS.gr.sub)<=200]
RRBS.gr.sub.200 <- RRBS.gr.sub[width(RRBS.gr.sub)>200]
RRBS.gr.sub.R1.100 <- resize(RRBS.gr.sub.200, 100) # get first 100 bp
RRBS.gr.sub.R2.100 <- restrict(RRBS.gr.sub.200, start=(end(RRBS.gr.sub.200)-100)) # get last 100 bp
# merge granges:
RRBS.gr.sub <- c(RRBS.gr.sub.short,RRBS.gr.sub.R1.100,RRBS.gr.sub.R2.100)
# 97848603 bp
RRBS.gr.sub
length(RRBS.gr.sub)
sum(width(reduce(RRBS.gr.sub))) # reduce overlaping fragments to to count each bp only once
# Plot frequency distribution of RRBS.gr.sub s - Works on GRanges object!
table(width(RRBS.gr.sub.plot)) # count occurences of each fragment size (fragment size frequency)
RRBS.counts <- as.data.frame(table(width(RRBS.gr.sub.plot))) # convert to a data frame
p <- ggplot(RRBS.counts,
aes(x=as.numeric(RRBS.counts$Var1), # x-axis generated by table is a factor so it needs to be converted to numeric for it to work with ggplot
y=RRBS.counts$Freq)) +
geom_line() +
labs(title="hg19 MspI in silico digest") +
labs( x= "Fragment length (bp)", y="Frequency")
p+scale_y_continuous(trans="log2") + scale_x_continuous(breaks=seq(40,600,50))
#--------------------------------------
# Save several R objects to load later:
#--------------------------------------
save(RRBS.gr.sub, Illumina_EPIC.gr, Illumina_EPIC.new.gr, Agilent_SureSelect.gr, Roche_EpiGiant.gr, file="Platform_Design_GRanges.RData")
# these can be reloaded into another R session using the load() function:
load(file="Platform_Design_GRanges.RData")
#============================================
# Find overlap between Platform design files:
#============================================
source("https://bioconductor.org/biocLite.R")
biocLite("ChIPpeakAnno")
library(GenomicRanges)
library(Biostrings)
library(ChIPpeakAnno)
library(HelloRanges)
library(rtracklayer)
library(BSgenome.Hsapiens.UCSC.hg19)
# Load GRanges files for each platform
load(file="Platform_Design_GRanges.RData")
# Load the BSgenome package for homo sapiens.
library(BSgenome.Hsapiens.UCSC.hg19)
genome <- BSgenome.Hsapiens.UCSC.hg19
#-----------------------------------
# Make platforms GRangesList object
#----------------------------------
platforms_design.grl <- GRangesList(Agilent_SureSelect.gr, Roche_EpiGiant.gr, Illumina_EPIC.new.gr, RRBS.gr.sub)
names(platforms_design.grl) <- c("Agilent_SureSelect", "Roche_EpiGiant", "Illumina_EPIC", "RRBS_sub")
# Get target design size of each platform
sum(width(reduce(platforms_design.grl)))
# lapply(platforms_design.grl, function(x) sum(width(reduce(x))) )
# Get sequences from GRangesList object:
grl.seq <- getSeq(genome, platforms_design.grl) # The return values are DNAStringSetList objects.
grl.seq2 <- getSeq(genome, reduce(platforms_design.grl)) # The return values are DNAStringSetList objects.
# Count number of occurances for CG in a given region of list object:
sapply(grl.seq, function(x) sum(vcountPattern("CG", x)))
# get number of regions (probes/fragments) covered:
lapply(platforms_design.grl, function(x) length(x))
# get the total length without double counting anything:
lapply(platforms_design.grl, function(x) sum(width(reduce(x))))
# get the average length of target regions:
lapply(platforms_design.grl, function(x) mean(width(reduce(x)))
lapply(platforms_design.grl, function(x) min(width(reduce(x))))
lapply(platforms_design.grl, function(x) max(width(reduce(x))))
#------------------------------------------------
# Find overlaps between several GRanges objects:
#-----------------------------------------------
# returns an object of overlappingPeaks, which contains there elements: venn_cnt, peaklist (a list of overlapping peaks or unique peaks), and overlappingPeaks (a list of data frame consists of the annotation of all the overlapping peaks).
ol <- findOverlapsOfPeaks(Agilent_SureSelect.gr, Roche_EpiGiant.gr,Illumina_EPIC.new.gr, RRBS.gr.sub)
head(ol)
#--------------------------------------------------------------------
# Visualize the number of common and specific peaks with Venn diagram
#--------------------------------------------------------------------
makeVennDiagram(ol, connectedPeaks="keepAll",
NameOfPeaks=c("Agilent", "Roche", "Illumina", "RRBS"),
main="Overlap of platform breadth of coverage by design",
fill=c("#3D79F3FF", "#E6352FFF", "#34A74BFF","#7570b3" ), # circle fill color
col=c("#3D79F3FF", "#E6352FFF", "#34A74BFF","#7570b3" ), #circle border color
cat.col=c("#3D79F3FF", "#E6352FFF", "#34A74BFF","#7570b3") )
# # Or in one shot:
# res <- makeVennDiagram(Peaks=list(Agilent_SureSelect.gr, Roche_EpiGiant.gr, Illumina_EPIC.gr, RRBS.gr.sub), ignore.strand=TRUE,
# NameOfPeaks=c("Agilent", "Roche", "Illumina", "RRBS"),
# main="Venn Diagram for platform coverage design",
# fill=c(1,2,3,4), scaled=TRUE, euler.d=FALSE)
# # Get Venn counts:
# res$vennCounts
# A pie chart is used to demonstrate the overlap features of the common peaks.
pie1(table(ol$overlappingPeaks[["Agilent_SureSelect.gr///Roche_EpiGiant.gr"]]$overlapFeature))
#-------------------------------------------
# Visualize intersecting regions with UpSetR
#-------------------------------------------
# biocLite("UpSetR")
library(UpSetR)
#biocLite("ComplexHeatmap")
library(devtools)
# install_github("jokergoo/ComplexHeatmap") # latest version
library(ComplexHeatmap)
# union mode: 1 means in that set and 0 is not taken into account. When there are multiple 1, the relationship is OR. Then, 1 1 0 means a set of elements in set A or B, and they can also in C or not in C (union(A, B)). Under this mode, the seven combination sets can overlap.
# When the sets are genomic regions, the size is calculated as the sum of the width of regions in each set (or in other words, the total number of base pairs).
# List
lt <- list("Agilent" =Agilent_SureSelect.gr,
"Roche" = Roche_EpiGiant.gr,
"Illumina"= Illumina_EPIC.new.gr,
"RRBS"=RRBS.gr.sub)
m = make_comb_mat(lt)
# m = m[comb_size(m) > 500000]
# save
write.table(comb_size(m), file=paste0(RESULTS,"1_PlatformDesignDiferences/UpSet_matrix.txt"))
# plot
UpSet(m,
set_order = c("Agilent", "Roche", "Illumina", "RRBS"),
top_annotation = upset_top_annotation(
m,
axis_param= list(at=c(0,2e7,4e7,6e7,8e7),
labels=c("0MB", "20MB","40MB", "60MB", "80MB")),
height = unit(4, "cm")
),
right_annotation = upset_right_annotation(
m,
axis_param= list(at=c(0, 5e7,10e7),
labels=c("0MB", "50MB", "100MB"),
labels_rot=0),
width=unit(4, "cm")),
pt_size = unit(5, "mm"), lwd = 3,
#comb_col = c("olivedrab", "mediumslateblue", "black")[comb_degree(m)] # doesn't show all 4 intersection
)
# export as 10' x 5' pdf plot
#----------------------------------------
# Alternative Venn - doesn't work nicely
#----------------------------------------
# After finding the overlapping peaks, you can use annotatePeakInBatch to annotate the overlapping peaks with the genomic features in the AnnotationData within certain distance away specified by maxgap, which is 5kb in the following example.
# But if you have them as a single GRanges stratified by type metadata, you could make it GRangesList and plot it like this:
# peaks1$type<-"TF1"
# peaks2$type<-"TF2"
# peaks3$type<-"TF3"
# gr <- c(peaks1, peaks2, peaks3) # like your data
#
# grl <- splitAsList(gr, gr$type)
# res <- makeVennDiagram(Peaks=grl, NameOfPeaks=names(grl))
# # If you want a nice and colorful Venn diagram, then check https://github.com/js229/Vennerable/blob/master/README.md to install Venerable (I could not get it from Rforge) and do it like this:
# biocLite(c("RBGL","graph"))
# install.packages("devtools"); library(devtools);
# install_github("js229/Vennerable"); library(Vennerable);
# vignette("Venn")
#
# library(Vennerable)
# venn_cnt2venn <- function(venn_cnt){
# n <- which(colnames(venn_cnt)=="Counts") - 1
# SetNames=colnames(venn_cnt)[1:n]
# Weight=venn_cnt[,"Counts"]
# names(Weight) <- apply(venn_cnt[,1:n], 1, paste, collapse="")
# Venn(SetNames=SetNames, Weight=Weight)
# }
# v <- venn_cnt2venn(res$vennCounts)
# plot(v)
# venn_cnt <- res$vennCounts
# n <- which(colnames(venn_cnt)=="Counts") - 1
# SetNames=colnames(venn_cnt)[1:n]
# Weight=venn_cnt[,"Counts"]
# names(Weight) <- apply(venn_cnt[,1:n], 1, paste, collapse="")
# v <- Venn(SetNames=SetNames, Weight=Weight)
# plot(v)
#
# # Following works for getting the exact intersects between all the ranges.
# Reduce(intersect, list(gr, gr1, gr2))
#=========================================================================
# Find overlap wth known CpG sites and compare coverage between Platforms
#=========================================================================
Epicapture <- "/home/[email protected]/RDS_C2c/EpiCapture/"
list.files(Epicapture)
BEDfiles <- paste0(Epicapture,"BEDfiles/")
list.files(BEDfiles)
load(file=paste0(BEDfiles,"FeatureAnnotations/hg19_CpGsites_GRanges.RData"))
# make subsets og CpG sites per platform
# subsetByOverlaps method simply subsets the first GRanges object to include only those that overlap the second.
subsetByOverlaps(gr, g)
platforms_design.grl <- GRangesList(Agilent_SureSelect.gr, Roche_EpiGiant.gr, Illumina_EPIC.new.gr, RRBS.gr.sub)
cpgr.Agilent_SureSelect <- subsetByOverlaps(cpgr, Agilent_SureSelect.gr, ignore.strand=TRUE)
cpgr.Roche_EpiGiant <- subsetByOverlaps(cpgr, Roche_EpiGiant.gr, ignore.strand=TRUE)
cpgr.Illumina_EPIC <- subsetByOverlaps(cpgr, Illumina_EPIC.gr, ignore.strand=TRUE)
cpgr.RRBS <- subsetByOverlaps(cpgr, RRBS.gr.sub, ignore.strand=TRUE)
# length of each cpgr.PLATFORM object should mach count of CpGs per platform
length(cpgr.Agilent_SureSelect)
length(cpgr.Roche_EpiGiant)
length(cpgr.Illumina_EPIC)
length(cpgr.RRBS)
# > sapply(grl.seq, function(x) sum(vcountPattern("CG", x)))
# [1] 3153816 2806466 3346505 4070782
# > length(cpgr.Agilent_SureSelect)
# [1] 3172538
# > length(cpgr.Roche_EpiGiant)
# [1] 2818345
# > length(cpgr.Illumina_EPIC)
# [1] 3353173
# > length(cpgr.RRBS)
# [1] 4079434
# reduce merges consecutive CpG sites
# > length(reduce(cpgr.Agilent_SureSelect))
# [1] 2949002
# > length(reduce(cpgr.Roche_EpiGiant))
# [1] 2620071
# > length(reduce(cpgr.Illumina_EPIC))
# [1] 3143186
# > length(reduce(cpgr.RRBS))
# [1] 3817962
#-------------------------------------------
# Visualize intersecting regions with UpSetR
#-------------------------------------------
# List
lt_cpg <- list("Agilent" =cpgr.Agilent_SureSelect,
"Roche" = cpgr.Roche_EpiGiant,
"Illumina"= cpgr.Illumina_EPIC,
"RRBS"=cpgr.RRBS)
m_cpg = make_comb_mat(lt)
# m = m[comb_size(m) > 500000]
# save
write.table(m_cpg, file=)
# plot
UpSet(m_cpg,
set_order = c("Agilent", "Roche", "Illumina", "RRBS"),
top_annotation = upset_top_annotation(
m,
axis_param= list(at=c(0,2e7,4e7,6e7),
labels=c("0MB", "10 M","20 M", "30 M")),
height = unit(4, "cm")
),
right_annotation = upset_right_annotation(
m,
axis_param= list(at=c(0,2e7,4e7,6e7,8e7,1e8),
labels=c("0MB", "10 M","20 M", "30 M", "40 M", "50M" ),
labels_rot=0),
width=unit(4, "cm")),
pt_size = unit(5, "mm"), lwd = 3,
#comb_col = c("olivedrab", "mediumslateblue", "black")[comb_degree(m)] # doesn't show all 4 intersection
)
# intersection size plots the actual size of CpG regions not the count of common CpGs so the output is "double the count" since each CpG has size of 2bp!
# can just divede by two
#--------------------------------------------------------------------
# Visualize the number of common and specific CpGs with Venn diagram
#--------------------------------------------------------------------
ol_cpg <- findOverlapsOfPeaks(cpgr.Agilent_SureSelect, cpgr.Roche_EpiGiant,cpgr.Illumina_EPIC, cpgr.RRBS)
head(ol_cpg)
makeVennDiagram(ol_cpg, connectedPeaks="keepAll",
NameOfPeaks=c("Agilent", "Roche", "Illumina", "RRBS"),
main="Overlap of CpGs covered by design",
fill=c("#3D79F3FF", "#E6352FFF", "#34A74BFF","#7570b3" ), # circle fill color
col=c("#3D79F3FF", "#E6352FFF", "#34A74BFF","#7570b3" ), #circle border color
cat.col=c("#3D79F3FF", "#E6352FFF", "#34A74BFF","#7570b3") )