forked from slancast/fiber
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRNA_from_multiomic_cmeans.R
48 lines (35 loc) · 2.93 KB
/
RNA_from_multiomic_cmeans.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/r
#This program will be to analyze the transctipt data from the multiomic clusters
#First it will pull them out, and then it will add the gene symbol and perform
#enrichments etc.
fiber_subset <- "Arabinoxylan"
df <- read.csv(paste0("/Users/SLancaster/Desktop/multiomics_clusters/clusters16multiomics_",fiber_subset,"_cluster_membership.txt"), header = TRUE, sep="\t", row.names = NULL)
df2 <- df[which(startsWith(df[,1], "ENSG")),] #Pulling out only the ensemble genes
#running the program I wrote called entrezid
source('~/Desktop/Projects/Fiber/Multiomics/Software/Utils.r')
df2 <- entrezid(df2)
interesting_clusters <- c(3,6,13,15) #Put all the intersting clusters here and then it will iterate over them. Otherwise you can run them individually
for (cluster_number in interesting_clusters ) {
#pulling out the cluster of choice. This coule be updated to use all the clusters if necessary
#cluster_number <- 3 #Use this line instead of loop to run clusters one by one
df3 <- df2[which(df2[,2] %in% cluster_number),]
#setting up davidwebservice and the background
library(RDAVIDWebService)
david<-DAVIDWebService(email="[email protected]", url="https://david.ncifcrf.gov/webservice/services/DAVIDWebService.DAVIDWebServiceHttpSoap12Endpoint/")
setTimeOut(david, 1000000) #Large for large gene lists
#setAnnotationCategories(david, c("GOTERM_BP_DIRECT","GOTERM_MF_DIRECT","GOTERM_CC_DIRECT")) #This specficies which annotation category I want (i.e. GO, Kegg, UP, etc)
#The previous function doesn't seem to increase my power with the test set I was using, although I remember a proir test where it did increase the power.
#It seems that entrez works slightly better than ensembl but they are comparable. Symbol wokred the worst.
background <- addList(david, df2$ENTREZID, idType="ENTREZ_GENE_ID",listName="Total_genes", listType="Background") #This needs to be reset before adding new result lists
#I will use this file name several times, so creating it as a object
AnnotChartFilename <- paste("/Users/SLancaster/Desktop/multiomics_clusters/cluster_",cluster_number,"_",fiber_subset,"AnnotationChart.txt",sep="")
result<-addList(david, df3$ENTREZID, idType="ENTREZ_GENE_ID",listName=paste("cluster",cluster_number,sep=""), listType="Gene")
getFunctionalAnnotationChartFile(david, fileName = AnnotChartFilename)
getFunctionalAnnotationTableFile(david, fileName = paste("/Users/SLancaster/Desktop/multiomics_clusters/cluster_",cluster_number,"_",fiber_subset,"AnnotationTable.txt",sep=""))
getClusterReportFile(david, fileName = paste("/Users/SLancaster/Desktop/multiomics_clusters/cluster_",cluster_number,"_",fiber_subset,"ClusterReport.txt",sep=""))
#This section will readjust the pvalues. David does a terrible job of that.
chart <- read.csv(AnnotChartFilename, header = TRUE, sep="\t", row.names = NULL)
padjusted <- p.adjust(chart$PValue, method = "BH")
chart$Benjamini <- padjusted
write.table(chart, file=AnnotChartFilename, sep="\t", row.names = FALSE)
}