-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_na_f.R
60 lines (40 loc) · 1.2 KB
/
plot_na_f.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
library(tidyverse)
library(lattice)
#' Plot the share of missing values per metric and group
#'
#' For every column of `data` named in `metrics`, computes the fraction of
#' `NA` values within each group of `var` and draws the result as a greyscale
#' lattice level plot (metrics on the x axis, groups on the y axis). Only
#' groups with at least `min_n` rows are shown.
#'
#' @param data A data frame or tibble.
#' @param var Unquoted name of the grouping column in `data`.
#' @param metrics Character vector of column names to report on; names that
#'   are not columns of `data` are silently ignored.
#' @param min_n Minimum number of rows a group must have to be plotted.
#' @return A lattice `trellis` object; it is drawn when printed.
plot_na <- function(data, var, metrics, min_n = 20) {
  v <- enquo(var)

  # pull() yields a plain vector for data frames and tibbles alike. factor()
  # drops NA and unused levels, so the levels always match the groups that
  # tapply() reports below (distinct() would count an NA group that tapply()
  # silently omits, breaking the row assignment).
  groups <- factor(data %>% pull(!!v))

  # Only the requested metrics are ever plotted, so only those are computed.
  # Columns keep their order of appearance in `data`.
  metric_cols <- names(data)[names(data) %in% metrics]

  na_share <- matrix(
    NA_real_,
    nrow = length(metric_cols),
    ncol = nlevels(groups),
    dimnames = list(metric_cols, levels(groups))
  )
  for (i in seq_along(metric_cols)) {
    # mean() of a logical vector is the proportion of TRUE, i.e. of NAs.
    na_share[i, ] <- tapply(is.na(data[[metric_cols[i]]]), groups, mean)
  }

  # Hide small groups; the remaining columns stay in sorted level order.
  group_sizes <- table(groups)
  shown <- names(group_sizes)[group_sizes >= min_n]
  p <- na_share[, colnames(na_share) %in% shown, drop = FALSE]

  # par.settings applies the black-and-white theme to this plot only,
  # instead of permanently changing the session-wide lattice settings.
  levelplot(
    p,
    scales = list(x = list(rot = 90)),
    main = "Percentage of missing variables",
    xlab = "Variable",
    ylab = "",
    par.settings = canonical.theme(color = FALSE)
  )
}
# Load the prepared data set.
data <- readRDS("shiny_data.RDS")

# Keep only the first row encountered for each DOI.
data_uniq <- data[!duplicated(data[, "doi"]), ]

# Columns whose missingness is reported.
metrics <- c(
  "urls",
  "wos"
)

# Missingness by publisher (one row per DOI) and by parent (all rows).
plot_na(data_uniq, publisher, metrics)
plot_na(data, parent, metrics)