-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathtest_kendalltau.R
105 lines (86 loc) · 3.19 KB
/
test_kendalltau.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
library(visualizationQualityControl)
library(ggplot2)
library(dplyr)
x = seq(1, 10)
y = seq(1, 10)
n_na = seq(1, 20)
where_na = purrr::map(n_na, function(in_na){
na_comb = combn(20, in_na)
asplit(na_comb, 2)
})
where_na = unlist(where_na, recursive = FALSE)
prog_bar = knitrProgressBar::progress_estimated(length(where_na))
forward_na = purrr::map_dbl(where_na, function(use_na){
#message(.y)
knitrProgressBar::update_progress(prog_bar)
tmp_x = x
tmp_y = y
y_na = use_na[use_na > 10] - 10
x_na = use_na[use_na <= 10]
tmp_y[y_na] = NA
tmp_x[x_na] = NA
ici_kendallt(tmp_x, tmp_y, "global")
})
prog_bar = knitrProgressBar::progress_estimated(length(where_na))
y2 = seq(10, 1)
reverse_na = purrr::map_dbl(where_na, function(use_na){
knitrProgressBar::update_progress(prog_bar)
tmp_x = x
tmp_y = y2
y_na = use_na[use_na > 10] - 10
y_na = 10 - y_na + 1
x_na = use_na[use_na <= 10]
tmp_y[y_na] = NA
tmp_x[x_na] = NA
ici_kendallt(tmp_x, tmp_y, "global")
})
all_na = data.frame(positive = forward_na, negative = reverse_na)
all_na = dplyr::mutate(all_na, diff = -1 * negative - positive)
ggplot(dplyr::slice_sample(all_na, n = 1000), aes(x = positive, y = negative)) + geom_point()
zero_diff = dplyr::filter(all_na, diff == 0)
long_na = tidyr::pivot_longer(all_na, -diff, names_to = "type", values_to = "correlation")
ggplot(long_na, aes(x = correlation)) + geom_histogram(bins = 100) +
facet_wrap(~ type, ncol = 1)
dplyr::group_by(long_na, type) %>%
dplyr::summarize(mean = abs(mean(correlation)))
# just for kicks, lets now test the pearson correlation
prog_bar = knitrProgressBar::progress_estimated(length(where_na))
positive_pearson = purrr::map_dbl(where_na, function(use_na){
#message(.y)
knitrProgressBar::update_progress(prog_bar)
tmp_x = x
tmp_y = y
y_na = use_na[use_na > 10] - 10
x_na = use_na[use_na <= 10]
tmp_y[y_na] = NA
tmp_x[x_na] = NA
in_matrix = rbind(tmp_x, tmp_y)
out_res = locally_it_weighted_pairwise_correlation(in_matrix)
out_res$cor[1, 2]
})
prog_bar = knitrProgressBar::progress_estimated(length(where_na))
y2 = seq(10, 1)
negative_pearson = purrr::map_dbl(where_na, function(use_na){
knitrProgressBar::update_progress(prog_bar)
tmp_x = x
tmp_y = y2
y_na = use_na[use_na > 10] - 10
y_na = 10 - y_na + 1
x_na = use_na[use_na <= 10]
tmp_y[y_na] = NA
tmp_x[x_na] = NA
in_matrix = rbind(tmp_x, tmp_y)
out_res = locally_it_weighted_pairwise_correlation(in_matrix)
out_res$cor[1, 2]
})
pearson_cor = data.frame(correlation = c(positive_pearson,
negative_pearson),
type = rep(c("positive", "negative"), each = length(positive_pearson)))
ggplot(pearson_cor, aes(x = correlation)) + geom_histogram(bins = 100) +
facet_wrap(~ type, ncol = 1)
dplyr::group_by(pearson_cor, type) %>%
dplyr::summarise(mean = abs(mean(correlation, na.rm = TRUE)))
pearson_wide = data.frame(positive = positive_pearson, negative = negative_pearson)
pearson_wide = dplyr::mutate(pearson_wide, diff = positive - -1 * negative)
pearson_wide = dplyr::filter(pearson_wide, !is.na(negative), !is.na(positive))
ggplot(dplyr::slice_sample(pearson_wide, n = 10000), aes(x = positive, y = negative)) + geom_point()