---
title: "Key Metrics by Segment List #1 and Segment List #2"
output:
ioslides_presentation:
widescreen: true
smaller: true
fig_width: 10
css: styles.css
logo: images/logo.png
---
```{r setup, include=FALSE}
# PLATFORM: Adobe Analytics
# This script takes as inputs a set of metrics and two lists of Adobe Analytics segment IDs.
# It then makes a heatmap for each metric showing the "intersection" of each pair of segments
# from those two lists (one list forms the rows of the heatmap and the other forms the columns).
#
# To use this script, you will need an .Renviron file in your working directory when you start/
# re-start R that has your Adobe Analytics credentials and the RSID for the report suite being
# used. It should look like:
#
# ADOBE_KEY="[Your Adobe Key]"
# ADOBE_SECRET="[Your Adobe Secret]"
# RSID="[The RSID for the report suite being used]"
#
# Then, you will need to customize the various settings in the "Settings" section below.
# What these settings are for and how to adjust them is documented in the comments.
knitr::opts_chunk$set(echo = FALSE)
# Get a timestamp for when the script starts running. Ultimately, this will be written
# out to a file along with the end time so there is a record of how long the script took to run.
script_start_time <- Sys.time()
################
# Load libraries
################
library(tidyverse)
library(RSiteCatalyst)
library(scales) # For adding commas in values
library(stringr) # For wrapping strings in the axis labels
################
# Settings
################
# These are all sourced from config.R, so be sure to open that script
# and adjust settings there before running this one. These are called out
# as separate chunks just for code readability (hopefully).
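#
# As a rough illustration, the objects defined in config.R are expected to look
# something like the following (the names are inferred from how they are used later
# in this script; the values here are purely hypothetical):
#
# metrics_list <- data.frame(metric_id = c("revenue", "orders"),
#                            metric_name = c("Revenue", "Orders"),
#                            metric_format = c("dollar", "number"),
#                            metric_decimals = c(0, 0),
#                            stringsAsFactors = FALSE)
# date_start_heatmap <- "2019-01-01"
# date_end <- "2019-03-31"
# segments_all <- c("s1234_allvisits")     # segment ID(s) applied to every query
# segment_drilldown_1 <- list(list(name = "New Visitors", seg_id = "s1234_new"),
#                             list(name = "Return Visitors", seg_id = "s1234_return"))
# segment_drilldown_2 <- list(list(name = "Paid Search", seg_id = "s1234_ppc"),
#                             list(name = "Organic Search", seg_id = "s1234_seo"))
# default_theme <- theme_light()           # any ggplot2 theme object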
```
```{r cache=FALSE}
knitr::read_chunk('config.R')
```
```{r metrics-list}
```
```{r timeframes}
```
```{r main-segment}
```
```{r drilldown-segments}
```
```{r default-theme}
```
```{r main, include=FALSE}
################
# Authentication
################
# Get the values needed to authenticate from the .Renviron file
auth_key <- Sys.getenv("ADOBE_KEY")
auth_secret <- Sys.getenv("ADOBE_SECRET")
# Get the RSID we're going to use
rsid <- Sys.getenv("RSID")
####################
# Heatmap Creation Function
####################
summary_heatmap <- function(metric){
# Get just the results for the metric of interest
summary_table <- filter(segment_results, metric_name == metric) %>%
select(segment_1, segment_2, metric_total)
# Convert the segment names to factors (required in order to order them) and
# ensure they're ordered the same as set up in the config.
summary_table$segment_1 <- factor(summary_table$segment_1,
levels = rev(sapply(segment_drilldown_1, function(x) x$name)))
summary_table$segment_2 <- factor(summary_table$segment_2,
levels = sapply(segment_drilldown_2, function(x) x$name))
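# (rev() is used for segment_1 so that the first segment in the config ends up at the
# top of the y-axis, since ggplot places the first factor level at the bottom.)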
# Put the x-axis labels at the top of the heatmap (done via the position argument
# of scale_x_discrete() below).
# Create the heatmap
# Get the details on how to format the metric in the box
metric_format <- filter(metrics_list, metric_name == metric) %>%
select(metric_format) %>% as.character()
metric_decimals <- filter(metrics_list, metric_name == metric) %>%
select(metric_decimals) %>% as.numeric()
# Figure out what will come before and after the actual number based on the format.
# By default, there is no symbol before or after.
pre_num <- ""
post_num <- ""
if(metric_format == "dollar"){
pre_num <- "$"
}
if(metric_format == "percent"){
post_num <- "%"
}
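# For example (illustrative values): with metric_format = "dollar" and metric_decimals = 0,
# a metric_total of 1234567.8 gets labelled "$1,234,568" in its tile; with
# metric_format = "percent", the same logic appends "%" instead.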
heatmap_plot <- ggplot(summary_table, aes(segment_2, segment_1)) +
geom_tile(aes(fill = metric_total)) +
scale_fill_gradient(low = "white", high = "green") +
geom_text(aes(label =
paste0(pre_num,
comma(round(metric_total,metric_decimals)),
post_num))) +
scale_x_discrete(position = "top", labels = function(x) str_wrap(x, width = 10)) +
scale_y_discrete(labels = function(x) str_wrap(x, width = 8)) +
default_theme +
theme(axis.text = element_text(size = 12, colour = "grey10"),
panel.grid.major = element_blank(),
legend.position = "none")
}
################
# Get and process the data
################
# Authenticate
SCAuth(auth_key, auth_secret)
# Cycle through all possible combinations of the segments in segment_drilldown_1 and
# segment_drilldown_2. Think of this as a matrix for each metric that shows the total for
# each combination of segments from the two lists. Nested loops are easier to follow here
# than an lapply() approach, and they don't change the number of Adobe API calls, which is
# the real performance drag.
segment_results <- data.frame(segment_1 = character(),
segment_2 = character(),
metric_name = character(),
metric_total = numeric(),
stringsAsFactors = FALSE)
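# Each row added below will look something like this (purely illustrative values):
#   segment_1 = "New Visitors", segment_2 = "Paid Search", metric_name = "Revenue", metric_total = 125000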
# Initialize a counter for adding new rows to the data frame just created.
new_row <- 1
for(s1 in 1:length(segment_drilldown_1)){
# Get the current segment 1 to be processed
segment1_id <- segment_drilldown_1[[s1]]$seg_id
segment1_name <- segment_drilldown_1[[s1]]$name
for(s2 in 1:length(segment_drilldown_2)){
# Get the current segment 2 to be processed
segment2_id <- segment_drilldown_2[[s2]]$seg_id
segment2_name <- segment_drilldown_2[[s2]]$name
segments <- c(segments_all, segment1_id, segment2_id)
# Pull the totals for the metrics being assessed for this combination of segments.
# This is a bit of a hack: it's really QueueSummary() data we're after, but that
# function doesn't support segments. So, this uses QueueOvertime() with "year" as the
# granularity to get summary-like data. Note that this will cause a hiccup if the
# reporting period spans two years.
data_summary <- QueueOvertime(rsid,
date_start_heatmap,
date_end,
metrics_list$metric_id,
date.granularity = "year",
segment.id = segments)
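# data_summary should come back with one row per "year" period in the date range
# (normally just one, which is why row 1 is used below) and one column per metric ID,
# alongside RSiteCatalyst's date/name columns.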
# Get the result for each metric
for(m in 1:nrow(metrics_list)){
metric_id <- metrics_list[m,1]
metric_name <- metrics_list[m,2]
# Add the results to the data frame
segment_results[new_row,] <- NA
segment_results$segment_1[new_row] <- segment1_name
segment_results$segment_2[new_row] <- segment2_name
segment_results$metric_name[new_row] <- metric_name
segment_results$metric_total[new_row] <- data_summary[1, colnames(data_summary) == metric_id]
# Increment the counter so the next iteration will add another row
new_row <- new_row + 1
}
}
}
# Save this data. This is just so we can comment out the actual pulling of the
# data if we're just tinkering with the output
save(segment_results, file = "data_key_metric_heatmaps.Rda")
# load("data_key_metric_heatmaps.Rda")
# RMarkdown doesn't do great with looping for output, so the sections below need to be
# constructed manually. This should be fairly quick to tweak. Note that summary_heatmap() takes
# as an input the 'metric_name' value, so this needs to be based on what was entered
# for 'metric_name' in the 'metrics_list' object in the Settings.
```
## Revenue
_Date Range: `r format(as.Date(date_start_heatmap), "%B %d, %Y")` to `r format(as.Date(date_end), "%B %d, %Y")`_
```{r revenue-seg-channel}
summary_plot <- summary_heatmap("Revenue")
summary_plot
```
## Orders
_Date Range: `r format(as.Date(date_start_heatmap), "%B %d, %Y")` to `r format(as.Date(date_end), "%B %d, %Y")`_
```{r orders-seg-channel}
summary_plot <- summary_heatmap("Orders")
summary_plot
```
## Visits
_Date Range: `r format(as.Date(date_start_heatmap), "%B %d, %Y")` to `r format(as.Date(date_end), "%B %d, %Y")`_
```{r visits-seg-channel}
summary_plot <- summary_heatmap("Visits")
summary_plot
```
## Conversion Rate
_Date Range: `r format(as.Date(date_start_heatmap), "%B %d, %Y")` to `r format(as.Date(date_end), "%B %d, %Y")`_
```{r conversion-rate-seg-channel}
summary_plot <- summary_heatmap("Conversion Rate")
summary_plot
```
```{r script_time, include=FALSE}
# Get a timestamp for when the script is essentially done and write the start and end times out
# to a file that can be checked to see how long it took the script to run.
script_end_time <- Sys.time()
script_duration <- difftime(script_end_time, script_start_time, units = "mins")
duration_message <- paste0("The script started running at ", script_start_time, " and finished running at ",
script_end_time, ". The total duration for the script to run was: ",
round(script_duration, 1), " minutes.")
write_file(duration_message, path = "script_duration_heatmaps.txt")
```