This repository has been archived by the owner on Oct 20, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy path6_dashboard_4.R
456 lines (434 loc) · 17.5 KB
/
6_dashboard_4.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
### 6: Dashboard 4
# load packages
library(shiny)
library(shinydashboard)
library(dashboardthemes)
library(readr)
library(dplyr)
library(ggplot2)
# load data
movies_horror <- read_csv(
"data/movies_horror.csv",
# not specifying col_types of "genre" columns in case more genres are added in the future
col_types = cols(tconst = col_character(), primaryTitle = col_character(),
releaseYear = col_integer(), averageRating = col_double(),
numVotes = col_integer(), runtimeMinutes = col_integer(),
actorID = col_character(), actorName = col_character(),
actorBirthYear = col_integer(), actorDeathYear = col_integer(),
actorCategory = col_character(), actorAlive = col_integer(),
actorAge = col_integer(), directorID = col_character(),
directorName = col_character(), directorBirthYear = col_integer(),
directorDeathYear = col_integer(), directorAlive = col_integer(),
directorAge = col_integer())) %>%
# keep only movies with a value for releaseYear
filter(!is.na(releaseYear))
# ui
ui <- dashboardPage(
dashboardHeader(
title = "Horror Film Data" # short title
),
dashboardSidebar(collapsed = FALSE, # open by default
sidebarMenu(
id = "tabs", # input$tabs will give the name of the selected tab
menuItem(text = "Top 10s", # name of tab in the sidebar
tabName = "tab_top10", # how to refer to tab in the script
icon = icon("trophy"), # icon (by default from Font Awesome)
selected = TRUE), # this tab is open by default),
menuItem(text = "Trends",
tabName = "tab_trends",
icon = icon("chart-line")),
menuItem(text = "About",
tabName = "tab_about",
icon = icon("question-circle"))
)),
dashboardBody(
# use a theme by inserting the theme function in dashboardBody
shinyDashboardThemes(theme = "poor_mans_flatly"),
# tabs
tabItems(
tabItem(
tabName = "tab_top10",
fluidPage(
p("Use the range slider below to select a range of years.", align = "center"),
# slider to select range of years
fluidRow(
# rows are made up of 12 columns
column(width = 6,
offset = 3, # widget begins in column 4
align = "center",
sliderInput(
inputId = "selectYear", # call using input$selectYear
label = "Release year",
ticks = FALSE, # don't show ticks and labels
min = min(movies_horror$releaseYear),
max = max(movies_horror$releaseYear),
# if you provide a vector of length 2, it turns the slider into a range slider
value = c(max(movies_horror$releaseYear) - 19, max(movies_horror$releaseYear)), # shows last 20 years by default
width = "100%", # use all available width
sep = "" # no thousands separator because these are years
))),
tabsetPanel(
id = "tabset_top10", # input$tabset_top10 will return "value" of the active tab
type = "tabs",
tabPanel(
title = "Top 10 films",
value = "films",
br(), # blank line for spacing
# checkbox to limit to films with at least 1,000 votes on IMDB
fluidRow(
column(width = 6,
offset = 3,
align = "center",
checkboxInput(
inputId = "at_least_1000", # call using input$at_least_1000
label = "Only include films with at least 1,000 votes on IMDB?",
value = FALSE, # unchecked by default
width = "100%" # should change to max pixel width
))),
# display how many horror films were released in the selected year
# UI can't access objects from server function (like nrow(data_horror()),
# so we have to use renderText / textOutput)
h4(textOutput("n_films"), align = "center"),
# UI can't see input either, so again need to use renderText / textOutput
h2(textOutput("top_10_title"), align = "center"), # title
fluidRow(
column(width = 8,
offset = 2,
align = "center",
div(style = 'overflow-x: scroll', # allow table to scroll horizontally on small screens
tableOutput("top_10_table")) # table
)
)
),
tabPanel(
title = "Top 10 directors",
value = "directors",
br(), # blank line for spacing
uiOutput("slider_top10d"),
fluidRow(
column(width = 8,
offset = 2,
align = "center",
div(style = 'overflow-x: scroll', # allow table to scroll horizontally on small screens
tableOutput("top_10_d_table"))))),
tabPanel(
title = "Top 10 leading actors/actresses",
value = "actors",
br(), # blank line for spacing
uiOutput("slider_top10a"),
fluidRow(
column(width = 8,
offset = 2,
align = "center",
div(style = 'overflow-x: scroll', # allow table to scroll horizontally on small screens
tableOutput("top_10_a_table")))))
)
)
),
tabItem(
tabName = "tab_trends",
fluidPage(
br(), # blank line for spacing
fluidRow(
column(
width = 6,
h3("Average rating", align = "center"),
plotOutput("plot_rating"),
h5(textOutput("text_rating"), align = "center"),
),
column(
width = 6,
h3("Average runtime", align = "center"),
plotOutput("plot_runtime"),
h5(textOutput("text_runtime"), align = "center"),
)
),
fluidRow(
column(
width = 6,
h3("Average age (director)", align = "center"),
plotOutput("plot_d_age"),
h5(textOutput("text_d_age"), align = "center"),
),
column(
width = 6,
h3("Average age (lead)", align = "center"),
plotOutput("plot_a_age"),
h5(textOutput("text_a_age"), align = "center"),
)
)
)
),
# the content of the about tab is simply HTML
tabItem(
tabName = "tab_about",
fluidPage(
br(), # blank line for spacing
# increase font size relative to normal
HTML(
"<p style = 'font-size: 130%'>Dashboard created by <a href='https://jeanpaulsoucy.com/' target='_blank'>Jean-Paul R. Soucy</a> for the <a href='https://ssc.ca/en/meetings/annual/2021-annual-meeting/student-conference' target='_blank'>Ninth Annual Canadian Statistics Student Conference</a> scientific workshop: <b>What makes R Shiny so shiny? A step-by-step introduction to interactive dashboards in R</b>.
<br><br>Data courtesy of <a href='https://www.imdb.com/interfaces/' target='_blank'>IMDB</a> for personal and non-commercial use.
<br><br>Dashboard code is available on <a href='https://github.com/jeanpaulrsoucy/cssc-2021-r-shiny-workshop' target='_blank'>GitHub</a>.
</p>")
)
)
)
)
)
# server
server <- function(input, output, session) {
# data: horror movies for selected year
data_horror <- reactive({
if (input$selectYear[1] == input$selectYear[2]) {
dat <- movies_horror %>%
filter(releaseYear == input$selectYear[1])
} else {
dat <- movies_horror %>%
filter(releaseYear >= input$selectYear[1] & releaseYear <= input$selectYear[2])
}
if (input$at_least_1000) {
dat <- dat %>%
filter(numVotes >= 1000)
}
# return data
dat
})
# data: top 10 horror movies for selected year
# note that ties are broken by the film with the most votes
data_top10films <- reactive({
data_horror() %>%
# arrange by rating and number of votes
arrange(desc(averageRating, numVotes)) %>%
# pick top 10
slice_head(n = 10)
})
# text: number of horror films in the year
output$n_films <- renderText({
if (input$selectYear[1] == input$selectYear[2]) {
txt <- paste0(
"There were ",
format(nrow(data_horror()), big.mark = ","),
" horror films released in ",
input$selectYear[1])
} else {
txt <- paste0(
"There were ",
format(nrow(data_horror()), big.mark = ","),
" horror films released between ",
input$selectYear[1], # min year
" and ",
input$selectYear[2]) # max year
}
if (input$at_least_1000) {
txt <- paste0(txt, " with at least 1,000 ratings on IMDB.")
} else {
txt <- paste0(txt, ".")
}
# output text
txt
})
# table: top 10 films for the year
output$top_10_table <- renderTable(
width = "100%", # use 100% of available width
striped = TRUE,
bordered = TRUE,
hover = TRUE, # bordered table
na = "", # display nothing for NA values
data_top10films() %>%
# select columns of interest
select(primaryTitle, averageRating, numVotes, runtimeMinutes,
directorName, actorName) %>%
rename(Title = primaryTitle, `IMDB rating` = averageRating,
`IMDB votes` = numVotes, `Runtime (minutes)` = runtimeMinutes,
Director = directorName, Actor = actorName) %>%
mutate(`IMDB votes` = format(`IMDB votes`, big.mark = ","))) # add thousands separator
# data: top 10 directors/actors
data_top10da <- reactive({
# do not run until inputs are available
# prevents "error" from briefly flashing on the dashboard when trying to
# run a function before the input is available
req(input$tabset_top10)
# check which tab is active
if (input$tabset_top10 == "directors") {
col_da <- "directorName"
} else if (input$tabset_top10 == "actors") {
col_da <- "actorName"
} else {
# fail silently if input_top10 is defined but is not "directors" or "actors"
# for example, if you are switching between the film and director tab
# this silent failure will prevent an error message from flashing in the tab
# (i.e., because col_da is not defined)
stop("")
}
# get data
dat <- data_horror()
# remove missing director/lead actor
dat <- dat[!is.na(dat[, col_da]), ]
# keep relevant columns
dat <- dat[, c(col_da, "averageRating", "numVotes")] # actor or director
# summarize data
dat <- dat %>%
group_by(across(1)) %>% # group by first column (actor or director)
# group_by(!!sym(col_da)) %>%
# note that I've avoided using quasiquotation here
# an equivalent statement would be:
# group_by(!!sym(col_da)) %>%
# read more: https://adv-r.hadley.nz/quasiquotation.html
# summarize data
summarize(totalFilms = n(),
averageRating = mean(averageRating),
totalVotes = sum(numVotes),
.groups = "drop") %>%
arrange(desc(averageRating))
})
# input slider: top 10 directors/actors
# using renderUI so I can use the same slider in both the actor and director tab
# since it server the same function in both
slider_top10da <- reactive({
# do not run until inputs are available
# req(input$tabset_top10)
# set ID for slider value
if (input$tabset_top10 == "directors") {
slider_id <- "minFilms_d"
} else if (input$tabset_top10 == "actors") {
slider_id <- "minFilms_a"
}
# get max value for slider
dat <- data_top10da()
films_max <- max(dat$totalFilms)
# slider
fluidRow(
# rows are made up of 12 columns
column(width = 6,
offset = 3, # widget begins in column 4
align = "center",
sliderInput(
inputId = slider_id,
label = "Minimum number of films",
ticks = FALSE, # don't show ticks and labels
min = 1,
max = films_max,
step = 1, # move slider in integer increments
value = 5, # default value is max year in dataset
width = "100%", # use all available width
)))
})
# table: top 10 directors/actors
top_10_da_table <- reactive({
# req(input$tabset_top10)
# get min films value
if (input$tabset_top10 == "directors") {
slider_id <- "minFilms_d"
} else if (input$tabset_top10 == "actors") {
slider_id <- "minFilms_a"
}
# since slider_id is a variable, we can't do input$slider_id
# instead, we use input[[slider_id]] to access the desired value
# wait for min films value to be available
req(input[[slider_id]])
data_top10da() %>%
filter(totalFilms >= input[[slider_id]]) %>%
slice_head(n = 10) %>%
# format numbers
mutate(totalVotes = format(totalVotes, big.mark = ",")) %>% # add thousands separator
# rename columns
rename("Name" = 1, "Total films" = 2, "Average rating" = 3, "Total votes" = 4)
})
# director/actor tab elements
output$top_10_d_table <- renderTable(
width = "100%", # use 100% of available width
striped = TRUE,
bordered = TRUE,
hover = TRUE, # bordered table
na = "", # display nothing for NA values
# don't try to render when input$tabset_top10 is still "films"
# this will briefly flash an error on the dashboard
if (input$tabset_top10 == "directors") {
top_10_da_table()
})
output$slider_top10d <- renderUI(
# don't try to render when input$tabset_top10 is still "films"
# this will briefly flash an error on the dashboard
if (input$tabset_top10 == "directors") {
slider_top10da()
})
output$top_10_a_table <- renderTable(
width = "100%", # use 100% of available width
striped = TRUE,
bordered = TRUE,
hover = TRUE, # bordered table
na = "", # display nothing for NA values
# don't try to render when input$tabset_top10 is still "films"
# this will briefly flash an error on the dashboard
if (input$tabset_top10 == "actors") {
top_10_da_table()
})
output$slider_top10a <- renderUI(
# don't try to render when input$tabset_top10 is still "films"
# this will briefly flash an error on the dashboard
if (input$tabset_top10 == "actors") {
slider_top10da()
})
# data: trend over time
# no need to make reactive since it does not need to change in response to input
# get movie data and filter out extremely long movies (> 4 hrs)
# these movies throw off the scale of the runtime plot
# don't exclude runtime = NA so that we can report how many are missing
data_trend <- movies_horror %>%
filter(runtimeMinutes <= 60 * 4 | is.na(runtimeMinutes))
# plot: trend over time
plot_trend <- function(y) {
# y is the y variable for the plot (e.g., averageRating)
# switch is an alternative to nested if statements
lab_y <- switch(
y,
"averageRating" = "Average rating (IMDB)",
"runtimeMinutes" = "Runtime (minutes)",
"actorAge" = "Actor/actress age (years)",
"directorAge" = "Director age (years)"
)
# generate trend plot
ggplot(data = data_trend, aes_string(x = "releaseYear", y = y)) +
geom_point(colour = "grey", alpha = 0.1) + # make individual points very faint
geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs")) +
labs(x = "Year", y = lab_y) +
theme_bw() +
theme(
legend.position = "bottom",
axis.title = element_text(size = 18),
axis.text = element_text(size = 13),
legend.title = element_text(size = 15),
legend.text = element_text(size = 13),
)
}
# trend plots
output$plot_rating <- renderPlot(plot_trend(y = "averageRating"))
output$plot_runtime <- renderPlot(plot_trend(y = "runtimeMinutes"))
output$plot_d_age <- renderPlot(plot_trend(y = "directorAge"))
output$plot_a_age <- renderPlot(plot_trend(y = "actorAge"))
# text: how many missing values in trend plot?
text_trend <- function(y) {
lab_y <- switch(
y,
"averageRating" = "average rating",
"runtimeMinutes" = "runtime",
"actorAge" = "actor/actress age",
"directorAge" = "director age"
)
# n total
n_total <- nrow(data_trend)
# n missing
n_missing <- nrow(data_trend[is.na(data_trend[, y]), ])
# output text
paste0("There are ", format(n_missing, big.mark = ","),
"/", format(n_total, big.mark = ","), " missing values for ",
lab_y, ".")
}
# trend plot text
output$text_rating <- renderText(text_trend(y = "averageRating"))
output$text_runtime <- renderText(text_trend(y = "runtimeMinutes"))
output$text_d_age <- renderText(text_trend(y = "directorAge"))
output$text_a_age <- renderText(text_trend(y = "actorAge"))
}
# run Shiny app
shinyApp(ui, server)