-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcompiler.R
executable file
·223 lines (192 loc) · 7.42 KB
/
compiler.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
#!/usr/bin/env Rscript
# Load our dependencies. `require()` is deliberate here: unlike library(),
# it returns FALSE instead of erroring when pacman is absent, which lets
# us bootstrap-install it and continue.
result_pacman <- require(pacman)
if (!result_pacman) {
  # Hardcoded a repo in order to force proceeding non-interactively
  # (install.packages() would otherwise prompt for a CRAN mirror).
  install.packages("pacman", repos = "https://cloud.r-project.org")
  library(pacman)
}
p_load(rmarkdown, knitr, rjson, argparse)
# This is a quick hack for pandoc not working in the MacOS command line:
# fall back to the pandoc binary bundled with RStudio, then re-check.
if (!rmarkdown::pandoc_available("1.12.3")) {
  Sys.setenv(
    RSTUDIO_PANDOC = "/Applications/RStudio.app/Contents/MacOS/pandoc"
  )
  if (!rmarkdown::pandoc_available("1.12.3")) {
    stop("Error: Pandoc not detected in R installation.")
  }
}
#
process_file_name <- function(file_name, force = FALSE, dryrun = FALSE) {
  # Compile one Rmd article to HTML plus a JSON metadata sidecar, if needed.
  #
  # Args:
  #   file_name: path to the .Rmd source file.
  #   force: if TRUE, recompile even when cached output looks current.
  #   dryrun: if TRUE, report what would happen without rendering.
  #
  # Derive the containing folder and the pre-extension "base name".
  # dirname()/basename() replace the old manual strsplit("/") parsing,
  # which produced a broken 1:0 subscript for a path with no "/" in it
  # (dirname() correctly returns "." in that case).
  stripped_file_name <- basename(file_name)
  base_name <- sub("\\.Rmd$", "", stripped_file_name, ignore.case = TRUE)
  folder_name <- paste0(dirname(file_name), "/")
  # Expected compiled artifacts living alongside the source file.
  html_version <- paste0(folder_name, base_name, ".html")
  json_metadata <- paste0(folder_name, base_name, ".json")
  extra_outputs <- paste0(folder_name, base_name, "_files")
  # If each of the files exists, then a compiled version is considered to
  # exist.
  compiled_exists <- file.exists(html_version) &&
    file.exists(json_metadata) && file.exists(extra_outputs)
  # If we have JSON metadata, then we can check recompile data in the JSON
  # metadata; without it we must recompile.
  if (file.exists(json_metadata)) {
    needs_recompile <- read_expiry_date(json_metadata)
  } else {
    needs_recompile <- TRUE
  }
  # Dummied: source-newer-than-output check. Guarded by compiled_exists so
  # we never take file.mtime() of a missing HTML file (which yields NA).
  recent_modify <- compiled_exists &&
    file.mtime(file_name) > file.mtime(html_version)
  # Updates:
  # - Compiled files do not exist
  # - Compiled files do exist, but the JSON tells us it's time to recompile
  # - Force recompile
  need_update <- !compiled_exists || needs_recompile || force
  # Do the update if necessary
  if (need_update) {
    if (dryrun) {
      cat("\tWould have compiled, but dryrun is set... \n")
    } else {
      cat("\tRendering Rmd to HTML... \n")
      render_file(file_name, folder_name)
      cat("\tWriting JSON metadata... \n")
      parse_front_matter(file_name, folder_name, base_name)
      clean_superfluous_libraries(folder_name, base_name)
    }
  } else {
    cat("\tFile not modified, keeping cached version.\n")
    cat(paste0("\tFiles exist? ", compiled_exists, "\n"))
    # cat(paste0("\tRmd modified? ", recent_modify, "\n"))
    cat(paste0("\tNeeds recompile (timer)? ", needs_recompile, "\n"))
    cat(paste0("\tForce recompile? ", force, "\n"))
  }
}
render_file <- function(file_name, folder) {
  # Render one Rmd file to HTML inside `folder`, overriding the default
  # output options to use the site's shared template.
  # (Removed the old `emergency_break` guard: it was hardcoded FALSE, so
  # its stop() branch was unreachable dead code.)
  rmarkdown::render(input = file_name,
                    output_format = "html_document",
                    output_options = list(
                      # Shared site template, relative to the article folder.
                      template = "../../output_template/template_stub.html",
                      # Keep assets external so the template can dedupe them.
                      self_contained = FALSE
                    ),
                    output_dir = folder,
                    clean = TRUE,
                    quiet = TRUE)
}
clean_superfluous_libraries <- function(folder_name, base_name) {
  # Remove the JS/CSS dependency directories RMarkdown emits alongside the
  # rendered HTML; the site template supplies its own copies, so these are
  # dead weight in the output tree.
  deps_dir <- paste0(folder_name, base_name, "_files/")
  superfluous <- c("bootstrap-*", "jquery-*", "navigation-*")
  for (pattern in superfluous) {
    # unlink() expands the trailing wildcard against versioned dir names.
    unlink(paste0(deps_dir, pattern), recursive = TRUE)
  }
}
parse_front_matter <- function(file_name, folder_name, base_name) {
  # Read the Rmd's YAML front matter, warn about and default any missing
  # fields, and write the article's JSON metadata sidecar (consumed by the
  # site and by read_expiry_date()'s recompile-timer check).
  yaml_metadata_list <- rmarkdown::yaml_front_matter(file_name)
  # Tags: this will affect where something shows up.
  # BUG FIX: the JSON output below reads `$tags` (plural), but this check
  # used to inspect `$tag`, so the warning never tracked the real field.
  if (is.null(yaml_metadata_list$tags)) {
    cat("\tWARNING: No tags included in article metadata.\n")
  }
  # Article title
  if (is.null(yaml_metadata_list$title)) {
    cat("\tWARNING: No title included in article metadata,",
        "defaulting to \"", base_name, "\"\n")
    yaml_metadata_list$title <- base_name
  }
  # Who wrote it
  if (is.null(yaml_metadata_list$author)) {
    cat("\tWARNING: No author included in article metadata,",
        "defaulting to \"Voteview Team\"\n")
    yaml_metadata_list$author <- "Voteview Team"
  }
  # What do we write the date as?
  if (is.null(yaml_metadata_list$original_date)) {
    cat("\tWARNING: No original date in article metadata,",
        "defaulting to", format(Sys.Date(), "%Y-%m-%d"), "\n")
    yaml_metadata_list$original_date <- format(Sys.Date(), "%Y-%m-%d")
  }
  # How often does the article need to be updated? (days; default weekly)
  if (is.null(yaml_metadata_list$update_delta)) {
    yaml_metadata_list$update_delta <- 7
  }
  update_date <- format(Sys.Date() + yaml_metadata_list$update_delta,
                        "%Y-%m-%d")
  # Output data
  json_output_list <- list(
    title = yaml_metadata_list$title,
    author = yaml_metadata_list$author,
    description = yaml_metadata_list$description,
    original_date = yaml_metadata_list$original_date,
    date_modified = as.numeric(Sys.time()),
    # Note one stupid hack: the JSON field is recompile_date, but the yaml
    # field is update_delta, which here becomes update_date when parsed.
    recompile_date = update_date,
    tags = yaml_metadata_list$tags
  )
  write(rjson::toJSON(json_output_list),
        paste0(folder_name, base_name, ".json"))
}
read_expiry_date <- function(filename) {
  # Return TRUE when the article's stored recompile date has passed (or is
  # missing), i.e. the article needs recompiling.
  #
  # Namespaced call for consistency with rjson::toJSON() in
  # parse_front_matter().
  json_matter <- rjson::fromJSON(file = filename)
  # No stored date at all: treat as expired so the article recompiles.
  if (!"recompile_date" %in% names(json_matter)) {
    return(TRUE)
  }
  # Explicit as.Date() on the stored "YYYY-MM-DD" string rather than
  # relying on implicit character coercion inside the Date comparison.
  Sys.Date() > as.Date(json_matter[["recompile_date"]])
}
core_loop <- function(dryrun = FALSE) {
  # Find and (re)compile every Rmd article under the current directory.
  # An error in one article is printed and skipped so the rest of the
  # batch still runs.
  rmd_process_list <- list.files(".", ".Rmd$", recursive = TRUE)
  n_files <- length(rmd_process_list)
  # Why a plain for-loop and not an imap? Because we don't include
  # tidyverse as a dependency by default in the compiler. We could include
  # it since many articles do, but we don't by default.
  # (seq_along() replaces the old hand-maintained `i = i + 1` counter and
  # is safe for the empty-directory case, unlike 1:length().)
  for (i in seq_along(rmd_process_list)) {
    file_name <- rmd_process_list[[i]]
    cat(paste0("Processing file ", i, "/", n_files, ": ",
               file_name, "\n"))
    tryCatch({
      process_file_name(file_name, dryrun = dryrun)
    }, error = function(e) {
      print(e)
      cat("Error working on this file.\n")
    })
  }
  cat("Job complete.\n")
}
parse_arguments_dispatch <- function() {
  # Entry point when invoked via Rscript: read the command-line flags and
  # dispatch to either a single-article recompile or a sweep of all
  # articles.
  arg_parser <- argparse::ArgumentParser()
  # Optionally target one article instead of the full sweep.
  arg_parser$add_argument(
    "-f", "--filename", default="", type="character",
    help="Name of article Rmarkdown file to process (default processes all articles).")
  arg_parser$add_argument(
    "-d", "--dryrun", action="store_true",
    help="Don't recompile any articles, just say what you would have done."
  )
  opts <- arg_parser$parse_args()
  if (opts$filename == "") {
    # No target named: process everything.
    cat("Processing all articles:\n")
    core_loop(dryrun=opts$dryrun)
  } else if (file.exists(opts$filename)) {
    # A single named article is always rewritten, hence force = TRUE.
    cat(sprintf("Processing %s:\n", opts$filename))
    process_file_name(opts$filename, force=TRUE, dryrun=opts$dryrun)
  } else {
    stop(sprintf("Rmarkdown file '%s' not found.\n", opts$filename))
  }
}
# Script entry point: parse any command-line arguments and dispatch to a
# single-file recompile (-f) or to processing all articles.
parse_arguments_dispatch()