# 1_download.R
source('1_download/src/download_helpers.R')
p1_download <- list(
##### Handle authentication with Google Drive #####
# Authenticate! Note that any existing auth from an interactive session
# won't carry over, because targets builds in a fresh R session every time.
tar_target(p1_gd_config_yml, 'gd_config.yml', format='file'),
tar_target(p1_gd_config, yaml::yaml.load_file(p1_gd_config_yml)),
tar_target(p1_authenticated_user, gd_auth(p1_gd_config$gd_email),
# Re-authenticate every 3 hours to be certain the user is still authenticated.
cue = tar_cue_age(p1_authenticated_user,
as.difftime(3, units = "hours"))),
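# For reference, a minimal sketch of what `gd_auth()` (defined in
# 1_download/src/download_helpers.R, not shown here) is assumed to do,
# based on how its return value is used below -- likely a thin wrapper
# around the googledrive package:
#   gd_auth <- function(gd_email) {
#     googledrive::drive_auth(email = gd_email)
#     googledrive::drive_user() # returns a list with an `emailAddress` field
#   }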
##### Download the files from Google Drive #####
# List the files available in this specified folder
tar_target(p1_gd_id_netcdfs, as_id('1g3spZxtTP2tq7TzHaCZqK7Nn1HXB9rKq')),
tar_target(p1_gd_netcdfs, {
# Referencing p1_authenticated_user below adds a dependency so that this
# builds AFTER the Google Drive authentication target has run.
message(sprintf('Attempting to list files using permissions for %s',
p1_authenticated_user$emailAddress))
drive_ls(p1_gd_id_netcdfs)
}),
# Download the raster stacks as netcdf files
tar_target(p1_netcdfs, {
# Add a dependency on the p1_authenticated_user target so that this
# builds AFTER the Google Drive authentication target has run.
p1_authenticated_user
files_saved_info <- drive_download(
p1_gd_netcdfs$id,
path = sprintf('1_download/out/%s', p1_gd_netcdfs$name),
overwrite = TRUE)
return(files_saved_info$local_path)
}, format = 'file',
pattern = map(p1_gd_netcdfs)),
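# Hedged usage note: each branch of p1_netcdfs is a local netcdf path, so
# a downstream target could load one as a raster stack with, e.g. (assuming
# the `terra` package): terra::rast(p1_netcdfs[1])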
##### Download the GEE imagery and AOI mission-dates from Google Drive #####
# List the files available in this specified folder
tar_target(p1_gd_id_missiondates, as_id('1UEEVBlvX7P4H2dtNoX1oj44-Xeyg6x01')),
tar_target(p1_gd_missiondates_csv, {
# Referencing p1_authenticated_user below adds a dependency so that this
# builds AFTER the Google Drive authentication target has run.
message(sprintf('Attempting to download a file using permissions for %s',
p1_authenticated_user$emailAddress))
gd_file_info <- drive_get(p1_gd_id_missiondates)
local_file_info <- drive_download(
p1_gd_id_missiondates,
path = sprintf('1_download/out/%s', gd_file_info$name),
overwrite = TRUE)
return(local_file_info$local_path)
}, format = "file"),
tar_target(p1_lake_superior_sf, {
# Pulled the bounding box for our Lake Superior AOI:
# https://github.com/rossyndicate/Superior-Plume-Bloom/blob/efa1bdc644611ee97c2e1e0c3bf0cfc4a7ca1955/eePlumB/A_PrepAOI/TileAOI.Rmd#L31-L52
sup_box <- tibble(ymin = 46.5, ymax = 47.3, xmin = -92.2, xmax = -90.1)
tibble(
  # Trace the four corners in order (closing the ring); only the bbox is kept
  lat = c(sup_box$ymin, sup_box$ymax, sup_box$ymax, sup_box$ymin, sup_box$ymin),
  lon = c(sup_box$xmin, sup_box$xmin, sup_box$xmax, sup_box$xmax, sup_box$xmin)) %>%
st_as_sf(coords = c('lon', 'lat'), crs = 4326) %>%
st_bbox() %>% st_as_sfc()
}),
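# For reference, since only the bounding box is kept, an equivalent
# one-liner (assuming the same sf usage as above) would be:
#   st_as_sfc(st_bbox(c(xmin = -92.2, ymin = 46.5, xmax = -90.1, ymax = 47.3),
#                     crs = st_crs(4326)))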
tar_target(p1_lake_superior_grid_sf,
# Now make the grid using that box. To do this, I borrowed code from:
# https://github.com/rossyndicate/Superior-Plume-Bloom/blob/efa1bdc644611ee97c2e1e0c3bf0cfc4a7ca1955/eePlumB/A_PrepAOI/TileAOI.Rmd#L31-L52
st_make_grid(p1_lake_superior_sf,
cellsize = c(0.55, 0.3)) # units are degrees
),
tar_target(p1_lake_superior_grid_centers,
# Get the center of each cell and then convert to a table
p1_lake_superior_grid_sf %>%
st_centroid() %>%
st_coordinates() %>%
as_tibble() %>%
setNames(c('longitude', 'latitude')) %>%
mutate(cell_no = row_number())),
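# Hedged usage note: to inspect the centers spatially, the table converts
# back to sf points with, e.g.:
#   st_as_sf(p1_lake_superior_grid_centers,
#            coords = c('longitude', 'latitude'), crs = 4326)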
##### Read in the Lake Superior watershed shapes #####
tar_target(p1_lake_superior_watershed_shp, '1_download/in/LakeSuperiorWatershed.shp', format = "file"),
tar_target(p1_lake_superior_watershed_sf, st_read(p1_lake_superior_watershed_shp)),
##### Download the HUCs per site outlet #####
# Manual table for which sites to include and their names
tar_target(p1_nwis_sites,
tibble(river = c('Nemadji', 'Bois Brule', 'Siskiwit'),
nwis_site = c('04024454', '04026005', '04026160'))),
# Find lat/long per site and then download associated HUC8. Note that we want
# HUC10s, but `nhdplusTools` won't allow you to get HUC10s from site ids alone.
tar_target(p1_nwis_sites_sf,
dataRetrieval::readNWISsite(p1_nwis_sites$nwis_site) %>%
st_as_sf(coords = c('dec_long_va', 'dec_lat_va'), crs = 4326)),
tar_target(p1_huc08_nwis_sites,
get_huc(id = unique(p1_nwis_sites_sf$huc_cd), type='huc08')),
# Use the HUC8 shape to pull the appropriate HUC10s, then filter to just those
# that contain the NWIS site point.
tar_target(p1_huc10_nwis_sites,
get_huc(AOI = p1_huc08_nwis_sites, type='huc10') %>%
# TODO: Routing is unverified at this time. Some HUC10s may drain
# into the next one downstream, in which case more should be included.
st_filter(p1_nwis_sites_sf, .predicate = st_contains)),
##### Download the PRISM meteo data #####
tar_target(p1_prism_dir, '1_download/prism_data'),
tar_target(p1_prism_vars, c('tmean', 'ppt')),
tar_target(p1_prism_dates, seq(from = as.Date("1981-01-01"),
to = as.Date("2022-09-30"), by = "days")),
# Batch the dates so that we can query in small chunks and
# therefore rebuild only the dates that fail, without storing
# thousands of dynamic branches
tar_group_count(p1_prism_download_batches,
tibble(date = p1_prism_dates),
count = 20),
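# (For scale: 1981-01-01 through 2022-09-30 is 15,248 days, so 20 batches
# is roughly 760 dates per branch; crossed with the two PRISM variables
# below, that makes 40 dynamic branches total.)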
tar_target(p1_prism_files, {
# Set the directory where the prism files will go
prism_set_dl_dir(p1_prism_dir)
# Download each date for the current variable from PRISM
get_prism_dailys(
type = p1_prism_vars,
dates = p1_prism_download_batches$date,
keepZip = FALSE
)
# To track files and changes, list the files saved in the folder
# as the output here. This works because each subfolder is named
# with the variable and date, so adding dates or variables will
# change this output.
var_files <- list.files(p1_prism_dir, pattern = p1_prism_vars)
return(tibble(prism_var = p1_prism_vars,
prism_files = var_files))
},
pattern = cross(p1_prism_vars, p1_prism_download_batches),
# Sometimes there is a temporary timeout when pulling a date, and
# retrying has usually fixed it. To handle this automatically, use
# `error = "null"` so that this target moves on and builds all branches
# BUT is not considered "complete", and thus retries any branch that
# errored the next time the pipeline is built.
error = "null")
# If you download the zip of all the pre-downloaded prism data, uncomment
# this target and comment out the one above instead. Make sure you
# unzip the files and place them in `1_download/prism_data/`
# tar_target(p1_prism_files,
# list.files('1_download/prism_data'))
)
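# Usage note: because the PRISM target uses `error = "null"`, branches that
# errored (e.g., on a temporary timeout) remain incomplete, so re-running
# targets::tar_make() retries just those branches on the next build.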