#!/usr/bin/Rscript
#.libPaths("/usr/lib64/R/library/")
#
# Log of recent edits (2022-current)
#
# 11/16/24: Fixed bug in Adult by Gen code that caused crashes on certain servers
# 11/5/24: Finished converting code to replace "raster" with "terra" functions
# 11/29/23: Fixed bug in PEM code block that sometimes resulted in errors when
# cohorts were missing due to the event not occurring in that cohort ("calc" function).
# 10/3/23: Fixed bug that prevented climate data from directories with non-numeric
# characters from being imported.
# 11/2/22: Add ability to compute a lognormal distribution and skewed normal
# distribution of emergence times; changed calculation of average date of pest
# event to a weighted average to better account for cohort distributions;
# edited the "cohort_chunks" list object to avoid errors when applying >7 cohorts.
# 9/21/22: Bug fix in Adult by Gen maps when using older raster and/or sp packages
# 8/26/22: Output a raster for StageCount for present day, new PEM colors
# 6/22/22: Added ability to use CDAT data for China; simplified code for
# importing different types of data
# 6/9/22: Added ability to use E-OBS data for Europe
# DDRP v3
options(echo = FALSE)
# Load the required packages
pkgs <- c("doParallel", "dplyr", "foreach", "ggplot2", "ggthemes",
"lubridate", "maps", "mgsub", "optparse", "parallel",
"purrr", "RColorBrewer", "readr", "terra",
"stringr", "tidyr", "tictoc", "tools", "toOrdinal", "sf")
ld_pkgs <- lapply(pkgs, library,
lib.loc = "/usr/lib64/R/library/", character.only = TRUE)
# Load collection of functions for this model
source("/usr/local/dds/DDRP_B1/DDRP_v3_funcs.R")
# Bring in states feature for summary maps (PNG files)
# Requires the "maps" package (used via ggplot2's map_data function)
cat("\n\nLoading US states feature\n")
states <- map_data("state")
# Start timing the model run
tic("Total run time")
################################################################################
######## Header Documentation Section #############
# Cohorts DDRP: Degree-Day, establishment Risk, and Pest event mapping system #
######## By Brittany Barker, Len Coop, Gericke Cook, Dan Upper, and ###########
######## Tyson Wepprich for APHIS PPQ and IPM needs ###########################
################################################################################
# DDRP v3 is an update from the previous version of DDRP (DDRP_v2.R)
# It uses "terra" instead of "raster" and will eventually include moisture-processing
# features. As in the previous version of DDRP, it has the following features:
# 1. The ability to accommodate multiple population cohorts, in which a user-
# defined number of cohorts emerges from the overwintering stage at different
# times, such that a population in any given area could contain a mix of
# individuals in different life stages and generations
# 2. Parallel processing, in which several processes are run in parallel in
# order to speed up the model run (e.g., temperature raster cropping,
# daily loop runs for multiple CONUS tiles and cohorts, production of raster
# and summary map outputs)
# 3. A new summary map type - Lifestage by Generation, which shows the relative
# population size of a specific life stage (currently only adults) for each
# distinct generation (e.g., Gen1 adults, Gen 2 adults...)
# 4. Other more minor improvements/features are included as well, such as:
# a) the ability for summary maps to show a specific state or region of
# interest. The previous version of DDRP can only show CONUS.
# b) the ability to generate summary maps for odd generations only. This
# may be useful if there is a lot of spatial overlap between generations,
# which would make it difficult to see where the boundaries of Gen1 and
# Gen 3 are, for example.
# c) the use of a command line parser that includes parameter abbrevs.
# d) the ability to include or exclude leap day in the model run
#
#
# (1). PARAM HANDLING -----
#### * Process command line args ####
# Uses the "optparse" package, a command line parser similar to Python's optparse module
option_list <- list(
make_option(c("--spp"), action = "store", type = "character",
default = NA, help = "species parameter file name"),
make_option(c("--forecast_data"), action = "store", type = "character",
default = NA, help = "weather data for forecast"),
make_option(c("--start_year"), action = "store", type = "character",
default = NA, help = "start year"),
make_option(c("--start_doy"), action = "store", type = "integer",
default = NA, help = "start day of year"),
make_option(c("--end_doy"), action = "store", type = "integer",
default = NA, help = "end day of year"),
make_option(c("--keep_leap"), action = "store", type = "integer",
default = NA, help = "should leap day be kept? 0=no, 1=yes"),
make_option(c("--region_param"), type = "character", action = "store",
default = NA, help = "study region: CONUS, EAST, WEST, or
state (2-letter abbr.)"),
make_option(c("--exclusions_stressunits"), type = "integer", action = "store",
default = NA, help = "0 = off, 1 = on"),
make_option(c("--pems"), type = "integer", action = "store",
default = NA, help = "0 = off, 1 = on"),
make_option(c("--mapA"), type = "integer", action = "store",
default = NA, help = "0 = off, 1 = on"),
make_option(c("--mapE"), type = "integer", action = "store",
default = NA, help = "0 = off, 1 = on"),
make_option(c("--mapL"), type = "integer", action = "store",
default = NA, help = "0 = off, 1 = on"),
make_option(c("--mapP"), type = "integer", action = "store",
default = NA, help = "0 = off, 1 = on"),
make_option(c("--out_dir"), type = "character", action = "store",
default = NA, help = "name of out directory"),
make_option(c("--out_option"), type = "integer", action = "store",
default = NA, help = "sampling frequency: 1 = 30 days;
2 = 14 days; 3 = 10 days; 4 = 7 days; 5 = 2 days; 6 = 1 day"),
make_option(c("--ncohort"), type = "integer", action = "store",
default = NA, help = "number of cohorts"),
make_option(c("--odd_gen_map"), type = "numeric", action = "store",
default = NA, help = "0 = off, 1 = on")
)
# Read in commands
# If command line isn't used, then opts list elements will be NA
opts <- parse_args(OptionParser(option_list = option_list))
if (!is.na(opts[1])) {
spp <- opts$spp
forecast_data <- opts$forecast_data
start_year <- opts$start_year
start_doy <- opts$start_doy
end_doy <- opts$end_doy
keep_leap <- opts$keep_leap
region_param <- opts$region_param
exclusions_stressunits <- opts$exclusions_stressunits
pems <- opts$pems
mapA <- opts$mapA
mapE <- opts$mapE
mapL <- opts$mapL
mapP <- opts$mapP
out_dir <- opts$out_dir
out_option <- opts$out_option
ncohort <- opts$ncohort
odd_gen_map <- opts$odd_gen_map
} else {
#### * Default values for params, if not provided in command line ####
spp <- "SLF" # Default species to use
forecast_data <- "PRISM" # Forecast data to use (PRISM or NMME)
start_year <- "2024" # Year to use
start_doy <- 1 # Start day of year
end_doy <- 365 # End day of year - need 365 if voltinism map
keep_leap <- 1 # Should leap day be kept?
region_param <- "WEST" # Region
exclusions_stressunits <- 1 # Turn on/off climate stress unit exclusions
pems <- 1 # Turn on/off pest event maps
mapA <- 1 # Make maps for adult stage
mapE <- 1 # Make maps for egg stage
mapL <- 1 # Make maps for larval stage
mapP <- 1 # Make maps for pupal stage
out_dir <- "LBAM_test" # Output dir
out_option <- 1 # Sampling frequency
ncohort <- 7 # Number of cohorts to approximate end of OW stage
odd_gen_map <- 0 # Create summary plots for odd gens only (gen1, gen3, ..)
}
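# Example invocation (hypothetical paths and values) showing how the optparse
# flags defined above are passed on the command line:
#
# Rscript DDRP_v3.R --spp SLF --forecast_data PRISM --start_year 2024 \
#   --start_doy 1 --end_doy 365 --keep_leap 1 --region_param WEST \
#   --exclusions_stressunits 1 --pems 1 --mapA 1 --mapE 1 --mapL 1 --mapP 1 \
#   --out_dir SLF_test --out_option 1 --ncohort 7 --odd_gen_map 0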
# (2). DIRECTORY INIT ------
#### * Param inputs - species params; thresholds, weather, etc. ####
params_dir <- "/usr/local/dds/DDRP_B1/spp_params/"
#### * Weather inputs and outputs - climate data w/subdirs 4-digit year ####
if (forecast_data == "PRISM") {
base_dir <- "/data/PRISM/"
# TO DO: add command line parameter for different GCMs
} else if (forecast_data == "MACAV2") {
gcm <- "GFDL-ESM2M" # TO DO: command line parameter
base_dir <- paste0("/data/macav2metdata/", gcm, "/")
} else {
base_dir <- paste0("/data/", forecast_data, "/")
}
forecast_dir <- paste0(base_dir, start_year)
cat("\nBASE DIR: ", base_dir, "\n")
cat("\nWORKING DIR: ", forecast_dir, "\n")
#### * Output directory, log file, and error message file ####
# MUST remove .tif files or script will crash during processing because it will
# try to analyze previously processed results.
#output_dir <- paste0("/home/httpd/html/CAPS/", spp, "_cohorts")
output_dir <- paste0("/usr/local/dds/DDRP_B1/DDRP_results/", out_dir)
# If the directory already exists, then a backup directory will be created that
# contains the old run files. Old backup directories will be removed if present.
if (file.exists(output_dir)) {
# Create a directory for the previous run to backup old run files to and
# delete old backup (if present)
backup_dir <- paste0(output_dir, "/previous_run")
unlink(backup_dir, recursive = TRUE)
dir.create(backup_dir)
# Copy dirs and files to backup dir. Exclude the new backup dir using !grepl.
flsToCopy <- list.files(output_dir, full.names = TRUE)
flsToCopy <- flsToCopy[!grepl(pattern = backup_dir, flsToCopy)]
file.copy(flsToCopy, backup_dir, recursive = TRUE)
# Delete remaining folders and files now that they have been backed up
# First ensure that flsToCopy actually contains output_dir in names to
# avoid potential fiascos of deleting files from other non-target directories
if (all(grepl(output_dir, flsToCopy))) {
lapply(flsToCopy, unlink, recursive = TRUE)
}
cat("\n", str_wrap(paste0("EXISTING OUTPUT DIR: ", output_dir, "\n",
"moved old run files to", backup_dir, "\n"),
width = 80), sep = "")
# If no files to backup, don't do anything
} else {
dir.create(output_dir)
cat("NEW OUTPUT DIR:", output_dir, "\n")
}
# Push out a rlogging file with all main messages in model
# Put all log, message, and metadata files in a separate folder
setwd(output_dir)
dir.create("Logs_metadata")
Model_rlogging <- "./Logs_metadata/Model_rlogging.txt"
# Make header for logging file
cat(paste0(rep("#", 36), collapse = ""), "\n",
"### Log file for DDRP v3 ###\n",
paste0(rep("#", 36), collapse = ""), "\n\n", sep = "",
file = Model_rlogging)
# Record PRISM and output dir
cat("BASE DIR: ", base_dir, "\n", file = Model_rlogging, append = TRUE)
cat("WORKING DIR: ", forecast_dir, "\n", file = Model_rlogging, append = TRUE)
# Push out a message file with all R error messages
msg <- file(paste0(output_dir, "/Logs_metadata/rmessages.txt"), open = "wt")
sink(msg, type = "message")
# (3). PARAMETER AND SETTINGS SETUP -----
cat("PARAMETER AND SETTINGS SETUP: getting parameters to use in model\n",
file = Model_rlogging, append = TRUE)
cat("\n\nPARAMETER AND SETTINGS SETUP: getting parameters to use in model\n")
# Read from source param files in ./spp_params/SPP.params
param_file <- sprintf("%s%s", spp, ".params")
spp <- gsub(".params", "", param_file) # Get species abbr.
species_params <- sprintf("%s%s", params_dir, param_file) # Location of file
if (file.exists(species_params)) {
cat("Species params: ", species_params, "\n", file = Model_rlogging,
append = TRUE)
source(species_params) # Read in species parameters
cat("Reading params for species: ", spp, " Fullname: ", fullname, "\n",
file = Model_rlogging, append = TRUE)
cat("\nReading params for species: ", spp, " Fullname: ", fullname, "\n")
} else {
cat("Param file: ", species_params, "...not found; exiting program\n",
file = Model_rlogging, append = TRUE)
cat("\nParam file: ", species_params, "...not found; exiting program\n")
q() # No reason to keep going without any params
}
# Change year to numeric if it's a specific year
# If using climate normals, the start year is the first 4 characters after the
# variable name in the climate data file (e.g., "PRISM_tmax_30yr6190..")
if (!grepl("[A-Za-z]", start_year)) {
start_year <- as.numeric(start_year)
} else {
fl_nam <- list.files(forecast_dir, pattern = ".tif$|.bil$|.gri$")[1]
splits <- str_count(fl_nam, "_") + 1
fl_nam_split <- str_split_fixed(fl_nam, "_", splits)
start_year <- fl_nam_split[which(str_detect(fl_nam_split, "yr"))]
}
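# Example (hypothetical climate-normals file name): for
# "PRISM_tmax_30yr6190_14.bil", the splits above give
# c("PRISM", "tmax", "30yr6190", "14.bil"), and the element containing
# "yr" ("30yr6190") becomes start_year.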
# Set up start and stop day of year depending on whether it's a leap year or
# not (need to modify the last day of the year if the year is a leap year
# and user wants to include it)
# This does not apply to 30 yr climate data, which would not have a numeric
# start year
if (is.numeric(start_year)) {
# Use lubridate's leap_year(), which also handles century years correctly
if (leap_year(start_year) & keep_leap == 1) {
cat(str_wrap(paste0(start_year, " is a leap year and leap day (2/29) will be
included in the model"), width = 80), "\n",
file = Model_rlogging, append = TRUE)
# Need to add an extra day onto year if all 365 days are being included
if (end_doy == 365) {
end_doy <- 366
}
} else if (leap_year(start_year) & keep_leap == 0) {
cat(str_wrap(paste0(start_year, " is a leap year but leap day (2/29) will
not be included in the model"), width = 80), "\n",
file = Model_rlogging, append = TRUE)
} else if (!leap_year(start_year)) {
cat(start_year, "is not a leap year - ignoring 'keep_leap' parameter\n",
file = Model_rlogging, append = TRUE)
}
}
# Check for appropriate command line parameters
# Exit program if no pest event maps have been specified but users want pest
# event maps (pems = 1)
if (pems == 1 & !(1 %in% c(mapA, mapE, mapL, mapP))) {
cat("\n", str_wrap("No pest event maps (mapA, mapE, mapL, mapP) specified;
exiting program", width = 80), sep = "",
file = Model_rlogging, append = TRUE)
cat("\n", str_wrap("No pest event maps (mapA, mapE, mapL, mapP) specified;
exiting program", width = 80), sep = "")
q()
}
# Exit program if an incorrect sampling frequency has been specified
if (!(out_option %in% c(1, 2, 3, 4, 5, 6))) {
cat("Out_option =", out_option, "is unacceptable; exiting program\n",
file = Model_rlogging, append = TRUE)
cat("Out_option =", out_option, "is unacceptable; exiting program\n")
q()
}
# Exit program if ncohort is < 1 (must be at least 1)
if (ncohort < 1) {
cat("\n", str_wrap("Number of cohorts must be greater than or equal to 1;
exiting program", width = 80), sep = "",
file = Model_rlogging, append = TRUE)
cat("\n", str_wrap("Number of cohorts must be greater than or equal to 1;
exiting program", width = 80), sep = "")
q()
}
# Exit if end day of year is inappropriate
if (end_doy > 366) {
cat("\n", str_wrap(paste("End day of year (end_doy) of", end_doy, "is
unacceptable; exiting program"), width = 80), sep = "",
file = Model_rlogging, append = TRUE)
cat("\n", str_wrap(paste("End day of year (end_doy) of", end_doy, "is
unacceptable; exiting program"), width = 80), sep = "")
q()
}
# Create a list of days to use for daily loop
sublist <- start_doy:end_doy
#### * Format threshold and DD params for Daily Loop ####
# Need to match length and order of stgorder, which is species specific
# Upper and lower thresholds
# OW stage will have the same threshold as actual stage (e.g., OWadult = adult)
# Need to match LDT or UDT value to stage, which first requires changing
# stage abbr to the stage name ("E" = "egg")
stage_ldt_list <- list()
stage_udt_list <- list()
j <- 1
for (i in 1:length(stgorder)) {
stg_nam <- stgorder[i]
stg_nam <- mgsub(string = stg_nam, # Requires "mgsub" package
pattern = c("OE", "OL", "OP", "OA", "E", "L", "P", "A"),
replacement = c("egg", "larvae", "pupae", "adult", "egg",
"larvae", "pupae", "adult"))
stage_ldt_val <- get(paste0(stg_nam, "LDT")) # returns LDT value for stage
stage_ldt_list[[j]] <- stage_ldt_val
stage_udt_val <- get(paste0(stg_nam, "UDT")) # returns UDT value for stage
stage_udt_list[[j]] <- stage_udt_val
j <- j + 1
}
# DD parameters - OW stage has its own DD, so change stage abbr. to stage
# name or "OW" plus stage name
stage_dd_list <- list()
j <- 1
for (i in 1:length(stgorder)) {
stg_nam <- stgorder[i]
stg_nam <- mgsub(string = stg_nam,
pattern = c("OE", "OL", "OP", "OA", "E", "L", "P", "A"),
replacement = c("OWegg", "OWlarvae", "OWpupae", "OWadult",
"egg", "larvae", "pup", "adult"))
stage_dd_val <- get(paste0(stg_nam, "DD")) # returns DD value for stage
stage_dd_list[[j]] <- stage_dd_val
j <- j + 1
}
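# Example (hypothetical stgorder from a species params file): with
# stgorder <- c("OA", "E", "L", "P", "A"), the two loops above yield
# stage_ldt = c(adultLDT, eggLDT, larvaeLDT, pupaeLDT, adultLDT) and
# stage_dd = c(OWadultDD, eggDD, larvaeDD, pupDD, adultDD), so each
# position lines up with the corresponding stage in stgorder.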
# Put the values in the list into a numeric vector
stage_ldt <- as.numeric(do.call(cbind, stage_ldt_list))
stage_udt <- as.numeric(do.call(cbind, stage_udt_list))
stage_dd <- as.numeric(do.call(cbind, stage_dd_list))
# Cohort response parameters
# SD is equal to the square root of the variance (sigma-squared) parameter
# May consider changing length.out to something else in future versions...
# Also consider making the percent (in cohort_distro) an input parameter
xdist <- seq(xdist1, xdist2, length.out = 1000)
if (distro_shape == "normal") {
ydist <- dnorm(xdist, mean = distro_mean, sd = sqrt(distro_var))
} else if (distro_shape == "lognormal") {
# Convert normal parameters to lognormal parameters
lognorms <- Normal_to_lognormal(normmean = distro_mean,
normsd = sqrt(distro_var))
# y-values for the lognormal probability density
ydist <- dlnorm(xdist, meanlog = lognorms[[1]], sdlog = lognorms[[2]])
} else if (distro_shape == "normal_skewed") {
ydist <- dsnorm(xdist, mean = distro_mean, sd = sqrt(distro_var),
xi = distro_xi)
}
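# Normal_to_lognormal(), CohortDistrib(), and dsnorm() are assumed to come
# from the sourced DDRP_v3_funcs.R file (dsnorm may alternatively be from a
# package such as fGarch). A sketch of the standard moment-matching
# conversion that Normal_to_lognormal() presumably implements, returning
# list(meanlog, sdlog) as used above:
#
# Normal_to_lognormal <- function(normmean, normsd) {
#   sdlog <- sqrt(log(1 + (normsd / normmean)^2)) # lognormal sdlog
#   meanlog <- log(normmean) - 0.5 * sdlog^2      # lognormal meanlog
#   list(meanlog, sdlog)
# }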
# Create the cohort distribution
# "relpopsize" values are used to estimate the relative size of the population
# (i.e. proportion) represented by each cohort, which initializes the population
# distribution that is maintained during subsequent stages and generations
inputdist <- data.frame(x = xdist, y = ydist) %>%
arrange(x) %>%
mutate(CDF = cumsum(y/sum(y)))
cohort_distro <- CohortDistrib(dist = inputdist, numstage = ncohort, perc = .99)
relpopsize <- round(cohort_distro$wts, 3)
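# Worked example (not run), with hypothetical distribution parameters:
#
# xdist <- seq(0, 200, length.out = 1000)
# ydist <- dnorm(xdist, mean = 100, sd = sqrt(900))
# inputdist <- data.frame(x = xdist, y = ydist) %>%
#   arrange(x) %>%
#   mutate(CDF = cumsum(y / sum(y)))
# # CohortDistrib() presumably bins the central 99% (perc = .99) of this CDF
# # into ncohort groups, returning per-cohort mean emergence DDs ("means")
# # and weights ("wts") that sum to ~1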
# Parameters of required degree-days
# Replace OW gen with emergence distro values
if (ncohort == 1) {
ddpar <- matrix(stage_dd, nrow = 1, byrow = TRUE)
stage_dd <- ddpar
} else {
ddpar <- cbind(cohort_distro$means,
matrix(rep(stage_dd[-1], nrow(cohort_distro)),
nrow = nrow(cohort_distro), byrow = TRUE))
stage_dd <- ddpar
}
# (4). METADATA OUTPUT FILE -----
# Push out a metadata file with all inputs used in model
cat("\nMETADATA: creating metadata file for all inputs used in model\n",
file = Model_rlogging, append = TRUE)
cat("\nMETADATA: creating metadata file for all inputs used in model\n")
# Create the metadata file
setwd(output_dir)
metadata <- "./Logs_metadata/metadata.txt"
cat("### Metadata for DDRP v3 ###\n", file = metadata)
# Document run date and time
cat("\nRun date and time:", strftime(Sys.time(), format = "%m/%d/%Y %H:%M"),
file = metadata, append = TRUE)
# Document species information and method used to calculate degree-days
cat("\n\n### Model Species Parameters ###\n Species Abbrev:", spp,
"\n Full Name:", fullname,
"\n Pest of:", pestof,
"\n Overwintering Stage:", owstage,
"\n Degree-day calculation method:", calctype,
file = metadata, append = TRUE)
# Document developmental threshold temperatures
cat("\n \n Developmental threshold temperatures",
"\n Egg Lower Devel Threshold:", eggLDT,
"\n Egg Upper Devel Threshold:", eggUDT,
"\n Larvae Lower Devel Threshold:", larvaeLDT,
"\n Larvae Upper Devel Threshold:", larvaeUDT,
"\n Pupae Lower Devel Threshold:", pupaeLDT,
"\n Pupae Upper Devel Threshold:", pupaeUDT,
"\n Adult Lower Devel Threshold:", adultLDT,
"\n Adult Upper Devel Threshold:", adultUDT, file =metadata, append = TRUE)
# Document stage durations
cat("\n\n Stage durations in degree-days (DDs)",
"\n Egg DDs:", eggDD,
"\n Larvae DDs", larvaeDD,
"\n Pupae DDs:", pupDD,
"\n Adult DDs:", adultDD,
file = metadata, append = TRUE)
# Document climate stress exclusion parameter values, if applicable
if (exclusions_stressunits) {
cat("\n \n Climate stress parameters",
"\n Lower Cold Threshold:", coldstress_threshold,
"\n Upper Heat Threshold:", heatstress_threshold,
"\n Max Cold Units (lower bound):", coldstress_units_max1,
"\n Max Cold Units (upper bound):", coldstress_units_max2,
"\n Max Heat Stress Units (lower bound):", heatstress_units_max1,
"\n Max Heat Stress Units (upper bound):", heatstress_units_max2,
file = metadata, append = TRUE)
}
# Document Pest Event Map parameter values, if applicable
if (pems) {
cat("\n \n Pest Event Map parameters",
"\n Number of generations to make Pest Event Maps (PEMs): ", PEMnumgens,
"\n Egg Event DDs and Label: ", eggEventDD, " (", eggEventLabel,")",
"\n Larvae Event DDs and Label: ", larvaeEventDD, " (", larvaeEventLabel, ")",
"\n Pupae Event DDs and Label: ", pupaeEventDD, " (", pupaeEventLabel, ")",
"\n Adult Event DDs and Label: ", adultEventDD, " (", adultEventLabel, ")",
sep = "", file = metadata, append = TRUE)
}
cat("\n\n### Model Input Parameters ###\n Start Year:", start_year,
"\n Weather data for forecasts:", forecast_data,
"\n Start day-of-year:", start_doy,
"\n End day-of-year:", end_doy,
"\n Region:", region_param,
"\n Climate stress exclusion maps:", exclusions_stressunits,
"\n Pest Event Maps:", pems,
"\n Adult Event Maps:", mapA,
"\n Egg Event Maps:", mapE,
"\n Larvae Event Maps:", mapL,
"\n Pupae Event Maps:", mapP,
"\n Output_Dir:", out_dir,
"\n Output option:", out_option,
"\n No. of cohorts:", ncohort,
"\n Plot odd gens only:", odd_gen_map,
"\n Mean of end of OW stage (DDs):", distro_mean,
"\n Low bound of end of OW stage (DDs), xdist:", xdist1,
"\n High bound of end of OW stage (DDs), ydist:", xdist2,
"\n Variance in end of OW stage (DDs):", distro_var,
"\n Shape of distribution of end of OW stage (DDs):", distro_shape,
file = metadata, append = TRUE)
if (distro_shape == "normal_skewed") {
cat("\n Skewness of distribution of end of OW stage (DDs):", distro_xi,
file = metadata, append = TRUE)
}
# Make a table of stage DDs for each cohort and print to metadata
stage_dd.print <- as.data.frame(round(stage_dd, 0)) %>%
rename_at(vars(names(.)), ~stgorder)
stage_dd.print <- cbind("cohort" = as.integer(rownames(stage_dd.print)),
data.frame(stage_dd.print, row.names = NULL),
"relpopsize" = relpopsize)
cat("\n\n### Durations (in degree-days) of stages in each of",
ncohort, "cohorts ###\n", file = metadata, append = TRUE)
suppressWarnings(write.table(stage_dd.print, file = metadata,
row.names = FALSE,
col.names = colnames(stage_dd.print),
append = TRUE))
cat("\nDurations (degree-days) of stages in each of", ncohort, "cohorts:\n\n")
print(stage_dd.print, row.names = FALSE)
cat("Done writing metadata file\n\n", forecast_data, " DATA PROCESSING\n",
sep = "", file = Model_rlogging, append = TRUE)
cat("\nDone writing metadata file\n\n", forecast_data, " DATA PROCESSING\n",
sep = "")
# (5). WEATHER DATA LOADING AND PROCESSING -----
# Weather inputs and outputs - PRISM climate data w/subdirs 4-digit year
# New feature - choose whether to use PRISM or NMME for weather forecasts
# (forecast_data = PRISM, or forecast_data = NMME)
# Loop through each needed variable and create a list of needed files
vars <- c("tmin", "tmax")
fls_list <- c("tminfiles", "tmaxfiles")
for (i in seq_along(vars)) {
# Create list of files
if (forecast_data %in% c("PRISM", "PRISM_800m")) {
fl_pat <- glob2rx(paste0("*PRISM_", vars[i], "*", start_year, "*.bil$*"))
} else if (forecast_data == "MACAV2") {
fl_pat <- glob2rx(paste0("*MACAV2_", vars[i], "*", start_year, "*.grd$*"))
} else {
fl_pat <- glob2rx(paste0("*", forecast_data, "_",
vars[i], "*", start_year, "*.tif$*"))
}
fls <- list.files(
path = forecast_dir,
pattern = fl_pat,
all.files = FALSE, full.names = TRUE, recursive = TRUE, ignore.case = TRUE
)
# Exit program if files are missing
if (length(fls) < length(sublist)) {
cat("Missing ", vars[i], " files for one or more days - exiting program\n")
q()
}
# PRISM: extract highest quality files and assign object name to list
# Don't do this step for daily climate averages (have word "daily" in file name)
if (forecast_data == "PRISM" & !any(grep("daily", fls))) {
fls_best <- ExtractBestPRISM(fls, forecast_data, keep_leap)[start_doy:end_doy]
} else {
#fls_best <- fls[start_doy:end_doy]
fls_best <- fls
}
assign(fls_list[i], fls_best)
}
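# ExtractBestPRISM() is defined in DDRP_v3_funcs.R. PRISM publishes each day
# at one of several stability levels ("early", "provisional", "stable"), so
# a date can have multiple files; a minimal sketch of the idea, assuming the
# stability flag appears in the file name:
#
# BestOfDay <- function(day_fls) {
#   rank <- c(stable = 1, provisional = 2, early = 3)
#   flag <- str_extract(day_fls, "stable|provisional|early")
#   day_fls[which.min(rank[flag])] # keep the highest-quality file for the day
# }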
## Extract date from temperature files using regex pattern matching
dats <- unique(regmatches(tminfiles, regexpr(pattern = "[0-9]{8}",
text = tminfiles)))
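# Example: a file named like "PRISM_tmin_stable_4kmD2_20240101_bil.bil"
# (hypothetical) yields the 8-digit date "20240101"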
# Specify sampling frequency (how many days until output maps are generated?)
# This feature may be removed in production version
if (out_option == 1) {
sample_freq <- 30 # Monthly maps
} else if (out_option == 2) {
sample_freq <- 14 # Biweekly maps
} else if (out_option == 3) {
sample_freq <- 10 # Maps every 10 days
} else if (out_option == 4) {
sample_freq <- 7 # Weekly maps
} else if (out_option == 5) {
sample_freq <- 2 # Maps every 2 days
} else if (out_option == 6) {
sample_freq <- 1 # Daily maps
}
# Make vector of dates to use when processing results
# The current date will be sampled if it's the current year AND if
# the current day falls within the range of start_doy and end_doy.
# The last date of year will always be sampled.
# Using "unique" will only keep date if it doesn't already occur in vector
# This happens if the end day of year is a multiple of the sampling frequency
# (e.g. 1 to 300, w/ a 30 day sampling frequency), or if the current date falls
# within the sampling frequency
today_dat <- strftime(Sys.time(), format = "%Y%m%d")
current_year <- strftime(Sys.time(), format = "%Y")
# If it's the current year and today falls between start_doy and end_doy, make
# a stage count map for today
stageCt_today <- start_year == current_year &
yday(Sys.time()) >= start_doy & yday(Sys.time()) <= end_doy
if (stageCt_today) {
dats2 <- sort(as.numeric(unique(c(dats[seq(0, length(dats), sample_freq)],
today_dat, last(dats)))))
} else {
dats2 <- sort(as.numeric(unique(c(dats[seq(0, length(dats), sample_freq)],
last(dats)))))
}
dats2 <- as.character(dats2) # Need to be in character format for plotting
num_dats <- length(dats2) # How many sampled dates?
# Create vector of days in the sublist that will be sampled (rasters are saved
# for those days) in the Daily Loop, and also tack on the last day in the list.
sample_pts <- c(sublist[seq(0, length(sublist), sample_freq)],
last(sublist))
# Add the present day if DDRP run is being run for the current year AND if
# the current day falls within the range of start_doy and end_doy.
if (start_year == current_year &
yday(Sys.time()) >= start_doy &
yday(Sys.time()) <= end_doy) {
today_doy <- strftime(Sys.time(), format = "%j") # Day of year
sample_pts <- sort(as.numeric(unique(c(sample_pts, today_doy))))
}
# Keep only unique sampling points (there may be duplicates for example
# if the present day is already in the list).
sample_pts <- unique(sample_pts)
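# Example: with start_doy = 1, end_doy = 365, and sample_freq = 30,
# sublist[seq(0, 365, 30)] returns days 30, 60, ..., 360 (the 0 index is
# silently dropped), and last(sublist) appends day 365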
# Log file and terminal messages
cat("Finished loading ", forecast_data, " files for ",
length(start_doy:end_doy), " days\nCreating template file for ",
region_param, "\n", sep = "", file = Model_rlogging, append = TRUE)
cat("\nFinished loading ", forecast_data, " files for ",
length(start_doy:end_doy), " days\n\nCreating template file for ",
region_param, "\n", sep = "")
#### * Create blank template from a temperature (tmin) file ####
# This template is used for cropping the temperature (tmin, tmax) rasters
# In terra, a raster must be "wrapped" in order to be analyzed in parallel
REGION <- Assign_extent(region_param) # Bounding box
template <- crop(rast(tminfiles[1]), REGION) # Template for cropping
template[!is.na(template)] <- 0 # Set all non-NA cells to 0
template_w <- wrap(template) # Wrapped template
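# (A SpatRaster holds a pointer to a C++ object that cannot be serialized to
# parallel workers; wrap() packs it into a portable object, and unwrap() in
# the parallel code below restores a usable SpatRaster.)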
#### * If CONUS/EAST/N_AMERICA/EUROPE/CHINA, split template into tiles (and run in parallel)
# The benefit of tiles is lost for smaller regions, so these are not split
# Register DoParallel
# The "RegCluster" function creates a set of copies of R running in parallel and
# communicating over sockets (parallel socket clusters). The value may be
# specified manually; however, here the value is estimated based on the number
# of available cores on the computer or server DDRP is being run on.
# Specifying too many clusters will overload the computer.
ncores <- detectCores()
# The "RegCluster" function determines an appropriate # of cores depending on
# the "region_param" and "ncohort" parameters, so the server doesn't become
# overloaded. Template is split into 4 tiles if CONUS/EAST/N_AMERICA/EUROPE/CHINA.
# TO DO: matrices may speed things up less now that "terra" has replaced "raster"?
RegCluster(round(ncores/4))
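# RegCluster() is defined in DDRP_v3_funcs.R; a minimal sketch of what it
# presumably does (it must create "cl" where stopCluster(cl) below can find it):
#
# RegCluster <- function(n) {
#   cl <<- makeCluster(n)   # parallel socket cluster (PSOCK)
#   registerDoParallel(cl)  # register the backend used by %dopar%
# }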
if (region_param %in% c("CONUS", "EAST", "N_AMERICA", "EUROPE", "CHINA")) {
# Split template (2 pieces per side)
# Ensuring that too many tiles aren't made requires adding 1 to ncol and nrow
tile_list <- makeTiles(template,
y = ceiling(c((nrow(template)+1)/2, (ncol(template)+1)/2)),
filename = "tile_.tif") # Saves the 4 raster tiles
# Crop temp files by each template tile
cat("Cropping tmax and tmin tiles for", region_param, "\n",
file = Model_rlogging, append = TRUE)
cat("\nCropping tmax and tmin tiles for", region_param, "\n")
# Crop tmin to each tile
tmin_list <- foreach(tile = tile_list, .packages = c("terra"),
.inorder = FALSE) %:%
foreach(tmin = tminfiles, .packages = c("terra"), .inorder = TRUE) %dopar% {
m <- as.matrix(crop(rast(tmin), ext(rast(tile))), wide = TRUE)
}
# Crop tmax to each tile
tmax_list <- foreach(tile = tile_list, .packages = c("terra"),
.inorder = FALSE) %:%
foreach(tmax = tmaxfiles, .packages = c("terra"), .inorder = TRUE) %dopar% {
m <- as.matrix(crop(rast(tmax), ext(rast(tile))), wide = TRUE)
}
# If region is not large regions, simply crop temp files by the single template
} else {
cat("Cropping tmax and tmin tiles for", region_param, "\n",
file = Model_rlogging, append = TRUE)
cat("\nCropping tmax and tmin tiles for", region_param, "\n")
# Crop tmin to template extent (template is wrapped)
tmin_list <- foreach(tmin = tminfiles, .packages = "terra") %dopar% {
m <- as.matrix(crop(rast(tmin), ext(unwrap(template_w))), wide = TRUE)
}
# Crop tmax to template extent (template is wrapped)
tmax_list <- foreach(tmax = tmaxfiles, .packages = "terra") %dopar% {
m <- as.matrix(crop(rast(tmax), ext(unwrap(template_w))), wide = TRUE)
}
}
stopCluster(cl)
rm(cl)
cat("Done processing ", forecast_data, " data\n\nDAILY LOOP\n", sep = "",
file = Model_rlogging, append = TRUE)
cat("\nDone processing ", forecast_data, " data\n\nDAILY LOOP\n", sep = "")
# If relevant, import tiles back in as a list - these will be the templates
if (region_param %in% c("CONUS", "EAST", "N_AMERICA", "EUROPE", "CHINA")) {
template_w <- lapply(list.files(pattern = "tile"), function(x) {
wrap(rast(x)) # Wrap for parallel processing
})
# Can delete raster tiles now
unlink(list.files(pattern = "tile"))
}
## (6). RUN THE DAILY LOOP -----
# First set up number of cores to use
# IMPORTANT: using too many cores will result in low memory, killing the daily
# loop part-way through.
# For mclapply: cannot use >1 core on Windows, so detect the OS
# TO DO: Resolve "foreach" issue with Daily Loop for CONUS so don't need this
if (grepl("Windows", Sys.info()[1])) {
mc.cores <- 1
} else {
mc.cores <- 4 # use 4 here, because there are 4 tiles being run in parallel
}
# Split cohorts into smaller chunks for CONUS/EAST/N_AMERICA/EUROPE/CHINA to
# avoid overloading memory when running in parallel.
if (region_param %in% c("CONUS", "EAST", "N_AMERICA", "EUROPE", "CHINA")) {
cohort_chunks <- split(seq_len(ncohort), ceiling(seq_len(ncohort)/2))
} else {
# TO DO: For some reason errors are returned when splitting the cohort_chunks
# list object into multiple pieces. Thus, it must be a list of length 1.
cohort_chunks <- split(seq_len(ncohort), ceiling(seq_len(ncohort)/ncohort))
}
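# Example: with ncohort = 7 for a large region, split(1:7, ceiling((1:7) / 2))
# gives list(1:2, 3:4, 5:6, 7), so at most two cohorts run at once per chunk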
tic("Daily loop run time") # Start timing the daily loop run-time
# cat("DAILY LOOP: daily loop log files show loop progress and
# output file info\n", file = Model_rlogging, append = TRUE)
cat("Sampling every", sample_freq, "days between", first(dats), "and",
last(dats), "\n", file = Model_rlogging, append = TRUE)
cat("\nSampling every", sample_freq, "days between", first(dats), "and",
last(dats), "\n")
# Run it! If there is an error the program will stop
# TO DO: This part definitely needs a better error documenting system.
# Currently, errors produced in loop are often ambiguous.
# Note below that much troubleshooting did not resolve "foreach" issue
tryCatch(
{
RegCluster(round(ncores/8)) # Change this value if server is overloaded
# If the region is CONUS/EAST/N_AMERICA/EUROPE/CHINA, then both cohorts and
# tiles will be run in parallel. To avoid overloading the server,
# mc.cores = 4 (for the 4 tiles), which keeps the load at an appropriate level.
if (region_param %in% c("CONUS", "EAST", "N_AMERICA", "EUROPE", "CHINA")) {
# mclapply is nested below, so the worker count can reach mc.cores^2 (4 x 4)
for (c in cohort_chunks) {
cat("Running daily loop for cohorts", as.character(c), "\n",
file = Model_rlogging, append = TRUE)
cat("\nRunning daily loop for cohorts", as.character(c), "\n")
cohort_vec <- unname(unlist(c)) # change to an unnamed vector
# TO DO: "foreach" runs incredibly slow in any method I've tried.
# This issue should be resolved if DDRP will be run on Windows.
# (Windows is incapable of forking with the "mclapply" function).
mclapply(cohort_vec, function(cohort) {
mclapply(1:length(template_w), function(tile_num) {
tile <- template_w[[tile_num]]
DailyLoop(cohort, tile_num, tile)
}, mc.cores = mc.cores)
}, mc.cores = mc.cores)
# system.time(
# foreach(cohort = cohort_vec, .packages = pkgs, .inorder=FALSE) %:%
# foreach(tile_num = 1:4, .packages = pkgs, .inorder = FALSE) %dopar% {
# tile <- template_w[[tile_num]]
# DailyLoop(cohort, tile_num, tile)
# }
# )
}
stopCluster(cl)
rm(cl)
} else {
# If the region is not CONUS/EAST/N_AMERICA/EUROPE/CHINA,
# then we don't need to run the function for multiple tiles.
for (c in cohort_chunks) {
cat("Running daily loop for cohorts", as.character(c), "\n",
file = Model_rlogging, append = TRUE)
cat("\nRunning daily loop for cohorts", as.character(c), "\n")
cohort_vec <- unname(unlist(c)) # change to an unnamed vector
# Only need to run cohorts in parallel
foreach(cohort = cohort_vec, .packages = pkgs,
.inorder = FALSE) %dopar% {
DailyLoop(cohort, NA, template_w)
}
stopCluster(cl)
rm(cl)
}
}
},
error = function(e) {
cat("Error in Daily Loop\n", file = Model_rlogging, append = TRUE)
cat("\nError in Daily Loop\n")
})
# Free up memory - delete unneeded large objects
rm(DailyLoop, template, tmaxfiles, tmax_list, tminfiles, tmin_list)
# Document daily loop execution time
loop_exectime <- toc(quiet = TRUE)
loop_exectime <- (loop_exectime$toc - loop_exectime$tic) / 60
cat("Daily loop done (run time = ", round(loop_exectime, digits = 2), " min)",
"\n\nFINAL ANALYSES AND MAP PRODUCTION\n", sep = "",
file = Model_rlogging, append = TRUE)
cat("\nDaily loop done (run time = ", round(loop_exectime, digits = 2), " min)",
"\n\nFINAL ANALYSES AND MAP PRODUCTION\n", sep = "")
#(7). PROCESS DAILY LOOP RESULTS -----
setwd(output_dir)
tic("Data processing run time") # Start timing for data processing
# Create a directory ("Misc_output") to put secondary outfiles
dir.create("Misc_output")
#### * If CONUS/EAST/N_AMERICA/EUROPE/CHINA, merge and delete tiles ####
if (region_param %in% c("CONUS", "EAST", "N_AMERICA", "EUROPE", "CHINA")) {
cat("\nMerging tiles for", region_param, "\n\n",
file = Model_rlogging, append = TRUE)
cat("\nMerging tiles for", region_param, "\n")
# Get list of raster files for each tile, the type of file,
# its cohort number, and then make a list of the file types
# File types are split up to avoid overloading the system when running in parallel
rast_files <- list.files(pattern = glob2rx("*cohort*tile*tif$"),
recursive = FALSE)
type_list <- unique(str_split_fixed(rast_files,
pattern = "_cohort", 4)[,1])
type_list_split <- split(type_list, ceiling(1:length(type_list)/3))
cohorts <- unique(str_split_fixed(rast_files, pattern = "cohort", 4)[,2])
cohorts <- unique(substring(cohorts, 1, 1)) # Vector of cohorts (1, 2, 3, ...)
# For each file type, merge the tiles for all cohorts
# File type and cohorts are both run in parallel to increase speed
# If system is overloaded then consider:
# 1) splitting up the cohorts list (as in the "type_list_split"); or
# 2) decreasing the number of splits in "type_list_split")
RegCluster(4)
mrg_by_type <- foreach(type = type_list_split, .packages = pkgs,
.inorder = FALSE) %dopar% {
type_vec <- unname(unlist(type)) # Change to an unnamed vector
for (t in type_vec) {
# If type is exclusion, stressunits, or ddtotal files,
# then just do cohort 1; no other cohorts present
if (grepl("Stress_Excl|Stress_Units|DDtotal", t)) {
CombineMaps(rast_files, t, "1")
cat("Merged", t, "tiles for cohort 1\n",
file = Model_rlogging, append = TRUE)
# If another file type, then merge tiles for all cohorts
} else {
foreach(c = cohorts, .packages = pkgs, .inorder = TRUE) %dopar% {
CombineMaps(rast_files, t, c)
cat("Merged", t, "tiles for cohort", c, "\n",
file = Model_rlogging, append = TRUE)
}
}
}
}
stopCluster(cl)
rm(cl)
cat("\nDone merging tiles\n", file = Model_rlogging, append = TRUE)
cat("\nDone merging tiles\n")
}
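# CombineMaps() lives in DDRP_v3_funcs.R; a minimal sketch of tile merging
# with terra, assuming file names follow the "<type>_cohort<c>...tile..." and
# "_all" conventions referenced below:
#
# CombineMaps <- function(rast_files, type, cohort) {
#   tiles <- rast_files[grepl(paste0(type, "_cohort", cohort), rast_files) &
#                         grepl("tile", rast_files)]
#   merged <- merge(sprc(lapply(tiles, rast))) # mosaic the tile rasters
#   writeRaster(merged, paste0(type, "_cohort", cohort, "_all.tif"))
# }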
# If CONUS/EAST/N_AMERICA/EUROPE/CHINA, remove tile files, but first check
# that merged files exist for each type (e.g., Lifestage, NumGen, ...)
if (region_param %in% c("CONUS", "EAST", "N_AMERICA", "EUROPE", "CHINA")) {
cat("\nDeleting tiles for", region_param, "\n\n",
file = Model_rlogging, append = TRUE)
cat("\nDeleting tiles for", region_param, "\n")
for (t in type_list) {
# Should only be a single merged file for these types
# Remove the "_all" from the name of the file - this isn't needed
if (grepl("Stress_Excl|Stress_Units|DDtotal", t)) {
fl <- list.files(pattern = glob2rx(paste0("*", t, "_*all*tif$")))
if (length(fl) == 1) {
unlink(list.files(pattern = glob2rx(paste0("*", t, "_*tile*tif$"))))
}
# For other types, the number of merged files should equal the number
# of cohorts. The exception is if the OW stage DD distro parameters for
# cohorts result in some cohorts not making DD cutoffs for PEMs -
# a warning will be recorded if this is the case
} else {
fls <- list.files(pattern = glob2rx(paste0("*", t, "_*all*tif$")))
if (length(fls) == ncohort) {
unlink(list.files(pattern = glob2rx(paste0("*", t, "_*tile*tif$"))))
cat("Deleted tile files for", t, "\n",
file = Model_rlogging, append = TRUE)
# Generate a warning message if some cohorts are missing
} else {
unlink(list.files(pattern = glob2rx(paste0("*", t, "_*tile*tif$"))))
cat("\nWarning: only", length(fls),
"cohort files found for", t, "- check params\n",
file = Model_rlogging, append = TRUE)
cat("\nWarning: only", length(fls),
"cohort files found for", t, "- check params\n")
}
}
}
# Remove "_all" from file names - not needed
fls <- list.files(pattern = "\\.tif$")
file.rename(from = fls, to = sub(pattern = "_all.tif",
replacement = ".tif", fls))
cat("\nDone deleting tile files\n", file = Model_rlogging, append = TRUE)
cat("\nDone deleting tile files\n")
}
#### * Some ggplot2 settings ####
# Some ggplot2 settings are specified here, but other settings are specified
# in the functions file
# Map production in ggplot requires specifying plot.height and plot.width
# These need to be dynamic because regions have different aspect ratios,
# which otherwise results in warped-looking maps
# "coord_sf" replaced "coord_quickmap"; has wrapper for est. map aspect ratio
# Calculate bounding box (xmin, xmax, ymin, ymax) of REGION
coord <- coord_sf(xlim = c(REGION$xmin, REGION$xmax),
ylim = c(REGION$ymin, REGION$ymax), expand = FALSE)
asp <- coord$aspect(list(x_range = c(REGION$xmin, REGION$xmax),
y_range = c(REGION$ymin, REGION$ymax))) # aspect ratio
# Set x and y limits - used for plotting
xlims <- as.numeric(as.matrix(REGION)[1,])
ylims <- as.numeric(as.matrix(REGION)[2,])
# Adjust base_size for ggplot2 (font size) according to aspect ratio
if (asp >= 1.7) {
base_size <- 10.5
legend_units <- 1.4
} else if (asp >= 1.5 & asp < 1.7) {
base_size <- 9.5
legend_units <- 1.3
} else if (asp >= 1.2 & asp < 1.5) {
base_size <- 8.5
legend_units <- 1.2
} else if (asp < 1.2 & asp >= 0.6) {
base_size <- 7
legend_units <- 1
} else if (asp < 0.6 & asp >= 0.45) {
base_size <- 5.7
legend_units <- 0.6
} else if (asp < 0.45) {
base_size <- 5.2