-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathalligator_weed.R
146 lines (103 loc) · 4.76 KB
/
alligator_weed.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#install.packages("dimensionsR")
library(dimensionsR)
library(tidyverse)
library(tictoc)
library(bib2df)
library(rscopus)
library(bibliometrix)
library(readxl)
# Start the run timer; the elapsed time is reported by toc() at the end of
# the script.
tic()

# Authenticate against the Dimensions API.
# NOTE(review): an API key should not live in version control. The key is
# read from the DIMENSIONS_API_KEY environment variable when it is set; the
# literal below is kept only as a fallback so existing runs keep working.
# Rotate that key and delete the fallback.
token <- dsAuth(
  key = Sys.getenv(
    "DIMENSIONS_API_KEY",
    unset = "5D70B3712E044A41B40C922330029585"
  )
)

# Scientific names (accepted name plus synonyms) to search for.
weed_list <- c(
  "Alternanthera philoxeroides",
  "Telanthera philoxeroides",
  "Achyranthes philoxeroides"
)

# Abbreviated forms of each name: genus initial followed by the rest of the
# name, with ("A. philoxeroides") and without ("A philoxeroides") a period.
weed_epithet <- sub("^\\S+\\s+", "", weed_list)
weed_list_abr <- paste0(substr(weed_list, 1, 1), ". ", weed_epithet)
weed_list_abr2 <- paste0(substr(weed_list, 1, 1), " ", weed_epithet)

# One Dimensions DSL query per entry of weed_list. paste0() is vectorised
# over the three name vectors; the adjacent string literals are simply
# concatenated, so the query text is unchanged by the line breaks.
dimensions_query <- paste0(
  "search publications in title_abstract_only for \"(",
  "\\\"invasi*\\\"OR\\\"introduced species\\\"OR\\\"toxicity\\\"",
  "OR\\\"dispersal\\\"OR\\\"poisoning\\\"OR\\\"vector\\\"",
  "OR\\\"invasive species\\\"OR\\\"invasive organisms\\\"",
  "OR\\\"weed control\\\")AND(\\\"",
  weed_list,
  "\\\"OR\\\"",
  weed_list_abr,
  "\\\"OR\\\"",
  weed_list_abr2,
  "\\\"OR\\\"Alligatorweed\\\")\"",
  " where year in [ 1900 : 2022 ] and type in [ \"article\" ]",
  " return publications[type + basics + extras + authors + concepts + abstract]"
)

# Run every query and collect the per-species tables in a list, then bind
# them once (instead of growing the result inside the loop).
dimensions_results <- lapply(seq_along(weed_list), function(i) {
  query_result <- dsApiRequest(
    token = token,
    query = dimensions_query[i],
    step = 200,
    limit = 50000
  )
  ds_temp <- dsApi2df(query_result)
  # rep() keeps the assignment valid when a query returns zero rows.
  ds_temp$weed_searched <- rep(weed_list[i], nrow(ds_temp))
  cat(
    "Searched species ", i, " of ", length(weed_list),
    " (", weed_list[i], ").\n"
  )
  ds_temp
})
dimensions_raw <- bind_rows(dimensions_results)

papers <- dimensions_raw

# Normalised title (punctuation stripped, lower case) used to detect
# duplicate records.
papers$title <- tolower(str_replace_all(papers$TI, "[[:punct:]]", ""))

# Lower-cased title, abstract (AB), DE and SO fields joined into one
# "combined" column that the word filters search.
papers$combined <- paste(
  papers$title,
  tolower(papers$AB),
  tolower(papers$DE),
  tolower(papers$SO)
)

# Every paper starts as "neutral" with an empty reason.
papers <- papers %>% mutate(status = "neutral", reason = "")
# Section 2a. Clean data -----------------------------------------
# Rows whose normalised title repeats an earlier row are recorded in
# removed_papers, flagged as "bad" with the reason "duplicate".
removed_papers <- papers %>%
  filter(duplicated(title)) %>%
  mutate(status = "bad", reason = "duplicate")
# The master list keeps only the first row for each normalised title; the
# helper title column is dropped afterwards.
papers <- select(distinct(papers, title, .keep_all = TRUE), -"title")
# Mark papers as "good" when their journal name (SO) matches a pattern.
#
# Arguments:
#   input  List or character vector of regular expressions. Each one is
#          matched with grepl() against the lower-cased SO column, so the
#          patterns should be written in lower case.
#   data   Table with SO, status and reason columns. Defaults to the
#          script-level `papers` table, so keep_words(input) still works.
#
# Returns `data` where every row matching a pattern has status "good" and
# "<pattern> journal" added to its reason (comma-separated when several
# patterns match). Only rows that match the current pattern are touched:
# previously, a row already marked "good" had every later pattern appended
# to its reason even when that pattern did not match.
keep_words <- function(input, data = papers) {
  journal <- tolower(data$SO)
  # unlist() accepts both list("a", "b") and c("a", "b"); an empty input
  # yields zero iterations instead of the 1:0 trap of 1:length(input).
  for (word in as.character(unlist(input))) {
    hit <- grepl(word, journal)
    if (!any(hit)) {
      next
    }
    label <- paste0(word, " journal")
    previous <- data$reason[hit]
    data$reason[hit] <- ifelse(
      previous == "",
      label,
      paste0(previous, ", ", label)
    )
    data$status[hit] <- "good"
  }
  data
}
# Remove papers whose `combined` text matches any of the given patterns.
#
# Arguments:
#   input  List or character vector of regular expressions, each matched
#          with grepl() against the `combined` column.
#   data   Table with combined and status columns. Defaults to the
#          script-level `papers` table, so remove_words(input) still works.
#
# Rows with status "good" are never removed. Returns `data` without the
# removed rows.
#
# Side effect: every removed row is appended to the global `removed_papers`
# table with status "bad" and the matching pattern as its reason. The
# global assignment (<<-) is kept because the rest of the script reads
# `removed_papers` after calling this function.
remove_words <- function(input, data = papers) {
  # unlist() accepts both list("a", "b") and c("a", "b"); an empty input
  # yields zero iterations instead of the 1:0 trap of 1:length(input).
  for (pattern in as.character(unlist(input))) {
    # Compute the mask once and reuse it for both the removed and kept rows.
    is_removed <- grepl(pattern, data$combined) & data$status != "good"
    to_remove <- data %>%
      filter(is_removed) %>%
      mutate(status = "bad", reason = pattern)
    removed_papers <<- bind_rows(removed_papers, to_remove)
    data <- data %>%
      filter(!is_removed)
  }
  data
}
# Section 2b. clean data cont. ----------------------------------------------------------
# Flag papers as "good" when the journal name contains one of these words.
papers <- keep_words(list("toxicology", "weed"))
# Drop papers (unless already "good") whose combined text contains any of
# these terms.
papers <- remove_words(list(
  "insect pest", "pests", "carnivore", "domesticated", "folk", "herbal",
  "essential oils", "soil organic matter", "cover crop"
))
# Section 3 Output -----------------------------------------
# Count the kept papers for each status / reason combination.
kept_summary <- summarise(group_by(papers, status, reason), n())
# Give the summary columns readable names.
names(kept_summary) <- c("kept papers", "reason", "count")
# Workaround: wrap reason in noquote() to get rid of the quotation marks
# that appeared in that column.
kept_summary <- mutate(kept_summary, reason = noquote(reason))
# Same count for the removed papers.
removed_summary <- summarise(group_by(removed_papers, status, reason), n())
# Give the summary columns readable names.
names(removed_summary) <- c("removed papers", "reason", "count")
removed_summary
# Number of kept papers per searched species name.
count_weeds <- count(papers, weed_searched)
# Bibliometric analysis of the kept papers and its summary.
results <- biblioAnalysis(papers)
summary_stats <- summary(results)
# Keep the columns of interest under descriptive names, then export to CSV.
papers <- select(
  papers,
  title = TI,
  author = AF,
  source = SO,
  url = URL,
  year = PY,
  doi = DI,
  abstract = AB,
  type = DT,
  keywords = DE,
  funding = FU,
  country = AU_CO,
  organisation = AU_UN,
  database = DB,
  weed_searched = weed_searched
)
write.csv(papers, "alligator_weed.csv")
# Report the elapsed time since tic().
toc()