-
Notifications
You must be signed in to change notification settings - Fork 0
/
address_match.R
58 lines (49 loc) · 2.23 KB
/
address_match.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
library(postmastr)
library(tidyverse)
add_cleaned_addresses <- function(in.df) {
  # Returns a data frame with a new column "cleaned_address"
  # which is the formatted version of input column "full_address".
  #
  # Required parameter in.df must be a data frame containing a column
  # "full_address" that contains full address info
  # (house-number street, city-name, WA, ZIP Code).
  #
  # On success the result also carries "full_address" with the literal text
  # "N.W." normalised to "NW"; the postmastr bookkeeping columns pm.id,
  # pm.uid and pm.type are dropped before returning.
  #
  # Best-effort: if any step fails, a warning carrying the underlying error
  # message is raised and the untouched input is returned with
  # "cleaned_address" set to NA on every row (NULL when in.df is not a data
  # frame). Callers therefore never receive a message string as the result.
  tryCatch({
    cities <- pm_dictionary(type = "city", filter = "WA",
                            case = c("title", "upper"), locale = "us")

    # fixed() keeps the dots literal. As a regex, "N.W." also matches text
    # such as "NEWS" or "NEWP" and would corrupt street names like NEWPORT.
    # A separate local is used so in.df stays pristine for the error fallback.
    normalised <- in.df %>%
      mutate(full_address = str_replace_all(full_address,
                                            fixed("N.W."), "NW"))

    df_ident <- pm_identify(normalised, var = 'full_address', locale = "us")
    df_min <- pm_prep(df_ident, var = 'full_address', type = 'street')

    # Extend the stock directional dictionary with dotted abbreviations.
    my_dirs <- pm_append(
      "directional",
      input = c("N.W.", "N.E.", "S.W.", "S.E.", "N.W", "N.E", "S.W", "S.E"),
      output = c("NW", "NE", "SW", "SE", "NW", "NE", "SW", "SE"),
      locale = "us"
    )
    dirs <- pm_dictionary("directional", append = my_dirs)

    # Parse order is kept exactly as in the original implementation.
    df_min <- pm_postal_parse(df_min)
    df_min <- pm_state_parse(df_min)
    df_min <- pm_city_parse(df_min, dictionary = cities)
    df_min <- pm_house_parse(df_min)
    df_min <- pm_houseRange_parse(df_min)
    df_min <- pm_houseFrac_parse(df_min)

    # Drop hyphens and leading blanks left in pm.address after the house
    # parsers, before street direction/suffix/name parsing runs.
    df_min <- df_min %>%
      mutate(pm.address = str_replace_all(pm.address, "-", "")) %>%
      mutate(pm.address = str_trim(pm.address, side = "left"))

    df_min <- pm_streetDir_parse(df_min, dictionary = dirs)
    df_min <- pm_streetSuf_parse(df_min)
    df_min <- pm_street_parse(df_min)

    df_parsed <- pm_replace(df_min, source = df_ident)
    df_parsed <- pm_rebuild(df_parsed,
                            output = "full",
                            include_commas = TRUE,
                            keep_parsed = "no",
                            keep_ids = TRUE) %>%
      select(c('pm.id', 'pm.address')) %>%
      # Strip stray double quotes and the blank before a comma.
      mutate(pm.address = str_remove_all(pm.address, "\"")) %>%
      mutate(pm.address = str_replace_all(pm.address, " ,", ","))

    # Attach the rebuilt address to the identified rows via pm.id.
    df_ident %>%
      left_join(df_parsed, by = 'pm.id') %>%
      select(-c('pm.id', 'pm.uid', 'pm.type')) %>%
      rename('cleaned_address' = 'pm.address')
  }, error = function(e) {
    # Signal the failure instead of only printing it, and report just the
    # error message rather than the whole condition object.
    warning("add_cleaned_addresses failed: ", conditionMessage(e),
            call. = FALSE)
    if (!is.data.frame(in.df)) {
      return(NULL)
    }
    # rep() keeps the assignment valid for zero-row inputs as well.
    in.df$cleaned_address <- rep(NA_character_, nrow(in.df))
    in.df
  })
}