Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update loanbook demo NACE codes #338

Merged
merged 5 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 5 additions & 32 deletions data-raw/classification_bridge.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,38 +7,11 @@ nace_classification_raw <- read_bridge(
file.path("data-raw", "nace_classification.csv")
)

# Build `code` by prefixing each `original_code` with a NACE section letter.
# The letter is chosen from the integer part of the numeric code; codes that
# already are a single capital letter get no prefix, and anything that matches
# no range is marked with "Z".
nace_classification <- nace_classification_raw %>%
  mutate(
    prepend_value = case_when(
      original_code %in% LETTERS ~ "",
      trunc(as.numeric(original_code)) %in% 1:3 ~ "A",
      trunc(as.numeric(original_code)) %in% 5:9 ~ "B",
      trunc(as.numeric(original_code)) %in% 10:33 ~ "C",
      trunc(as.numeric(original_code)) %in% 35 ~ "D",
      trunc(as.numeric(original_code)) %in% 36:39 ~ "E",
      trunc(as.numeric(original_code)) %in% 41:43 ~ "F",
      trunc(as.numeric(original_code)) %in% 45:47 ~ "G",
      trunc(as.numeric(original_code)) %in% 49:53 ~ "H",
      trunc(as.numeric(original_code)) %in% 55:56 ~ "I",
      trunc(as.numeric(original_code)) %in% 58:63 ~ "J",
      trunc(as.numeric(original_code)) %in% 64:66 ~ "K",
      trunc(as.numeric(original_code)) %in% 68 ~ "L",
      trunc(as.numeric(original_code)) %in% 69:75 ~ "M",
      trunc(as.numeric(original_code)) %in% 77:82 ~ "N",
      trunc(as.numeric(original_code)) %in% 84 ~ "O",
      trunc(as.numeric(original_code)) %in% 85 ~ "P",
      trunc(as.numeric(original_code)) %in% 86:88 ~ "Q",
      trunc(as.numeric(original_code)) %in% 90:93 ~ "R",
      trunc(as.numeric(original_code)) %in% 94:96 ~ "S",
      trunc(as.numeric(original_code)) %in% 97:98 ~ "T",
      trunc(as.numeric(original_code)) %in% 99 ~ "U",
      # debug value, see unit tests
      TRUE ~ "Z"
    ),
    code = paste0(prepend_value, original_code),
    # the prefix is only a temporary helper column
    prepend_value = NULL
  )
# Derive the `code` column from `original_code` with the shared helper, then
# save the result with use_data(), replacing any previously saved version.
nace_classification <- nace_classification_raw %>%
  convert_superseded_nace_code(
    col_from = "original_code",
    col_to = "code"
  )

use_data(nace_classification, overwrite = TRUE)

Expand Down
24 changes: 24 additions & 0 deletions data-raw/loanbook_demo.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,28 @@ loanbook_demo <- loanbook_demo %>%
)
)

nace_classification_raw <- read_bridge(
  file.path("data-raw", "nace_classification.csv")
)

# Rewrite `sector_classification_direct_loantaker` in four steps:
#   1. cast the current codes to character so they can be joined on `code`,
#   2. bring the matching `original_code` into `loanbook_demo`,
#   3. build the new sector code from `original_code`,
#   4. drop `original_code` again, it was only needed for the conversion.
loanbook_demo <- loanbook_demo %>%
  mutate(
    sector_classification_direct_loantaker =
      as.character(sector_classification_direct_loantaker)
  ) %>%
  left_join(
    select(nace_classification_raw, original_code, code),
    by = c("sector_classification_direct_loantaker" = "code")
  ) %>%
  convert_superseded_nace_code(
    col_from = "original_code",
    col_to = "sector_classification_direct_loantaker"
  ) %>%
  mutate(original_code = NULL)

usethis::use_data(loanbook_demo, overwrite = TRUE)
39 changes: 39 additions & 0 deletions data-raw/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,42 @@ abort_missing_names <- function(missing_names) {
)
)
}

# Prefix NACE codes with the letter of the NACE section they belong to.
#
# The section is looked up from the division of each code, i.e. the integer
# part of its numeric value ("35.11" -> 35 -> "D", giving "D35.11"). Codes that
# already are a single capital letter are kept without a prefix. Codes whose
# division belongs to no section (e.g. 4 or 34), as well as missing or
# non-numeric codes, are prefixed with "Z" so they are easy to spot.
#
# Arguments:
#   data      A data frame.
#   col_from  String. Name of the column in `data` holding the codes to convert.
#   col_to    String. Name of the column that receives the converted codes. It
#             is created if it does not exist and overwritten if it does.
#
# Returns `data` with the column `col_to` added or replaced. All other columns
# are left untouched.
convert_superseded_nace_code <- function(data,
                                         col_from,
                                         col_to) {
  stopifnot(
    is.data.frame(data),
    is.character(col_from), length(col_from) == 1L,
    is.character(col_to), length(col_to) == 1L,
    col_from %in% names(data)
  )

  # First and last division of every section, named by the section letter.
  section_start <- c(
    A = 1, B = 5, C = 10, D = 35, E = 36, F = 41, G = 45, H = 49, I = 55,
    J = 58, K = 64, L = 68, M = 69, N = 77, O = 84, P = 85, Q = 86, R = 90,
    S = 94, T = 97, U = 99
  )
  section_end <- c(
    A = 3, B = 9, C = 33, D = 35, E = 39, F = 43, G = 47, H = 53, I = 56,
    J = 63, K = 66, L = 68, M = 75, N = 82, O = 84, P = 85, Q = 88, R = 93,
    S = 96, T = 98, U = 99
  )

  # Lookup table indexed by division (1 to 99). Divisions that belong to no
  # section keep the debug value "Z" (see unit tests).
  section_of_division <- rep("Z", 99L)
  for (section in names(section_start)) {
    divisions <- seq(section_start[[section]], section_end[[section]])
    section_of_division[divisions] <- section
  }

  codes <- data[[col_from]]

  # Convert via character so that factors are read by label, not by their
  # internal integer code. Letter codes cannot be parsed as numbers and become
  # NA; that is expected, so the coercion warning is silenced here only.
  division <- suppressWarnings(trunc(as.numeric(as.character(codes))))
  has_section <- !is.na(division) & division >= 1 & division <= 99

  prefix <- rep("Z", length(codes))
  prefix[has_section] <- section_of_division[division[has_section]]
  prefix[codes %in% LETTERS] <- ""

  data[[col_to]] <- paste0(prefix, codes)
  data
}
Binary file modified data/loanbook_demo.rda
Binary file not shown.
4 changes: 2 additions & 2 deletions tests/testthat/_snaps/loanbook_demo.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,9 @@
"value": ["Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", 
"Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code", "Code"]
},
{
"type": "double",
"type": "character",
"attributes": {},
"value": [3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3511, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 3011, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 291, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 2351, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 2910, 2910, 5110, 5110, 5110, 5110, 3030, 2910, 5200, 2410]
"value": ["D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "D35.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C30.11", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C29.1", "C23.51", "C23.51", "C23.51", "C23.51", 
"C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C23.51", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "C24.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "B6.1", "C29.10", "C29.10", "H51.1", "H51.1", "H51.1", "H51.1", "C30.3", "C29.10", "B5.2", "C24.1"]
},
{
"type": "character",
Expand Down
24 changes: 24 additions & 0 deletions tests/testthat/test-loanbook_demo.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
# Snapshot test: fails when `loanbook_demo` no longer matches the value
# recorded in the stored snapshot.
test_that("hasn't changed", {
expect_snapshot_value(loanbook_demo, style = "json2")
})

test_that("loanbook_demo has at least one valid sector classification (#336)", {
  # Join columns on each side, in matching order.
  loanbook_keys <- c(
    "sector_classification_system",
    "sector_classification_direct_loantaker"
  )
  classification_keys <- c("code_system", "code")

  # Left join in base R: every loan is kept, and loans without a matching
  # classification entry end up with a missing `sector`.
  joined <- merge(
    loanbook_demo,
    sector_classifications,
    by.x = loanbook_keys,
    by.y = classification_keys,
    all.x = TRUE
  )

  matched_loans <- sum(!is.na(joined[["sector"]]))

  expect_true(matched_loans > 0)
})
Loading