Skip to content

Commit

Permalink
revert and fix demo data
Browse files Browse the repository at this point in the history
  • Loading branch information
jacobvjk committed Oct 20, 2023
1 parent d9ec8ed commit e9b40e9
Show file tree
Hide file tree
Showing 11 changed files with 18,013 additions and 522 deletions.
432 changes: 126 additions & 306 deletions data-raw/abcd_demo.R

Large diffs are not rendered by default.

17,469 changes: 17,469 additions & 0 deletions data-raw/abcd_demo.csv

Large diffs are not rendered by default.

184 changes: 43 additions & 141 deletions data-raw/loanbook_demo.R
Original file line number Diff line number Diff line change
@@ -1,148 +1,50 @@
library(dplyr)
library(r2dii.data)
library(rlang)
library(tidyr)

# Loan book parameters ----
n_companies <- 20 # number of companies to sample across all sectors
total_exposure_lbk <- 1e8 # total outstanding amount split across the loans
total_credit_limit_lbk <- 5e8 # total credit limit split across the loans
currency_lbk <- "USD"

# Sampling parameters ----

# Share of the sampled companies that is drawn from each sector. Sectors with
# a share of 0 are not sampled at all.
sector_shares <- tibble::tibble(
  sector = c(
    "aviation", "automotive", "cement", "coal", "hdv",
    "oil and gas", "power", "shipping", "steel"
  ),
  share = c(0.02, 0.2, 0.02, 0.03, 0, 0.1, 0.6, 0, 0.03)
)

# abcd_demo must be sourced first, as the loanbook is created based on this data
abcd <- r2dii.data::abcd_demo

# Copy of the ABCD data in which missing production values are set to 0.
# NOTE(review): the visible steps that follow keep reading `abcd`, not
# `abcd_no_na` -- confirm whether this cleaned copy is still needed.
abcd_no_na <- dplyr::mutate(
  abcd,
  production = replace(.data$production, is.na(.data$production), 0)
)

# Sample companies based on parameters ----

# The seed is derived from the number of companies and the spread of the
# sector shares, so changing either parameter changes the random draw.
seed_i <- round(n_companies + 100 * stats::sd(sector_shares$share))
set.seed(seed_i)

# One row per ultimate-owner company and sector found in the ABCD data.
dist_abcd_sector <- dplyr::distinct(
  dplyr::filter(abcd, .data$is_ultimate_owner),
  .data$company_id,
  .data$name_company,
  .data$sector
)

# Number of companies to draw from each sector.
n_companies_sector <- tibble::tibble(
  sector = sector_shares$sector,
  n = round(n_companies * sector_shares$share)
)

# Sectors with a positive share, in the order they appear in sector_shares.
sectors_to_sample <- unique(
  sector_shares$sector[which(sector_shares$share > 0)]
)

# Draw the requested number of companies from every sector to sample.
#
# The per-sector draws are collected with lapply() and bound together once,
# instead of growing a data frame inside a for loop. Sectors are visited in
# the same order as before, so the draws are unchanged for a given seed.
# If `sectors_to_sample` is empty the result is an empty tibble rather than
# NULL.
sample_sectors <- dplyr::bind_rows(
  lapply(sectors_to_sample, function(sector_i) {
    # number of companies requested for this sector
    n_companies_sector_i <- unique(
      n_companies_sector$n[n_companies_sector$sector %in% sector_i]
    )

    dist_abcd_sector %>%
      dplyr::filter(.data$sector == .env$sector_i) %>%
      dplyr::slice_sample(n = n_companies_sector_i)
  })
)

# Basic loan book structure with sampled companies ----

# Keep only the ABCD rows that belong to the sampled companies.
abcd_sample <- dplyr::inner_join(
  x = abcd,
  y = sample_sectors,
  by = c("company_id", "name_company", "sector")
)

sample_sector_codes <- r2dii.data::nace_classification %>%
dplyr::filter(.data$borderline == FALSE) %>%
dplyr::group_by(.data$sector) %>%
dplyr::slice_max(.data$code_level, n = 1) %>%
dplyr::slice_head(n = 1) %>%
dplyr::ungroup() %>%
dplyr::select("sector", "code") %>%
dplyr::mutate(
sector_classification_system = "NACE",
sector_classification_input_type = "Code"
library(usethis)

# Run the shared helper script for the data-raw scripts.
# NOTE(review): what utils.R defines is not visible here -- confirm it is still
# needed by this script.
source(file.path("data-raw", "utils.R"))

# Source: https://github.com/2DegreesInvesting/r2dii.dataraw/pull/4
path <- file.path("data-raw", "loanbook_demo.csv")
# Read the demo loan book with an explicit type for every column, so nothing is
# left to readr's type guessing. In the compact codes below "c" is character
# and "d" is double.
loanbook_demo <- readr::read_csv(
path,
col_types = readr::cols(
id_loan = "c",
id_direct_loantaker = "c",
name_direct_loantaker = "c",
id_intermediate_parent_1 = "c",
name_intermediate_parent_1 = "c",
id_ultimate_parent = "c",
name_ultimate_parent = "c",
loan_size_outstanding = "d",
loan_size_outstanding_currency = "c",
loan_size_credit_limit = "d",
loan_size_credit_limit_currency = "c",
sector_classification_system = "c",
sector_classification_input_type = "c",
# the sector code is read as a number, not as text
sector_classification_direct_loantaker = "d",
fi_type = "c",
flag_project_finance_loan = "c",
name_project = "c",
lei_direct_loantaker = "c",
isin_direct_loantaker = "c"
)
)

# Attach the columns of sample_sector_codes to each sampled ABCD row,
# matching on sector.
abcd_sample_sector_codes <- dplyr::inner_join(
  x = abcd_sample,
  y = sample_sector_codes,
  by = "sector"
)
# Store the outstanding loan size as a double.
loanbook_demo[["loan_size_outstanding"]] <- as.double(
  loanbook_demo[["loan_size_outstanding"]]
)

loanbook_sample_prep <- abcd_sample_sector_codes %>%
dplyr::distinct(
.data$company_id,
.data$name_company,
.data$lei,
.data$sector_classification_system,
.data$sector_classification_input_type,
.data$code
# Replace the non-ASCII characters in three company names with ASCII
# equivalents; every other name is left untouched.
loanbook_demo <- loanbook_demo %>%
  mutate(
    name_direct_loantaker = replace(
      name_direct_loantaker,
      name_direct_loantaker %in% "Holcim Hüttenzement",
      "Holcim Huttenzement"
    ),
    name_ultimate_parent = replace(
      replace(
        name_ultimate_parent,
        name_ultimate_parent %in% "Sa Tudela Veguín",
        "Sa Tudela Veguin"
      ),
      name_ultimate_parent %in% "Chongyang Hui’An Cement",
      "Chongyang Hui'An Cement"
    )
  )

# TODO: how to handle group_id?

# One loan per row of loanbook_sample_prep. The direct loantaker and the
# ultimate parent are the same company, there is no intermediate parent, and
# both loan sizes start out missing.
loanbook_sample <- tibble::tibble(
  # loantaker and parents
  id_direct_loantaker = paste0("C", loanbook_sample_prep$company_id),
  name_direct_loantaker = loanbook_sample_prep$name_company,
  id_intermediate_parent_1 = NA_character_,
  name_intermediate_parent_1 = NA_character_,
  id_ultimate_parent = paste0("UP", loanbook_sample_prep$company_id),
  name_ultimate_parent = loanbook_sample_prep$name_company,
  # loan sizes
  loan_size_outstanding = NA_real_,
  loan_size_outstanding_currency = .env$currency_lbk,
  loan_size_credit_limit = NA_real_,
  loan_size_credit_limit_currency = .env$currency_lbk,
  # sector classification
  sector_classification_system =
    loanbook_sample_prep$sector_classification_system,
  sector_classification_input_type =
    loanbook_sample_prep$sector_classification_input_type,
  sector_classification_direct_loantaker =
    as.numeric(loanbook_sample_prep$code),
  # remaining loan attributes
  fi_type = "Loan",
  flag_project_finance_loan = "No",
  name_project = NA_character_,
  lei_direct_loantaker = loanbook_sample_prep$lei,
  isin_direct_loantaker = NA_character_,
  # running loan id ("L1", "L2", ...), kept as the last column
  id_loan = paste0("L", seq_len(nrow(loanbook_sample_prep)))
)

# Randomly sampled loan sizes ----

# Draw one Beta(1, 3) weight per loan, scale the weights so they sum to 1, and
# split the total exposure and the total credit limit in those proportions.
# The two helper columns are dropped again at the end.
loanbook_demo <- loanbook_sample %>%
  dplyr::mutate(
    random_beta = stats::rbeta(n = dplyr::n(), shape1 = 1, shape2 = 3),
    share_exposure = .data$random_beta / sum(.data$random_beta, na.rm = TRUE),
    loan_size_outstanding = round(
      .data$share_exposure * .env$total_exposure_lbk
    ),
    loan_size_credit_limit = round(
      .data$share_exposure * .env$total_credit_limit_lbk
    )
  ) %>%
  dplyr::select(-"random_beta", -"share_exposure")

# Save loanbook_demo as package data, replacing any existing copy.
usethis::use_data(loanbook_demo, overwrite = TRUE)
Loading

0 comments on commit e9b40e9

Please sign in to comment.