Skip to content

Commit

Permalink
implement pacta.data.validation (#185)
Browse files Browse the repository at this point in the history
closes #18

Most validation errors originally found (below) have been resolved.
Validation of `financial_data` and `abcd_flags_equity` has been removed
for now and will be added in the future.

investigation issues:
- #196
- #197
- #198
- #198

relevant fixes in pacta.data.validation:
- RMI-PACTA/pacta.data.validation#65
- RMI-PACTA/pacta.data.validation#66
- RMI-PACTA/pacta.data.validation#67
- RMI-PACTA/pacta.data.validation#68

relevant fix in pacta.data.preparation:
- RMI-PACTA/pacta.data.preparation#18

Validation of `financial_data` and `abcd_flags_equity` has been removed
from this PR; the intended future implementation is tracked here:
- #222
- dependent on RMI-PACTA/pacta.data.validation#69
  • Loading branch information
cjyetman authored Apr 18, 2024
1 parent b4f6273 commit a837c76
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 29 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Imports:
logger,
pacta.data.preparation (>= 0.1.0.9003),
pacta.data.scraping,
pacta.data.validation,
pacta.scenario.data.preparation,
readr,
rlang,
Expand All @@ -43,6 +44,7 @@ Imports:
Remotes:
RMI-PACTA/pacta.data.preparation,
RMI-PACTA/pacta.data.scraping,
RMI-PACTA/pacta.data.validation,
RMI-PACTA/pacta.scenario.data.preparation
Depends:
R (>= 3.5.0)
Expand Down
97 changes: 68 additions & 29 deletions run_pacta_data_preparation.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ logger::log_formatter(logger::formatter_glue)
suppressPackageStartupMessages({
library(pacta.data.preparation)
library(pacta.data.scraping)
library(pacta.data.validation)
library(pacta.scenario.data.preparation)
library(DBI)
library(dplyr)
Expand Down Expand Up @@ -290,6 +291,7 @@ logger::log_info("Scenario data prepared.")
# currency data output ---------------------------------------------------------

logger::log_info("Saving file: \"currencies.rds\".")
pacta.data.validation::validate_currencies(currencies)
saveRDS(currencies, currencies_data_path)


Expand All @@ -306,11 +308,19 @@ logger::log_info("Preparing financial data.")
# read raw FactSet financial data, filter to unique rows, merge AR company_id,
# merge PACTA sectors from AR data
logger::log_info("Formatting and saving file: \"financial_data.rds\".")
readRDS(factset_financial_data_path) %>%
pacta.data.preparation::prepare_financial_data(factset_issue_code_bridge) %>%
saveRDS(file.path(config[["data_prep_outputs_path"]], "financial_data.rds"))

financial_data <-
readRDS(factset_financial_data_path) %>%
pacta.data.preparation::prepare_financial_data(factset_issue_code_bridge)

saveRDS(
object = financial_data,
file = file.path(config[["data_prep_outputs_path"]], "financial_data.rds")
)
rm(financial_data)
invisible(gc())


logger::log_info("Formatting and saving file: \"entity_financing.rds\".")
readRDS(factset_entity_financing_data_path) %>%
saveRDS(file.path(config[["data_prep_outputs_path"]], "entity_financing.rds"))
Expand Down Expand Up @@ -352,27 +362,43 @@ invisible(gc())
logger::log_info(
"Formatting and saving file: \"masterdata_ownership_datastore.rds\"."
)
readr::read_csv(masterdata_ownership_path, na = "", show_col_types = FALSE) %>%

masterdata_ownership_datastore <-
readr::read_csv(masterdata_ownership_path, na = "", show_col_types = FALSE) %>%
pacta.data.preparation::prepare_masterdata(
ar_company_id__country_of_domicile,
config[["pacta_financial_timestamp"]],
config[["zero_emission_factor_techs"]]
) %>%
saveRDS(file.path(config[["data_prep_outputs_path"]], "masterdata_ownership_datastore.rds"))
)

pacta.data.validation::validate_masterdata_ownership_datastore(masterdata_ownership_datastore)

saveRDS(
object = masterdata_ownership_datastore,
file = file.path(config[["data_prep_outputs_path"]], "masterdata_ownership_datastore.rds")
)
rm(masterdata_ownership_datastore)
invisible(gc())


logger::log_info("Formatting and saving file: \"masterdata_debt_datastore.rds\".")

pacta.data.preparation::prepare_masterdata_debt(
masterdata_debt_raw = readr::read_csv(masterdata_debt_path, na = "", show_col_types = FALSE),
ar_company_id__country_of_domicile = ar_company_id__country_of_domicile,
ar_company_id__credit_parent_ar_company_id = ar_company_id__credit_parent_ar_company_id,
pacta_financial_timestamp = config[["pacta_financial_timestamp"]],
zero_emission_factor_techs = config[["zero_emission_factor_techs"]]
) %>%
saveRDS(file.path(config[["data_prep_outputs_path"]], "masterdata_debt_datastore.rds"))
masterdata_debt_datastore <-
pacta.data.preparation::prepare_masterdata_debt(
masterdata_debt_raw = readr::read_csv(masterdata_debt_path, na = "", show_col_types = FALSE),
ar_company_id__country_of_domicile = ar_company_id__country_of_domicile,
ar_company_id__credit_parent_ar_company_id = ar_company_id__credit_parent_ar_company_id,
pacta_financial_timestamp = config[["pacta_financial_timestamp"]],
zero_emission_factor_techs = config[["zero_emission_factor_techs"]]
)

pacta.data.validation::validate_masterdata_debt_datastore(masterdata_debt_datastore)

saveRDS(
object = masterdata_debt_datastore,
file = file.path(config[["data_prep_outputs_path"]], "masterdata_debt_datastore.rds")
)
rm(masterdata_debt_datastore)
invisible(gc())

rm(ar_company_id__country_of_domicile)
Expand Down Expand Up @@ -409,14 +435,20 @@ ar_company_id__sectors_with_assets__ownership <-
readRDS(file.path(config[["data_prep_outputs_path"]], "masterdata_ownership_datastore.rds")) %>%
pacta.data.preparation::prepare_ar_company_id__sectors_with_assets__ownership(relevant_years)

pacta.data.preparation::prepare_abcd_flags_equity(
financial_data,
factset_entity_id__ar_company_id,
factset_entity_id__security_mapped_sector,
ar_company_id__sectors_with_assets__ownership
) %>%
saveRDS(file.path(config[["data_prep_outputs_path"]], "abcd_flags_equity.rds"))
abcd_flags_equity <-
pacta.data.preparation::prepare_abcd_flags_equity(
financial_data,
factset_entity_id__ar_company_id,
factset_entity_id__security_mapped_sector,
ar_company_id__sectors_with_assets__ownership
)

saveRDS(
object = abcd_flags_equity,
file = file.path(config[["data_prep_outputs_path"]], "abcd_flags_equity.rds")
)

rm(abcd_flags_equity)
rm(ar_company_id__sectors_with_assets__ownership)
invisible(gc())

Expand All @@ -427,15 +459,22 @@ ar_company_id__sectors_with_assets__debt <-
readRDS(file.path(config[["data_prep_outputs_path"]], "masterdata_debt_datastore.rds")) %>%
pacta.data.preparation::prepare_ar_company_id__sectors_with_assets__debt(relevant_years)

pacta.data.preparation::prepare_abcd_flags_bonds(
financial_data,
factset_entity_id__ar_company_id,
factset_entity_id__security_mapped_sector,
ar_company_id__sectors_with_assets__debt,
factset_entity_id__credit_parent_id
) %>%
saveRDS(file.path(config[["data_prep_outputs_path"]], "abcd_flags_bonds.rds"))
abcd_flags_bonds <-
pacta.data.preparation::prepare_abcd_flags_bonds(
financial_data,
factset_entity_id__ar_company_id,
factset_entity_id__security_mapped_sector,
ar_company_id__sectors_with_assets__debt,
factset_entity_id__credit_parent_id
)

pacta.data.validation::validate_abcd_flags_bonds(abcd_flags_bonds)

saveRDS(
object = abcd_flags_bonds,
file = file.path(config[["data_prep_outputs_path"]], "abcd_flags_bonds.rds")
)
rm(abcd_flags_bonds)
rm(ar_company_id__sectors_with_assets__debt)
invisible(gc())

Expand Down

0 comments on commit a837c76

Please sign in to comment.