Skip to content

Commit

Permalink
Merge branch 'main' into build/143-2022q4-config
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexAxthelm authored Feb 23, 2024
2 parents d6ddce8 + a4b662b commit 1f1da7f
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 38 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Imports:
dbplyr,
dplyr,
logger,
pacta.data.preparation (>= 0.1.0.9002),
pacta.data.preparation (>= 0.1.0.9003),
pacta.data.scraping,
pacta.scenario.preparation,
readr,
Expand Down
91 changes: 54 additions & 37 deletions run_pacta_data_preparation.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,31 +155,50 @@ factset_timestamp <-

# check that everything is ready to go -----------------------------------------

stopifnot(file.exists(masterdata_ownership_path))
stopifnot(file.exists(masterdata_debt_path))
stopifnot(file.exists(ar_company_id__factset_entity_id_path))
stopifnot(file.exists(factset_financial_data_path))
stopifnot(file.exists(factset_entity_info_path))
stopifnot(file.exists(factset_entity_financing_data_path))
stopifnot(file.exists(factset_fund_data_path))
stopifnot(file.exists(factset_isin_to_fund_table_path))
stopifnot(file.exists(factset_iss_emissions_data_path))
stopifnot(file.exists(factset_issue_code_bridge_path))
stopifnot(file.exists(factset_industry_map_bridge_path))
stopifnot(file.exists(factset_manual_pacta_sector_override_path))
stopifnot(file.exists(data_prep_outputs_path))
input_filepaths <- c(
masterdata_ownership_path = masterdata_ownership_path,
masterdata_debt_path = masterdata_debt_path,
ar_company_id__factset_entity_id_path = ar_company_id__factset_entity_id_path,
factset_financial_data_path = factset_financial_data_path,
factset_entity_info_path = factset_entity_info_path,
factset_entity_financing_data_path = factset_entity_financing_data_path,
factset_fund_data_path = factset_fund_data_path,
factset_isin_to_fund_table_path = factset_isin_to_fund_table_path,
factset_iss_emissions_data_path = factset_iss_emissions_data_path,
factset_issue_code_bridge_path = factset_issue_code_bridge_path,
factset_industry_map_bridge_path = factset_industry_map_bridge_path,
factset_manual_pacta_sector_override_path = factset_manual_pacta_sector_override_path
)

if (!update_currencies) {
stopifnot(file.exists(currencies_data_path))
input_filepaths <- c(
input_filepaths,
currencies_preflight_data_path = currencies_preflight_data_path
)
}

# Keep only the entries of input_filepaths whose path does not exist on disk;
# the names of the vector are preserved by the subsetting.
missing_input_files <- input_filepaths[!file.exists(input_filepaths)]

# Abort if at least one of the expected input files is missing.
if (length(missing_input_files) > 0L) {
# Log the name and the path of the missing entries.
logger::log_error(
"Input file cannot be found: {names(missing_input_files)} ({missing_input_files})."
)
# Raise a single error that lists all missing paths, comma-separated.
stop(
"Input files are missing: ",
toString(missing_input_files)
)
}

# pre-flight -------------------------------------------------------------------

logger::log_info("Fetching pre-flight data.")

if (update_currencies) {
logger::log_info("Fetching currency data.")
input_filepaths <- c(
input_filepaths,
currencies_preflight_data_path = currencies_preflight_data_path
)
currencies <- pacta.data.scraping::get_currency_exchange_rates(
quarter = imf_quarter_timestamp
)
Expand All @@ -191,6 +210,10 @@ if (update_currencies) {
}

# Fetch the index regions and store them as an RDS file at
# index_regions_preflight_data_path.
logger::log_info("Scraping index regions.")
# Append the pre-flight file path to input_filepaths under its own name.
# NOTE(review): presumably so it is reported alongside the other inputs in
# the manifest -- confirm against the write_manifest() call.
input_filepaths <- c(
input_filepaths,
index_regions_preflight_data_path = index_regions_preflight_data_path
)
index_regions <- pacta.data.scraping::get_index_regions()
saveRDS(index_regions, index_regions_preflight_data_path)

Expand Down Expand Up @@ -856,23 +879,7 @@ parameters <-
list(
config_name = config_name,
config = unclass(config),
input_filepaths = list(
masterdata_ownership_path = masterdata_ownership_path,
masterdata_debt_path = masterdata_debt_path,
ar_company_id__factset_entity_id_path = ar_company_id__factset_entity_id_path,
factset_financial_data_path = factset_financial_data_path,
factset_entity_info_path = factset_entity_info_path,
factset_entity_financing_data_path = factset_entity_financing_data_path,
factset_fund_data_path = factset_fund_data_path,
factset_isin_to_fund_table_path = factset_isin_to_fund_table_path,
factset_iss_emissions_data_path = factset_iss_emissions_data_path,
factset_issue_code_bridge_path = factset_issue_code_bridge_path,
factset_industry_map_bridge_path = factset_industry_map_bridge_path,
factset_manual_pacta_sector_override_path = factset_manual_pacta_sector_override_path
),
preflight_filepaths = list(
currencies_data_path = currencies_data_path
),
input_filepaths = as.list(input_filepaths),
timestamps = list(
imf_quarter_timestamp = imf_quarter_timestamp,
factset_data_identifier = factset_timestamp,
Expand Down Expand Up @@ -901,14 +908,24 @@ parameters <-
package_news = package_news
)

# Collect the normalized full paths of every file currently found under
# data_prep_outputs_path, descending into subdirectories.
logger::log_trace("Getting list of output files.")
output_files <- normalizePath(
list.files(
data_prep_outputs_path,
full.names = TRUE,
recursive = TRUE
)
)

manifest_path <- file.path(data_prep_outputs_path, "manifest.json")
logger::log_trace("Writing manifest file: \"{manifest_path}\".")
pacta.data.preparation::write_manifest(
path = file.path(data_prep_outputs_path, "manifest.json"),
path = manifest_path,
parameters = parameters,
asset_impact_data_path = asset_impact_data_path,
factset_data_path = factset_data_path,
data_prep_outputs_path = data_prep_outputs_path
input_files = input_filepaths,
output_files = output_files
)

output_files <- c(output_files, manifest_path = manifest_path)

# copy in NEWS.md files from relevant PACTA packages ---------------------------

Expand Down Expand Up @@ -945,7 +962,7 @@ if (export_archives) {
logger::log_trace("Zip file path: \"{outputs_zip_file_path}\".")
zip(
zipfile = outputs_zip_file_path,
files = list.files(data_prep_outputs_path, full.names = TRUE, recursive = TRUE),
files = output_files,
extras = c(
"--junk-paths", # do not preserve paths
"--no-dir-entries", # do not include directory entries
Expand Down

0 comments on commit 1f1da7f

Please sign in to comment.