diff --git a/DESCRIPTION b/DESCRIPTION index 4ba0c28..ec88448 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,7 +33,7 @@ Imports: dbplyr, dplyr, logger, - pacta.data.preparation (>= 0.1.0.9002), + pacta.data.preparation (>= 0.1.0.9003), pacta.data.scraping, pacta.scenario.preparation, readr, diff --git a/run_pacta_data_preparation.R b/run_pacta_data_preparation.R index e6dcca4..6ea8e07 100644 --- a/run_pacta_data_preparation.R +++ b/run_pacta_data_preparation.R @@ -155,24 +155,39 @@ factset_timestamp <- # check that everything is ready to go ----------------------------------------- -stopifnot(file.exists(masterdata_ownership_path)) -stopifnot(file.exists(masterdata_debt_path)) -stopifnot(file.exists(ar_company_id__factset_entity_id_path)) -stopifnot(file.exists(factset_financial_data_path)) -stopifnot(file.exists(factset_entity_info_path)) -stopifnot(file.exists(factset_entity_financing_data_path)) -stopifnot(file.exists(factset_fund_data_path)) -stopifnot(file.exists(factset_isin_to_fund_table_path)) -stopifnot(file.exists(factset_iss_emissions_data_path)) -stopifnot(file.exists(factset_issue_code_bridge_path)) -stopifnot(file.exists(factset_industry_map_bridge_path)) -stopifnot(file.exists(factset_manual_pacta_sector_override_path)) -stopifnot(file.exists(data_prep_outputs_path)) +input_filepaths <- c( + masterdata_ownership_path = masterdata_ownership_path, + masterdata_debt_path = masterdata_debt_path, + ar_company_id__factset_entity_id_path = ar_company_id__factset_entity_id_path, + factset_financial_data_path = factset_financial_data_path, + factset_entity_info_path = factset_entity_info_path, + factset_entity_financing_data_path = factset_entity_financing_data_path, + factset_fund_data_path = factset_fund_data_path, + factset_isin_to_fund_table_path = factset_isin_to_fund_table_path, + factset_iss_emissions_data_path = factset_iss_emissions_data_path, + factset_issue_code_bridge_path = factset_issue_code_bridge_path, + factset_industry_map_bridge_path = factset_industry_map_bridge_path, + factset_manual_pacta_sector_override_path = factset_manual_pacta_sector_override_path +) if (!update_currencies) { - stopifnot(file.exists(currencies_data_path)) + input_filepaths <- c( + input_filepaths, + currencies_preflight_data_path = currencies_preflight_data_path + ) } +missing_input_files <- input_filepaths[!file.exists(input_filepaths)] + +if (length(missing_input_files) > 0L) { + logger::log_error( + "Input file cannot be found: {names(missing_input_files)} ({missing_input_files})." + ) + stop( + "Input files are missing: ", + toString(missing_input_files) + ) +} # pre-flight ------------------------------------------------------------------- @@ -180,6 +195,10 @@ logger::log_info("Fetching pre-flight data.") if (update_currencies) { logger::log_info("Fetching currency data.") + input_filepaths <- c( + input_filepaths, + currencies_preflight_data_path = currencies_preflight_data_path + ) currencies <- pacta.data.scraping::get_currency_exchange_rates( quarter = imf_quarter_timestamp ) @@ -191,6 +210,10 @@ if (update_currencies) { } logger::log_info("Scraping index regions.") +input_filepaths <- c( + input_filepaths, + index_regions_preflight_data_path = index_regions_preflight_data_path +) index_regions <- pacta.data.scraping::get_index_regions() saveRDS(index_regions, index_regions_preflight_data_path) @@ -856,23 +879,7 @@ parameters <- list( config_name = config_name, config = unclass(config), - input_filepaths = list( - masterdata_ownership_path = masterdata_ownership_path, - masterdata_debt_path = masterdata_debt_path, - ar_company_id__factset_entity_id_path = ar_company_id__factset_entity_id_path, - factset_financial_data_path = factset_financial_data_path, - factset_entity_info_path = factset_entity_info_path, - factset_entity_financing_data_path = factset_entity_financing_data_path, - factset_fund_data_path = factset_fund_data_path, - factset_isin_to_fund_table_path = factset_isin_to_fund_table_path, - factset_iss_emissions_data_path = factset_iss_emissions_data_path, - factset_issue_code_bridge_path = factset_issue_code_bridge_path, - factset_industry_map_bridge_path = factset_industry_map_bridge_path, - factset_manual_pacta_sector_override_path = factset_manual_pacta_sector_override_path - ), - preflight_filepaths = list( - currencies_data_path = currencies_data_path - ), + input_filepaths = as.list(input_filepaths), timestamps = list( imf_quarter_timestamp = imf_quarter_timestamp, factset_data_identifier = factset_timestamp, @@ -901,14 +908,24 @@ parameters <- package_news = package_news ) +logger::log_trace("Getting list of output files.") +output_files <- normalizePath( + list.files( + data_prep_outputs_path, + full.names = TRUE, + recursive = TRUE + ) +) + +manifest_path <- file.path(data_prep_outputs_path, "manifest.json") +logger::log_trace("Writing manifest file: \"{manifest_path}\".") pacta.data.preparation::write_manifest( - path = file.path(data_prep_outputs_path, "manifest.json"), + path = manifest_path, parameters = parameters, - asset_impact_data_path = asset_impact_data_path, - factset_data_path = factset_data_path, - data_prep_outputs_path = data_prep_outputs_path + input_files = input_filepaths, + output_files = output_files ) - +output_files <- c(output_files, manifest_path = manifest_path) # copy in NEWs.md files from relevant PACTA packages --------------------------- @@ -945,7 +962,7 @@ if (export_archives) { logger::log_trace("Zip file path: \"{outputs_zip_file_path}\".") zip( zipfile = outputs_zip_file_path, - files = list.files(data_prep_outputs_path, full.names = TRUE, recursive = TRUE), + files = output_files, extras = c( "--junk-paths", # do not preserve paths "--no-dir-entries", # do not include directory entries