From 821e26e9d8d1e81a86d76ff0c2b66ca304d08fab Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Fri, 4 Aug 2023 17:21:25 +0200 Subject: [PATCH 1/6] Give a better error message in case aggregate_by is not in the tags --- openquake/calculators/base.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/openquake/calculators/base.py b/openquake/calculators/base.py index f278078957cc..07b280160631 100644 --- a/openquake/calculators/base.py +++ b/openquake/calculators/base.py @@ -845,7 +845,14 @@ def _read_risk_data(self): if self.sitecol and oq.imtls: logging.info('Read N=%d hazard sites and L=%d hazard levels', len(self.sitecol), oq.imtls.size) - + # NOTE: it would be good to specify the exposure file name in the + # error message, but the exposure might be reused from the hazard + if oq.aggregate_by: + for taglist in oq.aggregate_by: + for tag in taglist: + if tag not in self.assetcol.tagcol.tagnames: + raise AttributeError( + f"Missing tag '{tag}' in expsure model") if oq_hazard: parent = self.datastore.parent if 'assetcol' in parent: From 2a3b293a25428d53c8d9ec78ba54dbd9588c8d10 Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Wed, 9 Aug 2023 14:19:08 +0200 Subject: [PATCH 2/6] Add a test with aggregate_by = occupancy but occupancy is missing in the exposure --- .../tests/event_based_risk_test.py | 6 ++ .../calculators/tests/event_risk_test.py | 2 +- .../case_1/job_missing_occupancy.ini | 56 +++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 openquake/qa_tests_data/event_based_risk/case_1/job_missing_occupancy.ini diff --git a/openquake/calculators/tests/event_based_risk_test.py b/openquake/calculators/tests/event_based_risk_test.py index dc596d22ed1e..33f22e354992 100644 --- a/openquake/calculators/tests/event_based_risk_test.py +++ b/openquake/calculators/tests/event_based_risk_test.py @@ -83,6 +83,12 @@ def test_case_1(self): self.assertEqualFiles('expected/%s' % strip_calc_id(fname), fname, delta=1E-5) + def test_case_1_missing_occupancy(self): + with self.assertRaises(AttributeError) as ctx: + self.run_calc(case_1.__file__, 'job_missing_occupancy.ini') + self.assertIn("Missing tag 'occupancy' in expsure model", + str(ctx.exception)) + def test_case_1_ins(self): # no aggregation self.run_calc(case_1.__file__, 'job2.ini') diff --git a/openquake/calculators/tests/event_risk_test.py b/openquake/calculators/tests/event_risk_test.py index 6801862be97d..a0c81b9e114e 100644 --- a/openquake/calculators/tests/event_risk_test.py +++ b/openquake/calculators/tests/event_risk_test.py @@ -107,7 +107,7 @@ def test_case_5(self): self.run_calc(case_5.__file__, 'job.ini') rbe = self.calc.datastore.read_df('risk_by_event') self.assertEqual(len(rbe), 0) - + def test_case_master(self): self.run_calc(case_master.__file__, 'job.ini') calc0 = self.calc.datastore # single file event_based_risk diff --git a/openquake/qa_tests_data/event_based_risk/case_1/job_missing_occupancy.ini b/openquake/qa_tests_data/event_based_risk/case_1/job_missing_occupancy.ini new file mode 100644 index 000000000000..f78bbd05fed5 --- /dev/null +++ b/openquake/qa_tests_data/event_based_risk/case_1/job_missing_occupancy.ini @@ -0,0 +1,56 @@ +[general] +random_seed = 23 +master_seed = 42 +description = Event Based Risk QA Test 1 +calculation_mode = event_based_risk +structural_vulnerability_file = vulnerability_model_stco.xml +nonstructural_vulnerability_file = vulnerability_model_nonstco.xml +exposure_file = exposure1.zip +discard_assets = true +aggregate_by = occupancy + +region = 81.1 26, 88 26, 88 30, 81.1 30 + +asset_hazard_distance = 20 + +conditional_loss_poes = 0.1 + +quantiles = 0.25 + +avg_losses = true + +sites = 81.2985 29.1098, 83.082298 27.9006, 85.747703 27.9015 + +[logic_tree] + +number_of_logic_tree_samples = 0 + +[erf] + +# km +rupture_mesh_spacing = 5 +width_of_mfd_bin = 0.3 +# km +area_source_discretization = 10 + +[site_params] + +reference_vs30_type = measured +reference_vs30_value = 760.0 +reference_depth_to_2pt5km_per_sec = 5.0 +reference_depth_to_1pt0km_per_sec = 100.0 + +[calculation] +source_model_logic_tree_file = source_model_logic_tree.xml +gsim_logic_tree_file = gmpe_logic_tree.xml +# years +investigation_time = 50.0 +ses_per_logic_tree_path = 20 +truncation_level = 3 +# km +maximum_distance = 100.0 +return_periods = [30, 60, 120, 240, 480, 960] +individual_rlzs = true + +[output] +export_dir = /tmp From acbaa8aa44af5256b1022c5065cb755e64269703 Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Wed, 9 Aug 2023 16:24:19 +0200 Subject: [PATCH 3/6] Specify the exposure file where the aggregate_by tag is missing --- openquake/calculators/base.py | 20 ++++++++----------- .../tests/event_based_risk_test.py | 6 ++++-- openquake/commonlib/readinput.py | 3 ++- openquake/risklib/asset.py | 12 +++++++---- 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/openquake/calculators/base.py b/openquake/calculators/base.py index f663adfc9a8e..06e3fad3fd9d 100644 --- a/openquake/calculators/base.py +++ b/openquake/calculators/base.py @@ -572,7 +572,8 @@ def pre_execute(self): haz_sitecol = readinput.get_site_collection(oq, self.datastore) self.load_crmodel() # must be after get_site_collection self.read_exposure(haz_sitecol) # define .assets_by_site - self.datastore.create_df('_poes', readinput.Global.pmap.to_dframe()) + self.datastore.create_df('_poes', + readinput.Global.pmap.to_dframe()) self.datastore['assetcol'] = self.assetcol self.datastore['full_lt'] = fake = logictree.FullLogicTree.fake() self.datastore['trt_rlzs'] = U32([[0]]) @@ -678,7 +679,9 @@ def read_exposure(self, haz_sitecol): # after load_risk_model .sitecol, .assetcol """ oq = self.oqparam - self.sitecol, self.assetcol, discarded = readinput.get_sitecol_assetcol( + (self.sitecol, + self.assetcol, + discarded) = readinput.get_sitecol_assetcol( oq, haz_sitecol, self.crmodel.loss_types, self.datastore) # this is overriding the sitecol in test_case_miriam self.datastore['sitecol'] = self.sitecol @@ -845,14 +848,6 @@ def _read_risk_data(self): if self.sitecol and oq.imtls: logging.info('Read N=%d hazard sites and L=%d hazard levels', len(self.sitecol), oq.imtls.size) - # NOTE: it would be good to specify the exposure file name in the - # error message, but the exposure might be reused from the hazard - if oq.aggregate_by: - for taglist in oq.aggregate_by: - for tag in taglist: - if tag not in self.assetcol.tagcol.tagnames: - raise AttributeError( - f"Missing tag '{tag}' in expsure model") if oq_hazard: parent = self.datastore.parent if 'assetcol' in parent: @@ -999,7 +994,7 @@ def store_source_info(self, source_data): recs, source_reader.source_info_dt) def post_process(self): - """ + """ Run postprocessing function, if any """ oq = self.oqparam @@ -1118,7 +1113,8 @@ def import_gmfs_csv(dstore, oqparam, sitecol): if names[0] == 'rlzi': # backward compatibility names = names[1:] # discard the field rlzi names = [n for n in names if n != 'custom_site_id'] - imts = [name.lstrip('gmv_') for name in names if name not in ('sid', 'eid')] + imts = [name.lstrip('gmv_') + for name in names if name not in ('sid', 'eid')] oqparam.hazard_imtls = {imt: [0] for imt in imts} missing = set(oqparam.imtls) - set(imts) if missing: diff --git a/openquake/calculators/tests/event_based_risk_test.py b/openquake/calculators/tests/event_based_risk_test.py index 33f22e354992..8922c3159f63 100644 --- a/openquake/calculators/tests/event_based_risk_test.py +++ b/openquake/calculators/tests/event_based_risk_test.py @@ -22,6 +22,7 @@ from openquake.baselib.general import gettemp from openquake.baselib.hdf5 import read_csv +from openquake.hazardlib import InvalidFile from openquake.hazardlib.source.rupture import get_ruptures from openquake.commonlib import logs, readinput from openquake.calculators.views import view, text_table @@ -84,9 +85,10 @@ def test_case_1(self): delta=1E-5) def test_case_1_missing_occupancy(self): - with self.assertRaises(AttributeError) as ctx: + with self.assertRaises(InvalidFile) as ctx: self.run_calc(case_1.__file__, 'job_missing_occupancy.ini') - self.assertIn("Missing tag 'occupancy' in expsure model", + self.assertIn('Missing tag "occupancy" in', str(ctx.exception)) + self.assertIn('qa_tests_data/event_based_risk/case_1/exposure.csv', str(ctx.exception)) def test_case_1_ins(self): diff --git a/openquake/commonlib/readinput.py b/openquake/commonlib/readinput.py index 537f01f49120..3717fc9983e2 100644 --- a/openquake/commonlib/readinput.py +++ b/openquake/commonlib/readinput.py @@ -939,7 +939,8 @@ def get_exposure(oqparam, h5=None): oqparam.ignore_missing_costs, by_country='country' in asset.tagset(oqparam.aggregate_by), errors='ignore' if oqparam.ignore_encoding_errors else None, - infr_conn_analysis=oqparam.infrastructure_connectivity_analysis) + infr_conn_analysis=oqparam.infrastructure_connectivity_analysis, + aggregate_by=oqparam.aggregate_by) return exposure diff --git a/openquake/risklib/asset.py b/openquake/risklib/asset.py index 40e79c48671b..1fc9d4df6610 100644 --- a/openquake/risklib/asset.py +++ b/openquake/risklib/asset.py @@ -783,7 +783,7 @@ def check_exposure_for_infr_conn_analysis(df, fname): def read_exp_df(fname, calculation_mode='', ignore_missing_costs=(), check_dupl=True, by_country=False, asset_prefix='', tagcol=None, errors=None, infr_conn_analysis=False, - monitor=None): + aggregate_by=None, monitor=None): logging.info('Reading %s', fname) exposure, assetnodes = _get_exposure(fname) if tagcol: @@ -817,7 +817,11 @@ def read_exp_df(fname, calculation_mode='', ignore_missing_costs=(), 'value-number': df['value-number']}) if infr_conn_analysis: check_exposure_for_infr_conn_analysis(df, fname) - + if aggregate_by: + for taglist in aggregate_by: + for tag in taglist: + if f'value-{tag}' not in df.columns: + raise InvalidFile(f'Missing tag "{tag}" in {fname}') df['id'] = asset_prefix + df.id dfs.append(df) @@ -893,7 +897,7 @@ def check(fname): @staticmethod def read_all(fnames, calculation_mode='', ignore_missing_costs=(), check_dupl=True, tagcol=None, by_country=False, errors=None, - infr_conn_analysis=False): + infr_conn_analysis=False, aggregate_by=None): """ :returns: an :class:`Exposure` instance keeping all the assets in memory @@ -917,7 +921,7 @@ def read_all(fnames, calculation_mode='', ignore_missing_costs=(), prefix = '' allargs.append((fname, calculation_mode, ignore_missing_costs, check_dupl, by_country, prefix, tagcol, errors, - infr_conn_analysis)) + infr_conn_analysis, aggregate_by)) exp = None dfs = [] for exposure, df in itertools.starmap(read_exp_df, allargs): From 394af9803836b33ad5e391385d28f1da42f1be65 Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Wed, 9 Aug 2023 16:53:39 +0200 Subject: [PATCH 4/6] Check presence of tag in the exposure both as it is and as 'value-tag' --- openquake/risklib/asset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openquake/risklib/asset.py b/openquake/risklib/asset.py index 1fc9d4df6610..c4f0de498fa8 100644 --- a/openquake/risklib/asset.py +++ b/openquake/risklib/asset.py @@ -820,7 +820,8 @@ def read_exp_df(fname, calculation_mode='', ignore_missing_costs=(), if aggregate_by: for taglist in aggregate_by: for tag in taglist: - if f'value-{tag}' not in df.columns: + if (tag not in df.columns + and f'value-{tag}' not in df.columns): raise InvalidFile(f'Missing tag "{tag}" in {fname}') df['id'] = asset_prefix + df.id dfs.append(df) From ccbced26f6313b80d6cb179cffdb63f4deb71d72 Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Thu, 10 Aug 2023 10:21:28 +0200 Subject: [PATCH 5/6] Do not raise InvalidFile if aggregate_by = site_id, because that field is added after importing all the exposure files --- openquake/risklib/asset.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openquake/risklib/asset.py b/openquake/risklib/asset.py index c4f0de498fa8..cf1dadc649b7 100644 --- a/openquake/risklib/asset.py +++ b/openquake/risklib/asset.py @@ -820,6 +820,9 @@ def read_exp_df(fname, calculation_mode='', ignore_missing_costs=(), if aggregate_by: for taglist in aggregate_by: for tag in taglist: + if tag == 'site_id': + # 'site_id' is added later in _get_mesh_assets + continue if (tag not in df.columns and f'value-{tag}' not in df.columns): raise InvalidFile(f'Missing tag "{tag}" in {fname}') From e4144c1f83603119d5534a2e58e83f69146303ca Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Mon, 21 Aug 2023 09:46:56 +0200 Subject: [PATCH 6/6] Update changelog [ci skip] --- debian/changelog | 2 ++ 1 file changed, 2 insertions(+) diff --git a/debian/changelog b/debian/changelog index c834786122fc..c6530f33655d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,8 @@ [Paolo Tormene] * Raise an error if files specified in site_model_file do not have the same headers + * Give a better error message for risk calculations where the aggregate_by + tag is specified but it doesn't exist in the exposure model [Michele Simionato] * Reduced memory consumption in the master node in post_risk