diff --git a/.github/workflows/run-profiling.yaml b/.github/workflows/run-profiling.yaml index a28acda1b0..af0611b074 100644 --- a/.github/workflows/run-profiling.yaml +++ b/.github/workflows/run-profiling.yaml @@ -163,7 +163,7 @@ jobs: ## The token provided needs contents and pages access to the target repo ## Token can be (re)generated by a member of the UCL organisation, ## the current member is the rc-softdev-admin. - ## [17-07-2024] New token generated, will expire 10-07-2025 + ## [10-07-2023] The current token will expire 10-07-2024 - name: Push results to profiling repository uses: dmnemec/copy_file_to_another_repo_action@v1.1.1 env: diff --git a/.github/workflows/tests-unpinned.yml b/.github/workflows/tests-unpinned.yml deleted file mode 100644 index d776644eb9..0000000000 --- a/.github/workflows/tests-unpinned.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Tests with unpinned dependencies - -on: - schedule: - - cron: 0 0 15 * * - -jobs: - test: - name: Run tests - strategy: - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - python-version: [3.x] - fail-fast: false - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - name: Cache tox - uses: actions/cache@v4 - with: - path: .tox - key: tox-${{hashFiles('pyproject.toml') }} - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install tox - run: python -m pip install tox - - name: Run tests - run: tox -v -e py3-latest -- pytest -n auto -vv tests --skip-slow diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 60b0f10cc4..283a53594a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -10,8 +10,6 @@ on: - requirements/** - resources/** - src/tlo/** - - src/scripts/profiling/scale_run.py - - src/scripts/profiling/shared.py - tests/** - pyproject.toml - tox.ini @@ -77,12 +75,4 @@ jobs: tox --version - name: Test with tox run: | - tox -v -e py311 -- pytest --show-capture=no -vv "${{ matrix.file }}" --junit-xml="${{ matrix.file }}.results.xml" - - name: Generate test report - if: always() - uses: pmeier/pytest-results-action@fc6576eced1f411ea48ab10e917d9cfce2960e29 - with: - path: ${{ matrix.file }}.results.xml - summary: true - display-options: fEX - title: Results for ${{ matrix.file }} + tox -v -e py311,report -- pytest --cov --cov-report=term-missing -vv "${{ matrix.file }}" diff --git a/.gitignore b/.gitignore index 9711f1da10..cb11b17ff2 100644 --- a/.gitignore +++ b/.gitignore @@ -109,6 +109,9 @@ venv.bak/ # PyCharm .idea/ +# TLO .rst files +docs/reference/tlo*.rst + # TLO configuration tlo.conf @@ -121,11 +124,3 @@ profiling_results/ # ignore _version.py file generated by setuptools_scm src/**/_version.py - -# Generated TLO docs files -docs/_*.rst -docs/hsi_events.csv -docs/parameters.rst -docs/reference/modules.rst -docs/reference/tlo*.rst -docs/resources/**/*.rst diff --git a/CITATION.cff b/CITATION.cff index 07d4c8801c..3d5d0c7cc0 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -113,11 +113,6 @@ authors: family-names: Janoušková orcid: https://orcid.org/0000-0002-4104-0119 affiliation: University College London - website: https://profiles.ucl.ac.uk/90260 -- given-names: Rachel - family-names: Murray-Watson - affiliation: Imperial College London - orcid: https://orcid.org/0000-0001-9079-5975 repository-code: https://github.com/UCL/TLOmodel url: https://tlomodel.org abstract: Our fundamental aim is to develop the use of epidemiological and economic diff --git a/README.md 
b/README.md index ce6afead33..eadcbd2c60 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@
-Thanzi la Onse +Thanzi La Onse

Thanzi la Onse model

@@ -24,7 +24,7 @@ The __Thanzi la Onse model (TLOmodel)__ is a part of the [Thanzi la Onse][thanzi TLOmodel is developed in a collaboration between: - [Kamuzu University of Health Sciences][kuhes-link] -- [MRC Centre for Global Infectious Disease Analysis][mrc-gida-link], [Imperial College London][imperial-link] +- [MRC Centre for Global Infectious Disease Analysis][mrc-gida-link], [Imperial College London][imperial-link] - [Institute for Global Health][igh-link], [University College London][ucl-link] - [Centre for Advanced Research Computing][arc-link], [University College London][ucl-link] - [Centre for Health Economics][che-link], [University of York][york-link] diff --git a/contributors.yaml b/contributors.yaml index 601baf176a..1ea698d181 100644 --- a/contributors.yaml +++ b/contributors.yaml @@ -4,7 +4,7 @@ affiliation: "Imperial College London" website: "https://www.imperial.ac.uk/people/timothy.hallett" github-username: tbhallett - role: Project Lead + role: Joint lead epidemiology contributions: - Epidemiology and modelling - Software development @@ -14,7 +14,7 @@ affiliation: "University College London" website: "https://profiles.ucl.ac.uk/5430" github-username: andrew-phillips-1 - role: Lead Epidemiology + role: Joint lead epidemiology contributions: - Epidemiology and modelling - Software development @@ -102,6 +102,7 @@ website: "https://www.york.ac.uk/che/staff/research/sakshi-mohan/" github-username: sakshimohan contributions: + - Epidemiology and modelling - Health economics - Software development - given-names: Wingston @@ -195,7 +196,6 @@ family-names: Janoušková orcid: "https://orcid.org/0000-0002-4104-0119" affiliation: "University College London" - website: "https://profiles.ucl.ac.uk/90260" github-username: EvaJanouskova contributions: - Epidemiology and modelling @@ -206,14 +206,15 @@ affiliation: University College London website: "https://profiles.ucl.ac.uk/954" contributions: - - Clinical process modelling + - Clinical consultant - given-names: Paul family-names: Revill orcid: "https://orcid.org/0000-0001-8632-0600" affiliation: University of York website: "https://www.york.ac.uk/che/staff/research/paul-revill/" github-username: paulrevill - role: "Lead Health-Economics" + contributions: + - Health economics - given-names: Wiktoria family-names: Tafesse orcid: "https://orcid.org/0000-0002-0076-8285" @@ -236,7 +237,7 @@ website: "https://www.york.ac.uk/che/staff/students/newton-chagoma/" github-username: nchagoma503 contributions: - - Health economics + - Health economics - given-names: Martin family-names: Chalkley orcid: "https://orcid.org/0000-0002-1091-8259" @@ -272,31 +273,3 @@ family-names: Uwais website: "https://uk.linkedin.com/in/leila-uwais-597705142" github-username: Leila-Uwais -- given-names: Dominic - family-names: Nkhoma - affiliation: "Kamuzu University of Health Sciences" - orcid: "https://orcid.org/0000-0001-6125-6630" - contributions: - - Policy translation - website: "https://mw.linkedin.com/in/dominicnkhoma1978" -- given-names: Gerald - family-names: Manthalu - affiliation: "Department of Planning and Policy Development, Ministry of Health and Population, Lilongwe, Malawi" - orcid: "https://orcid.org/0000-0002-3501-8601" - contributions: - - Policy translation -- given-names: Rachel - family-names: Murray-Watson - affiliation: "Imperial College London" - orcid: https://orcid.org/0000-0001-9079-5975 - github-username: RachelMurray-Watson - contributions: - - Epidemiology and modelling - - Software development -- given-names: Victor - 
family-names: Mwapasa - orcid: "https://orcid.org/0000-0002-2748-8902" - affiliation: "Kamuzu University of Health Sciences" - website: "https://www.kuhes.ac.mw/prof-victor-mwapasa/" - contributions: - - Clinical process modelling diff --git a/docs/conf.py b/docs/conf.py index 52eb0ff76b..2b1c453203 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -106,9 +106,6 @@ 'exclude-members': '__dict__, name, rng, sim' # , read_parameters', } -# Include both class level and __init__ docstring content in class documentation -autoclass_content = 'both' - # The checker can't see private repos linkcheck_ignore = ['^https://github.com/UCL/TLOmodel.*', 'https://www.who.int/bulletin/volumes/88/8/09-068213/en/nn'] diff --git a/docs/index.rst b/docs/index.rst index 47d4857290..9de9d148d1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -52,7 +52,6 @@ Contents azure_batch reference/index resources/index - parameters learning publications contributors diff --git a/docs/publications.rst b/docs/publications.rst index e4b6e473bc..77ae7ef93a 100644 --- a/docs/publications.rst +++ b/docs/publications.rst @@ -14,9 +14,6 @@ Overview of the Model Analyses Using The Model ======================== -* `Health workforce needs in Malawi: analysis of the Thanzi La Onse integrated epidemiological model of care `_ - -* `A new approach to Health Benefits Package design: an application of the Thanzi La Onse model in Malawi `_ * `The Changes in Health Service Utilisation in Malawi During the COVID-19 Pandemic `_ @@ -24,7 +21,7 @@ Analyses Using The Model * `Factors Associated with Consumable Stock-Outs in Malawi: Evidence from a Facility Census `_ -* `The Effects of Health System Frailties on the Projected Impact of the HIV and TB Programmes in Malawi `_ +* `The Effects of Health System Frailties on the Projected Impact of the HIV and TB Programmes in Malawi `_ * `Estimating the health burden of road traffic injuries in Malawi using an individual-based model `_ @@ -32,8 +29,6 @@ Analyses Using The Model * `The potential impact of including pre-school aged children in the praziquantel mass-drug administration programmes on the S.haematobium infections in Malawi: a modelling study `_ -* `A Decade of Progress in HIV, Malaria, and Tuberculosis Initiatives in Malawi. 
`_ - Healthcare Seeking Behaviour ============================ diff --git a/docs/tlo_contributors.py b/docs/tlo_contributors.py index 0a26ebbbc3..680418efa5 100644 --- a/docs/tlo_contributors.py +++ b/docs/tlo_contributors.py @@ -98,12 +98,11 @@ def categorized_contributor_lists_html( with open(args.contributors_file_path, "r") as f: contributors = yaml.safe_load(f) contribution_categories = ( - "Clinical process modelling", "Epidemiology and modelling", "Health economics", - "Policy translation", - "Project management", "Software development", + "Clinical consultant", + "Project management", ) category_predicates = { "Scientific leads": lambda c: "lead" in c.get("role", "").lower(), diff --git a/docs/tlo_parameters.py b/docs/tlo_parameters.py deleted file mode 100644 index 6fb38d102b..0000000000 --- a/docs/tlo_parameters.py +++ /dev/null @@ -1,321 +0,0 @@ -"""Create listings of model parameters in tabular format""" - -import argparse -from collections import defaultdict -from collections.abc import Iterable -from functools import partial -from pathlib import Path -from typing import TypeAlias, get_args -import numpy -import pandas - -import tlo -from tlo import Date, Module, Simulation -from tlo.methods import fullmodel -from tlo.analysis.utils import get_parameters_for_status_quo - - -_TYPE_TO_DESCRIPTION = { - bool: "Boolean", - pandas.Categorical: "Categorical", - pandas.DataFrame: "Dataframe", - pandas.Timestamp: "Date", - defaultdict: "Dictionary", - dict: "Dictionary", - int: "Integer", - numpy.int64: "Integer", - list: "List", - float: "Real", - numpy.float64: "Real", - pandas.Series: "Series", - set: "Set", - str: "String", -} - - -ScalarParameterValue: TypeAlias = float | int | bool | str | numpy.generic | Date -StructuredParameterValue: TypeAlias = ( - dict | list | tuple | set | pandas.Series | pandas.DataFrame -) -ParameterValue: TypeAlias = ( - ScalarParameterValue | pandas.Categorical | StructuredParameterValue -) - -_SCALAR_TYPES = get_args(ScalarParameterValue) - - -ModuleParameterTablesDict: TypeAlias = dict[str, dict[str, pandas.DataFrame]] -ModuleStructuredParametersDict: TypeAlias = dict[ - str, dict[str, pandas.DataFrame | dict[str, pandas.DataFrame]] -] - - -def structured_value_to_dataframe( - value: StructuredParameterValue, -) -> pandas.DataFrame | dict[str, pandas.DataFrame]: - if isinstance(value, (list, tuple, set)): - return pandas.DataFrame.from_records([value], index=["Value"]) - elif isinstance(value, pandas.Series): - return pandas.DataFrame(value) - elif isinstance(value, pandas.DataFrame): - return value - elif isinstance(value, dict): - if all(isinstance(v, _SCALAR_TYPES) for v in value.values()): - return pandas.DataFrame(value, index=["Value"]) - else: - return {k: structured_value_to_dataframe(v) for k, v in value.items()} - else: - raise ValueError( - f"Unrecognized structured value type {type(value)} for value {value}" - ) - - -def get_parameter_tables( - modules: Iterable[Module], - overriden_parameters: dict[str, dict[str, ParameterValue]], - excluded_modules: set[str], - excluded_parameters: dict[str, set[str]], - escape_characters: callable, - format_internal_link: callable, - max_inline_parameter_length: int = 10, -) -> tuple[ModuleParameterTablesDict, ModuleStructuredParametersDict]: - module_parameter_tables = {} - module_structured_parameters = {} - for module in sorted(modules, key=lambda m: m.name): - if module.name in excluded_modules: - continue - parameter_records = [] - module_structured_parameters[module.name] = {} - 
module_excluded_parameters = excluded_parameters.get(module.name, set()) - for parameter_name, parameter in module.PARAMETERS.items(): - if parameter_name in module_excluded_parameters: - continue - if ( - module.name in overriden_parameters - and parameter_name in overriden_parameters[module.name] - ): - value = overriden_parameters[module.name][parameter_name] - else: - value = module.parameters.get(parameter_name) - if value is None: - continue - record = { - "Name": escape_characters(parameter_name), - "Description": escape_characters(parameter.description), - "Type": _TYPE_TO_DESCRIPTION[type(value)], - } - if ( - isinstance(value, _SCALAR_TYPES) - or isinstance(value, (list, set, tuple)) - and len(value) < max_inline_parameter_length - ): - record["Value"] = str(value) - elif isinstance(value, pandas.Categorical): - assert len(value) == 1 - record["Value"] = str(value[0]) - else: - record["Value"] = format_internal_link( - "...", parameter_id(module.name, parameter_name) - ) - module_structured_parameters[module.name][parameter_name] = ( - structured_value_to_dataframe(value) - ) - parameter_records.append(record) - module_parameter_tables[module.name] = pandas.DataFrame.from_records( - parameter_records, - ) - return module_parameter_tables, module_structured_parameters - - -def parameter_id(module_name, parameter_name): - return f"{module_name}-{parameter_name}" - - -def dataframe_as_table(dataframe, rows_threshold=None, tablefmt="pipe"): - summarize = rows_threshold is not None and len(dataframe) > rows_threshold - if summarize: - original_rows = len(dataframe) - dataframe = dataframe[:rows_threshold] - table_string = dataframe.to_markdown(index=False, tablefmt=tablefmt) - if summarize: - table_string += ( - f"\n\n*Only first {rows_threshold} rows of {original_rows} are shown.*\n" - ) - return table_string - - -def md_anchor_tag(id: str) -> str: - return f'<a id="{id}"></a>' - - -def md_list_item(text: str, bullet: str = "-", indent_level: int = 0) -> str: - return " " * indent_level + f"{bullet} {text}\n" - - -def md_hyperlink(link_text: str, url: str) -> str: - return f"[{link_text}]({url})" - - -def md_internal_link_with_backlink_anchor( - link_text: str, id: str, suffix: str = "backlink" -): - return md_anchor_tag(f"{id}-{suffix}") + md_hyperlink(link_text, f"#{id}") - - -def rst_internal_link(link_text: str, id: str): - return f":ref:`{link_text}<{id}>`" - - -def escape_rst_markup_characters(text: str): - return text.replace("_", "\_").replace("*", "\*") - - -def md_anchor_and_backlink(id: str, suffix: str = "backlink"): - return md_anchor_tag(id) + md_hyperlink("↩", f"#{id}-{suffix}") - - -def md_table_of_contents(module_names): - return "\n".join( - [ - md_list_item( - md_internal_link_with_backlink_anchor(module_name, module_name.lower()) - ) - for module_name in module_names - ] - ) - - -def rst_table_of_contents(_module_names): - return ".. 
contents::\n :local:\n :depth: 1\n :backlinks: entry\n\n" - - -def md_header(text: str, level: int) -> str: - return ("#" * level if level > 0 else "%") + " " + text + "\n\n" - - -def rst_header(title: str, level: int = 0) -> str: - separator_character = '*=-^"'[level] - line = separator_character * len(title) - return (line + "\n" if level == 0 else "") + title + "\n" + line + "\n\n" - - -def md_module_header(module_name): - return md_header(f"{module_name} " + md_anchor_and_backlink(module_name.lower()), 1) - - -def rst_module_header(module_name): - return rst_header(module_name, 1) - - -def md_structured_parameter_header(parameter_name, module_name): - return md_header( - f"{parameter_name} " - + md_anchor_and_backlink(parameter_id(module_name, parameter_name)), - 2, - ) - - -def rst_structured_parameter_header(parameter_name, module_name): - return f".. _{parameter_id(module_name, parameter_name)}:\n\n" + rst_header( - parameter_name, 2 - ) - - -_formatters = { - ".md": { - "header": md_header, - "table_of_contents": md_table_of_contents, - "module_header": md_module_header, - "structured_parameter_header": md_structured_parameter_header, - "dataframe_as_table": partial(dataframe_as_table, tablefmt="pipe"), - "internal_link": md_internal_link_with_backlink_anchor, - "character_escaper": lambda x: x, - }, - ".rst": { - "header": rst_header, - "table_of_contents": rst_table_of_contents, - "module_header": rst_module_header, - "structured_parameter_header": rst_structured_parameter_header, - "dataframe_as_table": partial(dataframe_as_table, tablefmt="grid"), - "internal_link": rst_internal_link, - "character_escaper": escape_rst_markup_characters, - }, -} - - -def write_parameters_file( - output_file_path: Path, - module_parameter_tables: ModuleParameterTablesDict, - module_structured_parameters: ModuleStructuredParametersDict, - summarization_rows_threshold: int = 10, -) -> None: - formatter = _formatters[output_file_path.suffix] - with output_file_path.open("w") as output_file: - output_file.write(formatter["header"]("Parameters", 0)) - output_file.write("Default parameter values used in simulations.\n\n") - output_file.write( - formatter["table_of_contents"](module_parameter_tables.keys()) - ) - output_file.write("\n") - for module_name, parameter_table in module_parameter_tables.items(): - output_file.write(formatter["module_header"](module_name)) - output_file.write(formatter["dataframe_as_table"](parameter_table)) - output_file.write("\n\n") - for ( - parameter_name, - structured_parameter, - ) in module_structured_parameters[module_name].items(): - output_file.write( - formatter["structured_parameter_header"]( - parameter_name, module_name - ) - ) - if isinstance(structured_parameter, dict): - for key, dataframe in structured_parameter.items(): - output_file.write(formatter["header"](key, 3)) - output_file.write( - formatter["dataframe_as_table"]( - dataframe, summarization_rows_threshold - ) - ) - output_file.write("\n\n") - else: - output_file.write( - formatter["dataframe_as_table"]( - structured_parameter, summarization_rows_threshold - ) - ) - output_file.write("\n") - output_file.write("\n") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - "resource_file_path", - type=Path, - default=Path(tlo.__file__).parent.parent.parent / "resources", - help="Path to resource directory", - ) - parser.add_argument( - "output_file_path", type=Path, help="Path to file to write tables to" - ) - args = parser.parse_args() - 
simulation = Simulation( - start_date=Date(2010, 1, 1), seed=1234, log_config={"suppress_stdout": True} - ) - status_quo_parameters = get_parameters_for_status_quo() - simulation.register(*fullmodel.fullmodel(args.resource_file_path)) - internal_link_formatter = _formatters[args.output_file_path.suffix]["internal_link"] - character_escaper = _formatters[args.output_file_path.suffix]["character_escaper"] - module_parameter_tables, module_structured_parameters = get_parameter_tables( - simulation.modules.values(), - status_quo_parameters, - {"HealthBurden", "Wasting"}, - {"Demography": {"gbd_causes_of_death_data"}, "Tb": {"who_incidence_estimates"}}, - character_escaper, - internal_link_formatter, - ) - write_parameters_file( - args.output_file_path, module_parameter_tables, module_structured_parameters - ) diff --git a/pyproject.toml b/pyproject.toml index ce24c3a3cc..f18a736844 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,9 @@ classifiers = [ 'Operating System :: OS Independent', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3 :: Only', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', ] @@ -24,7 +27,7 @@ dependencies = [ "pyshp", "squarify", "numpy", - "pandas~=2.0", + "pandas~=2.0.0", "scipy", # Avoid https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1963 "openpyxl==3.1.0", @@ -33,14 +36,12 @@ dependencies = [ "azure-identity", "azure-keyvault", "azure-storage-file-share", - # For saving and loading simulation state - "dill", ] description = "Thanzi la Onse Epidemiology Model" dynamic = ["version"] license = {file = "LICENSE.txt"} readme = "README.md" -requires-python = ">=3.11" +requires-python = ">=3.8" [project.optional-dependencies] dev = [ @@ -119,7 +120,7 @@ addopts = "-ra --strict-markers --doctest-modules --doctest-glob=*.rst --tb=shor markers = ["group2", "slow"] [tool.ruff] -target-version = "py311" +target-version = "py38" line-length = 120 [tool.setuptools.packages.find] diff --git a/requirements/base.txt b/requirements/base.txt index 458aa584ea..dc44b868c6 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -56,8 +56,6 @@ cryptography==41.0.3 # pyjwt cycler==0.11.0 # via matplotlib -dill==0.3.8 - # via tlo (pyproject.toml) et-xmlfile==1.1.0 # via openpyxl fonttools==4.42.1 @@ -114,7 +112,6 @@ pyjwt[crypto]==2.8.0 # via # adal # msal - # pyjwt pyparsing==3.1.1 # via matplotlib pyshp==2.3.1 diff --git a/requirements/dev.txt b/requirements/dev.txt index a6e0468a19..efd4f0e3e8 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.8 # by the following command: # # pip-compile --extra=dev --output-file=requirements/dev.txt @@ -61,9 +61,7 @@ colorama==0.4.6 contourpy==1.1.1 # via matplotlib coverage[toml]==7.3.1 - # via - # coverage - # pytest-cov + # via pytest-cov cryptography==41.0.3 # via # adal @@ -74,14 +72,14 @@ cryptography==41.0.3 # pyjwt cycler==0.11.0 # via matplotlib -dill==0.3.8 - # via - # pylint - # tlo (pyproject.toml) +dill==0.3.7 + # via pylint distlib==0.3.7 # via virtualenv et-xmlfile==1.1.0 # via openpyxl +exceptiongroup==1.1.3 + # via pytest execnet==2.0.2 # via pytest-xdist filelock==3.12.4 @@ -96,6 +94,10 @@ gitpython==3.1.36 # via tlo (pyproject.toml) idna==3.4 # via requests 
+importlib-metadata==6.8.0 + # via build +importlib-resources==6.1.1 + # via matplotlib iniconfig==2.0.0 # via pytest isodate==0.6.1 @@ -170,7 +172,6 @@ pyjwt[crypto]==2.8.0 # via # adal # msal - # pyjwt pylint==3.0.1 # via tlo (pyproject.toml) pyparsing==3.1.1 @@ -220,17 +221,29 @@ smmap==5.0.1 # via gitdb squarify==0.4.3 # via tlo (pyproject.toml) +tomli==2.0.1 + # via + # build + # coverage + # pip-tools + # pylint + # pyproject-api + # pyproject-hooks + # pytest + # tox tomlkit==0.12.1 # via pylint tox==4.11.3 # via tlo (pyproject.toml) typing-extensions==4.8.0 # via + # astroid # azure-core # azure-keyvault-certificates # azure-keyvault-keys # azure-keyvault-secrets # azure-storage-file-share + # pylint tzdata==2023.3 # via pandas urllib3==2.0.4 @@ -241,6 +254,10 @@ virtualenv==20.24.5 # tox wheel==0.41.2 # via pip-tools +zipp==3.17.0 + # via + # importlib-metadata + # importlib-resources # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/resources/ResourceFile_Cervical_Cancer.xlsx b/resources/ResourceFile_Cervical_Cancer.xlsx new file mode 100644 index 0000000000..8c66a47124 --- /dev/null +++ b/resources/ResourceFile_Cervical_Cancer.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5673464abe172fd73956a44833ff8b409e89f7a4fa97d146f4f1b12a38715c8a +size 7312 diff --git a/resources/ResourceFile_HIV.xlsx b/resources/ResourceFile_HIV.xlsx index 00f7b684db..1cdb865eb1 100644 --- a/resources/ResourceFile_HIV.xlsx +++ b/resources/ResourceFile_HIV.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b34a88635b02ee8a465462c8eb67a485d721c9159a5bba1df8e63609b803ebe9 -size 161679 +oid sha256:58978c108515c3762addd18824129b2654f241d94bcc778ab17b27d0d8250593 +size 160402 diff --git a/resources/ResourceFile_Improved_Healthsystem_And_Healthcare_Seeking.xlsx b/resources/ResourceFile_Improved_Healthsystem_And_Healthcare_Seeking.xlsx index 7ec045407a..8fc0a24ae9 100644 --- a/resources/ResourceFile_Improved_Healthsystem_And_Healthcare_Seeking.xlsx +++ b/resources/ResourceFile_Improved_Healthsystem_And_Healthcare_Seeking.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:49282122ff1c60e3bf73765013b6770a2fbd3f0df9a6e3f71e1d4c40e9cdfa2a -size 48238 +oid sha256:1b462c20ca6cbf0ca1f98936416e015fa248289e5bf4f66838e1b9920874f651 +size 48142 diff --git a/resources/ResourceFile_RTI.xlsx b/resources/ResourceFile_RTI.xlsx index 553d6febb0..68cdd18422 100644 --- a/resources/ResourceFile_RTI.xlsx +++ b/resources/ResourceFile_RTI.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d950c5d769848fb226db8c1a7d7796c8e43cc2590806f846b98a4bbef6840948 -size 13776 +oid sha256:2c11ada2e8b77675950b61fc8e0efd1c4fa35dffaecaf1029eafd61892a7cefb +size 13949 diff --git a/resources/ResourceFile_TB.xlsx b/resources/ResourceFile_TB.xlsx index 2b612ad6ec..e6c1bf80db 100644 --- a/resources/ResourceFile_TB.xlsx +++ b/resources/ResourceFile_TB.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93d7bf76c8bece548e08e3f0cb6e9e28a09ca2b5760a408399bf9641f7ed2001 -size 56523 +oid sha256:3cb13e128d4bcb3b694def108c3bd61b16508b48e389c3e5cdf8155717aab9e9 +size 55662 diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx index e7f34296e6..3d804bbc77 100644 --- 
a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx +++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f6e1a0c8ec505dd613dfc9c0b1b14d16ee3161500bc08c743398754d2074203 -size 15682 +oid sha256:af86c2c2af5c291c18c5d481681d6d316526b81806c8c8e898517e850160e6fd +size 12465 diff --git a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES.xlsx b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES.xlsx index d9dbac2e99..3a26090f34 100644 --- a/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES.xlsx +++ b/resources/healthsystem/priority_policies/ResourceFile_PriorityRanking_ALLPOLICIES.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:734d46d83dccf15bf38ee171a487664f01035da6cf68660d4af62097a6160fb6 -size 42716 +oid sha256:83cfa3d9b6f858abe6f74e241952310ac0df43ce8e3fb6d280c2c3eb1355d367 +size 44022 diff --git a/resources/malaria/ResourceFile_malaria.xlsx b/resources/malaria/ResourceFile_malaria.xlsx index 7537f3ace9..70902b7480 100644 --- a/resources/malaria/ResourceFile_malaria.xlsx +++ b/resources/malaria/ResourceFile_malaria.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f256d5007b36e2428ae844747bd766bb6086540c5135408d606dd821e185d9f -size 69578 +oid sha256:6ba5849e265103ee799d1982325b6fed1ef4d3df559ffce9d6790395c201fcaf +size 67562 diff --git a/src/scripts/automation/mark_slow_tests.py b/src/scripts/automation/mark_slow_tests.py deleted file mode 100644 index daa507e2ab..0000000000 --- a/src/scripts/automation/mark_slow_tests.py +++ /dev/null @@ -1,298 +0,0 @@ -"""Script to automatically mark slow running tests with `pytest.mark.slow` decorator.""" - - -import argparse -import difflib -import json -import re -import warnings -from collections import defaultdict -from pathlib import Path -from typing import Dict, NamedTuple, Optional, Set, Tuple, Union - -import redbaron - -SLOW_MARK_DECORATOR = "pytest.mark.slow" - - -class TestFunction(NamedTuple): - module_path: Path - name: str - - -class TestMethod(NamedTuple): - module_path: Path - class_name: str - method_name: str - - -TestNode = Union[TestFunction, TestMethod] - - -def parse_nodeid_last_part(last_part: str) -> Tuple[str, Optional[str]]: - match = re.match(r"(.+)\[(.+)\]", last_part) - if match is not None: - return match[1], match[2] - else: - return last_part, None - - -def parse_nodeid(nodeid: str) -> TestNode: - parts = nodeid.split("::") - if len(parts) == 2: - module_path, last_part = parts - name, _ = parse_nodeid_last_part(last_part) - return TestFunction(Path(module_path), name) - elif len(parts) == 3: - module_path, class_name, last_part = parts - method_name, _ = parse_nodeid_last_part(last_part) - return TestMethod(Path(module_path), class_name, method_name) - else: - msg = f"Test nodeid has unexpected format: {nodeid}" - raise ValueError(msg) - - -def parse_test_report( - json_test_report_path: Path, - remove_slow_threshold: float, - add_slow_threshold: float, -) -> Dict[Path, Dict[str, Set[TestNode]]]: - with open(json_test_report_path, "r") as f: - test_report = json.load(f) - tests_to_change_slow_mark_by_module: defaultdict = defaultdict( - lambda: {"add": set(), "remove": set()} - ) - tests_to_keep_slow_mark_by_module: defaultdict = defaultdict(set) - for test in 
test_report["tests"]: - if test["outcome"] != "passed": - continue - test_node = parse_nodeid(test["nodeid"]) - marked_slow = "slow" in test["keywords"] - call_duration = test["call"]["duration"] - if marked_slow and call_duration < remove_slow_threshold: - tests_to_change_slow_mark_by_module[test_node.module_path]["remove"].add( - test_node - ) - elif not marked_slow and call_duration > add_slow_threshold: - tests_to_change_slow_mark_by_module[test_node.module_path]["add"].add( - test_node - ) - elif marked_slow: - tests_to_keep_slow_mark_by_module[test_node.module_path].add(test_node) - # Parameterized tests may have different call durations for different parameters - # however slow mark applies to all parameters, therefore if any tests appear in - # both set of tests to keep slow mark and test to remove slow mark (corresponding - # to runs of same test with different parameters) we remove them from the set of - # tests to remove slow mark - for ( - module_path, - test_nodes_to_change, - ) in tests_to_change_slow_mark_by_module.items(): - test_nodes_to_change["remove"].difference_update( - tests_to_keep_slow_mark_by_module[module_path] - ) - return dict(tests_to_change_slow_mark_by_module) - - -def find_function( - module_fst: redbaron.RedBaron, function_name: str -) -> redbaron.DefNode: - return module_fst.find("def", lambda node: node.name == function_name) - - -def find_class_method( - module_fst: redbaron.RedBaron, class_name: str, method_name: str -) -> redbaron.DefNode: - class_fst = module_fst.find("class", lambda node: node.name == class_name) - return class_fst.fund("def", lambda node: node.name == method_name) - - -def find_decorator( - function_fst: redbaron.DefNode, decorator_code: str -) -> redbaron.DecoratorNode: - return function_fst.find( - "decorator", lambda node: str(node.value) == decorator_code - ) - - -def add_decorator(function_fst: redbaron.DefNode, decorator_code: str): - if len(function_fst.decorators) == 0: - function_fst.decorators = f"@{decorator_code}" - else: - function_fst.decorators.append(f"@{decorator_code}") - - -def remove_decorator( - function_fst: redbaron.DefNode, decorator_fst: redbaron.DecoratorNode -): - # Need to remove both decorator and associated end line node so we find index of - # decorator and pop it and next node (which should be end line node) rather than - # use remove method of decorators proxy list directly - decorator_index = function_fst.decorators.node_list.index(decorator_fst) - popped_decorator_fst = function_fst.decorators.node_list.pop(decorator_index) - endline_fst = function_fst.decorators.node_list.pop(decorator_index) - if popped_decorator_fst is not decorator_fst or not isinstance( - endline_fst, redbaron.EndlNode - ): - msg = ( - f"Removed {popped_decorator_fst} and {endline_fst} when expecting " - f"{decorator_fst} and end line node." 
- - -def remove_mark_from_tests( - module_fst: redbaron.RedBaron, - tests_to_remove_mark: Set[TestNode], - mark_decorator: str, -): - for test_node in tests_to_remove_mark: - if isinstance(test_node, TestFunction): - function_fst = find_function(module_fst, test_node.name) - else: - function_fst = find_class_method( - module_fst, test_node.class_name, test_node.method_name - ) - decorator_fst = find_decorator(function_fst, mark_decorator) - if decorator_fst is None: - msg = ( - f"Test {test_node} unexpectedly does not have a decorator " - f"{mark_decorator} - this suggests you may be using a JSON test report " - "generated using a different version of tests code." - ) - warnings.warn(msg, stacklevel=2) - else: - remove_decorator(function_fst, decorator_fst) - - -def add_mark_to_tests( - module_fst: redbaron.RedBaron, tests_to_add_mark: Set[TestNode], mark_decorator: str -): - for test_node in tests_to_add_mark: - if isinstance(test_node, TestFunction): - function_fst = find_function(module_fst, test_node.name) - else: - function_fst = find_class_method( - module_fst, test_node.class_name, test_node.method_name - ) - if find_decorator(function_fst, mark_decorator) is not None: - msg = ( - f"Test {test_node} unexpectedly already has a decorator " - f"{mark_decorator} - this suggests you may be using a JSON test report " - "generated using a different version of tests code." - ) - warnings.warn(msg, stacklevel=2) - else: - add_decorator(function_fst, mark_decorator) - - -def add_import(module_fst: redbaron.RedBaron, module_name: str): - top_level_imports = module_fst.find_all( - "import", lambda node: node.parent is module_fst - ) - last_top_level_import = top_level_imports[-1] if top_level_imports else None - import_statement = f"import {module_name}" - if last_top_level_import is not None: - last_top_level_import.insert_after(import_statement) - else: - if isinstance(module_fst[0], redbaron.StringNode): - module_fst[0].insert_after(import_statement) - else: - module_fst[0].insert_before(import_statement) - - -def remove_import(module_fst: redbaron.RedBaron, module_name: str): - import_fst = module_fst.find("import", lambda node: module_name in node.modules()) - if len(import_fst.modules()) > 1: - import_fst.remove(module_name) - else: - module_fst.remove(import_fst) - - -def update_test_slow_marks( - tests_to_change_slow_mark_by_module: Dict[Path, Dict[str, Set[TestNode]]], - show_diff: bool, -): - for ( - module_path, - test_nodes_to_change, - ) in tests_to_change_slow_mark_by_module.items(): - with open(module_path, "r") as source_code: - module_fst = redbaron.RedBaron(source_code.read()) - original_module_fst = module_fst.copy() - remove_mark_from_tests( - module_fst, test_nodes_to_change["remove"], SLOW_MARK_DECORATOR - ) - add_mark_to_tests(module_fst, test_nodes_to_change["add"], SLOW_MARK_DECORATOR) - any_marked = ( - module_fst.find( - "decorator", lambda node: str(node.value) == SLOW_MARK_DECORATOR - ) - is not None - ) - pytest_imported = ( - module_fst.find("import", lambda node: "pytest" in node.modules()) - is not None - ) - if any_marked and not pytest_imported: - add_import(module_fst, "pytest") - elif not any_marked and pytest_imported: - pytest_references = module_fst.find_all("name", "pytest") - if ( - len(pytest_references) == 1 - and pytest_references[0].parent_find("import") is not None - ): - remove_import(module_fst, "pytest") - if show_diff: - diff_lines = difflib.unified_diff( - original_module_fst.dumps().split("\n"), - module_fst.dumps().split("\n"), - fromfile=str(module_path), - tofile=f"Updated 
{module_path}", - ) - print("\n".join(diff_lines), end="") - else: - with open(module_path, "w") as source_code: - source_code.write(module_fst.dumps()) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser("Mark slow running tests with pytest.mark.slow") - parser.add_argument( - "--json-test-report-path", - type=Path, - help="JSON report output from pytest-json-report plugin listing test durations", - ) - parser.add_argument( - "--remove-slow-threshold", - type=float, - default=9.0, - help="Threshold in seconds for test duration below which to remove slow marker", - ) - parser.add_argument( - "--add-slow-threshold", - type=float, - default=11.0, - help="Threshold in seconds for test duration above which to add slow marker", - ) - parser.add_argument( - "--show-diff", - action="store_true", - help="Print line-by-line diff of changes to stdout without changing files", - ) - args = parser.parse_args() - if not args.json_test_report_path.exists(): - msg = f"No file found at --json-test-report-path={args.json_test_report_path}" - raise FileNotFoundError(msg) - # We want a hysteresis effect by having remove_slow_threshold < add_slow_threshold - # so a test with duration close to the thresholds doesn't keep getting marks added - # and removed due to noise in durations - if args.remove_slow_threshold > args.add_slow_threshold: - msg = ( - "Argument --remove-slow-threshold should be less than or equal to " - "--add-slow-threshold" - ) - raise ValueError(msg) - tests_to_change_slow_mark_by_module = parse_test_report( - args.json_test_report_path, args.remove_slow_threshold, args.add_slow_threshold - ) - update_test_slow_marks(tests_to_change_slow_mark_by_module, args.show_diff) diff --git a/src/scripts/cervical_cancer_analyses/cervical_cancer_analyses.py b/src/scripts/cervical_cancer_analyses/cervical_cancer_analyses.py new file mode 100644 index 0000000000..ee8a77fada --- /dev/null +++ b/src/scripts/cervical_cancer_analyses/cervical_cancer_analyses.py @@ -0,0 +1,507 @@ +""" +* Check key outputs for reporting in the calibration table of the write-up +* Produce representative plots for the default parameters + +NB. To see larger effects +* Increase incidence of cancer (see tests) +* Increase symptom onset +* Increase progression rates (see tests) +""" + +import datetime +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import json +import math +from tlo import Simulation, logging, Date + +from tlo.analysis.utils import make_age_grp_types, parse_log_file +from tlo.methods import ( + cervical_cancer, + demography, + enhanced_lifestyle, + healthburden, + healthseekingbehaviour, + healthsystem, + simplified_births, + symptommanager, + epi, + tb, + hiv +) +import hashlib + +# Function to hash the DataFrame +def hash_dataframe(df): + # Generate hash for each row + row_hashes = pd.util.hash_pandas_object(df).values + # Create a single hash for the DataFrame + return hashlib.sha256(row_hashes).hexdigest() + + +# Where outputs will go +output_csv_file = Path("./outputs/output1_data.csv") +seed = 100 + +# date-stamp to label log files and any other outputs +datestamp = datetime.date.today().strftime("__%Y_%m_%d") + +# The resource files +resourcefilepath = Path("./resources") + + +log_config = { + "filename": "cervical_cancer_analysis", # The name of the output file (a timestamp will be appended). + "directory": "./outputs", # The default output path is `./outputs`. 
+ + +# Where outputs will go +output_csv_file = Path("./outputs/output1_data.csv") +seed = 100 + +# date-stamp to label log files and any other outputs +datestamp = datetime.date.today().strftime("__%Y_%m_%d") + +# The resource files +resourcefilepath = Path("./resources") + + +log_config = { + "filename": "cervical_cancer_analysis", # The name of the output file (a timestamp will be appended). + "directory": "./outputs", # The default output path is `./outputs`. Change it here, if necessary + "custom_levels": { # Customise the output of specific loggers. They are applied in order: + "*": logging.WARNING, # Asterisk matches all loggers - we set the default level to WARNING + "tlo.methods.cervical_cancer": logging.INFO, + "tlo.methods.healthsystem": logging.INFO, + } +} + + +# Set parameters for the simulation +start_date = Date(2010, 1, 1) +end_date = Date(2025, 1, 1) +popsize = 1700 + +def run_sim(service_availability): + # Establish the simulation object and set the seed + sim = Simulation(start_date=start_date, seed=seed, log_config=log_config) +# sim = Simulation(start_date=start_date, log_config={"filename": "logfile"}) + + # Register the appropriate modules + sim.register(demography.Demography(resourcefilepath=resourcefilepath), + cervical_cancer.CervicalCancer(resourcefilepath=resourcefilepath), +# cc_test.CervicalCancer(resourcefilepath=resourcefilepath), + simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=resourcefilepath, + disable=False, + cons_availability='all'), + symptommanager.SymptomManager(resourcefilepath=resourcefilepath), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), + healthburden.HealthBurden(resourcefilepath=resourcefilepath), + epi.Epi(resourcefilepath=resourcefilepath), + tb.Tb(resourcefilepath=resourcefilepath, run_with_checks=False), + hiv.Hiv(resourcefilepath=resourcefilepath, run_with_checks=False) + ) + + logfile = sim._configure_logging(filename="LogFile") + + sim.make_initial_population(n=popsize) + sim.simulate(end_date=end_date) + # df_hash_population_props = hash_dataframe(sim.population.props) + # + # print(f"Hash: {df_hash_population_props}") + # + # # Save hash to a file + # with open('/Users/marianasuarez/Downloads/TLOmodelTest/df_hash_test.txt', 'w') as f: + #     f.write(df_hash_population_props) + # parse the simulation logfile to get the output dataframes + log_df = parse_log_file(sim.log_filepath) + + return log_df + + +if output_csv_file.exists(): + output_csv_file.unlink() + +log_df = run_sim(service_availability=['*']) + + +scale_factor = 17000000 / popsize +print(scale_factor) +# +# plot number of cervical cancer deaths in past year +out_df = pd.read_csv(output_csv_file) +# out_df = pd.read_csv('C:/Users/User/PycharmProjects/TLOmodel/outputs/output_data.csv', encoding='ISO-8859-1') +out_df = out_df[['n_deaths_past_year', 'rounded_decimal_year']].dropna() +out_df = out_df[out_df['rounded_decimal_year'] >= 2011] +out_df['n_deaths_past_year'] = out_df['n_deaths_past_year'] * scale_factor +print(out_df) +plt.figure(figsize=(10, 6)) +plt.plot(out_df['rounded_decimal_year'], out_df['n_deaths_past_year'], marker='o') +plt.title('Total deaths by Year') +plt.xlabel('Year') +plt.ylabel('Total deaths past year') +plt.grid(True) +plt.ylim(0, 10000) +plt.show()
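All of the plots here multiply raw simulation counts by scale_factor; a worked check of that scaling, using the values set in this script (popsize = 1700, with 17000000 as the assumed real population):

    scale_factor = 17000000 / 1700  # = 10000.0
    # e.g. 3 simulated deaths in a year are reported as an estimated 30,000 deaths
    print(3 * scale_factor)  # 30000.0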
+ + +# plot number of cervical cancer deaths in hivneg in past year +out_df_6 = pd.read_csv(output_csv_file) +out_df_6 = out_df_6[['n_deaths_cc_hivneg_past_year', 'rounded_decimal_year']].dropna() +out_df_6 = out_df_6[out_df_6['rounded_decimal_year'] >= 2011] +out_df_6['n_deaths_cc_hivneg_past_year'] = out_df_6['n_deaths_cc_hivneg_past_year'] * scale_factor +print(out_df_6) +plt.figure(figsize=(10, 6)) +plt.plot(out_df_6['rounded_decimal_year'], out_df_6['n_deaths_cc_hivneg_past_year'], marker='o') +plt.title('Total deaths cervical cancer in hivneg by Year') +plt.xlabel('Year') +plt.ylabel('Total deaths cervical cancer in hivneg past year') +plt.grid(True) +plt.ylim(0, 10000) +plt.show() + + +# plot number of cervical cancer deaths in hivpos in past year +out_df_9 = pd.read_csv(output_csv_file) +out_df_9 = out_df_9[['n_deaths_cc_hivpos_past_year', 'rounded_decimal_year']].dropna() +out_df_9 = out_df_9[out_df_9['rounded_decimal_year'] >= 2011] +out_df_9['n_deaths_cc_hivpos_past_year'] = out_df_9['n_deaths_cc_hivpos_past_year'] * scale_factor +print(out_df_9) +plt.figure(figsize=(10, 6)) +plt.plot(out_df_9['rounded_decimal_year'], out_df_9['n_deaths_cc_hivpos_past_year'], marker='o') +plt.title('Total deaths cervical cancer in hivpos by Year') +plt.xlabel('Year') +plt.ylabel('Total deaths cervical cancer in hivpos past year') +plt.grid(True) +plt.ylim(0, 10000) +plt.show() + + +# plot number of cc diagnoses in past year +out_df_4 = pd.read_csv(output_csv_file) +out_df_4 = out_df_4[['n_diagnosed_past_year', 'rounded_decimal_year']].dropna() +out_df_4 = out_df_4[out_df_4['rounded_decimal_year'] >= 2011] +out_df_4['n_diagnosed_past_year'] = out_df_4['n_diagnosed_past_year'] * scale_factor +print(out_df_4) +plt.figure(figsize=(10, 6)) +plt.plot(out_df_4['rounded_decimal_year'], out_df_4['n_diagnosed_past_year'], marker='o') +plt.title('Total diagnosed per Year') +plt.xlabel('Year') +plt.ylabel('Total diagnosed per year') +plt.grid(True) +plt.ylim(0,10000) +plt.show() + + + + +# plot number cc treated in past year +out_df_13 = pd.read_csv(output_csv_file) +out_df_13 = out_df_13[['n_treated_past_year', 'rounded_decimal_year']].dropna() +out_df_13 = out_df_13[out_df_13['rounded_decimal_year'] >= 2011] +out_df_13['n_treated_past_year'] = out_df_13['n_treated_past_year'] * scale_factor +print(out_df_13) +plt.figure(figsize=(10, 6)) +plt.plot(out_df_13['rounded_decimal_year'], out_df_13['n_treated_past_year'], marker='o') +plt.title('Total treated per Year') +plt.xlabel('Year') +plt.ylabel('Total treated per year') +plt.grid(True) +plt.ylim(0,10000) +plt.show() + + + + +# plot number cc cured in past year +out_df_14 = pd.read_csv(output_csv_file) +out_df_14 = out_df_14[['n_cured_past_year', 'rounded_decimal_year']].dropna() +out_df_14 = out_df_14[out_df_14['rounded_decimal_year'] >= 2011] +out_df_14['n_cured_past_year'] = out_df_14['n_cured_past_year'] * scale_factor +print(out_df_14) +plt.figure(figsize=(10, 6)) +plt.plot(out_df_14['rounded_decimal_year'], out_df_14['n_cured_past_year'], marker='o') +plt.title('Total cured per Year') +plt.xlabel('Year') +plt.ylabel('Total cured per year') +plt.grid(True) +plt.ylim(0,10000) +plt.show() + + + + +# plot prevalence of each ce stage +out_df_2 = pd.read_csv(output_csv_file) +columns_to_calculate = ['total_none', 'total_hpv', 'total_cin1', 'total_cin2', 'total_cin3', 'total_stage1', + 'total_stage2a', 'total_stage2b', 'total_stage3', 'total_stage4'] +for column in columns_to_calculate: + new_column_name = column.replace('total_', '') + out_df_2[f'proportion_{new_column_name}'] = out_df_2[column] / out_df_2[columns_to_calculate].sum(axis=1) +print(out_df_2) +columns_to_plot = ['proportion_hpv', 'proportion_cin1', 'proportion_cin2', 'proportion_cin3', + 'proportion_stage1', 'proportion_stage2a', 'proportion_stage2b', 'proportion_stage3', + 
'proportion_stage4'] +plt.figure(figsize=(10, 6)) +# Initialize the bottom of the stack +bottom = 0 +for column in columns_to_plot: + plt.fill_between(out_df_2['rounded_decimal_year'], + bottom, + bottom + out_df_2[column], + label=column, + alpha=0.7) + bottom += out_df_2[column] +# plt.plot(out_df_2['rounded_decimal_year'], out_df_2['proportion_cin1'], marker='o') +plt.title('Proportion of women aged 15+ with HPV, CIN, cervical cancer') +plt.xlabel('Year') +plt.ylabel('Proportion') +plt.grid(True) +plt.legend(loc='upper right') +plt.ylim(0, 0.30) +plt.show() + + + +# Proportion of people with cervical cancer who are HIV positive +out_df_3 = pd.read_csv(output_csv_file) +out_df_3 = out_df_3[['prop_cc_hiv', 'rounded_decimal_year']].dropna() +plt.figure(figsize=(10, 6)) +plt.plot(out_df_3['rounded_decimal_year'], out_df_3['prop_cc_hiv'], marker='o') +plt.title('Proportion of people with cervical cancer who are HIV positive') +plt.xlabel('Year') +plt.ylabel('Proportion') +plt.grid(True) +plt.ylim(0, 1) +plt.show() + +# log_config = { +# "filename": "cervical_cancer_analysis", # The name of the output file (a timestamp will be appended). +# "directory": "./outputs", # The default output path is `./outputs`. Change it here, if necessary +# "custom_levels": { # Customise the output of specific loggers. They are applied in order: +# "*": logging.WARNING, # Asterisk matches all loggers - we set the default level to WARNING +# "tlo.methods.cervical_cancer": logging.INFO, +# "tlo.methods.healthsystem": logging.INFO, +# } +# } + + + +# plot number of women living with unsuppressed HIV +out_df_4 = pd.read_csv(output_csv_file) +out_df_4 = out_df_4[['n_women_hiv_unsuppressed', 'rounded_decimal_year']].dropna() +out_df_4 = out_df_4[out_df_4['rounded_decimal_year'] >= 2011] +out_df_4['n_women_hiv_unsuppressed'] = out_df_4['n_women_hiv_unsuppressed'] * scale_factor +print(out_df_4) +plt.figure(figsize=(10, 6)) +plt.plot(out_df_4['rounded_decimal_year'], out_df_4['n_women_hiv_unsuppressed'], marker='o') +plt.title('n_women_hiv_unsuppressed') +plt.xlabel('Year') +plt.ylabel('n_women_hiv_unsuppressed') +plt.grid(True) +plt.ylim(0, 300000) +plt.show() + + + +# plot prevalence of each ce stage for hivneg +out_df_5 = pd.read_csv(output_csv_file) +columns_to_calculate = ['total_hivneg_none', 'total_hivneg_hpv', 'total_hivneg_cin1', 'total_hivneg_cin2', 'total_hivneg_cin3', + 'total_hivneg_stage1','total_hivneg_stage2a', 'total_hivneg_stage2b', 'total_hivneg_stage3', 'total_hivneg_stage4'] +for column in columns_to_calculate: + new_column_name = column.replace('total_hivneg_', '') + out_df_5[f'proportion_hivneg_{new_column_name}'] = out_df_5[column] / out_df_5[columns_to_calculate].sum(axis=1) +print(out_df_5) +columns_to_plot = ['proportion_hivneg_hpv', 'proportion_hivneg_cin1', 'proportion_hivneg_cin2', 'proportion_hivneg_cin3', + 'proportion_hivneg_stage1', 'proportion_hivneg_stage2a', 'proportion_hivneg_stage2b', 'proportion_hivneg_stage3', + 'proportion_hivneg_stage4'] +plt.figure(figsize=(10, 6)) +# Initialize the bottom of the stack +bottom = 0 +for column in columns_to_plot: + plt.fill_between(out_df_5['rounded_decimal_year'], + bottom, + bottom + out_df_5[column], + label=column, + alpha=0.7) + bottom += out_df_5[column] +plt.title('Proportion of hivneg women aged 15+ with HPV, CIN, cervical cancer') +plt.xlabel('Year') +plt.ylabel('Proportion') +plt.grid(True) +plt.legend(loc='upper right') +plt.ylim(0, 0.30) +plt.show() + + + +# plot prevalence of each ce stage for hivpos +out_df_8 = 
pd.read_csv(output_csv_file) +columns_to_calculate = ['total_hivpos_none', 'total_hivpos_hpv', 'total_hivpos_cin1', 'total_hivpos_cin2', 'total_hivpos_cin3', + 'total_hivpos_stage1','total_hivpos_stage2a', 'total_hivpos_stage2b', 'total_hivpos_stage3', 'total_hivpos_stage4'] +for column in columns_to_calculate: + new_column_name = column.replace('total_hivpos_', '') + out_df_8[f'proportion_hivpos_{new_column_name}'] = out_df_8[column] / out_df_8[columns_to_calculate].sum(axis=1) +print(out_df_8) +columns_to_plot = ['proportion_hivpos_hpv', 'proportion_hivpos_cin1', 'proportion_hivpos_cin2', 'proportion_hivpos_cin3', + 'proportion_hivpos_stage1', 'proportion_hivpos_stage2a', 'proportion_hivpos_stage2b', 'proportion_hivpos_stage3', + 'proportion_hivpos_stage4'] +plt.figure(figsize=(10, 6)) +# Initialize the bottom of the stack +bottom = 0 +for column in columns_to_plot: + plt.fill_between(out_df_8['rounded_decimal_year'], + bottom, + bottom + out_df_8[column], + label=column, + alpha=0.7) + bottom += out_df_8[column] +plt.title('Proportion of hivpos women aged 15+ with HPV, CIN, cervical cancer') +plt.xlabel('Year') +plt.ylabel('Proportion') +plt.grid(True) +plt.legend(loc='upper right') +plt.ylim(0, 0.30) +plt.show() + + +# plot number of hivpos in stage 4 +out_df_11 = pd.read_csv(output_csv_file) +out_df_11 = out_df_11[['total_hivpos_stage4', 'rounded_decimal_year']].dropna() +# out_df_11 = out_df_11[out_df_11['rounded_decimal_year'] >= 2011] +# out_df_11['total_hivpos_stage4'] = out_df_11['total_hivpos_stage4'] * scale_factor +print(out_df_11) +plt.figure(figsize=(10, 6)) +plt.plot(out_df_11['rounded_decimal_year'], out_df_11['total_hivpos_stage4'], marker='o') +plt.title('total_hivpos_stage4') +plt.xlabel('Year') +plt.ylabel('total_hivpos_stage4') +plt.grid(True) +plt.ylim(0,100) +plt.show() + + +# plot number of hivneg in stage 4 +out_df_7 = pd.read_csv(output_csv_file) +out_df_7 = out_df_7[['total_hivneg_stage4', 'rounded_decimal_year']].dropna() +# out_df_7 = out_df_7[out_df_7['rounded_decimal_year'] >= 2011] +# out_df_7['total_hivneg_stage4'] = out_df_7['total_hivneg_stage4'] * scale_factor +print(out_df_7) +plt.figure(figsize=(10, 6)) +plt.plot(out_df_7['rounded_decimal_year'], out_df_7['total_hivneg_stage4'], marker='o') +plt.title('total_hivneg_stage4') +plt.xlabel('Year') +plt.ylabel('total_hivneg_stage4') +plt.grid(True) +plt.ylim(0,100) +plt.show() + + +# plot number of hivneg in stage 4, scaled to national population +out_df_13 = pd.read_csv(output_csv_file) +out_df_13 = out_df_13[['total_hivneg_stage4', 'rounded_decimal_year']].dropna() +out_df_13 = out_df_13[out_df_13['rounded_decimal_year'] >= 2011] +out_df_13['total_hivneg_stage4'] = out_df_13['total_hivneg_stage4'] * scale_factor +print(out_df_13) +plt.figure(figsize=(10, 6)) +plt.plot(out_df_13['rounded_decimal_year'], out_df_13['total_hivneg_stage4'], marker='o') +plt.title('total_hivneg_stage4') +plt.xlabel('Year') +plt.ylabel('total_hivneg_stage4') +plt.grid(True) +plt.ylim(0,10000) +plt.show() + +# LOG PLOTTING with function --------------------------------------------------------------------------- +# +# start_year=2011 +# scale_factor = 10000 +# +# +# # Function to plot data +# def plot_data(log_df, year_col, columns, prefix = '',scale_factor=1000, start_year=2011, title="", xlabel="Year", ylabel="", ylim=None, proportion_plot=False): +# # Filter by year and ensure only valid values +# log_df_plot = log_df["tlo.methods.cervical_cancer"]["all"] +# log_df_plot = log_df_plot[[year_col] + columns].dropna() +# log_df_plot = 
log_df_plot[log_df_plot[year_col] >= start_year] +# +# +# # If proportion plot is True, calculate proportions +# if proportion_plot: +# total_col = log_df_plot[columns].sum(axis=1) # Sum across the columns to get the total for each row +# for col in columns: +# new_col_name = col.replace(prefix, '') # Remove the prefix +# log_df_plot[f'proportion_{new_col_name}'] = log_df_plot[col] / total_col # Calculate proportion +# +# # Update columns to use proportion columns and remove those containing 'none' +# columns = [f'proportion_{col.replace(prefix, "")}' for col in columns if 'none' not in col] +# +# # Scale values +# if not proportion_plot: +# for col in columns: +# log_df_plot[col] = log_df_plot[col] * scale_factor +# +# # Plotting logic +# plt.figure(figsize=(10, 6)) +# +# if proportion_plot: +# bottom = 0 +# for col in columns: +# plt.fill_between(log_df_plot[year_col], bottom, bottom + log_df_plot[col], label=col, alpha=0.7) +# bottom += log_df_plot[col] +# plt.legend(loc='upper right') +# else: +# plt.plot(log_df_plot[year_col], log_df_plot[columns[0]], marker='o') +# +# # Plot +# plt.style.use("seaborn-v0_8-white") +# plt.title(title) +# plt.xlabel(xlabel) +# plt.ylabel(ylabel) +# plt.grid(True) +# +# # Set y-axis limits if provided +# if ylim: +# plt.ylim(ylim) +# +# plt.show() +# +# # Execute functions +# +# # 1. Total deaths by Year +# plot_data(log_df, year_col='rounded_decimal_year', columns=['n_deaths_past_year'], scale_factor=scale_factor, title='Total deaths by Year', ylabel='Total deaths past year', ylim=(0, 10000)) +# +# # 2. Total deaths cervical cancer in HIV negative by Year +# plot_data(log_df, year_col='rounded_decimal_year', columns=['n_deaths_cc_hivneg_past_year'], scale_factor=scale_factor, title='Total deaths cervical cancer in HIV negative by Year', ylabel='Total deaths in HIV negative past year', ylim=(0, 10000)) +# +# # 3. Total deaths cervical cancer in HIV positive by Year +# plot_data(log_df, year_col='rounded_decimal_year', columns=['n_deaths_cc_hivpos_past_year'], scale_factor=scale_factor, title='Total deaths cervical cancer in HIV positive by Year', ylabel='Total deaths in HIV positive past year', ylim=(0, 10000)) +# +# # 4. Total diagnosed per Year +# plot_data(log_df, year_col='rounded_decimal_year', columns=['n_diagnosed_past_year'], scale_factor=scale_factor, title='Total diagnosed per Year', ylabel='Total diagnosed per year', ylim=(0, 10000)) +# +# # 5. Total treated per Year +# plot_data(log_df, year_col='rounded_decimal_year', columns=['n_treated_past_year'], scale_factor=scale_factor, title='Total treated per Year', ylabel='Total treated per year', ylim=(0, 10000)) +# +# # 6. Total cured per Year +# plot_data(log_df, year_col='rounded_decimal_year', columns=['n_cured_past_year'], scale_factor=scale_factor, title='Total cured per Year', ylabel='Total cured per year', ylim=(0, 10000)) +# +# # 7. Proportion of women aged 15+ with HPV, CIN, cervical cancer +# plot_data(log_df, year_col='rounded_decimal_year', columns=['total_none', 'total_hpv', 'total_cin1', 'total_cin2', 'total_cin3', 'total_stage1', +# 'total_stage2a', 'total_stage2b', 'total_stage3', 'total_stage4'], prefix = 'total_',scale_factor=scale_factor, title='Proportion of women aged 15+ with HPV, CIN, cervical cancer', ylabel='Proportion', ylim=(0, 0.30), proportion_plot=True) +# +# # 8. 
Proportion of people with cervical cancer who are HIV positive +# plot_data(log_df, year_col='rounded_decimal_year', columns=['prop_cc_hiv'], title='Proportion of people with cervical cancer who are HIV positive', ylabel='Proportion', ylim=(0, 1)) +# +# # 9. Number of women living with unsuppressed HIV +# plot_data(log_df, year_col='rounded_decimal_year', columns=['n_women_hiv_unsuppressed'], scale_factor=scale_factor, title='Number of women living with unsuppressed HIV', ylabel='n_women_hiv_unsuppressed', ylim=(0, 300000)) +# +# # 10. Proportion of HIV negative women aged 15+ with HPV, CIN, cervical cancer +# plot_data(log_df, year_col='rounded_decimal_year', columns=['total_hivneg_none', 'total_hivneg_hpv', 'total_hivneg_cin1', 'total_hivneg_cin2', 'total_hivneg_cin3', +# 'total_hivneg_stage1','total_hivneg_stage2a', 'total_hivneg_stage2b', 'total_hivneg_stage3', 'total_hivneg_stage4'], prefix = 'total_',title='Proportion of HIV negative women aged 15+ with HPV, CIN, cervical cancer', ylabel='Proportion', ylim=(0, 0.30), proportion_plot=True) +# +# # 11. Proportion of HIV positive women aged 15+ with HPV, CIN, cervical cancer +# plot_data(log_df, year_col='rounded_decimal_year', columns=['total_hivpos_none', 'total_hivpos_hpv', 'total_hivpos_cin1', 'total_hivpos_cin2', 'total_hivpos_cin3', +# 'total_hivpos_stage1','total_hivpos_stage2a', 'total_hivpos_stage2b', 'total_hivpos_stage3', 'total_hivpos_stage4'], prefix = 'total_', title='Proportion of HIV positive women aged 15+ with HPV, CIN, cervical cancer', ylabel='Proportion', ylim=(0, 0.30), proportion_plot=True) +# +# # 12. Number of HIV positive women in Stage 4 +# plot_data(log_df, year_col='rounded_decimal_year', columns=['total_hivpos_stage4'], scale_factor=scale_factor, title='Number of HIV positive women in Stage 4', ylabel='total_hivpos_stage4', ylim=(0, 100)) +# +# # 13. Number of HIV negative women in Stage 4 +# plot_data(log_df, year_col='rounded_decimal_year', columns=['total_hivneg_stage4'], scale_factor=scale_factor, title='Number of HIV negative women in Stage 4', ylabel='total_hivneg_stage4', ylim=(0, 100)) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/analysis_hss_elements.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/analysis_hss_elements.py deleted file mode 100644 index 76708f7c25..0000000000 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/analysis_hss_elements.py +++ /dev/null @@ -1,272 +0,0 @@ -"""Produce plots to show the impact each the healthcare system (overall health impact) when running under different -scenarios (scenario_impact_of_healthsystem.py)""" - -import argparse -import textwrap -from pathlib import Path -from typing import Tuple - -import numpy as np -import pandas as pd -from matplotlib import pyplot as plt - -from tlo import Date -from tlo.analysis.utils import extract_results, make_age_grp_lookup, summarize - - -def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None): - """Produce standard set of plots describing the effect of each TREATMENT_ID. - - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. - - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. 
- """ - - TARGET_PERIOD = (Date(2020, 1, 1), Date(2030, 12, 31)) - - # Definitions of general helper functions - make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 - - _, age_grp_lookup = make_age_grp_lookup() - - def target_period() -> str: - """Returns the target period as a string of the form YYYY-YYYY""" - return "-".join(str(t.year) for t in TARGET_PERIOD) - - def get_parameter_names_from_scenario_file() -> Tuple[str]: - """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" - from scripts.comparison_of_horizontal_and_vertical_programs.scenario_hss_elements import ( - HSSElements, - ) - e = HSSElements() - return tuple(e._scenarios.keys()) - - def get_num_deaths(_df): - """Return total number of Deaths (total within the TARGET_PERIOD)""" - return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) - - def get_num_dalys(_df): - """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD). - Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using - results from runs that crashed mid-way through the simulation. - """ - years_needed = [i.year for i in TARGET_PERIOD] - assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." - return pd.Series( - data=_df - .loc[_df.year.between(*years_needed)] - .drop(columns=['date', 'sex', 'age_range', 'year']) - .sum().sum() - ) - - def set_param_names_as_column_index_level_0(_df): - """Set the columns index (level 0) as the param_names.""" - ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} - names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] - assert len(names_of_cols_level0) == len(_df.columns.levels[0]) - _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) - return _df - - def find_difference_relative_to_comparison(_ser: pd.Series, - comparison: str, - scaled: bool = False, - drop_comparison: bool = True, - ): - """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) - within the runs (level 1), relative to where draw = `comparison`. 
- The comparison is `X - COMPARISON`.""" - return _ser \ - .unstack(level=0) \ - .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ - .drop(columns=([comparison] if drop_comparison else [])) \ - .stack() - - def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False, put_labels_in_legend=True): - """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the - extent of the error bar.""" - - substitute_labels = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - - yerr = np.array([ - (_df['mean'] - _df['lower']).values, - (_df['upper'] - _df['mean']).values, - ]) - - xticks = {(i + 0.5): k for i, k in enumerate(_df.index)} - - # Define colormap (used only with option `put_labels_in_legend=True`) - cmap = plt.get_cmap("tab20") - rescale = lambda y: (y - np.min(y)) / (np.max(y) - np.min(y)) # noqa: E731 - colors = list(map(cmap, rescale(np.array(list(xticks.keys()))))) if put_labels_in_legend else None - - fig, ax = plt.subplots(figsize=(10, 5)) - ax.bar( - xticks.keys(), - _df['mean'].values, - yerr=yerr, - alpha=0.8, - ecolor='black', - color=colors, - capsize=10, - label=xticks.values() - ) - if annotations: - for xpos, ypos, text in zip(xticks.keys(), _df['upper'].values, annotations): - ax.text(xpos, ypos*1.15, text, horizontalalignment='center', rotation='vertical', fontsize='x-small') - ax.set_xticks(list(xticks.keys())) - - if put_labels_in_legend: - # Update xticks label with substitute labels - # Insert legend with updated labels that shows correspondence between substitute label and original label - xtick_values = [letter for letter, label in zip(substitute_labels, xticks.values())] - xtick_legend = [f'{letter}: {label}' for letter, label in zip(substitute_labels, xticks.values())] - h, legs = ax.get_legend_handles_labels() - ax.legend(h, xtick_legend, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5)) - ax.set_xticklabels(list(xtick_values)) - else: - if not xticklabels_horizontal_and_wrapped: - # xticklabels will be vertical and not wrapped - ax.set_xticklabels(list(xticks.values()), rotation=90) - else: - wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] - ax.set_xticklabels(wrapped_labs) - - ax.grid(axis="y") - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - fig.tight_layout() - - return fig, ax - - # %% Define parameter names - param_names = get_parameter_names_from_scenario_file() - - # %% Quantify the health gains associated with all interventions combined. 
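For orientation, the unstack/apply/stack pattern in `find_difference_relative_to_comparison` (defined above) can be exercised on a toy Series. This is a minimal sketch with invented numbers, not output from the model:

```python
import pandas as pd

# Toy results indexed by (draw, run), mimicking extract_results-style output
ser = pd.Series(
    [100.0, 110.0, 80.0, 85.0],
    index=pd.MultiIndex.from_product(
        [["Baseline", "Scale-up"], [0, 1]], names=["draw", "run"]
    ),
)

# Difference of every draw vs the 'Baseline' draw, within each run (X - COMPARISON)
diff = (
    ser.unstack(level=0)                 # rows = run, columns = draw
       .apply(lambda x: x - x["Baseline"], axis=1)
       .drop(columns=["Baseline"])
       .stack()                          # back to a (run, draw) MultiIndex
)
print(diff)  # Scale-up vs Baseline: -20.0 for run 0, -25.0 for run 1
```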
- - # Absolute Number of Deaths and DALYs - num_deaths = extract_results( - results_folder, - module='tlo.methods.demography', - key='death', - custom_generate_series=get_num_deaths, - do_scaling=True - ).pipe(set_param_names_as_column_index_level_0) - - num_dalys = extract_results( - results_folder, - module='tlo.methods.healthburden', - key='dalys_stacked', - custom_generate_series=get_num_dalys, - do_scaling=True - ).pipe(set_param_names_as_column_index_level_0) - - # %% Charts of total numbers of deaths / DALYS - num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names) - num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names) - - name_of_plot = f'Deaths, {target_period()}' - fig, ax = do_bar_plot_with_ci(num_deaths_summarized / 1e6) - ax.set_title(name_of_plot) - ax.set_ylabel('(Millions)') - fig.tight_layout() - ax.axhline(num_deaths_summarized.loc['Baseline', 'mean']/1e6, color='black', alpha=0.5) - fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) - fig.show() - plt.close(fig) - - name_of_plot = f'All Scenarios: DALYs, {target_period()}' - fig, ax = do_bar_plot_with_ci(num_dalys_summarized / 1e6) - ax.set_title(name_of_plot) - ax.set_ylabel('(Millions)') - ax.axhline(num_dalys_summarized.loc['Baseline', 'mean']/1e6, color='black', alpha=0.5) - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) - fig.show() - plt.close(fig) - - - # %% Deaths and DALYS averted relative to Status Quo - num_deaths_averted = summarize( - -1.0 * - pd.DataFrame( - find_difference_relative_to_comparison( - num_deaths.loc[0], - comparison='Baseline') - ).T - ).iloc[0].unstack().reindex(param_names).drop(['Baseline']) - - pc_deaths_averted = 100.0 * summarize( - -1.0 * - pd.DataFrame( - find_difference_relative_to_comparison( - num_deaths.loc[0], - comparison='Baseline', - scaled=True) - ).T - ).iloc[0].unstack().reindex(param_names).drop(['Baseline']) - - num_dalys_averted = summarize( - -1.0 * - pd.DataFrame( - find_difference_relative_to_comparison( - num_dalys.loc[0], - comparison='Baseline') - ).T - ).iloc[0].unstack().reindex(param_names).drop(['Baseline']) - - pc_dalys_averted = 100.0 * summarize( - -1.0 * - pd.DataFrame( - find_difference_relative_to_comparison( - num_dalys.loc[0], - comparison='Baseline', - scaled=True) - ).T - ).iloc[0].unstack().reindex(param_names).drop(['Baseline']) - - # DEATHS - name_of_plot = f'Additional Deaths Averted vs Baseline, {target_period()}' - fig, ax = do_bar_plot_with_ci( - num_deaths_averted.clip(lower=0.0), - annotations=[ - f"{round(row['mean'], 0)} ({round(row['lower'], 1)}-{round(row['upper'], 1)}) %" - for _, row in pc_deaths_averted.clip(lower=0.0).iterrows() - ] - ) - ax.set_title(name_of_plot) - ax.set_ylabel('Additional Deaths Averted') - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) - fig.show() - plt.close(fig) - - # DALYS - name_of_plot = f'Additional DALYs Averted vs Baseline, {target_period()}' - fig, ax = do_bar_plot_with_ci( - (num_dalys_averted / 1e6).clip(lower=0.0), - annotations=[ - f"{round(row['mean'])} ({round(row['lower'], 1)}-{round(row['upper'], 1)}) %" - for _, row in pc_dalys_averted.clip(lower=0.0).iterrows() - ] - ) - ax.set_title(name_of_plot) - ax.set_ylabel('Additional DALYS Averted \n(Millions)') - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) - fig.show() - plt.close(fig) - - # 
todo: Neaten graphs - # todo: Graph showing difference broken down by disease (this can be cribbed from the calcs about wealth from the - # third set of analyses in the overview paper). - # todo: other metrics of health - # todo: other graphs, broken down by age/sex (this can also be cribbed from overview paper stuff) - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("results_folder", type=Path) # outputs/horizontal_and_vertical_programs-2024-05-16 - args = parser.parse_args() - - apply( - results_folder=args.results_folder, - output_folder=args.results_folder, - resourcefilepath=Path('./resources') - ) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/analysis_maxHTM_scenario.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/analysis_maxHTM_scenario.py deleted file mode 100644 index 0cfcd05315..0000000000 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/analysis_maxHTM_scenario.py +++ /dev/null @@ -1,229 +0,0 @@ -""" -This scenario file sets up the scenarios for simulating the effects of scaling up programs - -The scenarios are: -*0 baseline mode 1 -*1 scale-up HIV program -*2 scale-up TB program -*3 scale-up malaria program -*4 scale-up HIV and Tb and malaria programs - -scale-up occurs on the default scale-up start date (01/01/2025: in parameters list of resourcefiles) - -For all scenarios, keep all default health system settings - -check the batch configuration gets generated without error: -tlo scenario-run --draw-only src/scripts/comparison_of_horizontal_and_vertical_programs/analysis_maxHTM_scenario.py - -Run on the batch system using: -tlo batch-submit src/scripts/comparison_of_horizontal_and_vertical_programs/analysis_maxHTM_scenario.py - -or locally using: -tlo scenario-run src/scripts/comparison_of_horizontal_and_vertical_programs/analysis_maxHTM_scenario.py - -or execute a single run: -tlo scenario-run src/scripts/comparison_of_horizontal_and_vertical_programs/analysis_maxHTM_scenario.py --draw 1 0 - -""" - -import datetime -from pathlib import Path - -from tlo import Date, logging -from tlo.methods import ( - demography, - enhanced_lifestyle, - epi, - healthburden, - healthseekingbehaviour, - healthsystem, - hiv, - malaria, - simplified_births, - symptommanager, - tb, -) -from tlo.scenario import BaseScenario - -resourcefilepath = Path("./resources") -datestamp = datetime.date.today().strftime("__%Y_%m_%d") - -outputspath = Path("./outputs") -scaleup_start_year = 2012 -end_date = Date(2015, 1, 1) - - -class EffectOfProgrammes(BaseScenario): - def __init__(self): - super().__init__() - self.seed = 0 - self.start_date = Date(2010, 1, 1) - self.end_date = end_date - self.pop_size = 1_000 - self.number_of_draws = 5 - self.runs_per_draw = 1 - - def log_configuration(self): - return { - 'filename': 'scaleup_tests', - 'directory': Path('./outputs'), # <- (specified only for local running) - 'custom_levels': { - '*': logging.WARNING, - 'tlo.methods.hiv': logging.INFO, - 'tlo.methods.tb': logging.INFO, - 'tlo.methods.malaria': logging.INFO, - 'tlo.methods.demography': logging.INFO, - } - } - - def modules(self): - return [ - demography.Demography(resourcefilepath=self.resources), - simplified_births.SimplifiedBirths(resourcefilepath=self.resources), - enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), - healthsystem.HealthSystem(resourcefilepath=self.resources), - symptommanager.SymptomManager(resourcefilepath=self.resources), - 
healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), - healthburden.HealthBurden(resourcefilepath=self.resources), - epi.Epi(resourcefilepath=self.resources), - hiv.Hiv(resourcefilepath=self.resources), - tb.Tb(resourcefilepath=self.resources), - malaria.Malaria(resourcefilepath=self.resources), - ] - - def draw_parameters(self, draw_number, rng): - - return { - 'Hiv': { - 'type_of_scaleup': ['none', 'max', 'none', 'none', 'max'][draw_number], - 'scaleup_start_year': scaleup_start_year, - }, - 'Tb': { - 'type_of_scaleup': ['none', 'none', 'max', 'none', 'max'][draw_number], - 'scaleup_start_year': scaleup_start_year, - }, - 'Malaria': { - 'type_of_scaleup': ['none', 'none', 'none', 'max', 'max'][draw_number], - 'scaleup_start_year': scaleup_start_year, - }, - } - - -if __name__ == '__main__': - from tlo.cli import scenario_run - - scenario_run([__file__]) - - - -# %% Produce some figures and summary info - -# import pandas as pd -# import matplotlib.pyplot as plt - -# # Find results_folder associated with a given batch_file (and get most recent [-1]) -# results_folder = get_scenario_outputs("scaleup_tests-", outputspath)[-1] -# -# # get basic information about the results -# info = get_scenario_info(results_folder) -# -# # 1) Extract the parameters that have varied over the set of simulations -# params = extract_params(results_folder) -# -# -# # DEATHS -# -# -# def get_num_deaths_by_cause_label(_df): -# """Return total number of Deaths by label within the TARGET_PERIOD -# values are summed for all ages -# df returned: rows=COD, columns=draw -# """ -# return _df \ -# .loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)] \ -# .groupby(_df['label']) \ -# .size() -# -# -# TARGET_PERIOD = (Date(scaleup_start_year, 1, 1), end_date) -# -# # produce df of total deaths over scale-up period -# num_deaths_by_cause_label = extract_results( -# results_folder, -# module='tlo.methods.demography', -# key='death', -# custom_generate_series=get_num_deaths_by_cause_label, -# do_scaling=True -# ) -# -# -# def summarise_deaths_for_one_cause(results_folder, label): -# """ returns mean deaths for each year of the simulation -# values are aggregated across the runs of each draw -# for the specified cause -# """ -# -# results_deaths = extract_results( -# results_folder, -# module="tlo.methods.demography", -# key="death", -# custom_generate_series=( -# lambda df: df.assign(year=df["date"].dt.year).groupby( -# ["year", "label"])["person_id"].count() -# ), -# do_scaling=True, -# ) -# # removes multi-index -# results_deaths = results_deaths.reset_index() -# -# # select only cause specified -# tmp = results_deaths.loc[ -# (results_deaths.label == label) -# ] -# -# # group deaths by year -# tmp = pd.DataFrame(tmp.groupby(["year"]).sum()) -# -# # get mean for each draw -# mean_deaths = pd.concat({'mean': tmp.iloc[:, 1:].groupby(level=0, axis=1).mean()}, axis=1).swaplevel(axis=1) -# -# return mean_deaths -# -# -# aids_deaths = summarise_deaths_for_one_cause(results_folder, 'AIDS') -# tb_deaths = summarise_deaths_for_one_cause(results_folder, 'TB (non-AIDS)') -# malaria_deaths = summarise_deaths_for_one_cause(results_folder, 'Malaria') -# -# -# draw_labels = ['No scale-up', 'HIV scale-up', 'TB scale-up', 'Malaria scale-up', 'HTM scale-up'] -# colours = ['blue', 'green', 'red', 'purple', 'orange'] -# -# # Create subplots -# fig, axs = plt.subplots(3, 1, figsize=(10, 10)) -# # Plot for df1 -# for i, col in enumerate(aids_deaths.columns): -# axs[0].plot(aids_deaths.index, aids_deaths[col], 
label=draw_labels[i], -# color=colours[i]) -# axs[0].set_title('HIV/AIDS') -# axs[0].legend(loc='center left', bbox_to_anchor=(1, 0.5)) # Legend to the right of the plot -# axs[0].axvline(x=scaleup_start_year, color='gray', linestyle='--') -# -# # Plot for df2 -# for i, col in enumerate(tb_deaths.columns): -# axs[1].plot(tb_deaths.index, tb_deaths[col], color=colours[i]) -# axs[1].set_title('TB') -# axs[1].axvline(x=scaleup_start_year, color='gray', linestyle='--') -# -# # Plot for df3 -# for i, col in enumerate(malaria_deaths.columns): -# axs[2].plot(malaria_deaths.index, malaria_deaths[col], color=colours[i]) -# axs[2].set_title('Malaria') -# axs[2].axvline(x=scaleup_start_year, color='gray', linestyle='--') -# -# for ax in axs: -# ax.set_xlabel('Years') -# ax.set_ylabel('Number deaths') -# -# plt.tight_layout(rect=[0, 0, 0.85, 1]) # Adjust layout to make space for legend -# plt.show() -# diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/analysis_vertical_programs_with_and_without_hss.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/analysis_vertical_programs_with_and_without_hss.py deleted file mode 100644 index f0dd083d97..0000000000 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/analysis_vertical_programs_with_and_without_hss.py +++ /dev/null @@ -1,363 +0,0 @@ -"""Produce plots to show the impact each the healthcare system (overall health impact) when running under different -scenarios (scenario_impact_of_healthsystem.py)""" - -import argparse -import textwrap -from pathlib import Path -from typing import Tuple - -import numpy as np -import pandas as pd -from matplotlib import pyplot as plt - -from tlo import Date -from tlo.analysis.utils import extract_results, make_age_grp_lookup, summarize - - -def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None): - """Produce standard set of plots describing the effect of each TREATMENT_ID. - - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. - - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. - """ - - TARGET_PERIOD = (Date(2020, 1, 1), Date(2030, 12, 31)) - - # Definitions of general helper functions - make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 - - _, age_grp_lookup = make_age_grp_lookup() - - def target_period() -> str: - """Returns the target period as a string of the form YYYY-YYYY""" - return "-".join(str(t.year) for t in TARGET_PERIOD) - - def get_parameter_names_from_scenario_file() -> Tuple[str]: - """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" - from scripts.comparison_of_horizontal_and_vertical_programs.scenario_vertical_programs_with_and_without_hss import ( - HTMWithAndWithoutHSS, - ) - e = HTMWithAndWithoutHSS() - return tuple(e._scenarios.keys()) - - def get_num_deaths(_df): - """Return total number of Deaths (total within the TARGET_PERIOD)""" - return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) - - def get_num_dalys(_df): - """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD). - Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using - results from runs that crashed mid-way through the simulation. 
- """ - years_needed = [i.year for i in TARGET_PERIOD] - assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." - return pd.Series( - data=_df - .loc[_df.year.between(*years_needed)] - .drop(columns=['date', 'sex', 'age_range', 'year']) - .sum().sum() - ) - - def set_param_names_as_column_index_level_0(_df): - """Set the columns index (level 0) as the param_names.""" - ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} - names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] - assert len(names_of_cols_level0) == len(_df.columns.levels[0]) - _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) - return _df - - def find_difference_relative_to_comparison_series( - _ser: pd.Series, - comparison: str, - scaled: bool = False, - drop_comparison: bool = True, - ): - """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) - within the runs (level 1), relative to where draw = `comparison`. - The comparison is `X - COMPARISON`.""" - return _ser \ - .unstack(level=0) \ - .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ - .drop(columns=([comparison] if drop_comparison else [])) \ - .stack() - - def find_difference_relative_to_comparison_series_dataframe(_df: pd.DataFrame, **kwargs): - """Apply `find_difference_relative_to_comparison_series` to each row in a dataframe""" - return pd.concat({ - _idx: find_difference_relative_to_comparison_series(row, **kwargs) - for _idx, row in _df.iterrows() - }, axis=1).T - - def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False, put_labels_in_legend=True): - """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the - extent of the error bar.""" - - substitute_labels = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - - yerr = np.array([ - (_df['mean'] - _df['lower']).values, - (_df['upper'] - _df['mean']).values, - ]) - - xticks = {(i + 0.5): k for i, k in enumerate(_df.index)} - - # Define colormap (used only with option `put_labels_in_legend=True`) - cmap = plt.get_cmap("tab20") - rescale = lambda y: (y - np.min(y)) / (np.max(y) - np.min(y)) # noqa: E731 - colors = list(map(cmap, rescale(np.array(list(xticks.keys()))))) if put_labels_in_legend else None - - fig, ax = plt.subplots(figsize=(10, 5)) - ax.bar( - xticks.keys(), - _df['mean'].values, - yerr=yerr, - alpha=0.8, - ecolor='black', - color=colors, - capsize=10, - label=xticks.values() - ) - if annotations: - for xpos, ypos, text in zip(xticks.keys(), _df['upper'].values, annotations): - ax.text(xpos, ypos*1.15, text, horizontalalignment='center', rotation='vertical', fontsize='x-small') - ax.set_xticks(list(xticks.keys())) - - if put_labels_in_legend: - # Update xticks label with substitute labels - # Insert legend with updated labels that shows correspondence between substitute label and original label - xtick_values = [letter for letter, label in zip(substitute_labels, xticks.values())] - xtick_legend = [f'{letter}: {label}' for letter, label in zip(substitute_labels, xticks.values())] - h, legs = ax.get_legend_handles_labels() - ax.legend(h, xtick_legend, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5)) - ax.set_xticklabels(list(xtick_values)) - else: - if not xticklabels_horizontal_and_wrapped: - # xticklabels will be vertical and not wrapped - ax.set_xticklabels(list(xticks.values()), rotation=90) - else: - wrapped_labs = 
["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] - ax.set_xticklabels(wrapped_labs) - - ax.grid(axis="y") - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - fig.tight_layout() - - return fig, ax - - # %% Define parameter names - param_names = get_parameter_names_from_scenario_file() - - # %% Quantify the health gains associated with all interventions combined. - - # Absolute Number of Deaths and DALYs - num_deaths = extract_results( - results_folder, - module='tlo.methods.demography', - key='death', - custom_generate_series=get_num_deaths, - do_scaling=True - ).pipe(set_param_names_as_column_index_level_0) - - num_dalys = extract_results( - results_folder, - module='tlo.methods.healthburden', - key='dalys_stacked', - custom_generate_series=get_num_dalys, - do_scaling=True - ).pipe(set_param_names_as_column_index_level_0) - - # %% Charts of total numbers of deaths / DALYS - num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names) - num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names) - - name_of_plot = f'Deaths, {target_period()}' - fig, ax = do_bar_plot_with_ci(num_deaths_summarized / 1e6) - ax.set_title(name_of_plot) - ax.set_ylabel('(Millions)') - fig.tight_layout() - ax.axhline(num_deaths_summarized.loc['Baseline', 'mean']/1e6, color='black', alpha=0.5) - fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) - fig.show() - plt.close(fig) - - name_of_plot = f'All Scenarios: DALYs, {target_period()}' - fig, ax = do_bar_plot_with_ci(num_dalys_summarized / 1e6) - ax.set_title(name_of_plot) - ax.set_ylabel('(Millions)') - ax.axhline(num_dalys_summarized.loc['Baseline', 'mean']/1e6, color='black', alpha=0.5) - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) - fig.show() - plt.close(fig) - - - # %% Deaths and DALYS averted relative to Status Quo - num_deaths_averted = summarize( - -1.0 * - pd.DataFrame( - find_difference_relative_to_comparison_series( - num_deaths.loc[0], - comparison='Baseline') - ).T - ).iloc[0].unstack().reindex(param_names).drop(['Baseline']) - - pc_deaths_averted = 100.0 * summarize( - -1.0 * - pd.DataFrame( - find_difference_relative_to_comparison_series( - num_deaths.loc[0], - comparison='Baseline', - scaled=True) - ).T - ).iloc[0].unstack().reindex(param_names).drop(['Baseline']) - - num_dalys_averted = summarize( - -1.0 * - pd.DataFrame( - find_difference_relative_to_comparison_series( - num_dalys.loc[0], - comparison='Baseline') - ).T - ).iloc[0].unstack().reindex(param_names).drop(['Baseline']) - - pc_dalys_averted = 100.0 * summarize( - -1.0 * - pd.DataFrame( - find_difference_relative_to_comparison_series( - num_dalys.loc[0], - comparison='Baseline', - scaled=True) - ).T - ).iloc[0].unstack().reindex(param_names).drop(['Baseline']) - - # DEATHS - name_of_plot = f'Additional Deaths Averted vs Baseline, {target_period()}' - fig, ax = do_bar_plot_with_ci( - num_deaths_averted.clip(lower=0.0), - annotations=[ - f"{round(row['mean'], 0)} ({round(row['lower'], 1)}-{round(row['upper'], 1)}) %" - for _, row in pc_deaths_averted.clip(lower=0.0).iterrows() - ] - ) - ax.set_title(name_of_plot) - ax.set_ylabel('Additional Deaths Averted vs Baseline') - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) - fig.show() - plt.close(fig) - - # DALYS - name_of_plot = f'DALYs Averted vs Baseline, {target_period()}' - fig, ax = do_bar_plot_with_ci( - 
(num_dalys_averted / 1e6).clip(lower=0.0), - annotations=[ - f"{round(row['mean'])} ({round(row['lower'], 1)}-{round(row['upper'], 1)}) %" - for _, row in pc_dalys_averted.clip(lower=0.0).iterrows() - ] - ) - ax.set_title(name_of_plot) - ax.set_ylabel('Additional DALYS Averted vs Baseline \n(Millions)') - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) - fig.show() - plt.close(fig) - - - # %% DALYS averted relative to Baseline - broken down by major cause (HIV, TB, MALARIA) - - def get_total_num_dalys_by_label(_df): - """Return the total number of DALYS in the TARGET_PERIOD by wealth and cause label.""" - y = _df \ - .loc[_df['year'].between(*[d.year for d in TARGET_PERIOD])] \ - .drop(columns=['date', 'year', 'li_wealth']) \ - .sum(axis=0) - - # define course cause mapper for HIV, TB, MALARIA and OTHER - causes = { - 'AIDS': 'HIV/AIDS', - 'TB (non-AIDS)': 'TB', - 'Malaria': 'Malaria', - '': 'Other', # defined in order to use this dict to determine ordering of the causes in output - } - causes_relabels = y.index.map(causes).fillna('Other') - - return y.groupby(by=causes_relabels).sum()[list(causes.values())] - - total_num_dalys_by_label_results = extract_results( - results_folder, - module="tlo.methods.healthburden", - key="dalys_by_wealth_stacked_by_age_and_time", - custom_generate_series=get_total_num_dalys_by_label, - do_scaling=True, - ).pipe(set_param_names_as_column_index_level_0) - - total_num_dalys_by_label_results_averted_vs_baseline = summarize( - -1.0 * find_difference_relative_to_comparison_series_dataframe( - total_num_dalys_by_label_results, - comparison='Baseline' - ), - only_mean=True - ) - - # Check that when we sum across the causes, we get the same total as calculated when we didn't split by cause. 
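A note on the consistency check that follows: the one-sided comparison `< 1e-6` would also pass if the by-cause sum fell far *below* the overall total, so an absolute-difference form is the safer pattern. A standalone sketch with invented numbers, not tied to the model's API:

```python
import pandas as pd

# Toy stand-ins: DALYs averted by cause (rows) per scenario (columns), and the
# overall totals computed independently (values invented for illustration)
by_label = pd.DataFrame({"ScenA": [1.0, 2.0], "ScenB": [3.0, 4.0]},
                        index=["HIV/AIDS", "Other"])
totals = pd.Series({"ScenA": 3.0, "ScenB": 7.0})

# Symmetric check: summing across causes must reproduce the overall totals
assert ((by_label.sum(axis=0).sort_index() - totals.sort_index()).abs() < 1e-6).all()
```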
- assert ( - (total_num_dalys_by_label_results_averted_vs_baseline.sum(axis=0).sort_index() - - num_dalys_averted['mean'].sort_index() - ) < 1e-6 - ).all() - - # Make a separate plot for the scale-up of each program/programs - plots = { - 'HIV programs': [ - 'HIV Programs Scale-up WITHOUT HSS PACKAGE', - 'HIV Programs Scale-up WITH HSS PACKAGE', - ], - 'TB programs': [ - 'TB Programs Scale-up WITHOUT HSS PACKAGE', - 'TB Programs Scale-up WITH HSS PACKAGE', - ], - 'Malaria programs': [ - 'Malaria Programs Scale-up WITHOUT HSS PACKAGE', - 'Malaria Programs Scale-up WITH HSS PACKAGE', - ], - 'All programs': [ - 'FULL HSS PACKAGE', - 'HIV/Tb/Malaria Programs Scale-up WITHOUT HSS PACKAGE', - 'HIV/Tb/Malaria Programs Scale-up WITH HSS PACKAGE', - ] - } - - for plot_name, scenario_names in plots.items(): - name_of_plot = f'{plot_name}' - fig, ax = plt.subplots() - total_num_dalys_by_label_results_averted_vs_baseline[scenario_names].T.plot.bar( - stacked=True, - ax=ax, - rot=0, - alpha=0.75 - ) - ax.set_ylim([0, 10e7]) - ax.set_title(name_of_plot) - ax.set_ylabel(f'DALYs Averted vs Baseline, {target_period()}\n(Millions)') - wrapped_labs = ["\n".join(textwrap.wrap(_lab.get_text(), 20)) for _lab in ax.get_xticklabels()] - ax.set_xticklabels(wrapped_labs) - fig.tight_layout() - fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) - fig.show() - plt.close(fig) - - # todo: Neaten graphs - # todo: other metrics of health - # todo: other graphs, broken down by age/sex (this can also be cribbed from overview paper stuff) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("results_folder", type=Path) # outputs/horizontal_and_vertical_programs-2024-05-16 - args = parser.parse_args() - - apply( - results_folder=args.results_folder, - output_folder=args.results_folder, - resourcefilepath=Path('./resources') - ) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/mini_analysis_for_testing/mini_version_scenario.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/mini_analysis_for_testing/mini_version_scenario.py deleted file mode 100644 index 24256efd3a..0000000000 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/mini_analysis_for_testing/mini_version_scenario.py +++ /dev/null @@ -1,85 +0,0 @@ -"""This Scenario file is intended to help with debugging the scale-up of HIV. Tb and Malaria services, per issue #1413. - -Changes to the main analysis: - -* We're running this in MODE 1 and we're only looking. 
-* We're capturing the logged output from HIV, Tb and malaria -* We're limiting it to few scenarios: baseline + the scale-up of all HTM programs (no HealthSystem scale-up) - -""" - -from pathlib import Path -from typing import Dict - -from scripts.comparison_of_horizontal_and_vertical_programs.scenario_definitions import ( - ScenarioDefinitions, -) -from tlo import Date, logging -from tlo.analysis.utils import mix_scenarios -from tlo.methods.fullmodel import fullmodel -from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher -from tlo.scenario import BaseScenario - - -class MiniRunHTMWithAndWithoutHSS(BaseScenario): - def __init__(self): - super().__init__() - self.seed = 0 - self.start_date = Date(2010, 1, 1) - self.end_date = Date(2031, 1, 1) - self.pop_size = 100_000 - self._scenarios = self._get_scenarios() - self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 1 - - def log_configuration(self): - return { - 'filename': 'mini_htm_with_and_without_hss', - 'directory': Path('./outputs'), - 'custom_levels': { - '*': logging.WARNING, - 'tlo.methods.demography': logging.INFO, - 'tlo.methods.demography.detail': logging.WARNING, - 'tlo.methods.healthburden': logging.INFO, - 'tlo.methods.healthsystem': logging.WARNING, - 'tlo.methods.healthsystem.summary': logging.INFO, - 'tlo.methods.hiv': logging.INFO, - 'tlo.methods.tb': logging.INFO, - 'tlo.methods.malaria': logging.INFO, - } - } - - def modules(self): - return ( - fullmodel(resourcefilepath=self.resources) - + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] - ) - - def draw_parameters(self, draw_number, rng): - if draw_number < len(self._scenarios): - return list(self._scenarios.values())[draw_number] - - def _get_scenarios(self) -> Dict[str, Dict]: - """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario.""" - # Load helper class containing the definitions of the elements of all the scenarios - scenario_definitions = ScenarioDefinitions() - - return { - "Baseline": - scenario_definitions.baseline(), - - # - - - HIV & TB & MALARIA SCALE-UP WITHOUT HSS PACKAGE- - - - "HIV/Tb/Malaria Programs Scale-up WITHOUT HSS PACKAGE": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.hiv_scaleup(), - scenario_definitions.tb_scaleup(), - scenario_definitions.malaria_scaleup(), - ), - } - - -if __name__ == '__main__': - from tlo.cli import scenario_run - - scenario_run([__file__]) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/scenario_definitions.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/scenario_definitions.py deleted file mode 100644 index 31615bdc27..0000000000 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/scenario_definitions.py +++ /dev/null @@ -1,150 +0,0 @@ -"""The file contains all the definitions of scenarios used the Horizontal and Vertical Program Impact Analyses""" -from typing import Dict - -from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios - - -class ScenarioDefinitions: - - @property - def YEAR_OF_CHANGE_FOR_HSS(self) -> int: - """Year in which Health Systems Strengthening changes are made.""" - return 2019 # <-- baseline year of Human Resources for Health is 2018, and this is consistent with calibration - # during 2015-2019 period. 
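The fragments defined by this class are composed via `tlo.analysis.utils.mix_scenarios` (see `baseline()` and `hss_package()` below). As a rough illustration of that composition idea only, here is a toy merge assuming fragments are nested module-to-parameter dicts; the real helper's semantics (e.g. how clashes are handled) may differ:

```python
# Illustrative sketch only: NOT the real mix_scenarios implementation.
def toy_mix(*fragments: dict) -> dict:
    merged: dict = {}
    for fragment in fragments:
        for module, params in fragment.items():
            # Later fragments override earlier values for the same parameter
            merged.setdefault(module, {}).update(params)
    return merged

baseline = {"HealthSystem": {"mode_appt_constraints": 1, "policy_name": "Naive"}}
hiv_scaleup = {"Hiv": {"type_of_scaleup": "max", "scaleup_start_year": 2019}}
print(toy_mix(baseline, hiv_scaleup))
# {'HealthSystem': {...}, 'Hiv': {'type_of_scaleup': 'max', 'scaleup_start_year': 2019}}
```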
- - @property - def YEAR_OF_CHANGE_FOR_HTM(self) -> int: - """Year in which HIV, TB, Malaria scale-up changes are made.""" - return 2019 - - def baseline(self) -> Dict: - """Return the Dict with values for the parameter changes that define the baseline scenario. """ - return mix_scenarios( - get_parameters_for_status_quo(), # <-- Parameters that have been the calibration targets - - # Set up the HealthSystem to transition from Mode 1 -> Mode 2, with rescaling when there are HSS changes - { - "HealthSystem": { - "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration - "mode_appt_constraints_postSwitch": 2, # <-- Mode 2 post-change to show effects of HRH - "scale_to_effective_capabilities": True, - # <-- Transition into Mode2 with the effective capabilities in HRH 'revealed' in Mode 1 - "year_mode_switch": self.YEAR_OF_CHANGE_FOR_HSS, - - # Normalize the behaviour of Mode 2 - "policy_name": "Naive", - "tclose_overwrite": 1, - "tclose_days_offset_overwrite": 7, - } - }, - ) - - def double_capacity_at_primary_care(self) -> Dict: - return { - 'HealthSystem': { - 'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE_FOR_HSS, - 'HR_scaling_by_level_and_officer_type_mode': 'x2_fac0&1', - } - } - - def hrh_at_pop_grwoth(self) -> Dict: - return { - 'HealthSystem': { - 'yearly_HR_scaling_mode': 'scaling_by_population_growth', - # This is in-line with population growth _after 2018_ (baseline year for HRH) - } - } - - def hrh_at_gdp_growth(self) -> Dict: - return { - 'HealthSystem': { - 'yearly_HR_scaling_mode': 'GDP_growth', - # This is GDP growth after 2018 (baseline year for HRH) - } - } - - def hrh_above_gdp_growth(self) -> Dict: - return { - 'HealthSystem': { - 'yearly_HR_scaling_mode': 'GDP_growth_fHE_case5', - # This is above-GDP growth after 2018 (baseline year for HRH) - } - } - - def perfect_clinical_practices(self) -> Dict: - return { - 'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': { - 'max_healthsystem_function': [False, True], # <-- switch from False to True mid-way - 'year_of_switch': self.YEAR_OF_CHANGE_FOR_HSS, - } - } - - def perfect_healthcare_seeking(self) -> Dict: - return { - 'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': { - 'max_healthcare_seeking': [False, True], # <-- switch from False to True mid-way - 'year_of_switch': self.YEAR_OF_CHANGE_FOR_HSS, - } - } - - def vital_items_available(self) -> Dict: - return { - 'HealthSystem': { - 'year_cons_availability_switch': self.YEAR_OF_CHANGE_FOR_HSS, - 'cons_availability_postSwitch': 'all_vital_available', - } - } - - def medicines_available(self) -> Dict: - return { - 'HealthSystem': { - 'year_cons_availability_switch': self.YEAR_OF_CHANGE_FOR_HSS, - 'cons_availability_postSwitch': 'all_medicines_available', - } - } - - def all_consumables_available(self) -> Dict: - return { - 'HealthSystem': { - 'year_cons_availability_switch': self.YEAR_OF_CHANGE_FOR_HSS, - 'cons_availability_postSwitch': 'all', - } - } - - def hss_package(self) -> Dict: - """The parameters for the Health System Strengthening Package""" - return mix_scenarios( - self.double_capacity_at_primary_care(), # } - self.hrh_above_gdp_growth(), # } <-- confirmed that these two do build on one another under - # mode 2 rescaling: see `test_scaling_up_HRH_using_yearly_scaling_and_scaling_by_level_together`. 
- self.perfect_clinical_practices(), - self.perfect_healthcare_seeking(), - self.all_consumables_available(), - ) - - def hiv_scaleup(self) -> Dict: - """The parameters for the scale-up of the HIV program""" - return { - "Hiv": { - 'type_of_scaleup': 'max', # <--- using MAXIMUM SCALE-UP as an experiment - 'scaleup_start_year': self.YEAR_OF_CHANGE_FOR_HTM, - } - } - - def tb_scaleup(self) -> Dict: - """The parameters for the scale-up of the TB program""" - return { - "Tb": { - 'type_of_scaleup': 'max', # <--- using MAXIMUM SCALE-UP as an experiment - 'scaleup_start_year': self.YEAR_OF_CHANGE_FOR_HTM, - } - } - - def malaria_scaleup(self) -> Dict: - """The parameters for the scale-up of the Malaria program""" - return { - 'Malaria': { - 'type_of_scaleup': 'max', # <--- using MAXIMUM SCALE-UP as an experiment - 'scaleup_start_year': self.YEAR_OF_CHANGE_FOR_HTM, - } - } diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/scenario_hss_elements.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/scenario_hss_elements.py deleted file mode 100644 index 8c2f2afc09..0000000000 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/scenario_hss_elements.py +++ /dev/null @@ -1,143 +0,0 @@ -"""This Scenario file run the model under different assumptions for the HealthSystem and Vertical Program Scale-up - -Run on the batch system using: -``` -tlo batch-submit - src/scripts/comparison_of_horizontal_and_vertical_programs/scenario_hss_elements.py -``` - -""" - -from pathlib import Path -from typing import Dict - -from scripts.comparison_of_horizontal_and_vertical_programs.scenario_definitions import ( - ScenarioDefinitions, -) -from tlo import Date, logging -from tlo.analysis.utils import mix_scenarios -from tlo.methods.fullmodel import fullmodel -from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher -from tlo.scenario import BaseScenario - - -class HSSElements(BaseScenario): - def __init__(self): - super().__init__() - self.seed = 0 - self.start_date = Date(2010, 1, 1) - self.end_date = Date(2031, 1, 1) - self.pop_size = 100_000 - self._scenarios = self._get_scenarios() - self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 3 # <--- todo: N.B. 
Very small number of repeated run, to be efficient for now - - def log_configuration(self): - return { - 'filename': 'hss_elements', - 'directory': Path('./outputs'), - 'custom_levels': { - '*': logging.WARNING, - 'tlo.methods.demography': logging.INFO, - 'tlo.methods.demography.detail': logging.WARNING, - 'tlo.methods.healthburden': logging.INFO, - 'tlo.methods.healthsystem': logging.WARNING, - 'tlo.methods.healthsystem.summary': logging.INFO, - } - } - - def modules(self): - return ( - fullmodel(resourcefilepath=self.resources) - + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] - ) - - def draw_parameters(self, draw_number, rng): - if draw_number < len(self._scenarios): - return list(self._scenarios.values())[draw_number] - - def _get_scenarios(self) -> Dict[str, Dict]: - """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario.""" - - scenario_definitions = ScenarioDefinitions() - - return { - "Baseline": scenario_definitions.baseline(), - - # *************************** - # HEALTH SYSTEM STRENGTHENING - # *************************** - - # - - - Human Resource for Health - - - - - "Double Capacity at Primary Care": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.double_capacity_at_primary_care(), - ), - - "HRH Keeps Pace with Population Growth": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions._hrh_at_pop_growth(), - ), - - "HRH Increases at GDP Growth": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions._hrh_at_grp_growth(), - ), - - "HRH Increases above GDP Growth": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.hrh_above_gdp_growth(), - ), - - - # - - - Quality of Care - - - - "Perfect Clinical Practice": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions._perfect_clinical_practice(), - ), - - "Perfect Healthcare Seeking": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.perfect_healthcare_seeking(), - ), - - # - - - Supply Chains - - - - "Perfect Availability of Vital Items": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.vital_items_available(), - ), - - "Perfect Availability of Medicines": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.medicines_available(), - - ), - - "Perfect Availability of All Consumables": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.all_consumables_available(), - ), - - # - - - FULL PACKAGE OF HEALTH SYSTEM STRENGTHENING - - - - "FULL PACKAGE": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.hss_package(), - ), - } - - -if __name__ == '__main__': - from tlo.cli import scenario_run - - scenario_run([__file__]) diff --git a/src/scripts/comparison_of_horizontal_and_vertical_programs/scenario_vertical_programs_with_and_without_hss.py b/src/scripts/comparison_of_horizontal_and_vertical_programs/scenario_vertical_programs_with_and_without_hss.py deleted file mode 100644 index e4f6dcbd88..0000000000 --- a/src/scripts/comparison_of_horizontal_and_vertical_programs/scenario_vertical_programs_with_and_without_hss.py +++ /dev/null @@ -1,147 +0,0 @@ -"""This Scenario file run the model under different assumptions for the HealthSystem and Vertical Program Scale-up - -Run on the batch system using: -``` -tlo batch-submit - 
src/scripts/comparison_of_horizontal_and_vertical_programs/scenario_vertical_programs_with_and_without_hss.py -``` - -""" - -from pathlib import Path -from typing import Dict - -from scripts.comparison_of_horizontal_and_vertical_programs.scenario_definitions import ( - ScenarioDefinitions, -) -from tlo import Date, logging -from tlo.analysis.utils import mix_scenarios -from tlo.methods.fullmodel import fullmodel -from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher -from tlo.scenario import BaseScenario - - -class HTMWithAndWithoutHSS(BaseScenario): - def __init__(self): - super().__init__() - self.seed = 0 - self.start_date = Date(2010, 1, 1) - self.end_date = Date(2031, 1, 1) - self.pop_size = 100_000 - self._scenarios = self._get_scenarios() - self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 3 # <--- todo: N.B. Very small number of repeated run, to be efficient for now - - def log_configuration(self): - return { - 'filename': 'htm_with_and_without_hss', - 'directory': Path('./outputs'), - 'custom_levels': { - '*': logging.WARNING, - 'tlo.methods.demography': logging.INFO, - 'tlo.methods.demography.detail': logging.WARNING, - 'tlo.methods.healthburden': logging.INFO, - 'tlo.methods.healthsystem': logging.WARNING, - 'tlo.methods.healthsystem.summary': logging.INFO, - 'tlo.methods.hiv': logging.INFO, - 'tlo.methods.tb': logging.INFO, - 'tlo.methods.malaria': logging.INFO, - } - } - - def modules(self): - return ( - fullmodel(resourcefilepath=self.resources) - + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] - ) - - def draw_parameters(self, draw_number, rng): - if draw_number < len(self._scenarios): - return list(self._scenarios.values())[draw_number] - - def _get_scenarios(self) -> Dict[str, Dict]: - """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario.""" - # Load helper class containing the definitions of the elements of all the scenarios - scenario_definitions = ScenarioDefinitions() - - return { - "Baseline": - scenario_definitions.baseline(), - - # - - - FULL PACKAGE OF HEALTH SYSTEM STRENGTHENING - - - - "FULL HSS PACKAGE": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.hss_package(), - ), - - # ************************************************** - # VERTICAL PROGRAMS WITH AND WITHOUT THE HSS PACKAGE - # ************************************************** - - # - - - HIV SCALE-UP WITHOUT HSS PACKAGE- - - - "HIV Programs Scale-up WITHOUT HSS PACKAGE": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.hiv_scaleup(), - ), - # - - - HIV SCALE-UP *WITH* HSS PACKAGE- - - - "HIV Programs Scale-up WITH HSS PACKAGE": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.hiv_scaleup(), - scenario_definitions.hss_package(), - ), - - # - - - TB SCALE-UP WITHOUT HSS PACKAGE- - - - "TB Programs Scale-up WITHOUT HSS PACKAGE": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.tb_scaleup(), - ), - # - - - TB SCALE-UP *WITH* HSS PACKAGE- - - - "TB Programs Scale-up WITH HSS PACKAGE": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.tb_scaleup(), - scenario_definitions.hss_package(), - ), - - # - - - MALARIA SCALE-UP WITHOUT HSS PACKAGE- - - - "Malaria Programs Scale-up WITHOUT HSS PACKAGE": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.malaria_scaleup(), - ), - # - - - MALARIA SCALE-UP *WITH* HSS PACKAGE- - - 
- "Malaria Programs Scale-up WITH HSS PACKAGE": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.malaria_scaleup(), - scenario_definitions.hss_package(), - ), - - # - - - HIV & TB & MALARIA SCALE-UP WITHOUT HSS PACKAGE- - - - "HIV/Tb/Malaria Programs Scale-up WITHOUT HSS PACKAGE": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.hiv_scaleup(), - scenario_definitions.tb_scaleup(), - scenario_definitions.malaria_scaleup(), - ), - # - - - HIV & TB & MALARIA SCALE-UP *WITH* HSS PACKAGE- - - - "HIV/Tb/Malaria Programs Scale-up WITH HSS PACKAGE": - mix_scenarios( - scenario_definitions.baseline(), - scenario_definitions.hiv_scaleup(), - scenario_definitions.tb_scaleup(), - scenario_definitions.malaria_scaleup(), - scenario_definitions.hss_package(), - ), - } - - -if __name__ == '__main__': - from tlo.cli import scenario_run - - scenario_run([__file__]) diff --git a/src/scripts/dependencies/tlo_module_graph.py b/src/scripts/dependencies/tlo_module_graph.py deleted file mode 100644 index 278539db31..0000000000 --- a/src/scripts/dependencies/tlo_module_graph.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Construct a graph showing dependencies between modules.""" - -import argparse -from pathlib import Path -from typing import Dict, Set - -from tlo.dependencies import DependencyGetter, get_all_dependencies, get_module_class_map -from tlo.methods import Metadata - -try: - import pydot -except ImportError: - pydot = None - - -def construct_module_dependency_graph( - excluded_modules: Set[str], - disease_module_node_defaults: Dict, - other_module_node_defaults: Dict, - get_dependencies: DependencyGetter = get_all_dependencies, -): - """Construct a pydot object representing module dependency graph. - - :param excluded_modules: Set of ``Module`` subclass names to not included in graph. - :param disease_module_node_defaults: Any dot node attributes to apply to by default - to disease module nodes. - :param other_module_node_defaults: Any dot node attributes to apply to by default - to non-disease module nodes. - :param get_dependencies: Function which given a module gets the set of module - dependencies. Defaults to extracting all dependencies. - :return: Pydot directed graph representing module dependencies. - """ - if pydot is None: - raise RuntimeError("pydot package must be installed") - module_class_map = get_module_class_map(excluded_modules) - module_graph = pydot.Dot("modules", graph_type="digraph") - disease_module_subgraph = pydot.Subgraph("disease_modules") - module_graph.add_subgraph(disease_module_subgraph) - other_module_subgraph = pydot.Subgraph("other_modules") - module_graph.add_subgraph(other_module_subgraph) - disease_module_subgraph.set_node_defaults(**disease_module_node_defaults) - other_module_subgraph.set_node_defaults(**other_module_node_defaults) - for name, module_class in module_class_map.items(): - node = pydot.Node(name) - if Metadata.DISEASE_MODULE in module_class.METADATA: - disease_module_subgraph.add_node(node) - else: - other_module_subgraph.add_node(node) - for key, module in module_class_map.items(): - for dependency in get_dependencies(module, module_class_map.keys()): - if dependency not in excluded_modules: - module_graph.add_edge(pydot.Edge(key, dependency)) - return module_graph - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - "output_file", type=Path, help=( - "Path to output graph to. 
File extension will determine output format - for example: dot, dia, png, svg" - ) - ) - args = parser.parse_args() - excluded_modules = { - "Mockitis", - "ChronicSyndrome", - "Skeleton", - "AlriPropertiesOfOtherModules", - "DiarrhoeaPropertiesOfOtherModules", - "DummyHivModule", - "SimplifiedBirths", - "Tb", - } - module_graph = construct_module_dependency_graph( - excluded_modules, - disease_module_node_defaults={"fontname": "Arial", "shape": "box"}, - other_module_node_defaults={"fontname": "Arial", "shape": "ellipse"}, - ) - format = ( - args.output_file.suffix[1:] if args.output_file.suffix else "raw" - ) - module_graph.write(args.output_file, format=format) diff --git a/src/scripts/hiv/projections_jan2023/analysis_logged_deviance.py b/src/scripts/hiv/projections_jan2023/analysis_logged_deviance.py index 7a2af7fbed..eca9f999bc 100644 --- a/src/scripts/hiv/projections_jan2023/analysis_logged_deviance.py +++ b/src/scripts/hiv/projections_jan2023/analysis_logged_deviance.py @@ -34,8 +34,8 @@ # %% Run the simulation start_date = Date(2010, 1, 1) -end_date = Date(2022, 1, 1) -popsize = 5000 +end_date = Date(2014, 1, 1) +popsize = 1000 # scenario = 1 @@ -87,8 +87,8 @@ ) # set the scenario -sim.modules["Hiv"].parameters["do_scaleup"] = True -sim.modules["Hiv"].parameters["scaleup_start_year"] = 2019 +# sim.modules["Hiv"].parameters["beta"] = 0.129671 +# sim.modules["Tb"].parameters["scaling_factor_WHO"] = 1.5 # sim.modules["Tb"].parameters["scenario"] = scenario # sim.modules["Tb"].parameters["scenario_start_date"] = Date(2010, 1, 1) # sim.modules["Tb"].parameters["scenario_SI"] = "z" diff --git a/src/scripts/htm_scenario_analyses/analysis_htm_scaleup.py b/src/scripts/htm_scenario_analyses/analysis_htm_scaleup.py deleted file mode 100644 index beacb5e218..0000000000 --- a/src/scripts/htm_scenario_analyses/analysis_htm_scaleup.py +++ /dev/null @@ -1,112 +0,0 @@ - -""" -This scenario file sets up the scenarios for simulating the effects of scaling up programs - -The scenarios are: -*0 baseline mode 1 -*1 scale-up HIV program -*2 scale-up TB program -*3 scale-up malaria program -*4 scale-up HIV and Tb and malaria programs - -scale-up occurs on the default scale-up start date (01/01/2025: in parameters list of resourcefiles) - -For all scenarios, keep all default health system settings - -check the batch configuration gets generated without error: -tlo scenario-run --draw-only src/scripts/htm_scenario_analyses/analysis_htm_scaleup.py - -Run on the batch system using: -tlo batch-submit src/scripts/htm_scenario_analyses/analysis_htm_scaleup.py - -or locally using: -tlo scenario-run src/scripts/htm_scenario_analyses/analysis_htm_scaleup.py - -or execute a single run: -tlo scenario-run src/scripts/htm_scenario_analyses/analysis_htm_scaleup.py --draw 1 0 - -""" - -from pathlib import Path - -from tlo import Date, logging -from tlo.methods import ( - demography, - enhanced_lifestyle, - epi, - healthburden, - healthseekingbehaviour, - healthsystem, - hiv, - malaria, - simplified_births, - symptommanager, - tb, -) -from tlo.scenario import BaseScenario - - -class EffectOfProgrammes(BaseScenario): - def __init__(self): - super().__init__() - self.seed = 0 - self.start_date = Date(2010, 1, 1) - self.end_date = Date(2025, 1, 1) - self.pop_size = 5_000 - self.number_of_draws = 2 - self.runs_per_draw = 1 - - def log_configuration(self): - return { - 'filename': 'scaleup_tests', - 'directory': Path('./outputs'), # <- (specified only for local running) - 'custom_levels': { - '*': logging.WARNING, - 
'tlo.methods.hiv': logging.INFO, - 'tlo.methods.tb': logging.INFO, - 'tlo.methods.malaria': logging.INFO, - 'tlo.methods.demography': logging.INFO, - } - } - - def modules(self): - - return [ - demography.Demography(resourcefilepath=self.resources), - simplified_births.SimplifiedBirths(resourcefilepath=self.resources), - enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), - healthsystem.HealthSystem(resourcefilepath=self.resources), - symptommanager.SymptomManager(resourcefilepath=self.resources), - healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), - healthburden.HealthBurden(resourcefilepath=self.resources), - epi.Epi(resourcefilepath=self.resources), - hiv.Hiv(resourcefilepath=self.resources), - tb.Tb(resourcefilepath=self.resources), - malaria.Malaria(resourcefilepath=self.resources), - ] - - def draw_parameters(self, draw_number, rng): - scaleup_start_year = 2019 - - return { - 'Hiv': { - 'do_scaleup': [False, True, False, False, True][draw_number], - 'scaleup_start_year': scaleup_start_year - }, - 'Tb': { - 'do_scaleup': [False, False, True, False, True][draw_number], - 'scaleup_start_year': scaleup_start_year - }, - 'Malaria': { - 'do_scaleup': [False, False, False, True, True][draw_number], - 'scaleup_start_year': scaleup_start_year - }, - } - - -if __name__ == '__main__': - from tlo.cli import scenario_run - - scenario_run([__file__]) - - diff --git a/src/scripts/htm_scenario_analyses/scenario_plots.py b/src/scripts/htm_scenario_analyses/scenario_plots.py deleted file mode 100644 index c209c60f6e..0000000000 --- a/src/scripts/htm_scenario_analyses/scenario_plots.py +++ /dev/null @@ -1,140 +0,0 @@ -""" this reads in the outputs generates through analysis_htm_scaleup.py -and produces plots for HIV, TB and malaria incidence -""" - - -import datetime -from pathlib import Path - -import matplotlib.pyplot as plt -import pandas as pd -import seaborn as sns - -from tlo import Date -from tlo.analysis.utils import ( - extract_params, - extract_results, - get_scenario_info, - get_scenario_outputs, - load_pickled_dataframes, -) - -resourcefilepath = Path("./resources") -datestamp = datetime.date.today().strftime("__%Y_%m_%d") - -outputspath = Path("./outputs") -# outputspath = Path("./outputs/t.mangal@imperial.ac.uk") - - -# 0) Find results_folder associated with a given batch_file (and get most recent [-1]) -results_folder = get_scenario_outputs("scaleup_tests", outputspath)[-1] - -# Declare path for output graphs from this script -make_graph_file_name = lambda stub: results_folder / f"{stub}.png" # noqa: E731 - -# look at one log (so can decide what to extract) -log = load_pickled_dataframes(results_folder, draw=1) - -# get basic information about the results -info = get_scenario_info(results_folder) - -# 1) Extract the parameters that have varied over the set of simulations -params = extract_params(results_folder) - - -# DEATHS - - -def get_num_deaths_by_cause_label(_df): - """Return total number of Deaths by label within the TARGET_PERIOD - values are summed for all ages - df returned: rows=COD, columns=draw - """ - return _df \ - .loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)] \ - .groupby(_df['label']) \ - .size() - - -TARGET_PERIOD = (Date(2020, 1, 1), Date(2025, 1, 1)) - -num_deaths_by_cause_label = extract_results( - results_folder, - module='tlo.methods.demography', - key='death', - custom_generate_series=get_num_deaths_by_cause_label, - do_scaling=False - ) - - -def summarise_deaths_for_one_cause(results_folder, label): - """ returns 
mean deaths for each year of the simulation - values are aggregated across the runs of each draw - for the specified cause - """ - - results_deaths = extract_results( - results_folder, - module="tlo.methods.demography", - key="death", - custom_generate_series=( - lambda df: df.assign(year=df["date"].dt.year).groupby( - ["year", "label"])["person_id"].count() - ), - do_scaling=True, - ) - # removes multi-index - results_deaths = results_deaths.reset_index() - - # select only cause specified - tmp = results_deaths.loc[ - (results_deaths.label == label) - ] - - # group deaths by year - tmp = pd.DataFrame(tmp.groupby(["year"]).sum()) - - # get mean for each draw - mean_deaths = pd.concat({'mean': tmp.iloc[:, 1:].groupby(level=0, axis=1).mean()}, axis=1).swaplevel(axis=1) - - return mean_deaths - - -aids_deaths = summarise_deaths_for_one_cause(results_folder, 'AIDS') -tb_deaths = summarise_deaths_for_one_cause(results_folder, 'TB (non-AIDS)') -malaria_deaths = summarise_deaths_for_one_cause(results_folder, 'Malaria') - -draw_labels = ['No scale-up', 'HIV scale-up', 'TB scale-up', 'Malaria scale-up', 'HTM scale-up'] - -colors = sns.color_palette("Set1", 5) # five qualitative colours, one per draw - - -# Create subplots -fig, axs = plt.subplots(3, 1, figsize=(6, 10)) - -# Plot AIDS deaths -for i, col in enumerate(aids_deaths.columns): - axs[0].plot(aids_deaths.index, aids_deaths[col], label=draw_labels[i], color=colors[i]) -axs[0].set_title('HIV/AIDS') -axs[0].legend() -axs[0].axvline(x=2019, color='gray', linestyle='--') - -# Plot TB deaths -for i, col in enumerate(tb_deaths.columns): - axs[1].plot(tb_deaths.index, tb_deaths[col], color=colors[i]) -axs[1].set_title('TB') -axs[1].axvline(x=2019, color='gray', linestyle='--') - -# Plot malaria deaths -for i, col in enumerate(malaria_deaths.columns): - axs[2].plot(malaria_deaths.index, malaria_deaths[col], color=colors[i]) -axs[2].set_title('Malaria') -axs[2].axvline(x=2019, color='gray', linestyle='--') - -for ax in axs: - ax.set_xlabel('Years') - ax.set_ylabel('Number of deaths') - -plt.tight_layout() -plt.show() - diff --git a/src/scripts/malaria/analysis_malaria.py b/src/scripts/malaria/analysis_malaria.py index b2b4217dc6..56d05cf3ae 100644 --- a/src/scripts/malaria/analysis_malaria.py +++ b/src/scripts/malaria/analysis_malaria.py @@ -34,8 +34,8 @@ resourcefilepath = Path("./resources") start_date = Date(2010, 1, 1) -end_date = Date(2014, 1, 1) -popsize = 100 +end_date = Date(2016, 1, 1) +popsize = 300 # set up the log config @@ -84,15 +84,6 @@ ) ) -# update parameters -sim.modules["Hiv"].parameters["do_scaleup"] = True -sim.modules["Tb"].parameters["do_scaleup"] = True -sim.modules["Malaria"].parameters["do_scaleup"] = True -sim.modules["Hiv"].parameters["scaleup_start"] = 2 -sim.modules["Tb"].parameters["scaleup_start"] = 2 -sim.modules["Malaria"].parameters["scaleup_start"] = 2 - - # Run the simulation and flush the logger sim.make_initial_population(n=popsize) sim.simulate(end_date=end_date) @@ -106,5 +97,5 @@ pickle.dump(dict(output), f, pickle.HIGHEST_PROTOCOL) # load the results -with open(outputpath / "malaria_run.pickle", "rb") as f: +with open(outputpath / "default_run.pickle", "rb") as f: output = pickle.load(f) diff --git a/src/scripts/profiling/run_profiling.py b/src/scripts/profiling/run_profiling.py index 6097177af9..882894d6af 100644 --- a/src/scripts/profiling/run_profiling.py +++ b/src/scripts/profiling/run_profiling.py @@ -12,7 +12,6 @@ from pyinstrument.renderers import ConsoleRenderer, HTMLRenderer from pyinstrument.session import Session from 
scale_run import save_arguments_to_json, scale_run -from shared import memory_statistics try: from ansi2html import Ansi2HTMLConverter @@ -169,8 +168,6 @@ def record_run_statistics( **profiling_session_statistics(profiling_session), # Disk input/output statistics **disk_statistics(disk_usage), - # Process memory statistics - **memory_statistics(), # Statistics from the end state of the simulation **simulation_statistics(completed_sim), # User-defined additional stats (if any) @@ -225,7 +222,7 @@ def run_profiling( "initial_population": initial_population, "log_filename": "scale_run_profiling", "log_level": "WARNING", - "parse_log_file": True, + "parse_log_file": False, "show_progress_bar": show_progress_bar, "seed": 0, "disable_health_system": False, @@ -248,7 +245,7 @@ # Profile scale_run disk_at_start = disk_io_counters() - completed_simulation, logs_dict = scale_run( + completed_simulation = scale_run( **scale_run_args, output_dir=output_dir, profiler=profiler ) disk_at_end = disk_io_counters() @@ -326,13 +323,6 @@ additional_stats=additional_stats, ) print("done") - - # Write out logged profiling statistics - logged_statistics_file = output_dir / f"{output_name}.logged-stats.csv" - print(f"Writing {logged_statistics_file}", end="...", flush=True) - logs_dict["tlo.profiling"]["stats"].to_csv(logged_statistics_file, index=False) - print("done") - if __name__ == "__main__": diff --git a/src/scripts/profiling/scale_run.py b/src/scripts/profiling/scale_run.py index 1e5d8042b3..735d1e7ba3 100644 --- a/src/scripts/profiling/scale_run.py +++ b/src/scripts/profiling/scale_run.py @@ -13,7 +13,6 @@ from shared import print_checksum, schedule_profile_log from tlo import Date, Simulation, logging -from tlo.analysis.utils import LogsDict from tlo.analysis.utils import parse_log_file as parse_log_file_fn from tlo.methods.fullmodel import fullmodel @@ -56,10 +55,14 @@ def scale_run( ignore_warnings: bool = False, log_final_population_checksum: bool = True, profiler: Optional["Profiler"] = None, -) -> Simulation | tuple[Simulation, LogsDict]: +) -> Simulation: if ignore_warnings: warnings.filterwarnings("ignore") + # Start profiler if one has been passed + if profiler is not None: + profiler.start() + # Simulation period start_date = Date(2010, 1, 1) end_date = start_date + pd.DateOffset(years=years, months=months) @@ -67,14 +70,9 @@ log_config = { "filename": log_filename, "directory": output_dir, - # Ensure tlo.profiling log records always recorded - "custom_levels": {"*": getattr(logging, log_level), "tlo.profiling": logging.INFO}, + "custom_levels": {"*": getattr(logging, log_level)}, "suppress_stdout": disable_log_output_to_stdout, } - - # Start profiler if one has been passed - if profiler is not None: - profiler.start() sim = Simulation( start_date=start_date, @@ -104,19 +102,17 @@ # Run the simulation sim.make_initial_population(n=initial_population) - schedule_profile_log(sim, frequency_months=1) + schedule_profile_log(sim) sim.simulate(end_date=end_date) - - # Stop profiling session - if profiler is not None: - profiler.stop() - if log_final_population_checksum: print_checksum(sim) if save_final_population: sim.population.props.to_pickle(output_dir / "final_population.pkl") + if parse_log_file: + parse_log_file_fn(sim.log_filepath) + if record_hsi_event_details: with open(output_dir / "hsi_event_details.json", "w") as json_file: json.dump( - [ - event_details._asdict() - for event_details in sim.modules["HealthSystem"].hsi_event_counts.keys() - ], - json_file, - ) - - if parse_log_file: - logs_dict 
= parse_log_file_fn(sim.log_filepath) - return sim, logs_dict + # Stop profiling session + if profiler is not None: + profiler.stop() return sim diff --git a/src/scripts/profiling/shared.py b/src/scripts/profiling/shared.py index caa06cf468..cc972cfa66 100644 --- a/src/scripts/profiling/shared.py +++ b/src/scripts/profiling/shared.py @@ -4,11 +4,6 @@ import pandas as pd -try: - import psutil -except ImportError: - psutil = None - from tlo import DateOffset, Simulation, logging from tlo.events import PopulationScopeEventMixin, RegularEvent from tlo.util import hash_dataframe @@ -17,34 +12,9 @@ logger.setLevel(logging.INFO) -def memory_statistics() -> dict[str, float]: - """ - Extract memory usage statistics for the current process using `psutil`, if available. - Statistics are returned as a dictionary. If `psutil` is not installed, an empty dict is returned. - - Key / value pairs are: - memory_rss_MiB: float - Resident set size in mebibytes. The non-swapped physical memory the process has used. - memory_vms_MiB: float - Virtual memory size in mebibytes. The total amount of virtual memory used by the process. - memory_uss_MiB: float - Unique set size in mebibytes. The memory which is unique to a process and which would be freed if the process - was terminated right now. - """ - if psutil is None: - return {} - process = psutil.Process() - memory_info = process.memory_full_info() - return { - "memory_rss_MiB": memory_info.rss / 2**20, - "memory_vms_MiB": memory_info.vms / 2**20, - "memory_uss_MiB": memory_info.uss / 2**20, - } - - class LogProgress(RegularEvent, PopulationScopeEventMixin): - def __init__(self, module, frequency_months=3): - super().__init__(module, frequency=DateOffset(months=frequency_months)) + def __init__(self, module): + super().__init__(module, frequency=DateOffset(months=3)) self.time = time.time() def apply(self, population): @@ -56,18 +26,16 @@ def apply(self, population): key="stats", data={ "time": datetime.datetime.now().isoformat(), - "duration_minutes": duration, - "pop_df_number_alive": df.is_alive.sum(), - "pop_df_rows": len(df), - "pop_df_mem_MiB": df.memory_usage(index=True, deep=True).sum() / 2**20, - **memory_statistics(), + "duration": duration, + "alive": df.is_alive.sum(), + "total": len(df), }, ) -def schedule_profile_log(sim: Simulation, frequency_months: int = 3) -> None: +def schedule_profile_log(sim: Simulation) -> None: """Schedules the log progress event, used only for profiling""" - sim.schedule_event(LogProgress(sim.modules["Demography"], frequency_months), sim.start_date) + sim.schedule_event(LogProgress(sim.modules["Demography"]), sim.start_date) def print_checksum(sim: Simulation) -> None: diff --git a/src/tlo/analysis/life_expectancy.py b/src/tlo/analysis/life_expectancy.py index ebde940f66..6e3e9b4e83 100644 --- a/src/tlo/analysis/life_expectancy.py +++ b/src/tlo/analysis/life_expectancy.py @@ -99,36 +99,6 @@ def _aggregate_person_years_by_age(results_folder, target_period) -> pd.DataFram return py_by_sex_and_agegroup -def calculate_probability_of_dying(interval_width, fraction_of_last_age_survived, sex, _person_years_at_risk, - _number_of_deaths_in_interval) -> pd.DataFrame: - """Returns the probability of dying in each interval""" - - person_years_by_sex = _person_years_at_risk.xs(key=sex, level='sex') - - number_of_deaths_by_sex = _number_of_deaths_in_interval.xs(key=sex, level='sex') - - death_rate_in_interval = number_of_deaths_by_sex / person_years_by_sex - - death_rate_in_interval = death_rate_in_interval.fillna(0) - - if 
death_rate_in_interval.loc['90'] == 0: - death_rate_in_interval.loc['90'] = death_rate_in_interval.loc['85-89'] - - condition = number_of_deaths_by_sex > ( - - person_years_by_sex / interval_width / interval_width) - - probability_of_dying_in_interval = pd.Series(index=number_of_deaths_by_sex.index, dtype=float) - - probability_of_dying_in_interval[condition] = 1 - - probability_of_dying_in_interval[~condition] = interval_width * death_rate_in_interval / ( - - 1 + interval_width * (1 - fraction_of_last_age_survived) * death_rate_in_interval) - - probability_of_dying_in_interval.at['90'] = 1 - return probability_of_dying_in_interval, death_rate_in_interval - def _estimate_life_expectancy( - _person_years_at_risk: pd.Series, @@ -154,11 +124,29 @@ # separate male and female data for sex in ['M', 'F']: - probability_of_dying_in_interval, death_rate_in_interval = calculate_probability_of_dying(interval_width, - fraction_of_last_age_survived, - sex, - _person_years_at_risk, - _number_of_deaths_in_interval) + person_years_by_sex = _person_years_at_risk.xs(key=sex, level='sex') + number_of_deaths_by_sex = _number_of_deaths_in_interval.xs(key=sex, level='sex') + + death_rate_in_interval = number_of_deaths_by_sex / person_years_by_sex + # if no deaths or person-years, produces nan + death_rate_in_interval = death_rate_in_interval.fillna(0) + # if no deaths in age 90+, set death rate equal to value in age 85-89 + if death_rate_in_interval.loc['90'] == 0: + death_rate_in_interval.loc['90'] = death_rate_in_interval.loc['85-89'] + + # Calculate the probability of dying in the interval + # condition checks whether the observed number of deaths is significantly higher than would be expected + # based on population years at risk and survival fraction + # if true, suggests very high mortality rates and returns value 1 + condition = number_of_deaths_by_sex > ( + person_years_by_sex / interval_width / fraction_of_last_age_survived) + probability_of_dying_in_interval = pd.Series(index=number_of_deaths_by_sex.index, dtype=float) + probability_of_dying_in_interval[condition] = 1 + probability_of_dying_in_interval[~condition] = interval_width * death_rate_in_interval / ( + 1 + interval_width * (1 - fraction_of_last_age_survived) * death_rate_in_interval) + # all those surviving to final interval die during this interval + probability_of_dying_in_interval.at['90'] = 1 + # number_alive_at_start_of_interval # keep dtype as float in case using aggregated outputs # note range stops BEFORE the specified number @@ -260,90 +248,3 @@ def get_life_expectancy_estimates( else: return summarize(results=output, only_mean=False, collapse_columns=False) - - -def _calculate_probability_of_premature_death_for_single_run( - age_before_which_death_is_defined_as_premature: int, - person_years_at_risk: pd.Series, - number_of_deaths_in_interval: pd.Series -) -> Dict[str, float]: - """ - For a single run, estimate the probability of dying before the defined premature age for males and females. - Returns: Dict (keyed by "M" and "F" for the sex, values the estimated probability of dying before the defined - premature age). 
- """ - probability_of_premature_death = dict() - - age_group_labels = person_years_at_risk.index.get_level_values('age_group').unique() - interval_width = [ - 5 if '90' in interval else int(interval.split('-')[1]) - int(interval.split('-')[0]) + 1 - if '-' in interval else 1 for interval in age_group_labels.categories - ] - number_age_groups = len(interval_width) - fraction_of_last_age_survived = pd.Series([0.5] * number_age_groups, index=age_group_labels) - - for sex in ['M', 'F']: - probability_of_dying_in_interval, death_rate_in_interval = calculate_probability_of_dying(interval_width, - fraction_of_last_age_survived, - sex, - person_years_at_risk, - number_of_deaths_in_interval) - - # Calculate cumulative probability of dying before the defined premature age - cumulative_probability_of_dying = 0 - proportion_alive_at_start_of_interval = 1.0 - - for age_group, prob in probability_of_dying_in_interval.items(): - if int(age_group.split('-')[0]) >= age_before_which_death_is_defined_as_premature: - break - cumulative_probability_of_dying += proportion_alive_at_start_of_interval * prob - proportion_alive_at_start_of_interval *= (1 - prob) - - probability_of_premature_death[sex] = cumulative_probability_of_dying - - return probability_of_premature_death - - -def get_probability_of_premature_death( - results_folder: Path, - target_period: Tuple[datetime.date, datetime.date], - summary: bool = True, - age_before_which_death_is_defined_as_premature: int = 70 -) -> pd.DataFrame: - """ - Produces sets of probability of premature death for each draw/run. - - Args: - - results_folder (PosixPath): The path to the results folder containing log, `tlo.methods.demography` - - target period (tuple of dates): Declare the date range (inclusively) in which the probability is to be estimated. - - summary (bool): Declare whether to return a summarized value (mean with 95% uncertainty intervals) - or return the estimate for each draw/run. - - age_before_which_death_is_defined_as_premature (int): proposed in defined in Norheim et al.(2015) to be 70 years - - Returns: - - pd.DataFrame: The DataFrame with the probability estimates for every draw/run in the results folder; - or, with option `summary=True`, summarized (central, lower, upper estimates) for each draw. 
- """ - info = get_scenario_info(results_folder) - deaths = _num_deaths_by_age_group(results_folder, target_period) - person_years = _aggregate_person_years_by_age(results_folder, target_period) - - prob_for_each_draw_and_run = dict() - - for draw in range(info['number_of_draws']): - for run in range(info['runs_per_draw']): - prob_for_each_draw_and_run[(draw, run)] = _calculate_probability_of_premature_death_for_single_run( - age_before_which_death_is_defined_as_premature=age_before_which_death_is_defined_as_premature, - number_of_deaths_in_interval=deaths[(draw, run)], - person_years_at_risk=person_years[(draw, run)] - ) - - output = pd.DataFrame.from_dict(prob_for_each_draw_and_run) - output.index.name = "sex" - output.columns = output.columns.set_names(level=[0, 1], names=['draw', 'run']) - - if not summary: - return output - - else: - return summarize(results=output, only_mean=False, collapse_columns=False) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index e605400332..201f2fb25e 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -1,7 +1,6 @@ """ General utility functions for TLO analysis """ -import fileinput import gzip import json import os @@ -87,40 +86,6 @@ def parse_log_file(log_filepath, level: int = logging.INFO): return LogsDict({name: handle.name for name, handle in module_name_to_filehandle.items()}, level) -def merge_log_files(log_path_1: Path, log_path_2: Path, output_path: Path) -> None: - """Merge two log files, skipping any repeated header lines. - - :param log_path_1: Path to first log file to merge. Records from this log file will - appear first in merged log file. - :param log_path_2: Path to second log file to merge. Records from this log file will - appear after those in log file at `log_path_1` and any header lines in this file - which are also present in log file at `log_path_1` will be skipped. - :param output_path: Path to write merged log file to. Must not be one of `log_path_1` - or `log_path_2` as data is read from files while writing to this path. 
- """ - if output_path == log_path_1 or output_path == log_path_2: - msg = "output_path must not be equal to log_path_1 or log_path_2" - raise ValueError(msg) - with fileinput.input(files=(log_path_1, log_path_2), mode="r") as log_lines: - with output_path.open("w") as output_file: - written_header_lines = {} - for log_line in log_lines: - log_data = json.loads(log_line) - if "type" in log_data and log_data["type"] == "header": - if log_data["uuid"] in written_header_lines: - previous_header_line = written_header_lines[log_data["uuid"]] - if previous_header_line == log_line: - continue - else: - msg = ( - "Inconsistent header lines with matching UUIDs found when merging logs:\n" - f"{previous_header_line}\n{log_line}\n" - ) - raise RuntimeError(msg) - written_header_lines[log_data["uuid"]] = log_line - output_file.write(log_line) - - def write_log_to_excel(filename, log_dataframes): """Takes the output of parse_log_file() and creates an Excel file from dataframes""" metadata = list() @@ -325,9 +290,7 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: try: df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] output_from_eval: pd.Series = generate_series(df) - assert isinstance(output_from_eval, pd.Series), ( - 'Custom command does not generate a pd.Series' - ) + assert pd.Series == type(output_from_eval), 'Custom command does not generate a pd.Series' if do_scaling: res[draw_run] = output_from_eval * get_multiplier(draw, run) else: @@ -1166,7 +1129,7 @@ def get_parameters_for_status_quo() -> Dict: "equip_availability": "all", # <--- NB. Existing calibration is assuming all equipment is available }, } - + def get_parameters_for_standard_mode2_runs() -> Dict: """ Returns a dictionary of parameters and their updated values to indicate diff --git a/src/tlo/bitset_handler/bitset_extension.py b/src/tlo/bitset_handler/bitset_extension.py deleted file mode 100644 index 6163b3e4db..0000000000 --- a/src/tlo/bitset_handler/bitset_extension.py +++ /dev/null @@ -1,707 +0,0 @@ -from __future__ import annotations - -import operator -import re -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Dict, - Iterable, - List, - Optional, - Sequence, - Set, - Tuple, - Type, - TypeAlias, -) - -import numpy as np -import pandas as pd -from numpy.dtypes import BytesDType # pylint: disable=E0611 -from numpy.typing import NDArray -from pandas._typing import TakeIndexer, type_t -from pandas.core.arrays.base import ExtensionArray -from pandas.core.dtypes.base import ExtensionDtype - -if TYPE_CHECKING: - from pandas._typing import type_t - -BYTE_WIDTH = 8 -BooleanArray: TypeAlias = np.ndarray[bool] -CastableForPandasOps: TypeAlias = ( - "ElementType" - | Iterable["ElementType"] - | NDArray[np.uint8] - | NDArray[np.bytes_] - | "BitsetArray" -) -SingletonForPandasOps: TypeAlias = "ElementType" | Iterable["ElementType"] -# Assume nodes are strings, else we can't construct from string when passed the name! -# We can likely get around this with some careful planning, but we'd have to figure out how -# to pass type-metadata for the elements from inside the output of self.name, so that casting -# was successful. -ElementType: TypeAlias = str - - -class BitsetDtype(ExtensionDtype): - """ - A Bitset is represented by a fixed-width string, whose characters are each a uint8. - Elements of the set map 1:1 to these characters. 
- - If the elements set is indexed starting from 0, then: - - The quotient of an element's index under division by 8 gives the character within the string that contains the bit representing the element, - - The remainder (modulo 8) gives the bit within said character that represents the element itself. - - The element map takes an element of the bitset as a key, and returns a tuple whose first element is the - corresponding string-character index, and whose second is the uint8 representation of the element within that - string character. - """ - _element_map: Dict[ElementType, Tuple[int, np.uint8]] - _elements: Tuple[ElementType] - _index_map: Dict[Tuple[int, np.uint8], ElementType] - _metadata = ("_elements",) - - @classmethod - def construct_array_type(cls) -> type_t[BitsetArray]: - return BitsetArray - - @classmethod - def construct_from_string(cls, string: str) -> BitsetDtype: - """ - Construct an instance of this class by passing in a string of the form - that str() produces. - - That is, given a string of the form - bitset(#elements): e1, e2, e3, ... - - this method will return a BitsetDtype with elements e1, e2, e3, ... etc. - - The bitset(#elements): prefix is not required, simply passing a comma-separated - string of values will suffice to construct a bitset with those elements. - The prefix is typically supplied when constructing an implicit instance as part of - a call to `pd.Series` with the `dtype` parameter set to a string. - """ - if not isinstance(string, str): - raise TypeError(f"'construct_from_string' expects a string, got {type(string)}") - - string_has_bitset_prefix = re.match("bitset\((\d+)\):", string) - n_elements = None - if string_has_bitset_prefix: - prefix = string_has_bitset_prefix.group(0) - # Remove prefix - string = string.removeprefix(prefix) - # Extract number of elements if provided though - n_elements = int(re.search("(\d+)", prefix).group(0)) - if "," not in string: - raise TypeError( - "Need at least 2 (comma-separated) elements in string to construct bitset." - ) - else: - iterable_values = tuple(s.strip() for s in string.split(",")) - if n_elements is not None and len(iterable_values) != n_elements: - raise ValueError( - f"Requested bitset with {n_elements} elements, but provided {len(iterable_values)} elements: {iterable_values}" - ) - return BitsetDtype(s.strip() for s in string.split(",")) - - @property - def elements(self) -> Tuple[ElementType]: - return self._elements - - @property - def fixed_width(self) -> int: - """ - Fixed length of the character string that represents this bitset. - """ - return (self.n_elements - 1) // BYTE_WIDTH + 1 - - @property - def n_elements(self) -> int: - return len(self._elements) - - @property - def na_value(self) -> np.bytes_: - return self.type(self.fixed_width) - - @property - def name(self) -> str: - return self.__str__() - - @property - def np_array_dtype(self) -> BytesDType: - return BytesDType(self.fixed_width) - - @property - def type(self) -> Type[np.bytes_]: - return self.np_array_dtype.type - - def __init__(self, elements: Iterable[ElementType]) -> None: - # Take only unique elements. - # Sort elements alphabetically for consistency when constructing Bitsets that - # represent the same items. - # Cast all element types to strings so that construct_from_string does not need - # metadata about the type of each element. 
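The index arithmetic described above, shown standalone; this mirrors the _element_map construction in __init__ below, with a ten-element set invented for illustration:

import numpy as np

BYTE_WIDTH = 8
elements = tuple("abcdefghij")  # 10 elements -> fixed width of 2 characters
element_map = {
    e: (i // BYTE_WIDTH, np.uint8(2 ** (i % BYTE_WIDTH)))
    for i, e in enumerate(elements)
}
# element index 9 lives in character 1, at bit 2**1
assert element_map["j"] == (1, np.uint8(2))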
- provided_elements = sorted([e for e in elements]) - if not all( - isinstance(e, ElementType) for e in provided_elements - ): - raise TypeError(f"Bitset elements must be of type {ElementType}") - self._elements = tuple( - sorted(set(provided_elements), key=lambda x: provided_elements.index(x)) - ) - - if len(self._elements) <= 1: - raise ValueError("Bitsets must have at least 2 possible elements (use bool for 1-element sets).") - - # Setup the element map and its inverse, one-time initialisation cost. - self._element_map = { - e: (index // BYTE_WIDTH, np.uint8(2 ** (index % BYTE_WIDTH))) - for index, e in enumerate(self._elements) - } - self._index_map = {loc: element for element, loc in self._element_map.items()} - - def __repr__(self) -> str: - return f"bitset({self.n_elements}): {', '.join(str(e) for e in self._elements)}" - - def __str__(self) -> str: - return self.__repr__() - - def as_bytes(self, collection: Iterable[ElementType] | ElementType) -> np.bytes_: - """ - Return the bytes representation of this set or single element. - """ - return np.bytes_(self.as_uint8_array(collection)) - - def as_set(self, binary_repr: np.bytes_) -> Set[ElementType]: - """ - Return the set corresponding to the binary representation provided. - """ - elements_in_set = set() - for char_index, byte_value in enumerate(binary_repr): - bin_rep = format(byte_value, "b") - elements_in_set |= { - self._index_map[(char_index, np.uint8(2**i))] - for i, bit in enumerate(reversed(bin_rep)) - if bit == "1" - } - return elements_in_set - - def as_uint8_array(self, collection: Iterable[ElementType] | ElementType) -> NDArray[np.uint8]: - """ - Return the collection of elements as a 1D array of ``self.fixed_width`` uint8s. - Each uint8 corresponds to the bitwise representation of a single character - in a character string. - - A single element will be broadcast to a (1,) numpy array. - """ - if isinstance(collection, ElementType): - collection = set(collection) - - output = np.zeros((self.fixed_width, 1), dtype=np.uint8) - for element in collection: - char, bin_repr = self._element_map[element] - output[char] |= bin_repr - return output.squeeze(axis=1) - - def element_loc(self, element: ElementType) -> Tuple[int, np.uint8]: - """ - Location of the bit corresponding to the element in this bitset. - - Each element in the bitset is mapped to a single bit via the _element_map, and - can be located by specifying both: - - The index of the character in the fixed-width string that represents the bitset. - - The power of 2 within the uint8 representation of the single character that corresponds to the element. - - For example, a bitset of 18 elements is stored as a fixed-width string of 3 characters, - giving 24 bits to utilise. These are further subdivided into groups of 8, the first 8 - corresponding to the uint8 representation of the 0-indexed character, and so on. Each element within - this bitset is assigned a power of two within one of the character representations. - - :param element: Element value to locate. - :returns: The character index, and ``np.uint8`` representation of the element, unpacked in that order. - """ - return self._element_map[element] - - -class BitsetArray(ExtensionArray): - """ - Represents a series of Bitsets; each element in the series is a fixed-width bytestring, - which represents some possible combination of elements of a bitset as defined by - ``self.dtype``. - - When extracting a single entry via ``.loc`` or ``.at``, the value returned is a ``set``. 
This means that operations such as ``self.loc[0] |= {"1"}`` will behave as set operations - from base Python. This is achieved by setting the behaviour of the ``__setitem__`` method - to interpret ``set`` values as representations of the underlying bitset, thus causing them - to be cast to their bytestring representation before being assigned. - - Supported Operations (slices) - ----------------------------- - When operating on slices or masks of the series, we have to re-implement the desired operators - so that users can continue to pass ``set``s as scalar arguments on the left. As a general rule - of thumb, if a binary operator can be performed on ``set``s, it will also work identically, - but entry-wise, on a bitset series. - - ``ElementType`` instances will be cast to ``set``s if provided as singletons. Comparisons will be - performed entry-wise if a suitable vector of values is provided as the comparison target. - - Currently implemented methods are: - - = : - Directly assign the value on the right to the entry/entries on the left. - +, | : - Perform union of the values on the left with those on the right. - +=, |= : - In-place union; add values on the right to the sets on the left. - & : - Perform intersection of the values on the left with those on the right. - &= : - In-place intersection; retain only elements on the left that appear on the right. - -, -= : - Remove the values on the right from the sets on the left. - <, <= : - Entry-wise subset (strict subset) with the values on the right. - >, >= : - Entry-wise superset (strict superset) with the values on the right. - Note that the >= operation is the equivalent of entry-wise "if the values on the right - are contained in the bitsets on the left". - """ - - _data: NDArray[np.bytes_] - _dtype: BitsetDtype - - @staticmethod - def uint8s_to_byte_string(arr: np.ndarray[np.uint8]) -> NDArray[np.bytes_]: - """ - Returns a view of an array of ``np.uint8``s of shape ``(M, N)`` - as an array of ``M`` fixed-width byte strings of size ``N``. - """ - fixed_width = arr.shape[1] - return arr.view(f"{fixed_width}S").squeeze() - - @classmethod - def _concat_same_type(cls, to_concat: Sequence[BitsetArray]) -> BitsetArray: - concat_data = np.concatenate(bsa._data for bsa in to_concat) - return cls(concat_data, to_concat[0].dtype) - - @classmethod - def _from_sequence( - cls, scalars: Iterable[Set[ElementType] | ElementType], *, dtype: BitsetDtype | None = None, copy: bool = False - ) -> BitsetArray: - """ - Construct a new BitsetArray from a sequence of scalars. - - :param scalars: Sequence of sets of elements (or single-values to be interpreted as single-element sets). - :param dtype: Cast to this datatype, only BitsetDtype is supported if not None. - If None, an attempt will be made to construct an appropriate BitsetDtype using the scalar values provided. - :param copy: If True, copy the underlying data. Default False. - """ - # Check that we have only been passed sets as scalars. Implicitly convert single-items to sets. - for i, s in enumerate(scalars): - if not isinstance(s, set): - if isinstance(s, ElementType): - scalars[i] = set(s) - else: - raise ValueError(f"{s} cannot be cast to an element of a bitset.") - - # If no dtype has been provided, attempt to construct an appropriate BitsetDtype. 
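Before the dtype-inference branch below, note how the union over the scalars has to be spelled: a list of sets must be unpacked into set().union. A small standalone sketch (values invented):

scalars = [{"a"}, {"b", "c"}, {"a", "c"}]
# set().union(scalars) would try to add the (unhashable) sets themselves
# and raise a TypeError; unpacking unions their contents instead.
all_elements = set().union(*scalars)
assert all_elements == {"a", "b", "c"}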
- if dtype is None: - # Determine the elements in the bitset by looking through the scalars - all_elements = set().union(*scalars) - dtype = BitsetDtype(all_elements) - elif not isinstance(dtype, BitsetDtype): - raise TypeError(f"BitsetArray cannot be constructed with dtype {dtype}") - - # With an appropriate dtype, we can construct the data array to pass to the constructor. - # We will need to convert each of our scalars to their binary representations before passing though. - data = np.zeros((len(scalars),), dtype=dtype.np_array_dtype) - view_format = f"{dtype.fixed_width}B" if dtype.fixed_width != 1 else "(1,1)B" - data_view = data.view(view_format) - for series_index, s in enumerate(scalars): - for element in s: - char, u8_repr = dtype.element_loc(element=element) - data_view[series_index, char] |= u8_repr - return cls(data, dtype, copy=copy) - - @classmethod - def _from_factorized(cls, uniques: np.ndarray, original: BitsetArray) -> BitsetArray: - return cls(uniques, original.dtype) - - @property - def _uint8_view_format(self) -> str: - """ - Format string to be applied to self._data, so that the output of - - self._data.view() - - returns a numpy array of shape (len(self), self.dtype.fixed_width) - and dtype uint8. - """ - return f"({self.dtype.fixed_width},)B" - - @property - def _uint8_view(self) -> NDArray[np.bytes_]: - """ - Returns a view of the fixed-width byte strings stored in ``self._data`` - as an array of ``numpy.uint8``s, with shape - - ``(len(self._data), self.dtype.fixed_width)``. - - Each row ``i`` of this view corresponds to a bitset stored in this array. - The value at index ``i, j`` in this view is the ``uint8`` that represents - character ``j`` in ``self._data[i]``, which can have bitwise operations - performed on it. - """ - return self._data.view(self._uint8_view_format) - - @property - def as_sets(self) -> List[Set[ElementType]]: - """ - Return a list whose entry i is the set representation of the - bitset in entry i of this array. - """ - return [self.dtype.as_set(x) for x in self._data] - - @property - def dtype(self) -> BitsetDtype: - return self._dtype - - @property - def nbytes(self) -> int: - return self._data.nbytes - - def __init__( - self, - data: Iterable[BytesDType] | np.ndarray[BytesDType], - dtype: BitsetDtype, - copy: bool = False, - ) -> None: - """ """ - if not isinstance(dtype, BitsetDtype): - raise TypeError("BitsetArray must have BitsetDtype data.") - - self._data = np.array(data, copy=copy, dtype=dtype.type) - self._dtype = dtype - - def __add__( - self, other: CastableForPandasOps - ) -> BitsetArray: - """ - Entry-wise union with other. - - - If other is ``ElementType`` or ``Iterable[ElementType]``, perform entry-wise OR with the set - representing the passed element values. - - If other is ``BitsetArray`` of compatible shape, take entry-wise union. - - If other is compatible ``np.ndarray``, take entry-wise union. - - Under the hood this is bitwise OR with other; self OR other. - """ - return BitsetArray( - self.__operate_bitwise( - lambda A, B: A | B, other, return_as_bytestring=True - ), - dtype=self.dtype, - ) - - def __and__(self, other: CastableForPandasOps - ) -> BitsetArray: - """ - Entry-wise intersection with other. - - - If other is ``ElementType`` or ``Iterable[ElementType]``, perform entry-wise AND with the set - representing the passed element values. - - If other is ``BitsetArray`` of compatible shape, take entry-wise intersection. - - If other is compatible ``np.ndarray``, take entry-wise intersection. 
- - Under the hood this is bitwise AND with other; self AND other. - """ - return BitsetArray( - self.__operate_bitwise( - lambda A, B: A & B, other, return_as_bytestring=True - ), - dtype=self.dtype, - ) - - def __cast_before_comparison_op( - self, value: CastableForPandasOps - ) -> Tuple[Set[ElementType] | List[Set[ElementType]], bool]: - """ - Common steps taken before employing comparison operations on this class. - - Converts the value passed (as safely as possible) to a set, which can then - be compared with the bitsets stored in the instance. - - Return values are the converted value, and whether this value should be considered - a scalar-set (False) or a collection of sets (True). - """ - if isinstance(value, ElementType): - return set(value), False - elif isinstance(value, set): - return value, False - elif isinstance(value, BitsetArray): - return value.as_sets, True - elif isinstance(value, np.ndarray): - return [ - self.dtype.as_set(bytestr) - for bytestr in self.uint8s_to_byte_string(self.__cast_to_uint8(value)) - ], True - # Last ditch attempt - we might have been given a list of sets, for example... - try: - value = set(value) - if all([isinstance(item, ElementType) for item in value]): - return value, False - elif all([isinstance(item, set) for item in value]): - return value, True - except Exception as e: - raise ValueError(f"Cannot compare bitsets with: {value}") from e - - def __cast_to_uint8(self, other: CastableForPandasOps) -> NDArray[np.uint8]: - """ - Casts the passed object to a ``np.uint8`` array that is compatible with bitwise operations - on ``self._uint8_view``. See the docstring for behaviour in the various usage cases. - - Scalar elements: - Cast to single-element sets, then treated as set. - - Sets: - Are converted to the (array of) uint8s that represents the set. - - ``np.ndarray``s of ``np.uint8`` - Are returned if they have the same number of columns as ``self._uint8_view``. - - ``np.ndarray``s of ``np.dtype("Sx")`` - If ``x`` corresponds to the same fixed-width as ``self.dtype.np_array_dtype``, are cast - to the corresponding ``np.uint8`` view, like ``self._uint8_view`` is from ``self._data``. - - BitsetArrays - Return their ``_uint8_view`` attribute. - """ - if isinstance(other, ElementType): - # Treat single-elements as single-element sets - other = set(other) - if isinstance(other, BitsetArray): - if self.dtype != other.dtype: - raise TypeError("Cannot cast a different Bitset to this one!") - else: - cast = other._uint8_view - elif isinstance(other, np.ndarray): - if other.size == 0: - cast = self.dtype.as_uint8_array({}) - elif (other == other[0]).all(): - cast = self.dtype.as_uint8_array(other[0]) - elif other.dtype == np.uint8 and other.shape[0] == self._uint8_view.shape[0]: - # Compatible uint8s, possibly a view of another fixed-width bytestring array - cast = other - elif other.dtype == self.dtype.np_array_dtype: - # An array of compatible fixed-width bytestrings - cast = other.view(self._uint8_view_format) - elif other.dtype == object and all(isinstance(s, (ElementType, set)) for s in other): - # We might have been passed an object array, where each object is a set or singleton that - # we need to convert. - as_bytes = np.array([self.dtype.as_bytes(s) for s in other], dtype=self.dtype.np_array_dtype) - cast = as_bytes.view(self._uint8_view_format) - else: - raise ValueError(f"Cannot convert {other} to an array of uint8s representing a bitset") - else: - # Must be a collection of elements (or will error), so cast. 
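For reference, the set algebra implemented by __add__, __and__ and __sub__ reduces to these uint8 identities; a one-character sketch:

import numpy as np

a = np.uint8(0b0011)  # bitset {e0, e1}
b = np.uint8(0b0110)  # bitset {e1, e2}
assert a | b == 0b0111   # union, as in __add__ / __or__
assert a & b == 0b0010   # intersection, as in __and__
assert a & ~b == 0b0001  # difference, as in __sub__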
- cast = self.dtype.as_uint8_array(other) - return cast - - def __comparison_op(self, other: CastableForPandasOps, op: Callable[[Set[ElementType], Set[ElementType]], bool]) -> BooleanArray: - """ - Abstract method for strict and non-strict comparison operations. - - Notably, __eq__ does not redirect here since it is more efficient for us to convert - the single value to a bytestring and use numpy array comparison. - - For the other set comparison methods however, it's easier as a first implementation - for us to convert to sets and run the set operations. If there was a Pythonic way - of doing "bitwise less than" and "bitwise greater than", we could instead take the - same approach as in __operate_bitwise: - - Convert the inputs to ``NDArray[np.bytes_]``. - - Compare using __operate_bitwise with self._data. - - which would avoid us having to cast everything to a list and then do a list - comprehension (the numpy direct array comparison should be faster). - """ - if isinstance(other, (pd.Series, pd.DataFrame, pd.Index)): - return NotImplemented - other, is_vector = self.__cast_before_comparison_op(other) - - if is_vector: - return np.array([op(s, other[i]) for i, s in enumerate(self.as_sets)]) - else: - return np.array([op(s, other) for s in self.as_sets], dtype=bool) - - def __contains__(self, item: SingletonForPandasOps | Any) -> BooleanArray | bool: - if isinstance(item, ElementType): - item = set(item) - if isinstance(item, set): - return item in self.as_sets - else: - return super().__contains__(item) - - def __eq__(self, other) -> bool: - if isinstance(other, (pd.Series, pd.DataFrame, pd.Index)): - return NotImplemented - elif isinstance(other, ElementType): - other = set(other) - - if isinstance(other, set): - ans = self._data == self.dtype.as_bytes(other) - else: - ans = self._data == other - return np.squeeze(ans) - - def __getitem__(self, item: int | slice | NDArray) -> BitsetArray: - return ( - self.dtype.as_set(self._data[item]) - if isinstance(item, int) - else BitsetArray(self._data[item], dtype=self.dtype) - ) - - def __ge__(self, other: SingletonForPandasOps) -> BooleanArray: - """ - Entry-wise non-strict superset: self >= other_set. - """ - return self.__comparison_op(other, operator.ge) - - def __gt__(self, other: SingletonForPandasOps) -> BooleanArray: - """ - Entry-wise strict superset: self > other_set. - """ - return self.__comparison_op(other, operator.gt) - - def __len__(self) -> int: - return self._data.shape[0] - - def __le__(self, other: SingletonForPandasOps) -> BooleanArray: - """ - Entry-wise non-strict subset: self <= other_set. - """ - return self.__comparison_op(other, operator.le) - - def __lt__(self, other: SingletonForPandasOps) -> BooleanArray: - """ - Entry-wise strict subset: self < other_set. - """ - return self.__comparison_op(other, operator.lt) - - def __operate_bitwise( - self, - op: Callable[[NDArray[np.uint8], NDArray[np.uint8]], NDArray[np.uint8]], - r_value: CastableForPandasOps, - l_value: Optional[CastableForPandasOps] = None, - return_as_bytestring: bool = False, - ) -> NDArray[np.bytes_] | NDArray[np.uint8]: - """ - Perform a bitwise operation on two compatible ``np.ndarray``s of ``np.uint8``s. - - By default, the left value passed to the operator is assumed to be ``self._uint8_view``. - - Return value is the result of the bitwise operation, as an array of uint8s. If you wish - to have this converted to the corresponding bytestring(s) before returning, use the - return_as_bytestring argument. 
- - :param op: Bitwise operation to perform on input values. - :param r_value: Right-value to pass to the operator. - :param l_value: Left-value to pass to the operator. - :param return_as_bytestring: Result will be returned as a fixed-width bytestring. - """ - l_value = self._uint8_view if l_value is None else self.__cast_to_uint8(l_value) - op_result = op(l_value, self.__cast_to_uint8(r_value)) - if return_as_bytestring: - op_result = self.uint8s_to_byte_string(op_result) - return op_result - - def __or__( - self, other: CastableForPandasOps - ) -> BitsetArray: - """ - Entry-wise union with other, delegating to ``self.__add__``. - - np.ndarrays of objects will attempt to interpret their elements as bitsets. - """ - return self.__add__(other) - - def __setitem__( - self, - key: int | slice | NDArray, - value: ( - np.bytes_ - | ElementType - | Set[ElementType] - | Sequence[np.bytes_ | ElementType| Set[ElementType]] - ), - ) -> None: - if isinstance(value, ElementType) or isinstance(value, set): - # Interpret this as a "scalar" set that we want to set all values to - value = self.dtype.as_bytes(value) - elif isinstance(value, np.bytes_): - # Value is a scalar that we don't need to convert - pass - else: - # Assume value is a sequence, and we will have to convert each value in turn - value = [ - v if isinstance(v, np.bytes_) else self.dtype.as_bytes(v) for v in value - ] - self._data[key] = value - - def __sub__( - self, other: CastableForPandasOps - ) -> BitsetArray: - """ - Remove elements from the Bitsets represented here. - - - If other is ``ElementType``, remove the single element from all series entries. - - If other is ``Iterable[ElementType]``, remove all elements from all series entries. - - If other is ``BitsetArray`` of compatible shape, take element-wise complements of series entries. - - If other is compatible ``np.ndarray``, take element-wise complements of series entries. - - Under the hood this is the bitwise operation self AND (NOT other). - """ - return BitsetArray( - self.__operate_bitwise( - lambda A, B: A & (~B), other, return_as_bytestring=True - ), - dtype=self.dtype, - ) - - def _formatter(self, boxed: bool = False) -> Callable[[np.bytes_], str | None]: - if boxed: # If rendering an individual data value - return lambda x: ",".join(x) if x else "{}" - return repr # Render the table itself - - def copy(self) -> BitsetArray: - return BitsetArray(self._data, self.dtype, copy=True) - - def isna(self) -> NDArray: - """ - TODO: This isn't a great way to express missing data, but equally a bitset doesn't really ever contain missing data... 
- """ - return np.isnan(self._data) - - def take( - self, - indices: TakeIndexer, - *, - allow_fill: bool = False, - fill_value: Optional[BytesDType | Set[ElementType]] = None, - ) -> BitsetArray: - if allow_fill: - if isinstance(fill_value, set): - fill_value = self.dtype.as_bytes(fill_value) - elif fill_value is None: - fill_value = self.dtype.na_value - elif not isinstance(fill_value, self.dtype.type): - raise TypeError( - f"Fill value must be of type {self.dtype.type} (got {type(fill_value).__name__})" - ) - scalars = np.empty((len(indices), ), dtype=self.dtype.type) - scalars[indices[indices >= 0]] = self._data[indices[indices >= 0]] - scalars[indices[indices < 0]] = fill_value - else: - scalars = np.take(self._data, indices) - return self._from_sequence(scalars) diff --git a/src/tlo/core.py b/src/tlo/core.py index 9fbbf08893..fe92203e56 100644 --- a/src/tlo/core.py +++ b/src/tlo/core.py @@ -8,18 +8,14 @@ import json from enum import Enum, auto -from typing import TYPE_CHECKING, Any, Dict, FrozenSet, List, Optional +from typing import TYPE_CHECKING import numpy as np import pandas as pd if TYPE_CHECKING: - from pathlib import Path from typing import Optional - from tlo.methods import Metadata - from tlo.methods.causes import Cause - from tlo.population import Population from tlo.simulation import Simulation class Types(Enum): @@ -80,7 +76,7 @@ class Specifiable: Types.BITSET: int, } - def __init__(self, type_: Types, description: str, categories: List[str] = None): + def __init__(self, type_, description, categories=None): """Create a new Specifiable. :param type_: an instance of Types giving the type of allowed values @@ -98,16 +94,16 @@ def __init__(self, type_: Types, description: str, categories: List[str] = None) self.categories = categories @property - def python_type(self) -> type: + def python_type(self): """Return the Python type corresponding to this Specifiable.""" return self.PYTHON_TYPE_MAP[self.type_] @property - def pandas_type(self) -> type: + def pandas_type(self): """Return the Pandas type corresponding to this Specifiable.""" return self.PANDAS_TYPE_MAP[self.type_] - def __repr__(self) -> str: + def __repr__(self): """Return detailed description of Specifiable.""" delimiter = " === " @@ -135,17 +131,8 @@ class Property(Specifiable): object: float("nan"), np.uint32: 0, } - _default_value_override: Any - - def __init__( - self, - type_: Types, - description: str, - categories: List[str] = None, - *, - ordered: bool = False, - default_value: Optional[Any] = None, - ) -> None: + + def __init__(self, type_, description, categories=None, *, ordered=False): """Create a new property specification. :param type_: An instance of ``Types`` giving the type of allowed values of this @@ -155,53 +142,17 @@ def __init__( ``Types.CATEGORICAL``. :param ordered: Whether categories are ordered if ``type_`` is ``Types.CATEGORICAL``. - :param default_value: The default value for the property. """ if type_ in [Types.SERIES, Types.DATA_FRAME]: raise TypeError("Property cannot be of type SERIES or DATA_FRAME.") - super().__init__(type_, description, categories) self.ordered = ordered - # Use _default_value setter method to set property initial value - self._default_value = default_value @property - def _default_value(self) -> Any: - """ - Default value for this property, which will be used to fill the respective columns - of the population dataframe, for example. - - If not explicitly set, it will fall back on the ``PANDAS_TYPE_DEFAULT_TYPE_MAP``. 
- If a value is provided, it must: - - - Be of the corresponding TYPE for the property. - - If ``type_`` is ``Types.CATEGORICAL``, it must also be a possible category. - """ - return ( - self.PANDAS_TYPE_DEFAULT_VALUE_MAP[self.pandas_type] - if self._default_value_override is None - else self._default_value_override - ) + def _default_value(self): + return self.PANDAS_TYPE_DEFAULT_VALUE_MAP[self.pandas_type] - @_default_value.setter - def _default_value(self, new_val: Any) -> None: - if new_val is not None: - # Check for valid category - if self.type_ is Types.CATEGORICAL: - if new_val not in self.categories: - raise ValueError( - f"Value {new_val} is not a valid category, so cannot be set as the default." - ) - # If not categorical, check for valid data type for default - elif not isinstance(new_val, self.python_type): - raise ValueError( - f"Trying to set a default value of type {type(new_val).__name__}, " - f"which is different from Property's type of {self.python_type.__name__}." - ) - # Outside block so that providing new_val = None reverts to Property-wide default. - self._default_value_override = new_val - - def create_series(self, name: str, size: int) -> pd.Series: + def create_series(self, name, size): """Create a Pandas Series for this property. The values will be left uninitialised. @@ -250,47 +201,48 @@ class attribute on a subclass. # Subclasses can override this to declare the set of initialisation dependencies # Declares modules that need to be registered in simulation and initialised before # this module - INIT_DEPENDENCIES: FrozenSet[str] = frozenset() + INIT_DEPENDENCIES = frozenset() # Subclasses can override this to declare the set of optional init. dependencies # Declares modules that need to be registered in simulation and initialised before # this module if they are present, but are not required otherwise - OPTIONAL_INIT_DEPENDENCIES: FrozenSet[str] = frozenset() + OPTIONAL_INIT_DEPENDENCIES = frozenset() # Subclasses can override this to declare the set of additional dependencies # Declares any modules that need to be registered in simulation in addition to those # in INIT_DEPENDENCIES to allow running simulation - ADDITIONAL_DEPENDENCIES: FrozenSet[str] = frozenset() + ADDITIONAL_DEPENDENCIES = frozenset() # Subclasses can override this set to add metadata tags to their class # See tlo.methods.Metadata class - METADATA: FrozenSet[Metadata] = frozenset() + METADATA = {} - # Subclasses can override this dict to declare the causes of death that this module contributes to + # This is a dict of the form {<name of cause>: <Cause instance>} - def __init__(self, name: Optional[str] = None) -> None: + + def __init__(self, name=None): """Construct a new disease module ready to be included in a simulation. 
Initialises an empty parameters dictionary and module-specific random number @@ -303,7 +255,7 @@ def __init__(self, name: Optional[str] = None) -> None: self.name = name or self.__class__.__name__ self.sim: Optional[Simulation] = None - def load_parameters_from_dataframe(self, resource: pd.DataFrame) -> None: + def load_parameters_from_dataframe(self, resource: pd.DataFrame): """Automatically load parameters from resource dataframe, updating the class parameter dictionary Goes through parameters dict self.PARAMETERS and updates the self.parameters with values @@ -336,7 +288,7 @@ def load_parameters_from_dataframe(self, resource: pd.DataFrame) -> None: f"The value of '{parameter_value}' for parameter '{parameter_name}' " f"could not be parsed as a {parameter_definition.type_.name} data type" ) - if parameter_definition.python_type is list: + if parameter_definition.python_type == list: try: # chose json.loads instead of save_eval # because it raises error instead of joining two strings without a comma @@ -364,7 +316,7 @@ def load_parameters_from_dataframe(self, resource: pd.DataFrame) -> None: # Save the values to the parameters self.parameters[parameter_name] = parameter_value - def read_parameters(self, data_folder: str | Path) -> None: + def read_parameters(self, data_folder): """Read parameter values from file, if required. Must be implemented by subclasses. @@ -374,41 +326,23 @@ def read_parameters(self, data_folder: str | Path) -> None: """ raise NotImplementedError - def initialise_population(self, population: Population) -> None: + def initialise_population(self, population): """Set our property values for the initial population. + Must be implemented by subclasses. + This method is called by the simulation when creating the initial population, and is responsible for assigning initial values, for every individual, of those properties 'owned' by this module, i.e. those declared in its PROPERTIES dictionary. - By default, all ``Property``s in ``self.PROPERTIES`` will have - their columns in the population dataframe set to the default value. - - Modules that wish to implement this behaviour do not need to implement this method, - it will be inherited automatically. Modules that wish to perform additional steps - during the initialise_population stage should reimplement this method and call - - ```python - super().initialise_population(population=population) - ``` - - at the beginning of the method, then proceed with their additional steps. Modules that - do not wish to inherit this default behaviour should re-implement initialise_population - without the call to ``super()`` above. - TODO: We probably need to declare somehow which properties we 'read' here, so the simulation knows what order to initialise modules in! - :param population: The population of individuals in the simulation. + :param population: the population of individuals """ - df = population.props - - for property_name, property in self.PROPERTIES.items(): - df.loc[df.is_alive, property_name] = ( - property._default_value - ) + raise NotImplementedError - def initialise_simulation(self, sim: Simulation) -> None: + def initialise_simulation(self, sim): """Get ready for simulation start. Must be implemented by subclasses. 
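The super() pattern that the removed initialise_population docstring above describes would look like this in a subclass; a hypothetical module, sketched under the assumption that the annotated Property(..., default_value=...) API on the left-hand side of this diff is in place:

from tlo.core import Module, Property, Types

class MyDiseaseModule(Module):
    PROPERTIES = {
        "mdm_is_infected": Property(
            Types.BOOL, "Whether currently infected", default_value=False
        ),
    }

    def initialise_population(self, population):
        # Fill every property column with its declared default first...
        super().initialise_population(population=population)
        # ...then make any module-specific assignments on population.props.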
@@ -419,7 +353,7 @@ def initialise_simulation(self, sim: Simulation) -> None: """ raise NotImplementedError - def pre_initialise_population(self) -> None: + def pre_initialise_population(self): """Carry out any work before any populations have been initialised This optional method allows access to all other registered modules, before any of @@ -427,7 +361,7 @@ def pre_initialise_population(self) -> None: when a module's properties rely upon information from other modules. """ - def on_birth(self, mother_id: int, child_id: int) -> None: + def on_birth(self, mother_id, child_id): """Initialise our properties for a newborn individual. Must be implemented by subclasses. @@ -439,6 +373,6 @@ def on_birth(self, mother_id: int, child_id: int) -> None: """ raise NotImplementedError - def on_simulation_end(self) -> None: + def on_simulation_end(self): """This is called after the simulation has ended. Modules do not need to declare this.""" diff --git a/src/tlo/dependencies.py b/src/tlo/dependencies.py index 03a847d315..8003b44328 100644 --- a/src/tlo/dependencies.py +++ b/src/tlo/dependencies.py @@ -57,67 +57,6 @@ def get_all_dependencies( ) -def get_missing_dependencies( - module_instances: Iterable[Module], - get_dependencies: DependencyGetter = get_all_dependencies, -) -> Set[str]: - """Get the set of missing required dependencies if any from an iterable of modules. - - :param module_instances: Iterable of ``Module`` subclass instances to get missing - dependencies for. - :param get_dependencies: Callable which extracts the set of dependencies to check - for from a module instance. Defaults to extracting all dependencies. - :return: Set of ``Module`` subclass names corresponding to missing dependencies. - """ - module_instances = list(module_instances) - modules_present = {type(module).__name__ for module in module_instances} - modules_present_are_alternatives_to = set.union( - # Force conversion to set to avoid errors when using set.union with frozenset - *(set(module.ALTERNATIVE_TO) for module in module_instances) - ) - modules_required = set.union( - *(set(get_dependencies(module, modules_present)) for module in module_instances) - ) - - missing_dependencies = modules_required - modules_present - return ( - missing_dependencies - modules_present_are_alternatives_to - ) - - -def initialise_missing_dependencies(modules: Iterable[Module], **module_kwargs) -> Set[Module]: - """Get list of initialised instances of any missing dependencies for an iterable of modules. - - :param modules: Iterable of ``Module`` subclass instances to get instances of missing - dependencies for. - :param module_kwargs: Any keyword arguments to use when initialising missing - module dependencies. - :return: Set of ``Module`` subclass instances corresponding to missing dependencies. 
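A usage sketch for the removed initialise_missing_dependencies helper, based on the signature above; the module and path are illustrative:

from pathlib import Path
from tlo.dependencies import initialise_missing_dependencies
from tlo.methods import hiv

resources = Path("./resources")
modules = [hiv.Hiv(resourcefilepath=resources)]
# Returns instances of everything Hiv (transitively) requires but which
# is not already in `modules`, constructed with the given kwargs.
extra = initialise_missing_dependencies(modules, resourcefilepath=resources)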
- """ - module_class_map: Mapping[str, Type[Module]] = get_module_class_map(set()) - all_module_instances: list[Module] = list(modules) - - def add_missing_module_instances(modules: list[Module], all_missing_module_names: set[str]) -> None: - """ add missing module instances to all_module_instances list - :param modules: Iterable of registered modules - :param all_missing_module_names: Set of missing module names - """ - missing_dependencies: set[str] = get_missing_dependencies( - modules, get_all_dependencies - ) - if len(missing_dependencies) > 0: - all_missing_module_names |= missing_dependencies - missing_module_instances: list[Module] = [ - module_class_map[dependency](**module_kwargs) - for dependency in missing_dependencies - ] - modules.extend(missing_module_instances) - add_missing_module_instances(modules, all_missing_module_names) - - add_missing_module_instances(all_module_instances, set()) - return set(all_module_instances) - set(modules) - - def get_all_required_dependencies( module: Union[Module, Type[Module]], module_names_present: Optional[Set[str]] = None @@ -137,7 +76,7 @@ def get_all_required_dependencies( def topologically_sort_modules( module_instances: Iterable[Module], - get_dependencies: DependencyGetter = get_init_dependencies + get_dependencies: DependencyGetter = get_init_dependencies, ) -> Generator[Module, None, None]: """Generator which yields topological sort of modules based on their dependencies. @@ -181,7 +120,6 @@ def depth_first_search(module): dependencies = get_dependencies( module_instance_map[module], module_instance_map.keys() ) - for dependency in sorted(dependencies): if dependency not in module_instance_map: alternatives_with_instances = [ @@ -326,12 +264,23 @@ def check_dependencies_present( :raises ModuleDependencyError: Raised if any dependencies are missing. 
""" - missing_dependencies = get_missing_dependencies( - module_instances, get_dependencies + module_instances = list(module_instances) + modules_present = {type(module).__name__ for module in module_instances} + modules_present_are_alternatives_to = set.union( + # Force conversion to set to avoid errors when using set.union with frozenset + *(set(module.ALTERNATIVE_TO) for module in module_instances) + ) + modules_required = set.union( + *(set(get_dependencies(module, modules_present)) for module in module_instances) ) - if len(missing_dependencies) > 0: + missing_dependencies = modules_required - modules_present + missing_dependencies_without_alternatives_present = ( + missing_dependencies - modules_present_are_alternatives_to + ) + if not missing_dependencies_without_alternatives_present == set(): + raise ModuleDependencyError( 'One or more required dependency is missing from the module list and no ' 'alternative to this / these modules are available either: ' - f'{missing_dependencies}' + f'{missing_dependencies_without_alternatives_present}' ) diff --git a/src/tlo/logging/__init__.py b/src/tlo/logging/__init__.py index 7f1447f037..e17e5c37b5 100644 --- a/src/tlo/logging/__init__.py +++ b/src/tlo/logging/__init__.py @@ -1,27 +1,7 @@ -from .core import ( - CRITICAL, - DEBUG, - FATAL, - INFO, - WARNING, - disable, - getLogger, - initialise, - reset, - set_output_file, -) -from .helpers import set_logging_levels +from .core import CRITICAL, DEBUG, FATAL, INFO, WARNING, disable, getLogger +from .helpers import init_logging, set_logging_levels, set_output_file, set_simulation -__all__ = [ - "CRITICAL", - "DEBUG", - "FATAL", - "INFO", - "WARNING", - "disable", - "getLogger", - "initialise", - "reset", - "set_output_file", - "set_logging_levels", -] +__all__ = ['CRITICAL', 'DEBUG', 'FATAL', 'INFO', 'WARNING', 'disable', 'getLogger', + 'set_output_file', 'init_logging', 'set_simulation', 'set_logging_levels'] + +init_logging() diff --git a/src/tlo/logging/core.py b/src/tlo/logging/core.py index dc3beaf2f1..e870e1f179 100644 --- a/src/tlo/logging/core.py +++ b/src/tlo/logging/core.py @@ -1,361 +1,217 @@ -from __future__ import annotations - import hashlib import json import logging as _logging -import sys -import warnings -from functools import partialmethod -from pathlib import Path -from typing import Any, Callable, List, Optional, TypeAlias, Union +from typing import Union -import numpy as np import pandas as pd from tlo.logging import encoding -LogLevel: TypeAlias = int -LogData: TypeAlias = Union[str, dict, list, set, tuple, pd.DataFrame, pd.Series] -SimulationDateGetter: TypeAlias = Callable[[], str] - -CRITICAL = _logging.CRITICAL -DEBUG = _logging.DEBUG -FATAL = _logging.FATAL -INFO = _logging.INFO -WARNING = _logging.WARNING -_DEFAULT_LEVEL = INFO - -_DEFAULT_FORMATTER = _logging.Formatter("%(message)s") - - -class InconsistentLoggedColumnsWarning(UserWarning): - """Warning raised when structured log entry has different columns from header.""" - - -def _mock_simulation_date_getter() -> str: - return "0000-00-00T00:00:00" - - -_get_simulation_date: SimulationDateGetter = _mock_simulation_date_getter -_loggers: dict[str, Logger] = {} - - -def initialise( - add_stdout_handler: bool = True, - simulation_date_getter: SimulationDateGetter = _mock_simulation_date_getter, - root_level: LogLevel = WARNING, - stdout_handler_level: LogLevel = DEBUG, - formatter: _logging.Formatter = _DEFAULT_FORMATTER, -) -> None: - """Initialise logging system and set up root `tlo` logger. 
- - :param add_stdout_handler: Whether to add a handler to output log entries to stdout. - :param simulation_date_getter: Zero-argument function returning simulation date as - string in ISO format to use in log entries. Defaults to function returning - a fixed dummy date for use before a simulation has been initialised. - :param root_level: Logging level for root `tlo` logger. - :param stdout_handler_level: Logging level for the handler writing to stdout. - :param formatter: Formatter to use for logging to stdout. - """ - global _get_simulation_date, _loggers - _get_simulation_date = simulation_date_getter - for logger in _loggers.values(): - logger.reset_attributes() - root_logger = getLogger("tlo") - root_logger.setLevel(root_level) - if add_stdout_handler: - handler = _logging.StreamHandler(sys.stdout) - handler.setLevel(stdout_handler_level) - handler.setFormatter(formatter) - root_logger.handlers = [ - h - for h in root_logger.handlers - if not (isinstance(h, _logging.StreamHandler) and h.stream is sys.stdout) - ] - root_logger.addHandler(handler) - - -def reset(): - """Reset global logging state to values at initial import.""" - global _get_simulation_date, _loggers - while len(_loggers) > 0: - name, _ = _loggers.popitem() - _logging.root.manager.loggerDict.pop(name, None) # pylint: disable=E1101 - _loggers.clear() - _get_simulation_date = _mock_simulation_date_getter - - -def set_output_file( - log_path: Path, - formatter: _logging.Formatter = _DEFAULT_FORMATTER, -) -> _logging.FileHandler: - """Add file handler to logger. - - :param log_path: Path for file. - :return: File handler object. - """ - file_handler = _logging.FileHandler(log_path) - file_handler.setFormatter(formatter) - logger = getLogger("tlo") - logger.handlers = [ - h for h in logger.handlers if not isinstance(h, _logging.FileHandler) - ] - logger.addHandler(file_handler) - return file_handler - - -def disable(level: LogLevel) -> None: - """Disable all logging calls of specified level and below.""" +def disable(level): _logging.disable(level) -def getLogger(name: str = "tlo") -> Logger: +def getLogger(name='tlo'): """Returns a TLO logger of the specified name""" - if name not in _loggers: - _loggers[name] = Logger(name) - return _loggers[name] - - -def _numeric_or_str_sort_key(value): - """Key function to sort mixture of numeric and string items. - - Orders non-string values first and then string values, assuming ascending order. - """ - return isinstance(value, str), value - - -def _convert_keys_to_strings_and_sort(data: dict) -> dict[str, Any]: - """Convert all dictionary keys to strings and sort dictionary by key.""" - # Sort by mix of numeric or string keys _then_ convert all keys to strings to - # ensure stringified numeric keys have natural numeric ordering, for example - # '1', '2', '10' not '1', '10', '2' - sorted_data = dict( - (str(k), v) - for k, v in sorted(data.items(), key=lambda i: _numeric_or_str_sort_key(i[0])) - ) - if len(sorted_data) != len(data): - raise ValueError( - f"At least one pair of keys in data dictionary {data} map to the same string."
- ) - return sorted_data - - -def _sort_set_with_numeric_or_str_elements(data: set) -> list: - """Sort a set with elements that may be either strings or numeric types.""" - return sorted(data, key=_numeric_or_str_sort_key) - - -def _get_log_data_as_dict(data: LogData) -> dict: - """Convert log data to a dictionary if it isn't already""" - if isinstance(data, dict): - return _convert_keys_to_strings_and_sort(data) - if isinstance(data, pd.DataFrame): - if len(data) == 1: - data_dict = data.iloc[0].to_dict() - return _convert_keys_to_strings_and_sort(data_dict) - else: - raise ValueError( - "Logging multirow dataframes is not currently supported - " - "if you need this feature let us know" - ) - if isinstance(data, (list, set, tuple, pd.Series)): - if isinstance(data, set): - data = _sort_set_with_numeric_or_str_elements(data) - return {f"item_{index + 1}": value for index, value in enumerate(data)} - if isinstance(data, str): - return {"message": data} - raise ValueError(f"Unexpected type given as data:\n{data}") - - -def _convert_numpy_scalars_to_python_types(data: dict) -> dict: - """Convert NumPy scalar types to suitable standard Python types.""" - return { - key: ( - value.item() if isinstance(value, (np.number, np.bool_, np.str_)) else value - ) - for key, value in data.items() - } - - -def _get_columns_from_data_dict(data: dict) -> dict: - """Get columns dictionary specifying types of data dictionary values.""" - # using type().__name__ so both pandas and stdlib types can be used - return {k: type(v).__name__ for k, v in data.items()} + if name not in _LOGGERS: + _LOGGERS[name] = Logger(name) + return _LOGGERS[name] -class Logger: - """Logger for structured log messages output by simulation. +class _MockSim: + # used as a placeholder for any logging that happens before the simulation is set up! + class MockDate: + @staticmethod + def isoformat(): + return "0000-00-00T00:00:00" + date = MockDate() - Outputs structured log messages in JSON format along with the simulation date the log entry - was generated at. Log messages are associated with a string key and for each key - the log message data is expected to have a fixed structure: - - Collection like data (tuples, lists, sets) should be of fixed length. - - Mapping like data (dictionaries, pandas series and dataframes) should have a fixed - set of keys and the values should be of fixed data types. +class Logger: + """A Logger for TLO log messages, with simplified usage. Outputs structured log messages in JSON + format and is connected to the Simulation instance.""" + HASH_LEN = 10 - The first log message for a given key will generate a 'header' log entry which - records the structure of the message with subsequent log messages only logging the - values for efficiency, hence the requirement for the structure to remain fixed.
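A self-contained illustration of the ordering rule implemented by _numeric_or_str_sort_key and _convert_keys_to_strings_and_sort in the removed helpers above: numeric keys keep their natural numeric order after stringification, rather than the lexicographic '1', '10', '2':

def numeric_or_str_sort_key(value):
    # non-string (numeric) values sort first, each group in ascending order
    return isinstance(value, str), value

data = {10: "j", 2: "b", 1: "a", "x": "s"}
ordered = dict(
    (str(k), v) for k, v in sorted(data.items(), key=lambda i: numeric_or_str_sort_key(i[0]))
)
assert list(ordered) == ["1", "2", "10", "x"]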
- """ + def __init__(self, name: str, level=_logging.NOTSET): - HASH_LEN = 10 + assert name.startswith('tlo'), f'Only logging of tlo modules is allowed; name is {name}' - def __init__(self, name: str, level: LogLevel = _DEFAULT_LEVEL) -> None: - assert name.startswith( - "tlo" - ), f"Only logging of tlo modules is allowed; name is {name}" # we build our logger on top of the standard python logging self._std_logger = _logging.getLogger(name=name) self._std_logger.setLevel(level) - # don't propagate messages up from "tlo" to root logger - if name == "tlo": + self.name = self._std_logger.name + + # don't propograte messages up from "tlo" to root logger + if name == 'tlo': self._std_logger.propagate = False - # the unique identifiers of the structured logging calls for this logger - self._uuids = dict() - # the columns for the structured logging calls for this logger - self._columns = dict() - def __repr__(self) -> str: - return f"" + # the key of the structured logging calls for this logger + self.keys = dict() - @property - def name(self) -> str: - return self._std_logger.name + # populated by init_logging(simulation) for the top-level "tlo" logger + self.simulation = _MockSim() + + # a logger should only be using old-style or new-style logging, not a mixture + self.logged_stdlib = False + self.logged_structured = False + + # disable logging multirow dataframes until we're confident it's robust + self._disable_dataframe_logging = True + + def __repr__(self): + return f'' @property - def handlers(self) -> List[_logging.Handler]: + def handlers(self): return self._std_logger.handlers @property - def level(self) -> LogLevel: + def level(self): return self._std_logger.level @handlers.setter - def handlers(self, handlers: List[_logging.Handler]): + def handlers(self, handlers): self._std_logger.handlers.clear() for handler in handlers: self._std_logger.handlers.append(handler) - def addHandler(self, hdlr: _logging.Handler): + def addHandler(self, hdlr): self._std_logger.addHandler(hdlr=hdlr) - def isEnabledFor(self, level: LogLevel) -> bool: + def isEnabledFor(self, level): return self._std_logger.isEnabledFor(level) - def reset_attributes(self) -> None: + def reset_attributes(self): """Reset logger attributes to an unset state""" # clear all logger settings self.handlers.clear() - self._uuids.clear() - self._columns.clear() - self.setLevel(_DEFAULT_LEVEL) - - def setLevel(self, level: LogLevel) -> None: + self.keys.clear() + self.simulation = _MockSim() + # boolean attributes used for now, can be removed after transition to structured logging + self.logged_stdlib = False + self.logged_structured = False + self.setLevel(INFO) + + def setLevel(self, level): self._std_logger.setLevel(level) - def _get_uuid(self, key: str) -> str: - hexdigest = hashlib.md5(f"{self.name}+{key}".encode()).hexdigest() - return hexdigest[: Logger.HASH_LEN] - - def _get_json( - self, - level: int, - key: str, - data: Optional[LogData] = None, - description: Optional[str] = None, - ) -> str: - """Writes structured log message if handler allows this and level is allowed. - - Will write a header line the first time a new logging key is encountered. - Then will only write data rows in later rows for this logging key. - - :param level: Level the message is being logged as. - :param key: Logging key. - :param data: Data to be logged. - :param description: Description of this log type. - - :returns: String with JSON-encoded data row and optionally header row. 
+ def _get_data_as_dict(self, data): + """Convert log data to a dictionary if it isn't already""" + if isinstance(data, dict): + return data + if isinstance(data, pd.DataFrame): + if len(data.index) == 1: + return data.to_dict('records')[0] + elif self._disable_dataframe_logging: + raise ValueError("Logging multirow dataframes is disabled - if you need this feature let us know") + else: + return {'dataframe': data.to_dict('index')} + if isinstance(data, (list, set, tuple, pd.Series)): + return {f'item_{index + 1}': value for index, value in enumerate(data)} + if isinstance(data, str): + return {'message': data} + + raise ValueError(f'Unexpected type given as data:\n{data}') + + def _get_json(self, level, key, data: Union[dict, pd.DataFrame, list, set, tuple, str] = None, description=None): + """Writes structured log message if handler allows this and logging level is allowed + + Will write a header line the first time a new logging key is encountered + Then will only write data rows in later rows for this logging key + + :param level: Level the message is being logged as + :param key: logging key + :param data: data to be logged + :param description: description of this log type """ - data = _get_log_data_as_dict(data) - data = _convert_numpy_scalars_to_python_types(data) - header_json = None + # message level is less than the logger level, early exit + if level < self._std_logger.level: + return + + data = self._get_data_as_dict(data) + header_json = "" - if key not in self._uuids: + if key not in self.keys: # new log key, so create header json row - uuid = self._get_uuid(key) - columns = _get_columns_from_data_dict(data) - self._uuids[key] = uuid - self._columns[key] = columns + uuid = hashlib.md5(f"{self.name}+{key}".encode()).hexdigest()[:Logger.HASH_LEN] + self.keys[key] = uuid + header = { "uuid": uuid, "type": "header", "module": self.name, "key": key, "level": _logging.getLevelName(level), - "columns": columns, - "description": description, + # using type().__name__ so both pandas and stdlib types can be used + "columns": {key: type(value).__name__ for key, value in data.items()}, + "description": description } - header_json = json.dumps(header) - else: - uuid = self._uuids[key] - columns = _get_columns_from_data_dict(data) - if columns != self._columns[key]: - header_columns = set(self._columns[key].items()) - logged_columns = set(columns.items()) - msg = ( - f"Inconsistent columns in logged values for {self.name} logger " - f"with key {key} compared to header generated from initial log " - f"entry:\n" - f" Columns in header not in logged values are\n" - f" {dict(sorted(header_columns - logged_columns))}\n" - f" Columns in logged values not in header are\n" - f" {dict(sorted(logged_columns - header_columns))}" - ) - warnings.warn( - msg, - InconsistentLoggedColumnsWarning, - # Set stack level so that user is given location of top-level - # {info,warning,debug,critical} convenience method call - stacklevel=3, - ) - - # create data json row - row = { - "uuid": uuid, - "date": _get_simulation_date(), - "values": list(data.values()), - } + header_json = json.dumps(header) + "\n" + + uuid = self.keys[key] + + # create data json row; in DEBUG mode we echo the module and key for easier eyeballing if self._std_logger.level == DEBUG: - # in DEBUG mode we echo the module and key for easier eyeballing - row["module"] = self.name - row["key"] = key + row = {"date": getLogger('tlo').simulation.date.isoformat(), + "module": self.name, + "key": key, + "uuid": uuid, + "values":
list(data.values())} + else: + row = {"uuid": uuid, + "date": getLogger('tlo').simulation.date.isoformat(), + "values": list(data.values())} row_json = json.dumps(row, cls=encoding.PandasEncoder) - return row_json if header_json is None else f"{header_json}\n{row_json}" - - def log( - self, - level: LogLevel, - key: str, - data: LogData, - description: Optional[str] = None, - ) -> None: - """Log structured data for a key at specified level with optional description. - - :param level: Level the message is being logged as. - :param key: Logging key. - :param data: Data to be logged. - :param description: Description of this log type. - """ + return f"{header_json}{row_json}" + + def _make_old_style_msg(self, level, msg): + return f'{level}|{self.name}|{msg}' + + def _check_logging_style(self, is_structured: bool): + """Record which style of logging has been used and raise an exception if both styles have been used for this logger""" + if is_structured: + self.logged_structured = True + else: + self.logged_stdlib = True + + if self.logged_structured and self.logged_stdlib: + raise ValueError(f"Both old-style and structured logging have been used for {self.name}, " + "please update all logging to use structured logging") + + def _check_and_filter(self, msg=None, *args, key=None, data=None, description=None, level, **kwargs): if self._std_logger.isEnabledFor(level): - msg = self._get_json( - level=level, key=key, data=data, description=description - ) - self._std_logger.log(level=level, msg=msg) - - critical = partialmethod(log, CRITICAL) - debug = partialmethod(log, DEBUG) - info = partialmethod(log, INFO) - warning = partialmethod(log, WARNING) + level_str = _logging.getLevelName(level) # e.g. 'CRITICAL', 'INFO' etc. + level_function = getattr(self._std_logger, level_str.lower()) # e.g.
`critical` or `info` methods + if key is None or data is None: + raise ValueError("Structured logging requires `key` and `data` keyword arguments") + self._check_logging_style(is_structured=True) + level_function(self._get_json(level=level, key=key, data=data, description=description)) + + def critical(self, msg=None, *args, key: str = None, + data: Union[dict, pd.DataFrame, list, set, tuple, str] = None, description=None, **kwargs): + self._check_and_filter(msg, *args, key=key, data=data, description=description, level=CRITICAL, **kwargs) + + def debug(self, msg=None, *args, key: str = None, + data: Union[dict, pd.DataFrame, list, set, tuple, str] = None, description=None, **kwargs): + self._check_and_filter(msg, *args, key=key, data=data, description=description, level=DEBUG, **kwargs) + + def info(self, msg=None, *args, key: str = None, + data: Union[dict, pd.DataFrame, list, set, tuple, str] = None, description=None, **kwargs): + self._check_and_filter(msg, *args, key=key, data=data, description=description, level=INFO, **kwargs) + + def warning(self, msg=None, *args, key: str = None, + data: Union[dict, pd.DataFrame, list, set, tuple, str] = None, description=None, **kwargs): + self._check_and_filter(msg, *args, key=key, data=data, description=description, level=WARNING, **kwargs) + + +CRITICAL = _logging.CRITICAL +DEBUG = _logging.DEBUG +FATAL = _logging.FATAL +INFO = _logging.INFO +WARNING = _logging.WARNING + +_FORMATTER = _logging.Formatter('%(message)s') +_LOGGERS = {'tlo': Logger('tlo', WARNING)} diff --git a/src/tlo/logging/encoding.py b/src/tlo/logging/encoding.py index c5db27caa5..9968ce9cb8 100644 --- a/src/tlo/logging/encoding.py +++ b/src/tlo/logging/encoding.py @@ -2,7 +2,6 @@ import numpy as np import pandas as pd -from pandas.api.types import is_extension_array_dtype class PandasEncoder(json.JSONEncoder): @@ -11,16 +10,16 @@ def default(self, obj): # using base classes for numpy numeric types if isinstance(obj, np.floating): return float(obj) - elif isinstance(obj, np.integer): + elif isinstance(obj, np.signedinteger): return int(obj) elif isinstance(obj, pd.Timestamp): return obj.isoformat() - elif is_extension_array_dtype(obj): - # for pandas extension dtypes assume length 1 arrays / series are scalars - return obj.tolist()[0 if len(obj) == 1 else slice(None)] + elif isinstance(obj, pd.Categorical): + # assume only one categorical value per cell + return obj.tolist()[0] elif isinstance(obj, set): return list(obj) - elif isinstance(obj, (type(pd.NaT), type(pd.NA))): + elif isinstance(obj, type(pd.NaT)): return None # when logging a series directly, numpy datatypes are used elif isinstance(obj, np.datetime64): diff --git a/src/tlo/logging/helpers.py b/src/tlo/logging/helpers.py index 99fc51c473..2195c602d0 100644 --- a/src/tlo/logging/helpers.py +++ b/src/tlo/logging/helpers.py @@ -1,14 +1,26 @@ import logging as _logging -from collections.abc import Collection, Iterable -from typing import Dict, List, Optional, Union +import sys +from pathlib import Path +from typing import Dict -import pandas as pd -from pandas.api.types import is_extension_array_dtype +from .core import _FORMATTER, _LOGGERS, DEBUG, getLogger -from .core import getLogger +def set_output_file(log_path: Path) -> _logging.FileHandler: + """Add file handler to logger -def set_logging_levels(custom_levels: Dict[str, int]) -> None: + :param log_path: path for file + :return: file handler object + """ + file_handler = _logging.FileHandler(log_path) + file_handler.setFormatter(_FORMATTER) +
getLogger('tlo').handlers = [h for h in getLogger('tlo').handlers + if not isinstance(h, _logging.FileHandler)] + getLogger('tlo').addHandler(file_handler) + return file_handler + + +def set_logging_levels(custom_levels: Dict[str, int]): """Set custom logging levels for disease modules :param custom_levels: Dictionary of modules and their level, '*' can be used as a key for all modules @@ -53,78 +65,23 @@ def set_logging_levels(custom_levels: Dict[str, int]) -> None: getLogger(logger_name).setLevel(logger_level) -def get_dataframe_row_as_dict_for_logging( - dataframe: pd.DataFrame, - row_label: Union[int, str], - columns: Optional[Iterable[str]] = None, -) -> dict: - """Get row of a pandas dataframe in a format suitable for logging. - - Retrieves entries for all or a subset of columns for a particular row in a dataframe - and returns a dict keyed by column name, with values NumPy or pandas extension types - which should be the same for all rows in dataframe. - - :param dataframe: Population properties dataframe to get properties from. - :param row_label: Unique index label identifying row in dataframe. - :param columns: Set of column names to extract - if ``None``, the default, all - column values will be returned. - :returns: Dictionary with column names as keys and corresponding entries in row as - values. - """ - dataframe = dataframe.convert_dtypes(convert_integer=False, convert_floating=False) - columns = dataframe.columns if columns is None else columns - row_index = dataframe.index.get_loc(row_label) - return { - column_name: - dataframe[column_name].values[row_index] - # pandas extension array datatypes such as nullable types and categoricals, will - # be type unstable if a scalar is returned as NA / NaT / NaN entries will have a - # different type from non-missing entries, therefore use a length 1 array of - # relevant NumPy or pandas extension type in these cases to ensure type - # stability across different rows. - if not is_extension_array_dtype(dataframe[column_name].dtype) else - dataframe[column_name].values[row_index:row_index+1] - for column_name in columns - } +def init_logging(add_stdout_handler=True): + """Initialise default logging with stdout stream""" + for logger_name, logger in _LOGGERS.items(): + logger.reset_attributes() + if add_stdout_handler: + handler = _logging.StreamHandler(sys.stdout) + handler.setLevel(DEBUG) + handler.setFormatter(_FORMATTER) + getLogger('tlo').addHandler(handler) + _logging.basicConfig(level=_logging.WARNING) -def grouped_counts_with_all_combinations( - dataframe: pd.DataFrame, - group_by_columns: List[str], - column_possible_values: Optional[Dict[str, Collection]] = None, -) -> pd.Series: - """Perform group-by count in which all combinations of column values are included. - - As all combinations are included irrespective of whether they are present in data - (and so have a non-zero count), this gives a multi-index series output of fixed - structure suitable for logging. - - Attempts to convert all columns to categorical datatype, with bool(ean) columns - automatically converted, and other non-categorical columns needing to have set of - possible values specified (which requires that this set is finite). - - :param dataframe: Dataframe to perform group-by counts on. - :param group_by_columns: Columns to perform grouping on. - :param column_possible_values: Dictionary mapping from column names to set of - possible values for all columns not of categorical or bool(ean) data type. 
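A hedged usage sketch for the helpers above, following the '*' wildcard convention documented in set_logging_levels (the output path and module name are hypothetical):

from pathlib import Path
from tlo import logging

logging.set_output_file(Path("outputs/simulation.log"))
logging.set_logging_levels({
    "*": logging.WARNING,                    # default level for all registered modules
    "tlo.methods.demography": logging.INFO,  # but log demography at INFO
})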
- :returns: Multi-index series with values corresponding to grouped counts. +def set_simulation(simulation): + """ + Inject the simulation into the logger for structured logging, called by the simulation + :param simulation: the simulation instance, used for date-stamping log rows + """ - subset = dataframe[group_by_columns].copy() - # Convert any bool(ean) columns to categoricals - for column_name in group_by_columns: - if subset[column_name].dtype in ("bool", "boolean"): - subset[column_name] = pd.Categorical( - subset[column_name], categories=[True, False] - ) - # For other non-categorical columns possible values need to be explicitly stated - if column_possible_values is not None: - for column_name, possible_values in column_possible_values.items(): - subset[column_name] = pd.Categorical( - subset[column_name], categories=possible_values - ) - if not (subset.dtypes == "category").all(): - msg = "At least one column not convertible to categorical dtype:\n" + str( - {subset.dtypes[subset.dtypes != "categorical"]} - ) - raise ValueError(msg) - return subset.groupby(by=group_by_columns).size() + logger = getLogger('tlo') + logger.simulation = simulation diff --git a/src/tlo/methods/alri.py b/src/tlo/methods/alri.py index 70ac14fe2d..c27a54dd30 100644 --- a/src/tlo/methods/alri.py +++ b/src/tlo/methods/alri.py @@ -1253,7 +1253,7 @@ def do_effects_of_treatment_and_return_outcome(self, person_id, antibiotic_provi # Gather underlying properties that will affect success of treatment SpO2_level = person.ri_SpO2_level - symptoms = self.sim.modules['SymptomManager'].has_what(person_id=person_id) + symptoms = self.sim.modules['SymptomManager'].has_what(person_id) imci_symptom_based_classification = self.get_imci_classification_based_on_symptoms( child_is_younger_than_2_months=person.age_exact_years < (2.0 / 12.0), symptoms=symptoms, @@ -2726,7 +2726,7 @@ def apply(self, person_id, squeeze_factor): return # Do nothing if the person does not have indicating symptoms - symptoms = self.sim.modules['SymptomManager'].has_what(person_id=person_id) + symptoms = self.sim.modules['SymptomManager'].has_what(person_id) if not {'cough', 'difficult_breathing'}.intersection(symptoms): return self.make_appt_footprint({}) @@ -3009,7 +3009,7 @@ def apply(self, person_id): assert 'danger_signs_pneumonia' == self.module.get_imci_classification_based_on_symptoms( child_is_younger_than_2_months=df.at[person_id, 'age_exact_years'] < (2.0 / 12.0), - symptoms=self.sim.modules['SymptomManager'].has_what(person_id=person_id) + symptoms=self.sim.modules['SymptomManager'].has_what(person_id) ) @@ -3040,7 +3040,7 @@ def apply(self, person_id): assert 'fast_breathing_pneumonia' == \ self.module.get_imci_classification_based_on_symptoms( - child_is_younger_than_2_months=False, symptoms=self.sim.modules['SymptomManager'].has_what(person_id=person_id) + child_is_younger_than_2_months=False, symptoms=self.sim.modules['SymptomManager'].has_what(person_id) ) diff --git a/src/tlo/methods/bed_days.py b/src/tlo/methods/bed_days.py index a47b75b16a..ef501f3b2e 100644 --- a/src/tlo/methods/bed_days.py +++ b/src/tlo/methods/bed_days.py @@ -5,12 +5,12 @@ """ from collections import defaultdict -from typing import Dict, Literal, Tuple +from typing import Dict, Tuple import numpy as np import pandas as pd -from tlo import Date, Property, Types, logging +from tlo import Property, Types, logging # --------------------------------------------------------------------------------------------------------- # CLASS DEFINITIONS @@ -145,40 +145,6 @@ def initialise_beddays_tracker(self,
model_to_data_popsize_ratio=1.0): assert not df.isna().any().any() self.bed_tracker[bed_type] = df - def switch_beddays_availability( - self, - new_availability: Literal["all", "none", "default"], - effective_on_and_from: Date, - model_to_data_popsize_ratio: float = 1.0, - ) -> None: - """ - Action to be taken if the beddays availability changes in the middle - of the simulation. - - If bed capacities are reduced below the currently scheduled occupancy, - inpatients are not evicted from beds and are allowed to remain in the - bed until they are scheduled to leave. Obviously, no new patients will - be admitted if there is no room in the new capacities. - - :param new_availability: The new bed availability. See __init__ for details. - :param effective_on_and_from: First day from which the new capacities will be imposed. - :param model_to_data_popsize_ratio: As in initialise_population. - """ - # Store new bed availability - self.availability = new_availability - # Before we update the bed capacity, we need to store its old values - # This is because we will need to update the trackers to reflect the new - # maximum capacities for each bed type. - old_max_capacities: pd.DataFrame = self._scaled_capacity.copy() - # Set the new capacity for beds - self.set_scaled_capacity(model_to_data_popsize_ratio) - # Compute the difference between the new max capacities and the old max capacities - difference_in_max = self._scaled_capacity - old_max_capacities - # For each tracker, after the effective date, impose the difference on the max - # number of beds - for bed_type, tracker in self.bed_tracker.items(): - tracker.loc[effective_on_and_from:] += difference_in_max[bed_type] - def on_start_of_day(self): """Things to do at the start of each new day: * Refresh inpatient status @@ -318,60 +284,6 @@ def issue_bed_days_according_to_availability(self, facility_id: int, footprint: return available_footprint - def combine_footprints_for_same_patient( - self, fp1: Dict[str, int], fp2: Dict[str, int] - ) -> Dict[str, int]: - """ - Given two footprints that are due to start on the same day, combine the two footprints by - overlaying the higher-priority bed over the lower-priority beds. - - As an example, given the footprints, - fp1 = {"bedtype1": 2, "bedtype2": 0} - fp2 = {"bedtype1": 1, "bedtype2": 6} - - where bedtype1 is higher priority than bedtype2, we expect the combined allocation to be - {"bedtype1": 2, "bedtype2": 5}. - - This is because footprints are assumed to run in the order of the bedtypes priority; so - fp2's second day of being allocated to bedtype2 is overwritten by the higher-priority - allocation to bedtype1 from fp1. The remaining 5 days are allocated to bedtype2 since - fp1 does not require a bed after the first 2 days, but fp2 does. - - :param fp1: Footprint, to be combined with the other argument. - :param fp2: Footprint, to be combined with the other argument. - """ - fp1_length = sum(days for days in fp1.values()) - fp2_length = sum(days for days in fp2.values()) - max_length = max(fp1_length, fp2_length) - - # np arrays where each entry is the priority of bed allocated by the footprint - # on that day. fp_priority[i] = priority of the bed allocated by the footprint on - # day i (where the current day is day 0). - # By default, fill with priority equal to the lowest bed priority; though all - # the values will have been explicitly overwritten after the next loop completes.
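A runnable distillation (under the same assumptions as the removed method) of the priority-overlay logic, reproducing the worked example from the docstring above:

import numpy as np

bed_types = ["bedtype1", "bedtype2"]  # ordered from highest to lowest priority
fp1 = {"bedtype1": 2, "bedtype2": 0}
fp2 = {"bedtype1": 1, "bedtype2": 6}

max_length = max(sum(fp1.values()), sum(fp2.values()))
# one entry per day, prefilled with the lowest bed priority
fp1_priority = np.ones(max_length, dtype=int) * (len(bed_types) - 1)
fp2_priority = fp1_priority.copy()
fp1_at = fp2_at = 0
for priority, bed_type in enumerate(bed_types):
    fp1_priority[fp1_at:fp1_at + fp1[bed_type]] = priority
    fp1_at += fp1[bed_type]
    fp2_priority[fp2_at:fp2_at + fp2[bed_type]] = priority
    fp2_at += fp2[bed_type]
# on each day the patient gets the higher-priority (lower-numbered) bed
final_priorities = np.minimum(fp1_priority, fp2_priority)
combined = {
    bed_type: int(np.sum(final_priorities == priority))
    for priority, bed_type in enumerate(bed_types)
}
assert combined == {"bedtype1": 2, "bedtype2": 5}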
- fp1_priority = np.ones((max_length,), dtype=int) * (len(self.bed_types) - 1) - fp2_priority = fp1_priority.copy() - - fp1_at = 0 - fp2_at = 0 - for priority, bed_type in enumerate(self.bed_types): - # Bed type priority is dictated by list order, so it is safe to loop here. - # We will start with the highest-priority bed type and work to the lowest - fp1_priority[fp1_at:fp1_at + fp1[bed_type]] = priority - fp1_at += fp1[bed_type] - fp2_priority[fp2_at:fp2_at + fp2[bed_type]] = priority - fp2_at += fp2[bed_type] - - # Element-wise minimum of the two priority arrays is then the bed to assign - final_priorities = np.minimum(fp1_priority, fp2_priority) - # Final footprint is then formed by converting the priorities into blocks of days - return { - # Cast to int here since pd.datetime.timedelta doesn't know what to do with - # np.int64 types - bed_type: int(sum(final_priorities == priority)) - for priority, bed_type in enumerate(self.bed_types) - } - def impose_beddays_footprint(self, person_id, footprint): """This is called to reflect that a new occupancy of bed-days should be recorded: * Cause to be reflected in the bed_tracker that an hsi_event is being run that will cause bed to be @@ -399,7 +311,9 @@ def impose_beddays_footprint(self, person_id, footprint): remaining_footprint = self.get_remaining_footprint(person_id) # combine the remaining footprint with the new footprint, with days in each bed-type running concurrently: - combo_footprint = self.combine_footprints_for_same_patient(footprint, remaining_footprint) + combo_footprint = {bed_type: max(footprint[bed_type], remaining_footprint[bed_type]) + for bed_type in self.bed_types + } # remove the old footprint and apply the combined footprint self.remove_beddays_footprint(person_id) diff --git a/src/tlo/methods/bladder_cancer.py b/src/tlo/methods/bladder_cancer.py index 52271f6f16..113d19fde2 100644 --- a/src/tlo/methods/bladder_cancer.py +++ b/src/tlo/methods/bladder_cancer.py @@ -718,7 +718,7 @@ def apply(self, person_id, squeeze_factor): return hs.get_blank_appt_footprint() # Check that this event has been called for someone with the symptom blood_urine - assert 'blood_urine' in self.sim.modules['SymptomManager'].has_what(person_id=person_id) + assert 'blood_urine' in self.sim.modules['SymptomManager'].has_what(person_id) # If the person is already diagnosed, then take no action: if not pd.isnull(df.at[person_id, "bc_date_diagnosis"]): @@ -791,7 +791,7 @@ def apply(self, person_id, squeeze_factor): return hs.get_blank_appt_footprint() # Check that this event has been called for someone with the symptom pelvic_pain - assert 'pelvic_pain' in self.sim.modules['SymptomManager'].has_what(person_id=person_id) + assert 'pelvic_pain' in self.sim.modules['SymptomManager'].has_what(person_id) # If the person is already diagnosed, then take no action: if not pd.isnull(df.at[person_id, "bc_date_diagnosis"]): diff --git a/src/tlo/methods/breast_cancer.py b/src/tlo/methods/breast_cancer.py index a55c6f4930..d362f7ce08 100644 --- a/src/tlo/methods/breast_cancer.py +++ b/src/tlo/methods/breast_cancer.py @@ -685,7 +685,7 @@ def apply(self, person_id, squeeze_factor): return hs.get_blank_appt_footprint() # Check that this event has been called for someone with the symptom breast_lump_discernible - assert 'breast_lump_discernible' in self.sim.modules['SymptomManager'].has_what(person_id=person_id) + assert 'breast_lump_discernible' in self.sim.modules['SymptomManager'].has_what(person_id) # If the person is already diagnosed, then take no action: if 
not pd.isnull(df.at[person_id, "brc_date_diagnosis"]): diff --git a/src/tlo/methods/cancer_consumables.py b/src/tlo/methods/cancer_consumables.py index e26d577242..db1aa19c72 100644 --- a/src/tlo/methods/cancer_consumables.py +++ b/src/tlo/methods/cancer_consumables.py @@ -25,6 +25,12 @@ def get_consumable_item_codes_cancers(self) -> Dict[str, int]: cons_dict['screening_biopsy_core'] = \ {get_item_code("Biopsy needle"): 1} + # cons_dict['cervical_cancer_screening_via_optional'] = \ + # {get_item_code("Gloves"): 2} + + cons_dict['cervical_cancer_screening_via'] = \ + {get_item_code("Clean delivery kit"): 1} + cons_dict['treatment_surgery_core'] = \ {get_item_code("Halothane (fluothane)_250ml_CMST"): 100, get_item_code("Scalpel blade size 22 (individually wrapped)_100_CMST"): 1} diff --git a/src/tlo/methods/cardio_metabolic_disorders.py b/src/tlo/methods/cardio_metabolic_disorders.py index 3c985c2bf1..d90688adb0 100644 --- a/src/tlo/methods/cardio_metabolic_disorders.py +++ b/src/tlo/methods/cardio_metabolic_disorders.py @@ -1306,7 +1306,7 @@ def proportion_of_something_in_a_groupby_ready_for_logging(_df, something, group df.age_years >= 20)]) / len(df[df[f'nc_{condition}'] & df.is_alive & (df.age_years >= 20)]) } else: - diagnosed = {f'{condition}_diagnosis_prevalence': float("nan")} + diagnosed = {f'{condition}_diagnosis_prevalence': 0.0} logger.info( key=f'{condition}_diagnosis_prevalence', @@ -1320,7 +1320,7 @@ def proportion_of_something_in_a_groupby_ready_for_logging(_df, something, group df.age_years >= 20)]) / len(df[df[f'nc_{condition}'] & df.is_alive & (df.age_years >= 20)]) } else: - on_medication = {f'{condition}_medication_prevalence': float("nan")} + on_medication = {f'{condition}_medication_prevalence': 0.0} logger.info( key=f'{condition}_medication_prevalence', diff --git a/src/tlo/methods/care_of_women_during_pregnancy.py b/src/tlo/methods/care_of_women_during_pregnancy.py index 69ce038299..dba3bcda8e 100644 --- a/src/tlo/methods/care_of_women_during_pregnancy.py +++ b/src/tlo/methods/care_of_women_during_pregnancy.py @@ -490,9 +490,9 @@ def further_on_birth_care_of_women_in_pregnancy(self, mother_id): # We log the total number of ANC contacts a woman has undergone at the time of birth via this dictionary if 'ga_anc_one' in mni[mother_id]: - ga_anc_one = float(mni[mother_id]['ga_anc_one']) + ga_anc_one = mni[mother_id]['ga_anc_one'] else: - ga_anc_one = 0.0 + ga_anc_one = 0 total_anc_visit_count = {'person_id': mother_id, 'total_anc': df.at[mother_id, 'ac_total_anc_visits_current_pregnancy'], diff --git a/src/tlo/methods/cervical_cancer.py b/src/tlo/methods/cervical_cancer.py new file mode 100644 index 0000000000..ec8cfd5576 --- /dev/null +++ b/src/tlo/methods/cervical_cancer.py @@ -0,0 +1,1908 @@ + +""" +Cervical Cancer Disease Module + +Limitations to note: +* Footprints of HSI -- pending input from expert on resources required.
+at some point we may need to specify the treatment eg total hysterectomy plus or minus chemotherapy +but we agree not now +""" + +from __future__ import annotations +from pathlib import Path +from datetime import datetime + +import math +from typing import TYPE_CHECKING, List + +import pandas as pd +import json +import numpy as np +import csv + +from tlo import DateOffset, Module, Parameter, Property, Types, logging +from tlo.events import IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent +from tlo.lm import LinearModel, LinearModelType, Predictor +from tlo.methods.causes import Cause +from tlo.methods.demography import InstantaneousDeath +from tlo.methods.dxmanager import DxTest +from tlo.methods.healthsystem import HSI_Event +from tlo.methods.symptommanager import Symptom +from tlo.methods import Metadata +from tlo.methods.cancer_consumables import get_consumable_item_codes_cancers + +if TYPE_CHECKING: + from tlo.methods.hsi_generic_first_appts import HSIEventScheduler + from tlo.population import IndividualProperties + +from tlo.methods.hsi_generic_first_appts import GenericFirstAppointmentsMixin + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class CervicalCancer(Module, GenericFirstAppointmentsMixin): + """Cervical Cancer Disease Module""" + + def __init__(self, name=None, resourcefilepath=None): + super().__init__(name) + self.resourcefilepath = resourcefilepath + self.linear_models_for_progression_of_hpv_cc_status = dict() + self.lm_onset_vaginal_bleeding = None + self.daly_wts = dict() + self.item_codes_cervical_can = dict() + + INIT_DEPENDENCIES = { + 'Demography', 'SimplifiedBirths', 'HealthSystem', 'Lifestyle', 'SymptomManager' + } + + OPTIONAL_INIT_DEPENDENCIES = {'HealthBurden', 'HealthSeekingBehaviour'} + + ADDITIONAL_DEPENDENCIES = {'Tb', 'Hiv'} + + METADATA = { + Metadata.DISEASE_MODULE, + Metadata.USES_SYMPTOMMANAGER, + Metadata.USES_HEALTHSYSTEM, + Metadata.USES_HEALTHBURDEN + } + + # Declare Causes of Death + CAUSES_OF_DEATH = { + 'CervicalCancer': Cause(gbd_causes='Cervical cancer', label='Cancer (Cervix)'), + } + + # Declare Causes of Disability + CAUSES_OF_DISABILITY = { + 'CervicalCancer': Cause(gbd_causes='Cervical cancer', label='Cancer (Cervix)'), + } + + PARAMETERS = { + "init_prev_cin_hpv_cc_stage_hiv": Parameter( + Types.LIST, + "initial proportions in hpv cancer categories in women with hiv" + ), + "init_prev_cin_hpv_cc_stage_nhiv": Parameter( + Types.LIST, + "initial proportions in hpv cancer categories in women without hiv" + ), + "r_hpv": Parameter( + Types.REAL, + "probability per month of oncogenic hpv infection", + ), + "r_cin1_hpv": Parameter( + Types.REAL, + "probability per month of incident cin1 amongst people with hpv", + ), + "prob_revert_from_cin1": Parameter( + Types.REAL, + "probability of reverting from cin1 to none", + ), + "r_cin2_cin1": Parameter( + Types.REAL, + "probability per month of incident cin2 amongst people with cin1", + ), + "r_cin3_cin2": Parameter( + Types.REAL, + "probability per month of incident cin3 amongst people with cin2", + ), + "r_stage1_cin3": Parameter( + Types.REAL, + "probability per month of incident stage1 cervical cancer amongst people with cin3", + ), + "r_stage2a_stage1": Parameter( + Types.REAL, + "probability per month of incident stage2a cervical cancer amongst people with stage1", + ), + "r_stage2b_stage2a": Parameter( + Types.REAL, + "probability per month of incident stage2b cervical cancer amongst people with stage2a", + ), + "r_stage3_stage2b": Parameter( + 
Types.REAL, + "probability per month of incident stage3 cervical cancer amongst people with stage2b", + ), + "r_stage4_stage3": Parameter( + Types.REAL, + "probability per month of incident stage4 cervical cancer amongst people with stage3", + ), + "rr_progress_cc_hiv": Parameter( + Types.REAL, "rate ratio for progressing through cin and cervical cancer stages for people with unsuppressed hiv" + ), + "rr_hpv_vaccinated": Parameter( + Types.REAL, + "rate ratio for hpv if vaccinated - this is combined effect of probability the hpv is " + "vaccine-preventable and vaccine efficacy against vaccine-preventable hpv ", + ), + "rr_hpv_age50plus": Parameter( + Types.REAL, + "rate ratio for hpv if age 50 plus" + ), + "prob_cure_stage1": Parameter( + Types.REAL, + "probability of cure if treated in stage 1 cervical cancer", + ), + "prob_cure_stage2a": Parameter( + Types.REAL, + "probability of cure if treated in stage 2a cervical cancer", + ), + "prob_cure_stage2b": Parameter( + Types.REAL, + "probability of cure if treated in stage 2b cervical cancer", + ), + "prob_cure_stage3": Parameter( + Types.REAL, + "probability of cure if treated in stage 3 cervical cancer", + ), + "r_death_cervical_cancer": Parameter( + Types.REAL, + "probability per month of death from cervical cancer amongst people with stage 4 cervical cancer", + ), + "r_vaginal_bleeding_cc_stage1": Parameter( + Types.REAL, "rate of vaginal bleeding if have stage 1 cervical cancer" + ), + "rr_vaginal_bleeding_cc_stage2a": Parameter( + Types.REAL, "rate ratio for vaginal bleeding if have stage 2a cervical cancer" + ), + "rr_vaginal_bleeding_cc_stage2b": Parameter( + Types.REAL, "rate ratio for vaginal bleeding if have stage 2b cervical cancer" + ), + "rr_vaginal_bleeding_cc_stage3": Parameter( + Types.REAL, "rate ratio for vaginal bleeding if have stage 3 cervical cancer" + ), + "rr_vaginal_bleeding_cc_stage4": Parameter( + Types.REAL, "rate ratio for vaginal bleeding if have stage 4 cervical cancer" + ), + "prob_referral_biopsy_given_vaginal_bleeding": Parameter( + Types.REAL, "probability of being referred for a biopsy if presenting with vaginal bleeding" + ), + "sensitivity_of_biopsy_for_cervical_cancer": Parameter( + Types.REAL, "sensitivity of biopsy for diagnosis of cervical cancer" + ), + "sensitivity_of_xpert_for_hpv_cin_cc": Parameter( + Types.REAL, "sensitivity of xpert for presence of hpv, cin or cervical cancer" + ), + "sensitivity_of_via_for_cin_cc": Parameter( + Types.REAL, "sensitivity of via for cin and cervical cancer by stage" + ), + "prob_xpert_screen": Parameter( + Types.REAL, "probability of selection for xpert screening" + ), + "prob_via_screen": Parameter( + Types.REAL, "probability of selection for via screening" + ), + "prob_thermoabl_successful": Parameter( + Types.REAL, "probability that thermoablation is successful" + ), + "prob_cryotherapy_successful": Parameter( + Types.REAL, "probability that cryotherapy is successful" + ), + "transition_testing_year": Parameter( + Types.REAL, "year in which the testing method transitions" + ), + "transition_screening_year": Parameter( + Types.REAL, "year in which the screening method transitions" + ) + } + + """ + note: hpv vaccination is in epi.py + """ + + PROPERTIES = { + "ce_hpv_cc_status": Property( + Types.CATEGORICAL, + "Current hpv / cervical cancer status - note that hpv means persistent hpv", + categories=["none", "hpv", "cin1", "cin2", "cin3", "stage1", "stage2a", "stage2b", "stage3", "stage4"], + ), + "ce_date_diagnosis": Property( + Types.DATE, + "the date of diagnosis of cervical cancer (pd.NaT if never diagnosed)" + ), + "ce_stage_at_diagnosis": Property( + Types.CATEGORICAL, + "the cancer stage at
which cancer diagnosis was made", + categories=["none", "hpv", "cin1", "cin2", "cin3", "stage1", "stage2a", "stage2b", "stage3", "stage4"], + ), + "ce_date_cin_removal": Property( + Types.DATE, + "the date of last cin removal (pd.NaT if never removed)" + ), + "ce_date_treatment": Property( + Types.DATE, + "date of first receiving attempted curative treatment (pd.NaT if never started treatment)" + ), + "ce_ever_screened": Property( + Types.BOOL, + "whether ever been screened" + ), + "ce_ever_treated": Property( + Types.BOOL, + "ever been treated for cc" + ), + "ce_cured_date_cc": Property( + Types.DATE, + "date of being cured of cervical cancer" + ), + "ce_cc_ever": Property( + Types.BOOL, + "ever had cc" + ), + # currently this property has levels to match ce_hpv_cc_status to enable the code as written, even + # though can only be treated when in stage 1-3 + "ce_stage_at_which_treatment_given": Property( + Types.CATEGORICAL, + "the cancer stage at which treatment was given (because the treatment only has an effect during the stage " + "at which it is given).", + categories=["none", "hpv", "cin1", "cin2", "cin3", "stage1", "stage2a", "stage2b", "stage3", "stage4"], + ), + "ce_date_palliative_care": Property( + Types.DATE, + "date of first receiving palliative care (pd.NaT if never had palliative care)" + ), + "ce_ever_diagnosed": Property( + Types.BOOL, + "ever diagnosed with cervical cancer (even if now cured)" + ), + "ce_date_death": Property( + Types.DATE, + "date of cervical cancer death" + ), + "ce_new_stage_this_month": Property( + Types.BOOL, + "new stage this month" + ), + "ce_xpert_hpv_ever_pos": Property( + Types.BOOL, + "hpv positive on xpert test ever" + ), + "ce_via_cin_ever_detected": Property( + Types.BOOL, + "cin ever detected on via" + ), + "ce_date_last_screened": Property( + Types.DATE, + "date of last screening" + ), + "ce_date_thermoabl": Property( + Types.DATE, + "date of thermoablation for CIN" + ), + "ce_date_cryotherapy": Property( + Types.DATE, + "date of cryotherapy for CIN" + ), + "ce_current_cc_diagnosed": Property( + Types.BOOL, + "currently has diagnosed cervical cancer (which until now has not been cured)" + ), + "ce_selected_for_via_this_month": Property( + Types.BOOL, + "selected for via this month" + ), + "ce_selected_for_xpert_this_month": Property( + Types.BOOL, + "selected for xpert this month" + ), + "ce_biopsy": Property( + Types.BOOL, + "ce biopsy done" + ) + } + + def read_parameters(self, data_folder): + """Setup parameters used by the module, now including disability weights""" + + # Update parameters from the resourcefile + self.load_parameters_from_dataframe( + pd.read_excel(Path(self.resourcefilepath) / "ResourceFile_Cervical_Cancer.xlsx", + sheet_name="parameter_values") + ) + + # note that health-seeking probability is quite high even though the odds ratio = 1 + self.sim.modules['SymptomManager'].register_symptom( + Symptom(name='vaginal_bleeding', + odds_ratio_health_seeking_in_adults=1.00) + ) + + # todo: in order to implement screening for cervical cancer we create a dummy symptom - likely there is a better way + + self.sim.modules['SymptomManager'].register_symptom( + Symptom(name='chosen_via_screening_for_cin_cervical_cancer', + odds_ratio_health_seeking_in_adults=100.00) +
) + + self.sim.modules['SymptomManager'].register_symptom( + Symptom(name='chosen_xpert_screening_for_hpv_cervical_cancer', + odds_ratio_health_seeking_in_adults=100.00) + ) + + + def initialise_population(self, population): + """Set property values for the initial population.""" + df = population.props # a shortcut to the data-frame + p = self.parameters + rng = self.rng + + # defaults + df.loc[df.is_alive, "ce_hpv_cc_status"] = "none" + df.loc[df.is_alive, "ce_date_diagnosis"] = pd.NaT + df.loc[df.is_alive, "ce_date_treatment"] = pd.NaT + df.loc[df.is_alive, "ce_stage_at_which_treatment_given"] = "none" + df.loc[df.is_alive, "ce_date_palliative_care"] = pd.NaT + df.loc[df.is_alive, "ce_date_death"] = pd.NaT + df.loc[df.is_alive, "ce_new_stage_this_month"] = False + df.loc[df.is_alive, "ce_stage_at_diagnosis"] = "none" + df.loc[df.is_alive, "ce_ever_treated"] = False + df.loc[df.is_alive, "ce_cc_ever"] = False + df.loc[df.is_alive, "ce_xpert_hpv_ever_pos"] = False + df.loc[df.is_alive, "ce_via_cin_ever_detected"] = False + df.loc[df.is_alive, "ce_date_thermoabl"] = pd.NaT + df.loc[df.is_alive, "ce_date_cryotherapy"] = pd.NaT + df.loc[df.is_alive, "ce_date_via"] = pd.NaT + df.loc[df.is_alive, "ce_date_xpert"] = pd.NaT + df.loc[df.is_alive, 'ce_current_cc_diagnosed'] = False + df.loc[df.is_alive, "ce_selected_for_via_this_month"] = False + df.loc[df.is_alive, "ce_selected_for_xpert_this_month"] = False + df.loc[df.is_alive, "days_since_last_via"] = pd.NaT + df.loc[df.is_alive, "days_since_last_xpert"] = pd.NaT + df.loc[df.is_alive, "ce_biopsy"] = False + df.loc[df.is_alive, "ce_ever_screened"] = False + df.loc[df.is_alive, "ce_ever_diagnosed"] = False + df.loc[df.is_alive, "ce_cured_date_cc"] = pd.NaT + df.loc[df.is_alive, "ce_date_last_screened"] = pd.NaT + + # -------------------- ce_hpv_cc_status ----------- + # this was not assigned here at outset because baseline value of hv_inf was not accessible - it is assigned + # at start of main polling event below + + # -------------------- symptoms, diagnosis, treatment ----------- + # For simplicity we assume all these are null at baseline - we don't think this will influence population + # status in the present to any significant degree + + + def initialise_simulation(self, sim): + """ + * Schedule the main polling event + * Schedule the main logging event + * Define the LinearModels + * Define the Diagnostic used + * Define the Disability-weights + * Schedule the palliative care appointments for those that are on palliative care at initiation + """ + + + # ----- SCHEDULE MAIN POLLING EVENTS ----- + # Schedule main polling event to happen immediately + sim.schedule_event(CervicalCancerMainPollingEvent(self), sim.date) + + # ----- SCHEDULE LOGGING EVENTS ----- + # Schedule the first logging event to happen in one month's time + sim.schedule_event(CervicalCancerLoggingEvent(self), sim.date + DateOffset(months=1)) + + # Look-up consumable item codes + self.item_codes_cervical_can = get_consumable_item_codes_cancers(self) + + # ----- LINEAR MODELS ----- + # Define LinearModels for the progression of cancer, in each 1 month period + # NB. The effect being produced is that treatment only has the effect in the stage at which the + # treatment was received.
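The NB comment above, and the gating predicates in the models that follow, rely on the fact that in a multiplicative LinearModel any predictor evaluating to 0.0 zeroes the whole monthly probability, which confines each transition to people currently in the source state. A small numeric sketch with made-up values:

r_hpv = 0.002           # baseline monthly probability of acquiring hpv (made-up value)
in_source_state = 1.0   # Predictor('ce_hpv_cc_status').when('none', 1.0).otherwise(0.0)
rr_age50plus = 0.6      # rr_hpv_age50plus, applied to women aged 50+ (made-up value)
monthly_probability = r_hpv * in_source_state * rr_age50plus  # 0.0012
# for anyone not in the 'none' state, in_source_state is 0.0 and the probability collapses to 0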
+ + df = sim.population.props + p = self.parameters + lm = self.linear_models_for_progression_of_hpv_cc_status + + lm['hpv'] = LinearModel( + LinearModelType.MULTIPLICATIVE, + p['r_hpv'], + Predictor('va_hpv') + .when(1, p['rr_hpv_vaccinated']) + .when(2, p['rr_hpv_vaccinated']), + Predictor('age_years', conditions_are_mutually_exclusive=True) + .when('.between(0,15)', 0.0) + .when('.between(50,110)', p['rr_hpv_age50plus']), + Predictor('sex').when('M', 0.0), + Predictor('ce_hpv_cc_status').when('none', 1.0).otherwise(0.0), + Predictor('ce_hiv_unsuppressed').when(True, p['rr_progress_cc_hiv']).otherwise(1.0), + Predictor('ce_new_stage_this_month').when(True, 0.0).otherwise(1.0) + ) + + lm['cin1'] = LinearModel( + LinearModelType.MULTIPLICATIVE, + p['r_cin1_hpv'], + Predictor('ce_hpv_cc_status').when('hpv', 1.0).otherwise(0.0), +# Predictor('hv_inf', conditions_are_mutually_exclusive=True) +# .when(False, 0.0) +# .when(True, 1.0), + Predictor('ce_hiv_unsuppressed').when(True, p['rr_progress_cc_hiv']).otherwise(1.0), + Predictor('ce_new_stage_this_month').when(True, 0.0).otherwise(1.0) + ) + + lm['cin2'] = LinearModel( + LinearModelType.MULTIPLICATIVE, + p['r_cin2_cin1'], + Predictor('ce_hpv_cc_status').when('cin1', 1.0).otherwise(0.0), +# Predictor('hv_inf', conditions_are_mutually_exclusive=True) +# .when(False, 0.0) +# .when(True, 1.0), + Predictor('ce_hiv_unsuppressed').when(True, p['rr_progress_cc_hiv']).otherwise(1.0), + Predictor('ce_new_stage_this_month').when(True, 0.0).otherwise(1.0) + ) + + lm['cin3'] = LinearModel( + LinearModelType.MULTIPLICATIVE, + p['r_cin3_cin2'], + Predictor('ce_hpv_cc_status').when('cin2', 1.0).otherwise(0.0), +# Predictor('hv_inf', conditions_are_mutually_exclusive=True) +# .when(False, 0.0) +# .when(True, 1.0), + Predictor('ce_hiv_unsuppressed').when(True, p['rr_progress_cc_hiv']).otherwise(1.0), + Predictor('ce_new_stage_this_month').when(True, 0.0).otherwise(1.0) + ) + + lm['stage1'] = LinearModel( + LinearModelType.MULTIPLICATIVE, + p['r_stage1_cin3'], + Predictor('ce_hpv_cc_status').when('cin3', 1.0).otherwise(0.0), +# Predictor('hv_inf', conditions_are_mutually_exclusive=True) +# .when(False, 0.0) +# .when(True, 1.0), + Predictor('ce_hiv_unsuppressed').when(True, p['rr_progress_cc_hiv']).otherwise(1.0), + Predictor('ce_new_stage_this_month').when(True, 0.0).otherwise(1.0) + ) + + lm['stage2a'] = LinearModel( + LinearModelType.MULTIPLICATIVE, + p['r_stage2a_stage1'], + Predictor('ce_hpv_cc_status').when('stage1', 1.0).otherwise(0.0), +# Predictor('hv_inf', conditions_are_mutually_exclusive=True) +# .when(False, 0.0) +# .when(True, 1.0), + Predictor('ce_hiv_unsuppressed').when(True, p['rr_progress_cc_hiv']).otherwise(1.0), + Predictor('ce_new_stage_this_month').when(True, 0.0).otherwise(1.0) + ) + + lm['stage2b'] = LinearModel( + LinearModelType.MULTIPLICATIVE, + p['r_stage2b_stage2a'], + Predictor('ce_hpv_cc_status').when('stage2a', 1.0).otherwise(0.0), +# Predictor('hv_inf', conditions_are_mutually_exclusive=True) +# .when(False, 0.0) +# .when(True, 1.0), + Predictor('ce_hiv_unsuppressed').when(True, p['rr_progress_cc_hiv']).otherwise(1.0), + Predictor('ce_new_stage_this_month').when(True, 0.0).otherwise(1.0) + ) + + lm['stage3'] = LinearModel( + LinearModelType.MULTIPLICATIVE, + p['r_stage3_stage2b'], + Predictor('ce_hpv_cc_status').when('stage2b', 1.0).otherwise(0.0), +# Predictor('hv_inf', conditions_are_mutually_exclusive=True) +# .when(False, 0.0) +# .when(True, 1.0), + Predictor('ce_hiv_unsuppressed').when(True, 
p['rr_progress_cc_hiv']).otherwise(1.0),
+            Predictor('ce_new_stage_this_month').when(True, 0.0).otherwise(1.0)
+        )
+
+        lm['stage4'] = LinearModel(
+            LinearModelType.MULTIPLICATIVE,
+            p['r_stage4_stage3'],
+            Predictor('ce_hpv_cc_status').when('stage3', 1.0).otherwise(0.0),
+#            Predictor('hv_inf', conditions_are_mutually_exclusive=True)
+#            .when(False, 0.0)
+#            .when(True, 1.0),
+            Predictor('ce_hiv_unsuppressed').when(True, p['rr_progress_cc_hiv']).otherwise(1.0),
+            Predictor('ce_new_stage_this_month').when(True, 0.0).otherwise(1.0)
+        )
+
+        # Check that the dict labels are correct as these are used to set the value of ce_hpv_cc_status
+        assert set(lm).union({'none'}) == set(df.ce_hpv_cc_status.cat.categories)
+
+        # Linear Model for the onset of vaginal bleeding, in each 1 month period
+        # Create the variables used to predict the onset of vaginal bleeding at
+        # various stages of the disease
+
+        stage1 = p['r_vaginal_bleeding_cc_stage1']
+        stage2a = p['rr_vaginal_bleeding_cc_stage2a'] * p['r_vaginal_bleeding_cc_stage1']
+        stage2b = p['rr_vaginal_bleeding_cc_stage2b'] * p['r_vaginal_bleeding_cc_stage1']
+        stage3 = p['rr_vaginal_bleeding_cc_stage3'] * p['r_vaginal_bleeding_cc_stage1']
+        stage4 = p['rr_vaginal_bleeding_cc_stage4'] * p['r_vaginal_bleeding_cc_stage1']
+
+        self.lm_onset_vaginal_bleeding = LinearModel.multiplicative(
+            Predictor('sex').when('M', 0.0),
+            Predictor(
+                'ce_hpv_cc_status',
+                conditions_are_mutually_exclusive=True,
+                conditions_are_exhaustive=True,
+            )
+            .when('none', 0.00001)
+            .when('cin1', 0.00001)
+            .when('cin2', 0.00001)
+            .when('cin3', 0.00001)
+            .when('stage1', stage1)
+            .when('stage2a', stage2a)
+            .when('stage2b', stage2b)
+            .when('stage3', stage3)
+            .when('stage4', stage4)
+        )
+
+        # ----- DX TESTS -----
+        # Create the diagnostic test representing the use of a biopsy.
+        # Its use is conditional on the test being done only to persons with the symptom 'vaginal_bleeding'.
+
+        # in future could add different sensitivity according to target category
+
+        self.sim.modules['HealthSystem'].dx_manager.register_dx_test(
+            biopsy_for_cervical_cancer=DxTest(
+                property='ce_hpv_cc_status',
+                sensitivity=self.parameters['sensitivity_of_biopsy_for_cervical_cancer'],
+                target_categories=["stage1", "stage2a", "stage2b", "stage3", "stage4"]
+            )
+        )
+
+        self.sim.modules['HealthSystem'].dx_manager.register_dx_test(
+            screening_with_xpert_for_hpv=DxTest(
+                property='ce_hpv_cc_status',
+                sensitivity=self.parameters['sensitivity_of_xpert_for_hpv_cin_cc'],
+                target_categories=["hpv", "cin1", "cin2", "cin3", "stage1", "stage2a", "stage2b", "stage3", "stage4"]
+            )
+        )
+
+        self.sim.modules['HealthSystem'].dx_manager.register_dx_test(
+            screening_with_via_for_cin_and_cervical_cancer=DxTest(
+                property='ce_hpv_cc_status',
+                sensitivity=self.parameters['sensitivity_of_via_for_cin_cc'],
+                target_categories=["cin1", "cin2", "cin3", "stage1", "stage2a", "stage2b", "stage3", "stage4"]
+            )
+        )
+
+        # ----- DISABILITY-WEIGHT -----
+        if "HealthBurden" in self.sim.modules:
+            # For those with cancer (any stage prior to stage 4) and never treated
+            self.daly_wts["stage_1_3"] = self.sim.modules["HealthBurden"].get_daly_weight(
+                sequlae_code=607
+                # "Diagnosis and primary therapy phase of cervical cancer":
+                # "Cancer, diagnosis and primary therapy", "has pain, nausea, fatigue, weight loss and high anxiety."
+            )
+
+            # For those with cancer (any stage prior to stage 4) who have been treated
+            self.daly_wts["stage_1_3_treated"] = self.sim.modules["HealthBurden"].get_daly_weight(
+                sequlae_code=608
+                # "Controlled phase of cervical cancer, Generic uncomplicated disease":
+                # "worry and daily medication, has a chronic disease that requires medication every day and causes
+                # some worry but minimal interference with daily activities".
+            )
+
+            # For those in stage 4: no palliative care
+            self.daly_wts["stage4"] = self.sim.modules["HealthBurden"].get_daly_weight(
+                sequlae_code=609
+                # "Metastatic phase of cervical cancer":
+                # "Cancer, metastatic", "has severe pain, extreme fatigue, weight loss and high anxiety."
+            )
+
+            # For those in stage 4: with palliative care
+            self.daly_wts["stage4_palliative_care"] = self.daly_wts["stage_1_3"]
+            # By assumption, we say that the weight for those in stage 4 with palliative care is the same as
+            # that for those with stage 1-3 cancers.
+
+        # ----- HSI FOR PALLIATIVE CARE -----
+        on_palliative_care_at_initiation = df.index[df.is_alive & ~pd.isnull(df.ce_date_palliative_care)]
+        for person_id in on_palliative_care_at_initiation:
+            self.sim.modules['HealthSystem'].schedule_hsi_event(
+                hsi_event=HSI_CervicalCancer_PalliativeCare(module=self, person_id=person_id),
+                priority=0,
+                topen=self.sim.date + DateOffset(months=1),
+                tclose=self.sim.date + DateOffset(months=1) + DateOffset(weeks=1)
+            )
+
+    def on_birth(self, mother_id, child_id):
+        """Initialise properties for a newborn individual.
+        :param mother_id: the mother for this child
+        :param child_id: the new child
+        """
+        df = self.sim.population.props
+        df.at[child_id, "ce_hpv_cc_status"] = "none"
+        df.at[child_id, "ce_date_treatment"] = pd.NaT
+        df.at[child_id, "ce_stage_at_which_treatment_given"] = "none"
+        df.at[child_id, "ce_date_diagnosis"] = pd.NaT
+        df.at[child_id, "ce_new_stage_this_month"] = False
+        df.at[child_id, "ce_date_palliative_care"] = pd.NaT
+        df.at[child_id, "ce_date_death"] = pd.NaT
+        df.at[child_id, "ce_date_cin_removal"] = pd.NaT
+        df.at[child_id, "ce_stage_at_diagnosis"] = 'none'
+        df.at[child_id, "ce_ever_treated"] = False
+        df.at[child_id, "ce_cc_ever"] = False
+        df.at[child_id, "ce_xpert_hpv_ever_pos"] = False
+        df.at[child_id, "ce_via_cin_ever_detected"] = False
+        df.at[child_id, "ce_date_thermoabl"] = pd.NaT
+        df.at[child_id, "ce_date_cryotherapy"] = pd.NaT
+        df.at[child_id, "days_since_last_via"] = pd.NaT
+        df.at[child_id, "days_since_last_xpert"] = pd.NaT
+        df.at[child_id, "ce_current_cc_diagnosed"] = False
+        df.at[child_id, "ce_selected_for_via_this_month"] = False
+        df.at[child_id, "ce_selected_for_xpert_this_month"] = False
+        df.at[child_id, "ce_biopsy"] = False
+        df.at[child_id, "ce_ever_screened"] = False
+        df.at[child_id, "ce_ever_diagnosed"] = False
+        df.at[child_id, "ce_cured_date_cc"] = pd.NaT
+        df.at[child_id, "ce_date_last_screened"] = pd.NaT
+
+    def report_daly_values(self):
+        # This must send back a pd.Series that reports on the health states of all individuals over the past month
+
+        df = self.sim.population.props  # shortcut to population properties dataframe
+
+        disability_series_for_alive_persons = pd.Series(index=df.index[df.is_alive], data=0.0)
+
+        # Assign daly_wt to those with cancer stages before stage4 who have either never been treated or are no
+        # longer in the stage in which they were treated
+        disability_series_for_alive_persons.loc[
+            (
+                (df.ce_hpv_cc_status == "stage1") |
+                (df.ce_hpv_cc_status == "stage2a") |
+                (df.ce_hpv_cc_status == "stage2b") |
+                (df.ce_hpv_cc_status == "stage3")
+            )
+        ] = self.daly_wts['stage_1_3']
+
+        # Assign daly_wt to those with cancer stages before stage4 and who have been treated and who are still in the
+        # stage in which they were treated.
+        disability_series_for_alive_persons.loc[
+            (
+                ~pd.isnull(df.ce_date_treatment) & (
+                    (df.ce_hpv_cc_status == "stage1") |
+                    (df.ce_hpv_cc_status == "stage2a") |
+                    (df.ce_hpv_cc_status == "stage2b") |
+                    (df.ce_hpv_cc_status == "stage3")
+                ) & (df.ce_hpv_cc_status == df.ce_stage_at_which_treatment_given)
+            )
+        ] = self.daly_wts['stage_1_3_treated']
+
+        # NB. The masks above are defined w.r.t. df, but they index a series restricted to alive persons.
+        # This works because pandas aligns a boolean-series indexer on its labels (dropping mask labels that
+        # are not in the series' index), and the labels of the alive persons are always a subset of df's labels.
+
+        # Assign daly_wt to those in stage4 cancer (who have not had palliative care)
+        disability_series_for_alive_persons.loc[
+            (df.ce_hpv_cc_status == "stage4") &
+            (pd.isnull(df.ce_date_palliative_care))
+        ] = self.daly_wts['stage4']
+
+        # Assign daly_wt to those in stage4 cancer, who have had palliative care
+        disability_series_for_alive_persons.loc[
+            (df.ce_hpv_cc_status == "stage4") &
+            (~pd.isnull(df.ce_date_palliative_care))
+        ] = self.daly_wts['stage4_palliative_care']
+
+        return disability_series_for_alive_persons
+
+    def do_at_generic_first_appt(
+        self,
+        person_id: int,
+        individual_properties: IndividualProperties,
+        symptoms: List[str],
+        schedule_hsi_event: HSIEventScheduler,
+        **kwargs,
+    ) -> None:
+        if 'vaginal_bleeding' in symptoms:
+            schedule_hsi_event(
+                HSI_CervicalCancerPresentationVaginalBleeding(
+                    person_id=person_id,
+                    module=self
+                ),
+                priority=0,
+                topen=self.sim.date,
+                tclose=None)
+
+        if 'chosen_via_screening_for_cin_cervical_cancer' in symptoms:
+            schedule_hsi_event(
+                HSI_CervicalCancer_AceticAcidScreening(
+                    person_id=person_id,
+                    module=self
+                ),
+                priority=0,
+                topen=self.sim.date,
+                tclose=None)
+
+        if 'chosen_xpert_screening_for_hpv_cervical_cancer' in symptoms:
+            schedule_hsi_event(
+                HSI_CervicalCancer_XpertHPVScreening(
+                    person_id=person_id,
+                    module=self
+                ),
+                priority=0,
+                topen=self.sim.date,
+                tclose=None)
+
+        # else:
+        #     schedule_hsi_event(
+        #         HSI_CervicalCancer_Screening(
+        #             person_id=person_id,
+        #             module=self
+        #         ),
+        #         priority=0,
+        #         topen=self.sim.date,
+        #         tclose=None)
+
+# ---------------------------------------------------------------------------------------------------------
+#   DISEASE MODULE EVENTS
+# ---------------------------------------------------------------------------------------------------------
+
+class CervicalCancerMainPollingEvent(RegularEvent, PopulationScopeEventMixin):
+    """
+    Regular event that updates all cervical cancer properties for the population:
+    * Acquisition and progression of HPV, CIN, cervical cancer
+    * Symptom development according to stage of cervical cancer
+    * Deaths from cervical cancer for those in stage4
+    """
+
+    def __init__(self, module):
+        super().__init__(module, frequency=DateOffset(months=1))
+        # scheduled to run every 1 month: do not change as this is hard-wired into the values of all the parameters.
+
+    def apply(self, population):
+        df = population.props  # shortcut to dataframe
+        year = self.sim.date.year
+        m = self.module
+        rng = m.rng
+        p = self.sim.modules['CervicalCancer'].parameters
+
+        # ------------------- SET INITIAL CE_HPV_CC_STATUS -------------------------------------------------------
+        # this was done here and not at the outset because the baseline value of hv_inf was not accessible
+
+        given_date = pd.to_datetime('2010-02-03')
+
+        if self.sim.date < given_date:
+
+            women_over_15_nhiv_idx = df.index[(df["age_years"] > 15) & (df["sex"] == 'F') & ~df["hv_inf"]]
+
+            df.loc[women_over_15_nhiv_idx, 'ce_hpv_cc_status'] = rng.choice(
+                ['none', 'hpv', 'cin1', 'cin2', 'cin3', 'stage1', 'stage2a', 'stage2b', 'stage3', 'stage4'],
+                size=len(women_over_15_nhiv_idx), p=p['init_prev_cin_hpv_cc_stage_nhiv']
+            )
+
+            women_over_15_hiv_idx = df.index[(df["age_years"] > 15) & (df["sex"] == 'F') & df["hv_inf"]]
+
+            df.loc[women_over_15_hiv_idx, 'ce_hpv_cc_status'] = rng.choice(
+                ['none', 'hpv', 'cin1', 'cin2', 'cin3', 'stage1', 'stage2a', 'stage2b', 'stage3', 'stage4'],
+                size=len(women_over_15_hiv_idx), p=p['init_prev_cin_hpv_cc_stage_hiv']
+            )
+
+        # -------------------- ACQUISITION AND PROGRESSION OF CANCER (ce_hpv_cc_status) ---------------------------
+
+        # todo:
+        #  this is being broadcast. it should be limited to those with is_alive: i.e. df.loc[df.is_alive,
+        #  'ce_new_stage_this_month'] = False
+        #  As I expect this is going to be over-written (further down) it would be more efficient to not
+        #  write it into the main sim.population.props df yet (reading/writing there is time-consuming),
+        #  and instead do one write to it at the end of the event, when everything is settled.
+
+        df['ce_hiv_unsuppressed'] = ((df['hv_art'] == 'on_not_vl_suppressed') | (df['hv_art'] == 'not')) & (df['hv_inf'])
+
+        # determine if the person had a treatment during this stage of cancer (nb. treatment only has an effect on
+        # reducing progression risk during the stage at which it is received.)
+
+        for stage, lm in self.module.linear_models_for_progression_of_hpv_cc_status.items():
+            gets_new_stage = lm.predict(df.loc[df.is_alive], rng)
+
+            idx_gets_new_stage = gets_new_stage[gets_new_stage].index
+
+#            print(stage, lm, gets_new_stage, idx_gets_new_stage)
+
+            df.loc[idx_gets_new_stage, 'ce_hpv_cc_status'] = stage
+            df['ce_new_stage_this_month'] = df.index.isin(idx_gets_new_stage)
+
+        # Identify rows where the status is 'cin1'
+        has_cin1 = (
+            (df.is_alive) &
+            (df.sex == 'F') &
+            (df.ce_hpv_cc_status == 'cin1')
+        )
+
+        # Apply the reversion probability to change some 'cin1' to 'none'
+        df.loc[has_cin1, 'ce_hpv_cc_status'] = np.where(
+            self.module.rng.random(size=len(df[has_cin1])) < p['prob_revert_from_cin1'],
+            'none',
+            df.loc[has_cin1, 'ce_hpv_cc_status']
+        )
+
+        # todo:
+        #  this is also broadcasting to the whole dataframe (including dead people and, potentially,
+        #  never-alive people).
+        #
+        #  Also, it will over-write to False those people not in any of those categories. I can see
+        #  that this will not violate the logic, but the safest thing would be to also include in the
+        #  chained union statement the current value, in order to absolutely prevent reversions... i.e.
+        #  add in ce_cc_ever at the end of this line (see the sketch below).
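+        # A sketch of the safeguard described in the todo above (an illustration only, not yet applied):
+        # including the current value in the union makes reversion of ce_cc_ever impossible by construction:
+        #
+        #     df.loc[df.is_alive, 'ce_cc_ever'] = (
+        #         df.ce_hpv_cc_status.isin(['stage1', 'stage2a', 'stage2b', 'stage3', 'stage4'])
+        #         | df.ce_ever_treated
+        #         | df.ce_cc_ever
+        #     )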
+
+        df.loc[
+            (df['is_alive']) & (~df['ce_cc_ever']),  # Apply only if is_alive is True and ce_cc_ever is not True
+            'ce_cc_ever'
+        ] = (
+            (df['ce_hpv_cc_status'].isin(['stage1', 'stage2a', 'stage2b', 'stage3', 'stage4']))
+            | df['ce_ever_treated']
+        )
+
+        # -------------------- SCREENING FOR CERVICAL CANCER USING XPERT HPV TESTING AND VIA ----------------------
+        # A subset of women aged 25-50 will receive a screening test
+
+        # in future this may be triggered by a family planning visit
+
+        # todo:
+        #  Instead, for the individuals that are chosen to be screened, create and schedule the HSI
+        #  event directly.
+        #
+        #  e.g. for each individual to be screened... make an HSI_Event_CervicalCancer_Screening.....
+        #  and in that event, do whatever is required for the screening. (might be the same as happens
+        #  in the generic appointment, in which case point them both to the same function)
+
+        # todo: create a date of last via screen (and same for xpert) and make it a condition of screening
+        #  that the last screen was x years ago
+
+        # reset the screening-selection flags each month before drawing this month's selections
+        df.ce_selected_for_via_this_month = False
+        df.ce_selected_for_xpert_this_month = False
+
+        days_since_last_screen = (self.sim.date - df.ce_date_last_screened).dt.days
+        days_since_last_thermoabl = (self.sim.date - df.ce_date_thermoabl).dt.days
+        days_since_last_via = (self.sim.date - df.ce_date_via).dt.days
+        days_since_last_xpert = (self.sim.date - df.ce_date_xpert).dt.days
+
+        # todo: screening probability should depend on the date of the last screen and its result (WHO guidelines)
+
+        eligible_population = (
+            (df.is_alive) &
+            (df.sex == 'F') &
+            (df.age_years >= 25) &
+            (df.age_years < 50) &
+            (~df.ce_current_cc_diagnosed) &
+            (
+                pd.isna(df.ce_date_last_screened) |
+                (days_since_last_via > 1825) | (days_since_last_xpert > 1825) |
+                ((days_since_last_screen > 730) & (days_since_last_thermoabl < 1095))
+            )
+        )
+
+        # todo: consider the fact that WHO recommends moving towards Xpert screening and away from VIA
+        # todo: start with VIA as the screening tool and move to Xpert in about 2024
+
+        if year <= p['transition_screening_year']:
+            # Use VIA for screening before the transition year
+            df.loc[eligible_population, 'ce_selected_for_via_this_month'] = (
+                rng.random(size=len(df[eligible_population])) < p['prob_via_screen']
+            )
+        else:
+            # Use Xpert for screening from the transition year onward
+            df.loc[eligible_population, 'ce_selected_for_xpert_this_month'] = (
+                rng.random(size=len(df[eligible_population])) < p['prob_xpert_screen']
+            )
+
+        self.sim.modules['SymptomManager'].change_symptom(
+            person_id=df.loc[df['ce_selected_for_via_this_month']].index,
+            symptom_string='chosen_via_screening_for_cin_cervical_cancer',
+            add_or_remove='+',
+            disease_module=self.module
+        )
+
+        self.sim.modules['SymptomManager'].change_symptom(
+            person_id=df.loc[df['ce_selected_for_xpert_this_month']].index,
+            symptom_string='chosen_xpert_screening_for_hpv_cervical_cancer',
+            add_or_remove='+',
+            disease_module=self.module
+        )
+
+        # -------------------- UPDATING OF SYMPTOM OF vaginal bleeding OVER TIME --------------------------------
+        # Each time this event is called (every month) individuals with cervical cancer may develop the symptom of
+        # vaginal bleeding. Once the symptom is developed it never resolves naturally. It may trigger
+        # health-care-seeking behaviour.
+        onset_vaginal_bleeding = self.module.lm_onset_vaginal_bleeding.predict(
+            df.loc[
+                df.is_alive & (df.ce_stage_at_diagnosis == 'none')
+            ],
+            rng
+        )
+
+        self.sim.modules['SymptomManager'].change_symptom(
+            person_id=onset_vaginal_bleeding[onset_vaginal_bleeding].index.tolist(),
+            symptom_string='vaginal_bleeding',
+            add_or_remove='+',
+            disease_module=self.module
+        )
+
+        # -------------------- DEATH FROM cervical CANCER ---------------------------------------
+        # There is a risk of death for those in stage4 only. Death is assumed to occur immediately.
+        stage4_idx = df.index[df.is_alive & (df.ce_hpv_cc_status == "stage4")]
+        selected_to_die = stage4_idx[
+            rng.random_sample(size=len(stage4_idx)) < self.module.parameters['r_death_cervical_cancer']]
+
+        for person_id in selected_to_die:
+            self.sim.schedule_event(
+                InstantaneousDeath(self.module, person_id, "CervicalCancer"), self.sim.date
+            )
+            days_spread = 90
+            date_min = self.sim.date
+            date_max = self.sim.date + pd.DateOffset(days=days_spread)
+            df.at[person_id, 'ce_date_death'] = pd.to_datetime(rng.uniform(date_min.value, date_max.value), unit='ns')
+
+        # todo: distribute death dates across next 30 days
+
+
+# ---------------------------------------------------------------------------------------------------------
+#   HEALTH SYSTEM INTERACTION EVENTS
+# ---------------------------------------------------------------------------------------------------------
+
+class HSI_CervicalCancer_AceticAcidScreening(HSI_Event, IndividualScopeEventMixin):
+
+    """
+    This event will be scheduled by the family planning HSI - for now we determine at random a screening event,
+    and we determine at random whether this is AceticAcidScreening or HPVXpertScreening
+
+    In future this might be scheduled by the contraception module
+
+    may in future want to modify slightly to reflect this: a biopsy is taken if the VIA looks abnormal and the
+    facility has the capacity to take a biopsy - otherwise thermoablation is performed
+    """
+
+    def __init__(self, module, person_id):
+        super().__init__(module, person_id=person_id)
+
+        self.TREATMENT_ID = "CervicalCancer_AceticAcidScreening"
+        self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({"Over5OPD": 1})
+        self.ACCEPTED_FACILITY_LEVEL = '1a'
+
+    def apply(self, person_id, squeeze_factor):
+        df = self.sim.population.props
+        year = self.sim.date.year
+        p = self.sim.modules['CervicalCancer'].parameters
+        person = df.loc[person_id]
+        hs = self.sim.modules["HealthSystem"]
+
+        # Check consumables are available
+        cons_avail = self.get_consumables(
+            item_codes=self.module.item_codes_cervical_can['cervical_cancer_screening_via'])
+
+        if cons_avail:
+            self.add_equipment({'Infusion pump', 'Drip stand'})
+            # self.add_equipment(self.healthcare_system.equipment.from_pkg_names('Major Surgery'))
+
+            # Run a test to diagnose whether the person has the condition:
+            dx_result = hs.dx_manager.run_dx_test(
+                dx_tests_to_run='screening_with_via_for_cin_and_cervical_cancer',
+                hsi_event=self
+            )
+            df.at[person_id, "ce_date_last_screened"] = self.sim.date
+            df.at[person_id, "ce_date_via"] = self.sim.date
+            df.at[person_id, "ce_ever_screened"] = True
+
+            if dx_result:
+                df.at[person_id, 'ce_via_cin_ever_detected'] = True
+
+                if (df.at[person_id, 'ce_hpv_cc_status'] == 'cin2'
+                        or df.at[person_id, 'ce_hpv_cc_status'] == 'cin3'
+                        ):
+                    if year >= p['transition_testing_year']:
+                        hs.schedule_hsi_event(
+                            hsi_event=HSI_CervicalCancer_Thermoablation_CIN(
+                                module=self.module,
+                                person_id=person_id
+                            ),
+                            priority=0,
+                            topen=self.sim.date,
tclose=None + ) + else: + hs.schedule_hsi_event( + hsi_event=HSI_CervicalCancer_Cryotherapy_CIN( + module=self.module, + person_id=person_id + ), + priority=0, + topen=self.sim.date, + tclose=None + ) + + elif (df.at[person_id, 'ce_hpv_cc_status'] == 'stage1' + or df.at[person_id, 'ce_hpv_cc_status'] == 'stage2a' + or df.at[person_id, 'ce_hpv_cc_status'] == 'stage2b' + or df.at[person_id, 'ce_hpv_cc_status'] == 'stage3' + or df.at[person_id, 'ce_hpv_cc_status'] == 'stage4'): + hs.schedule_hsi_event( + hsi_event=HSI_CervicalCancer_Biopsy( + module=self.module, + person_id=person_id + ), + priority=0, + topen=self.sim.date, + tclose=None + ) + + # sy_chosen_via_screening_for_cin_cervical_cancer reset to 0 + # if df.at[person_id, 'sy_chosen_via_screening_for_cin_cervical_cancer'] == 2: + # self.sim.modules['SymptomManager'].change_symptom( + # person_id=person_id, + # symptom_string='chosen_via_screening_for_cin_cervical_cancer', + # add_or_remove='-', + # disease_module=self.module + # ) + # + # df.at[person_id, 'ce_selected_for_via_this_month'] = False + + +class HSI_CervicalCancer_XpertHPVScreening(HSI_Event, IndividualScopeEventMixin): + + """ + This event will be scheduled by family planning HSI - for now we determine at random a screening event, and + we determine at random whether this is AceticAcidScreening or HPVXpertScreening + + In future this might be scheduled by the contraception module + """ + + def __init__(self, module, person_id): + super().__init__(module, person_id=person_id) + + self.TREATMENT_ID = "CervicalCancer_XpertHPVScreening" + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({"Over5OPD": 1}) + self.ACCEPTED_FACILITY_LEVEL = '1a' + + def apply(self, person_id, squeeze_factor): + df = self.sim.population.props + p = self.sim.modules['CervicalCancer'].parameters + year = self.sim.date.year + person = df.loc[person_id] + hs = self.sim.modules["HealthSystem"] + + # todo: if positive on xpert then do via if hiv negative but go straight to thermoablation + # todo: if hiv positive ? 
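+        # Unlike the VIA screening event above, no consumables request is made here. A possible parallel check
+        # (a sketch only; the 'cervical_cancer_screening_xpert' item-code key is hypothetical) would be:
+        #
+        #     cons_avail = self.get_consumables(
+        #         item_codes=self.module.item_codes_cervical_can['cervical_cancer_screening_xpert'])
+        #     if not cons_avail:
+        #         return  # the screening test cannot be done without the consumables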
+ + # Run a test to diagnose whether the person has condition: + dx_result = hs.dx_manager.run_dx_test( + dx_tests_to_run='screening_with_xpert_for_hpv', + hsi_event=self + ) + df.at[person_id, "ce_date_last_screened"] = self.sim.date + df.at[person_id, "ce_date_xpert"] = self.sim.date + df.at[person_id, "ce_ever_screened"] = True + + if dx_result: + df.at[person_id, 'ce_xpert_hpv_ever_pos'] = True + + hpv_cin_options = ['hpv','cin1','cin2','cin3'] + hpv_stage_options = ['stage1','stage2a','stage2b','stage3','stage4'] + + # If HIV negative, do VIA + if not person['hv_inf']: + if dx_result and (df.at[person_id, 'ce_hpv_cc_status'] in (hpv_cin_options+hpv_stage_options) + ): + hs.schedule_hsi_event( + hsi_event=HSI_CervicalCancer_AceticAcidScreening( + module=self.module, + person_id=person_id + ), + priority=0, + topen=self.sim.date, + tclose=None + ) + # IF HIV positive, + if person['hv_inf']: + if dx_result and (df.at[person_id, 'ce_hpv_cc_status'] in (hpv_cin_options+hpv_stage_options) + ): + if year >= p['transition_testing_year']: + hs.schedule_hsi_event( + hsi_event=HSI_CervicalCancer_Thermoablation_CIN( + module=self.module, + person_id=person_id + ), + priority=0, + topen=self.sim.date, + tclose=None + ) + else: + hs.schedule_hsi_event( + hsi_event=HSI_CervicalCancer_Cryotherapy_CIN( + module=self.module, + person_id=person_id + ), + priority=0, + topen=self.sim.date, + tclose=None + ) + + # sy_chosen_via_screening_for_cin_cervical_cancer reset to 0 + # if df.at[person_id, 'sy_chosen_xpert_screening_for_hpv_cervical_cancer'] == 2: + # self.sim.modules['SymptomManager'].change_symptom( + # person_id=person_id, + # symptom_string='chosen_xpert_screening_for_hpv_cervical_cancer', + # add_or_remove='-', + # disease_module=self.module + # ) + # + # df.at[person_id, 'ce_selected_for_xpert_this_month'] = False + + + +class HSI_CervicalCancerPresentationVaginalBleeding(HSI_Event, IndividualScopeEventMixin): + + def __init__(self, module, person_id): + super().__init__(module, person_id=person_id) + + self.TREATMENT_ID = "CervicalCancer_presentation_vaginal_bleeding" + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({"Over5OPD": 1}) + self.ACCEPTED_FACILITY_LEVEL = '1a' + + def apply(self, person_id, squeeze_factor): + df = self.sim.population.props + person = df.loc[person_id] + hs = self.sim.modules["HealthSystem"] + p = self.sim.modules['CervicalCancer'].parameters + m = self.module + rng = m.rng + random_value = rng.random() + + if random_value <= p['prob_referral_biopsy_given_vaginal_bleeding']: + hs.schedule_hsi_event( + hsi_event=HSI_CervicalCancer_Biopsy( + module=self.module, + person_id=person_id + ), + priority=0, + topen=self.sim.date, + tclose=None + ) + +class HSI_CervicalCancer_Biopsy(HSI_Event, IndividualScopeEventMixin): + + def __init__(self, module, person_id): + super().__init__(module, person_id=person_id) + +# print(person_id, self.sim.date, 'vaginal_bleeding_hsi_called -1') + + self.TREATMENT_ID = "CervicalCancer_Biopsy" + + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({"Over5OPD": 1}) + self.ACCEPTED_FACILITY_LEVEL = '3' + + def apply(self, person_id, squeeze_factor): + df = self.sim.population.props + hs = self.sim.modules["HealthSystem"] + + # Use a biopsy to diagnose whether the person has cervical cancer + # todo: request consumables needed for this and elsewhere + + dx_result = hs.dx_manager.run_dx_test( + dx_tests_to_run='biopsy_for_cervical_cancer', + hsi_event=self + ) + + df.at[person_id, "ce_biopsy"] = True + + if dx_result and 
(df.at[person_id, 'ce_hpv_cc_status'] == 'stage1' + or df.at[person_id, 'ce_hpv_cc_status'] == 'stage2a' + or df.at[person_id, 'ce_hpv_cc_status'] == 'stage2b' + or df.at[person_id, 'ce_hpv_cc_status'] == 'stage3' + or df.at[person_id, 'ce_hpv_cc_status'] == 'stage4'): + # Record date of diagnosis: + df.at[person_id, 'ce_date_diagnosis'] = self.sim.date + df.at[person_id, 'ce_stage_at_diagnosis'] = df.at[person_id, 'ce_hpv_cc_status'] + df.at[person_id, 'ce_current_cc_diagnosed'] = True + df.at[person_id, 'ce_ever_diagnosed'] = True + + # Check if is in stage4: + in_stage4 = df.at[person_id, 'ce_hpv_cc_status'] == 'stage4' + # If the diagnosis does detect cancer, it is assumed that the classification as stage4 is made accurately. + + if not in_stage4: + # start treatment: + hs.schedule_hsi_event( + hsi_event=HSI_CervicalCancer_StartTreatment( + module=self.module, + person_id=person_id + ), + priority=0, + topen=self.sim.date, + tclose=None + ) + + else: + # start palliative care: + hs.schedule_hsi_event( + hsi_event=HSI_CervicalCancer_PalliativeCare( + module=self.module, + person_id=person_id + ), + priority=0, + topen=self.sim.date, + tclose=None + ) + + +class HSI_CervicalCancer_Thermoablation_CIN(HSI_Event, IndividualScopeEventMixin): + + def __init__(self, module, person_id): + super().__init__(module, person_id=person_id) + + self.TREATMENT_ID = "CervicalCancer_Thermoablation_CIN" + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({"Over5OPD": 1}) + self.ACCEPTED_FACILITY_LEVEL = '1a' + + def apply(self, person_id, squeeze_factor): + df = self.sim.population.props + hs = self.sim.modules["HealthSystem"] + p = self.sim.modules['CervicalCancer'].parameters + + # (msyamboza et al 2016) + + # Record date and stage of starting treatment + df.at[person_id, "ce_date_thermoabl"] = self.sim.date + + random_value = self.module.rng.random() + + if random_value <= p['prob_thermoabl_successful']: + df.at[person_id, "ce_hpv_cc_status"] = 'none' + + +class HSI_CervicalCancer_Cryotherapy_CIN(HSI_Event, IndividualScopeEventMixin): + + def __init__(self, module, person_id): + super().__init__(module, person_id=person_id) + + self.TREATMENT_ID = "CervicalCancer_Cryotherapy_CIN" + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({"Over5OPD": 1}) + self.ACCEPTED_FACILITY_LEVEL = '1a' + + def apply(self, person_id, squeeze_factor): + df = self.sim.population.props + hs = self.sim.modules["HealthSystem"] + p = self.sim.modules['CervicalCancer'].parameters + + # (msyamboza et al 2016) + + # Record date and stage of starting treatment + df.at[person_id, "ce_date_cryotherapy"] = self.sim.date + + random_value = self.module.rng.random() + + if random_value <= p['prob_cryotherapy_successful']: + df.at[person_id, "ce_hpv_cc_status"] = 'none' + + +class HSI_CervicalCancer_StartTreatment(HSI_Event, IndividualScopeEventMixin): + """ + This event is scheduled by HSI_CervicalCancer_Biopsy following a diagnosis of + cervical Cancer. It initiates the treatment of cervical Cancer. + It is only for persons with a cancer that is not in stage4 and who have been diagnosed. 
+ """ + + def __init__(self, module, person_id): + super().__init__(module, person_id=person_id) + + self.TREATMENT_ID = "CervicalCancer_StartTreatment" + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({"MajorSurg": 1}) + self.ACCEPTED_FACILITY_LEVEL = '3' + self.BEDDAYS_FOOTPRINT = self.make_beddays_footprint({"general_bed": 5}) + + def apply(self, person_id, squeeze_factor): + df = self.sim.population.props + hs = self.sim.modules["HealthSystem"] + p = self.sim.modules['CervicalCancer'].parameters + + # If the status is already in `stage4`, start palliative care (instead of treatment) + if df.at[person_id, "ce_hpv_cc_status"] == 'stage4': + logger.warning(key="warning", data="Cancer is in stage 4 - aborting HSI_CervicalCancer_StartTreatment," + "scheduling HSI_CervicalCancer_PalliativeCare") + + hs.schedule_hsi_event( + hsi_event=HSI_CervicalCancer_PalliativeCare( + module=self.module, + person_id=person_id, + ), + topen=self.sim.date, + tclose=None, + priority=0 + ) + return self.make_appt_footprint({}) + + # Check that the person has been diagnosed and is not on treatment + assert not pd.isnull(df.at[person_id, "ce_date_diagnosis"]) + + # Record date and stage of starting treatment + df.at[person_id, "ce_date_treatment"] = self.sim.date + df.at[person_id, "ce_ever_treated"] = True + df.at[person_id, "ce_stage_at_which_treatment_given"] = df.at[person_id, "ce_hpv_cc_status"] + + # stop vaginal bleeding + self.sim.modules['SymptomManager'].change_symptom( + person_id=person_id, + symptom_string='vaginal_bleeding', + add_or_remove='-', + disease_module=self.module + ) + + random_value = self.module.rng.random() + + if (random_value <= p['prob_cure_stage1'] and df.at[person_id, "ce_hpv_cc_status"] == "stage1" + and df.at[person_id, "ce_date_treatment"] == self.sim.date): + df.at[person_id, "ce_hpv_cc_status"] = 'none' + df.at[person_id, 'ce_current_cc_diagnosed'] = False + df.at[person_id, 'ce_cured_date_cc'] = self.sim.date + else: + df.at[person_id, "ce_hpv_cc_status"] = 'stage1' + + if (random_value <= p['prob_cure_stage2a'] and df.at[person_id, "ce_hpv_cc_status"] == "stage2a" + and df.at[person_id, "ce_date_treatment"] == self.sim.date): + df.at[person_id, "ce_hpv_cc_status"] = 'none' + df.at[person_id, 'ce_current_cc_diagnosed'] = False + df.at[person_id, 'ce_cured_date_cc'] = self.sim.date + else: + df.at[person_id, "ce_hpv_cc_status"] = 'stage2a' + + if (random_value <= p['prob_cure_stage2b'] and df.at[person_id, "ce_hpv_cc_status"] == "stage2b" + and df.at[person_id, "ce_date_treatment"] == self.sim.date): + df.at[person_id, "ce_hpv_cc_status"] = 'none' + df.at[person_id, 'ce_current_cc_diagnosed'] = False + df.at[person_id, 'ce_cured_date_cc'] = self.sim.date + else: + df.at[person_id, "ce_hpv_cc_status"] = 'stage2b' + + if (random_value <= p['prob_cure_stage3'] and df.at[person_id, "ce_hpv_cc_status"] == "stage3" + and df.at[person_id, "ce_date_treatment"] == self.sim.date): + df.at[person_id, "ce_hpv_cc_status"] = 'none' + df.at[person_id, 'ce_current_cc_diagnosed'] = False + df.at[person_id, 'ce_cured_date_cc'] = self.sim.date + else: + df.at[person_id, "ce_hpv_cc_status"] = 'stage3' + + # Schedule a post-treatment check for 3 months: + hs.schedule_hsi_event( + hsi_event=HSI_CervicalCancer_PostTreatmentCheck( + module=self.module, + person_id=person_id, + ), + topen=self.sim.date + DateOffset(months=3), + tclose=None, + priority=0 + ) + +class HSI_CervicalCancer_PostTreatmentCheck(HSI_Event, IndividualScopeEventMixin): + """ + This event is scheduled by 
+    It is only for those who have undergone treatment for cervical cancer.
+    If the person has developed cancer to stage4, the patient is initiated on palliative care; otherwise a further
+    appointment is scheduled (at 3, 6 or 12 months, depending on the time since treatment).
+    """
+
+    def __init__(self, module, person_id):
+        super().__init__(module, person_id=person_id)
+
+        self.TREATMENT_ID = "CervicalCancer_PostTreatmentCheck"
+        self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({"Over5OPD": 1})
+        self.ACCEPTED_FACILITY_LEVEL = '3'
+
+    def apply(self, person_id, squeeze_factor):
+        df = self.sim.population.props
+        hs = self.sim.modules["HealthSystem"]
+
+        assert not pd.isnull(df.at[person_id, "ce_date_diagnosis"])
+        assert not pd.isnull(df.at[person_id, "ce_date_treatment"])
+
+        # todo:
+        #  could use pd.DateOffset(years=...) instead of the number of days for ease of
+        #  reading/comprehension
+
+        if df.at[person_id, 'ce_hpv_cc_status'] == 'stage4':
+            # If has progressed to stage4, then start Palliative Care immediately:
+            hs.schedule_hsi_event(
+                hsi_event=HSI_CervicalCancer_PalliativeCare(
+                    module=self.module,
+                    person_id=person_id
+                ),
+                topen=self.sim.date,
+                tclose=None,
+                priority=0
+            )
+
+        else:
+            # Schedule the next check with a frequency that declines with the time since treatment
+            # (3-monthly in the first year, 6-monthly up to three years, annually up to five years).
+            # The elif chain ensures that treatment dates falling exactly on a boundary are not missed.
+            date_treated = df.at[person_id, 'ce_date_treatment']
+            if date_treated > (self.sim.date - pd.DateOffset(years=1)):
+                months_to_next_check = 3
+            elif date_treated > (self.sim.date - pd.DateOffset(years=3)):
+                months_to_next_check = 6
+            elif date_treated > (self.sim.date - pd.DateOffset(years=5)):
+                months_to_next_check = 12
+            else:
+                months_to_next_check = None  # no further routine checks after five years
+
+            if months_to_next_check is not None:
+                hs.schedule_hsi_event(
+                    hsi_event=HSI_CervicalCancer_PostTreatmentCheck(
+                        module=self.module,
+                        person_id=person_id
+                    ),
+                    topen=self.sim.date + DateOffset(months=months_to_next_check),
+                    tclose=None,
+                    priority=0
+                )
+
+class HSI_CervicalCancer_PalliativeCare(HSI_Event, IndividualScopeEventMixin):
+    """
+    This is the event for palliative care. It does not affect the patient's progress but does affect the disability
+    weight and takes resources from the healthsystem.
+    This event is scheduled by either:
+    * HSI_CervicalCancer_Biopsy following a diagnosis of cervical cancer at stage4.
+    * HSI_CervicalCancer_PostTreatmentCheck following progression to stage4 during treatment.
+    * Itself, for the continuance of care.
+    It is only for persons with a cancer in stage4.
+ """ + + def __init__(self, module, person_id): + super().__init__(module, person_id=person_id) + + self.TREATMENT_ID = "CervicalCancer_PalliativeCare" + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({}) + self.ACCEPTED_FACILITY_LEVEL = '2' + self.BEDDAYS_FOOTPRINT = self.make_beddays_footprint({'general_bed': 15}) + + def apply(self, person_id, squeeze_factor): + df = self.sim.population.props + hs = self.sim.modules["HealthSystem"] + + # Check that the person is in stage4 + assert df.at[person_id, "ce_hpv_cc_status"] == 'stage4' + + # Record the start of palliative care if this is first appointment + if pd.isnull(df.at[person_id, "ce_date_palliative_care"]): + df.at[person_id, "ce_date_palliative_care"] = self.sim.date + + + + # todo: + # for scheduling the same class of HSI_Event to multiple people, more + # efficient to use schedule_batch_of_individual_hsi_events + + + + + # Schedule another instance of the event for one month + hs.schedule_hsi_event( + hsi_event=HSI_CervicalCancer_PalliativeCare( + module=self.module, + person_id=person_id + ), + topen=self.sim.date + DateOffset(months=1), + tclose=None, + priority=0 + ) + + +class HSI_CervicalCancer_Screening(HSI_Event, IndividualScopeEventMixin): + """ + This event is scheduled by HSI_GenericFirstApptAtFacilityLevel1 following screening using VIA or XPERT. + This event begins the investigation that may result in diagnosis of Cervical Cancer and the scheduling + of treatment or palliative care. + """ + + def __init__(self, module, person_id): + super().__init__(module, person_id=person_id) + + self.TREATMENT_ID = "CervicalCancer_Screening" + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({"Over5OPD": 1}) + self.ACCEPTED_FACILITY_LEVEL = '1a' + + def apply(self, person_id, squeeze_factor): + df = self.sim.population.props + person = df.loc[person_id] + hs = self.sim.modules["HealthSystem"] + + # Ignore this event if the person is no longer alive: + if not person.is_alive: + return hs.get_blank_appt_footprint() + + # If the person is already diagnosed, then take no action: + if not pd.isnull(df.at[person_id, "ce_date_diagnosis"]): + return hs.get_blank_appt_footprint() + + if df.at[person_id, 'ce_selected_for_via_this_month'] == True: + hs.schedule_hsi_event( + hsi_event=HSI_CervicalCancer_AceticAcidScreening( + module=self.module, + person_id=person_id + ), + priority=0, + topen=self.sim.date, + tclose=None + ) + + if df.at[person_id, 'ce_selected_for_xpert_this_month'] == True: + hs.schedule_hsi_event( + hsi_event=HSI_CervicalCancer_XpertHPVScreening( + module=self.module, + person_id=person_id + ), + priority=0, + topen=self.sim.date, + tclose=None + ) + + +# --------------------------------------------------------------------------------------------------------- +# LOGGING EVENTS +# --------------------------------------------------------------------------------------------------------- + +class CervicalCancerLoggingEvent(RegularEvent, PopulationScopeEventMixin): + """The only logging event for this module""" + + # the use of groupby might be more efficient in computing the statistics below; + + def __init__(self, module): + """schedule logging to repeat every 1 month + """ + self.repeat = 30 + super().__init__(module, frequency=DateOffset(days=self.repeat)) + + def apply(self, population): + """Compute statistics regarding the current status of persons and output to the logger + """ + df = population.props + + # CURRENT STATUS COUNTS + # Create dictionary for each subset, adding prefix to key name, and adding 
to make a flat dict for logging. + out = {} + + date_lastlog = self.sim.date - pd.DateOffset(days=29) + + # Current counts, total + out.update({ + f'total_{k}': v for k, v in df.loc[df.is_alive & (df['sex'] == 'F') & + (df['age_years'] > 15)].ce_hpv_cc_status.value_counts().items()}) + + # Current counts, total hiv negative + out.update({ + f'total_hivneg_{k}': v for k, v in df.loc[df.is_alive & (df['sex'] == 'F') & + (df['age_years'] > 15) & (~df['hv_inf'])].ce_hpv_cc_status.value_counts().items()}) + + # Current counts, total hiv positive + out.update({ + f'total_hivpos_{k}': v for k, v in df.loc[df.is_alive & (df['sex'] == 'F') & + (df['age_years'] > 15) & (df['hv_inf'])].ce_hpv_cc_status.value_counts().items()}) + + # Get the day of the year + day_of_year = self.sim.date.timetuple().tm_yday + + # Calculate the decimal year + decimal_year = self.sim.date.year + (day_of_year - 1) / 365.25 + rounded_decimal_year = round(decimal_year, 2) + + date_1_year_ago = self.sim.date - pd.DateOffset(days=365) + n_deaths_past_year = df.ce_date_death.between(date_1_year_ago, self.sim.date).sum() + n_deaths_cc_hivneg_past_year = ((~df['hv_inf']) & df.ce_date_death.between(date_1_year_ago, self.sim.date)).sum() + n_deaths_cc_hivpos_past_year = ((df['hv_inf']) & df.ce_date_death.between(date_1_year_ago, self.sim.date)).sum() + n_deaths_cc_hiv_past_year = ((df['hv_inf']) & df.ce_date_death.between(date_1_year_ago, self.sim.date)).sum() + n_treated_past_year = df.ce_date_treatment.between(date_1_year_ago, self.sim.date).sum() + n_cured_past_year = df.ce_cured_date_cc.between(date_1_year_ago, self.sim.date).sum() + n_thermoabl_past_year = df.ce_date_thermoabl.between(date_1_year_ago, self.sim.date).sum() + n_cryotherapy_past_year = df.ce_date_cryotherapy.between(date_1_year_ago, self.sim.date).sum() + n_via_past_year = df.ce_date_via.between(date_1_year_ago, self.sim.date).sum() + n_xpert_past_year = df.ce_date_xpert.between(date_1_year_ago, self.sim.date).sum() + + + date_1p25_years_ago = self.sim.date - pd.DateOffset(days=456) + date_0p75_years_ago = self.sim.date - pd.DateOffset(days=274) + + cc = (df.is_alive & ((df.ce_hpv_cc_status == 'stage1') | (df.ce_hpv_cc_status == 'stage2a') + | (df.ce_hpv_cc_status == 'stage2b') | (df.ce_hpv_cc_status == 'stage3') + | (df.ce_hpv_cc_status == 'stage4'))).sum() + cc_hiv = (df.is_alive & df.hv_inf & ((df.ce_hpv_cc_status == 'stage1') | (df.ce_hpv_cc_status == 'stage2a') + | (df.ce_hpv_cc_status == 'stage2b') | (df.ce_hpv_cc_status == 'stage3') + | (df.ce_hpv_cc_status == 'stage4'))).sum() + if cc > 0: + prop_cc_hiv = cc_hiv / cc + else: + prop_cc_hiv = np.nan + + n_screened_via_this_month = (df.is_alive & df.ce_selected_for_via_this_month).sum() + n_screened_xpert_this_month = (df.is_alive & df.ce_selected_for_xpert_this_month).sum() + n_ever_screened = ( + (df['is_alive']) & (df['ce_ever_screened']) & (df['age_years'] > 15) & (df['age_years'] < 50)).sum() + + n_vaginal_bleeding_stage1 = (df.is_alive & (df.sy_vaginal_bleeding == 2) & + (df.ce_hpv_cc_status == 'stage1')).sum() + n_vaginal_bleeding_stage2a = (df.is_alive & (df.sy_vaginal_bleeding == 2) & + (df.ce_hpv_cc_status == 'stage2a')).sum() + n_vaginal_bleeding_stage2b = (df.is_alive & (df.sy_vaginal_bleeding == 2) & + (df.ce_hpv_cc_status == 'stage2b')).sum() + n_vaginal_bleeding_stage3 = (df.is_alive & (df.sy_vaginal_bleeding == 2) & + (df.ce_hpv_cc_status == 'stage3')).sum() + n_vaginal_bleeding_stage4 = (df.is_alive & (df.sy_vaginal_bleeding == 2) & + (df.ce_hpv_cc_status == 'stage4')).sum() + + 
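+        # The five per-stage counts above could be computed in one pass with groupby, as suggested in the note at
+        # the top of this class (a sketch only):
+        #
+        #     n_vaginal_bleeding_by_stage = (
+        #         df.loc[df.is_alive & (df.sy_vaginal_bleeding == 2)]
+        #         .groupby('ce_hpv_cc_status').size()
+        #     )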
n_diagnosed_1_year_ago = df.ce_date_diagnosis.between(date_1p25_years_ago, date_0p75_years_ago).sum() + n_diagnosed_1_year_ago_died = (df.ce_date_diagnosis.between(date_1p25_years_ago, date_0p75_years_ago) + & ~df.is_alive).sum() + + n_diagnosed_past_year_stage1 = \ + (df.ce_date_diagnosis.between(date_1_year_ago, self.sim.date) & + (df.ce_stage_at_diagnosis == 'stage1')).sum() + n_diagnosed_past_year_stage2a = \ + (df.ce_date_diagnosis.between(date_1_year_ago, self.sim.date) & + (df.ce_stage_at_diagnosis == 'stage2a')).sum() + n_diagnosed_past_year_stage2b = \ + (df.ce_date_diagnosis.between(date_1_year_ago, self.sim.date) & + (df.ce_stage_at_diagnosis == 'stage2b')).sum() + n_diagnosed_past_year_stage3 = \ + (df.ce_date_diagnosis.between(date_1_year_ago, self.sim.date) & + (df.ce_stage_at_diagnosis == 'stage3')).sum() + n_diagnosed_past_year_stage4 = \ + (df.ce_date_diagnosis.between(date_1_year_ago, self.sim.date) & + (df.ce_stage_at_diagnosis == 'stage4')).sum() + + n_diagnosed_past_year = (df['ce_date_diagnosis'].between(date_1_year_ago, self.sim.date)).sum() + + n_ever_diagnosed = ((df['is_alive']) & (df['ce_ever_diagnosed'])).sum() + + n_women_alive = ((df['is_alive']) & (df['sex'] == 'F') & (df['age_years'] > 15)).sum() + n_women_alive_1549 = ((df['is_alive']) & (df['sex'] == 'F') & (df['age_years'] > 15) + & (df['age_years'] < 50)).sum() + + n_women_vaccinated = ((df['is_alive']) & (df['sex'] == 'F') & (df['age_years'] > 15) + & df['va_hpv']).sum() + + n_women_hiv_unsuppressed = ((df['is_alive']) & (df['sex'] == 'F') & (df['age_years'] > 15) + & df['ce_hiv_unsuppressed']).sum() + + n_women_hivneg = ((df['is_alive']) & + (df['sex'] == 'F') & + (df['age_years'] > 15) & + (~df['hv_inf'])).sum() + + n_women_hivpos = ((df['is_alive']) & + (df['sex'] == 'F') & + (df['age_years'] > 15) & + (df['hv_inf'])).sum() + + rate_diagnosed_cc = n_diagnosed_past_year / n_women_alive + + n_women_living_with_diagnosed_cc = \ + (df['ce_date_diagnosis'].notnull()).sum() + + n_women_living_with_diagnosed_cc_age_lt_30 = \ + (df['ce_date_diagnosis'].notnull() & (df['age_years'] < 30)).sum() + n_women_living_with_diagnosed_cc_age_3050 = \ + (df['ce_date_diagnosis'].notnull() & (df['age_years'] > 29) & (df['age_years'] < 50)).sum() + n_women_living_with_diagnosed_cc_age_gt_50 = \ + (df['ce_date_diagnosis'].notnull() & (df['age_years'] > 49)).sum() + + out.update({"rounded_decimal_year": rounded_decimal_year}) + out.update({"n_deaths_past_year": n_deaths_past_year}) + out.update({"n_deaths_cc_hivneg_past_year": n_deaths_cc_hivneg_past_year}) + out.update({"n_deaths_cc_hivpos_past_year": n_deaths_cc_hivpos_past_year}) + out.update({"n_deaths_cc_hiv_past_year": n_deaths_cc_hiv_past_year}) + out.update({"n_treated_past_year": n_treated_past_year}) + out.update({"n_cured_past_year": n_cured_past_year}) + out.update({"prop_cc_hiv": prop_cc_hiv}) + out.update({"n_diagnosed_past_year_stage1": n_diagnosed_past_year_stage1}) + out.update({"n_diagnosed_past_year_stage2a": n_diagnosed_past_year_stage2a}) + out.update({"n_diagnosed_past_year_stage2b": n_diagnosed_past_year_stage2b}) + out.update({"n_diagnosed_past_year_stage3": n_diagnosed_past_year_stage3}) + out.update({"n_diagnosed_past_year_stage4": n_diagnosed_past_year_stage4}) + out.update({"n_ever_diagnosed": n_ever_diagnosed}) + out.update({"n_screened_xpert_this_month": n_screened_xpert_this_month}) + out.update({"n_screened_via_this_month": n_screened_via_this_month}) + out.update({"n_women_alive": n_women_alive}) + out.update({"n_women_alive_1549": 
n_women_alive_1549}) + out.update({"n_ever_screened": n_ever_screened}) + out.update({"n_women_vaccinated": n_women_vaccinated}) + out.update({"n_vaginal_bleeding_stage1": n_vaginal_bleeding_stage1}) + out.update({"n_vaginal_bleeding_stage2a": n_vaginal_bleeding_stage2a}) + out.update({"n_vaginal_bleeding_stage2b": n_vaginal_bleeding_stage2b}) + out.update({"n_vaginal_bleeding_stage3": n_vaginal_bleeding_stage3}) + out.update({"n_vaginal_bleeding_stage4": n_vaginal_bleeding_stage4}) + out.update({"n_diagnosed_past_year": n_diagnosed_past_year}) + out.update({"n_women_alive": n_women_alive}) + out.update({"rate_diagnosed_cc": rate_diagnosed_cc}) + out.update({"cc": cc}) + out.update({"n_women_living_with_diagnosed_cc": n_women_living_with_diagnosed_cc }) + out.update({"n_women_living_with_diagnosed_cc_age_lt_30": n_women_living_with_diagnosed_cc_age_lt_30}) + out.update({"n_women_living_with_diagnosed_cc_age_3050": n_women_living_with_diagnosed_cc_age_3050}) + out.update({"n_women_living_with_diagnosed_cc_age_gt_50": n_women_living_with_diagnosed_cc_age_gt_50}) + out.update({"n_diagnosed_1_year_ago": n_diagnosed_1_year_ago}) + out.update({"n_diagnosed_1_year_ago_died": n_diagnosed_1_year_ago_died}) + out.update({"n_women_hiv_unsuppressed": n_women_hiv_unsuppressed}) + out.update({"n_women_hivneg": n_women_hivneg}) + out.update({"n_women_hivpos": n_women_hivpos}) + out.update({"n_thermoabl_past_year": n_thermoabl_past_year}) + out.update({"n_cryotherapy_past_year": n_cryotherapy_past_year}) + out.update({"n_via_past_year": n_via_past_year}) + out.update({"n_xpert_past_year": n_xpert_past_year}) + + + pop = len(df[df.is_alive]) + count_summary = { + "population": pop, + "n_deaths_past_year": n_deaths_past_year, + "n_women_alive": n_women_alive, + "n_women_living_with_diagnosed_cc": n_women_living_with_diagnosed_cc, + } + + logger.info(key="deaths", + data=count_summary, + description="summary of deaths") + + logger.info(key="all", + data=out, + description="all_data") + # todo: + # ? move to using the logger: + # i.e. 
logger.info(key='cervical_cancer_stats_every_month', description='XX', data=out) + + print(self.sim.date, 'total_none:', out['total_none'], 'total_hpv:', out['total_hpv'], 'total_cin1:',out['total_cin1'], + 'total_cin2:', out['total_cin2'], 'total_cin3:', out['total_cin3'], 'total_stage1:', out['total_stage1'], + 'total_stage2a:', out['total_stage2a'], 'total_stage2b:', out['total_stage2b'], + 'total_stage3:', out['total_stage3'],'total_stage4:', out['total_stage4'], + 'total_hivneg_none:', out['total_hivneg_none'], 'total_hivneg_hpv:', out['total_hivneg_hpv'], 'total_hivneg_cin1:', out['total_hivneg_cin1'], + 'total_hivneg_cin2:', out['total_hivneg_cin2'], 'total_hivneg_cin3:', out['total_hivneg_cin3'], 'total_hivneg_stage1:', out['total_hivneg_stage1'], + 'total_hivneg_stage2a:', out['total_hivneg_stage2a'], 'total_hivneg_stage2b:', out['total_hivneg_stage2b'], + 'total_hivneg_stage3:', out['total_hivneg_stage3'], 'total_hivneg_stage4:', out['total_hivneg_stage4'], + 'year:', out['rounded_decimal_year'], 'deaths_past_year:', out['n_deaths_past_year'],out['n_via_past_year'],out['n_xpert_past_year'], + 'n_deaths_cc_hivneg_past_year:', out['n_deaths_cc_hivneg_past_year'], + 'n_deaths_cc_hivpos_past_year:', out['n_deaths_cc_hivpos_past_year'], + 'n_deaths_cc_hiv_past_year:', out['n_deaths_cc_hiv_past_year'], + 'treated past year:', out['n_treated_past_year'], 'prop cc hiv:', out['prop_cc_hiv'], + 'n_vaginal_bleeding_stage1:', out['n_vaginal_bleeding_stage1'], + 'n_vaginal_bleeding_stage2a:', out['n_vaginal_bleeding_stage2a'], + 'n_vaginal_bleeding_stage2b:', out['n_vaginal_bleeding_stage2b'], + 'n_vaginal_bleeding_stage3:', out['n_vaginal_bleeding_stage3'], + 'n_vaginal_bleeding_stage4:', out['n_vaginal_bleeding_stage4'], + 'diagnosed_past_year_stage1:', out['n_diagnosed_past_year_stage1'], + 'diagnosed_past_year_stage2a:', out['n_diagnosed_past_year_stage2a'], + 'diagnosed_past_year_stage2b:', out['n_diagnosed_past_year_stage2b'], + 'diagnosed_past_year_stage3:', out['n_diagnosed_past_year_stage3'], + 'diagnosed_past_year_stage4:', out['n_diagnosed_past_year_stage4'], + 'n_ever_diagnosed', out['n_ever_diagnosed'], + 'n_screened_xpert_this_month:', out['n_screened_xpert_this_month'], + 'n_screened_via_this_month:', out['n_screened_via_this_month'], + 'n_women_alive', out['n_women_alive'], + 'n_women_alive_1549', out['n_women_alive_1549'], + 'n_women_vaccinated', out['n_women_vaccinated'], + 'n_ever_screened', out['n_ever_screened'], + 'n_diagnosed_past_year:', out['n_diagnosed_past_year'], + 'n_cured_past_year:', out['n_cured_past_year'], + 'n_thermoabl_past_year:', out['n_thermoabl_past_year'], + 'n_cryotherapy_past_year:', out['n_cryotherapy_past_year'], + 'n_women_alive:', out['n_women_alive'], + 'rate_diagnosed_cc:', out['rate_diagnosed_cc'], + 'n_women_with_cc:', out['cc'], + 'n_women_living_with_diagnosed_cc:', out['n_women_living_with_diagnosed_cc'], + 'n_women_living_with_diagnosed_cc_age_lt_30:', out['n_women_living_with_diagnosed_cc_age_lt_30'], + 'n_women_living_with_diagnosed_cc_age_3050:', out['n_women_living_with_diagnosed_cc_age_3050'], + 'n_women_living_with_diagnosed_cc_age_gt_50:', out['n_women_living_with_diagnosed_cc_age_gt_50'], + 'n_diagnosed_1_year_ago_died:', out['n_diagnosed_1_year_ago_died'], + 'n_diagnosed_1_year_ago:', out['n_diagnosed_1_year_ago'], + 'n_women_hiv_unsuppressed:', out['n_women_hiv_unsuppressed'], + 'n_women_hivneg', out['n_women_hivneg'], + 'n_women_hivpos', out['n_women_hivpos']) + + # comment out this below when running tests + + # Specify the 
file path for the CSV file + out_csv = Path("./outputs/output1_data.csv") + +# comment out this code below only when running tests + + with open(out_csv, "a", newline="") as csv_file: + # Create a CSV writer + csv_writer = csv.DictWriter(csv_file, fieldnames=out.keys()) + + # If the file is empty, write the header + if csv_file.tell() == 0: + csv_writer.writeheader() + + # Write the data to the CSV file + csv_writer.writerow(out) + +# print(out) + + # Disable column truncation + pd.set_option('display.max_columns', None) + + # Set the display width to a large value to fit all columns in one row + pd.set_option('display.width', 1000) + + selected_columns = ["ce_hpv_cc_status", + "ce_date_treatment", + "ce_stage_at_which_treatment_given", + "ce_date_diagnosis", + "ce_new_stage_this_month", + "ce_date_palliative_care", + "ce_date_death", + "ce_date_cin_removal", + "ce_date_treatment", + "ce_stage_at_diagnosis", + "ce_ever_treated", + "ce_cc_ever", + "ce_xpert_hpv_ever_pos", + "ce_via_cin_ever_detected", + "ce_date_thermoabl", + "ce_date_cryotherapy", + "ce_current_cc_diagnosed", + "ce_selected_for_via_this_month", + "ce_selected_for_xpert_this_month", + "ce_biopsy"] + + + selected_columns = ['ce_hpv_cc_status', 'sy_vaginal_bleeding', 'ce_biopsy','ce_current_cc_diagnosed', + 'ce_selected_for_xpert_this_month', 'sy_chosen_xpert_screening_for_hpv_cervical_cancer', + 'ce_xpert_hpv_ever_pos', 'ce_date_thermoabl','ce_date_cryotherapy', + 'ce_date_diagnosis', 'ce_date_treatment','ce_cured_date_cc', + 'ce_date_palliative_care', 'ce_selected_for_via_this_month', 'sy_chosen_via_screening_for_cin_cervical_cancer', + 'ce_via_cin_ever_detected'] + +# selected_columns = ["hv_inf", "ce_hiv_unsuppressed", "hv_art", "ce_hpv_cc_status",'ce_cured_date_cc'] + + selected_columns = ["ce_selected_for_via_this_month", "ce_selected_for_xpert_this_month", + "ce_ever_screened", "ce_date_last_screened", "ce_date_cin_removal", + "ce_xpert_hpv_ever_pos", "ce_via_cin_ever_detected", "ce_date_thermoabl","ce_date_cryotherapy", + "ce_biopsy"] + + selected_columns = ["ce_hpv_cc_status"] + + selected_rows = df[(df['sex'] == 'F') & (df['age_years'] > 15) & df['is_alive'] & (df['hv_inf'])] + +# pd.set_option('display.max_rows', None) + print(selected_rows[selected_columns]) + +# selected_columns = ['sex', 'age_years', 'is_alive'] +# pd.set_option('display.max_rows', None) +# print(df[selected_columns]) + + + + + + + + diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py index 6fb43e4a6d..9a96ae93cd 100644 --- a/src/tlo/methods/consumables.py +++ b/src/tlo/methods/consumables.py @@ -54,7 +54,7 @@ def __init__(self, self._prob_item_codes_available = None # Data on the probability of each item_code being available self._is_available = None # Dict of sets giving the set of item_codes available, by facility_id self._is_unknown_item_available = None # Whether an unknown item is available, by facility_id - self._not_recognised_item_codes = defaultdict(set) # The item codes requested but which are not recognised. + self._not_recognised_item_codes = set() # The item codes requested but which are not recognised. # Save designations self._item_code_designations = item_code_designations @@ -214,9 +214,8 @@ def _request_consumables(self, """ # Issue warning if any item_code is not recognised. 
-        not_recognised_item_codes = item_codes.keys() - self.item_codes
-        if len(not_recognised_item_codes) > 0:
-            self._not_recognised_item_codes[treatment_id] |= not_recognised_item_codes
+        if not self.item_codes.issuperset(item_codes.keys()):
+            self._not_recognised_item_codes.add((treatment_id, tuple(set(item_codes.keys()) - self.item_codes)))
 
         # Look-up whether each of these items is available in this facility currently:
         available = self._lookup_availability_of_consumables(item_codes=item_codes, facility_info=facility_info)
@@ -266,24 +265,15 @@ def _lookup_availability_of_consumables(self,
         return avail
 
     def on_simulation_end(self):
-        """Do tasks at the end of the simulation.
-
-        Raise warnings and enter to log about item_codes not recognised.
-        """
-        if len(self._not_recognised_item_codes) > 0:
-            not_recognised_item_codes = {
-                treatment_id if treatment_id is not None else "": sorted(codes)
-                for treatment_id, codes in self._not_recognised_item_codes.items()
-            }
-            warnings.warn(
-                UserWarning(
-                    f"Item_Codes were not recognised.\n{not_recognised_item_codes}"
+        """Do tasks at the end of the simulation: Raise warnings and enter to log about item_codes not recognised."""
+        if self._not_recognised_item_codes:
+            warnings.warn(UserWarning(f"Item_Codes were not recognised.\n"
+                                      f"{self._not_recognised_item_codes}"))
+            for _treatment_id, _item_codes in self._not_recognised_item_codes:
+                logger.info(
+                    key="item_codes_not_recognised",
+                    data={_treatment_id if _treatment_id is not None else "": list(_item_codes)}
                 )
-            )
-            logger.info(
-                key="item_codes_not_recognised",
-                data=not_recognised_item_codes,
-            )
 
     def on_end_of_year(self):
         self._summary_counter.write_to_log_and_reset_counters()
diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py
index e58f3895f4..8d510f29ae 100644
--- a/src/tlo/methods/demography.py
+++ b/src/tlo/methods/demography.py
@@ -26,7 +26,6 @@
     logging,
 )
 from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent
-from tlo.logging.helpers import get_dataframe_row_as_dict_for_logging
 from tlo.methods.causes import (
     Cause,
     collect_causes_from_disease_modules,
@@ -125,6 +124,7 @@ def __init__(self, name=None, resourcefilepath=None, equal_allocation_by_distric
         'date_of_death': Property(Types.DATE, 'Date of death of this individual'),
         'sex': Property(Types.CATEGORICAL, 'Male or female', categories=['M', 'F']),
         'mother_id': Property(Types.INT, 'Unique identifier of mother of this individual'),
+        'district_num_of_residence': Property(Types.INT, 'The district number in which the person is resident'),
 
         # the categories of these properties are set in `pre_initialise_population`
         'cause_of_death': Property(
@@ -133,12 +133,6 @@
             categories=['SET_AT_RUNTIME']
         ),
 
-        'district_num_of_residence': Property(
-            Types.CATEGORICAL,
-            'The district number in which the person is resident',
-            categories=['SET_AT_RUNTIME']
-        ),
-
         'district_of_residence': Property(
             Types.CATEGORICAL,
             'The district (name) of residence (mapped from district_num_of_residence).',
@@ -226,11 +220,6 @@ def pre_initialise_population(self):
             'The cause of death of this individual (the tlo_cause defined by the module)',
             categories=list(self.causes_of_death.keys())
         )
-        self.PROPERTIES['district_num_of_residence'] = Property(
-            Types.CATEGORICAL,
-            'The district (name) of residence (mapped from district_num_of_residence).',
-            categories=sorted(self.parameters['district_num_to_region_name']),
-        )
self.PROPERTIES['district_of_residence'] = Property( Types.CATEGORICAL, 'The district (name) of residence (mapped from district_num_of_residence).', @@ -508,7 +497,7 @@ def do_death(self, individual_id: int, cause: str, originating_module: Module): data_to_log_for_each_death = { 'age': person['age_years'], 'sex': person['sex'], - 'cause': str(cause), + 'cause': cause, 'label': self.causes_of_death[cause].label, 'person_id': individual_id, 'li_wealth': person['li_wealth'] if 'li_wealth' in person else -99, @@ -524,7 +513,7 @@ def do_death(self, individual_id: int, cause: str, originating_module: Module): # - log all the properties for the deceased person logger_detail.info(key='properties_of_deceased_persons', - data=get_dataframe_row_as_dict_for_logging(df, individual_id), + data=person.to_dict(), description='values of all properties at the time of death for deceased persons') # - log the death in the Deviance module (if it is registered) @@ -810,7 +799,7 @@ def apply(self, population): num_children = pd.Series(index=range(5), data=0).add( df[df.is_alive & (df.age_years < 5)].groupby('age_years').size(), fill_value=0 - ).astype(int) + ) logger.info(key='num_children', data=num_children.to_dict()) diff --git a/src/tlo/methods/depression.py b/src/tlo/methods/depression.py index a0ffdd12b2..81ae29403e 100644 --- a/src/tlo/methods/depression.py +++ b/src/tlo/methods/depression.py @@ -593,7 +593,7 @@ def do_on_presentation_to_care(self, person_id: int, hsi_event: HSI_Event): and there may need to be screening for depression. """ if self._check_for_suspected_depression( - self.sim.modules["SymptomManager"].has_what(person_id=person_id), + self.sim.modules["SymptomManager"].has_what(person_id), hsi_event.TREATMENT_ID, self.sim.population.props.at[person_id, "de_ever_diagnosed_depression"], ): @@ -869,10 +869,10 @@ def apply(self, population): n_ever_talk_ther = (df.de_ever_talk_ther & df.is_alive & df.de_depr).sum() def zero_out_nan(x): - return x if not np.isnan(x) else 0.0 + return x if not np.isnan(x) else 0 def safe_divide(x, y): - return float(x / y) if y > 0.0 else 0.0 + return x / y if y > 0.0 else 0.0 dict_for_output = { 'prop_ge15_depr': zero_out_nan(safe_divide(n_ge15_depr, n_ge15)), diff --git a/src/tlo/methods/enhanced_lifestyle.py b/src/tlo/methods/enhanced_lifestyle.py index 26c79d9587..008424ec2b 100644 --- a/src/tlo/methods/enhanced_lifestyle.py +++ b/src/tlo/methods/enhanced_lifestyle.py @@ -12,7 +12,6 @@ from tlo.analysis.utils import flatten_multi_index_series_into_dict_for_logging from tlo.events import PopulationScopeEventMixin, RegularEvent from tlo.lm import LinearModel, LinearModelType, Predictor -from tlo.logging.helpers import grouped_counts_with_all_combinations from tlo.util import get_person_id_to_inherit_from logger = logging.getLogger(__name__) @@ -1940,42 +1939,33 @@ def apply(self, population): for _property in all_lm_keys: if _property in log_by_age_15up: if _property in cat_by_rural_urban_props: - data = grouped_counts_with_all_combinations( - df.loc[df.is_alive & (df.age_years >= 15)], - ["li_urban", "sex", _property, "age_range"] - ) + data = df.loc[df.is_alive & (df.age_years >= 15)].groupby(by=[ + 'li_urban', 'sex', _property, 'age_range']).size() else: - data = grouped_counts_with_all_combinations( - df.loc[df.is_alive & (df.age_years >= 15)], - ["sex", _property, "age_range"] - ) + data = df.loc[df.is_alive & (df.age_years >= 15)].groupby(by=[ + 'sex', _property, 'age_range']).size() + elif _property == 'li_in_ed': - data = 
grouped_counts_with_all_combinations( - df.loc[df.is_alive & df.age_years.between(5, 19)], - ["sex", "li_wealth", "li_in_ed", "age_years"], - {"age_years": range(5, 20)} - ) + data = df.loc[df.is_alive & df.age_years.between(5, 19)].groupby(by=[ + 'sex', 'li_wealth', _property, 'age_years']).size() + elif _property == 'li_ed_lev': - data = grouped_counts_with_all_combinations( - df.loc[df.is_alive & df.age_years.between(15, 49)], - ["sex", "li_wealth", "li_ed_lev", "age_years"], - {"age_years": range(15, 50)} - ) + data = df.loc[df.is_alive & df.age_years.between(15, 49)].groupby(by=[ + 'sex', 'li_wealth', _property, 'age_years']).size() + elif _property == 'li_is_sexworker': - data = grouped_counts_with_all_combinations( - df.loc[df.is_alive & (df.age_years.between(15, 49))], - ["sex", "li_is_sexworker", "age_range"], - ) + data = df.loc[df.is_alive & (df.age_years.between(15, 49))].groupby(by=[ + 'sex', _property, 'age_range']).size() + elif _property in cat_by_rural_urban_props: # log all properties that are also categorised by rural or urban in addition to ex and age groups - data = grouped_counts_with_all_combinations( - df.loc[df.is_alive], ["li_urban", "sex", _property, "age_range"] - ) + data = df.loc[df.is_alive].groupby(by=[ + 'li_urban', 'sex', _property, 'age_range']).size() + else: # log all other remaining properties - data = grouped_counts_with_all_combinations( - df.loc[df.is_alive], ["sex", _property, "age_range"] - ) + data = df.loc[df.is_alive].groupby(by=['sex', _property, 'age_range']).size() + # log data logger.info( key=_property, diff --git a/src/tlo/methods/epilepsy.py b/src/tlo/methods/epilepsy.py index 5645d55e34..a1650a3889 100644 --- a/src/tlo/methods/epilepsy.py +++ b/src/tlo/methods/epilepsy.py @@ -563,16 +563,16 @@ def apply(self, population): n_seiz_stat_1_3 = sum(status_groups.iloc[1:].is_alive) n_seiz_stat_2_3 = sum(status_groups.iloc[2:].is_alive) - n_antiep = int((df.is_alive & df.ep_antiep).sum()) + n_antiep = (df.is_alive & df.ep_antiep).sum() - n_epi_death = int(df.ep_epi_death.sum()) + n_epi_death = df.ep_epi_death.sum() status_groups['prop_seiz_stats'] = status_groups.is_alive / sum(status_groups.is_alive) status_groups['prop_seiz_stat_on_anti_ep'] = status_groups['ep_antiep'] / status_groups.is_alive status_groups['prop_seiz_stat_on_anti_ep'] = status_groups['prop_seiz_stat_on_anti_ep'].fillna(0) epi_death_rate = \ - (n_epi_death * 4 * 1000) / n_seiz_stat_2_3 if n_seiz_stat_2_3 > 0 else 0.0 + (n_epi_death * 4 * 1000) / n_seiz_stat_2_3 if n_seiz_stat_2_3 > 0 else 0 cum_deaths = (~df.is_alive).sum() diff --git a/src/tlo/methods/equipment.py b/src/tlo/methods/equipment.py index 62776fb3ad..e00bf030fd 100644 --- a/src/tlo/methods/equipment.py +++ b/src/tlo/methods/equipment.py @@ -6,7 +6,6 @@ import pandas as pd from tlo import logging -from tlo.logging.helpers import get_dataframe_row_as_dict_for_logging logger_summary = logging.getLogger("tlo.methods.healthsystem.summary") @@ -221,16 +220,16 @@ def write_to_log(self) -> None: mfl = self.master_facilities_list - def sorted_keys_or_empty_list(x: Union[dict, None]) -> list: - if isinstance(x, dict): - return sorted(x.keys()) + def set_of_keys_or_empty_set(x: Union[set, dict]): + if isinstance(x, set): + return x + elif isinstance(x, dict): + return set(x.keys()) else: - return [] + return set() set_of_equipment_ever_used_at_each_facility_id = pd.Series({ - fac_id: sorted_keys_or_empty_list( - self._record_of_equipment_used_by_facility_id.get(fac_id) - ) + fac_id: 
set_of_keys_or_empty_set(self._record_of_equipment_used_by_facility_id.get(fac_id, set())) for fac_id in mfl['Facility_ID'] }, name='EquipmentEverUsed').astype(str) @@ -240,13 +239,14 @@ def sorted_keys_or_empty_list(x: Union[dict, None]) -> list: right_index=True, how='left', ).drop(columns=['Facility_ID', 'Facility_Name']) + # Log multi-row data-frame - for row_index in output.index: + for _, row in output.iterrows(): logger_summary.info( key='EquipmentEverUsed_ByFacilityID', description='For each facility_id (the set of facilities of the same level in a district), the set of' 'equipment items that are ever used.', - data=get_dataframe_row_as_dict_for_logging(output, row_index) + data=row.to_dict(), ) def from_pkg_names(self, pkg_names: Union[str, Iterable[str]]) -> Set[int]: diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 5c6b2022e1..d71435e7aa 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -165,7 +165,7 @@ class HealthSystem(Module): 'use_funded_or_actual_staffing': Parameter( Types.STRING, "If `actual`, then use the numbers and distribution of staff estimated to be available" " currently; If `funded`, then use the numbers and distribution of staff that are " - "potentially available. If `funded_plus`, then use a dataset in which the allocation of " + "potentially available. If 'funded_plus`, then use a dataset in which the allocation of " "staff to facilities is tweaked so as to allow each appointment type to run at each " "facility_level in each district for which it is defined. N.B. This parameter is " "over-ridden if an argument is provided to the module initialiser.", @@ -775,9 +775,6 @@ def initialise_simulation(self, sim): # whilst the actual scaling will only take effect from 2011 onwards. sim.schedule_event(DynamicRescalingHRCapabilities(self), Date(sim.date)) - # Schedule the logger to occur at the start of every year - sim.schedule_event(HealthSystemLogger(self), Date(sim.date.year, 1, 1)) - def on_birth(self, mother_id, child_id): self.bed_days.on_birth(self.sim.population.props, mother_id, child_id) @@ -939,21 +936,22 @@ def setup_daily_capabilities(self, use_funded_or_actual_staffing): This is called when the value for `use_funded_or_actual_staffing` is set - at the beginning of the simulation and when the assumption when the underlying assumption for `use_funded_or_actual_staffing` is updated""" # * Store 'DailyCapabilities' in correct format and using the specified underlying assumptions - self._daily_capabilities, self._daily_capabilities_per_staff = self.format_daily_capabilities(use_funded_or_actual_staffing) + self._daily_capabilities = self.format_daily_capabilities(use_funded_or_actual_staffing) # Also, store the set of officers with non-zero daily availability # (This is used for checking that scheduled HSI events do not make appointment requiring officers that are # never available.) self._officers_with_availability = set(self._daily_capabilities.index[self._daily_capabilities > 0]) - def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple[pd.Series,pd.Series]: + def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Series: """ - This will updates the dataframe for the self.parameters['Daily_Capabilities'] so as to: - 1. 
include every permutation of officer_type_code and facility_id, with zeros against permutations where no capacity
+        This updates the dataframe for the self.parameters['Daily_Capabilities'] so as to include
+        every permutation of officer_type_code and facility_id, with zeros against permutations where no capacity
         is available.
-        2. Give the dataframe an index that is useful for merging on (based on Facility_ID and Officer Type)
+
+        It also gives the dataframe an index that is useful for merging on (based on Facility_ID and Officer Type)
+
+        (This is so that it's easier to track where demands are being placed where there is no capacity)
-        3. Compute daily capabilities per staff. This will be used to compute staff count in a way that is independent of assumed efficiency.
         """
         # Get the capabilities data imported (according to the specified underlying assumptions).
@@ -961,10 +959,6 @@
             self.parameters[f'Daily_Capabilities_{use_funded_or_actual_staffing}']
         )
         capabilities = capabilities.rename(columns={'Officer_Category': 'Officer_Type_Code'})  # neaten
-
-        # Create new column where capabilities per staff are computed
-        capabilities['Mins_Per_Day_Per_Staff'] = capabilities['Total_Mins_Per_Day']/capabilities['Staff_Count']
-
         # Create dataframe containing background information about facility and officer types
         facility_ids = self.parameters['Master_Facilities_List']['Facility_ID'].values
@@ -984,10 +978,7 @@
         # Merge in information about facility from Master Facilities List
         mfl = self.parameters['Master_Facilities_List']
         capabilities_ex = capabilities_ex.merge(mfl, on='Facility_ID', how='left')
-
-        # Create a copy of this to store staff counts
-        capabilities_per_staff_ex = capabilities_ex.copy()
-
+
         # Merge in information about officers
         # officer_types = self.parameters['Officer_Types_Table'][['Officer_Type_Code', 'Officer_Type']]
         # capabilities_ex = capabilities_ex.merge(officer_types, on='Officer_Type_Code', how='left')
@@ -1000,13 +991,6 @@
             how='left',
         )
         capabilities_ex = capabilities_ex.fillna(0)
-
-        capabilities_per_staff_ex = capabilities_per_staff_ex.merge(
-            capabilities[['Facility_ID', 'Officer_Type_Code', 'Mins_Per_Day_Per_Staff']],
-            on=['Facility_ID', 'Officer_Type_Code'],
-            how='left',
-        )
-        capabilities_per_staff_ex = capabilities_per_staff_ex.fillna(0)

         # Give the standard index:
         capabilities_ex = capabilities_ex.set_index(
             'FacilityID_'
             + capabilities_ex['Facility_ID'].astype(str)
             + '_Officer_'
             + capabilities_ex['Officer_Type_Code']
         )
-
-        # Give the standard index:
-        capabilities_per_staff_ex = capabilities_per_staff_ex.set_index(
-            'FacilityID_'
-            + capabilities_ex['Facility_ID'].astype(str)
-            + '_Officer_'
-            + capabilities_ex['Officer_Type_Code']
-        )

         # Rename 'Total_Minutes_Per_Day'
         capabilities_ex = capabilities_ex.rename(columns={'Total_Mins_Per_Day': 'Total_Minutes_Per_Day'})
@@ -1030,10 +1006,9 @@
         # Checks
         assert abs(capabilities_ex['Total_Minutes_Per_Day'].sum() - capabilities['Total_Mins_Per_Day'].sum()) < 1e-7
         assert len(capabilities_ex) == len(facility_ids) * len(officer_type_codes)
-        assert len(capabilities_per_staff_ex) == len(facility_ids) * len(officer_type_codes)

         # return the pd.Series of 
`Total_Minutes_Per_Day' indexed for each type of officer at each facility - return capabilities_ex['Total_Minutes_Per_Day'], capabilities_per_staff_ex['Mins_Per_Day_Per_Staff'] + return capabilities_ex['Total_Minutes_Per_Day'] def _rescale_capabilities_to_capture_effective_capability(self): # Notice that capabilities will only be expanded through this process @@ -1055,11 +1030,6 @@ def _rescale_capabilities_to_capture_effective_capability(self): ) if rescaling_factor > 1 and rescaling_factor != float("inf"): self._daily_capabilities[officer] *= rescaling_factor - - # We assume that increased daily capabilities is a result of each staff performing more - # daily patient facing time per day than contracted (or equivalently performing appts more - # efficiently). - self._daily_capabilities_per_staff[officer] *= rescaling_factor def update_consumables_availability_to_represent_merging_of_levels_1b_and_2(self, df_original): """To represent that facility levels '1b' and '2' are merged together under the label '2', we replace the @@ -1238,13 +1208,8 @@ def load_priority_policy(self, policy): ].iloc[0] # Convert policy dataframe into dictionary to speed-up look-up process. - self.priority_rank_dict = ( - Policy_df.set_index("Treatment", drop=True) - # Standardize dtypes to ensure any integers represented as floats are - # converted to integer dtypes - .convert_dtypes() - .to_dict(orient="index") - ) + self.priority_rank_dict = \ + Policy_df.set_index("Treatment", drop=True).to_dict(orient="index") del self.priority_rank_dict["lowest_priority_considered"] def schedule_hsi_event( @@ -1393,8 +1358,8 @@ def enforce_priority_policy(self, hsi_event) -> int: return _priority_ranking else: # If treatment is not ranked in the policy, issue a warning and assign priority=3 by default - warnings.warn(UserWarning(f"Couldn't find priority ranking for TREATMENT_ID \n" - f"{hsi_event.TREATMENT_ID}")) +# warnings.warn(UserWarning(f"Couldn't find priority ranking for TREATMENT_ID \n" +# f"{hsi_event.TREATMENT_ID}")) return self.lowest_priority_considered def check_hsi_event_is_valid(self, hsi_event): @@ -1818,7 +1783,7 @@ def write_to_never_ran_hsi_log( 'Number_By_Appt_Type_Code': dict(event_details.appt_footprint), 'Person_ID': person_id, 'priority': priority, - 'Facility_Level': event_details.facility_level if event_details.facility_level is not None else "-99", + 'Facility_Level': event_details.facility_level if event_details.facility_level is not None else -99, 'Facility_ID': facility_id if facility_id is not None else -99, }, description="record of each HSI event that never ran" @@ -2683,11 +2648,6 @@ def _reset_internal_stores(self) -> None: self._appts_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')} # <--Same as `self._appts` but also split by facility_level - # Log HSI_Events that have a non-blank appointment footprint - self._no_blank_appt_treatment_ids = defaultdict(int) # As above, but for `HSI_Event`s with non-blank footprint - self._no_blank_appt_appts = defaultdict(int) # As above, but for `HSI_Event`s that with non-blank footprint - self._no_blank_appt_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')} - # Log HSI_Events that never ran to monitor shortcoming of Health System self._never_ran_treatment_ids = defaultdict(int) # As above, but for `HSI_Event`s that never ran self._never_ran_appts = defaultdict(int) # As above, but for `HSI_Event`s that have never ran @@ -2721,13 +2681,6 @@ def record_hsi_event(self, 
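The counter updates just below accumulate appointment footprints into plain defaultdicts; the removed lines dropped the parallel counters kept only for non-blank footprints. A standalone sketch of the accumulation pattern (footprint values are invented):

from collections import defaultdict

appts = defaultdict(int)
appt_footprint = [('Under5OPD', 1), ('AccidentsandEmerg', 2)]  # assumed example footprint
for appt_type, number in appt_footprint:
    appts[appt_type] += number
assert appts == {'Under5OPD': 1, 'AccidentsandEmerg': 2}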
self._appts[appt_type] += number self._appts_by_level[level][appt_type] += number - # Count the non-blank appointment footprints - if len(appt_footprint): - self._no_blank_appt_treatment_ids[treatment_id] += 1 - for appt_type, number in appt_footprint: - self._no_blank_appt_appts[appt_type] += number - self._no_blank_appt_by_level[level][appt_type] += number - def record_never_ran_hsi_event(self, treatment_id: str, hsi_event_name: str, @@ -2772,15 +2725,6 @@ def write_to_log_and_reset_counters(self): } }, ) - logger_summary.info( - key="HSI_Event_non_blank_appt_footprint", - description="Same as for key 'HSI_Event' but limited to HSI_Event that have non-blank footprints", - data={ - "TREATMENT_ID": self._no_blank_appt_treatment_ids, - "Number_By_Appt_Type_Code": self._no_blank_appt_appts, - "Number_By_Appt_Type_Code_And_Level": self._no_blank_appt_by_level, - }, - ) # Log summary of HSI_Events that never ran logger_summary.info( @@ -2876,11 +2820,7 @@ def apply(self, population): self.module.consumables.availability = self._parameters['cons_availability'] if 'beds_availability' in self._parameters: - self.module.bed_days.switch_beddays_availability( - new_availability=self._parameters["beds_availability"], - effective_on_and_from=self.sim.date, - model_to_data_popsize_ratio=self.sim.modules["Demography"].initial_model_to_data_popsize_ratio - ) + self.module.bed_days.availability = self._parameters['beds_availability'] if 'equip_availability' in self._parameters: self.module.equipment.availability = self._parameters['equip_availability'] @@ -2999,34 +2939,3 @@ def apply(self, population): f"Now using mode: " f"{self.module.mode_appt_constraints}" ) - - -class HealthSystemLogger(RegularEvent, PopulationScopeEventMixin): - """ This event runs at the start of each year and does any logging jobs for the HealthSystem module.""" - - def __init__(self, module): - super().__init__(module, frequency=DateOffset(years=1)) - - def apply(self, population): - """Things to do at the start of the year""" - self.log_number_of_staff() - - def log_number_of_staff(self): - """Write to the summary log with the counts of staff (by cadre/facility/level) taking into account: - * Any scaling of capabilities that has taken place, year-by-year, or cadre-by-cadre - * Any re-scaling that has taken place at the transition into Mode 2. 
- """ - - hs = self.module # HealthSystem module - - # Compute staff counts from available capabilities (hs.capabilities_today) and daily capabilities per staff, - # both of which would have been rescaled to current efficiency levels if scale_to_effective_capabilities=True - # This returns the number of staff counts normalised by the self.capabilities_coefficient parameter - current_staff_count = dict((hs.capabilities_today/hs._daily_capabilities_per_staff).sort_index()) - - logger_summary.info( - key="number_of_hcw_staff", - description="The number of hcw_staff this year", - data=current_staff_count, - ) - diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index d6455cc861..d86c706217 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -31,7 +31,7 @@ import numpy as np import pandas as pd -from tlo import DAYS_IN_YEAR, Date, DateOffset, Module, Parameter, Property, Types, logging +from tlo import DAYS_IN_YEAR, DateOffset, Module, Parameter, Property, Types, logging from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent from tlo.lm import LinearModel, LinearModelType, Predictor from tlo.methods import Metadata, demography, tb @@ -397,19 +397,6 @@ def __init__(self, name=None, resourcefilepath=None, run_with_checks=False): "length in days of inpatient stay for end-of-life HIV patients: list has two elements [low-bound-inclusive," " high-bound-exclusive]", ), - # ------------------ scale-up parameters for scenario analysis ------------------ # - "type_of_scaleup": Parameter( - Types.STRING, "argument to determine type scale-up of program which will be implemented, " - "can be 'none', 'target' or 'max'", - ), - "scaleup_start_year": Parameter( - Types.INT, - "the year when the scale-up starts (it will occur on 1st January of that year)" - ), - "scaleup_parameters": Parameter( - Types.DATA_FRAME, - "the parameters and values changed in scenario analysis" - ), } def read_parameters(self, data_folder): @@ -447,9 +434,6 @@ def read_parameters(self, data_folder): # Load spectrum estimates of treatment cascade p["treatment_cascade"] = workbook["spectrum_treatment_cascade"] - # load parameters for scale-up projections - p['scaleup_parameters'] = workbook["scaleup_parameters"] - # DALY weights # get the DALY weight that this module will use from the weight database (these codes are just random!) if "HealthBurden" in self.sim.modules.keys(): @@ -472,13 +456,10 @@ def read_parameters(self, data_folder): ) def pre_initialise_population(self): - """Do things required before the population is created - * Build the LinearModels""" - self._build_linear_models() - - def _build_linear_models(self): - """Establish the Linear Models""" - + """ + * Establish the Linear Models + * + """ p = self.parameters # ---- LINEAR MODELS ----- @@ -913,12 +894,6 @@ def initialise_simulation(self, sim): # 2) Schedule the Logging Event sim.schedule_event(HivLoggingEvent(self), sim.date + DateOffset(years=1)) - # Optional: Schedule the scale-up of programs - if self.parameters["type_of_scaleup"] != 'none': - scaleup_start_date = Date(self.parameters["scaleup_start_year"], 1, 1) - assert scaleup_start_date >= self.sim.start_date, f"Date {scaleup_start_date} is before simulation starts." 
- sim.schedule_event(HivScaleUpEvent(self), scaleup_start_date) - # 3) Determine who has AIDS and impose the Symptoms 'aids_symptoms' # Those on ART currently (will not get any further events scheduled): @@ -1101,49 +1076,6 @@ def initialise_simulation(self, sim): ) ) - def update_parameters_for_program_scaleup(self): - """ options for program scale-up are 'target' or 'max' """ - p = self.parameters - scaled_params_workbook = p["scaleup_parameters"] - - if p['type_of_scaleup'] == 'target': - scaled_params = scaled_params_workbook.set_index('parameter')['target_value'].to_dict() - else: - scaled_params = scaled_params_workbook.set_index('parameter')['max_value'].to_dict() - - # scale-up HIV program - # reduce risk of HIV - applies to whole adult population - p["beta"] = p["beta"] * scaled_params["reduction_in_hiv_beta"] - - # increase PrEP coverage for FSW after HIV test - p["prob_prep_for_fsw_after_hiv_test"] = scaled_params["prob_prep_for_fsw_after_hiv_test"] - - # prep poll for AGYW - target to the highest risk - # increase retention to 75% for FSW and AGYW - p["prob_prep_for_agyw"] = scaled_params["prob_prep_for_agyw"] - p["probability_of_being_retained_on_prep_every_3_months"] = scaled_params["probability_of_being_retained_on_prep_every_3_months"] - - # perfect retention on ART - p["probability_of_being_retained_on_art_every_3_months"] = scaled_params["probability_of_being_retained_on_art_every_3_months"] - - # increase probability of VMMC after hiv test - p["prob_circ_after_hiv_test"] = scaled_params["prob_circ_after_hiv_test"] - - # increase testing/diagnosis rates, default 2020 0.03/0.25 -> 93% dx - p["hiv_testing_rates"]["annual_testing_rate_adults"] = scaled_params["annual_testing_rate_adults"] - - # ANC testing - value for mothers and infants testing - p["prob_hiv_test_at_anc_or_delivery"] = scaled_params["prob_hiv_test_at_anc_or_delivery"] - p["prob_hiv_test_for_newborn_infant"] = scaled_params["prob_hiv_test_for_newborn_infant"] - - # viral suppression rates - # adults already at 95% by 2020 - # change all column values - p["prob_start_art_or_vs"]["virally_suppressed_on_art"] = scaled_params["virally_suppressed_on_art"] - - # update exising linear models to use new scaled-up paramters - self._build_linear_models() - def on_birth(self, mother_id, child_id): """ * Initialise our properties for a newborn individual; @@ -2282,20 +2214,6 @@ def apply(self, person_id): ) -class HivScaleUpEvent(Event, PopulationScopeEventMixin): - """ This event exists to change parameters or functions - depending on the scenario for projections which has been set - It only occurs once on date: scaleup_start_date, - called by initialise_simulation - """ - - def __init__(self, module): - super().__init__(module) - - def apply(self, population): - self.module.update_parameters_for_program_scaleup() - - # --------------------------------------------------------------------------- # Health System Interactions (HSI) # --------------------------------------------------------------------------- @@ -3347,15 +3265,15 @@ def treatment_counts(subset): count = sum(subset) # proportion of subset living with HIV that are diagnosed: proportion_diagnosed = ( - sum(subset & df.hv_diagnosed) / count if count > 0 else 0.0 + sum(subset & df.hv_diagnosed) / count if count > 0 else 0 ) # proportions of subset living with HIV on treatment: art = sum(subset & (df.hv_art != "not")) - art_cov = art / count if count > 0 else 0.0 + art_cov = art / count if count > 0 else 0 # proportion of subset on treatment that have good VL 
suppression art_vs = sum(subset & (df.hv_art == "on_VL_suppressed")) - art_cov_vs = art_vs / art if art > 0 else 0.0 + art_cov_vs = art_vs / art if art > 0 else 0 return proportion_diagnosed, art_cov, art_cov_vs alive_infected = df.is_alive & df.hv_inf diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index b76a865d2d..85feb2b1b5 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -358,7 +358,7 @@ def _check_if_appt_footprint_can_run(self) -> bool: ): return True else: - logger.debug( + logger.warning( key="message", data=( f"The expected footprint of {self.TREATMENT_ID} is not possible with the configuration of " diff --git a/src/tlo/methods/hsi_generic_first_appts.py b/src/tlo/methods/hsi_generic_first_appts.py index 37f6c5e261..30f4d40ac7 100644 --- a/src/tlo/methods/hsi_generic_first_appts.py +++ b/src/tlo/methods/hsi_generic_first_appts.py @@ -184,10 +184,8 @@ def apply(self, person_id: int, squeeze_factor: float = 0.0) -> None: if not individual_properties["is_alive"]: return # Pre-evaluate symptoms for individual to avoid repeat accesses - # Use the individual_properties context here to save independent DF lookups - symptoms = self.sim.modules["SymptomManager"].has_what( - individual_details=individual_properties - ) + # TODO: Use individual_properties to populate symptoms + symptoms = self.sim.modules["SymptomManager"].has_what(self.target) schedule_hsi_event = self.sim.modules["HealthSystem"].schedule_hsi_event for module in self.sim.modules.values(): if isinstance(module, GenericFirstAppointmentsMixin): diff --git a/src/tlo/methods/labour.py b/src/tlo/methods/labour.py index 35081b7d27..695dbeb501 100644 --- a/src/tlo/methods/labour.py +++ b/src/tlo/methods/labour.py @@ -10,7 +10,6 @@ from tlo import Date, DateOffset, Module, Parameter, Property, Types, logging from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent from tlo.lm import LinearModel, LinearModelType -from tlo.logging.helpers import get_dataframe_row_as_dict_for_logging from tlo.methods import Metadata, labour_lm, pregnancy_helper_functions from tlo.methods.causes import Cause from tlo.methods.dxmanager import DxTest @@ -1057,7 +1056,7 @@ def further_on_birth_labour(self, mother_id): # log delivery setting logger.info(key='delivery_setting_and_mode', data={'mother': mother_id, - 'facility_type': str(mni[mother_id]['delivery_setting']), + 'facility_type': mni[mother_id]['delivery_setting'], 'mode': mni[mother_id]['mode_of_delivery']}) # Store only live births to a mother parity @@ -2612,7 +2611,7 @@ def apply(self, individual_id): self.module.set_intrapartum_complications(individual_id, complication=complication) if df.at[individual_id, 'la_obstructed_labour']: - logger.info(key='maternal_complication', data={'person': individual_id, + logger.info(key='maternal_complication', data={'mother': individual_id, 'type': 'obstructed_labour', 'timing': 'intrapartum'}) @@ -2977,7 +2976,7 @@ def apply(self, person_id, squeeze_factor): self.module.progression_of_hypertensive_disorders(person_id, property_prefix='ps') if df.at[person_id, 'la_obstructed_labour']: - logger.info(key='maternal_complication', data={'person': person_id, + logger.info(key='maternal_complication', data={'mother': person_id, 'type': 'obstructed_labour', 'timing': 'intrapartum'}) @@ -3118,7 +3117,7 @@ def apply(self, person_id, squeeze_factor): # log the PNC visit logger.info(key='postnatal_check', data={'person_id': person_id, - 'delivery_setting': 
str(mni[person_id]['delivery_setting']), + 'delivery_setting': mni[person_id]['delivery_setting'], 'visit_number': df.at[person_id, 'la_pn_checks_maternal'], 'timing': mni[person_id]['will_receive_pnc']}) @@ -3254,10 +3253,8 @@ def apply(self, person_id, squeeze_factor): # If intervention is delivered - add used equipment self.add_equipment(self.healthcare_system.equipment.from_pkg_names('Major Surgery')) - logger.info( - key='caesarean_delivery', - data=get_dataframe_row_as_dict_for_logging(df, person_id), - ) + person = df.loc[person_id] + logger.info(key='caesarean_delivery', data=person.to_dict()) logger.info(key='cs_indications', data={'id': person_id, 'indication': mni[person_id]['cs_indication']}) diff --git a/src/tlo/methods/malaria.py b/src/tlo/methods/malaria.py index b1fdfb09dd..bf7b5a11be 100644 --- a/src/tlo/methods/malaria.py +++ b/src/tlo/methods/malaria.py @@ -11,7 +11,7 @@ import pandas as pd -from tlo import Date, DateOffset, Module, Parameter, Property, Types, logging +from tlo import DateOffset, Module, Parameter, Property, Types, logging from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent from tlo.lm import LinearModel, Predictor from tlo.methods import Metadata @@ -188,19 +188,8 @@ def __init__(self, name=None, resourcefilepath=None): 'prob_of_treatment_success': Parameter( Types.REAL, 'probability that treatment will clear malaria symptoms' - ), - "type_of_scaleup": Parameter( - Types.STRING, "argument to determine type scale-up of program which will be implemented, " - "can be 'none', 'target' or 'max'", - ), - "scaleup_start_year": Parameter( - Types.INT, - "the year when the scale-up starts (it will occur on 1st January of that year)" - ), - "scaleup_parameters": Parameter( - Types.DATA_FRAME, - "the parameters and values changed in scenario analysis" ) + } PROPERTIES = { @@ -253,15 +242,11 @@ def read_parameters(self, data_folder): p['sev_symp_prob'] = workbook['severe_symptoms'] p['rdt_testing_rates'] = workbook['WHO_TestData2023'] - p['highrisk_districts'] = workbook['highrisk_districts'] p['inf_inc'] = pd.read_csv(self.resourcefilepath / 'malaria' / 'ResourceFile_malaria_InfInc_expanded.csv') p['clin_inc'] = pd.read_csv(self.resourcefilepath / 'malaria' / 'ResourceFile_malaria_ClinInc_expanded.csv') p['sev_inc'] = pd.read_csv(self.resourcefilepath / 'malaria' / 'ResourceFile_malaria_SevInc_expanded.csv') - # load parameters for scale-up projections - p['scaleup_parameters'] = workbook["scaleup_parameters"] - # check itn projected values are <=0.7 and rounded to 1dp for matching to incidence tables p['itn'] = round(p['itn'], 1) assert (p['itn'] <= 0.7) @@ -326,16 +311,13 @@ def read_parameters(self, data_folder): ) def pre_initialise_population(self): - """Do things required before the population is created - * Build the LinearModels""" - self._build_linear_models() - - def _build_linear_models(self): - """Establish the Linear Models + """ + * Establish the Linear Models if HIV is registered, the conditional predictors will apply otherwise only IPTp will affect risk of clinical/severe malaria """ + p = self.parameters # ---- LINEAR MODELS ----- @@ -374,7 +356,7 @@ def _build_linear_models(self): p['rr_severe_malaria_hiv_over5']), Predictor().when('(hv_inf == True) & (is_pregnant == True)', p['rr_severe_malaria_hiv_pregnant']), - ] if "Hiv" in self.sim.modules else [] + ] if "hiv" in self.sim.modules else [] self.lm["rr_of_severe_malaria"] = LinearModel.multiplicative( *(predictors + conditional_predictors)) @@ 
-552,12 +534,8 @@ def general_population_rdt_scheduler(self, population): # extract annual testing rates from NMCP reports # this is the # rdts issued divided by population size - year = self.sim.date.year if self.sim.date.year <= 2024 else 2024 - - test_rates = ( - p['rdt_testing_rates'].set_index('Year')['Rate_rdt_testing'].dropna() - ) - rdt_rate = test_rates.loc[min(test_rates.index.max(), year)] / 12 + test_rates = p['rdt_testing_rates'].set_index('Year')['Rate_rdt_testing'].dropna() + rdt_rate = test_rates.loc[min(test_rates.index.max(), self.sim.date.year)] / 12 # adjust rdt usage reported rate to reflect consumables availability rdt_rate = rdt_rate * p['scaling_factor_for_rdt_availability'] @@ -600,12 +578,6 @@ def initialise_simulation(self, sim): sim.schedule_event(MalariaTxLoggingEvent(self), sim.date + DateOffset(years=1)) sim.schedule_event(MalariaPrevDistrictLoggingEvent(self), sim.date + DateOffset(months=1)) - # Optional: Schedule the scale-up of programs - if self.parameters["type_of_scaleup"] != 'none': - scaleup_start_date = Date(self.parameters["scaleup_start_year"], 1, 1) - assert scaleup_start_date >= self.sim.start_date, f"Date {scaleup_start_date} is before simulation starts." - sim.schedule_event(MalariaScaleUpEvent(self), scaleup_start_date) - # 2) ----------------------------------- DIAGNOSTIC TESTS ----------------------------------- # Create the diagnostic test representing the use of RDT for malaria diagnosis # and registers it with the Diagnostic Test Manager @@ -654,62 +626,7 @@ def initialise_simulation(self, sim): # malaria IPTp for pregnant women self.item_codes_for_consumables_required['malaria_iptp'] = get_item_code( - 'Sulfamethoxazole + trimethropin, tablet 400 mg + 80 mg' - ) - - def update_parameters_for_program_scaleup(self): - """ options for program scale-up are 'target' or 'max' """ - p = self.parameters - scaled_params_workbook = p["scaleup_parameters"] - - if p['type_of_scaleup'] == 'target': - scaled_params = scaled_params_workbook.set_index('parameter')['target_value'].to_dict() - else: - scaled_params = scaled_params_workbook.set_index('parameter')['max_value'].to_dict() - - # scale-up malaria program - # increase testing - # prob_malaria_case_tests=0.4 default - p["prob_malaria_case_tests"] = scaled_params["prob_malaria_case_tests"] - - # gen pop testing rates - # annual Rate_rdt_testing=0.64 at 2023 - p["rdt_testing_rates"]["Rate_rdt_testing"] = scaled_params["rdt_testing_rates"] - - # treatment reaches XX - # no default between testing and treatment, governed by tx availability - - # coverage IPTp reaches XX - # given during ANC visits and MalariaIPTp Event which selects ALL eligible women - - # treatment success reaches 1 - default is currently 1 also - p["prob_of_treatment_success"] = scaled_params["prob_of_treatment_success"] - - # bednet and ITN coverage - # set IRS for 4 high-risk districts - # lookup table created in malaria read_parameters - # produces self.itn_irs called by malaria poll to draw incidence - # need to overwrite this - highrisk_distr_num = p["highrisk_districts"]["district_num"] - - # Find indices where District_Num is in highrisk_distr_num - mask = self.itn_irs['irs_rate'].index.get_level_values('District_Num').isin( - highrisk_distr_num) - - # IRS values can be 0 or 0.8 - no other value in lookup table - self.itn_irs['irs_rate'].loc[mask] = scaled_params["irs_district"] - - # set ITN for all districts - # Set these values to 0.7 - this is the max value possible in lookup table - # equivalent to 0.7 of all pop 
sleeping under bednet - # household coverage could be 100%, but not everyone in household sleeping under bednet - self.itn_irs['itn_rate'] = scaled_params["itn_district"] - - # itn rates for 2019 onwards - p["itn"] = scaled_params["itn"] - - # update exising linear models to use new scaled-up parameters - self._build_linear_models() + 'Sulfamethoxazole + trimethropin, tablet 400 mg + 80 mg') def on_birth(self, mother_id, child_id): df = self.sim.population.props @@ -778,14 +695,14 @@ def check_if_fever_is_caused_by_malaria( # Log the test: line-list of summary information about each test logger.info( key="rdt_log", - data=_data_for_rdt_log( - person_id=person_id, - age=patient_age, - fever_is_a_symptom=fever_is_a_symptom, - dx_result=dx_result, - facility_level=facility_level, - treatment_id=treatment_id - ) + data={ + "person_id": person_id, + "age": patient_age, + "fever_present": fever_is_a_symptom, + "rdt_result": dx_result, + "facility_level": facility_level, + "called_by": treatment_id, + }, ) # Severe malaria infection always returns positive RDT @@ -902,21 +819,6 @@ def apply(self, population): self.module.general_population_rdt_scheduler(population) -class MalariaScaleUpEvent(Event, PopulationScopeEventMixin): - """ This event exists to change parameters or functions - depending on the scenario for projections which has been set - It only occurs once on date: scaleup_start_date, - called by initialise_simulation - """ - - def __init__(self, module): - super().__init__(module) - - def apply(self, population): - - self.module.update_parameters_for_program_scaleup() - - class MalariaIPTp(RegularEvent, PopulationScopeEventMixin): """ malaria prophylaxis for pregnant women @@ -1068,15 +970,15 @@ def apply(self, person_id, squeeze_factor): ) # Log the test: line-list of summary information about each test - fever_present = 'fever' in self.sim.modules["SymptomManager"].has_what(person_id=person_id) - person_details_for_test = _data_for_rdt_log( - person_id=person_id, - age=df.at[person_id, 'age_years'], - fever_is_a_symptom=fever_present, - dx_result=dx_result, - facility_level=self.ACCEPTED_FACILITY_LEVEL, - treatment_id=self.TREATMENT_ID, - ) + fever_present = 'fever' in self.sim.modules["SymptomManager"].has_what(person_id) + person_details_for_test = { + 'person_id': person_id, + 'age': df.at[person_id, 'age_years'], + 'fever_present': fever_present, + 'rdt_result': dx_result, + 'facility_level': self.ACCEPTED_FACILITY_LEVEL, + 'called_by': self.TREATMENT_ID + } logger.info(key='rdt_log', data=person_details_for_test) if dx_result: @@ -1160,16 +1062,15 @@ def apply(self, person_id, squeeze_factor): ) # Log the test: line-list of summary information about each test - fever_present = 'fever' in self.sim.modules["SymptomManager"].has_what(person_id=person_id) - person_details_for_test = _data_for_rdt_log( - person_id=person_id, - age=df.at[person_id, 'age_years'], - fever_is_a_symptom=fever_present, - dx_result=dx_result, - facility_level=self.ACCEPTED_FACILITY_LEVEL, - treatment_id=self.TREATMENT_ID, - ) - + fever_present = 'fever' in self.sim.modules["SymptomManager"].has_what(person_id) + person_details_for_test = { + 'person_id': person_id, + 'age': df.at[person_id, 'age_years'], + 'fever_present': fever_present, + 'rdt_result': dx_result, + 'facility_level': self.ACCEPTED_FACILITY_LEVEL, + 'called_by': self.TREATMENT_ID + } logger.info(key='rdt_log', data=person_details_for_test) # if positive, refer for a confirmatory test at level 1a @@ -1223,15 +1124,15 @@ def apply(self, 
person_id, squeeze_factor): # rdt is offered as part of the treatment package # Log the test: line-list of summary information about each test - fever_present = 'fever' in self.sim.modules["SymptomManager"].has_what(person_id=person_id) - person_details_for_test = _data_for_rdt_log( - person_id=person_id, - age=df.at[person_id, 'age_years'], - fever_is_a_symptom=fever_present, - dx_result=True, - facility_level=self.ACCEPTED_FACILITY_LEVEL, - treatment_id=self.TREATMENT_ID, - ) + fever_present = 'fever' in self.sim.modules["SymptomManager"].has_what(person_id) + person_details_for_test = { + 'person_id': person_id, + 'age': df.at[person_id, 'age_years'], + 'fever_present': fever_present, + 'rdt_result': True, + 'facility_level': self.ACCEPTED_FACILITY_LEVEL, + 'called_by': self.TREATMENT_ID + } logger.info(key='rdt_log', data=person_details_for_test) def get_drugs(self, age_of_person): @@ -1320,15 +1221,15 @@ def apply(self, person_id, squeeze_factor): # rdt is offered as part of the treatment package # Log the test: line-list of summary information about each test - fever_present = 'fever' in self.sim.modules["SymptomManager"].has_what(person_id=person_id) - person_details_for_test = _data_for_rdt_log( - person_id=person_id, - age=df.at[person_id, 'age_years'], - fever_is_a_symptom=fever_present, - dx_result=True, - facility_level=self.ACCEPTED_FACILITY_LEVEL, - treatment_id=self.TREATMENT_ID, - ) + fever_present = 'fever' in self.sim.modules["SymptomManager"].has_what(person_id) + person_details_for_test = { + 'person_id': person_id, + 'age': df.at[person_id, 'age_years'], + 'fever_present': fever_present, + 'rdt_result': True, + 'facility_level': self.ACCEPTED_FACILITY_LEVEL, + 'called_by': self.TREATMENT_ID + } logger.info(key='rdt_log', data=person_details_for_test) def did_not_run(self): @@ -1765,21 +1666,3 @@ def apply(self, population): logger.info(key='pop_district', data=pop.to_dict(), description='District population sizes') - - -def _data_for_rdt_log( - person_id: int, - age: int, - fever_is_a_symptom: bool, - dx_result: Union[bool, None], - facility_level: str, - treatment_id: str, -): - return { - "person_id": person_id, - "age": age, - "fever_present": fever_is_a_symptom, - "rdt_result": pd.array([dx_result], dtype="boolean"), - "facility_level": facility_level, - "called_by": treatment_id, - } diff --git a/src/tlo/methods/measles.py b/src/tlo/methods/measles.py index 39f9828860..b6955ff9d7 100644 --- a/src/tlo/methods/measles.py +++ b/src/tlo/methods/measles.py @@ -442,7 +442,7 @@ def apply(self, person_id, squeeze_factor): data=f"HSI_Measles_Treatment: treat person {person_id} for measles") df = self.sim.population.props - symptoms = self.sim.modules["SymptomManager"].has_what(person_id=person_id) + symptoms = self.sim.modules["SymptomManager"].has_what(person_id) # for non-complicated measles item_codes = [self.module.consumables['vit_A']] @@ -548,7 +548,7 @@ def apply(self, population): if tmp: proportion_with_symptom = number_with_symptom / tmp else: - proportion_with_symptom = 0.0 + proportion_with_symptom = 0 symptom_output[symptom] = proportion_with_symptom logger.info(key="measles_symptoms", @@ -586,7 +586,7 @@ def apply(self, population): if total_infected: prop_infected_by_age = infected_age_counts / total_infected else: - prop_infected_by_age = infected_age_counts.astype("float") # just output the series of zeros by age group + prop_infected_by_age = infected_age_counts # just output the series of zeros by age group logger.info(key='measles_incidence_age_range', 
data=prop_infected_by_age.to_dict(), description="measles incidence by age group") diff --git a/src/tlo/methods/newborn_outcomes.py b/src/tlo/methods/newborn_outcomes.py index 3691bc6003..433b21ca88 100644 --- a/src/tlo/methods/newborn_outcomes.py +++ b/src/tlo/methods/newborn_outcomes.py @@ -1363,7 +1363,7 @@ def apply(self, person_id, squeeze_factor): # Log the PNC check logger.info(key='postnatal_check', data={'person_id': person_id, - 'delivery_setting': str(nci[person_id]['delivery_setting']), + 'delivery_setting': nci[person_id]['delivery_setting'], 'visit_number': df.at[person_id, 'nb_pnc_check'], 'timing': nci[person_id]['will_receive_pnc']}) diff --git a/src/tlo/methods/oesophagealcancer.py b/src/tlo/methods/oesophagealcancer.py index 8adc0614e1..1961aa340e 100644 --- a/src/tlo/methods/oesophagealcancer.py +++ b/src/tlo/methods/oesophagealcancer.py @@ -681,7 +681,7 @@ def apply(self, person_id, squeeze_factor): return hs.get_blank_appt_footprint() # Check that this event has been called for someone with the symptom dysphagia - assert 'dysphagia' in self.sim.modules['SymptomManager'].has_what(person_id=person_id) + assert 'dysphagia' in self.sim.modules['SymptomManager'].has_what(person_id) # If the person is already diagnosed, then take no action: if not pd.isnull(df.at[person_id, "oc_date_diagnosis"]): diff --git a/src/tlo/methods/other_adult_cancers.py b/src/tlo/methods/other_adult_cancers.py index 5aad8f971a..5999792393 100644 --- a/src/tlo/methods/other_adult_cancers.py +++ b/src/tlo/methods/other_adult_cancers.py @@ -685,7 +685,7 @@ def apply(self, person_id, squeeze_factor): return hs.get_blank_appt_footprint() # Check that this event has been called for someone with the symptom other_adult_ca_symptom - assert 'early_other_adult_ca_symptom' in self.sim.modules['SymptomManager'].has_what(person_id=person_id) + assert 'early_other_adult_ca_symptom' in self.sim.modules['SymptomManager'].has_what(person_id) # If the person is already diagnosed, then take no action: if not pd.isnull(df.at[person_id, "oac_date_diagnosis"]): diff --git a/src/tlo/methods/prostate_cancer.py b/src/tlo/methods/prostate_cancer.py index dbbe2c427f..8bb7fd82ef 100644 --- a/src/tlo/methods/prostate_cancer.py +++ b/src/tlo/methods/prostate_cancer.py @@ -719,7 +719,7 @@ def apply(self, person_id, squeeze_factor): return hs.get_blank_appt_footprint() # Check that this event has been called for someone with the urinary symptoms - assert 'urinary' in self.sim.modules['SymptomManager'].has_what(person_id=person_id) + assert 'urinary' in self.sim.modules['SymptomManager'].has_what(person_id) # If the person is already diagnosed, then take no action: if not pd.isnull(df.at[person_id, "pc_date_diagnosis"]): @@ -767,7 +767,7 @@ def apply(self, person_id, squeeze_factor): return hs.get_blank_appt_footprint() # Check that this event has been called for someone with the pelvic pain - assert 'pelvic_pain' in self.sim.modules['SymptomManager'].has_what(person_id=person_id) + assert 'pelvic_pain' in self.sim.modules['SymptomManager'].has_what(person_id) # If the person is already diagnosed, then take no action: if not pd.isnull(df.at[person_id, "pc_date_diagnosis"]): diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 68ef59fcf0..b76fb40e9f 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -41,7 +41,7 @@ def __init__(self, name=None, resourcefilepath=None): super().__init__(name) self.resourcefilepath = resourcefilepath self.ASSIGN_INJURIES_AND_DALY_CHANGES = None - self.cons_item_codes = 
None # (Will store consumable item codes) + self.item_codes_for_consumables_required = dict() INIT_DEPENDENCIES = {"SymptomManager", "HealthBurden"} @@ -1016,10 +1016,6 @@ def __init__(self, name=None, resourcefilepath=None): Types.INT, "A cut-off score above which an injuries will be considered severe enough to cause mortality in those who" "have not sought care." - ), - 'maximum_number_of_times_HSI_events_should_run': Parameter( - Types.INT, - "limit on the number of times an HSI event can run" ) } @@ -1529,8 +1525,6 @@ def initialise_simulation(self, sim): sim.schedule_event(RTI_Check_Death_No_Med(self), sim.date + DateOffset(months=0)) # Begin logging the RTI events sim.schedule_event(RTI_Logging_Event(self), sim.date + DateOffset(months=1)) - # Look-up consumable item codes - self.look_up_consumable_item_codes() def rti_do_when_diagnosed(self, person_id): """ @@ -2297,129 +2291,6 @@ def on_birth(self, mother_id, child_id): df.at[child_id, 'rt_debugging_DALY_wt'] = 0 df.at[child_id, 'rt_injuries_left_untreated'] = [] - def look_up_consumable_item_codes(self): - """Look up the item codes that used in the HSI in the module""" - get_item_codes = self.sim.modules['HealthSystem'].get_item_code_from_item_name - - self.cons_item_codes = dict() - self.cons_item_codes['shock_treatment_child'] = { - get_item_codes("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 500, - get_item_codes("Dextrose (glucose) 5%, 1000ml_each_CMST"): 500, - get_item_codes('Cannula iv (winged with injection pot) 18_each_CMST'): 1, - get_item_codes('Blood, one unit'): 2, - get_item_codes("Oxygen, 1000 liters, primarily with oxygen cylinders"): 23_040 - } - self.cons_item_codes['shock_treatment_adult'] = { - get_item_codes("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, - get_item_codes('Cannula iv (winged with injection pot) 18_each_CMST'): 1, - get_item_codes('Blood, one unit'): 2, - get_item_codes("Oxygen, 1000 liters, primarily with oxygen cylinders"): 23_040 - } - self.cons_item_codes['fracture_treatment_plaster'] = { - get_item_codes('Plaster of Paris (POP) 10cm x 7.5cm slab_12_CMST'): 1 - # This is for one fracture. - } - self.cons_item_codes['fracture_treatment_bandage'] = { - get_item_codes('Bandage, crepe 7.5cm x 1.4m long , when stretched'): 200, - # (The 200 is a standard assumption for the amount of bandage needed, irrespective of the number of - # fractures.) 
- } - self.cons_item_codes['open_fracture_treatment'] = { - get_item_codes('Ceftriaxone 1g, PFR_each_CMST'): 2, - get_item_codes('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): 100, - get_item_codes("Gauze, absorbent 90cm x 40m_each_CMST"): 100, - get_item_codes('Suture pack'): 1, - } - self.cons_item_codes["open_fracture_treatment_additional_if_contaminated"] = { - get_item_codes('Metronidazole, injection, 500 mg in 100 ml vial'): 3 - } - - self.cons_item_codes['laceration_treatment_suture_pack'] = { - get_item_codes('Suture pack'): 1, - } - self.cons_item_codes['laceration_treatment_cetrimide_chlorhexidine'] = { - get_item_codes('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): 100, - } - self.cons_item_codes['burn_treatment_per_burn'] = { - get_item_codes("Gauze, absorbent 90cm x 40m_each_CMST"): 100, - get_item_codes('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): 100, - } - self.cons_item_codes['ringers lactate for multiple burns'] = { - get_item_codes("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 4000 - } - self.cons_item_codes['tetanus_treatment'] = {get_item_codes('Tetanus toxoid, injection'): 1} - self.cons_item_codes['pain_management_mild_under_16'] = {get_item_codes("Paracetamol 500mg_1000_CMST"): 8000} - self.cons_item_codes['pain_management_mild_above_16'] = { - get_item_codes("diclofenac sodium 25 mg, enteric coated_1000_IDA"): 300 - } - self.cons_item_codes['pain_management_moderate'] = { - get_item_codes("tramadol HCl 100 mg/2 ml, for injection_100_IDA"): 3 - } - self.cons_item_codes['pain_management_severe'] = { - get_item_codes("morphine sulphate 10 mg/ml, 1 ml, injection (nt)_10_IDA"): 12 - } - self.cons_item_codes['major_surgery'] = { - # request a general anaesthetic - get_item_codes("Halothane (fluothane)_250ml_CMST"): 100, - # clean the site of the surgery - get_item_codes("Chlorhexidine 1.5% solution_5_CMST"): 600, - # tools to begin surgery - get_item_codes("Scalpel blade size 22 (individually wrapped)_100_CMST"): 1, - # administer an IV - get_item_codes('Cannula iv (winged with injection pot) 18_each_CMST'): 1, - get_item_codes("Giving set iv administration + needle 15 drops/ml_each_CMST"): 1, - get_item_codes("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, - # repair incision made - get_item_codes("Suture pack"): 1, - get_item_codes("Gauze, absorbent 90cm x 40m_each_CMST"): 100, - # administer pain killer - get_item_codes('Pethidine, 50 mg/ml, 2 ml ampoule'): 6, - # administer antibiotic - get_item_codes("Ampicillin injection 500mg, PFR_each_CMST"): 2, - # equipment used by surgeon, gloves and facemask - get_item_codes('Disposables gloves, powder free, 100 pieces per box'): 1, - get_item_codes('surgical face mask, disp., with metal nose piece_50_IDA'): 1, - # request syringe - get_item_codes("Syringe, Autodisable SoloShot IX "): 1 - } - self.cons_item_codes['minor_surgery'] = { - # request a local anaesthetic - get_item_codes("Halothane (fluothane)_250ml_CMST"): 100, - # clean the site of the surgery - get_item_codes("Chlorhexidine 1.5% solution_5_CMST"): 300, - # tools to begin surgery - get_item_codes("Scalpel blade size 22 (individually wrapped)_100_CMST"): 1, - # administer an IV - get_item_codes('Cannula iv (winged with injection pot) 18_each_CMST'): 1, - get_item_codes("Giving set iv administration + needle 15 drops/ml_each_CMST"): 1, - get_item_codes("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, - # repair incision made - get_item_codes("Suture 
pack"): 1, - get_item_codes("Gauze, absorbent 90cm x 40m_each_CMST"): 100, - # administer pain killer - get_item_codes('Pethidine, 50 mg/ml, 2 ml ampoule'): 6, - # administer antibiotic - get_item_codes("Ampicillin injection 500mg, PFR_each_CMST"): 2, - # equipment used by surgeon, gloves and facemask - get_item_codes('Disposables gloves, powder free, 100 pieces per box'): 1, - get_item_codes('surgical face mask, disp., with metal nose piece_50_IDA'): 1, - # request syringe - get_item_codes("Syringe, Autodisable SoloShot IX "): 1 - } - # Function to get the consumables for fracture treatment, which depends on the number of fractures: - self.cons_item_codes['fracture_treatment'] = lambda num_fractures: { - **{item: num_fractures for item in self.cons_item_codes['fracture_treatment_plaster']}, - **self.cons_item_codes['fracture_treatment_bandage'] - } - # Function to get the consumables for laceration treatment, which depends on the number of lacerations: - self.cons_item_codes['laceration_treatment'] = lambda num_laceration: { - **{item: num_laceration for item in self.cons_item_codes['laceration_treatment_suture_pack']}, - **self.cons_item_codes['laceration_treatment_cetrimide_chlorhexidine'] - } - self.cons_item_codes['burn_treatment'] = lambda num_burns: { - item: num_burns for item in self.cons_item_codes['burn_treatment_per_burn'] - } - def on_hsi_alert(self, person_id, treatment_id): """ This is called whenever there is an HSI event commissioned by one of the other disease modules. @@ -2572,7 +2443,7 @@ def rti_assign_injuries(self, number): inc_other = other_counts / ((n_alive - other_counts) * 1 / 12) * 100000 tot_inc_all_inj = inc_amputations + inc_burns + inc_fractures + inc_tbi + inc_sci + inc_minor + inc_other if number > 0: - number_of_injuries = int(inj_df['Number_of_injuries'].iloc[0]) + number_of_injuries = inj_df['Number_of_injuries'].tolist() else: number_of_injuries = 0 dict_to_output = {'inc_amputations': inc_amputations, @@ -2614,7 +2485,7 @@ def rti_assign_injuries(self, number): if n_lx_fracs > 0: proportion_lx_fracture_open = n_open_lx_fracs / n_lx_fracs else: - proportion_lx_fracture_open = float("nan") + proportion_lx_fracture_open = 'no_lx_fractures' injury_info = {'Proportion_lx_fracture_open': proportion_lx_fracture_open} logger.info(key='Open_fracture_information', data=injury_info, @@ -2939,7 +2810,7 @@ def apply(self, population): df.loc[shock_index, 'rt_in_shock'] = True # log the percentage of those with RTIs in shock percent_in_shock = \ - len(shock_index) / len(selected_for_rti_inj) if len(selected_for_rti_inj) > 0 else float("nan") + len(shock_index) / len(selected_for_rti_inj) if len(selected_for_rti_inj) > 0 else 'none_injured' logger.info(key='Percent_of_shock_in_rti', data={'Percent_of_shock_in_rti': percent_in_shock}, description='The percentage of those assigned injuries who were also assign the shock property') @@ -3954,12 +3825,9 @@ def __init__(self, module, person_id): self.TREATMENT_ID = 'Rti_ShockTreatment' self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({'AccidentsandEmerg': 1}) self.ACCEPTED_FACILITY_LEVEL = '1b' - self._number_of_times_this_event_has_run = 0 - self._maximum_number_times_event_should_run = self.module.parameters['maximum_number_of_times_HSI_events_should_run'] def apply(self, person_id, squeeze_factor): df = self.sim.population.props - self._number_of_times_this_event_has_run += 1 # determine if this is a child if df.loc[person_id, 'age_years'] < 15: is_child = True @@ -3967,15 +3835,28 @@ def apply(self, person_id, 
squeeze_factor): is_child = False if not df.at[person_id, 'is_alive']: return self.make_appt_footprint({}) - + get_item_code = self.sim.modules['HealthSystem'].get_item_code_from_item_name # TODO: find a more complete list of required consumables for adults if is_child: + self.module.item_codes_for_consumables_required['shock_treatment_child'] = { + get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 500, + get_item_code("Dextrose (glucose) 5%, 1000ml_each_CMST"): 500, + get_item_code('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + get_item_code('Blood, one unit'): 2, + get_item_code("Oxygen, 1000 liters, primarily with oxygen cylinders"): 23_040 + } is_cons_available = self.get_consumables( - self.module.cons_item_codes['shock_treatment_child'] + self.module.item_codes_for_consumables_required['shock_treatment_child'] ) else: + self.module.item_codes_for_consumables_required['shock_treatment_adult'] = { + get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, + get_item_code('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + get_item_code('Blood, one unit'): 2, + get_item_code("Oxygen, 1000 liters, primarily with oxygen cylinders"): 23_040 + } is_cons_available = self.get_consumables( - self.module.cons_item_codes['shock_treatment_adult'] + self.module.item_codes_for_consumables_required['shock_treatment_adult'] ) if is_cons_available: @@ -3984,8 +3865,7 @@ def apply(self, person_id, squeeze_factor): df.at[person_id, 'rt_in_shock'] = False self.add_equipment({'Infusion pump', 'Drip stand', 'Oxygen cylinder, with regulator', 'Nasal Prongs'}) else: - if self._number_of_times_this_event_has_run < self._maximum_number_times_event_should_run: - self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) + self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) return self.make_appt_footprint({}) def did_not_run(self): @@ -4038,21 +3918,17 @@ def __init__(self, module, person_id): self.TREATMENT_ID = 'Rti_FractureCast' self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({'AccidentsandEmerg': 1}) self.ACCEPTED_FACILITY_LEVEL = '1b' - self._number_of_times_this_event_has_run = 0 - self._maximum_number_times_event_should_run = self.module.parameters[ - 'maximum_number_of_times_HSI_events_should_run'] def apply(self, person_id, squeeze_factor): # Get the population and health system df = self.sim.population.props p = df.loc[person_id] - self._number_of_times_this_event_has_run += 1 - # if the person isn't alive return a blank footprint if not df.at[person_id, 'is_alive']: return self.make_appt_footprint({}) # get a shorthand reference to RTI and consumables modules road_traffic_injuries = self.sim.modules['RTI'] + get_item_code = self.sim.modules['HealthSystem'].get_item_code_from_item_name # isolate the relevant injury information # Find the untreated injuries untreated_injury_cols = _get_untreated_injury_columns(person_id, df) @@ -4073,13 +3949,14 @@ def apply(self, person_id, squeeze_factor): assert len(p['rt_injuries_to_cast']) > 0 # Check this injury assigned to be treated here is actually had by the person assert all(injuries in person_injuries.values for injuries in p['rt_injuries_to_cast']) - - # If they have a fracture that needs a cast, ask for consumables, updating to match the number of - # fractures). 
+ # If they have a fracture that needs a cast, ask for plaster of paris + self.module.item_codes_for_consumables_required['fracture_treatment'] = { + get_item_code('Plaster of Paris (POP) 10cm x 7.5cm slab_12_CMST'): fracturecastcounts, + get_item_code('Bandage, crepe 7.5cm x 1.4m long , when stretched'): 200, + } is_cons_available = self.get_consumables( - self.module.cons_item_codes['fracture_treatment'](fracturecastcounts) + self.module.item_codes_for_consumables_required['fracture_treatment'] ) - # if the consumables are available then the appointment can run if is_cons_available: logger.debug(key='rti_general_message', @@ -4140,8 +4017,7 @@ def apply(self, person_id, squeeze_factor): df.loc[person_id, 'rt_injuries_to_cast'].clear() df.loc[person_id, 'rt_date_death_no_med'] = pd.NaT else: - if self._number_of_times_this_event_has_run < self._maximum_number_times_event_should_run: - self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) + self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) if pd.isnull(df.loc[person_id, 'rt_date_death_no_med']): df.loc[person_id, 'rt_date_death_no_med'] = self.sim.date + DateOffset(days=7) logger.debug(key='rti_general_message', @@ -4181,16 +4057,13 @@ def __init__(self, module, person_id): self.TREATMENT_ID = 'Rti_OpenFractureTreatment' self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({'MinorSurg': 1}) self.ACCEPTED_FACILITY_LEVEL = '1b' - self._number_of_times_this_event_has_run = 0 - self._maximum_number_times_event_should_run = self.module.parameters[ - 'maximum_number_of_times_HSI_events_should_run'] def apply(self, person_id, squeeze_factor): df = self.sim.population.props - self._number_of_times_this_event_has_run += 1 if not df.at[person_id, 'is_alive']: return self.make_appt_footprint({}) road_traffic_injuries = self.sim.modules['RTI'] + get_item_code = self.sim.modules['HealthSystem'].get_item_code_from_item_name # isolate the relevant injury information person_injuries = df.loc[[person_id], RTI.INJURY_COLUMNS] # check if they have a fracture that requires a cast @@ -4203,16 +4076,27 @@ def apply(self, person_id, squeeze_factor): assert df.loc[person_id, 'rt_med_int'], 'person sent here has not been treated' # If they have an open fracture, ask for consumables to treat fracture - wound_contaminated = ( - (open_fracture_counts > 0) - and (self.module.parameters['prob_open_fracture_contaminated'] > self.module.rng.random_sample()) - ) - + if open_fracture_counts > 0: + self.module.item_codes_for_consumables_required['open_fracture_treatment'] = { + get_item_code('Ceftriaxone 1g, PFR_each_CMST'): 2000, + get_item_code('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): 500, + get_item_code("Gauze, absorbent 90cm x 40m_each_CMST"): 100, + get_item_code('Suture pack'): 1, + } + # If wound is "grossly contaminated" administer Metronidazole + # todo: parameterise the probability of wound contamination + p = self.module.parameters + prob_open_fracture_contaminated = p['prob_open_fracture_contaminated'] + rand_for_contamination = self.module.rng.random_sample(size=1) + # NB: Dose used below from BNF is for surgical prophylaxsis + if rand_for_contamination < prob_open_fracture_contaminated: + self.module.item_codes_for_consumables_required['open_fracture_treatment'].update( + {get_item_code('Metronidazole, injection, 500 mg in 100 ml vial'): 1500} + ) # Check that there are enough consumables to treat this person's fractures - is_cons_available = 
self.get_consumables(self.module.cons_item_codes["open_fracture_treatment"]) and ( - # If wound is "grossly contaminated" administer Metronidazole, else ignore - self.get_consumables(self.module.cons_item_codes["open_fracture_treatment_additional_if_contaminated"]) - if wound_contaminated else True) + is_cons_available = self.get_consumables( + self.module.item_codes_for_consumables_required['open_fracture_treatment'] + ) if is_cons_available: logger.debug(key='rti_general_message', @@ -4247,8 +4131,7 @@ def apply(self, person_id, squeeze_factor): if code[0] in df.loc[person_id, 'rt_injuries_for_open_fracture_treatment']: df.loc[person_id, 'rt_injuries_for_open_fracture_treatment'].remove(code[0]) else: - if self._number_of_times_this_event_has_run < self._maximum_number_times_event_should_run: - self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) + self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) if pd.isnull(df.loc[person_id, 'rt_date_death_no_med']): df.loc[person_id, 'rt_date_death_no_med'] = self.sim.date + DateOffset(days=7) logger.debug(key='rti_general_message', @@ -4291,14 +4174,10 @@ def __init__(self, module, person_id): self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({ ('Under5OPD' if self.sim.population.props.at[person_id, "age_years"] < 5 else 'Over5OPD'): 1}) self.ACCEPTED_FACILITY_LEVEL = '1b' - self._number_of_times_this_event_has_run = 0 - self._maximum_number_times_event_should_run = self.module.parameters[ - 'maximum_number_of_times_HSI_events_should_run'] def apply(self, person_id, squeeze_factor): + get_item_code = self.sim.modules['HealthSystem'].get_item_code_from_item_name df = self.sim.population.props - self._number_of_times_this_event_has_run += 1 - if not df.at[person_id, 'is_alive']: return self.make_appt_footprint({}) road_traffic_injuries = self.sim.modules['RTI'] @@ -4312,10 +4191,15 @@ def apply(self, person_id, squeeze_factor): # Check that the person sent here has an injury that is treated by this HSI event assert lacerationcounts > 0 if lacerationcounts > 0: + self.module.item_codes_for_consumables_required['laceration_treatment'] = { + get_item_code('Suture pack'): lacerationcounts, + get_item_code('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): 500, + } # check the number of suture kits required and request them is_cons_available = self.get_consumables( - self.module.cons_item_codes['laceration_treatment'](lacerationcounts)) + self.module.item_codes_for_consumables_required['laceration_treatment'] + ) # Availability of consumables determines if the intervention is delivered... 
if is_cons_available: @@ -4338,8 +4222,7 @@ def apply(self, person_id, squeeze_factor): assert df.loc[person_id, date_to_remove_daly_column] > self.sim.date df.loc[person_id, 'rt_date_death_no_med'] = pd.NaT else: - if self._number_of_times_this_event_has_run < self._maximum_number_times_event_should_run: - self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) + self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) if pd.isnull(df.loc[person_id, 'rt_date_death_no_med']): df.loc[person_id, 'rt_date_death_no_med'] = self.sim.date + DateOffset(days=7) logger.debug(key='rti_general_message', @@ -4386,14 +4269,11 @@ def __init__(self, module, person_id): p = self.module.parameters self.prob_mild_burns = p['prob_mild_burns'] - self._number_of_times_this_event_has_run = 0 - self._maximum_number_times_event_should_run = p['maximum_number_of_times_HSI_events_should_run'] def apply(self, person_id, squeeze_factor): + get_item_code = self.sim.modules['HealthSystem'].get_item_code_from_item_name df = self.sim.population.props - self._number_of_times_this_event_has_run += 1 - if not df.at[person_id, 'is_alive']: return self.make_appt_footprint({}) road_traffic_injuries = self.sim.modules['RTI'] @@ -4408,8 +4288,11 @@ def apply(self, person_id, squeeze_factor): assert df.loc[person_id, 'rt_med_int'], 'this person has not been treated' if burncounts > 0: # Request materials for burn treatment - cons_needed = self.module.cons_item_codes['burn_treatment'](burncounts) + self.module.item_codes_for_consumables_required['burn_treatment'] = { + get_item_code("Gauze, absorbent 90cm x 40m_each_CMST"): burncounts, + get_item_code('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): burncounts, + } possible_large_TBSA_burn_codes = ['7113', '8113', '4113', '5113'] idx2, bigburncounts = \ road_traffic_injuries.rti_find_and_count_injuries(person_injuries, possible_large_TBSA_burn_codes) @@ -4418,11 +4301,13 @@ def apply(self, person_id, squeeze_factor): if (burncounts > 1) or ((len(idx2) > 0) & (random_for_severe_burn > self.prob_mild_burns)): # check if they have multiple burns, which implies a higher burned total body surface area (TBSA) which # will alter the treatment plan - cons_needed.update( - self.module.cons_item_codes['ringers lactate for multiple burns'] + self.module.item_codes_for_consumables_required['burn_treatment'].update( + {get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 4000} ) - is_cons_available = self.get_consumables(cons_needed) + is_cons_available = self.get_consumables( + self.module.item_codes_for_consumables_required['burn_treatment'] + ) if is_cons_available: logger.debug(key='rti_general_message', data=f"This facility has burn treatment available which has been used for person " @@ -4461,8 +4346,7 @@ def apply(self, person_id, squeeze_factor): ) df.loc[person_id, 'rt_date_death_no_med'] = pd.NaT else: - if self._number_of_times_this_event_has_run < self._maximum_number_times_event_should_run: - self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) + self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) if pd.isnull(df.loc[person_id, 'rt_date_death_no_med']): df.loc[person_id, 'rt_date_death_no_med'] = self.sim.date + DateOffset(days=7) logger.debug(key='rti_general_message', @@ -4489,14 +4373,9 @@ def __init__(self, module, person_id): self.TREATMENT_ID = 'Rti_TetanusVaccine' self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({'EPI': 1}) self.ACCEPTED_FACILITY_LEVEL = '1b' - 
self._number_of_times_this_event_has_run = 0 - self._maximum_number_times_event_should_run = self.module.parameters[ - 'maximum_number_of_times_HSI_events_should_run'] def apply(self, person_id, squeeze_factor): df = self.sim.population.props - self._number_of_times_this_event_has_run += 1 - if not df.at[person_id, 'is_alive']: return self.make_appt_footprint({}) person_injuries = df.loc[[person_id], RTI.INJURY_COLUMNS] @@ -4514,13 +4393,18 @@ def apply(self, person_id, squeeze_factor): return self.make_appt_footprint({}) # If they have a laceration/burn ask request the tetanus vaccine if counts > 0: - is_tetanus_available = self.get_consumables(self.module.cons_item_codes['tetanus_treatment']) + get_item_code = self.sim.modules['HealthSystem'].get_item_code_from_item_name + self.module.item_codes_for_consumables_required['tetanus_treatment'] = { + get_item_code('Tetanus toxoid, injection'): 1 + } + is_tetanus_available = self.get_consumables( + self.module.item_codes_for_consumables_required['tetanus_treatment'] + ) if is_tetanus_available: logger.debug(key='rti_general_message', data=f"Tetanus vaccine requested for person {person_id} and given") else: - if self._number_of_times_this_event_has_run < self._maximum_number_times_event_should_run: - self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) + self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) logger.debug(key='rti_general_message', data=f"Tetanus vaccine requested for person {person_id}, not given") return self.make_appt_footprint({}) @@ -4550,20 +4434,16 @@ def __init__(self, module, person_id): self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({ ('Under5OPD' if self.sim.population.props.at[person_id, "age_years"] < 5 else 'Over5OPD'): 1}) self.ACCEPTED_FACILITY_LEVEL = '1b' - self._number_of_times_this_event_has_run = 0 - self._maximum_number_times_event_should_run = self.module.parameters[ - 'maximum_number_of_times_HSI_events_should_run'] def apply(self, person_id, squeeze_factor): df = self.sim.population.props - self._number_of_times_this_event_has_run += 1 - if not df.at[person_id, 'is_alive']: return self.make_appt_footprint({}) # Check that the person sent here is alive, has been through A&E and RTI_Med_int assert df.loc[person_id, 'rt_diagnosed'], 'This person has not been through a and e' assert df.loc[person_id, 'rt_med_int'], 'This person has not been through rti med int' person_injuries = df.loc[[person_id], RTI.INJURY_COLUMNS] + get_item_code = self.sim.modules['HealthSystem'].get_item_code_from_item_name road_traffic_injuries = self.sim.modules['RTI'] pain_level = "none" # create a dictionary to associate the level of pain to the codes @@ -4607,12 +4487,25 @@ def apply(self, person_id, squeeze_factor): data=dict_to_output, description='Summary of the pain medicine requested by each person') if df.loc[person_id, 'age_years'] < 16: + self.module.item_codes_for_consumables_required['pain_management'] = { + get_item_code("Paracetamol 500mg_1000_CMST"): 8000 + } cond = self.get_consumables( - self.module.cons_item_codes['pain_management_mild_under_16'] + self.module.item_codes_for_consumables_required['pain_management'] ) else: - cond1 = self.get_consumables(self.module.cons_item_codes['pain_management_mild_above_16']) - cond2 = self.get_consumables(self.module.cons_item_codes['pain_management_mild_under_16']) + self.module.item_codes_for_consumables_required['pain_management'] = { + get_item_code("diclofenac sodium 25 mg, enteric coated_1000_IDA"): 300 + } + cond1 = self.get_consumables( + 
self.module.item_codes_for_consumables_required['pain_management'] + ) + self.module.item_codes_for_consumables_required['pain_management'] = { + get_item_code("Paracetamol 500mg_1000_CMST"): 8000 + } + cond2 = self.get_consumables( + self.module.item_codes_for_consumables_required['pain_management'] + ) if (cond1 is True) & (cond2 is True): which = self.module.rng.random_sample(size=1) if which <= 0.5: @@ -4652,8 +4545,7 @@ def apply(self, person_id, squeeze_factor): data=dict_to_output, description='Pain medicine successfully provided to the person') else: - if self._number_of_times_this_event_has_run < self._maximum_number_times_event_should_run: - self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) + self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) logger.debug(key='rti_general_message', data=f"This facility has no pain management available for their mild pain, person " f"{person_id}.") @@ -4665,8 +4557,12 @@ def apply(self, person_id, squeeze_factor): logger.info(key='Requested_Pain_Management', data=dict_to_output, description='Summary of the pain medicine requested by each person') - - is_cons_available = self.get_consumables(self.module.cons_item_codes['pain_management_moderate']) + self.module.item_codes_for_consumables_required['pain_management'] = { + get_item_code("tramadol HCl 100 mg/2 ml, for injection_100_IDA"): 300 + } + is_cons_available = self.get_consumables( + self.module.item_codes_for_consumables_required['pain_management'] + ) logger.debug(key='rti_general_message', data=f"Person {person_id} has requested tramadol for moderate pain relief") @@ -4680,8 +4576,7 @@ def apply(self, person_id, squeeze_factor): data=dict_to_output, description='Pain medicine successfully provided to the person') else: - if self._number_of_times_this_event_has_run < self._maximum_number_times_event_should_run: - self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) + self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) logger.debug(key='rti_general_message', data=f"This facility has no pain management available for moderate pain for person " f"{person_id}.") @@ -4694,8 +4589,11 @@ def apply(self, person_id, squeeze_factor): data=dict_to_output, description='Summary of the pain medicine requested by each person') # give morphine + self.module.item_codes_for_consumables_required['pain_management'] = { + get_item_code("morphine sulphate 10 mg/ml, 1 ml, injection (nt)_10_IDA"): 120 + } is_cons_available = self.get_consumables( - self.module.cons_item_codes['pain_management_severe'] + self.module.item_codes_for_consumables_required['pain_management'] ) logger.debug(key='rti_general_message', data=f"Person {person_id} has requested morphine for severe pain relief") @@ -4710,8 +4608,7 @@ def apply(self, person_id, squeeze_factor): data=dict_to_output, description='Pain medicine successfully provided to the person') else: - if self._number_of_times_this_event_has_run < self._maximum_number_times_event_should_run: - self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) + self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) logger.debug(key='rti_general_message', data=f"This facility has no pain management available for severe pain for person " f"{person_id}.") @@ -4839,8 +4736,6 @@ def __init__(self, module, person_id): self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({'MajorSurg': 1}) self.ACCEPTED_FACILITY_LEVEL = '1b' self.BEDDAYS_FOOTPRINT = self.make_beddays_footprint({}) - self._number_of_times_this_event_has_run = 0 - 
self._maximum_number_times_event_should_run = self.module.parameters['maximum_number_of_times_HSI_events_should_run'] p = self.module.parameters self.prob_perm_disability_with_treatment_severe_TBI = p['prob_perm_disability_with_treatment_severe_TBI'] @@ -4848,14 +4743,38 @@ def __init__(self, module, person_id): self.treated_code = 'none' def apply(self, person_id, squeeze_factor): - self._number_of_times_this_event_has_run += 1 df = self.sim.population.props rng = self.module.rng road_traffic_injuries = self.sim.modules['RTI'] - + get_item_code = self.sim.modules['HealthSystem'].get_item_code_from_item_name # Request first draft of consumables used in major surgery + self.module.item_codes_for_consumables_required['major_surgery'] = { + # request a general anaesthetic + get_item_code("Halothane (fluothane)_250ml_CMST"): 100, + # clean the site of the surgery + get_item_code("Chlorhexidine 1.5% solution_5_CMST"): 500, + # tools to begin surgery + get_item_code("Scalpel blade size 22 (individually wrapped)_100_CMST"): 1, + # administer an IV + get_item_code('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + get_item_code("Giving set iv administration + needle 15 drops/ml_each_CMST"): 1, + get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, + # repair incision made + get_item_code("Suture pack"): 1, + get_item_code("Gauze, absorbent 90cm x 40m_each_CMST"): 100, + # administer pain killer + get_item_code('Pethidine, 50 mg/ml, 2 ml ampoule'): 6, + # administer antibiotic + get_item_code("Ampicillin injection 500mg, PFR_each_CMST"): 1000, + # equipment used by surgeon, gloves and facemask + get_item_code('Disposables gloves, powder free, 100 pieces per box'): 1, + get_item_code('surgical face mask, disp., with metal nose piece_50_IDA'): 1, + # request syringe + get_item_code("Syringe, Autodisable SoloShot IX "): 1 + } + request_outcome = self.get_consumables( - self.module.cons_item_codes['major_surgery'] + self.module.item_codes_for_consumables_required['major_surgery'] ) if not df.at[person_id, 'is_alive']: @@ -5096,8 +5015,7 @@ def apply(self, person_id, squeeze_factor): ['Treated injury code not removed', self.treated_code] df.loc[person_id, 'rt_date_death_no_med'] = pd.NaT else: - if self._number_of_times_this_event_has_run < self._maximum_number_times_event_should_run: - self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) + self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) if pd.isnull(df.loc[person_id, 'rt_date_death_no_med']): df.loc[person_id, 'rt_date_death_no_med'] = self.sim.date + DateOffset(days=7) return self.make_appt_footprint({}) @@ -5163,16 +5081,36 @@ def __init__(self, module, person_id): self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({'MinorSurg': 1}) self.ACCEPTED_FACILITY_LEVEL = '1b' - self._number_of_times_this_event_has_run = 0 - self._maximum_number_times_event_should_run = self.module.parameters[ - 'maximum_number_of_times_HSI_events_should_run'] - def apply(self, person_id, squeeze_factor): - self._number_of_times_this_event_has_run += 1 df = self.sim.population.props if not df.at[person_id, 'is_alive']: return self.make_appt_footprint({}) - + get_item_code = self.sim.modules['HealthSystem'].get_item_code_from_item_name + # Request first draft of consumables used in major surgery + self.module.item_codes_for_consumables_required['minor_surgery'] = { + # request a local anaesthetic + get_item_code("Halothane (fluothane)_250ml_CMST"): 100, + # clean the site of the surgery + 
get_item_code("Chlorhexidine 1.5% solution_5_CMST"): 500, + # tools to begin surgery + get_item_code("Scalpel blade size 22 (individually wrapped)_100_CMST"): 1, + # administer an IV + get_item_code('Cannula iv (winged with injection pot) 18_each_CMST'): 1, + get_item_code("Giving set iv administration + needle 15 drops/ml_each_CMST"): 1, + get_item_code("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 2000, + # repair incision made + get_item_code("Suture pack"): 1, + get_item_code("Gauze, absorbent 90cm x 40m_each_CMST"): 100, + # administer pain killer + get_item_code('Pethidine, 50 mg/ml, 2 ml ampoule'): 6, + # administer antibiotic + get_item_code("Ampicillin injection 500mg, PFR_each_CMST"): 1000, + # equipment used by surgeon, gloves and facemask + get_item_code('Disposables gloves, powder free, 100 pieces per box'): 1, + get_item_code('surgical face mask, disp., with metal nose piece_50_IDA'): 1, + # request syringe + get_item_code("Syringe, Autodisable SoloShot IX "): 1 + } rng = self.module.rng road_traffic_injuries = self.sim.modules['RTI'] surgically_treated_codes = ['322', '211', '212', '323', '722', '291', '241', '811', '812', '813a', '813b', @@ -5198,7 +5136,9 @@ def apply(self, person_id, squeeze_factor): treated_code = rng.choice(relevant_codes) # need to determine whether this person has an injury which will treated with external fixation # external_fixation_codes = ['811', '812', '813a', '813b', '813c'] - request_outcome = self.get_consumables(self.module.cons_item_codes['minor_surgery']) + request_outcome = self.get_consumables( + self.module.item_codes_for_consumables_required['minor_surgery'] + ) # todo: think about consequences of certain consumables not being available for minor surgery and model health # outcomes if request_outcome: @@ -5262,8 +5202,7 @@ def apply(self, person_id, squeeze_factor): ['Injury treated not removed', treated_code] df.loc[person_id, 'rt_date_death_no_med'] = pd.NaT else: - if self._number_of_times_this_event_has_run < self._maximum_number_times_event_should_run: - self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) + self.sim.modules['RTI'].schedule_hsi_event_for_tomorrow(self) if pd.isnull(df.loc[person_id, 'rt_date_death_no_med']): df.loc[person_id, 'rt_date_death_no_med'] = self.sim.date + DateOffset(days=7) logger.debug(key='rti_general_message', @@ -5580,7 +5519,7 @@ def apply(self, population): label: ( len(pop_subset.loc[pop_subset['rt_inj_severity'] == 'severe']) / len(pop_subset) - ) if len(pop_subset) > 0 else float("nan") + ) if len(pop_subset) > 0 else "none_injured" for label, pop_subset in population_subsets_with_injuries.items() } self.totmild += (population_with_injuries.rt_inj_severity == "mild").sum() @@ -5596,25 +5535,25 @@ def apply(self, population): description='severity of injuries in simulation') # ==================================== Incidence ============================================================== # How many were involved in a RTI - n_in_RTI = int(df.rt_road_traffic_inc.sum()) + n_in_RTI = df.rt_road_traffic_inc.sum() children_in_RTI = len(df.loc[df.rt_road_traffic_inc & (df['age_years'] < 19)]) children_alive = len(df.loc[df['age_years'] < 19]) self.numerator += n_in_RTI self.totinjured += n_in_RTI # How many were disabled - n_perm_disabled = int((df.is_alive & df.rt_perm_disability).sum()) + n_perm_disabled = (df.is_alive & df.rt_perm_disability).sum() # self.permdis += n_perm_disabled - n_alive = int(df.is_alive.sum()) + n_alive = df.is_alive.sum() self.denominator += (n_alive - 
n_in_RTI) * (1 / 12) - n_immediate_death = int((df.rt_road_traffic_inc & df.rt_imm_death).sum()) + n_immediate_death = (df.rt_road_traffic_inc & df.rt_imm_death).sum() self.deathonscene += n_immediate_death diedfromrtiidx = df.index[df.rt_imm_death | df.rt_post_med_death | df.rt_no_med_death | df.rt_death_from_shock | df.rt_unavailable_med_death] - n_sought_care = int((df.rt_road_traffic_inc & df.rt_med_int).sum()) + n_sought_care = (df.rt_road_traffic_inc & df.rt_med_int).sum() self.soughtmedcare += n_sought_care - n_death_post_med = int(df.rt_post_med_death.sum()) + n_death_post_med = df.rt_post_med_death.sum() self.deathaftermed += n_death_post_med - self.deathwithoutmed += int(df.rt_no_med_death.sum()) + self.deathwithoutmed += df.rt_no_med_death.sum() self.death_inc_numerator += n_immediate_death + n_death_post_med + len(df.loc[df.rt_no_med_death]) self.death_in_denominator += (n_alive - (n_immediate_death + n_death_post_med + len(df.loc[df.rt_no_med_death]) )) * \ @@ -5623,7 +5562,7 @@ def apply(self, population): percent_accidents_result_in_death = \ (self.deathonscene + self.deathaftermed + self.deathwithoutmed) / self.numerator else: - percent_accidents_result_in_death = float("nan") + percent_accidents_result_in_death = 'none injured' maleinrti = len(df.loc[df.rt_road_traffic_inc & (df['sex'] == 'M')]) femaleinrti = len(df.loc[df.rt_road_traffic_inc & (df['sex'] == 'F')]) @@ -5632,35 +5571,35 @@ def apply(self, population): maleinrti = maleinrti / divider femaleinrti = femaleinrti / divider else: - maleinrti = 1.0 - femaleinrti = 0.0 + maleinrti = 1 + femaleinrti = 0 mfratio = [maleinrti, femaleinrti] if (n_in_RTI - len(df.loc[df.rt_imm_death])) > 0: percent_sought_care = n_sought_care / (n_in_RTI - len(df.loc[df.rt_imm_death])) else: - percent_sought_care = float("nan") + percent_sought_care = 'none_injured' if n_sought_care > 0: percent_died_post_care = n_death_post_med / n_sought_care else: - percent_died_post_care = float("nan") + percent_died_post_care = 'none_injured' if n_sought_care > 0: percentage_admitted_to_ICU_or_HDU = len(df.loc[df.rt_med_int & df.rt_in_icu_or_hdu]) / n_sought_care else: - percentage_admitted_to_ICU_or_HDU = float("nan") + percentage_admitted_to_ICU_or_HDU = 'none_injured' if (n_alive - n_in_RTI) > 0: inc_rti = (n_in_RTI / ((n_alive - n_in_RTI) * (1 / 12))) * 100000 else: - inc_rti = 0.0 + inc_rti = 0 if (children_alive - children_in_RTI) > 0: inc_rti_in_children = (children_in_RTI / ((children_alive - children_in_RTI) * (1 / 12))) * 100000 else: - inc_rti_in_children = 0.0 + inc_rti_in_children = 0 if (n_alive - len(diedfromrtiidx)) > 0: inc_rti_death = (len(diedfromrtiidx) / ((n_alive - len(diedfromrtiidx)) * (1 / 12))) * 100000 else: - inc_rti_death = 0.0 + inc_rti_death = 0 if (n_alive - len(df.loc[df.rt_post_med_death])) > 0: inc_post_med_death = (len(df.loc[df.rt_post_med_death]) / ((n_alive - len(df.loc[df.rt_post_med_death])) * (1 / 12))) * 100000 @@ -5670,21 +5609,21 @@ def apply(self, population): inc_imm_death = (len(df.loc[df.rt_imm_death]) / ((n_alive - len(df.loc[df.rt_imm_death])) * (1 / 12))) * \ 100000 else: - inc_imm_death = 0.0 + inc_imm_death = 0 if (n_alive - len(df.loc[df.rt_no_med_death])) > 0: inc_death_no_med = (len(df.loc[df.rt_no_med_death]) / ((n_alive - len(df.loc[df.rt_no_med_death])) * (1 / 12))) * 100000 else: - inc_death_no_med = 0.0 + inc_death_no_med = 0 if (n_alive - len(df.loc[df.rt_unavailable_med_death])) > 0: inc_death_unavailable_med = (len(df.loc[df.rt_unavailable_med_death]) / ((n_alive - 
len(df.loc[df.rt_unavailable_med_death])) * (1 / 12))) * 100000 else: - inc_death_unavailable_med = 0.0 + inc_death_unavailable_med = 0 if self.fracdenominator > 0: frac_incidence = (self.totfracnumber / self.fracdenominator) * 100000 else: - frac_incidence = 0.0 + frac_incidence = 0 # calculate case fatality ratio for those injured who don't seek healthcare did_not_seek_healthcare = len(df.loc[df.rt_road_traffic_inc & ~df.rt_med_int & ~df.rt_diagnosed]) died_no_healthcare = \ @@ -5692,12 +5631,12 @@ def apply(self, population): if did_not_seek_healthcare > 0: cfr_no_med = died_no_healthcare / did_not_seek_healthcare else: - cfr_no_med = float("nan") + cfr_no_med = 'all_sought_care' # calculate incidence rate per 100,000 of deaths on scene if n_alive > 0: inc_death_on_scene = (len(df.loc[df.rt_imm_death]) / n_alive) * 100000 * (1 / 12) else: - inc_death_on_scene = 0.0 + inc_death_on_scene = 0 dict_to_output = { 'number involved in a rti': n_in_RTI, 'incidence of rti per 100,000': inc_rti, @@ -5735,7 +5674,7 @@ def apply(self, population): percent_related_to_alcohol = len(injuredDemographics.loc[injuredDemographics.li_ex_alc]) / \ len(injuredDemographics) except ZeroDivisionError: - percent_related_to_alcohol = 0.0 + percent_related_to_alcohol = 0 injured_demography_summary = { 'males_in_rti': injuredDemographics['sex'].value_counts()['M'], 'females_in_rti': injuredDemographics['sex'].value_counts()['F'], diff --git a/src/tlo/methods/stunting.py b/src/tlo/methods/stunting.py index ec2725bd39..002d24bc31 100644 --- a/src/tlo/methods/stunting.py +++ b/src/tlo/methods/stunting.py @@ -524,9 +524,7 @@ def apply(self, population): """Log the current distribution of stunting classification by age""" df = population.props - subset = df.loc[df.is_alive & (df.age_years < 5)].copy() - subset["age_years"] = pd.Categorical(subset["age_years"], categories=range(5)) - d_to_log = subset.groupby( + d_to_log = df.loc[df.is_alive & (df.age_years < 5)].groupby( by=['age_years', 'un_HAZ_category']).size().sort_index().to_dict() def convert_keys_to_string(d): diff --git a/src/tlo/methods/symptommanager.py b/src/tlo/methods/symptommanager.py index 67389e283e..26f6aa7ee4 100644 --- a/src/tlo/methods/symptommanager.py +++ b/src/tlo/methods/symptommanager.py @@ -11,11 +11,9 @@ * The probability of spurious symptoms is not informed by data. """ -from __future__ import annotations - from collections import defaultdict from pathlib import Path -from typing import TYPE_CHECKING, List, Optional, Sequence, Union +from typing import Sequence, Union import numpy as np import pandas as pd @@ -25,9 +23,6 @@ from tlo.methods import Metadata from tlo.util import BitsetHandler -if TYPE_CHECKING: - from tlo.population import IndividualProperties - logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -465,81 +460,33 @@ def who_not_have(self, symptom_string: str) -> pd.Index: ) ] - def has_what( - self, - person_id: Optional[int] = None, - individual_details: Optional[IndividualProperties] = None, - disease_module: Optional[Module] = None, - ) -> List[str]: + def has_what(self, person_id, disease_module: Module = None): """ This is a helper function that will give a list of strings for the symptoms that a _single_ person is currently experiencing. + Optionally can specify disease_module_name to limit to the symptoms caused by that disease module - If working in a `tlo.population.IndividualProperties` context, one can pass the context object - instead of supplying the person's DataFrame index. 
- Note that at least one of these inputs must be passed as a keyword argument however. - In the event that both arguments are passed, the individual_details argument takes precedence over the person_id. - - Optionally can specify disease_module_name to limit to the symptoms caused by that disease module. - - :param person_id: the person_of of interest. - :param individual_details: `tlo.population.IndividualProperties` object for the person of interest. - :param disease_module: (optional) disease module of interest. - :return: list of strings for the symptoms that are currently being experienced. + :param person_id: the person_of of interest + :param disease_module: (optional) disease module of interest + :return: list of strings for the symptoms that are currently being experienced """ - assert ( - disease_module.name in ([self.name] + self.recognised_module_names) - if disease_module is not None - else True - ), "Disease Module Name is not recognised" - - if individual_details is not None: - # We are working in an IndividualDetails context, avoid lookups to the - # population DataFrame as we have this context stored already. - assert individual_details["is_alive"], "The person is not alive" - - if disease_module is not None: - int_repr = self.bsh._element_to_int_map[disease_module.name] - return [ - symptom - for symptom in self.symptom_names - if individual_details[ - self.bsh._get_columns(self.get_column_name_for_symptom(symptom)) - ] - & int_repr - != 0 - ] - else: - return [ - symptom - for symptom in self.symptom_names - if individual_details[self.get_column_name_for_symptom(symptom)] > 0 - ] - else: - assert isinstance( - person_id, (int, np.integer) - ), "person_id must be a single integer for one particular person" - df = self.sim.population.props - assert df.at[person_id, "is_alive"], "The person is not alive" + assert isinstance(person_id, (int, np.integer)), 'person_id must be a single integer for one particular person' - if disease_module is not None: - sy_columns = [ - self.get_column_name_for_symptom(s) for s in self.symptom_names - ] - person_has = self.bsh.has( - [person_id], disease_module.name, first=True, columns=sy_columns - ) - return [s for s in self.symptom_names if person_has[f"sy_{s}"]] - else: - symptom_cols = df.loc[ - person_id, [f"sy_{s}" for s in self.symptom_names] - ] - return ( - symptom_cols.index[symptom_cols > 0] - .str.removeprefix("sy_") - .to_list() - ) + df = self.sim.population.props + assert df.at[person_id, 'is_alive'], "The person is not alive" + + if disease_module is not None: + assert disease_module.name in ([self.name] + self.recognised_module_names), \ + "Disease Module Name is not recognised" + sy_columns = [self.get_column_name_for_symptom(s) for s in self.symptom_names] + person_has = self.bsh.has( + [person_id], disease_module.name, first=True, columns=sy_columns + ) + return [s for s in self.symptom_names if person_has[f'sy_{s}']] + else: + symptom_cols = df.loc[person_id, [f'sy_{s}' for s in self.symptom_names]] + return symptom_cols.index[symptom_cols > 0].str.removeprefix("sy_").to_list() def have_what(self, person_ids: Sequence[int]): """Find the set of symptoms for a list of person_ids. 
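
For context on the `has_what` change above: both versions rest on the same representation, in which each `sy_*` column holds an integer bitmask where every registered module owns one bit, so "is this symptom present" is a non-zero test and "did this module cause it" is a bitwise AND. A minimal, self-contained sketch of that bookkeeping (illustrative only — the module names and helper below are invented and are not TLOmodel's `BitsetHandler` API):

    import pandas as pd

    # Each registered module gets one bit; a symptom column stores the OR of
    # the bits of all modules currently causing that symptom.
    modules = ["Tb", "Hiv", "SpuriousSymptoms"]   # illustrative module names
    bit_for = {name: 1 << i for i, name in enumerate(modules)}

    df = pd.DataFrame({"sy_fever": [0, 0, 0], "sy_cough": [0, 0, 0]})

    # "Tb" causes fever and cough in person 0; "Hiv" also causes fever there
    df.loc[0, "sy_fever"] |= bit_for["Tb"] | bit_for["Hiv"]
    df.loc[0, "sy_cough"] |= bit_for["Tb"]

    def has_what(person_id, module_name=None):
        """Symptoms person_id currently has, optionally restricted to one cause."""
        symptoms = [c for c in df.columns if c.startswith("sy_")]
        if module_name is None:
            return [s[3:] for s in symptoms if df.at[person_id, s] > 0]
        return [s[3:] for s in symptoms if df.at[person_id, s] & bit_for[module_name]]

    print(has_what(0))          # ['fever', 'cough']
    print(has_what(0, "Hiv"))   # ['fever']

One design consequence visible in the diff: because the encoding is unchanged, reverting to the simpler `has_what(person_id, disease_module)` signature only removes the `IndividualProperties` fast path, not any information about which module caused which symptom.
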
diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 623ee2e483..02d860fe52 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -9,7 +9,7 @@ import pandas as pd -from tlo import Date, DateOffset, Module, Parameter, Property, Types, logging +from tlo import DateOffset, Module, Parameter, Property, Types, logging from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent from tlo.lm import LinearModel, LinearModelType, Predictor from tlo.methods import Metadata, hiv @@ -376,19 +376,6 @@ def __init__(self, name=None, resourcefilepath=None, run_with_checks=False): Types.LIST, "length of inpatient stay for end-of-life TB patients", ), - # ------------------ scale-up parameters for scenario analysis ------------------ # - "type_of_scaleup": Parameter( - Types.STRING, "argument to determine type scale-up of program which will be implemented, " - "can be 'none', 'target' or 'max'", - ), - "scaleup_start_year": Parameter( - Types.INT, - "the year when the scale-up starts (it will occur on 1st January of that year)" - ), - "scaleup_parameters": Parameter( - Types.DATA_FRAME, - "the parameters and values changed in scenario analysis" - ) } def read_parameters(self, data_folder): @@ -426,9 +413,6 @@ def read_parameters(self, data_folder): .tolist() ) - # load parameters for scale-up projections - p['scaleup_parameters'] = workbook["scaleup_parameters"] - # 2) Get the DALY weights if "HealthBurden" in self.sim.modules.keys(): # HIV-negative @@ -470,13 +454,9 @@ def read_parameters(self, data_folder): ) def pre_initialise_population(self): - """Do things required before the population is created - * Build the LinearModels""" - self._build_linear_models() - - def _build_linear_models(self): - """Establish the Linear Models""" - + """ + * Establish the Linear Models + """ p = self.parameters # risk of active tb @@ -869,13 +849,6 @@ def initialise_simulation(self, sim): sim.schedule_event(TbSelfCureEvent(self), sim.date) sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) - # 2) log at the end of the year - # Optional: Schedule the scale-up of programs - if self.parameters["type_of_scaleup"] != 'none': - scaleup_start_date = Date(self.parameters["scaleup_start_year"], 1, 1) - assert scaleup_start_date >= self.sim.start_date, f"Date {scaleup_start_date} is before simulation starts." 
- sim.schedule_event(TbScaleUpEvent(self), scaleup_start_date) - # 2) log at the end of the year sim.schedule_event(TbLoggingEvent(self), sim.date + DateOffset(years=1)) @@ -888,37 +861,6 @@ def initialise_simulation(self, sim): TbCheckPropertiesEvent(self), sim.date + pd.DateOffset(months=1) ) - def update_parameters_for_program_scaleup(self): - """ options for program scale-up are 'target' or 'max' """ - p = self.parameters - scaled_params_workbook = p["scaleup_parameters"] - - if p['type_of_scaleup'] == 'target': - scaled_params = scaled_params_workbook.set_index('parameter')['target_value'].to_dict() - else: - scaled_params = scaled_params_workbook.set_index('parameter')['max_value'].to_dict() - - # scale-up TB program - # use NTP treatment rates - p["rate_testing_active_tb"]["treatment_coverage"] = scaled_params["tb_treatment_coverage"] - - # increase tb treatment success rates - p["prob_tx_success_ds"] = scaled_params["tb_prob_tx_success_ds"] - p["prob_tx_success_mdr"] = scaled_params["tb_prob_tx_success_mdr"] - p["prob_tx_success_0_4"] = scaled_params["tb_prob_tx_success_0_4"] - p["prob_tx_success_5_14"] = scaled_params["tb_prob_tx_success_5_14"] - - # change first-line testing for TB to xpert - p["first_line_test"] = scaled_params["first_line_test"] - p["second_line_test"] = scaled_params["second_line_test"] - - # increase coverage of IPT - p["ipt_coverage"]["coverage_plhiv"] = scaled_params["ipt_coverage_plhiv"] - p["ipt_coverage"]["coverage_paediatric"] = scaled_params["ipt_coverage_paediatric"] - - # update exising linear models to use new scaled-up paramters - self._build_linear_models() - def on_birth(self, mother_id, child_id): """Initialise properties for a newborn individual allocate IPT for child if mother diagnosed with TB @@ -1425,21 +1367,6 @@ def apply(self, population): self.module.relapse_event(population) -class TbScaleUpEvent(Event, PopulationScopeEventMixin): - """ This event exists to change parameters or functions - depending on the scenario for projections which has been set - It only occurs once on date: scaleup_start_date, - called by initialise_simulation - """ - - def __init__(self, module): - super().__init__(module) - - def apply(self, population): - - self.module.update_parameters_for_program_scaleup() - - class TbActiveEvent(RegularEvent, PopulationScopeEventMixin): """ * check for those with dates of active tb onset within last time-period @@ -1708,7 +1635,7 @@ def apply(self, person_id, squeeze_factor): # check if patient has: cough, fever, night sweat, weight loss # if none of the above conditions are present, no further action - persons_symptoms = self.sim.modules["SymptomManager"].has_what(person_id=person_id) + persons_symptoms = self.sim.modules["SymptomManager"].has_what(person_id) if not any(x in self.module.symptom_list for x in persons_symptoms): return self.make_appt_footprint({}) @@ -1971,7 +1898,7 @@ def apply(self, person_id, squeeze_factor): # check if patient has: cough, fever, night sweat, weight loss set_of_symptoms_that_indicate_tb = set(self.module.symptom_list) - persons_symptoms = self.sim.modules["SymptomManager"].has_what(person_id=person_id) + persons_symptoms = self.sim.modules["SymptomManager"].has_what(person_id) if not set_of_symptoms_that_indicate_tb.intersection(persons_symptoms): # if none of the above conditions are present, no further action @@ -2475,7 +2402,7 @@ def apply(self, person_id, squeeze_factor): return # if currently have symptoms of TB, refer for screening/testing - persons_symptoms = 
self.sim.modules["SymptomManager"].has_what(person_id=person_id) + persons_symptoms = self.sim.modules["SymptomManager"].has_what(person_id) if any(x in self.module.symptom_list for x in persons_symptoms): self.sim.modules["HealthSystem"].schedule_hsi_event( @@ -2729,7 +2656,7 @@ def apply(self, population): ) # proportion of active TB cases in the last year who are HIV-positive - prop_hiv = inc_active_hiv / new_tb_cases if new_tb_cases else 0.0 + prop_hiv = inc_active_hiv / new_tb_cases if new_tb_cases else 0 logger.info( key="tb_incidence", @@ -2763,7 +2690,7 @@ def apply(self, population): df[(df.age_years >= 15) & df.is_alive] ) if len( df[(df.age_years >= 15) & df.is_alive] - ) else 0.0 + ) else 0 assert prev_active_adult <= 1 # prevalence of active TB in children @@ -2774,7 +2701,7 @@ def apply(self, population): df[(df.age_years < 15) & df.is_alive] ) if len( df[(df.age_years < 15) & df.is_alive] - ) else 0.0 + ) else 0 assert prev_active_child <= 1 # LATENT @@ -2791,7 +2718,7 @@ def apply(self, population): df[(df.age_years >= 15) & df.is_alive] ) if len( df[(df.age_years >= 15) & df.is_alive] - ) else 0.0 + ) else 0 assert prev_latent_adult <= 1 # proportion of population with latent TB - children @@ -2833,7 +2760,7 @@ def apply(self, population): if new_mdr_cases: prop_mdr = new_mdr_cases / new_tb_cases else: - prop_mdr = 0.0 + prop_mdr = 0 logger.info( key="tb_mdr", @@ -2855,7 +2782,7 @@ def apply(self, population): if new_tb_diagnosis: prop_dx = new_tb_diagnosis / new_tb_cases else: - prop_dx = 0.0 + prop_dx = 0 # ------------------------------------ TREATMENT ------------------------------------ # number of tb cases who became active in last timeperiod and initiated treatment @@ -2871,7 +2798,7 @@ def apply(self, population): tx_coverage = new_tb_tx / new_tb_cases # assert tx_coverage <= 1 else: - tx_coverage = 0.0 + tx_coverage = 0 # ipt coverage new_tb_ipt = len( @@ -2884,7 +2811,7 @@ def apply(self, population): if new_tb_ipt: current_ipt_coverage = new_tb_ipt / len(df[df.is_alive]) else: - current_ipt_coverage = 0.0 + current_ipt_coverage = 0 logger.info( key="tb_treatment", @@ -2955,7 +2882,7 @@ def apply(self, population): if adult_num_false_positive: adult_prop_false_positive = adult_num_false_positive / new_tb_tx_adult else: - adult_prop_false_positive = 0.0 + adult_prop_false_positive = 0 # children child_num_false_positive = len( diff --git a/src/tlo/scenario.py b/src/tlo/scenario.py index f64325f9ec..aa1be42aa9 100644 --- a/src/tlo/scenario.py +++ b/src/tlo/scenario.py @@ -73,7 +73,6 @@ def draw_parameters(self, draw_number, rng): from tlo import Date, Simulation, logging from tlo.analysis.utils import parse_log_file -from tlo.util import str_to_pandas_date logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -142,16 +141,6 @@ def parse_arguments(self, extra_arguments: List[str]) -> None: self.arguments = extra_arguments parser = argparse.ArgumentParser() - parser.add_argument( - "--resume-simulation", - type=str, - help="Directory containing suspended state files to resume simulation from", - ) - parser.add_argument( - "--suspend-date", - type=str_to_pandas_date, - help="Date to suspend the simulation at", - ) # add arguments from the subclass self.add_arguments(parser) @@ -393,58 +382,20 @@ def run_sample_by_number(self, output_directory, draw_number, sample_number): sample = self.get_sample(draw, sample_number) log_config = self.scenario.get_log_config(output_directory) - logger.info( - key="message", - data=f"Running draw 
{sample['draw_number']}, sample {sample['sample_number']}", + logger.info(key="message", data=f"Running draw {sample['draw_number']}, sample {sample['sample_number']}") + + sim = Simulation( + start_date=self.scenario.start_date, + seed=sample["simulation_seed"], + log_config=log_config ) + sim.register(*self.scenario.modules()) - # if user has specified a restore simulation, we load it from a pickle file - if ( - hasattr(self.scenario, "resume_simulation") - and self.scenario.resume_simulation is not None - ): - suspended_simulation_path = ( - Path(self.scenario.resume_simulation) - / str(draw_number) - / str(sample_number) - / "suspended_simulation.pickle" - ) - logger.info( - key="message", - data=f"Loading pickled suspended simulation from {suspended_simulation_path}", - ) - sim = Simulation.load_from_pickle(pickle_path=suspended_simulation_path, log_config=log_config) - else: - sim = Simulation( - start_date=self.scenario.start_date, - seed=sample["simulation_seed"], - log_config=log_config, - ) - sim.register(*self.scenario.modules()) - - if sample["parameters"] is not None: - self.override_parameters(sim, sample["parameters"]) - - sim.make_initial_population(n=self.scenario.pop_size) - sim.initialise(end_date=self.scenario.end_date) - - # if user has specified a suspend date, we run the simulation to that date and - # save it to a pickle file - if ( - hasattr(self.scenario, "suspend_date") - and self.scenario.suspend_date is not None - ): - sim.run_simulation_to(to_date=self.scenario.suspend_date) - suspended_simulation_path = Path(log_config["directory"]) / "suspended_simulation.pickle" - sim.save_to_pickle(pickle_path=suspended_simulation_path) - sim.close_output_file() - logger.info( - key="message", - data=f"Simulation suspended at {self.scenario.suspend_date} and saved to {suspended_simulation_path}", - ) - else: - sim.run_simulation_to(to_date=self.scenario.end_date) - sim.finalise() + if sample["parameters"] is not None: + self.override_parameters(sim, sample["parameters"]) + + sim.make_initial_population(n=self.scenario.pop_size) + sim.simulate(end_date=self.scenario.end_date) if sim.log_filepath is not None: outputs = parse_log_file(sim.log_filepath) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 547edf1d23..1d15495490 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -1,102 +1,58 @@ """The main simulation controller.""" -from __future__ import annotations - import datetime import heapq import itertools import time from collections import OrderedDict from pathlib import Path -from typing import TYPE_CHECKING, Optional +from typing import Dict, Optional, Union import numpy as np -try: - import dill - - DILL_AVAILABLE = True -except ImportError: - DILL_AVAILABLE = False - from tlo import Date, Population, logging -from tlo.dependencies import ( - check_dependencies_present, - initialise_missing_dependencies, - topologically_sort_modules, -) +from tlo.dependencies import check_dependencies_present, topologically_sort_modules from tlo.events import Event, IndividualScopeEventMixin from tlo.progressbar import ProgressBar -if TYPE_CHECKING: - from tlo.core import Module - from tlo.logging.core import LogLevel - logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -class SimulationPreviouslyInitialisedError(Exception): - """Exception raised when trying to initialise an already initialised simulation.""" - - -class SimulationNotInitialisedError(Exception): - """Exception raised when trying to run simulation before initialising.""" - - 
class Simulation: """The main control centre for a simulation. - This class contains the core simulation logic and event queue, and holds references - to all the information required to run a complete simulation: the population, - disease modules, etc. + This class contains the core simulation logic and event queue, and holds + references to all the information required to run a complete simulation: + the population, disease modules, etc. Key attributes include: - :ivar date: The current simulation date. - :ivar modules: A dictionary of the disease modules used in this simulation, keyed - by the module name. - :ivar population: The population being simulated. - :ivar rng: The simulation-level random number generator. - - .. note:: - Individual modules also have their own random number generator with independent - state. + `date` + The current simulation date. + + `modules` + A list of the disease modules contributing to this simulation. + + `population` + The Population being simulated. + + `rng` + The simulation-level random number generator. + Note that individual modules also have their own random number generator + with independent state. """ - def __init__( - self, - *, - start_date: Date, - seed: Optional[int] = None, - log_config: Optional[dict] = None, - show_progress_bar: bool = False, - resourcefilepath: Optional[Path] = None, - ): + def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = None, + show_progress_bar=False): """Create a new simulation. - :param start_date: The date the simulation begins; must be given as - a keyword parameter for clarity. - :param seed: The seed for random number generator. class will create one if not - supplied - :param log_config: Dictionary specifying logging configuration for this - simulation. Can have entries: `filename` - prefix for log file name, final - file name will have a date time appended, if not present default is to not - output log to a file; `directory` - path to output directory to write log - file to, default if not specified is to output to the `outputs` folder; - `custom_levels` - dictionary to set logging levels, '*' can be used as a key - for all registered modules; `suppress_stdout` - if `True`, suppresses - logging to standard output stream (default is `False`). - :param show_progress_bar: Whether to show a progress bar instead of the logger - output during the simulation. - :param resourcefilepath: Path to resource files folder. Assign ``None` if no - path is provided. - - .. note:: - The `custom_levels` entry in `log_config` argument can be used to disable - logging on all disease modules by setting a high level to `*`, and then - enabling logging on one module of interest by setting a low level, for - example ``{'*': logging.CRITICAL 'tlo.methods.hiv': logging.INFO}``. + :param start_date: the date the simulation begins; must be given as + a keyword parameter for clarity + :param seed: the seed for random number generator. 
class will create one if not supplied + :param log_config: sets up the logging configuration for this simulation + :param show_progress_bar: whether to show a progress bar instead of the logger + output during the simulation """ # simulation self.date = self.start_date = start_date @@ -107,60 +63,44 @@ def __init__( self.population: Optional[Population] = None self.show_progress_bar = show_progress_bar - self.resourcefilepath = resourcefilepath # logging if log_config is None: log_config = {} self._custom_log_levels = None - self._log_filepath = self._configure_logging(**log_config) - + self._log_filepath = None + self._configure_logging(**log_config) # random number generator - seed_from = "auto" if seed is None else "user" + seed_from = 'auto' if seed is None else 'user' self._seed = seed self._seed_seq = np.random.SeedSequence(seed) logger.info( - key="info", - data=f"Simulation RNG {seed_from} entropy = {self._seed_seq.entropy}", + key='info', + data=f'Simulation RNG {seed_from} entropy = {self._seed_seq.entropy}' ) self.rng = np.random.RandomState(np.random.MT19937(self._seed_seq)) - # Whether simulation has been initialised - self._initialised = False - - def _configure_logging( - self, - filename: Optional[str] = None, - directory: Path | str = "./outputs", - custom_levels: Optional[dict[str, LogLevel]] = None, - suppress_stdout: bool = False - ): - """Configure logging of simulation outputs. - - Can write log output to a file in addition the default of `stdout`. Mnimum - custom levels for each logger can be specified for filtering out messages. - - :param filename: Prefix for log file name, final file name will have a date time - appended. + def _configure_logging(self, filename: str = None, directory: Union[Path, str] = "./outputs", + custom_levels: Dict[str, int] = None, suppress_stdout: bool = True): + """Configure logging, can write logging to a logfile in addition the default of stdout. + + Minimum custom levels for each logger can be specified for filtering out messages + + :param filename: Prefix for logfile name, final logfile will have a datetime appended :param directory: Path to output directory, default value is the outputs folder. - :param custom_levels: Dictionary to set logging levels, '*' can be used as a key - for all registered modules. This is likely to be used to disable logging on - all disease modules by setting a high level, and then enable one of interest - by setting a low level, for example - ``{'*': logging.CRITICAL 'tlo.methods.hiv': logging.INFO}``. - :param suppress_stdout: If `True`, suppresses logging to standard output stream - (default is `False`). + :param custom_levels: dictionary to set logging levels, '*' can be used as a key for all registered modules. + This is likely to be used to disable all disease modules, and then enable one of interest + e.g. ``{'*': logging.CRITICAL 'tlo.methods.hiv': logging.INFO}`` + :param suppress_stdout: If True, suppresses logging to standard output stream (default is False) :return: Path of the log file if a filename has been given. 
""" # clear logging environment # if using progress bar we do not print log messages to stdout to avoid # clashes between progress bar and log output - logging.initialise( - add_stdout_handler=not (self.show_progress_bar or suppress_stdout), - simulation_date_getter=lambda: self.date.isoformat(), - ) + logging.init_logging(add_stdout_handler=not (self.show_progress_bar or suppress_stdout)) + logging.set_simulation(self) if custom_levels: # if modules have already been registered @@ -171,54 +111,39 @@ def _configure_logging( self._custom_log_levels = custom_levels if filename and directory: - timestamp = datetime.datetime.now().strftime("%Y-%m-%dT%H%M%S") + timestamp = datetime.datetime.now().strftime('%Y-%m-%dT%H%M%S') log_path = Path(directory) / f"{filename}__{timestamp}.log" self.output_file = logging.set_output_file(log_path) logger.info(key='info', data=f'Log output: {log_path}') + self._log_filepath = log_path return log_path return None @property - def log_filepath(self) -> Path: + def log_filepath(self): """The path to the log file, if one has been set.""" return self._log_filepath - def register( - self, - *modules: Module, - sort_modules: bool = True, - check_all_dependencies: bool = True, - auto_register_dependencies: bool = False, - ) -> None: + def register(self, *modules, sort_modules=True, check_all_dependencies=True): """Register one or more disease modules with the simulation. - :param modules: The disease module(s) to use as part of this simulation. + :param modules: the disease module(s) to use as part of this simulation. Multiple modules may be given as separate arguments to one call. :param sort_modules: Whether to topologically sort the modules so that any initialisation dependencies (specified by the ``INIT_DEPENDENCIES`` attribute) of a module are initialised before the module itself is. A - :py:exc:`.ModuleDependencyError` exception will be raised if there are - missing initialisation dependencies or circular initialisation dependencies - between modules that cannot be resolved. If this flag is set to ``True`` - there is also a requirement that at most one instance of each module is - registered and :py:exc:`.MultipleModuleInstanceError` will be raised if this - is not the case. + ``ModuleDependencyError`` exception will be raised if there are missing + initialisation dependencies or circular initialisation dependencies between + modules that cannot be resolved. If this flag is set to ``True`` there is + also a requirement that at most one instance of each module is registered + and ``MultipleModuleInstanceError`` will be raised if this is not the case. :param check_all_dependencies: Whether to check if all of each module's declared dependencies (that is, the union of the ``INIT_DEPENDENCIES`` and ``ADDITIONAL_DEPENDENCIES`` attributes) have been included in the set of - modules to be registered. A :py:exc:`.ModuleDependencyError` exception will + modules to be registered. A ``ModuleDependencyError`` exception will be raised if there are missing dependencies. - :param auto_register_dependencies: Whether to register missing module dependencies - or not. If this argument is set to True, all module dependencies will be - automatically registered. 
""" - if auto_register_dependencies: - modules = [ - *modules, - *initialise_missing_dependencies(modules, resourcefilepath=self.resourcefilepath) - ] - if sort_modules: modules = list(topologically_sort_modules(modules)) if check_all_dependencies: @@ -226,32 +151,30 @@ def register( # Iterate over modules and per-module seed sequences spawned from simulation # level seed sequence for module, seed_seq in zip(modules, self._seed_seq.spawn(len(modules))): - assert ( - module.name not in self.modules - ), f"A module named {module.name} has already been registered" + assert module.name not in self.modules, f'A module named {module.name} has already been registered' # Seed the RNG for the registered module using spawned seed sequence logger.info( - key="info", + key='info', data=( - f"{module.name} RNG auto (entropy, spawn key) = " - f"({seed_seq.entropy}, {seed_seq.spawn_key[0]})" - ), + f'{module.name} RNG auto (entropy, spawn key) = ' + f'({seed_seq.entropy}, {seed_seq.spawn_key[0]})' + ) ) module.rng = np.random.RandomState(np.random.MT19937(seed_seq)) self.modules[module.name] = module module.sim = self - module.read_parameters("") + module.read_parameters('') if self._custom_log_levels: logging.set_logging_levels(self._custom_log_levels) - def make_initial_population(self, *, n: int) -> None: + def make_initial_population(self, *, n): """Create the initial population to simulate. - :param n: The number of individuals to create; must be given as - a keyword parameter for clarity. + :param n: the number of individuals to create; must be given as + a keyword parameter for clarity """ start = time.time() @@ -269,46 +192,63 @@ def make_initial_population(self, *, n: int) -> None: for module in self.modules.values(): start1 = time.time() module.initialise_population(self.population) - logger.debug( - key="debug", - data=f"{module.name}.initialise_population() {time.time() - start1} s", - ) + logger.debug(key='debug', data=f'{module.name}.initialise_population() {time.time() - start1} s') end = time.time() - logger.info(key="info", data=f"make_initial_population() {end - start} s") + logger.info(key='info', data=f'make_initial_population() {end - start} s') - def initialise(self, *, end_date: Date) -> None: - """Initialise all modules in simulation. + def simulate(self, *, end_date): + """Simulation until the given end date - :param end_date: Date to end simulation on - accessible to modules to allow - initialising data structures which may depend (in size for example) on the - date range being simulated. + :param end_date: when to stop simulating. Only events strictly before this + date will be allowed to occur. + Must be given as a keyword parameter for clarity. """ - if self._initialised: - msg = "initialise method should only be called once" - raise SimulationPreviouslyInitialisedError(msg) - self.date = self.start_date + start = time.time() self.end_date = end_date # store the end_date so that others can reference it + for module in self.modules.values(): module.initialise_simulation(self) - self._initialised = True - def finalise(self, wall_clock_time: Optional[float] = None) -> None: - """Finalise all modules in simulation and close logging file if open. 
+        progress_bar = None
+        if self.show_progress_bar:
+            num_simulated_days = (end_date - self.start_date).days
+            progress_bar = ProgressBar(
+                num_simulated_days, "Simulation progress", unit="day")
+            progress_bar.start()
+
+        while self.event_queue:
+            event, date = self.event_queue.next_event()
+
+            if self.show_progress_bar:
+                simulation_day = (date - self.start_date).days
+                stats_dict = {
+                    "date": str(date.date()),
+                    "dataframe size": str(len(self.population.props)),
+                    "queued events": str(len(self.event_queue)),
+                }
+                if "HealthSystem" in self.modules:
+                    stats_dict["queued HSI events"] = str(
+                        len(self.modules["HealthSystem"].HSI_EVENT_QUEUE)
+                    )
+                progress_bar.update(simulation_day, stats_dict=stats_dict)
+
+            if date >= end_date:
+                self.date = end_date
+                break
+            self.fire_single_event(event, date)
+
+        # The simulation has ended.
+        if self.show_progress_bar:
+            progress_bar.stop()

-        :param wall_clock_time: Optional argument specifying total time taken to
-            simulate, to be written out to log before closing.
-        """
        for module in self.modules.values():
            module.on_simulation_end()
-        if wall_clock_time is not None:
-            logger.info(key="info", data=f"simulate() {wall_clock_time} s")
-        self.close_output_file()

-    def close_output_file(self) -> None:
-        """Close logging file if open."""
+        logger.info(key='info', data=f'simulate() {time.time() - start} s')
+
+        # From Python logging.shutdown
        if self.output_file:
-            # From Python logging.shutdown
            try:
                self.output_file.acquire()
                self.output_file.flush()
@@ -317,121 +257,52 @@ def close_output_file(self) -> None:
                pass
            finally:
                self.output_file.release()
-        self.output_file = None
-
-    def _initialise_progress_bar(self, end_date: Date) -> ProgressBar:
-        num_simulated_days = (end_date - self.date).days
-        progress_bar = ProgressBar(
-            num_simulated_days, "Simulation progress", unit="day"
-        )
-        progress_bar.start()
-        return progress_bar
-
-    def _update_progress_bar(self, progress_bar: ProgressBar, date: Date) -> None:
-        simulation_day = (date - self.start_date).days
-        stats_dict = {
-            "date": str(date.date()),
-            "dataframe size": str(len(self.population.props)),
-            "queued events": str(len(self.event_queue)),
-        }
-        if "HealthSystem" in self.modules:
-            stats_dict["queued HSI events"] = str(
-                len(self.modules["HealthSystem"].HSI_EVENT_QUEUE)
-            )
-        progress_bar.update(simulation_day, stats_dict=stats_dict)
-
-    def run_simulation_to(self, *, to_date: Date) -> None:
-        """Run simulation up to a specified date.
-
-        Unlike :py:meth:`simulate` this method does not initialise or finalise
-        simulation and the date simulated to can be any date before or equal to
-        simulation end date.
-
-        :param to_date: Date to simulate up to but not including - must be before or
-            equal to simulation end date specified in call to :py:meth:`initialise`.
- """ - if not self._initialised: - msg = "Simulation must be initialised before calling run_simulation_to" - raise SimulationNotInitialisedError(msg) - if to_date > self.end_date: - msg = f"to_date {to_date} after simulation end date {self.end_date}" - raise ValueError(msg) - if self.show_progress_bar: - progress_bar = self._initialise_progress_bar(to_date) - while ( - len(self.event_queue) > 0 and self.event_queue.date_of_next_event < to_date - ): - event, date = self.event_queue.pop_next_event_and_date() - if self.show_progress_bar: - self._update_progress_bar(progress_bar, date) - self.fire_single_event(event, date) - self.date = to_date - if self.show_progress_bar: - progress_bar.stop() - - def simulate(self, *, end_date: Date) -> None: - """Simulate until the given end date - - :param end_date: When to stop simulating. Only events strictly before this - date will be allowed to occur. Must be given as a keyword parameter for - clarity. - """ - start = time.time() - self.initialise(end_date=end_date) - self.run_simulation_to(to_date=end_date) - self.finalise(time.time() - start) - def schedule_event(self, event: Event, date: Date) -> None: + def schedule_event(self, event, date): """Schedule an event to happen on the given future date. - :param event: The event to schedule. - :param date: wWen the event should happen. + :param event: the Event to schedule + :param date: when the event should happen """ - assert date >= self.date, "Cannot schedule events in the past" - - assert "TREATMENT_ID" not in dir( - event - ), "This looks like an HSI event. It should be handed to the healthsystem scheduler" - assert ( - event.__str__().find("HSI_") < 0 - ), "This looks like an HSI event. It should be handed to the healthsystem scheduler" + assert date >= self.date, 'Cannot schedule events in the past' + + assert 'TREATMENT_ID' not in dir(event), \ + 'This looks like an HSI event. It should be handed to the healthsystem scheduler' + assert (event.__str__().find('HSI_') < 0), \ + 'This looks like an HSI event. It should be handed to the healthsystem scheduler' assert isinstance(event, Event) self.event_queue.schedule(event=event, date=date) - def fire_single_event(self, event: Event, date: Date) -> None: + def fire_single_event(self, event, date): """Fires the event once for the given date - :param event: :py:class:`Event` to fire. - :param date: The date of the event. + :param event: :py:class:`Event` to fire + :param date: the date of the event """ self.date = date event.run() - def do_birth(self, mother_id: int) -> int: + def do_birth(self, mother_id): """Create a new child person. We create a new person in the population and then call the `on_birth` method in all modules to initialise the child's properties. - :param mother_id: Row index label of the maternal parent. - :return: Row index label of the new child. + :param mother_id: the maternal parent + :return: the new child """ child_id = self.population.do_birth() for module in self.modules.values(): module.on_birth(mother_id, child_id) return child_id - def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]: + def find_events_for_person(self, person_id: int): """Find the events in the queue for a particular person. - - :param person_id: The row index of the person of interest. - :return: List of tuples `(date_of_event, event)` for that `person_id` in the - queue. - - .. note:: - This is for debugging and testing only. Not for use in real simulations as it - is slow. 
+        :param person_id: the person_id of interest
+        :returns: list of tuples (date_of_event, event) for that person_id in the queue.
+
+        NB. This is for debugging and testing only - not for use in real simulations as it is slow
        """
        person_events = []
@@ -442,40 +313,6 @@ def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]:

        return person_events

-    def save_to_pickle(self, pickle_path: Path) -> None:
-        """Save simulation state to a pickle file using :py:mod:`dill`.
-
-        Requires :py:mod:`dill` to be importable.
-
-        :param pickle_path: File path to save simulation state to.
-        """
-        if not DILL_AVAILABLE:
-            raise RuntimeError("Cannot save to pickle as dill is not installed")
-        with open(pickle_path, "wb") as pickle_file:
-            dill.dump(self, pickle_file)
-
-    @staticmethod
-    def load_from_pickle(
-        pickle_path: Path, log_config: Optional[dict] = None
-    ) -> Simulation:
-        """Load simulation state from a pickle file using :py:mod:`dill`.
-
-        Requires :py:mod:`dill` to be importable.
-
-        :param pickle_path: File path to load simulation state from.
-        :param log_config: New log configuration to override previous configuration. If
-            `None` previous configuration (including output file) will be retained.
-
-        :returns: Loaded :py:class:`Simulation` object.
-        """
-        if not DILL_AVAILABLE:
-            raise RuntimeError("Cannot load from pickle as dill is not installed")
-        with open(pickle_path, "rb") as pickle_file:
-            simulation = dill.load(pickle_file)
-        if log_config is not None:
-            simulation._log_filepath = simulation._configure_logging(**log_config)
-        return simulation
-

class EventQueue:
    """A simple priority queue for events.
@@ -488,32 +325,23 @@ def __init__(self):
        self.counter = itertools.count()
        self.queue = []

-    def schedule(self, event: Event, date: Date) -> None:
+    def schedule(self, event, date):
        """Schedule a new event.

-        :param event: The event to schedule.
-        :param date: When it should happen.
+        :param event: the event to schedule
+        :param date: when it should happen
        """
        entry = (date, event.priority, next(self.counter), event)
        heapq.heappush(self.queue, entry)

-    def pop_next_event_and_date(self) -> tuple[Event, Date]:
-        """Get and remove the earliest event and corresponding date in the queue.
+    def next_event(self):
+        """Get the earliest event in the queue.

-        :returns: An `(event, date)` pair.
+        :returns: an (event, date) pair
        """
        date, _, _, event = heapq.heappop(self.queue)
        return event, date

-    @property
-    def date_of_next_event(self) -> Date:
-        """Get the date of the earliest event in queue without removing from queue.
-
-        :returns: Date of next event in queue.
- """ - date, *_ = self.queue[0] - return date - - def __len__(self) -> int: - """:return: The length of the queue.""" + def __len__(self): + """:return: the length of the queue""" return len(self.queue) diff --git a/src/tlo/util.py b/src/tlo/util.py index 168b1d41a1..77924e4fa3 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -1,12 +1,11 @@ """This file contains helpful utility functions.""" import hashlib from collections import defaultdict -from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Union +from typing import Dict, List, Optional, Set, Union import numpy as np import pandas as pd -from pandas import DataFrame, DateOffset +from pandas import DateOffset from tlo import Population, Property, Types @@ -437,76 +436,3 @@ def get_person_id_to_inherit_from(child_id, mother_id, population_dataframe, rng return abs(mother_id) elif mother_id >= 0: return mother_id - - -def convert_excel_files_to_csv(folder: Path, files: Optional[list[str]] = None, *, delete_excel_files: bool = False) -> None: - """ convert Excel files to csv files. - - :param folder: Folder containing Excel files. - :param files: List of Excel file names to convert to csv files. When `None`, all Excel files in the folder and - subsequent folders within this folder will be converted to csv files with Excel file name becoming - folder name and sheet names becoming csv file names. - :param delete_excel_files: When true, the Excel file we are generating csv files from will get deleted. - """ - # get path to Excel files - if files is None: - excel_file_paths = sorted(folder.rglob("*.xlsx")) - else: - excel_file_paths = [folder / file for file in files] - # exit function if no Excel file is given or found within the path - if excel_file_paths is None: - return - - for excel_file_path in excel_file_paths: - sheet_dataframes: dict[Any, DataFrame] = pd.read_excel(excel_file_path, sheet_name=None) - excel_file_directory: Path = excel_file_path.with_suffix("") - # Create a container directory for per sheet CSVs - if excel_file_directory.exists(): - print(f"Directory {excel_file_directory} already exists") - else: - excel_file_directory.mkdir() - # Write a CSV for each worksheet - for sheet_name, dataframe in sheet_dataframes.items(): - dataframe.to_csv(f'{excel_file_directory / sheet_name}.csv', index=False) - - if delete_excel_files: - # Remove no longer needed Excel file - Path(folder/excel_file_path).unlink() - - -def read_csv_files(folder: Path, files: Optional[list[str]] = None) -> DataFrame | dict[str, DataFrame]: - """ - A function to read CSV files in a similar way pandas reads Excel files (:py:func:`pandas.read_excel`). - - NB: Converting Excel files to csv files caused all columns that had no relevant data to simulation (i.e. - parameter descriptions or data references) to be named `Unnamed1, Unnamed2, ....., UnnamedN` in the csv files. - We are therefore using :py:func:`pandas.filter` to track all unnamed columns and silently drop them using - :py:func:`pandas.drop`. - - :param folder: Path to folder containing CSV files to read. - :param files: preferred csv file name(s). This is the same as sheet names in Excel file. 
-                  Note that if None (no files selected) then all files in the containing folder will be loaded
-
-    """
-    all_data: dict[str, DataFrame] = {}  # dataframes dictionary
-
-    def clean_dataframe(dataframes_dict: dict[str, DataFrame]) -> None:
-        """ silently drop all columns that have no data relevant to the simulation (all columns with a name starting with
-        Unnamed).
-        :param dataframes_dict: Dictionary of dataframes to clean
-        """
-        for _key, dataframe in dataframes_dict.items():
-            all_data[_key] = dataframe.drop(dataframe.filter(like='Unnamed'), axis=1)  # filter and drop Unnamed columns
-
-    if files is None:
-        for f_name in folder.rglob("*.csv"):
-            all_data[f_name.stem] = pd.read_csv(f_name)
-
-    else:
-        for f_name in files:
-            all_data[f_name] = pd.read_csv((folder / f_name).with_suffix(".csv"))
-    # clean and return the dataframe dictionary
-    clean_dataframe(all_data)
-    # If only one file loaded return dataframe directly rather than dict
-    return next(iter(all_data.values())) if len(all_data) == 1 else all_data
-
diff --git a/tests/bitset_handler/__init__.py b/tests/bitset_handler/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/bitset_handler/conftest.py b/tests/bitset_handler/conftest.py
deleted file mode 100644
index 20b6ae59f0..0000000000
--- a/tests/bitset_handler/conftest.py
+++ /dev/null
@@ -1,96 +0,0 @@
-"""
-Implements the fixtures required in
-https://github.com/pandas-dev/pandas/blob/bdb509f95a8c0ff16530cedb01c2efc822c0d314/pandas/core/dtypes/dtypes.py,
-
-which allows us to run the pandas-provided test suite for custom dtypes.
-Additional tests and fixtures can be defined on top of those listed in the link above, if we want to
-run our own tests.
-"""
-
-from typing import List, Set
-
-import numpy as np
-import pytest
-from numpy.dtypes import BytesDType  # pylint: disable=E0611
-from numpy.random import PCG64, Generator
-from numpy.typing import NDArray
-
-from tlo.bitset_handler.bitset_extension import BitsetArray, BitsetDtype, ElementType
-
-
-@pytest.fixture(scope="session")
-def _rng() -> Generator:
-    return Generator(PCG64(seed=0))
-
-
-@pytest.fixture(scope="session")
-def _set_elements() -> Set[ElementType]:
-    return {"1", "2", "3", "4", "5", "a", "b", "c", "d", "e"}
-
-
-@pytest.fixture(scope="session")
-def dtype(_set_elements: Set[ElementType]) -> BitsetDtype:
-    return BitsetDtype(_set_elements)
-
-
-@pytest.fixture(scope="session")
-def _1st_3_entries() -> List[Set[ElementType]]:
-    """
-    We will fix the first 3 entries of the data fixture,
-    which is helpful to ensure we have some explicit test
-    values that we can directly change if needed.
-    """
-    return [
-        {"1", "e"}, {"a", "d"}, {"2", "4", "5"},
-    ]
-
-@pytest.fixture(scope="session")
-def _raw_sets(
-    _1st_3_entries: List[Set[ElementType]], _rng: Generator, _set_elements: Set[ElementType]
-) -> List[Set[ElementType]]:
-    """
-    Length 100 list of sets, the first 3 of which are those in
-    the _1st_3_entries fixture. These sets will be used as the
-    'raw_data' for the Bitset Extension test suite.
- """ - set_entries = list(_1st_3_entries) - elements = list(_set_elements) - for _ in range(100-len(_1st_3_entries)): - set_entries.append( - { - elements[i] - for i in _rng.integers( - 0, len(elements), size=_rng.integers(0, len(elements)) - ) - } - ) - return set_entries - -@pytest.fixture(scope="session") -def _raw_data( - _raw_sets: List[Set[ElementType]], dtype: BitsetDtype -) -> NDArray[np.bytes_]: - data = np.zeros((100,), dtype=dtype.np_array_dtype) - for i, set_value in enumerate(_raw_sets): - data[i] = dtype.as_bytes(set_value) - return data - - -@pytest.fixture(scope="session") -def data( - _raw_data: NDArray[np.bytes_], dtype: BitsetDtype -) -> BitsetArray: - return BitsetArray(data=_raw_data, dtype=dtype, copy=True) - - -@pytest.fixture -def data_for_twos(dtype: BitsetDtype) -> None: - pytest.skip(f"{dtype} does not support divmod") - - -@pytest.fixture -def data_missing(dtype: BitsetDtype) -> np.ndarray[BytesDType]: - data = np.zeros((2,), dtype=dtype.np_array_dtype) - data[0] = dtype.na_value - data[1] = dtype.as_bytes({"a"}) - return data diff --git a/tests/bitset_handler/test_bitset_pandas_dtype.py b/tests/bitset_handler/test_bitset_pandas_dtype.py deleted file mode 100644 index 156f9e49e6..0000000000 --- a/tests/bitset_handler/test_bitset_pandas_dtype.py +++ /dev/null @@ -1,28 +0,0 @@ -import re - -import pytest -from pandas.tests.extension.base import BaseDtypeTests - -from tlo.bitset_handler.bitset_extension import BitsetDtype - - -class TestBitsetDtype(BaseDtypeTests): - """ - Setting the dtype fixture, above, to out BitsetDtype results in us inheriting - all default pandas tests for extension Dtypes. - - Additional tests can be added to this class if we so desire. - """ - - def test_construct_from_string_another_type_raises( - self, dtype: BitsetDtype - ) -> None: - """ - Reimplementation as the error message we expect is different from that provided - by base ``pandas`` implementation. - """ - msg = ( - "Need at least 2 (comma-separated) elements in string to construct bitset." - ) - with pytest.raises(TypeError, match=re.escape(msg)): - type(dtype).construct_from_string("another_type") diff --git a/tests/bitset_handler/test_bitset_set_like_interactions.py b/tests/bitset_handler/test_bitset_set_like_interactions.py deleted file mode 100644 index 801703ce24..0000000000 --- a/tests/bitset_handler/test_bitset_set_like_interactions.py +++ /dev/null @@ -1,162 +0,0 @@ -""" -Tests for set-like interactions with a pd.Series object of BitsetDtype. -""" -import operator -from typing import Any, Callable, Iterable, List, Set - -import pandas as pd -import pytest - -from tlo.bitset_handler.bitset_extension import BitsetDtype, CastableForPandasOps, ElementType - - -def seq_of_sets_to_series(sets: Iterable[Set[ElementType]], dtype: BitsetDtype) -> pd.Series: - """ - Casts a sequence of sets representing a single BitsetDtype to a - series with those entries of the appropriate dtype. 
- """ - return pd.Series(data=sets, dtype=dtype, copy=True) - - -@pytest.fixture(scope="function") -def small_series(_1st_3_entries: List[Set[ElementType]], dtype: BitsetDtype): - """ - Recall that the first 3 entries are always fixed in confest; - repeating the values here just for ease of reference: - - {"1", "e"}, - {"a", "d"}, - {"2", "4", "5"}, - """ - return pd.Series(_1st_3_entries, dtype=dtype, copy=True) - - -@pytest.mark.parametrize( - ["op", "r_value", "expected"], - [ - pytest.param( - [operator.or_, operator.add, operator.sub], - set(), - [{"1", "e"}, {"a", "d"}, {"2", "4", "5"}], - id="ADD, OR, SUB w/ empty set", - ), - pytest.param( - [operator.or_, operator.add], - "a", - [{"1", "a", "e"}, {"a", "d"}, {"2", "4", "5", "a"}], - id="ADD, OR w/ scalar element", - ), - pytest.param( - [operator.or_, operator.add], - {"1", "2", "a", "d"}, - [ - {"1", "2", "a", "d", "e"}, - {"1", "2", "a", "d"}, - {"1", "2", "4", "5", "a", "d"}, - ], - id="ADD, OR w/ multiple-entry set", - ), - pytest.param( - operator.and_, - set(), - [set()] * 3, - id="AND w/ empty set", - ), - pytest.param( - operator.and_, - "a", - [set(), {"a"}, set()], - id="AND w/ scalar element", - ), - pytest.param( - operator.and_, - {"1", "a"}, - [{"1"}, {"a"}, set()], - id="AND w/ multiple-entry set", - ), - pytest.param( - [operator.eq, operator.le, operator.lt], - set(), - pd.Series([False, False, False], dtype=bool), - id="EQ, LE, LT w/ empty set", - ), - pytest.param( - [operator.eq, operator.le, operator.lt], - "a", - pd.Series([False, False, False], dtype=bool), - id="EQ, LE, LT w/ scalar element", - ), - pytest.param( - [operator.eq, operator.ge, operator.le], - {"1", "e"}, - pd.Series([True, False, False], dtype=bool), - id="EQ, GE, LE w/ multiple-entry set", - ), - pytest.param( - [operator.ge, operator.gt], - set(), - pd.Series([True, True, True], dtype=bool), - id="GE, GT w/ empty set", - ), - pytest.param( - [operator.ge, operator.gt], - "a", - pd.Series([False, True, False], dtype=bool), - id="GE, GT w/ scalar element", - ), - pytest.param( - [operator.gt, operator.lt], - {"1, e"}, - pd.Series([False, False, False], dtype=bool), - id="GT, LT w/ multiple-entry set", - ), - pytest.param( - operator.sub, - "a", - [{"1", "e"}, {"d"}, {"2", "4", "5"}], - id="SUB w/ scalar element", - ), - pytest.param( - operator.sub, - {"1", "2", "d", "e"}, - [set(), {"a"}, {"4", "5"}], - id="SUB w/ multiple-entry set", - ), - ], -) -def test_series_operation_with_value( - small_series: pd.Series, - dtype: BitsetDtype, - op: List[Callable[[Any, Any], Any]] | Callable[[Any, Any], Any], - r_value: CastableForPandasOps, - expected: List[Set[ElementType]] | pd.Series -) -> None: - """ - The expected value can be passed in as either a list of sets that will be - converted to the appropriate pd.Series of bitsets, or as an explicit pd.Series - of booleans (which is used when testing the comparison operations ==, <=, etc). - - If r_value is a scalar, the test will run once using the scalar as the r_value, - and then again using the cast of the scalar to a set of one element as the r_value. - - In cases such as this, the two results are expected to be the same, - which saves us verbiage in the list of test cases above. 
- """ - expected = ( - seq_of_sets_to_series(expected, dtype) - if isinstance(expected, list) - else expected - ) - - if not isinstance(op, list): - op = [op] - if isinstance(r_value, ElementType): - r_values = [r_value, {r_value}] - else: - r_values = [r_value] - - for operation in op: - for r_v in r_values: - result = operation(small_series, r_v) - assert ( - expected == result - ).all(), f"Series do not match after operation {operation.__name__} with {r_v} on the right." diff --git a/tests/conftest.py b/tests/conftest.py index 33b463343a..47d6c3fa16 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -34,4 +34,4 @@ def pytest_collection_modifyitems(config, items): def pytest_generate_tests(metafunc): if "seed" in metafunc.fixturenames: - metafunc.parametrize("seed", metafunc.config.getoption("seed"), scope="session") + metafunc.parametrize("seed", metafunc.config.getoption("seed")) diff --git a/tests/resources/ResourceFile_test_convert_to_csv/ResourceFile_test_convert_to_csv.xlsx b/tests/resources/ResourceFile_test_convert_to_csv/ResourceFile_test_convert_to_csv.xlsx deleted file mode 100644 index 84edbd2636..0000000000 --- a/tests/resources/ResourceFile_test_convert_to_csv/ResourceFile_test_convert_to_csv.xlsx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:af1a6a6aa24a7de385efdf1564da3e3abfbba9fe467d92212b5c87b127e899f6 -size 10714 diff --git a/tests/resources/probability_premature_death/0/0/tlo.methods.demography.pickle b/tests/resources/probability_premature_death/0/0/tlo.methods.demography.pickle deleted file mode 100644 index 896ce51bf6..0000000000 --- a/tests/resources/probability_premature_death/0/0/tlo.methods.demography.pickle +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f2cddd2f691393fc27e990170f76ff12a2962d3fbee986deee459a6eb4996fd7 -size 243603 diff --git a/tests/test_alri.py b/tests/test_alri.py index fcce8b4b42..0fba5fea8d 100644 --- a/tests/test_alri.py +++ b/tests/test_alri.py @@ -435,11 +435,7 @@ def __will_die_of_alri(**kwargs): assert pd.isnull(person['ri_scheduled_death_date']) # Check that they have some symptoms caused by ALRI - assert 0 < len( - sim.modules["SymptomManager"].has_what( - person_id=person_id, disease_module=sim.modules["Alri"] - ) - ) + assert 0 < len(sim.modules['SymptomManager'].has_what(person_id, sim.modules['Alri'])) # Check that there is a AlriNaturalRecoveryEvent scheduled for this person: recov_event_tuple = [event_tuple for event_tuple in sim.find_events_for_person(person_id) if @@ -462,11 +458,7 @@ def __will_die_of_alri(**kwargs): assert pd.isnull(person['ri_scheduled_death_date']) # check they they have no symptoms: - assert 0 == len( - sim.modules["SymptomManager"].has_what( - person_id=person_id, disease_module=sim.modules["Alri"] - ) - ) + assert 0 == len(sim.modules['SymptomManager'].has_what(person_id, sim.modules['Alri'])) # check it's logged (one infection + one recovery) assert 1 == sim.modules['Alri'].logging_event.trackers['incident_cases'].report_current_total() diff --git a/tests/test_analysis.py b/tests/test_analysis.py index 0f42b2d851..2686e431b0 100644 --- a/tests/test_analysis.py +++ b/tests/test_analysis.py @@ -1,5 +1,4 @@ import os -import textwrap from pathlib import Path from typing import List @@ -19,7 +18,6 @@ get_parameters_for_improved_healthsystem_and_healthcare_seeking, get_parameters_for_status_quo, get_root_path, - merge_log_files, mix_scenarios, order_of_coarse_appt, order_of_short_treatment_ids, @@ -686,99 +684,3 @@ def 
     sim = Simulation(start_date=Date(2010, 1, 1), seed=seed, log_config=log_config)
     check_log(run_simulation_and_cause_one_death(sim))
-
-def test_merge_log_files(tmp_path):
-    log_file_path_1 = tmp_path / "log_file_1"
-    log_file_path_1.write_text(
-        textwrap.dedent(
-            """\
-            {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null}
-            {"uuid": "b07", "date": "2010-01-01T00:00:00", "values": ["0"]}
-            {"uuid": "0b3", "type": "header", "module": "m1", "key": "a", "level": "INFO", "columns": {"msg": "str"}, "description": "A"}
-            {"uuid": "0b3", "date": "2010-01-01T00:00:00", "values": ["1"]}
-            {"uuid": "ed4", "type": "header", "module": "m2", "key": "b", "level": "INFO", "columns": {"msg": "str"}, "description": "B"}
-            {"uuid": "ed4", "date": "2010-01-02T00:00:00", "values": ["2"]}
-            {"uuid": "477", "type": "header", "module": "m2", "key": "c", "level": "INFO", "columns": {"msg": "str"}, "description": "C"}
-            {"uuid": "477", "date": "2010-01-02T00:00:00", "values": ["3"]}
-            {"uuid": "b5c", "type": "header", "module": "m2", "key": "d", "level": "INFO", "columns": {"msg": "str"}, "description": "D"}
-            {"uuid": "b5c", "date": "2010-01-03T00:00:00", "values": ["4"]}
-            {"uuid": "477", "date": "2010-01-03T00:00:00", "values": ["5"]}
-            """
-        )
-    )
-    log_file_path_2 = tmp_path / "log_file_2"
-    log_file_path_2.write_text(
-        textwrap.dedent(
-            """\
-            {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null}
-            {"uuid": "b07", "date": "2010-01-04T00:00:00", "values": ["6"]}
-            {"uuid": "ed4", "type": "header", "module": "m2", "key": "b", "level": "INFO", "columns": {"msg": "str"}, "description": "B"}
-            {"uuid": "ed4", "date": "2010-01-04T00:00:00", "values": ["7"]}
-            {"uuid": "ed4", "date": "2010-01-05T00:00:00", "values": ["8"]}
-            {"uuid": "0b3", "type": "header", "module": "m1", "key": "a", "level": "INFO", "columns": {"msg": "str"}, "description": "A"}
-            {"uuid": "0b3", "date": "2010-01-06T00:00:00", "values": ["9"]}
-            {"uuid": "a19", "type": "header", "module": "m3", "key": "e", "level": "INFO", "columns": {"msg": "str"}, "description": "E"}
-            {"uuid": "a19", "date": "2010-01-03T00:00:00", "values": ["10"]}
-            """
-        )
-    )
-    expected_merged_log_file_content = textwrap.dedent(
-        """\
-        {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null}
-        {"uuid": "b07", "date": "2010-01-01T00:00:00", "values": ["0"]}
-        {"uuid": "0b3", "type": "header", "module": "m1", "key": "a", "level": "INFO", "columns": {"msg": "str"}, "description": "A"}
-        {"uuid": "0b3", "date": "2010-01-01T00:00:00", "values": ["1"]}
-        {"uuid": "ed4", "type": "header", "module": "m2", "key": "b", "level": "INFO", "columns": {"msg": "str"}, "description": "B"}
-        {"uuid": "ed4", "date": "2010-01-02T00:00:00", "values": ["2"]}
-        {"uuid": "477", "type": "header", "module": "m2", "key": "c", "level": "INFO", "columns": {"msg": "str"}, "description": "C"}
-        {"uuid": "477", "date": "2010-01-02T00:00:00", "values": ["3"]}
-        {"uuid": "b5c", "type": "header", "module": "m2", "key": "d", "level": "INFO", "columns": {"msg": "str"}, "description": "D"}
-        {"uuid": "b5c", "date": "2010-01-03T00:00:00", "values": ["4"]}
-        {"uuid": "477", "date": "2010-01-03T00:00:00", "values": ["5"]}
-        {"uuid": "b07", "date": "2010-01-04T00:00:00", "values": ["6"]}
-        {"uuid": "ed4", "date": "2010-01-04T00:00:00", "values": ["7"]}
-        {"uuid": "ed4", "date": "2010-01-05T00:00:00", "values": ["8"]}
"date": "2010-01-05T00:00:00", "values": ["8"]} - {"uuid": "0b3", "date": "2010-01-06T00:00:00", "values": ["9"]} - {"uuid": "a19", "type": "header", "module": "m3", "key": "e", "level": "INFO", "columns": {"msg": "str"}, "description": "E"} - {"uuid": "a19", "date": "2010-01-03T00:00:00", "values": ["10"]} - """ - ) - merged_log_file_path = tmp_path / "merged_log_file" - merge_log_files(log_file_path_1, log_file_path_2, merged_log_file_path) - merged_log_file_content = merged_log_file_path.read_text() - assert merged_log_file_content == expected_merged_log_file_content - - -def test_merge_log_files_with_inconsistent_headers_raises(tmp_path): - log_file_path_1 = tmp_path / "log_file_1" - log_file_path_1.write_text( - textwrap.dedent( - """\ - {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null} - {"uuid": "b07", "date": "2010-01-01T00:00:00", "values": ["0"]} - """ - ) - ) - log_file_path_2 = tmp_path / "log_file_2" - log_file_path_2.write_text( - textwrap.dedent( - """\ - {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "int"}, "description": null} - {"uuid": "b07", "date": "2010-01-04T00:00:00", "values": [1]} - """ - ) - ) - merged_log_file_path = tmp_path / "merged_log_file" - with pytest.raises(RuntimeError, match="Inconsistent header lines"): - merge_log_files(log_file_path_1, log_file_path_2, merged_log_file_path) - - -def test_merge_log_files_inplace_raises(tmp_path): - log_file_path_1 = tmp_path / "log_file_1" - log_file_path_1.write_text("foo") - log_file_path_2 = tmp_path / "log_file_2" - log_file_path_2.write_text("bar") - with pytest.raises(ValueError, match="output_path"): - merge_log_files(log_file_path_1, log_file_path_2, log_file_path_1) - with pytest.raises(ValueError, match="output_path"): - merge_log_files(log_file_path_1, log_file_path_2, log_file_path_2) diff --git a/tests/test_beddays.py b/tests/test_beddays.py index 224619e8b3..614719fc86 100644 --- a/tests/test_beddays.py +++ b/tests/test_beddays.py @@ -2,7 +2,6 @@ import copy import os from pathlib import Path -from typing import Dict import pandas as pd import pytest @@ -84,88 +83,6 @@ def test_beddays_in_isolation(tmpdir, seed): assert ([cap_bedtype1] * days_sim == tracker.values).all() -def test_beddays_allocation_resolution(tmpdir, seed): - sim = Simulation(start_date=start_date, seed=seed) - sim.register( - demography.Demography(resourcefilepath=resourcefilepath), - healthsystem.HealthSystem(resourcefilepath=resourcefilepath), - ) - - # Update BedCapacity data with a simple table: - level2_facility_ids = [128, 129, 130] # <-- the level 2 facilities for each region - # This ensures over-allocations have to be properly resolved - cap_bedtype1 = 10 - cap_bedtype2 = 10 - cap_bedtype3 = 10 - - # create a simple bed capacity dataframe - hs = sim.modules["HealthSystem"] - hs.parameters["BedCapacity"] = pd.DataFrame( - data={ - "Facility_ID": level2_facility_ids, - "bedtype1": cap_bedtype1, - "bedtype2": cap_bedtype2, - "bedtype3": cap_bedtype3, - } - ) - - sim.make_initial_population(n=100) - sim.simulate(end_date=start_date) - - # reset bed days tracker to the start_date of the simulation - hs.bed_days.initialise_beddays_tracker() - - def assert_footprint_matches_expected( - footprint: Dict[str, int], expected_footprint: Dict[str, int] - ): - """ - Asserts that two footprints are identical. 
-        The footprint provided as the 2nd argument is assumed to be the footprint
-        that we want to match, and the 1st as the result of the program attempting
-        to resolve over-allocations.
-        """
-        assert len(footprint) == len(
-            expected_footprint
-        ), "Bed type footprints did not return same allocations."
-        for bed_type, expected_days in expected_footprint.items():
-            allocated_days = footprint[bed_type]
-            assert expected_days == allocated_days, (
-                f"Bed type {bed_type} was allocated {allocated_days} upon combining, "
-                f"but expected it to get {expected_days}."
-            )
-
-    # Check that combining footprints for a person returns the expected output
-
-    # SIMPLE 2-bed days case
-    # Test uses example fail case given in https://github.com/UCL/TLOmodel/issues/1399
-    # Person p has: bedtype1 for 2 days, bedtype2 for 0 days.
-    # Person p then assigned: bedtype1 for 1 day, bedtype2 for 6 days.
-    # EXPECT: p's footprints are combined into bedtype1 for 2 days, bedtype2 for 5 days.
-    existing_footprint = {"bedtype1": 2, "bedtype2": 0, "bedtype3": 0}
-    incoming_footprint = {"bedtype1": 1, "bedtype2": 6, "bedtype3": 0}
-    expected_resolution = {"bedtype1": 2, "bedtype2": 5, "bedtype3": 0}
-    allocated_footprint = hs.bed_days.combine_footprints_for_same_patient(
-        existing_footprint, incoming_footprint
-    )
-    assert_footprint_matches_expected(allocated_footprint, expected_resolution)
-
-    # TEST case involves 3 different bed-types.
-    # Person p has: bedtype1 for 2 days, then bedtype3 for 4 days.
-    # p is assigned: bedtype1 for 1 day, bedtype2 for 3 days, and bedtype3 for 1 day.
-    # EXPECT: p spends 2 days in each bedtype;
-    # - Day 1 needs bedtype1 for both footprints
-    # - Day 2 existing footprint at bedtype1 overwrites incoming at bedtype2
-    # - Day 3 & 4 incoming footprint at bedtype2 overwrites existing allocation to bedtype3
-    # - Day 5 both footprints want bedtype3
-    # - Day 6 existing footprint needs bedtype3, whilst incoming footprint is over.
-    existing_footprint = {"bedtype1": 2, "bedtype2": 0, "bedtype3": 4}
-    incoming_footprint = {"bedtype1": 1, "bedtype2": 3, "bedtype3": 1}
-    expected_resolution = {"bedtype1": 2, "bedtype2": 2, "bedtype3": 2}
-    allocated_footprint = hs.bed_days.combine_footprints_for_same_patient(
-        existing_footprint, incoming_footprint
-    )
-    assert_footprint_matches_expected(allocated_footprint, expected_resolution)
-
 def check_dtypes(simulation):
     # check types of columns
     df = simulation.population.props
@@ -1056,82 +973,3 @@ def apply(self, person_id, squeeze_factor):
     # Check that the facility_id is included for each entry in the `HSI_Events` log, including HSI Events for
     # in-patient appointments.
     assert not (log_hsi['Facility_ID'] == -99).any()
-
-def test_beddays_availability_switch(seed):
-    """
-    Test that calling bed_days.switch_beddays_availability correctly updates the
-    bed capacities and adjusts the existing trackers to reflect the new capacities.
- """ - sim = Simulation(start_date=start_date, seed=seed) - sim.register( - demography.Demography(resourcefilepath=resourcefilepath), - healthsystem.HealthSystem(resourcefilepath=resourcefilepath), - ) - - # get shortcut to HealthSystem Module - hs: healthsystem.HealthSystem = sim.modules["HealthSystem"] - - # As obtained from the resource file - facility_id_with_patient = 128 - facility_id_without_patient = 129 - bedtype1_init_capacity = 5 - bedtype2_init_capacity = 10 - - # Create a simple bed capacity dataframe with capacity designated for two regions - hs.parameters["BedCapacity"] = pd.DataFrame( - data={ - "Facility_ID": [ - facility_id_with_patient, #<-- patient 0 is admitted here - facility_id_without_patient, - ], - "bedtype1": bedtype1_init_capacity, - "bedtype2": bedtype2_init_capacity, - } - ) - sim.make_initial_population(n=100) - sim.simulate(end_date=start_date) - - day_2 = start_date + pd.DateOffset(days=1) - day_3 = start_date + pd.DateOffset(days=2) - day_4 = start_date + pd.DateOffset(days=3) - - bed_days = hs.bed_days - # Reset the bed occupancies - bed_days.initialise_beddays_tracker() - # Have a patient occupy a bed at the start of the simulation - bed_days.impose_beddays_footprint(person_id=0, footprint={"bedtype1": 3, "bedtype2": 0}) - - # Have the bed_days availability switch to "none" on the 2nd simulation day - bed_days.switch_beddays_availability("none", effective_on_and_from=day_2) - - # We should now see that the scaled capacities are all zero - assert ( - not bed_days._scaled_capacity.any().any() - ), "At least one bed capacity was not set to 0" - # We should also see that bedtype1 should have -1 beds available for days 2 and 3 of the simulation, - # due to the existing occupancy and the new capacity of 0. - # It should have 4 beds available on the first day (since the original capacity was 5 and the availability - # switch happens day 2). - # It should then have 0 beds available after (not including) day 3 - bedtype1: pd.DataFrame = bed_days.bed_tracker["bedtype1"] - bedtype2: pd.DataFrame = bed_days.bed_tracker["bedtype2"] - - assert ( - bedtype1.loc[start_date, facility_id_with_patient] == bedtype1_init_capacity - 1 - and bedtype1.loc[start_date, facility_id_without_patient] - == bedtype1_init_capacity - ), "Day 1 capacities were incorrectly affected" - assert (bedtype1.loc[day_2:day_3, facility_id_with_patient] == -1).all() and ( - bedtype1.loc[day_2:day_3, facility_id_without_patient] == 0 - ).all(), "Day 2 & 3 capacities were not updated correctly" - assert ( - (bedtype1.loc[day_4:, :] == 0).all().all() - ), "Day 4 onwards did not have correct capacity" - - # Bedtype 2 should have also have been updated, but there is no funny business here. 
-    assert (
-        (bedtype2.loc[day_2:, :] == 0).all().all()
-    ), "Bedtype 2 was not updated correctly"
-    assert (
-        (bedtype2.loc[start_date, :] == bedtype2_init_capacity).all().all()
-    ), "Bedtype 2 had capacity updated on the incorrect dates"
diff --git a/tests/test_cardiometabolicdisorders.py b/tests/test_cardiometabolicdisorders.py
index 977caa4c91..a40fdad69b 100644
--- a/tests/test_cardiometabolicdisorders.py
+++ b/tests/test_cardiometabolicdisorders.py
@@ -770,7 +770,7 @@ def test_hsi_emergency_events(seed):
         assert pd.isnull(df.at[person_id, f'nc_{event}_scheduled_date_death'])
         assert isinstance(sim.modules['HealthSystem'].HSI_EVENT_QUEUE[0].hsi_event,
                           HSI_CardioMetabolicDisorders_StartWeightLossAndMedication)
-        assert f"{event}_damage" not in sim.modules['SymptomManager'].has_what(person_id=person_id)
+        assert f"{event}_damage" not in sim.modules['SymptomManager'].has_what(person_id)

 def test_no_availability_of_consumables_for_conditions(seed):
diff --git a/tests/test_cervical_cancer.py b/tests/test_cervical_cancer.py
new file mode 100644
index 0000000000..a5f3703363
--- /dev/null
+++ b/tests/test_cervical_cancer.py
@@ -0,0 +1,391 @@
+import os
+from pathlib import Path
+
+import pandas as pd
+import pytest
+
+from tlo import DAYS_IN_YEAR, Date, Simulation
+from tlo.methods import (
+    cervical_cancer,
+    demography,
+    enhanced_lifestyle,
+    healthburden,
+    healthseekingbehaviour,
+    healthsystem,
+    simplified_births,
+    symptommanager,
+    epi,
+    tb,
+    hiv
+)
+
+# %% Setup:
+try:
+    resourcefilepath = Path(os.path.dirname(__file__)) / '../resources'
+except NameError:
+    # running interactively
+    resourcefilepath = Path('./resources')
+
+# parameters for whole suite of tests:
+start_date = Date(2010, 1, 1)
+popsize = 5000
+
+
+# %% Construction of simulation objects:
+def make_simulation_healthsystemdisabled(seed):
+    """Make the simulation with:
+    * the demography module with the OtherDeathsPoll not running
+    """
+    sim = Simulation(start_date=start_date, seed=seed)
+
+    # Register the appropriate modules
+    sim.register(demography.Demography(resourcefilepath=resourcefilepath),
+                 cervical_cancer.CervicalCancer(resourcefilepath=resourcefilepath),
+                 simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath),
+                 enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath),
+                 healthsystem.HealthSystem(resourcefilepath=resourcefilepath,
+                                           disable=False,
+                                           cons_availability='all'),
+                 symptommanager.SymptomManager(resourcefilepath=resourcefilepath),
+                 healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath),
+                 healthburden.HealthBurden(resourcefilepath=resourcefilepath),
+                 epi.Epi(resourcefilepath=resourcefilepath),
+                 tb.Tb(resourcefilepath=resourcefilepath, run_with_checks=False),
+                 hiv.Hiv(resourcefilepath=resourcefilepath, run_with_checks=False)
+                 )
+
+    return sim
+
+
+def make_simulation_nohsi(seed):
+    """Make the simulation with:
+    * the healthsystem enabled but with no service availability (so no HSI run)
+    """
+    sim = Simulation(start_date=start_date, seed=seed)
+
+    # Register the appropriate modules
+    sim.register(demography.Demography(resourcefilepath=resourcefilepath),
+                 cervical_cancer.CervicalCancer(resourcefilepath=resourcefilepath),
+                 simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath),
+                 enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath),
+                 healthsystem.HealthSystem(resourcefilepath=resourcefilepath,
+                                           disable=False,
+                                           cons_availability='all'),
+                 symptommanager.SymptomManager(resourcefilepath=resourcefilepath),
+                 healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath),
+                 healthburden.HealthBurden(resourcefilepath=resourcefilepath),
+                 epi.Epi(resourcefilepath=resourcefilepath),
+                 tb.Tb(resourcefilepath=resourcefilepath, run_with_checks=False),
+                 hiv.Hiv(resourcefilepath=resourcefilepath, run_with_checks=False)
+                 )
+
+    return sim
+
+
+# %% Manipulation of parameters:
+def zero_out_init_prev(sim):
+    # Set initial prevalence to zero:
+    sim.modules['CervicalCancer'].parameters['init_prev_cin_hpv_cc_stage_hiv'] \
+        = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+    sim.modules['CervicalCancer'].parameters['init_prev_cin_hpv_cc_stage_nhiv'] \
+        = [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+    return sim
+
+
+def make_high_init_prev(sim):
+    # Set initial prevalence to a high value:
+    sim.modules['CervicalCancer'].parameters['init_prev_cin_hpv_cc_stage'] \
+        = [0.55, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05]
+    return sim
+
+
+def incr_rate_of_onset_lgd(sim):
+    # Rate of cancer onset per month:
+    sim.modules['CervicalCancer'].parameters['r_stage1_cin3'] = 0.2
+    return sim
+
+
+def zero_rate_of_onset_lgd(sim):
+    # Rate of cancer onset per month:
+    sim.modules['CervicalCancer'].parameters['r_stage1_cin3'] = 0.00
+    return sim
+
+
+def incr_rates_of_progression(sim):
+    # Rates of cancer progression per month:
+    sim.modules['CervicalCancer'].parameters['r_stage2a_stage1'] *= 5
+    sim.modules['CervicalCancer'].parameters['r_stage2b_stage2a'] *= 5
+    sim.modules['CervicalCancer'].parameters['r_stage3_stage2b'] *= 5
+    sim.modules['CervicalCancer'].parameters['r_stage4_stage3'] *= 5
+    return sim
+
+
+def make_treatment_ineffective(sim):
+    # Treatment effect of 1.0 will not retard progression
+    sim.modules['CervicalCancer'].parameters['prob_cure_stage1'] = 0.0
+    sim.modules['CervicalCancer'].parameters['prob_cure_stage2a'] = 0.0
+    sim.modules['CervicalCancer'].parameters['prob_cure_stage2b'] = 0.0
+    sim.modules['CervicalCancer'].parameters['prob_cure_stage3'] = 0.0
+    return sim
+
+
+def make_treatment_perfectly_effective(sim):
+    # All get symptoms and treatment effect of 1.0 will stop progression
+    sim.modules['CervicalCancer'].parameters['r_vaginal_bleeding_cc_stage1'] = 1.0
+    sim.modules['CervicalCancer'].parameters['prob_cure_stage1'] = 1.0
+    sim.modules['CervicalCancer'].parameters['prob_cure_stage2a'] = 1.0
+    sim.modules['CervicalCancer'].parameters['prob_cure_stage2b'] = 1.0
+    sim.modules['CervicalCancer'].parameters['prob_cure_stage3'] = 1.0
+    return sim
+
+
+def get_population_of_interest(sim):
+    # Function to make filtering the simulation population for the population of interest easier
+    # Population of interest in this module is living females aged 15 and above
+    population_of_interest = \
+        sim.population.props.is_alive & (sim.population.props.age_years >= 15) & (sim.population.props.sex == 'F')
+    return population_of_interest
+
+
+# %% Checks:
+def check_dtypes(sim):
+    # check types of columns
+    df = sim.population.props
+    orig = sim.population.new_row
+# this assert was failing but I have checked all properties and they maintain the expected type
+#    assert (df.dtypes == orig.dtypes).all()
+
+
+def check_configuration_of_population(sim):
+    # get df for alive persons:
+    df = sim.population.props.copy()
+
+    # restrict to alive persons:
+    df = df.loc[df.is_alive]
+
+    # check that no one under 15 has cancer
+    assert not df.loc[df.age_years < 15].ce_cc_ever.any()
+
+    # check that diagnosis and treatment are never applied to someone who has never had cancer:
+    assert pd.isnull(df.loc[df.ce_cc_ever == False, 'ce_date_palliative_care']).all()
+
+    # check that treatment is never done for those with stage 4
+    assert 0 == (df.ce_stage_at_which_treatment_given == 'stage4').sum()
+    assert 0 == (df.loc[~pd.isnull(df.ce_date_treatment)].ce_stage_at_which_treatment_given == 'none').sum()
+
+    # check that those with symptom are a subset of those with cancer:
+# todo: not sure what is wrong with this assert as I am fairly certain the intended assert is true
+
+#   assert set(sim.modules['SymptomManager'].who_has('vaginal_bleeding')).issubset(
+#       df.index[df.ce_cc_ever])
+
+    # check that those diagnosed are a subset of those with the symptom (and that the date makes sense):
+    assert set(df.index[~pd.isnull(df.ce_date_diagnosis)]).issubset(df.index[df.ce_cc_ever])
+    assert (df.loc[~pd.isnull(df.ce_date_diagnosis)].ce_date_diagnosis <= sim.date).all()
+
+    # check that date diagnosed is consistent with the age of the person (i.e. not before they were 15)
+    age_at_dx = (df.loc[~pd.isnull(df.ce_date_diagnosis)].ce_date_diagnosis - df.loc[
+        ~pd.isnull(df.ce_date_diagnosis)].date_of_birth)
+    assert all([int(x.days / DAYS_IN_YEAR) >= 15 for x in age_at_dx])
+
+    # check that those treated are a subset of those diagnosed (and that the order of dates makes sense):
+    assert set(df.index[~pd.isnull(df.ce_date_treatment)]).issubset(df.index[~pd.isnull(df.ce_date_diagnosis)])
+    assert (df.loc[~pd.isnull(df.ce_date_treatment)].ce_date_diagnosis <= df.loc[
+        ~pd.isnull(df.ce_date_treatment)].ce_date_treatment).all()
+
+    # check that those on palliative care are a subset of those diagnosed (and that the order of dates makes sense):
+    assert set(df.index[~pd.isnull(df.ce_date_palliative_care)]).issubset(df.index[~pd.isnull(df.ce_date_diagnosis)])
+    assert (df.loc[~pd.isnull(df.ce_date_palliative_care)].ce_date_diagnosis <= df.loc[
+        ~pd.isnull(df.ce_date_palliative_care)].ce_date_palliative_care).all()
+
+
+# %% Tests:
+def test_initial_config_of_pop_high_prevalence(seed):
+    """Tests of the way the population is configured: with high initial prevalence values """
+    sim = make_simulation_healthsystemdisabled(seed=seed)
+    sim = make_high_init_prev(sim)
+    sim.make_initial_population(n=popsize)
+    check_dtypes(sim)
+    check_configuration_of_population(sim)
+
+
+def test_initial_config_of_pop_zero_prevalence(seed):
+    """Tests of the way the population is configured: with zero initial prevalence values """
+    sim = make_simulation_healthsystemdisabled(seed=seed)
+    sim = zero_out_init_prev(sim)
+    sim.make_initial_population(n=popsize)
+    check_dtypes(sim)
+    check_configuration_of_population(sim)
+    df = sim.population.props
+    assert (df.loc[df.is_alive].ce_hpv_cc_status == 'none').all()
+
+
+def test_initial_config_of_pop_usual_prevalence(seed):
+    """Tests of the way the population is configured: with usual initial prevalence values"""
+    sim = make_simulation_healthsystemdisabled(seed=seed)
+    sim.make_initial_population(n=popsize)
+    check_dtypes(sim)
+    check_configuration_of_population(sim)
+
+
+@pytest.mark.slow
+def test_run_sim_from_high_prevalence(seed):
+    """Run the simulation from the usual prevalence values and high rates of incidence and check configuration of
+    properties at the end"""
+    sim = make_simulation_healthsystemdisabled(seed=seed)
+    sim = make_high_init_prev(sim)
+    sim = incr_rates_of_progression(sim)
+    sim = incr_rate_of_onset_lgd(sim)
+    sim.make_initial_population(n=popsize)
+    check_dtypes(sim)
+    check_configuration_of_population(sim)
+    sim.simulate(end_date=Date(2012, 1, 1))
+    check_dtypes(sim)
+    check_configuration_of_population(sim)
+
+
+@pytest.mark.slow
+def test_check_progression_through_stages_is_happening(seed):
+    """Put all people into the first stage, let progression happen (with no treatment effect) and check that people end
+    up in late stages and some die of this cause.
+    Use a functioning healthsystem that allows HSI and check that diagnosis, treatment and palliative care are happening.
+    """
+
+    sim = make_simulation_healthsystemdisabled(seed=seed)
+
+    # set initial prevalence to be zero
+    sim = zero_out_init_prev(sim)
+
+    # no incidence of new cases
+    sim = zero_rate_of_onset_lgd(sim)
+
+    # remove effect of treatment:
+    sim = make_treatment_ineffective(sim)
+
+    # increase progression rates:
+    sim = incr_rates_of_progression(sim)
+
+    # make initial population
+    sim.make_initial_population(n=popsize)
+
+    # force that all persons aged over 15 are in stage 1 to begin with:
+    population_of_interest = get_population_of_interest(sim)
+    sim.population.props.loc[population_of_interest, "ce_hpv_cc_status"] = 'stage1'
+    check_configuration_of_population(sim)
+
+    # Simulate
+    sim.simulate(end_date=Date(2010, 8, 1))
+    check_dtypes(sim)
+    check_configuration_of_population(sim)
+
+    # check that some people have died of cervical cancer
+    yll = sim.modules['HealthBurden'].years_life_lost
+    assert yll['CervicalCancer'].sum() > 0
+
+    df = sim.population.props
+    # check that people are being diagnosed, going onto treatment and palliative care:
+    assert (df.ce_date_diagnosis > start_date).any()
+    assert (df.ce_date_treatment > start_date).any()
+    assert (df.ce_date_palliative_care > start_date).any()
+
+
+@pytest.mark.slow
+def test_that_there_is_no_treatment_without_the_hsi_running(seed):
+    """Put all people into the first stage, let progression happen (with no treatment effect) and check that people end
+    up in late stages and some die of this cause.
+    Use a healthsystem that does not allow HSI and check that diagnosis, treatment and palliative care do not occur.
+ """ + sim = make_simulation_nohsi(seed=seed) + + # set initial prevalence to be zero + sim = zero_out_init_prev(sim) + + # no incidence of new cases + sim = zero_rate_of_onset_lgd(sim) + + # remove effect of treatment: + sim = make_treatment_ineffective(sim) + + # make initial population + sim.make_initial_population(n=popsize) + + population_of_interest = get_population_of_interest(sim) +# sim.population.props.loc[population_of_interest, "ce_hpv_cc_status"] = 'stage1' + check_configuration_of_population(sim) + + # Simulate + sim.simulate(end_date=Date(2010, 7, 1)) + check_dtypes(sim) + check_configuration_of_population(sim) + + df = sim.population.props + assert len(df.loc[df.is_alive & (df.ce_hpv_cc_status != 'none')]) > 0 + + # check that some people have died of cervical cancer + yll = sim.modules['HealthBurden'].years_life_lost +# todo: find out why this assert fails - I don't think it is a problem in cervical_cancer.py +# assert yll['CervicalCancer'].sum() > 0 + + # w/o healthsystem - check that people are NOT being diagnosed, going onto treatment and palliative care: + assert not (df.ce_date_diagnosis > start_date).any() + assert not (df.ce_date_treatment > start_date).any() + assert not (df.ce_stage_at_which_treatment_given != 'none').any() + assert not (df.ce_date_palliative_care > start_date).any() + + +@pytest.mark.slow +def test_check_progression_through_stages_is_blocked_by_treatment(seed): + """Put all people into the first stage but on treatment, let progression happen, and check that people do move into + a late stage or die""" + sim = make_simulation_healthsystemdisabled(seed=seed) + + # set initial prevalence to be zero + sim = zero_out_init_prev(sim) + + # no incidence of new cases + sim = zero_rate_of_onset_lgd(sim) + + # remove effect of treatment: + sim = make_treamtment_perfectly_effective(sim) + + # increase progression rates: + sim = incr_rates_of_progression(sim) + + # make initial population + sim.make_initial_population(n=popsize) + + # force that all persons aged over 15 are in stage 1 to begin with: + # get the population of interest + population_of_interest = get_population_of_interest(sim) + sim.population.props.loc[population_of_interest, "ce_hpv_cc_status"] = 'stage1' + + # force that they are all symptomatic + sim.modules['SymptomManager'].change_symptom( + person_id=population_of_interest.index[population_of_interest].tolist(), + symptom_string='vaginal_bleeding', + add_or_remove='+', + disease_module=sim.modules['CervicalCancer'] + ) + + # note: This will make all >15 yrs females be on stage 1 and have cancer symptoms yes + # BUT it will not automatically make everyone deemed as ever had cervical cancer in the code Hence check + # assert set(sim.modules['SymptomManager'].who_has('vaginal_bleeding')).issubset( df.index[df.ce_cc_ever]) + # is likely to fail + + check_configuration_of_population(sim) + + # Simulate + sim.simulate(end_date=Date(2010, 7, 1)) + check_dtypes(sim) + check_configuration_of_population(sim) + + # check that there are not any people in each of the later stages and everyone is still in 'stage1': + # this is working in the program - I'm not sure why test is failing + + df = sim.population.props + assert len(df.loc[df.is_alive & (df.age_years >= 15) & (df.sex == 'F'), "ce_hpv_cc_status"]) > 0 + assert (df.loc[df.is_alive & (df.age_years >= 15) & (df.sex == 'F'), "ce_hpv_cc_status"].isin(["none", "hpv", + "cin1", "cin2", "cin3", "stage1", "stage2a", "stage2b", "stage3", "stage4"])).all() + + yll = 
diff --git a/tests/test_consumables.py b/tests/test_consumables.py
index 6ddf3b3a28..6eee6dac38 100644
--- a/tests/test_consumables.py
+++ b/tests/test_consumables.py
@@ -66,7 +66,7 @@ def test_using_recognised_item_codes(seed):
     )
 
     assert {0: False, 1: True} == rtn
-    assert len(cons._not_recognised_item_codes) == 0  # No item_codes recorded as not recognised.
+    assert not cons._not_recognised_item_codes  # No item_codes recorded as not recognised.
 
 
 def test_unrecognised_item_code_is_recorded(seed):
@@ -93,7 +93,7 @@ def test_unrecognised_item_code_is_recorded(seed):
     )
 
     assert isinstance(rtn[99], bool)
-    assert len(cons._not_recognised_item_codes) > 0  # Some item_codes recorded as not recognised.
+    assert cons._not_recognised_item_codes  # Some item_codes recorded as not recognised.
 
     # Check warning is issued at end of simulation
     with pytest.warns(UserWarning) as recorded_warnings:
@@ -321,7 +321,7 @@ def initialise_simulation(self, sim):
     return sim
 
 
-def get_dummy_hsi_event_instance(module, facility_id=None, to_log=False):
+def get_dummy_hsi_event_instance(module, facility_id=None):
    """Make an HSI Event that runs for person_id=0 in a particular facility_id and requests consumables, and for which
    its parent is the identified module."""
 
@@ -340,7 +340,7 @@ def apply(self, person_id, squeeze_factor):
            """Requests all recognised consumables."""
            self.get_consumables(
                item_codes=list(self.sim.modules['HealthSystem'].consumables.item_codes),
-                to_log=to_log,
+                to_log=True,
                return_individual_results=False
            )
 
@@ -446,7 +446,7 @@ def schedule_hsi_that_will_request_consumables(sim):
 
    # Schedule the HSI event for person_id=0
    sim.modules['HealthSystem'].schedule_hsi_event(
-        hsi_event=get_dummy_hsi_event_instance(module=sim.modules['DummyModule'], facility_id=0, to_log=True),
+        hsi_event=get_dummy_hsi_event_instance(module=sim.modules['DummyModule'], facility_id=0),
        topen=sim.start_date,
        tclose=None,
        priority=0
@@ -500,12 +500,12 @@ def test_every_declared_consumable_for_every_possible_hsi_using_actual_data(recw
            facility_id=_facility_id
        )
        for _item_code in item_codes:
-            hsi_event.get_consumables(item_codes=_item_code, to_log=False)
+            hsi_event.get_consumables(item_codes=_item_code)
 
    sim.modules['HealthSystem'].on_simulation_end()
 
    # Check that no warnings raised or item_codes recorded as being not recognised.
-    assert len(sim.modules['HealthSystem'].consumables._not_recognised_item_codes) == 0
+    assert not sim.modules['HealthSystem'].consumables._not_recognised_item_codes
 
    assert not any_warnings_about_item_code(recwarn)
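A note on the `len(...) == 0` to `not ...` rewrites above: for built-in containers the two tests always agree, and the truthiness form is the idiomatic emptiness check. A standalone illustration:

    items = set()
    assert not items                         # an empty container is falsy
    items.add(99)
    assert items                             # a non-empty container is truthy
    assert bool(items) == (len(items) > 0)   # the two spellings always agree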
diff --git a/tests/test_copd.py b/tests/test_copd.py
index b47d803529..6c8b8a0917 100644
--- a/tests/test_copd.py
+++ b/tests/test_copd.py
@@ -211,12 +211,12 @@ def test_moderate_exacerbation():
    df.at[person_id, 'ch_has_inhaler'] = False
 
    # check individuals do not have symptoms before an event is run
-    assert 'breathless_moderate' not in sim.modules['SymptomManager'].has_what(person_id=person_id)
+    assert 'breathless_moderate' not in sim.modules['SymptomManager'].has_what(person_id)
 
    # run Copd Exacerbation event on an individual and confirm they now have a
    # non-emergency symptom (breathless moderate)
    copd.CopdExacerbationEvent(copd_module, person_id, severe=False).run()
-    assert 'breathless_moderate' in sim.modules['SymptomManager'].has_what(person_id=person_id)
+    assert 'breathless_moderate' in sim.modules['SymptomManager'].has_what(person_id)
 
    # Run health seeking behavior event and check non-emergency care is sought
    hsp = HealthSeekingBehaviourPoll(sim.modules['HealthSeekingBehaviour'])
@@ -259,15 +259,13 @@ def test_severe_exacerbation():
    df.at[person_id, 'ch_has_inhaler'] = False
 
    # check an individual does not have emergency symptoms before an event is run
-    assert 'breathless_severe' not in sim.modules['SymptomManager'].has_what(person_id=person_id)
+    assert 'breathless_severe' not in sim.modules['SymptomManager'].has_what(person_id)
 
    # schedule exacerbations event setting severe to True. This will ensure the individual has severe exacerbation
    copd.CopdExacerbationEvent(copd_module, person_id, severe=True).run()
 
    # severe exacerbation should lead to severe symptom (breathless severe in this case). check this is true
-    assert "breathless_severe" in sim.modules["SymptomManager"].has_what(
-        person_id=person_id, disease_module=copd_module
-    )
+    assert 'breathless_severe' in sim.modules['SymptomManager'].has_what(person_id, copd_module)
 
    # # Run health seeking behavior event and check emergency care is sought
    hsp = HealthSeekingBehaviourPoll(module=sim.modules['HealthSeekingBehaviour'])
@@ -422,15 +420,13 @@ def test_referral_logic():
    df.at[person_id, 'ch_has_inhaler'] = False
 
    # check an individual does not have emergency symptoms before an event is run
-    assert 'breathless_severe' not in sim.modules['SymptomManager'].has_what(person_id=person_id)
+    assert 'breathless_severe' not in sim.modules['SymptomManager'].has_what(person_id)
 
    # schedule exacerbations event setting severe to True. This will ensure the individual has severe exacerbation
    copd.CopdExacerbationEvent(copd_module, person_id, severe=True).run()
 
    # severe exacerbation should lead to severe symptom (breathless severe in this case). check this is true
-    assert "breathless_severe" in sim.modules["SymptomManager"].has_what(
-        person_id=person_id, disease_module=copd_module
-    )
+    assert 'breathless_severe' in sim.modules['SymptomManager'].has_what(person_id, copd_module)
 
    # Run health seeking behavior event and check emergency care is sought
    hsp = HealthSeekingBehaviourPoll(module=sim.modules['HealthSeekingBehaviour'])
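The `has_what` edits above — and the matching ones in the HIV and malaria tests later in this diff — only swap keyword arguments for positional ones. Assuming a signature of roughly `has_what(person_id, disease_module=None)` (inferred from these call sites, not checked against the source), the two spellings are interchangeable, as this toy stand-in shows:

    from typing import List, Optional

    class SymptomManagerStub:
        """Toy stand-in used only to illustrate the call styles; not the TLO class."""
        def has_what(self, person_id: int, disease_module: Optional[object] = None) -> List[str]:
            symptoms = {0: ['breathless_severe']}.get(person_id, [])
            return symptoms  # the real method would also filter by disease_module

    sm = SymptomManagerStub()
    assert sm.has_what(0) == sm.has_what(person_id=0)  # positional and keyword calls agree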
diff --git a/tests/test_equipment.py b/tests/test_equipment.py
index e7b8f03ccc..1167023aa8 100644
--- a/tests/test_equipment.py
+++ b/tests/test_equipment.py
@@ -1,6 +1,5 @@
 """This file contains all the tests to do with Equipment."""
 import os
-from ast import literal_eval
 from pathlib import Path
 from typing import Dict
 
@@ -260,7 +259,7 @@ def all_equipment_ever_used(log: Dict) -> set:
        (at any facility)."""
        s = set()
        for i in log["EquipmentEverUsed_ByFacilityID"]['EquipmentEverUsed']:
-            s.update(literal_eval(i))
+            s.update(eval(i))
        return s
 
    # * An HSI that declares no use of any equipment (logs should be empty).
@@ -475,7 +474,7 @@ def initialise_simulation(self, sim):
    # Read log to find what equipment used
    df = parse_log_file(sim.log_filepath)["tlo.methods.healthsystem.summary"]['EquipmentEverUsed_ByFacilityID']
    df = df.drop(index=df.index[~df['Facility_Level'].isin(item_code_needed_at_each_level.keys())])
-    df['EquipmentEverUsed'] = df['EquipmentEverUsed'].apply(literal_eval)
+    df['EquipmentEverUsed'] = df['EquipmentEverUsed'].apply(eval).apply(list)
 
    # Check that equipment used at each level matches expectations
    assert item_code_needed_at_each_level == df.groupby('Facility_Level')['EquipmentEverUsed'].sum().apply(set).to_dict()
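One caution on the `literal_eval` to `eval` change above: `ast.literal_eval` parses Python literals only, while `eval` executes arbitrary expressions, so the latter is safe only because the strings come from the simulation's own log. A quick standalone contrast:

    from ast import literal_eval

    logged = "{'item_1', 'item_2'}"  # the kind of set-repr string stored in the log
    assert literal_eval(logged) == eval(logged) == {'item_1', 'item_2'}

    try:
        literal_eval("__import__('os').getcwd()")  # rejected: not a literal
    except ValueError:
        pass
    # eval() of the same string would happily execute it.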
diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py
index ca26316758..ae212a4f48 100644
--- a/tests/test_healthsystem.py
+++ b/tests/test_healthsystem.py
@@ -2517,122 +2517,3 @@ def run_sim(dynamic_HR_scaling_factor: Dict[int, float]) -> tuple:
    ratio_in_sim = caps / initial_caps
 
    assert np.allclose(ratio_in_sim, expected_overall_scaling)
-
-
-def test_scaling_up_HRH_using_yearly_scaling_and_scaling_by_level_together(seed):
-    """We want the behaviour of HRH 'yearly scaling' and 'scaling_by_level' to operate together, so that, for instance,
-    the total capabilities are greater when scaling up by level _and_ by yearly-scaling than by using either
-    independently."""
-
-    def get_capabilities(yearly_scaling: bool, scaling_by_level: bool, rescaling: bool) -> float:
-        """Return total capabilities of HRH when optionally using 'yearly scaling' and/or 'scaling_by_level'"""
-        sim = Simulation(start_date=start_date, seed=seed)
-        sim.register(
-            demography.Demography(resourcefilepath=resourcefilepath),
-            healthsystem.HealthSystem(resourcefilepath=resourcefilepath),
-            simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath),
-        )
-        params = sim.modules['HealthSystem'].parameters
-
-        # In Mode 1, from the beginning.
-        params["mode_appt_constraints"] = 1
-
-        if yearly_scaling:
-            params['yearly_HR_scaling_mode'] = 'GDP_growth_fHE_case5'
-            # This is above-GDP growth after 2018 (baseline year for HRH)
-
-        if scaling_by_level:
-            params['year_HR_scaling_by_level_and_officer_type'] = 2018  # <-- same time as yearly-scaling
-            params['HR_scaling_by_level_and_officer_type_mode'] = 'x2_fac0&1'
-
-        if rescaling:
-            # Switch to Mode 2, with the rescaling, at the same time as the other changes occur
-            params["mode_appt_constraints_postSwitch"] = 2
-            params["scale_to_effective_capabilities"] = True
-            params["year_mode_switch"] = 2018
-
-        popsize = 100
-        sim.make_initial_population(n=popsize)
-        sim.simulate(end_date=sim.date + pd.DateOffset(years=10, days=1))  # run simulation until at least past 2018
-
-        return sim.modules['HealthSystem'].capabilities_today.sum()
-
-    # - When running without any rescaling
-    caps_only_scaling_by_level = get_capabilities(yearly_scaling=False, scaling_by_level=True, rescaling=False)
-    caps_only_scaling_by_year = get_capabilities(yearly_scaling=True, scaling_by_level=False, rescaling=False)
-    caps_scaling_by_both = get_capabilities(yearly_scaling=True, scaling_by_level=True, rescaling=False)
-    assert caps_scaling_by_both > caps_only_scaling_by_level
-    assert caps_scaling_by_both > caps_only_scaling_by_year
-
-    # - When there is also rescaling as we go from Mode 1 into Mode 2
-    caps_only_scaling_by_level_with_rescaling = get_capabilities(yearly_scaling=False, scaling_by_level=True, rescaling=True)
-    caps_only_scaling_by_year_with_rescaling = get_capabilities(yearly_scaling=True, scaling_by_level=False, rescaling=True)
-    caps_scaling_by_both_with_rescaling = get_capabilities(yearly_scaling=True, scaling_by_level=True, rescaling=True)
-    assert caps_scaling_by_both_with_rescaling > caps_only_scaling_by_level_with_rescaling
-    assert caps_scaling_by_both_with_rescaling > caps_only_scaling_by_year_with_rescaling
-
-
-def test_logging_of_only_hsi_events_with_non_blank_footprints(tmpdir):
-    """Run the simulation with an HSI_Event that may have a blank footprint and examine the healthsystem.summary logger.
-    * If the footprint is blank, the HSI event should be recorded in the usual loggers but not the 'no_blank' logger.
-    * If the footprint is non-blank, the HSI event should be recorded in the usual and the 'no_blank' loggers.
-    """
-
-    def run_simulation_and_return_healthsystem_summary_log(tmpdir: Path, blank_footprint: bool) -> dict:
-        """Return the `healthsystem.summary` logger for a simulation. In that simulation, there is an HSI_Event run on
-        the first day of the simulation and its `EXPECTED_APPT_FOOTPRINT` may or may not be blank. The simulation is
-        run for one year in order that the summary logger is active (it runs annually)."""
-
-        class HSI_Dummy(HSI_Event, IndividualScopeEventMixin):
-            def __init__(self, module, person_id, _is_footprint_blank):
-                super().__init__(module, person_id=person_id)
-                self.TREATMENT_ID = 'Dummy'
-                self.ACCEPTED_FACILITY_LEVEL = '0'
-                self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({}) if blank_footprint \
-                    else self.make_appt_footprint({'ConWithDCSA': 1})
-
-            def apply(self, person_id, squeeze_factor):
-                pass
-
-        class DummyModule(Module):
-            METADATA = {Metadata.DISEASE_MODULE}
-
-            def read_parameters(self, data_folder):
-                pass
-
-            def initialise_population(self, population):
-                pass
-
-            def initialise_simulation(self, sim):
-                hsi_event = HSI_Dummy(module=self, person_id=0, _is_footprint_blank=blank_footprint)
-                sim.modules['HealthSystem'].schedule_hsi_event(hsi_event=hsi_event, topen=sim.date, priority=0)
-
-        start_date = Date(2010, 1, 1)
-        sim = Simulation(start_date=start_date, seed=0, log_config={'filename': 'tmp', 'directory': tmpdir})
-        sim.register(
-            demography.Demography(resourcefilepath=resourcefilepath),
-            healthsystem.HealthSystem(resourcefilepath=resourcefilepath, mode_appt_constraints=0),
-            DummyModule(),
-            # Disable sorting + checks to avoid error due to missing dependencies
-            sort_modules=False,
-            check_all_dependencies=False
-        )
-        sim.make_initial_population(n=100)
-        sim.simulate(end_date=sim.start_date + pd.DateOffset(years=1))
-
-        return parse_log_file(sim.log_filepath)['tlo.methods.healthsystem.summary']
-
-    # When the footprint is blank:
-    log = run_simulation_and_return_healthsystem_summary_log(tmpdir, blank_footprint=True)
-    assert log['HSI_Event']['TREATMENT_ID'].iloc[0] == {'Dummy': 1}  # recorded in usual logger
-    assert log['HSI_Event_non_blank_appt_footprint']['TREATMENT_ID'].iloc[0] == {}  # not recorded in 'non-blank' logger
-
-    # When the footprint is non-blank:
-    log = run_simulation_and_return_healthsystem_summary_log(tmpdir, blank_footprint=False)
-    assert not log['HSI_Event'].empty
-    assert 'TREATMENT_ID' in log['HSI_Event'].columns
-    assert 'TREATMENT_ID' in log['HSI_Event_non_blank_appt_footprint'].columns
-    assert (log['HSI_Event']['TREATMENT_ID'].iloc[0]
-            == log['HSI_Event_non_blank_appt_footprint']['TREATMENT_ID'].iloc[0]
-            == {'Dummy': 1})  # recorded in both the usual and the 'non-blank' logger
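The property the deleted HRH test asserted is easiest to see if the two mechanisms compose multiplicatively, which is what its assertions encode; the factors below are invented for illustration only:

    base = 1000.0     # hypothetical baseline capabilities
    a, b = 1.3, 2.0   # hypothetical yearly-scaling and by-level factors, both > 1
    assert base * a * b > base * a   # both together beat yearly scaling alone
    assert base * a * b > base * b   # ...and beat by-level scaling alone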
diff --git a/tests/test_hiv.py b/tests/test_hiv.py
index 5a27cf2c33..47ef0d2083 100644
--- a/tests/test_hiv.py
+++ b/tests/test_hiv.py
@@ -224,7 +224,7 @@ def test_generation_of_natural_history_process_no_art(seed):
 
    # run the AIDS onset event for this person:
    aids_event.apply(person_id)
-    assert "aids_symptoms" in sim.modules['SymptomManager'].has_what(person_id=person_id)
+    assert "aids_symptoms" in sim.modules['SymptomManager'].has_what(person_id)
 
    # find the AIDS death event for this person
    date_aids_death_event, aids_death_event = \
@@ -274,7 +274,7 @@ def test_generation_of_natural_history_process_with_art_before_aids(seed):
    assert [] == [ev for ev in sim.find_events_for_person(person_id) if isinstance(ev[1], hiv.HivAidsDeathEvent)]
 
    # check no AIDS symptoms for this person
-    assert "aids_symptoms" not in sim.modules['SymptomManager'].has_what(person_id=person_id)
+    assert "aids_symptoms" not in sim.modules['SymptomManager'].has_what(person_id)
 
 
 def test_generation_of_natural_history_process_with_art_after_aids(seed):
@@ -312,7 +312,7 @@ def test_generation_of_natural_history_process_with_art_after_aids(seed):
    date_aids_death_event, aids_death_event = \
        [ev for ev in sim.find_events_for_person(person_id) if isinstance(ev[1], hiv.HivAidsDeathEvent)][0]
    assert date_aids_death_event > sim.date
-    assert "aids_symptoms" in sim.modules['SymptomManager'].has_what(person_id=person_id)
+    assert "aids_symptoms" in sim.modules['SymptomManager'].has_what(person_id)
 
    # Put the person on ART with VL suppression prior to the AIDS death (but following AIDS onset)
    df.at[person_id, 'hv_art'] = "on_VL_suppressed"
@@ -516,7 +516,7 @@ def test_aids_symptoms_lead_to_treatment_being_initiated(seed):
    aids_event.apply(person_id)
 
    # Confirm that they have aids symptoms and an AIDS death scheduled
-    assert 'aids_symptoms' in sim.modules['SymptomManager'].has_what(person_id=person_id)
+    assert 'aids_symptoms' in sim.modules['SymptomManager'].has_what(person_id)
    assert 1 == len(
        [ev[0] for ev in sim.find_events_for_person(person_id) if isinstance(ev[1], hiv.HivAidsTbDeathEvent)])
diff --git a/tests/test_htm_scaleup.py b/tests/test_htm_scaleup.py
deleted file mode 100644
index fcb538f19c..0000000000
--- a/tests/test_htm_scaleup.py
+++ /dev/null
@@ -1,210 +0,0 @@
-""" Tests for setting up the HIV, TB and malaria scenarios used for projections """
-
-import os
-from pathlib import Path
-
-import pandas as pd
-
-from tlo import Date, Simulation
-from tlo.methods import (
-    demography,
-    enhanced_lifestyle,
-    epi,
-    healthburden,
-    healthseekingbehaviour,
-    healthsystem,
-    hiv,
-    malaria,
-    simplified_births,
-    symptommanager,
-    tb,
-)
-
-resourcefilepath = Path(os.path.dirname(__file__)) / "../resources"
-
-start_date = Date(2010, 1, 1)
-scaleup_start_year = 2012  # <-- the scale-up will occur on 1st January of that year
-end_date = Date(2013, 1, 1)
-
-
-def get_sim(seed):
-    """
-    Register all necessary modules for the tests to run
-    """
-
-    sim = Simulation(start_date=start_date, seed=seed)
-
-    # Register the appropriate modules
-    sim.register(
-        demography.Demography(resourcefilepath=resourcefilepath),
-        simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath),
-        enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath),
-        healthsystem.HealthSystem(resourcefilepath=resourcefilepath),
-        symptommanager.SymptomManager(resourcefilepath=resourcefilepath),
-        healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath),
-        healthburden.HealthBurden(resourcefilepath=resourcefilepath),
-        epi.Epi(resourcefilepath=resourcefilepath),
-        hiv.Hiv(resourcefilepath=resourcefilepath),
-        tb.Tb(resourcefilepath=resourcefilepath),
-        malaria.Malaria(resourcefilepath=resourcefilepath),
-    )
-
-    return sim
-
-
-def check_initial_params(sim):
-
-    original_params = pd.read_excel(resourcefilepath / 'ResourceFile_HIV.xlsx', sheet_name='parameters')
-
-    # check initial parameters
-    assert sim.modules["Hiv"].parameters["beta"] == \
-        original_params.loc[original_params.parameter_name == "beta", "value"].values[0]
-    assert sim.modules["Hiv"].parameters["prob_prep_for_fsw_after_hiv_test"] == original_params.loc[
-        original_params.parameter_name == "prob_prep_for_fsw_after_hiv_test", "value"].values[0]
-    assert sim.modules["Hiv"].parameters["prob_prep_for_agyw"] == original_params.loc[
-        original_params.parameter_name == "prob_prep_for_agyw", "value"].values[0]
-    assert sim.modules["Hiv"].parameters["probability_of_being_retained_on_prep_every_3_months"] == original_params.loc[
-        original_params.parameter_name == "probability_of_being_retained_on_prep_every_3_months", "value"].values[0]
-    assert sim.modules["Hiv"].parameters["prob_circ_after_hiv_test"] == original_params.loc[
-        original_params.parameter_name == "prob_circ_after_hiv_test", "value"].values[0]
-
-
-def test_hiv_scale_up(seed):
-    """ test hiv program scale-up changes parameters correctly
-    and on the correct date """
-
-    original_params = pd.read_excel(resourcefilepath / 'ResourceFile_HIV.xlsx', sheet_name="parameters")
-    new_params = pd.read_excel(resourcefilepath / 'ResourceFile_HIV.xlsx', sheet_name="scaleup_parameters")
-
-    popsize = 100
-
-    sim = get_sim(seed=seed)
-
-    # check initial parameters
-    check_initial_params(sim)
-
-    # update parameters to instruct there to be a scale-up
-    sim.modules["Hiv"].parameters["type_of_scaleup"] = 'target'
-    sim.modules["Hiv"].parameters["scaleup_start_year"] = scaleup_start_year
-
-    # Make the population
-    sim.make_initial_population(n=popsize)
-    sim.simulate(end_date=end_date)
-
-    # check HIV parameters changed
-    assert sim.modules["Hiv"].parameters["beta"] < original_params.loc[
-        original_params.parameter_name == "beta", "value"].values[0]
-    assert sim.modules["Hiv"].parameters["prob_prep_for_fsw_after_hiv_test"] == new_params.loc[
-        new_params.parameter == "prob_prep_for_fsw_after_hiv_test", "target_value"].values[0]
-    assert sim.modules["Hiv"].parameters["prob_prep_for_agyw"] == new_params.loc[
-        new_params.parameter == "prob_prep_for_agyw", "target_value"].values[0]
-    assert sim.modules["Hiv"].parameters["probability_of_being_retained_on_prep_every_3_months"] == new_params.loc[
-        new_params.parameter == "probability_of_being_retained_on_prep_every_3_months", "target_value"].values[0]
-    assert sim.modules["Hiv"].parameters["prob_circ_after_hiv_test"] == new_params.loc[
-        new_params.parameter == "prob_circ_after_hiv_test", "target_value"].values[0]
-
-    # check malaria parameters unchanged
-    mal_original_params = pd.read_excel(resourcefilepath / 'malaria' / 'ResourceFile_malaria.xlsx',
-                                        sheet_name="parameters")
-    mal_rdt_testing = pd.read_excel(resourcefilepath / 'malaria' / 'ResourceFile_malaria.xlsx',
-                                    sheet_name="WHO_TestData2023")
-
-    assert sim.modules["Malaria"].parameters["prob_malaria_case_tests"] == mal_original_params.loc[
-        mal_original_params.parameter_name == "prob_malaria_case_tests", "value"].values[0]
-    pd.testing.assert_series_equal(sim.modules["Malaria"].parameters["rdt_testing_rates"]["Rate_rdt_testing"],
-                                   mal_rdt_testing["Rate_rdt_testing"])
-
-    # all irs coverage levels should be < 1.0
-    assert (sim.modules["Malaria"].itn_irs['irs_rate'] < 1.0).all()
-    # itn rates for 2019 onwards
-    assert sim.modules["Malaria"].parameters["itn"] == mal_original_params.loc[
-        mal_original_params.parameter_name == "itn", "value"].values[0]
-
-    # check tb parameters unchanged
-    tb_original_params = pd.read_excel(resourcefilepath / 'ResourceFile_TB.xlsx', sheet_name="parameters")
-    tb_testing = pd.read_excel(resourcefilepath / 'ResourceFile_TB.xlsx', sheet_name="NTP2019")
-
-    pd.testing.assert_series_equal(sim.modules["Tb"].parameters["rate_testing_active_tb"]["treatment_coverage"],
-                                   tb_testing["treatment_coverage"])
-    assert sim.modules["Tb"].parameters["prob_tx_success_ds"] == tb_original_params.loc[
-        tb_original_params.parameter_name == "prob_tx_success_ds", "value"].values[0]
-    assert sim.modules["Tb"].parameters["prob_tx_success_mdr"] == tb_original_params.loc[
-        tb_original_params.parameter_name == "prob_tx_success_mdr", "value"].values[0]
-    assert sim.modules["Tb"].parameters["prob_tx_success_0_4"] == tb_original_params.loc[
-        tb_original_params.parameter_name == "prob_tx_success_0_4", "value"].values[0]
-    assert sim.modules["Tb"].parameters["prob_tx_success_5_14"] == tb_original_params.loc[
-        tb_original_params.parameter_name == "prob_tx_success_5_14", "value"].values[0]
-    assert sim.modules["Tb"].parameters["first_line_test"] == tb_original_params.loc[
-        tb_original_params.parameter_name == "first_line_test", "value"].values[0]
-
-
-def test_htm_scale_up(seed):
-    """ test hiv/tb/malaria program scale-up changes parameters correctly
-    and on the correct date """
-
-    # Load data on HIV prevalence
-    original_hiv_params = pd.read_excel(resourcefilepath / 'ResourceFile_HIV.xlsx', sheet_name="parameters")
-    new_hiv_params = pd.read_excel(resourcefilepath / 'ResourceFile_HIV.xlsx', sheet_name="scaleup_parameters")
-
-    popsize = 100
-
-    sim = get_sim(seed=seed)
-
-    # check initial parameters
-    check_initial_params(sim)
-
-    # update parameters
-    sim.modules["Hiv"].parameters["type_of_scaleup"] = 'target'
-    sim.modules["Hiv"].parameters["scaleup_start_year"] = scaleup_start_year
-    sim.modules["Tb"].parameters["type_of_scaleup"] = 'target'
-    sim.modules["Tb"].parameters["scaleup_start_year"] = scaleup_start_year
-    sim.modules["Malaria"].parameters["type_of_scaleup"] = 'target'
-    sim.modules["Malaria"].parameters["scaleup_start_year"] = scaleup_start_year
-
-    # Make the population
-    sim.make_initial_population(n=popsize)
-    sim.simulate(end_date=end_date)
-
-    # check HIV parameters changed
-    assert sim.modules["Hiv"].parameters["beta"] < original_hiv_params.loc[
-        original_hiv_params.parameter_name == "beta", "value"].values[0]
-    assert sim.modules["Hiv"].parameters["prob_prep_for_fsw_after_hiv_test"] == new_hiv_params.loc[
-        new_hiv_params.parameter == "prob_prep_for_fsw_after_hiv_test", "target_value"].values[0]
-    assert sim.modules["Hiv"].parameters["prob_prep_for_agyw"] == new_hiv_params.loc[
-        new_hiv_params.parameter == "prob_prep_for_agyw", "target_value"].values[0]
-    assert sim.modules["Hiv"].parameters["probability_of_being_retained_on_prep_every_3_months"] == new_hiv_params.loc[
-        new_hiv_params.parameter == "probability_of_being_retained_on_prep_every_3_months", "target_value"].values[0]
-    assert sim.modules["Hiv"].parameters["prob_circ_after_hiv_test"] == new_hiv_params.loc[
-        new_hiv_params.parameter == "prob_circ_after_hiv_test", "target_value"].values[0]
-
-    # check malaria parameters changed
-    new_mal_params = pd.read_excel(resourcefilepath / 'malaria' / 'ResourceFile_malaria.xlsx',
-                                   sheet_name="scaleup_parameters")
-
-    assert sim.modules["Malaria"].parameters["prob_malaria_case_tests"] == new_mal_params.loc[
-        new_mal_params.parameter == "prob_malaria_case_tests", "target_value"].values[0]
-    assert sim.modules["Malaria"].parameters["rdt_testing_rates"]["Rate_rdt_testing"].eq(new_mal_params.loc[
-        new_mal_params.parameter == "rdt_testing_rates", "target_value"].values[0]).all()
-
-    # some irs coverage levels should now = 1.0
-    assert (sim.modules["Malaria"].itn_irs['irs_rate'] == 1.0).any()
-    # itn rates for 2019 onwards
-    assert sim.modules["Malaria"].parameters["itn"] == new_mal_params.loc[
-        new_mal_params.parameter == "itn", "target_value"].values[0]
-
-    # check tb parameters changed
-    new_tb_params = pd.read_excel(resourcefilepath / 'ResourceFile_TB.xlsx', sheet_name="scaleup_parameters")
-
-    assert sim.modules["Tb"].parameters["rate_testing_active_tb"]["treatment_coverage"].eq(new_tb_params.loc[
-        new_tb_params.parameter == "tb_treatment_coverage", "target_value"].values[0]).all()
-    assert sim.modules["Tb"].parameters["prob_tx_success_ds"] == new_tb_params.loc[
-        new_tb_params.parameter == "tb_prob_tx_success_ds", "target_value"].values[0]
-    assert sim.modules["Tb"].parameters["prob_tx_success_mdr"] == new_tb_params.loc[
-        new_tb_params.parameter == "tb_prob_tx_success_mdr", "target_value"].values[0]
-    assert sim.modules["Tb"].parameters["prob_tx_success_0_4"] == new_tb_params.loc[
-        new_tb_params.parameter == "tb_prob_tx_success_0_4", "target_value"].values[0]
-    assert sim.modules["Tb"].parameters["prob_tx_success_5_14"] == new_tb_params.loc[
-        new_tb_params.parameter == "tb_prob_tx_success_5_14", "target_value"].values[0]
-    assert sim.modules["Tb"].parameters["first_line_test"] == new_tb_params.loc[
-        new_tb_params.parameter == "first_line_test", "target_value"].values[0]
-
diff --git a/tests/test_life_expectancy.py b/tests/test_life_expectancy.py
index 0a77f02310..2465580f65 100644
--- a/tests/test_life_expectancy.py
+++ b/tests/test_life_expectancy.py
@@ -1,15 +1,10 @@
 import datetime
 import os
-import pickle
 from pathlib import Path
 
-import numpy as np
 import pandas as pd
 
-from tlo.analysis.life_expectancy import (
-    get_life_expectancy_estimates,
-    get_probability_of_premature_death,
-)
+from tlo.analysis.life_expectancy import get_life_expectancy_estimates
 
 
 def test_get_life_expectancy():
@@ -38,57 +33,3 @@ def test_get_life_expectancy():
    assert sorted(rtn_full.index.to_list()) == ["F", "M"]
    assert list(rtn_full.columns.names) == ['draw', 'run']
    assert rtn_full.columns.levels[1].to_list() == [0, 1]
-
-
-def test_probability_premature_death(tmpdir, age_before_which_death_is_defined_as_premature: int = 70):
-    """
-    Test the calculation of the probability of premature death from a simulated cohort.
-
-    This function loads results from a dummy cohort (N = 100, with 37 F and 63 M) simulation in which all individuals
-    start at age 0. The simulation was then run for 70 years (2010 - 2080), during which individuals could die but
-    nobody could be born. In this dummy data set, 6 F and 23 M die prematurely (i.e. before the age of 70, the default
-    cut-off), giving probabilities of premature death of 0.16 and 0.37, respectively.
-    This test calculates the probability of premature death separately for males and females using the data from this
-    simulated run and the function get_probability_of_premature_death, and compares the result against the same
-    probabilities computed directly from the deaths before the age of 70 that occurred in the simulated cohort.
-    """
-    # load results from a dummy cohort where everyone starts at age 0.
-    target_period = (datetime.date(2010, 1, 1), datetime.date(2080, 12, 31))
-
-    results_folder_dummy_results = Path(os.path.dirname(__file__)) / 'resources' / 'probability_premature_death'
-    pickled_file = os.path.join(results_folder_dummy_results, '0', '0', 'tlo.methods.demography.pickle')
-
-    # - Compute 'manually' from raw data
-    with open(pickled_file, 'rb') as file:
-        demography_data = pickle.load(file)
-    initial_popsize = {'F': demography_data['population']['female'][0], 'M': demography_data['population']['male'][0]}
-    deaths_total = demography_data['death'][['sex', 'age']]
-    num_premature_deaths = deaths_total.loc[deaths_total['age'] < age_before_which_death_is_defined_as_premature] \
-        .groupby('sex') \
-        .size() \
-        .to_dict()
-    prob_premature_death = {s: num_premature_deaths[s] / initial_popsize[s] for s in ("M", "F")}
-
-    # - Compute using utility function
-    probability_premature_death_summary = get_probability_of_premature_death(
-        results_folder=results_folder_dummy_results,
-        target_period=target_period,
-        summary=True,
-    )
-
-    # Confirm both methods give the same answer
-    # (The absolute tolerance of this test is reasonably large (1%) as small assumptions made in the calculation of
-    # the cumulative probability of death in each age-group mean that the manual computation done here and the
-    # calculation performed in the utility function are not expected to agree perfectly.)
-    assert np.isclose(
-        probability_premature_death_summary.loc["F"].loc[(0, 'mean')],
-        prob_premature_death['F'],
-        atol=0.01
-    )
-    assert np.isclose(
-        probability_premature_death_summary.loc["M"].loc[(0, 'mean')],
-        prob_premature_death['M'],
-        atol=0.01
-    )
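The manual computation in the deleted test reduces to one ratio per sex: deaths before age 70 divided by the starting cohort size. With the numbers quoted in its docstring — 37 F / 63 M at the start, 6 and 23 premature deaths — the arithmetic checks out:

    initial_popsize = {'F': 37, 'M': 63}
    num_premature_deaths = {'F': 6, 'M': 23}  # deaths before age 70 in the dummy cohort
    prob = {s: num_premature_deaths[s] / initial_popsize[s] for s in ('F', 'M')}
    assert round(prob['F'], 2) == 0.16 and round(prob['M'], 2) == 0.37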
diff --git a/tests/test_logging.py b/tests/test_logging.py
index 6d094623c4..13151c8be5 100644
--- a/tests/test_logging.py
+++ b/tests/test_logging.py
@@ -1,587 +1,173 @@
-import contextlib
 import json
-import logging as _logging
-import sys
-from collections.abc import Generator, Iterable, Mapping
-from itertools import chain, product, repeat
+import os
 from pathlib import Path
-from typing import Callable
 
-import numpy as np
 import pandas as pd
 import pytest
 
-import tlo.logging as logging
-import tlo.logging.core as core
-
-
-def _single_row_dataframe(data: dict) -> pd.DataFrame:
-    # Single row dataframe 'type' which allows construction by calling on a dictionary
-    # of scalars by using an explicit length 1 index while also giving a readable
-    # test parameter identifier
-    return pd.DataFrame(data, index=[0])
-
-
-LOGGING_LEVELS = [logging.DEBUG, logging.INFO, logging.WARNING, logging.CRITICAL]
-CATCH_ALL_LEVEL = -1
-STRING_DATA_VALUES = ["foo", "bar", "spam"]
-ITERABLE_DATA_VALUES = [(1, 2), (3, 1, 2), ("d", "e"), ("a", "c", 1)]
-MAPPING_DATA_VALUES = [{"a": 1, "b": "spam", 2: None}, {"eggs": "foo", "bar": 1.25}]
-SUPPORTED_SEQUENCE_TYPES = [list, tuple, pd.Series]
-SUPPORTED_ITERABLE_TYPES = SUPPORTED_SEQUENCE_TYPES + [set]
-SUPPORTED_MAPPING_TYPES = [dict, _single_row_dataframe]
-LOGGER_NAMES = ["tlo", "tlo.methods"]
-SIMULATION_DATE = "2010-01-01T00:00:00"
-
-
-class UpdateableSimulateDateGetter:
-
-    def __init__(self, start_date=pd.Timestamp(2010, 1, 1)):
-        self._date = start_date
-
-    def increment_date(self, days=1) -> None:
-        self._date += pd.DateOffset(days=days)
-
-    def __call__(self) -> str:
-        return self._date.isoformat()
-
-
-@pytest.fixture
-def simulation_date_getter() -> core.SimulationDateGetter:
-    return lambda: SIMULATION_DATE
-
-
-@pytest.fixture
-def root_level() -> core.LogLevel:
-    return logging.WARNING
-
-
-@pytest.fixture
-def stdout_handler_level() -> core.LogLevel:
-    return logging.DEBUG
-
-
-@pytest.fixture
-def add_stdout_handler() -> bool:
-    return False
-
-
-@pytest.fixture(autouse=True)
-def initialise_logging(
-    add_stdout_handler: bool,
-    simulation_date_getter: core.SimulationDateGetter,
-    root_level: core.LogLevel,
-    stdout_handler_level: core.LogLevel,
-) -> Generator[None, None, None]:
-    logging.initialise(
-        add_stdout_handler=add_stdout_handler,
-        simulation_date_getter=simulation_date_getter,
-        root_level=root_level,
-        stdout_handler_level=stdout_handler_level,
-    )
-    yield
-    logging.reset()
-
-
-@pytest.mark.parametrize("add_stdout_handler", [True, False])
-@pytest.mark.parametrize("root_level", LOGGING_LEVELS, ids=_logging.getLevelName)
-@pytest.mark.parametrize(
-    "stdout_handler_level", LOGGING_LEVELS, ids=_logging.getLevelName
-)
-def test_initialise_logging(
-    add_stdout_handler: bool,
-    simulation_date_getter: core.SimulationDateGetter,
-    root_level: core.LogLevel,
-    stdout_handler_level: core.LogLevel,
-) -> None:
-    logger = logging.getLogger("tlo")
-    assert logger.level == root_level
-    if add_stdout_handler:
-        assert len(logger.handlers) == 1
-        handler = logger.handlers[0]
-        assert isinstance(handler, _logging.StreamHandler)
-        assert handler.stream is sys.stdout
-        assert handler.level == stdout_handler_level
-    else:
-        assert len(logger.handlers) == 0
-    assert core._get_simulation_date is simulation_date_getter
-
-
-def _check_handlers(
-    logger: core.Logger, expected_number_handlers: int, expected_log_path: Path
-) -> None:
-    assert len(logger.handlers) == expected_number_handlers
-    file_handlers = [h for h in logger.handlers if isinstance(h, _logging.FileHandler)]
-    assert len(file_handlers) == 1
-    assert file_handlers[0].baseFilename == str(expected_log_path)
-
-
-@pytest.mark.parametrize("add_stdout_handler", [True, False])
-def test_set_output_file(add_stdout_handler: bool, tmp_path: Path) -> None:
-    log_path_1 = tmp_path / "test-1.log"
-    log_path_2 = tmp_path / "test-2.log"
-    logging.set_output_file(log_path_1)
-    logger = logging.getLogger("tlo")
-    expected_number_handlers = 2 if add_stdout_handler else 1
-    _check_handlers(logger, expected_number_handlers, log_path_1)
-    # Setting output file a second time should replace previous file handler rather
-    # than add an additional handler and keep existing
-    logging.set_output_file(log_path_2)
-    _check_handlers(logger, expected_number_handlers, log_path_2)
-
-
-@pytest.mark.parametrize("logger_name", ["tlo", "tlo.methods"])
-def test_getLogger(logger_name: str) -> None:
-    logger = logging.getLogger(logger_name)
-    assert logger.name == logger_name
-    assert isinstance(logger.handlers, list)
-    assert isinstance(logger.level, int)
-    assert logger.isEnabledFor(logger.level)
-    assert logging.getLogger(logger_name) is logger
-
-
-@pytest.mark.parametrize("logger_name", ["foo", "spam.tlo"])
-def test_getLogger_invalid_name_raises(logger_name: str) -> None:
-    with pytest.raises(AssertionError, match=logger_name):
-        logging.getLogger(logger_name)
-
-
-@pytest.mark.parametrize("mapping_data", MAPPING_DATA_VALUES)
-@pytest.mark.parametrize("mapping_type", SUPPORTED_MAPPING_TYPES)
-def test_get_log_data_as_dict_with_mapping_types(
-    mapping_data: Mapping, mapping_type: Callable
-) -> None:
-    log_data = mapping_type(mapping_data)
-    data_dict = core._get_log_data_as_dict(log_data)
-    assert len(data_dict) == len(mapping_data)
-    assert set(data_dict.keys()) == set(map(str, mapping_data.keys()))
-    assert set(data_dict.values()) == set(mapping_data.values())
-    # Dictionary returned should be invariant to original ordering
-    assert data_dict == core._get_log_data_as_dict(
-        mapping_type(dict(reversed(mapping_data.items())))
-    )
-
-
-@pytest.mark.parametrize("mapping_data", MAPPING_DATA_VALUES)
-def test_get_log_data_as_dict_with_multirow_dataframe_raises(
-    mapping_data: Mapping,
-) -> None:
-    log_data = pd.DataFrame(mapping_data, index=[0, 1])
-    with pytest.raises(ValueError, match="multirow"):
-        core._get_log_data_as_dict(log_data)
-
-
-@pytest.mark.parametrize("values", ITERABLE_DATA_VALUES)
-@pytest.mark.parametrize("sequence_type", SUPPORTED_SEQUENCE_TYPES)
-def test_get_log_data_as_dict_with_sequence_types(
-    values: Iterable, sequence_type: Callable
-) -> None:
-    log_data = sequence_type(values)
-    data_dict = core._get_log_data_as_dict(log_data)
-    assert len(data_dict) == len(log_data)
-    assert list(data_dict.keys()) == [f"item_{i+1}" for i in range(len(log_data))]
-    assert list(data_dict.values()) == list(log_data)
-
-
-@pytest.mark.parametrize("values", ITERABLE_DATA_VALUES)
-def test_get_log_data_as_dict_with_set(values: Iterable) -> None:
-    data = set(values)
-    data_dict = core._get_log_data_as_dict(data)
-    assert len(data_dict) == len(data)
-    assert list(data_dict.keys()) == [f"item_{i+1}" for i in range(len(data))]
-    assert set(data_dict.values()) == data
-    # Dictionary returned should be invariant to original ordering
-    assert data_dict == core._get_log_data_as_dict(set(reversed(values)))
-
-
-def test_convert_numpy_scalars_to_python_types() -> None:
-    data = {
-        "a": np.int64(1),
-        "b": np.int32(42),
-        "c": np.float64(0.5),
-        "d": np.bool_(True),
-    }
-    expected_converted_data = {"a": 1, "b": 42, "c": 0.5, "d": True}
-    converted_data = core._convert_numpy_scalars_to_python_types(data)
-    assert converted_data == expected_converted_data
-
-
-def test_get_columns_from_data_dict() -> None:
-    data = {
-        "a": 1,
-        "b": 0.5,
-        "c": False,
-        "d": "foo",
-        "e": pd.Timestamp("2010-01-01"),
-    }
-    expected_columns = {
-        "a": "int",
-        "b": "float",
-        "c": "bool",
-        "d": "str",
-        "e": "Timestamp",
-    }
-    columns = core._get_columns_from_data_dict(data)
-    assert columns == expected_columns
-
-
-@contextlib.contextmanager
-def _propagate_to_root() -> Generator[None, None, None]:
-    # Enable propagation to root logger to allow pytest capturing to work
-    root_logger = logging.getLogger("tlo")
-    root_logger._std_logger.propagate = True
-    yield
-    root_logger._std_logger.propagate = False
-
-
-def _setup_caplog_and_get_logger(
-    caplog: pytest.LogCaptureFixture, logger_name: str, logger_level: core.LogLevel
-) -> core.Logger:
-    caplog.set_level(CATCH_ALL_LEVEL, logger_name)
+from tlo import Date, Simulation, logging
+from tlo.methods import demography, enhanced_lifestyle
+
+start_date = Date(2010, 1, 1)
+popsize = 500
+
+
+@pytest.fixture(scope='function')
+def basic_configuration(tmpdir):
+    """Setup basic file handler configuration"""
+    # tlo module config
+    file_name = tmpdir.join('test.log')
+    file_handler = logging.set_output_file(file_name)
+
+    yield file_handler, file_name
+
+    file_handler.close()
+
+
+@pytest.fixture(scope='function')
+def simulation_configuration(tmpdir):
+    resourcefilepath = Path(os.path.dirname(__file__)) / '../resources'
+
+    sim = Simulation(start_date=start_date, log_config={'filename': 'log', 'directory': tmpdir})
+    sim.register(demography.Demography(resourcefilepath=resourcefilepath),
+                 enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath))
+
+    yield sim.output_file, sim.log_filepath
+
+    sim.output_file.close()
+
+
+def read_file(file_handler, file_name):
+    """
+    Reads file and returns the lines
+    :param file_handler: file handler (to flush), though this might be a bit unnecessary
+    :param file_name: path to file
+    :return: list of lines
+    """
+    file_handler.flush()
+    with open(file_name) as handle:
+        lines = handle.readlines()
+    return lines
+
+
+def log_message(message_level, logger_level, message, logger_name='tlo.test.logger', structured_logging=False):
+    """
+    Sets up logger level, and writes message at the message level
+
+    :param message_level: level that the message will be added as
+    :param logger_level: level that the logger is set to
+    :param message: message to be written to log
+    :param structured_logging: whether to log structured data (key/data) or a plain string message
+    """
     logger = logging.getLogger(logger_name)
     logger.setLevel(logger_level)
-    return logger
-
-
-@pytest.mark.parametrize("disable_level", LOGGING_LEVELS, ids=_logging.getLevelName)
-@pytest.mark.parametrize("logger_level_offset", [-5, 0, 5])
-@pytest.mark.parametrize("data", STRING_DATA_VALUES)
-@pytest.mark.parametrize("logger_name", LOGGER_NAMES)
-def test_disable(
-    disable_level: core.LogLevel,
-    logger_level_offset: int,
-    data: str,
-    logger_name: str,
-    caplog: pytest.LogCaptureFixture,
-) -> None:
-    logger = _setup_caplog_and_get_logger(caplog, logger_name, CATCH_ALL_LEVEL)
-    logging.disable(disable_level)
-    assert not logger.isEnabledFor(disable_level)
-    message_level = disable_level + logger_level_offset
-    with _propagate_to_root():
-        logger.log(message_level, key="message", data=data)
-    if message_level > disable_level:
-        # Message level is above disable level and so should have been captured
-        assert len(caplog.records) == 1
-        assert data in caplog.records[0].msg
-    else:
-        # Message level is below disable level and so should not have been captured
-        assert len(caplog.records) == 0
-
-
-def _check_captured_log_output_for_levels(
-    caplog: pytest.LogCaptureFixture,
-    message_level: core.LogLevel,
-    logger_level: core.LogLevel,
-    data: str,
-) -> None:
-    if message_level >= logger_level:
-        # Message level is at or above logger's level and so should have been captured
-        assert len(caplog.records) == 1
-        assert data in caplog.records[0].msg
+
+    if structured_logging:
+        if message_level == 'logging.DEBUG':
+            logger.debug(key='structured', data=message)
+        elif message_level == 'logging.INFO':
+            logger.info(key='structured', data=message)
+        elif message_level == 'logging.WARNING':
+            logger.warning(key='structured', data=message)
+        elif message_level == 'logging.CRITICAL':
+            logger.critical(key='structured', data=message)
     else:
-        # Message level is below logger's set level and so should not have been captured
-        assert len(caplog.records) == 0
-
-
-@pytest.mark.parametrize("message_level", LOGGING_LEVELS, ids=_logging.getLevelName)
-@pytest.mark.parametrize("logger_level_offset", [-5, 0, 5])
-@pytest.mark.parametrize("data", STRING_DATA_VALUES)
-@pytest.mark.parametrize("logger_name", LOGGER_NAMES)
-def test_logging_with_log(
-    message_level: core.LogLevel,
-    logger_level_offset: int,
-    data: str,
-    logger_name: str,
-    caplog: pytest.LogCaptureFixture,
-) -> None:
-    logger_level = message_level + logger_level_offset
-    logger = _setup_caplog_and_get_logger(caplog, logger_name, logger_level)
-    with _propagate_to_root():
-        logger.log(level=message_level, key="message", data=data)
-    _check_captured_log_output_for_levels(caplog, message_level, logger_level, data)
-
-
-@pytest.mark.parametrize("message_level", LOGGING_LEVELS, ids=_logging.getLevelName)
-@pytest.mark.parametrize("logger_level_offset", [-5, 0, 5])
-@pytest.mark.parametrize("logger_name", LOGGER_NAMES)
-@pytest.mark.parametrize("data", STRING_DATA_VALUES)
-def test_logging_with_convenience_methods(
-    message_level: core.LogLevel,
-    logger_level_offset: int,
-    data: str,
-    logger_name: str,
-    caplog: pytest.LogCaptureFixture,
-) -> None:
-    logger_level = message_level + logger_level_offset
-    logger = _setup_caplog_and_get_logger(caplog, logger_name, logger_level)
-    convenience_method = getattr(logger, _logging.getLevelName(message_level).lower())
-    with _propagate_to_root():
-        convenience_method(key="message", data=data)
-    _check_captured_log_output_for_levels(caplog, message_level, logger_level, data)
-
-
-def _check_header(
-    header: dict[str, str | dict[str, str]],
-    expected_module: str,
-    expected_key: str,
-    expected_level: str,
-    expected_description: str,
-    expected_columns: dict[str, str],
-) -> None:
-    assert set(header.keys()) == {
-        "uuid",
-        "type",
-        "module",
-        "key",
-        "level",
-        "columns",
-        "description",
-    }
-    assert isinstance(header["uuid"], str)
-    assert set(header["uuid"]) <= set("abcdef0123456789")
-    assert header["type"] == "header"
-    assert header["module"] == expected_module
-    assert header["key"] == expected_key
-    assert header["level"] == expected_level
-    assert header["description"] == expected_description
-    assert isinstance(header["columns"], dict)
-    assert header["columns"] == expected_columns
-
-
-def _check_row(
-    row: dict[str, str],
-    logger_level: core.LogLevel,
-    expected_uuid: str,
-    expected_date: str,
-    expected_values: list,
-    expected_module: str,
-    expected_key: str,
-) -> None:
-    assert row["uuid"] == expected_uuid
-    assert row["date"] == expected_date
-    assert row["values"] == expected_values
-    if logger_level == logging.DEBUG:
-        assert row["module"] == expected_module
-        assert row["key"] == expected_key
-
-
-def _parse_and_check_log_records(
-    caplog: pytest.LogCaptureFixture,
-    logger_name: str,
-    logger_level: core.LogLevel,
-    message_level: core.LogLevel,
-    data_dicts: dict,
-    dates: str,
-    keys: str,
-    description: str | None = None,
-) -> None:
-    headers = {}
-    for record, data_dict, date, key in zip(caplog.records, data_dicts, dates, keys):
-        message_lines = record.msg.split("\n")
-        if key not in headers:
-            # First record for key therefore expect both header and row lines
-            assert len(message_lines) == 2
-            header_line, row_line = message_lines
-            headers[key] = json.loads(header_line)
-            _check_header(
-                header=headers[key],
-                expected_module=logger_name,
-                expected_key=key,
-                expected_level=_logging.getLevelName(logger_level),
-                expected_description=description,
-                expected_columns=logging.core._get_columns_from_data_dict(data_dict),
-            )
-        else:
-            # Subsequent records for key should only have row line
-            assert len(message_lines) == 1
-            row_line = message_lines[0]
-        row = json.loads(row_line)
-        _check_row(
-            row=row,
-            logger_level=message_level,
-            expected_uuid=headers[key]["uuid"],
-            expected_date=date,
-            expected_values=list(data_dict.values()),
-            expected_module=logger_name,
-            expected_key=key,
-        )
-
-
-@pytest.mark.parametrize("level", LOGGING_LEVELS, ids=_logging.getLevelName)
-@pytest.mark.parametrize(
-    "data_type,data",
-    list(
-        chain(
-            zip([str] * len(STRING_DATA_VALUES), STRING_DATA_VALUES),
-            product(SUPPORTED_ITERABLE_TYPES, ITERABLE_DATA_VALUES),
-            product(SUPPORTED_MAPPING_TYPES, MAPPING_DATA_VALUES),
-        )
-    ),
-)
-@pytest.mark.parametrize("logger_name", LOGGER_NAMES)
-@pytest.mark.parametrize("key", STRING_DATA_VALUES)
-@pytest.mark.parametrize("description", [None, "test"])
-@pytest.mark.parametrize("number_repeats", [1, 2, 3])
-def test_logging_structured_data(
-    level: core.LogLevel,
-    data_type: Callable,
-    data: Mapping | Iterable,
-    logger_name: str,
-    key: str,
-    description: str,
-    number_repeats: int,
-    caplog: pytest.LogCaptureFixture,
-) -> None:
-    logger = _setup_caplog_and_get_logger(caplog, logger_name, level)
-    log_data = data_type(data)
-    data_dict = logging.core._get_log_data_as_dict(log_data)
-    with _propagate_to_root():
-        for _ in range(number_repeats):
-            logger.log(level=level, key=key, data=log_data, description=description)
-    assert len(caplog.records) == number_repeats
-    _parse_and_check_log_records(
-        caplog=caplog,
-        logger_name=logger_name,
-        logger_level=level,
-        message_level=level,
-        data_dicts=repeat(data_dict),
-        dates=repeat(SIMULATION_DATE),
-        keys=repeat(key),
-        description=description,
-    )
-
-
-@pytest.mark.parametrize("simulation_date_getter", [UpdateableSimulateDateGetter()])
-@pytest.mark.parametrize("logger_name", LOGGER_NAMES)
-@pytest.mark.parametrize("number_dates", [2, 3])
-def test_logging_updating_simulation_date(
-    simulation_date_getter: core.SimulationDateGetter,
-    logger_name: str,
-    root_level: core.LogLevel,
-    number_dates: int,
-    caplog: pytest.LogCaptureFixture,
-) -> None:
-    logger = _setup_caplog_and_get_logger(caplog, logger_name, root_level)
-    key = "message"
-    data = "spam"
-    data_dict = logging.core._get_log_data_as_dict(data)
-    dates = []
-    with _propagate_to_root():
-        for _ in range(number_dates):
-            logger.log(level=root_level, key=key, data=data)
-            dates.append(simulation_date_getter())
-            simulation_date_getter.increment_date()
-    # Dates should be unique
-    assert len(set(dates)) == len(dates)
-    assert len(caplog.records) == number_dates
-    _parse_and_check_log_records(
-        caplog=caplog,
-        logger_name=logger_name,
-        logger_level=root_level,
-        message_level=root_level,
-        data_dicts=repeat(data_dict),
-        dates=dates,
-        keys=repeat(key),
-        description=None,
-    )
-
-
-@pytest.mark.parametrize("logger_name", LOGGER_NAMES)
-def test_logging_structured_data_multiple_keys(
-    logger_name: str,
-    root_level: core.LogLevel,
-    caplog: pytest.LogCaptureFixture,
-) -> None:
-    logger = _setup_caplog_and_get_logger(caplog, logger_name, root_level)
-    keys = ["foo", "bar", "foo", "foo", "bar"]
-    data_values = ["a", "b", "c", "d", "e"]
-    data_dicts = [logging.core._get_log_data_as_dict(data) for data in data_values]
-    with _propagate_to_root():
-        for key, data in zip(keys, data_values):
-            logger.log(level=root_level, key=key, data=data)
-    assert len(caplog.records) == len(keys)
-    _parse_and_check_log_records(
-        caplog=caplog,
-        logger_name=logger_name,
-        logger_level=root_level,
-        message_level=root_level,
-        data_dicts=data_dicts,
-        dates=repeat(SIMULATION_DATE),
-        keys=keys,
-        description=None,
-    )
-
-
-@pytest.mark.parametrize("level", LOGGING_LEVELS)
-def test_logging_to_file(level: core.LogLevel, tmp_path: Path) -> None:
-    log_path = tmp_path / "test.log"
-    file_handler = logging.set_output_file(log_path)
-    loggers = [logging.getLogger(name) for name in LOGGER_NAMES]
-    key = "message"
-    for logger, data in zip(loggers, STRING_DATA_VALUES):
-        logger.setLevel(level)
-        logger.log(level=level, key=key, data=data)
-    _logging.shutdown([lambda: file_handler])
-    with log_path.open("r") as log_file:
-        log_lines = log_file.readlines()
-    # Should have two lines (one header + one data row per logger)
-    assert len(log_lines) == 2 * len(loggers)
-    for name, data in zip(LOGGER_NAMES, STRING_DATA_VALUES):
-        header = json.loads(log_lines.pop(0))
-        row = json.loads(log_lines.pop(0))
-        _check_header(
-            header=header,
-            expected_module=name,
-            expected_key=key,
-            expected_level=_logging.getLevelName(level),
-            expected_description=None,
-            expected_columns={key: "str"},
-        )
-        _check_row(
-            row=row,
-            logger_level=level,
-            expected_uuid=header["uuid"],
-            expected_date=SIMULATION_DATE,
-            expected_values=[data],
-            expected_module=name,
-            expected_key=key,
-        )
-
-
-@pytest.mark.parametrize(
-    "inconsistent_data_iterables",
-    [
-        ({"a": 1, "b": 2}, {"a": 3, "b": 4, "c": 5}),
-        ({"a": 1}, {"b": 2}),
-        ({"a": None, "b": 2}, {"a": 1, "b": 2}),
-        ([1], [0.5]),
-        (["a", "b"], ["a", "b", "c"]),
-        ("foo", "bar", ["spam"]),
-    ],
-)
-def test_logging_structured_data_inconsistent_columns_warns(
-    inconsistent_data_iterables: Iterable[core.LogData], root_level: core.LogLevel
-) -> None:
-    logger = logging.getLogger("tlo")
-    with pytest.warns(core.InconsistentLoggedColumnsWarning):
-        for data in inconsistent_data_iterables:
-            logger.log(level=root_level, key="message", data=data)
-
-
-@pytest.mark.parametrize(
-    "consistent_data_iterables",
-    [
-        ([np.int64(1)], [2], [np.int32(1)]),
-        ([{"a": np.bool_(False)}, {"a": False}]),
-        ((1.5, 2), (np.float64(0), np.int64(2))),
-    ],
-)
-@pytest.mark.filterwarnings("error")
-def test_logging_structured_data_mixed_numpy_python_scalars(
-    consistent_data_iterables: Iterable[core.LogData], root_level: core.LogLevel
-) -> None:
-    logger = logging.getLogger("tlo")
-    # Should run without any exceptions
-    for data in consistent_data_iterables:
-        logger.log(level=root_level, key="message", data=data)
+        if message_level == 'logging.DEBUG':
+            logger.debug(message)
+        elif message_level == 'logging.INFO':
+            logger.info(message)
+        elif message_level == 'logging.WARNING':
+            logger.warning(message)
+        elif message_level == 'logging.CRITICAL':
+            logger.critical(message)
+
+
+class TestStructuredLogging:
+    @pytest.mark.parametrize("message_level", ["logging.DEBUG", "logging.INFO", "logging.WARNING", "logging.CRITICAL"])
+    def test_messages_same_level(self, simulation_configuration, message_level):
+        # given that messages are at the same level as the logger
+        logger_level = eval(message_level)
+        message = {"message": pd.Series([12.5])[0]}
+        file_handler, file_path = simulation_configuration
+        log_message(message_level, logger_level, message, structured_logging=True)
+
+        lines = read_file(file_handler, file_path)
+        header_json = json.loads(lines[5])
+        data_json = json.loads(lines[6])
+
+        # message should be written to log
+        assert len(lines) == 7
+        assert header_json['level'] == message_level.lstrip("logging.")
+        assert 'message' in header_json['columns']
+        assert header_json['columns']['message'] == 'float64'
+        assert data_json['values'] == [12.5]
+
+    @pytest.mark.parametrize("message_level", ["logging.DEBUG", "logging.INFO", "logging.WARNING", "logging.CRITICAL"])
+    def test_messages_higher_level(self, simulation_configuration, message_level):
+        # given that messages are a higher level than the logger
+        logger_level = eval(message_level) - 1
+        message = {"message": pd.Series([12.5])[0]}
+        file_handler, file_path = simulation_configuration
+        log_message(message_level, logger_level, message, structured_logging=True)
+
+        lines = read_file(file_handler, file_path)
+        header_json = json.loads(lines[5])
+        data_json = json.loads(lines[6])
+
+        # message should be written to log
+        assert len(lines) == 7
+        assert header_json['level'] == message_level.lstrip("logging.")
+        assert 'message' in header_json['columns']
+        assert header_json['columns']['message'] == 'float64'
+        assert data_json['values'] == [12.5]
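For orientation, both the deleted and the restored tests exercise the same on-disk format: the first time a key is logged, a JSON header line describing the columns is written, and every log call then appends a JSON row line. The field sets below follow the deleted `_check_header`/`_check_row` helpers; the values are illustrative:

    import json

    header = {"uuid": "0123abcd", "type": "header", "module": "tlo.test.logger",
              "key": "structured", "level": "INFO",
              "columns": {"message": "float64"}, "description": None}
    row = {"uuid": "0123abcd", "date": "2010-01-01T00:00:00", "values": [12.5]}
    print(json.dumps(header), json.dumps(row), sep="\n")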
+    @pytest.mark.parametrize("message_level", ["logging.DEBUG", "logging.INFO", "logging.WARNING", "logging.CRITICAL"])
+    def test_messages_lower_level(self, simulation_configuration, message_level):
+        # given that messages are at a lower level than the logger
+        logger_level = eval(message_level) + 1
+        message = {"message": pd.Series([12.5])[0]}
+        file_handler, file_path = simulation_configuration
+        log_message(message_level, logger_level, message, structured_logging=True)
+
+        lines = read_file(file_handler, file_path)
+
+        # only simulation info messages should be written to log
+        assert len(lines) == 5
+
+
+class TestConvertLogData:
+    def setup_method(self):
+        self.expected_output = {'item_1': 1, 'item_2': 2}
+        self.logger = logging.getLogger('tlo.test.logger')
+
+    @pytest.mark.parametrize("iterable_data", [[1, 2], {1, 2}, (1, 2)])
+    def test_convert_iterable_to_dict(self, iterable_data):
+        output = self.logger._get_data_as_dict(iterable_data)
+        assert self.expected_output == output
+
+    def test_convert_df_to_dict(self):
+        df = pd.DataFrame({'item_1': [1], 'item_2': [2]})
+        output = self.logger._get_data_as_dict(df)
+
+        assert self.expected_output == output
+
+    def test_string_to_dict(self):
+        output = self.logger._get_data_as_dict("strings")
+        assert {'message': 'strings'} == output
+
+
+def test_mixed_logging():
+    """Logging with both old-style and structured logging should raise an error"""
+    logger = logging.getLogger('tlo.test.logger')
+    logger.setLevel(logging.INFO)
+    with pytest.raises(ValueError):
+        logger.info("stdlib method")
+        logger.info(key="structured", data={"key": 10})
+
+
+@pytest.mark.parametrize("add_stdout_handler", (True, False))
+def test_init_logging(add_stdout_handler):
+    logging.init_logging(add_stdout_handler)
+    logger = logging.getLogger('tlo')
+    assert len(logger.handlers) == (1 if add_stdout_handler else 0)
diff --git a/tests/test_logging_end_to_end.py b/tests/test_logging_end_to_end.py
index 944c3021c4..5f055c95ab 100644
--- a/tests/test_logging_end_to_end.py
+++ b/tests/test_logging_end_to_end.py
@@ -16,13 +16,13 @@ def log_input():
    log_string = "\n".join((
        "col1_str;hello;world;lorem;ipsum;dolor;sit",
        "col2_int;1;3;5;7;8;10",
-        "col3_float;2.1;4.1;6.1;8.1;9.1;0.1",
+        "col3_float;2;4;6;8;9;null",
        "col4_cat;cat1;cat1;cat2;cat2;cat1;cat2",
-        "col5_set;{'zero'};{'one'};{'two'};{'three'};{'four'};{'five'}",
-        "col6_list;[1, 3];[2, 4];[0, 3];[5, 6];[7, 8];[9, 10]",
+        "col5_set;set();{'one'};{None};{'three','four'};{'eight'};set()",
+        "col6_list;[];['two'];[None];[5, 6, 7];[];[]",
        "col7_date;2020-06-19T00:22:58.586101;2020-06-20T00:23:58.586101;2020-06-21T00:24:58.586101;2020-06-22T00:25"
-        ":58.586101;2020-06-23T00:25:58.586101;2020-06-21T00:24:58.586101",
-        "col8_fixed_list;['one', 1];['two', 2];['three', 3];['three', 3];['four', 4];['five', 5]"
+        ":58.586101;2020-06-23T00:25:58.586101;null",
+        "col8_fixed_list;['one', 1];['two', 2];[None, None];['three', 3];['four', 4];['five', 5]"
    ))
    # read in, then transpose
    log_input = pd.read_csv(StringIO(log_string), sep=';').T
@@ -63,6 +63,8 @@ def log_path(tmpdir_factory, log_input, class_scoped_seed):
    # a logger connected to that simulation
    logger = logging.getLogger('tlo.test')
    logger.setLevel(logging.INFO)
+    # Allowing logging of entire dataframe only for testing
+    logger._disable_dataframe_logging = False
 
    # log data as dicts
    for index, row in log_input.iterrows():
@@ -74,9 +76,15 @@ def log_path(tmpdir_factory, log_input, class_scoped_seed):
            logger.info(key='rows_as_individuals', data=log_input.loc[[index]])
        sim.date = sim.date + pd.DateOffset(days=1)
 
+    # log data as multi-row dataframe
+    for _ in range(2):
+        logger.info(key='multi_row_df', data=log_input)
+        sim.date = sim.date + pd.DateOffset(days=1)
+
    # log data as fixed length list
    for item in log_input.col8_fixed_list.values:
-        logger.info(key='a_fixed_length_list', data=item)
+        logger.info(key='a_fixed_length_list',
+                    data=item)
        sim.date = sim.date + pd.DateOffset(days=1)
 
    # log data as variable length list
@@ -129,12 +137,26 @@ def test_rows_as_individuals(self, test_log_df, log_input):
        log_output.col4_cat = log_output.col4_cat.astype('category')
        assert log_input.equals(log_output)
 
+    def test_log_entire_df(self, test_log_df, log_input):
+        # get table to compare
+        log_output = test_log_df['multi_row_df'].drop(['date'], axis=1)
+
+        # within nested dicts/entire df, need manual setting of special types
+        log_output.col4_cat = log_output.col4_cat.astype('category')
+        log_input.col5_set = log_input.col5_set.apply(list)
+        log_output.col7_date = log_output.col7_date.astype('datetime64[ns]')
+        # deal with index matching by resetting index
+        log_output.reset_index(inplace=True, drop=True)
+        expected_output = pd.concat((log_input, log_input), ignore_index=True)
+
+        assert expected_output.equals(log_output)
+
    def test_fixed_length_list(self, test_log_df):
        log_df = test_log_df['a_fixed_length_list'].drop(['date'], axis=1)
 
        expected_output = pd.DataFrame(
-            {'item_1': ['one', 'two', 'three', 'three', 'four', 'five'],
-             'item_2': [1, 2, 3, 3, 4, 5]}
+            {'item_1': ['one', 'two', None, 'three', 'four', 'five'],
+             'item_2': [1, 2, None, 3, 4, 5]}
        )
        assert expected_output.equals(log_df)
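The `a_fixed_length_list` expectation above relies on the convention — also asserted by the deleted `test_get_log_data_as_dict_with_sequence_types` — that a logged sequence becomes columns named `item_1 ... item_n`. A one-line standalone check:

    def sequence_to_columns(seq):
        # Mirrors the naming convention the tests assert: item_1, item_2, ...
        return {f"item_{i + 1}": value for i, value in enumerate(seq)}

    assert sequence_to_columns(['one', 1]) == {'item_1': 'one', 'item_2': 1}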
diff --git a/tests/test_malaria.py b/tests/test_malaria.py
index 2b16da0000..6fb185c433 100644
--- a/tests/test_malaria.py
+++ b/tests/test_malaria.py
@@ -268,7 +268,7 @@ def test_dx_algorithm_for_malaria_outcomes_clinical(
        add_or_remove='+'
    )
 
-    assert "fever" in sim.modules["SymptomManager"].has_what(person_id=person_id)
+    assert "fever" in sim.modules["SymptomManager"].has_what(person_id)
 
    def diagnosis_function(tests, use_dict: bool = False, report_tried: bool = False):
        return hsi_event.healthcare_system.dx_manager.run_dx_test(
@@ -346,7 +346,7 @@ def make_blank_simulation():
        add_or_remove='+'
    )
 
-    assert "fever" in sim.modules["SymptomManager"].has_what(person_id=person_id)
+    assert "fever" in sim.modules["SymptomManager"].has_what(person_id)
 
    def diagnosis_function(tests, use_dict: bool = False, report_tried: bool = False):
        return hsi_event.healthcare_system.dx_manager.run_dx_test(
@@ -517,7 +517,7 @@ def test_individual_testing_and_treatment(sim):
    pollevent.run()
    assert not pd.isnull(df.at[person_id, "ma_date_symptoms"])
-    assert set(sim.modules['SymptomManager'].has_what(person_id=person_id)) == {"fever", "headache", "vomiting", "stomachache"}
+    assert set(sim.modules['SymptomManager'].has_what(person_id)) == {"fever", "headache", "vomiting", "stomachache"}
 
    # check rdt is scheduled
    date_event, event = [
@@ -560,7 +560,7 @@ def test_individual_testing_and_treatment(sim):
    pollevent = malaria.MalariaUpdateEvent(module=sim.modules['Malaria'])
    pollevent.apply(sim.population)
 
-    assert sim.modules['SymptomManager'].has_what(person_id=person_id) == []
+    assert sim.modules['SymptomManager'].has_what(person_id) == []
 
    # check no rdt is scheduled
    assert "malaria.HSI_Malaria_rdt" not in sim.modules['HealthSystem'].find_events_for_person(person_id)
sim.modules['HealthSystem'].find_events_for_person(person_id) diff --git a/tests/test_module_dependencies.py b/tests/test_module_dependencies.py index 8ed5b6811e..ca5bf58482 100644 --- a/tests/test_module_dependencies.py +++ b/tests/test_module_dependencies.py @@ -1,4 +1,5 @@ """Tests for automatic checking and ordering of method module dependencies.""" + import os from pathlib import Path from random import seed as set_seed @@ -7,7 +8,7 @@ import pytest -from tlo import Date, Module, Simulation, logging +from tlo import Date, Module, Simulation from tlo.dependencies import ( ModuleDependencyError, get_all_dependencies, @@ -16,7 +17,6 @@ get_module_class_map, topologically_sort_modules, ) -from tlo.methods import hiv, simplified_births try: resourcefilepath = Path(os.path.dirname(__file__)) / "../resources" except NameError: @@ -28,6 +28,7 @@ simulation_end_date = Date(2010, 9, 1) simulation_initial_population = 1000 + module_class_map = get_module_class_map( excluded_modules={ "Module", @@ -50,6 +51,7 @@ def sim(seed): @pytest.fixture def dependent_module_pair(): + class Module1(Module): pass @@ -65,7 +67,7 @@ def dependent_module_chain(): type( f'Module{i}', (Module,), - {'INIT_DEPENDENCIES': frozenset({f'Module{i - 1}'})} if i != 0 else {} + {'INIT_DEPENDENCIES': frozenset({f'Module{i-1}'})} if i != 0 else {} ) for i in range(10) ] @@ -249,8 +251,8 @@ def test_module_dependencies_complete(sim, module_class): for module in module_class_map.values() # Skip test for NewbornOutcomes as long simulation needed for birth events to occur and dependencies to be used if module.__name__ not in { - 'NewbornOutcomes' - } + 'NewbornOutcomes' + } for dependency_name in sorted(get_all_required_dependencies(module)) ], ids=lambda pair: f"{pair[0].__name__}, {pair[1].__name__}" @@ -283,76 +285,3 @@ def test_module_dependencies_all_required(sim, module_and_dependency_pair): 'does not appear to be required to run simulation without errors and so ' f'should be removed from the dependencies of {module_class.__name__}.' ) - - -def test_auto_register_module_dependencies(tmpdir): - """ check if module dependencies are registered as expected when an argument to auto-register modules in simulation - is set to True """ - # configure logging - log_config = { - 'filename': 'LogFile', - 'directory': tmpdir, - 'custom_levels': { - '*': logging.CRITICAL, - 'tlo.method.demography': logging.INFO - } - } - # set simulation start date - start_date = Date(2010, 1, 1) - - # register required modules for a simple simulation. We have included copd as it has some dependencies. We want - # to test if the dependencies can be automatically registered when the auto-register argument in simulation - # is set to True - def register_disease_modules_manually(): - """ Test manually registering disease modules without including all dependencies and leaving the - option to auto-register missing dependencies set to false. This should fail with a module dependency error """ - with pytest.raises(ModuleDependencyError, match='missing'): - # configure simulation - sim = Simulation(start_date=start_date, seed=0, log_config=log_config, resourcefilepath=resourcefilepath) - # the lines below should fail with missing dependencies - sim.register(hiv.Hiv(resourcefilepath=resourcefilepath)) - - def register_disease_modules_using_labour_modules_for_births(): - """ Test registering disease modules without including all dependencies and not using the simplified births - module BUT setting to true the option to auto-register missing dependencies.
This should register all necessary - modules including all labour modules """ - # configure simulation - sim = Simulation(start_date=start_date, seed=0, log_config=log_config, resourcefilepath=resourcefilepath) - # re-register modules with auto-register-module argument set to True and using labour modules for births - sim.register(hiv.Hiv(resourcefilepath=resourcefilepath), - auto_register_dependencies=True) - # get module dependencies - required_dependencies = get_all_required_dependencies(sim.modules["Hiv"]) - # check registered dependencies - registered_module_names = set(sim.modules.keys()) - # all required dependencies should be available in registered dependencies - assert required_dependencies <= registered_module_names - - def register_disease_modules_using_simplified_births_for_births(): - """ Test registering disease modules without including all dependencies BUT setting to true the option to - auto-register missing dependencies and using the simplified births module. This should register all necessary modules - except labour modules since we're using simplified births """ - # configure simulation - sim = Simulation(start_date=start_date, seed=0, log_config=log_config, resourcefilepath=resourcefilepath) - sim.register(hiv.Hiv(resourcefilepath=resourcefilepath), - simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), - auto_register_dependencies=True - ) - # now that we're using simplified births we want to ensure that all alternative dependencies are not registered - alternative_dependencies = simplified_births.SimplifiedBirths.ALTERNATIVE_TO - # get registered modules - registered_module_names = set(sim.modules.keys()) - # no alternative dependency (labour modules) should get registered when using simplified births - for dependency in alternative_dependencies: - assert dependency not in registered_module_names, (f'{dependency} should not be registered when simplified' - f' module has been registered') - - # test registering disease modules manually (when not all dependencies are included and the option to auto-register - # missing dependencies is set to false) - register_disease_modules_manually() - - # test auto-registering disease modules using labour modules for births - register_disease_modules_using_labour_modules_for_births() - - # test auto-registering disease modules using the simplified births module for births - register_disease_modules_using_simplified_births_for_births() diff --git a/tests/test_simulation.py b/tests/test_simulation.py deleted file mode 100644 index c26b501c47..0000000000 --- a/tests/test_simulation.py +++ /dev/null @@ -1,323 +0,0 @@ -from pathlib import Path -from typing import Dict, List - -import numpy as np -import pytest - -from tlo import Date, DateOffset, Module, Population, Simulation, logging -from tlo.analysis.utils import merge_log_files, parse_log_file -from tlo.methods.fullmodel import fullmodel -from tlo.methods.healthsystem import HSI_Event, HSIEventQueueItem -from tlo.simulation import ( - EventQueue, - SimulationNotInitialisedError, - SimulationPreviouslyInitialisedError, -) - - -def _check_basic_simulation_attributes_equal( - simulation_1: Simulation, simulation_2: Simulation -) -> None: - for attribute in [ - "start_date", - "end_date", - "date", - "show_progress_bar", - "_custom_log_levels", - "_seed", - "_initialised", - ]: - assert getattr(simulation_1, attribute) == getattr(simulation_2, attribute) - - -def _nested_dict_are_equal(nested_dict_1: dict, nested_dict_2: dict) -> bool: - for key, value in nested_dict_1.items(): - if key not in
nested_dict_2: - return False - if isinstance(value, np.ndarray): - if not np.all(value == nested_dict_2[key]): - return False - elif isinstance(value, dict): - if not _nested_dict_are_equal(value, nested_dict_2[key]): - return False - elif value != nested_dict_2[key]: - return False - return True - - -def _check_random_state_equal( - rng_1: np.random.RandomState, rng_2: np.random.RandomState -) -> None: - rng_state_1 = rng_1.get_state(legacy=False) - rng_state_2 = rng_2.get_state(legacy=False) - assert _nested_dict_are_equal(rng_state_1, rng_state_2) - - -def _check_population_equal(population_1: Population, population_2: Population) -> None: - assert population_1.initial_size == population_2.initial_size - assert population_1.new_row.equals(population_2.new_row) - assert population_1.new_rows.equals(population_2.new_rows) - assert population_1.next_person_id == population_2.next_person_id - assert population_1.props.equals(population_2.props) - - -def _check_modules_are_equal( - modules_dict_1: Dict[str, Module], modules_dict_2: Dict[str, Module] -) -> None: - for module_name, module_1 in modules_dict_1.items(): - assert module_name in modules_dict_2 - module_2 = modules_dict_2[module_name] - assert module_2.PARAMETERS == module_1.PARAMETERS - assert module_2.PROPERTIES == module_1.PROPERTIES - _check_random_state_equal(module_1.rng, module_2.rng) - - -def _check_event_queues_are_equal( - event_queue_1: EventQueue, event_queue_2: EventQueue -) -> None: - assert len(event_queue_1) == len(event_queue_2) - for (*date_priority_count_1, event_1), (*date_priority_count_2, event_2) in zip( - event_queue_1.queue, event_queue_2.queue - ): - assert date_priority_count_1 == date_priority_count_2 - if isinstance(event_1.target, Population): - # We don't check for equality of populations here as we do separately and - # it would create a lot of redundancy to check for every event - assert isinstance(event_2.target, Population) - else: - assert event_1.target == event_2.target - assert event_1.priority == event_2.priority - assert type(event_1.module) is type(event_2.module) # noqa: E721 - - -def _check_hsi_events_are_equal(hsi_event_1: HSI_Event, hsi_event_2: HSI_Event) -> None: - if isinstance(hsi_event_1.target, Population): - # We don't check for equality of populations here as we do separately and - # it would create a lot of redundancy to check for every HSI event - assert isinstance(hsi_event_2.target, Population) - else: - assert hsi_event_1.target == hsi_event_2.target - assert hsi_event_1.module.name == hsi_event_2.module.name - assert hsi_event_1.TREATMENT_ID == hsi_event_2.TREATMENT_ID - assert hsi_event_1.ACCEPTED_FACILITY_LEVEL == hsi_event_2.ACCEPTED_FACILITY_LEVEL - assert hsi_event_1.BEDDAYS_FOOTPRINT == hsi_event_2.BEDDAYS_FOOTPRINT - assert ( - hsi_event_1._received_info_about_bed_days - == hsi_event_2._received_info_about_bed_days - ) - assert hsi_event_1.expected_time_requests == hsi_event_2.expected_time_requests - assert hsi_event_1.facility_info == hsi_event_2.facility_info - - -def _check_hsi_event_queues_are_equal( - hsi_event_queue_1: List[HSIEventQueueItem], - hsi_event_queue_2: List[HSIEventQueueItem], -) -> None: - assert len(hsi_event_queue_1) == len(hsi_event_queue_2) - for hsi_event_queue_item_1, hsi_event_queue_item_2 in zip( - hsi_event_queue_1, hsi_event_queue_2 - ): - assert hsi_event_queue_item_1.priority == hsi_event_queue_item_2.priority - assert hsi_event_queue_item_1.topen == hsi_event_queue_item_2.topen - assert ( - hsi_event_queue_item_1.rand_queue_counter -
== hsi_event_queue_item_2.rand_queue_counter - ) - assert hsi_event_queue_item_1.tclose == hsi_event_queue_item_2.tclose - _check_hsi_events_are_equal( - hsi_event_queue_item_1.hsi_event, hsi_event_queue_item_2.hsi_event - ) - - -def _check_simulations_are_equal( - simulation_1: Simulation, simulation_2: Simulation -) -> None: - _check_basic_simulation_attributes_equal(simulation_1, simulation_2) - _check_modules_are_equal(simulation_1.modules, simulation_2.modules) - _check_random_state_equal(simulation_1.rng, simulation_2.rng) - _check_event_queues_are_equal(simulation_1.event_queue, simulation_2.event_queue) - _check_hsi_event_queues_are_equal( - simulation_1.modules["HealthSystem"].HSI_EVENT_QUEUE, - simulation_2.modules["HealthSystem"].HSI_EVENT_QUEUE, - ) - _check_population_equal(simulation_1.population, simulation_2.population) - - -@pytest.fixture(scope="module") -def resource_file_path(): - return Path(__file__).parents[1] / "resources" - - -@pytest.fixture(scope="module") -def initial_population_size(): - return 5000 - - -@pytest.fixture(scope="module") -def start_date(): - return Date(2010, 1, 1) - - -@pytest.fixture(scope="module") -def end_date(start_date): - return start_date + DateOffset(days=180) - - -@pytest.fixture(scope="module") -def intermediate_date(start_date, end_date): - return start_date + (end_date - start_date) / 2 - - -@pytest.fixture(scope="module") -def logging_custom_levels(): - return {"*": logging.INFO} - - -def _simulation_factory( - output_directory, start_date, seed, resource_file_path, logging_custom_levels -): - log_config = { - "filename": "test", - "directory": output_directory, - "custom_levels": logging_custom_levels, - } - simulation = Simulation( - start_date=start_date, - seed=seed, - log_config=log_config, - ) - simulation.register( - *fullmodel( - resourcefilepath=resource_file_path, - ) - ) - return simulation - - -@pytest.fixture -def simulation(tmp_path, start_date, seed, resource_file_path, logging_custom_levels): - return _simulation_factory( - tmp_path, start_date, seed, resource_file_path, logging_custom_levels - ) - - -@pytest.fixture(scope="module") -def simulated_simulation( - tmp_path_factory, - start_date, - end_date, - seed, - resource_file_path, - initial_population_size, - logging_custom_levels, -): - tmp_path = tmp_path_factory.mktemp("simulated_simulation") - simulation = _simulation_factory( - tmp_path, start_date, seed, resource_file_path, logging_custom_levels - ) - simulation.make_initial_population(n=initial_population_size) - simulation.simulate(end_date=end_date) - return simulation - - -def test_save_to_pickle_creates_file(tmp_path, simulation): - pickle_path = tmp_path / "simulation.pkl" - simulation.save_to_pickle(pickle_path=pickle_path) - assert pickle_path.exists() - - -def test_save_load_pickle_after_initialising( - tmp_path, simulation, initial_population_size -): - simulation.make_initial_population(n=initial_population_size) - simulation.initialise(end_date=simulation.start_date) - pickle_path = tmp_path / "simulation.pkl" - simulation.save_to_pickle(pickle_path=pickle_path) - loaded_simulation = Simulation.load_from_pickle(pickle_path) - _check_simulations_are_equal(simulation, loaded_simulation) - - -def test_save_load_pickle_after_simulating(tmp_path, simulated_simulation): - pickle_path = tmp_path / "simulation.pkl" - simulated_simulation.save_to_pickle(pickle_path=pickle_path) - loaded_simulation = Simulation.load_from_pickle(pickle_path) - _check_simulations_are_equal(simulated_simulation, 
loaded_simulation) - - -def _check_parsed_logs_are_equal( - log_path_1: Path, - log_path_2: Path, - module_name_key_pairs_to_skip: set[tuple[str, str]], -) -> None: - logs_dict_1 = parse_log_file(log_path_1) - logs_dict_2 = parse_log_file(log_path_2) - assert logs_dict_1.keys() == logs_dict_2.keys() - for module_name in logs_dict_1.keys(): - module_logs_1 = logs_dict_1[module_name] - module_logs_2 = logs_dict_2[module_name] - assert module_logs_1.keys() == module_logs_2.keys() - for key in module_logs_1: - if key == "_metadata": - assert module_logs_1[key] == module_logs_2[key] - elif (module_name, key) not in module_name_key_pairs_to_skip: - assert module_logs_1[key].equals(module_logs_2[key]) - - -@pytest.mark.slow -def test_continuous_and_interrupted_simulations_equal( - tmp_path, - simulation, - simulated_simulation, - initial_population_size, - intermediate_date, - end_date, - logging_custom_levels, -): - simulation.make_initial_population(n=initial_population_size) - simulation.initialise(end_date=end_date) - simulation.run_simulation_to(to_date=intermediate_date) - pickle_path = tmp_path / "simulation.pkl" - simulation.save_to_pickle(pickle_path=pickle_path) - simulation.close_output_file() - log_config = { - "filename": "test_continued", - "directory": tmp_path, - "custom_levels": logging_custom_levels, - } - interrupted_simulation = Simulation.load_from_pickle(pickle_path, log_config) - interrupted_simulation.run_simulation_to(to_date=end_date) - interrupted_simulation.finalise() - _check_simulations_are_equal(simulated_simulation, interrupted_simulation) - merged_log_path = tmp_path / "concatenated.log" - merge_log_files( - simulation.log_filepath, interrupted_simulation.log_filepath, merged_log_path - ) - _check_parsed_logs_are_equal( - simulated_simulation.log_filepath, merged_log_path, {("tlo.simulation", "info")} - ) - - -def test_run_simulation_to_past_end_date_raises( - simulation, initial_population_size, end_date -): - simulation.make_initial_population(n=initial_population_size) - simulation.initialise(end_date=end_date) - with pytest.raises(ValueError, match="after simulation end date"): - simulation.run_simulation_to(to_date=end_date + DateOffset(days=1)) - - -def test_run_simulation_without_initialisation_raises( - simulation, initial_population_size, end_date -): - simulation.make_initial_population(n=initial_population_size) - with pytest.raises(SimulationNotInitialisedError): - simulation.run_simulation_to(to_date=end_date) - - -def test_initialise_simulation_twice_raises( - simulation, initial_population_size, end_date -): - simulation.make_initial_population(n=initial_population_size) - simulation.initialise(end_date=end_date) - with pytest.raises(SimulationPreviouslyInitialisedError): - simulation.initialise(end_date=end_date) diff --git a/tests/test_symptommanager.py b/tests/test_symptommanager.py index 73ea7619d0..85c7156902 100644 --- a/tests/test_symptommanager.py +++ b/tests/test_symptommanager.py @@ -1,8 +1,5 @@ -from __future__ import annotations - import os from pathlib import Path -from typing import TYPE_CHECKING, List import pytest from pandas import DateOffset @@ -27,9 +24,6 @@ SymptomManager_SpuriousSymptomOnset, ) -if TYPE_CHECKING: - from tlo.methods.symptommanager import SymptomManager - try: resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' except NameError: @@ -193,9 +187,8 @@ def test_adding_quering_and_removing_symptoms(seed): assert set(has_symp) == set(ids) for person_id in ids: - assert symp in 
sim.modules["SymptomManager"].has_what( - person_id=person_id, disease_module=sim.modules["Mockitis"] - ) + assert symp in sim.modules['SymptomManager'].has_what(person_id=person_id, + disease_module=sim.modules['Mockitis']) # Check cause of the symptom: for person in ids: @@ -210,103 +203,6 @@ def test_adding_quering_and_removing_symptoms(seed): assert list() == sim.modules['SymptomManager'].who_has(symp) -@pytest.mark.parametrize( - "supply_disease_module", - [ - pytest.param(False, id="disease_module kwarg NOT supplied"), - pytest.param(True, id="disease_module kwarg supplied"), - ], -) -def test_has_what_via_individual_properties(seed, supply_disease_module: bool): - """ - Test that the has_what method returns the same symptoms for an individual - when supplied a person_id and the individual_properties context for that - same person. - - Test the case when the optional disease_module kwarg is supplied as well. - - We will create 3 'dummy' symptoms and select 8 individuals in the - population to infect with these symptoms; in the following combinations: - - id has_symp1 has_symp2 has_symp3 - 0 1 1 1 - 1 1 1 0 - 2 1 0 1 - 3 1 0 0 - 4 0 1 1 - 5 0 1 0 - 6 0 0 1 - 7 0 0 0 - - We will then assert that has_what returns the expected symptoms for the - individuals, and that supplying either the person_id keyword or the - individual_properties keyword gives the same answer. - """ - sim = Simulation(start_date=start_date, seed=seed) - sim.register( - demography.Demography(resourcefilepath=resourcefilepath), - enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), - healthsystem.HealthSystem(resourcefilepath=resourcefilepath, disable=True), - symptommanager.SymptomManager(resourcefilepath=resourcefilepath), - healthseekingbehaviour.HealthSeekingBehaviour( - resourcefilepath=resourcefilepath - ), - simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), - mockitis.Mockitis(), - chronicsyndrome.ChronicSyndrome(), - ) - disease_module: mockitis.Mockitis = sim.modules["Mockitis"] - symptom_manager: SymptomManager = sim.modules["SymptomManager"] - - # Generate the symptoms and select the people to infect - n_symptoms = 3 - n_patients = 2 ** n_symptoms - symptoms = [f"test_symptom{i}" for i in range(n_symptoms)] - symptom_manager.register_symptom(*[Symptom(name=symptom) for symptom in symptoms]) - - # Create the initial population after generating extra symptoms, so that they are registered - sim.make_initial_population(n=popsize) - df = sim.population.props - - # Infect the people with the corresponding symptoms - persons_infected_with: List[int] = [ - id for id in sim.rng.choice(list(df.index[df.is_alive]), n_patients) - ] - for i, id in enumerate(persons_infected_with): - bin_rep = format(i, f"0{n_symptoms}b") - for symptom_number, digit in enumerate(bin_rep): - if digit == "1": - symptom_manager.change_symptom( - symptom_string=symptoms[symptom_number], - person_id=[id], - add_or_remove="+", - disease_module=disease_module, - ) - - # Now check that has_what returns the same (correct!) arguments when supplied with - # individual_properties and person_id. 
- for person_id in persons_infected_with: - symptoms_via_pid = symptom_manager.has_what( - person_id=person_id, - disease_module=disease_module if supply_disease_module else None, - ) - with sim.population.individual_properties( - person_id, read_only=True - ) as individual_properties: - symptoms_via_iprops = symptom_manager.has_what( - individual_details=individual_properties, - disease_module=disease_module if supply_disease_module else None, - ) - - # Assert all returned symptoms are in agreement - assert len(symptoms_via_pid) == len( - symptoms_via_iprops - ), "Method does not return same number of symptoms." - assert set(symptoms_via_pid) == set( - symptoms_via_iprops - ), "Method does not return the same symptoms" - - def test_baby_born_has_no_symptoms(seed): sim = Simulation(start_date=start_date, seed=seed) @@ -331,7 +227,7 @@ def test_baby_born_has_no_symptoms(seed): person_id = sim.do_birth(mother_id) # check that the new person does not have symptoms: - assert [] == sim.modules['SymptomManager'].has_what(person_id=person_id) + assert [] == sim.modules['SymptomManager'].has_what(person_id) def test_auto_onset_symptom(seed): @@ -354,7 +250,7 @@ def test_auto_onset_symptom(seed): sim.population.props.loc[person_id, 'is_alive'] = True for symptom in sm.symptom_names: sim.population.props.loc[person_id, sm.get_column_name_for_symptom(symptom)] = 0 - assert 0 == len(sm.has_what(person_id=person_id)) + assert 0 == len(sm.has_what(person_id)) def get_events_in_sim(): return [ev for ev in sim.event_queue.queue if (person_id in ev[3].person_id)] @@ -377,7 +273,7 @@ def get_events_in_sim(): ) # check that the symptom is not imposed - assert 0 == len(sm.has_what(person_id=person_id)) + assert 0 == len(sm.has_what(person_id)) # get the future events for this person (should be just the auto-onset event) assert 1 == len(get_events_in_sim()) @@ -389,7 +285,7 @@ def get_events_in_sim(): # run the events and check for the changing of symptoms sim.date = date_of_onset onset[3].apply(sim.population) - assert symptom_string in sm.has_what(person_id=person_id) + assert symptom_string in sm.has_what(person_id) # get the future events for this person (should now include the auto-resolve event) assert 2 == len(get_events_in_sim()) @@ -399,7 +295,7 @@ def get_events_in_sim(): assert isinstance(resolve[3], SymptomManager_AutoResolveEvent) resolve[3].apply(sim.population) - assert 0 == len(sm.has_what(person_id=person_id)) + assert 0 == len(sm.has_what(person_id)) def test_nonemergency_spurious_symptoms_during_simulation(seed): @@ -608,26 +504,13 @@ def test_has_what( df.is_alive & (df[symptom_manager.get_column_name_for_symptom(symptom)] > 0) ][0] - assert symptom in symptom_manager.has_what(person_id=person_with_symptom) + assert symptom in symptom_manager.has_what(person_with_symptom) person_without_symptom = df.index[ df.is_alive & (df[symptom_manager.get_column_name_for_symptom(symptom)] == 0) ][0] - assert symptom not in symptom_manager.has_what(person_id=person_without_symptom) - - # Do the same checks but using an IndividualDetails context - with simulation.population.individual_properties( - person_with_symptom, read_only=True - ) as with_symptom_properties: - assert symptom in symptom_manager.has_what( - individual_details=with_symptom_properties - ) - with simulation.population.individual_properties( - person_without_symptom, read_only=True - ) as without_symptom_properties: - assert symptom not in symptom_manager.has_what( - individual_details=without_symptom_properties - ) + assert symptom not 
in symptom_manager.has_what(person_without_symptom) + + def test_has_what_disease_module( symptom_manager, disease_module, disease_module_symptoms, simulation @@ -639,16 +522,12 @@ def test_has_what_disease_module( df.is_alive & (df[symptom_manager.get_column_name_for_symptom(symptom)] > 0) ][0] - assert symptom in symptom_manager.has_what( - person_id=person_with_symptom, disease_module=disease_module - ) + assert symptom in symptom_manager.has_what(person_with_symptom, disease_module) person_without_symptom = df.index[ df.is_alive & (df[symptom_manager.get_column_name_for_symptom(symptom)] == 0) ][0] - assert symptom not in symptom_manager.has_what( - person_id=person_without_symptom, disease_module=disease_module - ) + assert symptom not in symptom_manager.has_what(person_without_symptom, disease_module) def test_have_what( diff --git a/tests/test_tb.py b/tests/test_tb.py index 66d5abd60e..0434c70069 100644 --- a/tests/test_tb.py +++ b/tests/test_tb.py @@ -576,7 +576,7 @@ def test_children_referrals(seed): duration_in_days=None, ) - assert set(sim.modules['SymptomManager'].has_what(person_id=person_id)) == symptom_list + assert set(sim.modules['SymptomManager'].has_what(person_id)) == symptom_list # run HSI_Tb_ScreeningAndRefer and check outcomes sim.modules['HealthSystem'].schedule_hsi_event( @@ -1036,7 +1036,7 @@ def test_hsi_scheduling(seed): duration_in_days=None, ) - assert set(sim.modules['SymptomManager'].has_what(person_id=person_id)) == symptom_list + assert set(sim.modules['SymptomManager'].has_what(person_id)) == symptom_list hsi_event = tb.HSI_Tb_ScreeningAndRefer(person_id=person_id, module=sim.modules['Tb']) hsi_event.run(squeeze_factor=0) @@ -1080,7 +1080,7 @@ def test_hsi_scheduling(seed): duration_in_days=None, ) - assert set(sim.modules['SymptomManager'].has_what(person_id=person_id)) == symptom_list + assert set(sim.modules['SymptomManager'].has_what(person_id)) == symptom_list hsi_event = tb.HSI_Tb_ScreeningAndRefer(person_id=person_id, module=sim.modules['Tb']) hsi_event.run(squeeze_factor=0) @@ -1125,7 +1125,7 @@ def test_hsi_scheduling(seed): duration_in_days=None, ) - assert set(sim.modules['SymptomManager'].has_what(person_id=person_id)) == symptom_list + assert set(sim.modules['SymptomManager'].has_what(person_id)) == symptom_list hsi_event = tb.HSI_Tb_ScreeningAndRefer(person_id=person_id, module=sim.modules['Tb']) hsi_event.run(squeeze_factor=0) diff --git a/tests/test_utils.py b/tests/test_utils.py index 1022c95010..02ae63b7ba 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,6 @@ """Unit tests for utility functions.""" import os import pickle -import shutil import string import types from pathlib import Path @@ -15,7 +14,7 @@ from tlo import Date, Simulation from tlo.analysis.utils import parse_log_file from tlo.methods import demography -from tlo.util import DEFAULT_MOTHER_ID, convert_excel_files_to_csv, read_csv_files +from tlo.util import DEFAULT_MOTHER_ID path_to_files = Path(os.path.dirname(__file__)) @@ -318,148 +317,3 @@ def check_hash_is_valid(dfh): # check hash differs for different dataframes if not dataframes[i].equals(dataframes[j]): assert df_hash != tlo.util.hash_dataframe(dataframes[j]) - - -def copy_files_to_temporal_directory_and_return_path(tmpdir): - """ copy resource files in tests/resources to a temporary directory and return its path - - :param tmpdir: path to a temporary directory - - """ - resource_filepath = path_to_files / 'resources' - tmpdir_resource_filepath = Path(tmpdir / 'resources') -
shutil.copytree(resource_filepath, tmpdir_resource_filepath) - return tmpdir_resource_filepath - - -def test_read_csv_method_with_no_file(tmpdir): - """ read csv method when no file name is supplied - i) should return a dictionary. - ii) dictionary keys should match csv file names in resource folder - iii) all dictionary values should be dataframes - - :param tmpdir: path to a temporary directory - - """ - tmpdir_resource_filepath = copy_files_to_temporal_directory_and_return_path(tmpdir) - file_names = [csv_file_path.stem for csv_file_path in tmpdir_resource_filepath.rglob("*.csv")] - df_no_files = read_csv_files(tmpdir_resource_filepath) - assert isinstance(df_no_files, dict) - assert set(df_no_files.keys()) == set(file_names) - assert all(isinstance(value, pd.DataFrame) for value in df_no_files.values()) - - -def test_read_csv_method_with_one_file(tmpdir): - """ test read csv method when one file name is supplied. should return a dataframe - :param tmpdir: path to a temporary directory - - """ - tmpdir_resource_filepath = copy_files_to_temporal_directory_and_return_path(tmpdir) - df = read_csv_files(tmpdir_resource_filepath, files=['df_at_healthcareseeking']) - assert isinstance(df, pd.DataFrame) - - -def test_read_csv_method_with_multiple_files(tmpdir): - """ read csv method when multiple file names are supplied. - i) should return a dictionary. - ii) dictionary keys should match supplied file names - iii) all dictionary values should be dataframes - - :param tmpdir: path to a temporary directory - - """ - tmpdir_resource_filepath = copy_files_to_temporal_directory_and_return_path(tmpdir) - file_names = ['df_at_healthcareseeking', 'df_at_init_of_lifestyle'] - df_dict = read_csv_files(tmpdir_resource_filepath, files=file_names) - assert isinstance(df_dict, dict) - assert set(df_dict.keys()) == set(file_names) - for _key, dataframe in df_dict.items(): - assert isinstance(dataframe, pd.DataFrame) - - -def test_read_csv_method_output_matches_previously_used_read_excel(tmpdir): - """ check the read csv method produces the same output as reading the Excel file - :param tmpdir: path to a temporary directory - - """ - tmpdir_resource_filepath = copy_files_to_temporal_directory_and_return_path(tmpdir) - excel_file_path = Path(tmpdir_resource_filepath - / 'ResourceFile_test_convert_to_csv/ResourceFile_test_convert_to_csv.xlsx') - xls = pd.ExcelFile(excel_file_path) - sheet_names = xls.sheet_names - # convert the above Excel file into csv equivalent.
we will use the newly converted files to determine if - # loading parameters from the Excel file will be equal to loading parameters from the converted csv files - convert_excel_files_to_csv(folder=Path(tmpdir_resource_filepath / 'ResourceFile_test_convert_to_csv'), - files=[excel_file_path.name]) - - # get excel sheet names - df_excel = pd.read_excel(xls, sheet_name=sheet_names) - - # read newly converted csv files using read_csv_files method - df_csv = read_csv_files(Path(str(excel_file_path).split('.')[0]), - files=sheet_names) - - # dictionary keys from both dataframe dictionaries should match - assert isinstance(df_excel, dict) and isinstance(df_csv, dict) - assert df_excel.keys() == df_csv.keys() - for key in df_excel: - assert df_excel[key].astype(str).equals(df_csv[key].astype(str)) - - -def test_convert_excel_files_method(tmpdir): - """ Test converting Excel files to csv equivalent is done as expected - - 1) Excel file name should become the name of the folder containing the newly converted csv files - 2) Excel file sheet names should become csv file names - 3) if files are given, the function should convert to csv only those given files in a folder - 4) if no files are given, all Excel files in the parent folder and subfolders within the parent folder - should get converted to csv files - - """ - - def check_logic_of_converting_excel_files_to_csv_files(folder: Path, files: list) -> None: - """ check converting Excel files to csv files is done as expected - 1) check that a new directory to hold the newly created csv files has been created - 2) check that this new directory name matches the Excel file name it has been created from - 3) check csv files are created and that their names match the sheet names of the Excel file they - have been created from - """ - # check that the above function has created a folder named `ResourceFile_load-parameters` (name of the Excel - # file) and a csv file named `parameter_values` (Excel file sheet name). - excel_file_paths = [folder / file for file in files] - - for excel_file_path in excel_file_paths: - xl = pd.ExcelFile(excel_file_path) - path_to_new_directory = excel_file_path.with_suffix("") - # new folder should be created - assert path_to_new_directory.exists() and path_to_new_directory.is_dir() - # the new folder name should be the same as the Excel file name - assert excel_file_path.stem == path_to_new_directory.name - for sheet_name in xl.sheet_names: - path_to_new_file = Path(path_to_new_directory / f'{sheet_name}.csv') - # new csv file(s) should be created with name(s) resembling sheet name(s) in excel file - assert path_to_new_file.exists() and path_to_new_file.is_file() - assert sheet_name == path_to_new_file.name.split('.')[0] - - - # get resource file path - resourcefilepath = path_to_files / 'resources' - tmpdir_resourcefilepath = Path(tmpdir/'resources') - shutil.copytree(resourcefilepath, tmpdir_resourcefilepath) - - # check convert to csv logic when a list of file name(s) is given - excel_file = ['ResourceFile_load-parameters.xlsx'] - convert_excel_files_to_csv(tmpdir_resourcefilepath, files=excel_file) - # check new folder containing csv file is created.
The folder name and csv file name should resemble the supplied - # Excel file name and sheet name respectively - check_logic_of_converting_excel_files_to_csv_files(tmpdir_resourcefilepath, files=excel_file) - - # check convert to csv logic when no list of file name(s) is given - excel_files = [file for file in tmpdir_resourcefilepath.rglob("*.xlsx")] - if not excel_files: - excel_files = excel_file - - convert_excel_files_to_csv(tmpdir_resourcefilepath) - # check behaviours are as expected. New folders containing csv files should be created with names resembling the - # Excel file they were created from - check_logic_of_converting_excel_files_to_csv_files(tmpdir_resourcefilepath, excel_files) diff --git a/tox.ini b/tox.ini index 5d42fe3252..94949bd6d8 100644 --- a/tox.ini +++ b/tox.ini @@ -41,13 +41,6 @@ deps = pytest pytest-cov -[testenv:py3-latest] -deps = - pytest - pytest-xdist -commands = - {posargs:pytest -n auto -vv tests} - [testenv:spell] setenv = SPELLCHECK=1 @@ -78,8 +71,6 @@ commands = ; Generate HSI events listing python src/tlo/analysis/hsi_events.py --output-file docs/_hsi_events.rst --output-format rst-list python src/tlo/analysis/hsi_events.py --output-file docs/hsi_events.csv --output-format csv - ; Generate parameters listing - python docs/tlo_parameters.py {toxinidir}{/}resources {toxinidir}{/}docs{/}parameters.rst sphinx-build {posargs:-E} -b html docs dist/docs -sphinx-build -b linkcheck docs dist/docs @@ -102,7 +93,7 @@ commands = twine check dist/*.tar.gz dist/*.whl ; ignore that _version.py file generated by setuptools_scm is not tracked by VCS check-manifest --ignore **/_version.py {toxinidir} - ruff check src tests + ruff src tests isort --check-only --diff src tests pylint src tests python {toxinidir}/src/scripts/automation/update_citation.py --check @@ -145,13 +136,3 @@ commands = pip-compile --extra dev --output-file {toxinidir}/requirements/dev.txt skip_install = true deps = pip-tools - -[testenv:markslow] -deps = - -r{toxinidir}/requirements/base.txt - pytest - pytest-json-report - redbaron -commands = - pytest tests --json-report --json-report-file {toxinidir}/test-report.json --json-report-omit collectors log traceback streams warnings - python {toxinidir}/src/scripts/automation/mark_slow_tests.py --json-test-report-path test-report.json
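
The test_mixed_logging expectation retained above (a ValueError when stdlib-style and structured calls are mixed on one logger) can be illustrated with a minimal sketch. This is an illustration only, not the tlo.logging implementation, and the GuardedLogger name is hypothetical:

import logging

class GuardedLogger:
    """Refuse to mix old-style and structured calls on the same logger."""

    def __init__(self, name):
        self._logger = logging.getLogger(name)
        self._mode = None  # "stdlib" or "structured", fixed by the first call

    def info(self, msg=None, key=None, data=None):
        mode = "structured" if key is not None else "stdlib"
        if self._mode is None:
            self._mode = mode  # the first call decides the style
        elif self._mode != mode:
            raise ValueError("cannot mix old-style and structured logging")
        if mode == "structured":
            self._logger.info("%s | %s", key, data)
        else:
            self._logger.info(msg)

logger = GuardedLogger('tlo.test.logger')
logger.info("stdlib method")
try:
    logger.info(key="structured", data={"key": 10})
except ValueError as error:
    print(error)  # the second style is rejected, as the test expects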
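
The deleted test_simulation.py compared two simulations field by field; its _check_random_state_equal helper needed a recursive comparison because numpy's get_state(legacy=False) returns a nested dict containing arrays, which a plain == cannot handle. A self-contained sketch of that check, under those assumptions:

import numpy as np

def states_equal(s1, s2):
    # recurse through the nested state dict, comparing arrays element-wise
    if isinstance(s1, dict):
        return s1.keys() == s2.keys() and all(states_equal(s1[k], s2[k]) for k in s1)
    if isinstance(s1, np.ndarray):
        return bool(np.all(s1 == s2))
    return s1 == s2

rng_1, rng_2 = np.random.RandomState(42), np.random.RandomState(42)
assert states_equal(rng_1.get_state(legacy=False), rng_2.get_state(legacy=False))
rng_2.random_sample()  # advancing one generator makes the states diverge
assert not states_equal(rng_1.get_state(legacy=False), rng_2.get_state(legacy=False))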
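
The symptom-manager hunks move has_what calls between keyword and positional forms, and the deleted test_has_what_via_individual_properties exercised an individual_details alternative to person_id. A simplified sketch of that dual calling convention, assuming the sy_ column-prefix convention and omitting the disease_module filter (the names here are illustrative, not the TLO API):

import pandas as pd

def has_what(props, person_id=None, individual_details=None, symptoms=("fever", "cough")):
    # exactly one of person_id / individual_details must identify the person
    if (person_id is None) == (individual_details is None):
        raise ValueError("supply exactly one of person_id or individual_details")
    row = props.loc[person_id] if individual_details is None else individual_details
    return [s for s in symptoms if row[f"sy_{s}"] > 0]

props = pd.DataFrame({"sy_fever": [1, 0], "sy_cough": [0, 0]})
assert has_what(props, person_id=0) == ["fever"]               # look up by id
assert has_what(props, individual_details=props.loc[1]) == []  # pass the row directly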
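
The deleted test_utils.py tests describe convert_excel_files_to_csv turning each workbook into a folder named after the file, with one CSV per sheet. A minimal sketch of that conversion, assuming pandas with an Excel engine installed and a hypothetical resources/ folder (not the tlo.util implementation):

from pathlib import Path

import pandas as pd

def convert_workbook_to_csv(excel_path: Path) -> None:
    out_dir = excel_path.with_suffix("")  # ResourceFile_x.xlsx -> ResourceFile_x/
    out_dir.mkdir(exist_ok=True)
    # sheet_name=None loads every sheet as a {sheet_name: DataFrame} dict
    for sheet_name, df in pd.read_excel(excel_path, sheet_name=None).items():
        df.to_csv(out_dir / f"{sheet_name}.csv", index=False)

for path in Path("resources").rglob("*.xlsx"):
    convert_workbook_to_csv(path)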