covid-19-impact-lab · roecla · Sep 3, 2021 · Sep 3, 2021 · Sep 3, 2021 · Sep 3, 2021
diff --git a/environment.yml b/environment.yml
@@ -25,7 +25,7 @@ dependencies:
   - pandas>=1,<1.1
   - pdbpp
   - pre-commit
-  - pytask>=0.0.14
+  - pytask>=0.1.0
   - pytask-parallel
   - pytest
   - python-snappy
@@ -48,5 +48,5 @@ dependencies:
   - sphinx-panels
 
   - pip:
-    - DFO-LS
-    - Py-BOBYQA
+      - DFO-LS
+      - Py-BOBYQA
diff --git a/src/config.py b/src/config.py
@@ -13,25 +13,14 @@
 import sid
 
 PLOT_START_DATE = "2020-09-15"
-PLOT_END_DATE = "2021-06-06"
+PLOT_END_DATE = "2021-09-01"
 PLOT_SIZE = (8, 4.5)
 
 AFTER_EASTER = pd.Timestamp("2021-04-06")
 SUMMER_SCENARIO_START = "2021-06-07"
 
 FAST_FLAG = "debug"
-"""One of 'debug', 'verify', 'full'.
-
-If 'debug' only the debug initial states are used and only one run of every scenario is
-done. Do **not** interpret the results.
-
-If 'verify' only 10 seeds and the base scenario are done in the fall scenarios. In the
-main_predictions we use 5 seeds for each scenario. This means there 30 simulation runs
-overall.
-
-If 'full' 20 seeds are used for each scenario.
-
-"""
+"""One of 'debug', 'verify', 'full'."""
 
 SID_DEPENDENCIES = {}
 for path in Path(sid.__path__[0]).iterdir():

diff --git a/src/create_initial_states/task_build_full_params.py b/src/create_initial_states/task_build_full_params.py
@@ -201,6 +201,7 @@ def _add_vacation_model_distribution_params(params):
     params.loc[(*loc, "Osterferien2021"), "value"] = 0.5
     params.loc[(*loc, "Pfingstferien2021"), "value"] = 0.5
     params.loc[(*loc, "Sommerferien2021"), "value"] = 0.5
+    params.loc[(*loc, "Herbstferien2021"), "value"] = 0.5
     return params
 
 
@@ -400,7 +401,7 @@ def _build_share_known_cases_params():
             "2021-04-01": 0.22,
             "2021-04-05": 0.22,
             "2021-04-07": 0.31,
-            "2021-08-15": 0.31,
+            "2021-12-23": 0.31,
         },
         name="value",
     ).to_frame()

diff --git a/src/original_data/testing/cosmo_selftest_frequency_last_four_weeks.csv b/src/original_data/testing/cosmo_selftest_frequency_last_four_weeks.csv
@@ -2,3 +2,7 @@ date,share_more_than_5_tests_per_week,share_5_tests_per_week,share_2-4_tests_per
 2021-04-20,0.004,0.004,0.091,0.196,0.208,0.497
 2021-05-04,0.003,0.008,0.141,0.27,0.198,0.38
 2021-05-18,0.006,0.006,0.179,0.244,0.227,0.337
+2021-06-01,0.006,0.005,0.191,0.233,0.233,0.333
+2021-06-15,0.011,0.008,0.209,0.237,0.204,0.331
+2021-06-29,0.008,0.01,0.166,0.243,0.22,0.353
+2021-07-13,0.003,0.007,0.159,0.222,0.233,0.375
diff --git a/src/original_data/vacations.xlsx b/src/original_data/vacations.xlsx
diff --git a/src/original_data/vacations/vacations_2020.xlsx b/src/original_data/vacations/vacations_2020.xlsx
diff --git a/src/plotting/task_create_scenario_comparison_tables.py b/src/plotting/task_create_scenario_comparison_tables.py
diff --git a/src/plotting/task_plot_difference_btw_simulated_and_empirical_incidences.py b/src/plotting/task_plot_difference_btw_simulated_and_empirical_incidences.py
@@ -0,0 +1,48 @@
+import matplotlib.pyplot as plt
+import pandas as pd
+import pytask
+
+from src.config import BLD
+from src.config import SRC
+from src.plotting.plotting import BLUE
+from src.plotting.plotting import plot_incidences
+from src.simulation.scenario_config import create_path_to_scenario_outcome_time_series
+
+_DEPENDENCIES = {
+    "scenario_config.py": SRC / "simulation" / "scenario_config.py",
+    "plotting.py": SRC / "plotting" / "plotting.py",
+    "empirical": BLD / "data" / "empirical_data_for_plotting.pkl",
+}
+
+
+SIM_DATA_PATH = create_path_to_scenario_outcome_time_series(
+    scenario_name="combined_baseline", entry="new_known_case"
+)
+
+if SIM_DATA_PATH.exists():
+    _DEPENDENCIES["simulated"] = SIM_DATA_PATH
+
+
+@pytask.mark.skipif(
+    not SIM_DATA_PATH.exists(), reason="combined_baseline data does not exist."
+)
+@pytask.mark.depends_on(_DEPENDENCIES)
+@pytask.mark.produces(
+    BLD / "figures" / "diff_btw_simulated_and_empirical_case_numbers.pdf"
+)
+def task_plot_difference_btw_official_and_simulated_cases(depends_on, produces):
+    empirical = pd.read_pickle(depends_on["empirical"])
+    simulated = pd.read_pickle(depends_on["simulated"])
+    diff = simulated.subtract(empirical.loc[simulated.index, "new_known_case"], axis=0)
+    fig, ax = plot_incidences(
+        incidences={"": diff},
+        title="Difference Between Simulated and Empirical Case Numbers",
+        ylabel="difference between simulated and empirical case numbers",
+        name_to_label={"": ""},
+        colors=[BLUE],
+        n_single_runs=-1,
+    )
+    ax.set_xlim(pd.Timestamp("2020-09-01"), pd.Timestamp("2021-07-01"))
+    fig.tight_layout()
+    fig.savefig(produces)
+    plt.close()
diff --git a/src/plotting/task_plot_scenario_comparisons.py b/src/plotting/task_plot_scenario_comparisons.py
@@ -68,6 +68,13 @@
 
 
 PLOTS = {
+    "summer_baseline": {
+        "title": "",
+        "scenarios": ["summer_baseline"],
+        "colors": [BLUE],
+        "name_to_label": {"summer_baseline": "summer and fall with open schools"},
+        "empirical": True,
+    },
     # Main Plots (Fixed)
     "combined_fit": {
         "title": "Simulated versus Empirical: {outcome}",

diff --git a/src/policies/enacted_policies.py b/src/policies/enacted_policies.py
@@ -21,7 +21,13 @@
     ("lockdown_light_with_fatigue", "2020-12-23", 0.57),
     ("christmas_days", "2020-12-26", 0.65),
     ("hard_lockdown", "2021-02-10", 0.35),
-    ("hard_lockdown_with_fatige", "2021-02-28", 0.5),
+    ("hard_lockdown_with_fatigue", "2021-02-28", 0.5),
+    ("reopening", "2021-06-01", 0.515),
+    # at the end of May the federal incidence dropped below 35,
+    # leading to a massive opening of restaurants etc.
+    ("june", "2021-07-01", 0.6),
+    # rules were loosened (e.g. https://bit.ly/3t1Vx3b)
+    ("summer", VERY_LATE, 1.0),
     ("reopening", "2021-04-12", 0.515),
     ("decline", VERY_LATE, 0.515),
 ]
@@ -357,11 +363,15 @@ def _get_enacted_school_policies(contact_models):
             => A/B for everyone. With this we err on the side of restricting contacts
                too much.
 
-    - "summer": (last updated May 22nd)
+    - "summer": (last updated Sep 3)
         - BW: normal <100, A/B from 100 to 165 (source: https://bit.ly/3hYtbDt).
         - BY: normal <50, A/B from 50 to 165 (https://bit.ly/2T6uupf).
         - NRW: normal <100, A/B from 100 ot 165 (https://bit.ly/2SbmD9v).
 
+        From Aug onwards politicians strongly support keeping schools open
+        (https://bit.ly/3mUqaGC) and rules with respect to infection numbers
+        are removed (https://bit.ly/2WJG7ov).
+
             => normal
 
     """

diff --git a/src/prepare_data/task_prepare_vacations.py b/src/prepare_data/task_prepare_vacations.py
@@ -50,7 +50,7 @@ def _convert_to_params_format(df):
 
 @pytask.mark.depends_on(
     {
-        "data": SRC / "original_data" / "vacations" / "vacations_2020.xlsx",
+        "data": SRC / "original_data" / "vacations.xlsx",
         "shared.py": SRC / "shared.py",
     }
 )

diff --git a/src/simulation/scenario_config.py b/src/simulation/scenario_config.py
@@ -107,8 +107,8 @@ def get_named_scenarios():
         n_main_seeds = 1
         n_other_seeds = 1
     elif FAST_FLAG == "verify":
-        n_main_seeds = 30
-        n_other_seeds = 0
+        n_main_seeds = 20
+        n_other_seeds = 5
     elif FAST_FLAG == "full":
         n_main_seeds = 30
         n_other_seeds = 30
@@ -123,10 +123,12 @@ def get_named_scenarios():
             "start_date": "2020-09-15",
             "end_date": SPRING_START - pd.Timedelta(days=1),
         }
-        spring_dates = {"start_date": SPRING_START, "end_date": "2021-05-31"}
+        # spring goes until April 10 because we start mid April to introduce Delta.
+        spring_dates = {"start_date": SPRING_START, "end_date": "2021-04-10"}
+        summer_dates = {"start_date": "2021-04-11", "end_date": "2021-08-01"}
         combined_dates = {
             "start_date": fall_dates["start_date"],
-            "end_date": spring_dates["end_date"],
+            "end_date": summer_dates["end_date"],
         }
     else:
         # for the plotting we need that combined and spring have dates after 2021-01-15
@@ -139,6 +141,7 @@ def get_named_scenarios():
             "start_date": "2021-01-01",
             "end_date": "2021-01-18",
         }
+        summer_dates = {"start_date": "2021-01-19", "end_date": "2021-01-22"}
 
     named_scenarios = {
         # Baseline Scenarios
@@ -162,8 +165,18 @@ def get_named_scenarios():
             "params_scenario": "baseline",
             "n_seeds": n_main_seeds,
             "save_rapid_test_statistics": True,
+            "save_last_states": True,
             **spring_dates,
         },
+        "summer_baseline": {
+            "sim_input_scenario": "baseline_save_rapid_test_statistics",
+            "params_scenario": "baseline",
+            "n_seeds": n_main_seeds,
+            "save_rapid_test_statistics": True,
+            "save_last_states": False,
+            "is_resumed": "spring",
+            **summer_dates,
+        },
         # Scenarios for the main plots
         "spring_no_effects": {
             "sim_input_scenario": "just_seasonality",

diff --git a/src/testing/task_get_and_plot_share_of_tests_for_symptomatics.py b/src/testing/task_get_and_plot_share_of_tests_for_symptomatics.py
@@ -55,7 +55,7 @@
     }
 )
 def task_prepare_characteristics_of_the_tested(depends_on, produces):
-    df = pd.read_excel(depends_on["data"], sheet_name="Klinische_Aspekte", header=1)
+    df = pd.read_excel(depends_on["data"], sheet_name="Klinische_Aspekte", header=2)
 
     df = _clean_data(df)
     df = convert_weekly_to_daily(df.reset_index(), divide_by_7_cols=[])
@@ -162,7 +162,7 @@ def _clean_data(df):
     return df
 
 
-def _extrapolate_series_after_february(sr, end_date="2021-08-30"):
+def _extrapolate_series_after_february(sr, end_date="2021-12-23"):
     end_date = pd.Timestamp(end_date)
     last_empirical_date = min(pd.Timestamp("2021-02-28"), sr.index.max())
     empirical_part = sr[:last_empirical_date]

diff --git a/src/testing/testing_models.py b/src/testing/testing_models.py
@@ -90,8 +90,12 @@ def _calculate_test_demand_from_share_known_cases(
     """
     n_newly_infected = states["newly_infected"].sum()
     n_pos_tests = n_newly_infected * share_known_cases
-    untested = ~states["knows_immune"] & ~states["pending_test"]
-
+    has_not_received_pcr_test_recently = states["cd_received_test_result_true"] < -14
+    untested = (
+        ~states["knows_immune"]
+        & ~states["pending_test"]
+        & has_not_received_pcr_test_recently
+    )
     symptomatic_untested = states["symptomatic"] & untested
     n_symptomatic_untested = symptomatic_untested.sum()
 

diff --git a/tests/test_testing_models.py b/tests/test_testing_models.py
@@ -26,7 +26,6 @@ def states():
     states["cd_symptoms_true"] = [-1, 2, -1] + [-5] * 6 + [-1]
     states["educ_worker"] = False
     states["state"] = "Hessen"
-    states["cd_received_test_result_true"] = -3
     states["cd_received_rapid_test"] = -99
     states["index"] = states.index
     return states
@@ -92,6 +91,7 @@ def test_calculate_test_demand_from_share_known_cases():
     states["pending_test"] = [False, True] + [True] * 7 + [False]
     states["currently_infected"] = [True, True, True] + [False] * 5 + [True] * 2
     states["knows_immune"] = False
+    states["cd_received_test_result_true"] = -28
 
     share_known_cases = 2 / 3
     share_of_tests_for_symptomatics = 0.5