Source code for pypomp.models.measles.measlesPomp

import numpy as np
import pandas as pd
import os
import pickle
import pypomp.models.measles.model_001 as m001
import pypomp.models.measles.model_001b as m001b
import pypomp.models.measles.model_001c as m001c
import pypomp.models.measles.model_001d as m001d
import pypomp.models.measles.model_002 as m002
import pypomp.models.measles.model_003 as m003
from scipy.interpolate import make_smoothing_spline
from pypomp.core.pomp import Pomp
import copy
from pypomp.core.par_trans import ParTrans


# Not sure if this is the best way to implement this.
[docs] class UKMeasles: _module_dir = os.path.dirname(os.path.abspath(__file__)) _data_dir = os.path.join(_module_dir, os.pardir, os.pardir, "data/uk_measles") _data_file = os.path.join(_data_dir, "uk_measles.pkl") with open(_data_file, "rb") as _f: _data = pickle.load(_f)
[docs] @staticmethod def subset(units=None, clean=False): """ Return a subset of the UKMeasles data, filtered by the given units. Parameters ---------- units : list of str, optional A list of unit names to subset the data by. If None, the entire dataset is returned. clean : bool, optional If True, returns a copy of the data with suspicious values set to np.nan. Returns ------- A dictionary with the same structure as UKMeasles.data, but with the data subsetted to only include the given units. """ data = copy.deepcopy(UKMeasles._data) if clean: # London 1955-08-12 124 # London 1955-08-19 82 # London 1955-08-26 0 # London 1955-09-02 58 # London 1955-09-09 38 data["measles"].loc[ (data["measles"]["unit"] == "London") & (data["measles"]["date"] == "1955-08-26"), "cases", ] = np.nan # The value 76 was used in He10. # 13770 Liverpool 1955-11-04 10 # 13771 Liverpool 1955-11-11 25 # 13772 Liverpool 1955-11-18 116 # 13773 Liverpool 1955-11-25 17 # 13774 Liverpool 1955-12-02 18 data["measles"].loc[ (data["measles"]["unit"] == "Liverpool") & (data["measles"]["date"] == "1955-11-18"), "cases", ] = np.nan # 13950 Liverpool 1959-04-17 143 # 13951 Liverpool 1959-04-24 115 # 13952 Liverpool 1959-05-01 450 # 13953 Liverpool 1959-05-08 96 # 13954 Liverpool 1959-05-15 157 data["measles"].loc[ (data["measles"]["unit"] == "Liverpool") & (data["measles"]["date"] == "1959-05-01"), "cases", ] = np.nan # 19552 Nottingham 1961-08-18 6 # 19553 Nottingham 1961-08-25 7 # 19554 Nottingham 1961-09-01 66 # 19555 Nottingham 1961-09-08 8 # 19556 Nottingham 1961-09-15 7 data["measles"].loc[ (data["measles"]["unit"] == "Nottingham") & (data["measles"]["date"] == "1961-09-01"), "cases", ] = np.nan # Sheffield 1961-05-05 266 # Sheffield 1961-05-12 346 # Sheffield 1961-05-19 0 # Sheffield 1961-05-26 314 # Sheffield 1961-06-02 297 data["measles"].loc[ (data["measles"]["unit"] == "Sheffield") & (data["measles"]["date"] == "1961-05-19"), "cases", ] = np.nan # Hull 1956-06-22 72 # Hull 1956-06-29 94 # Hull 1956-07-06 0 # Hull 1956-07-13 91 # Hull 1956-07-20 87 data["measles"].loc[ (data["measles"]["unit"] == "Hull") & (data["measles"]["date"] == "1956-07-06"), "cases", ] = np.nan if units is None: return data else: return { k: v[v["unit"].isin(units)].reset_index(drop=True) for k, v in data.items() }
[docs] @staticmethod def AK_mles() -> pd.DataFrame: """ MLEs from https://kingaa.github.io/sbied/measles/index.html """ module_dir = os.path.dirname(os.path.abspath(__file__)) data_file = os.path.join( module_dir, os.pardir, os.pardir, "data/uk_measles/AK_mles.csv" ) df = pd.read_csv(data_file, index_col="town") df.drop(columns=["loglik", "loglik.sd", "mu", "delay"], inplace=True) return df[ [ "R0", "sigma", "gamma", "iota", "rho", "sigmaSE", "psi", "cohort", "amplitude", "S_0", "E_0", "I_0", "R_0", ] ].T
[docs] @staticmethod def Pomp( unit: list[str], theta: dict | list[dict], model: str = "001b", interp_method: str = "shifted_splines", first_year: int = 1950, last_year: int = 1963, dt: float = 1 / 365.25, clean=False, ): """ Returns a Pomp object for the UK Measles data. Parameters ---------- unit : list[str] Which unit to use. Currently only supports one unit. theta : dict | list[dict] Parameters for the model. Can be a single dict or a list of dicts. model : str The model to use. Can be "001b", "001c", "001d" or "002". interp_method : str The method to use to interpolate the covariates. Can be "shifted_splines" or "linear". first_year : int The first year of the data to use. last_year : int The last year of the data to use. dt : float The time step size to use for the model. clean : bool If True, uses a copy of the data with suspicious values set to np.nan. Returns ------- Pomp: A Pomp object for the UK Measles data. """ data = UKMeasles.subset(unit, clean) measles = data["measles"] demog = data["demog"] # ----prep-data------------------------------------------------- dat = measles.copy() dat["year"] = dat["date"].dt.year dat_filtered = dat[ (dat["year"] >= first_year) & (dat["year"] <= last_year) ].copy() dat_filtered["time"] = ( (dat_filtered["date"] - pd.Timestamp(f"{first_year}-01-01")).dt.days / 365.25 ) + first_year dat_filtered = dat_filtered[ (dat_filtered["time"] > first_year) & (dat_filtered["time"] < last_year + 1) ][["time", "cases"]] dat_filtered.set_index("time", inplace=True) # ----prep-covariates------------------------------------------------- demog = demog.drop(columns=["unit"]) times = np.arange(demog["year"].min(), demog["year"].max() + 1 / 12, 1 / 12) if interp_method == "shifted_splines": # TODO fix exploding birthrate below year 1950 pop_bspl = make_smoothing_spline(demog["year"], demog["pop"]) births_bspl = make_smoothing_spline(demog["year"] + 0.5, demog["births"]) pop_interp = pop_bspl(times) births_interp = births_bspl(times - 4) elif interp_method == "linear": pop_interp = np.interp(times, demog["year"], demog["pop"]) births_interp = np.interp(times - 4, demog["year"], demog["births"]) else: raise ValueError(f"interp_method {interp_method} not recognized") covar_df = pd.DataFrame( {"time": times, "pop": pop_interp, "birthrate": births_interp} ) covar_df.set_index("time", inplace=True) # ----pomp-construction----------------------------------------------- mod = { "001": m001, "001b": m001b, "001c": m001c, "001d": m001d, "002": m002, "003": m003, }[model] t0 = float(2 * dat_filtered.index[0] - dat_filtered.index[1]) return Pomp( ys=dat_filtered, theta=theta, covars=covar_df, t0=t0, nstep=None, dt=dt, accumvars=mod.accumvars, statenames=mod.statenames, rinit=mod.rinit, rproc=mod.rproc, dmeas=mod.dmeas, rmeas=mod.rmeas, par_trans=ParTrans(to_est=mod.to_est, from_est=mod.from_est), )