Source code for pypomp.models.measles.measlesPomp

import numpy as np
import pandas as pd
import os
import pickle
import pypomp.models.measles.model_001 as m001
import pypomp.models.measles.model_001b as m001b
import pypomp.models.measles.model_001c as m001c
import pypomp.models.measles.model_001d as m001d
import pypomp.models.measles.model_002 as m002
import pypomp.models.measles.model_003 as m003
from scipy.interpolate import make_smoothing_spline
from pypomp.core.pomp import Pomp
import copy
from pypomp.core.par_trans import ParTrans


# Not sure if this is the best way to implement this.
class UKMeasles:
    _module_dir = os.path.dirname(os.path.abspath(__file__))
    _data_dir = os.path.join(_module_dir, os.pardir, os.pardir, "data/uk_measles")
    _data_file = os.path.join(_data_dir, "uk_measles.pkl")
    with open(_data_file, "rb") as _f:
        _data = pickle.load(_f)

    @staticmethod
    def subset(units=None, clean=False):
        """
        Return a subset of the UKMeasles data, filtered by the given units.

        Parameters
        ----------
        units : list of str, optional
            A list of unit names to subset the data by. If None, the entire
            dataset is returned.

        clean : bool, optional
            If True, returns a copy of the data with suspicious values set to np.nan.

        Returns
        -------
        A dictionary with the same structure as UKMeasles.data, but with the data subsetted to only include the given units.
        """
        data = copy.deepcopy(UKMeasles._data)

        if clean:
            # London 1955-08-12   124
            # London 1955-08-19    82
            # London 1955-08-26     0
            # London 1955-09-02    58
            # London 1955-09-09    38
            data["measles"].loc[
                (data["measles"]["unit"] == "London")
                & (data["measles"]["date"] == "1955-08-26"),
                "cases",
            ] = np.nan
            # The value 76 was used in He10.

            # 13770 Liverpool 1955-11-04    10
            # 13771 Liverpool 1955-11-11    25
            # 13772 Liverpool 1955-11-18   116
            # 13773 Liverpool 1955-11-25    17
            # 13774 Liverpool 1955-12-02    18
            data["measles"].loc[
                (data["measles"]["unit"] == "Liverpool")
                & (data["measles"]["date"] == "1955-11-18"),
                "cases",
            ] = np.nan

            # 13950 Liverpool 1959-04-17   143
            # 13951 Liverpool 1959-04-24   115
            # 13952 Liverpool 1959-05-01   450
            # 13953 Liverpool 1959-05-08    96
            # 13954 Liverpool 1959-05-15   157
            data["measles"].loc[
                (data["measles"]["unit"] == "Liverpool")
                & (data["measles"]["date"] == "1959-05-01"),
                "cases",
            ] = np.nan

            # 19552 Nottingham 1961-08-18     6
            # 19553 Nottingham 1961-08-25     7
            # 19554 Nottingham 1961-09-01    66
            # 19555 Nottingham 1961-09-08     8
            # 19556 Nottingham 1961-09-15     7
            data["measles"].loc[
                (data["measles"]["unit"] == "Nottingham")
                & (data["measles"]["date"] == "1961-09-01"),
                "cases",
            ] = np.nan

            # Sheffield 1961-05-05   266
            # Sheffield 1961-05-12   346
            # Sheffield 1961-05-19     0
            # Sheffield 1961-05-26   314
            # Sheffield 1961-06-02   297
            data["measles"].loc[
                (data["measles"]["unit"] == "Sheffield")
                & (data["measles"]["date"] == "1961-05-19"),
                "cases",
            ] = np.nan

            # Hull 1956-06-22    72
            # Hull 1956-06-29    94
            # Hull 1956-07-06     0
            # Hull 1956-07-13    91
            # Hull 1956-07-20    87

            data["measles"].loc[
                (data["measles"]["unit"] == "Hull")
                & (data["measles"]["date"] == "1956-07-06"),
                "cases",
            ] = np.nan

        if units is None:
            return data
        else:
            return {
                k: v[v["unit"].isin(units)].reset_index(drop=True)
                for k, v in data.items()
            }

    @staticmethod
    def AK_mles() -> pd.DataFrame:
        """
        MLEs from https://kingaa.github.io/sbied/measles/index.html
        """
        module_dir = os.path.dirname(os.path.abspath(__file__))
        data_file = os.path.join(
            module_dir, os.pardir, os.pardir, "data/uk_measles/AK_mles.csv"
        )
        df = pd.read_csv(data_file, index_col="town")
        df.drop(columns=["loglik", "loglik.sd", "mu", "delay"], inplace=True)
        return df[
            [
                "R0",
                "sigma",
                "gamma",
                "iota",
                "rho",
                "sigmaSE",
                "psi",
                "cohort",
                "amplitude",
                "S_0",
                "E_0",
                "I_0",
                "R_0",
            ]
        ].T


[docs]
    @staticmethod
    def Pomp(
        unit: list[str],
        theta: dict | list[dict],
        model: str = "001b",
        interp_method: str = "shifted_splines",
        first_year: int = 1950,
        last_year: int = 1963,
        dt: float = 1 / 365.25,
        clean=False,
    ):
        """
        Returns a Pomp object for the UK Measles data.

        Parameters
        ----------
        unit : list[str]
            Which unit to use. Currently only supports one unit.
        theta : dict | list[dict]
            Parameters for the model. Can be a single dict or a list of dicts.
        model : str
            The model to use. Can be "001b", "001c", "001d" or "002".
        interp_method : str
            The method to use to interpolate the covariates. Can be "shifted_splines" or "linear".
        first_year : int
            The first year of the data to use.
        last_year : int
            The last year of the data to use.
        dt : float
            The time step size to use for the model.
        clean : bool
            If True, uses a copy of the data with suspicious values set to np.nan.

        Returns
        -------
        Pomp:
            A Pomp object for the UK Measles data.
        """

        data = UKMeasles.subset(unit, clean)
        measles = data["measles"]
        demog = data["demog"]

        # ----prep-data-------------------------------------------------
        dat = measles.copy()
        dat["year"] = dat["date"].dt.year
        dat_filtered = dat[
            (dat["year"] >= first_year) & (dat["year"] <= last_year)
        ].copy()
        dat_filtered["time"] = (
            (dat_filtered["date"] - pd.Timestamp(f"{first_year}-01-01")).dt.days
            / 365.25
        ) + first_year
        dat_filtered = dat_filtered[
            (dat_filtered["time"] > first_year) & (dat_filtered["time"] < last_year + 1)
        ][["time", "cases"]]
        dat_filtered.set_index("time", inplace=True)

        # ----prep-covariates-------------------------------------------------
        demog = demog.drop(columns=["unit"])
        times = np.arange(demog["year"].min(), demog["year"].max() + 1 / 12, 1 / 12)
        if interp_method == "shifted_splines":
            # TODO fix exploding birthrate below year 1950
            pop_bspl = make_smoothing_spline(demog["year"], demog["pop"])
            births_bspl = make_smoothing_spline(demog["year"] + 0.5, demog["births"])
            pop_interp = pop_bspl(times)
            births_interp = births_bspl(times - 4)
        elif interp_method == "linear":
            pop_interp = np.interp(times, demog["year"], demog["pop"])
            births_interp = np.interp(times - 4, demog["year"], demog["births"])
        else:
            raise ValueError(f"interp_method {interp_method} not recognized")

        covar_df = pd.DataFrame(
            {"time": times, "pop": pop_interp, "birthrate": births_interp}
        )
        covar_df.set_index("time", inplace=True)

        # ----pomp-construction-----------------------------------------------

        mod = {
            "001": m001,
            "001b": m001b,
            "001c": m001c,
            "001d": m001d,
            "002": m002,
            "003": m003,
        }[model]

        t0 = float(2 * dat_filtered.index[0] - dat_filtered.index[1])
        return Pomp(
            ys=dat_filtered,
            theta=theta,
            covars=covar_df,
            t0=t0,
            nstep=None,
            dt=dt,
            accumvars=mod.accumvars,
            statenames=mod.statenames,
            rinit=mod.rinit,
            rproc=mod.rproc,
            dmeas=mod.dmeas,
            rmeas=mod.rmeas,
            par_trans=ParTrans(to_est=mod.to_est, from_est=mod.from_est),
        )