Source code for pypomp.mcap

"""
This module implements Monte Carlo-adjusted profile (MCAP) for POMP models.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Dict, Optional, Tuple, Any

import numpy as np
import numpy.typing as npt
from scipy.stats import chi2

from loess.loess_1d import loess_1d

FloatArray = npt.NDArray[np.floating[Any]]

__all__ = ["MCAPResult", "mcap"]


def _qchisq(level: float, df: int = 1) -> float:
    return float(chi2.ppf(level, df))


def _loess_smooth_1d(
    x: FloatArray,
    y: FloatArray,
    grid: FloatArray,
    *,
    span: float = 0.75,
    degree: int = 2,
) -> FloatArray:
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    grid = np.asarray(grid, dtype=float)

    xmin = float(np.min(x))
    xmax = float(np.max(x))
    scale = xmax - xmin

    if scale <= 0.0 or not np.isfinite(scale):
        # degenerate predictor: return flat line at mean(y)
        return np.full_like(grid, float(np.mean(y)), dtype=float)

    try:
        res = loess_1d(
            x,
            y,
            xnew=grid,
            degree=int(degree),
            frac=float(span),
            rotate=False,
        )
    except np.linalg.LinAlgError:
        # loess_1d can fail with LinAlgError if the internal robust iterations
        # encounter mad=0 (perfect fit), leading to inf weights that clip to 0.
        # Retrying with a tiny sigy forces it to avoid mad-based scaling.
        # TODO: This is just a bandaid so GitHub Actions will pass the CI tests for
        # now. Figure out a more permanent solution.
        print("LinAlgError in loess_1d, retrying with 1e-10 sigy")
        res = loess_1d(
            x,
            y,
            xnew=grid,
            degree=int(degree),
            frac=float(span),
            rotate=False,
            sigy=np.full_like(y, 1e-10),
        )

    y_sm = res[1]
    return y_sm.astype(float, copy=False)


def _fit_local_quadratic(
    x: FloatArray,
    y: FloatArray,
    *,
    center: float,
    span: float,
) -> Tuple[float, float, float, FloatArray]:
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    dist = np.abs(x - center)

    m = int(np.trunc(span * len(x)))
    m = max(3, min(m, len(x)))

    # always compute kth distance
    kth = np.sort(dist)[m - 1]
    included = dist < kth

    if np.count_nonzero(included) < 3:
        included = dist <= kth

    # tricube weights on chosen window
    w = np.zeros_like(x, dtype=float)
    if np.any(included):
        maxdist = dist[included].max()
        if maxdist > 0.0:
            w[included] = (1.0 - (dist[included] / maxdist) ** 3) ** 3
        else:
            w[included] = 1.0

    # uncentered
    X = np.column_stack([np.ones_like(x), -(x**2), x])

    # weighted least squares
    sw = np.sqrt(w)
    Xw = X * sw[:, None]
    yw = y * sw

    coef, *_ = np.linalg.lstsq(Xw, yw, rcond=None)
    c, a, b = map(float, coef)

    # residual based variance estimate
    yhat = X @ coef
    resid = (y - yhat) * sw
    df = int(np.sum(w > 0) - X.shape[1])
    if df > 0:
        s2 = float(np.sum(resid**2) / df)
    else:
        s2 = 0.0
    XtWX = Xw.T @ Xw

    try:
        cov_full = s2 * np.linalg.inv(XtWX)
    except np.linalg.LinAlgError:
        # if singular
        cov_full = s2 * np.linalg.pinv(XtWX)

    vc_ab = cov_full[1:3, 1:3]
    return a, b, c, vc_ab


# MCAP result container
[docs] @dataclass class MCAPResult: """ Results of a Monte Carlo adjusted profile (MCAP) analysis. """ level: float """The confidence level of the profile likelihood confidence interval.""" mle: float """The maximum likelihood estimate of the focal parameter, taken as the argmax of the smoothed profile.""" ci: Tuple[Optional[float], Optional[float]] """The profile likelihood confidence interval (lower, upper).""" delta: float """The log-likelihood threshold used to define the confidence interval, relative to the maximum.""" se_stat: float """The standard error due to statistical uncertainty (sampling variance).""" se_mc: float """The standard error due to Monte Carlo noise in the likelihood estimates.""" se_total: float """The total standard error, calculated as the root sum of squares of se_stat and se_mc.""" fit: Dict[str, FloatArray] """A dictionary containing the grid of parameters ('parameter'), the smoothed log-likelihood values ('smoothed'), and the local quadratic fit values ('quadratic').""" quadratic_max: float """The parameter value that maximizes the local quadratic fit.""" quadratic_coef: Dict[str, float] """The coefficients of the local quadratic fit: c - ax^2 + bx.""" vcov: FloatArray """The variance-covariance matrix of the quadratic coefficients a and b."""
[docs] def mcap( parameter: npt.ArrayLike, loglik: npt.ArrayLike, *, level: float = 0.95, span: float = 0.75, n_grid: int = 1000, loess_degree: int = 2, ) -> MCAPResult: """ Monte Carlo adjusted profile. Given a collection of points maximizing the likelihood over a range of fixed values of a focal parameter, this function constructs a profile likelihood confidence interval accommodating both Monte Carlo error in the profile and statistical uncertainty present in the likelihood function. Parameters ---------- parameter : npt.ArrayLike The parameter values at which the log-likelihood was evaluated. loglik : npt.ArrayLike The log-likelihood values corresponding to the parameter values. level : float, optional The confidence level to construct the profile likelihood confidence interval for. span : float, optional The span parameter for the loess smoother. n_grid : int, optional The number of grid points to evaluate the smoothed log-likelihood at. loess_degree : int, optional The degree of the loess smoother. Returns ------- MCAPResult : MCAPResult The MCAP result object containing the profile likelihood confidence interval and other statistics. """ x: FloatArray = np.asarray(parameter, dtype=float) y: FloatArray = np.asarray(loglik, dtype=float) # grid over observed parameter range grid = np.linspace(float(np.min(x)), float(np.max(x)), int(n_grid)) # smooth noisy profile y_sm = _loess_smooth_1d(x, y, grid=grid, span=span, degree=loess_degree) # MLE = argmax of smoothed profile i_max = int(np.nanargmax(y_sm)) mle = float(grid[i_max]) # local quadratic at smoothed MLE with raw data a, b, c, vc_ab = _fit_local_quadratic(x, y, center=mle, span=span) # SE decomposition se_stat2 = 1.0 / (2.0 * a) # Monte Carlo variance from vcov(a, b) var_a = float(vc_ab[0, 0]) var_b = float(vc_ab[1, 1]) cov_ab = float(vc_ab[0, 1]) se_mc2 = ( 1.0 / (4.0 * a * a) * (var_b - 2.0 * (b / a) * cov_ab + (b * b / (a * a)) * var_a) ) # se_tot2 = se_stat2 + se_mc2 # MC-adjusted cutoff q = _qchisq(level, df=1) delta = float(q * (a * se_mc2 + 0.5)) # CI from smoothed profile diff = float(np.nanmax(y_sm)) - y_sm inside = diff < delta ci: tuple[float | None, float | None] if not np.any(inside): ci = (None, None) else: idx = np.where(inside)[0] ci = (float(grid[idx.min()]), float(grid[idx.max()])) # quadratic curve on grid quad = c - a * (grid**2) + b * grid if a > 0.0: quad_max = b / (2.0 * a) else: # fallback to smoothed MLE if curvature is non-positive quad_max = mle return MCAPResult( level=level, mle=mle, ci=ci, delta=delta, se_stat=float(np.sqrt(se_stat2)), se_mc=float(np.sqrt(se_mc2)), se_total=float(np.sqrt(se_stat2 + se_mc2)), fit={ "parameter": grid, "smoothed": y_sm, "quadratic": quad, }, quadratic_max=float(quad_max), quadratic_coef={"a": float(a), "b": float(b), "c": float(c)}, vcov=vc_ab, )