"""
This module implements Monte Carlo-adjusted profile (MCAP) for POMP models.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, Optional, Tuple, Any
import numpy as np
import numpy.typing as npt
from scipy.stats import chi2
from loess.loess_1d import loess_1d
FloatArray = npt.NDArray[np.floating[Any]]
__all__ = ["MCAPResult", "mcap"]
def _qchisq(level: float, df: int = 1) -> float:
return float(chi2.ppf(level, df))
def _loess_smooth_1d(
x: FloatArray,
y: FloatArray,
grid: FloatArray,
*,
span: float = 0.75,
degree: int = 2,
) -> FloatArray:
x = np.asarray(x, dtype=float)
y = np.asarray(y, dtype=float)
grid = np.asarray(grid, dtype=float)
xmin = float(np.min(x))
xmax = float(np.max(x))
scale = xmax - xmin
if scale <= 0.0 or not np.isfinite(scale):
# degenerate predictor: return flat line at mean(y)
return np.full_like(grid, float(np.mean(y)), dtype=float)
try:
res = loess_1d(
x,
y,
xnew=grid,
degree=int(degree),
frac=float(span),
rotate=False,
)
except np.linalg.LinAlgError:
# loess_1d can fail with LinAlgError if the internal robust iterations
# encounter mad=0 (perfect fit), leading to inf weights that clip to 0.
# Retrying with a tiny sigy forces it to avoid mad-based scaling.
# TODO: This is just a bandaid so GitHub Actions will pass the CI tests for
# now. Figure out a more permanent solution.
print("LinAlgError in loess_1d, retrying with 1e-10 sigy")
res = loess_1d(
x,
y,
xnew=grid,
degree=int(degree),
frac=float(span),
rotate=False,
sigy=np.full_like(y, 1e-10),
)
y_sm = res[1]
return y_sm.astype(float, copy=False)
def _fit_local_quadratic(
x: FloatArray,
y: FloatArray,
*,
center: float,
span: float,
) -> Tuple[float, float, float, FloatArray]:
x = np.asarray(x, dtype=float)
y = np.asarray(y, dtype=float)
dist = np.abs(x - center)
m = int(np.trunc(span * len(x)))
m = max(3, min(m, len(x)))
# always compute kth distance
kth = np.sort(dist)[m - 1]
included = dist < kth
if np.count_nonzero(included) < 3:
included = dist <= kth
# tricube weights on chosen window
w = np.zeros_like(x, dtype=float)
if np.any(included):
maxdist = dist[included].max()
if maxdist > 0.0:
w[included] = (1.0 - (dist[included] / maxdist) ** 3) ** 3
else:
w[included] = 1.0
# uncentered
X = np.column_stack([np.ones_like(x), -(x**2), x])
# weighted least squares
sw = np.sqrt(w)
Xw = X * sw[:, None]
yw = y * sw
coef, *_ = np.linalg.lstsq(Xw, yw, rcond=None)
c, a, b = map(float, coef)
# residual based variance estimate
yhat = X @ coef
resid = (y - yhat) * sw
df = int(np.sum(w > 0) - X.shape[1])
if df > 0:
s2 = float(np.sum(resid**2) / df)
else:
s2 = 0.0
XtWX = Xw.T @ Xw
try:
cov_full = s2 * np.linalg.inv(XtWX)
except np.linalg.LinAlgError:
# if singular
cov_full = s2 * np.linalg.pinv(XtWX)
vc_ab = cov_full[1:3, 1:3]
return a, b, c, vc_ab
# MCAP result container
[docs]
@dataclass
class MCAPResult:
"""
Results of a Monte Carlo adjusted profile (MCAP) analysis.
"""
level: float
"""The confidence level of the profile likelihood confidence interval."""
mle: float
"""The maximum likelihood estimate of the focal parameter, taken as the argmax of the smoothed profile."""
ci: Tuple[Optional[float], Optional[float]]
"""The profile likelihood confidence interval (lower, upper)."""
delta: float
"""The log-likelihood threshold used to define the confidence interval, relative to the maximum."""
se_stat: float
"""The standard error due to statistical uncertainty (sampling variance)."""
se_mc: float
"""The standard error due to Monte Carlo noise in the likelihood estimates."""
se_total: float
"""The total standard error, calculated as the root sum of squares of se_stat and se_mc."""
fit: Dict[str, FloatArray]
"""A dictionary containing the grid of parameters ('parameter'), the smoothed log-likelihood values ('smoothed'), and the local quadratic fit values ('quadratic')."""
quadratic_max: float
"""The parameter value that maximizes the local quadratic fit."""
quadratic_coef: Dict[str, float]
"""The coefficients of the local quadratic fit: c - ax^2 + bx."""
vcov: FloatArray
"""The variance-covariance matrix of the quadratic coefficients a and b."""
[docs]
def mcap(
parameter: npt.ArrayLike,
loglik: npt.ArrayLike,
*,
level: float = 0.95,
span: float = 0.75,
n_grid: int = 1000,
loess_degree: int = 2,
) -> MCAPResult:
"""
Monte Carlo adjusted profile.
Given a collection of points maximizing the likelihood over a range of fixed values of a focal parameter, this function constructs a profile likelihood confidence interval accommodating both Monte Carlo error in the profile and statistical uncertainty present in the likelihood function.
Parameters
----------
parameter : npt.ArrayLike
The parameter values at which the log-likelihood was evaluated.
loglik : npt.ArrayLike
The log-likelihood values corresponding to the parameter values.
level : float, optional
The confidence level to construct the profile likelihood confidence interval for.
span : float, optional
The span parameter for the loess smoother.
n_grid : int, optional
The number of grid points to evaluate the smoothed log-likelihood at.
loess_degree : int, optional
The degree of the loess smoother.
Returns
-------
MCAPResult : MCAPResult
The MCAP result object containing the profile likelihood confidence interval and other statistics.
"""
x: FloatArray = np.asarray(parameter, dtype=float)
y: FloatArray = np.asarray(loglik, dtype=float)
# grid over observed parameter range
grid = np.linspace(float(np.min(x)), float(np.max(x)), int(n_grid))
# smooth noisy profile
y_sm = _loess_smooth_1d(x, y, grid=grid, span=span, degree=loess_degree)
# MLE = argmax of smoothed profile
i_max = int(np.nanargmax(y_sm))
mle = float(grid[i_max])
# local quadratic at smoothed MLE with raw data
a, b, c, vc_ab = _fit_local_quadratic(x, y, center=mle, span=span)
# SE decomposition
se_stat2 = 1.0 / (2.0 * a)
# Monte Carlo variance from vcov(a, b)
var_a = float(vc_ab[0, 0])
var_b = float(vc_ab[1, 1])
cov_ab = float(vc_ab[0, 1])
se_mc2 = (
1.0
/ (4.0 * a * a)
* (var_b - 2.0 * (b / a) * cov_ab + (b * b / (a * a)) * var_a)
)
# se_tot2 = se_stat2 + se_mc2
# MC-adjusted cutoff
q = _qchisq(level, df=1)
delta = float(q * (a * se_mc2 + 0.5))
# CI from smoothed profile
diff = float(np.nanmax(y_sm)) - y_sm
inside = diff < delta
ci: tuple[float | None, float | None]
if not np.any(inside):
ci = (None, None)
else:
idx = np.where(inside)[0]
ci = (float(grid[idx.min()]), float(grid[idx.max()]))
# quadratic curve on grid
quad = c - a * (grid**2) + b * grid
if a > 0.0:
quad_max = b / (2.0 * a)
else:
# fallback to smoothed MLE if curvature is non-positive
quad_max = mle
return MCAPResult(
level=level,
mle=mle,
ci=ci,
delta=delta,
se_stat=float(np.sqrt(se_stat2)),
se_mc=float(np.sqrt(se_mc2)),
se_total=float(np.sqrt(se_stat2 + se_mc2)),
fit={
"parameter": grid,
"smoothed": y_sm,
"quadratic": quad,
},
quadratic_max=float(quad_max),
quadratic_coef={"a": float(a), "b": float(b), "c": float(c)},
vcov=vc_ab,
)