"""
Weighted chi-square distribution: CDF and quantile.
This module implements numerical methods for the distribution of
Q = sum_j w_j Z_j^2, Z_j ~ iid N(0, 1), w_j > 0,
which arises when calibrating Gaussian credible sets in function spaces.
With weights $w_j = \\lambda_j$ (eigenvalues of the covariance) the
distribution governs ambient norm balls; with $w_j = \\lambda_j^{1-\\theta}$
it governs weakened-covariance ellipsoids.
The mathematical background is in
``docs/agent-docs/theory/function-space-hardening.md``, section 4.
Public functions
----------------
weighted_chi2_cdf : evaluate $P(Q \\le t)$.
weighted_chi2_quantile : invert the CDF for given probability $p$.
Both accept a ``method`` argument selecting between
* ``"auto"`` (default): Automatically selects between ``"saddlepoint"``
and ``"imhof"`` based on spectrum isotropy (effective degrees of
freedom $\\nu_{\\text{eff}} = (\\sum w)^2 / \\sum w^2$) and the caller's
requested relative tolerance ``tol``. Saddlepoint is used when its
expected error is well below ``tol``; Imhof is used otherwise.
* ``"imhof"``: Imhof's numerical inversion of the characteristic
function. Exact to quadrature tolerance.
* ``"ws"``: Welch--Satterthwaite moment-match to a scaled $\\chi^2_\\nu$.
Closed form; exact when all weights are equal; degrades with anisotropy.
* ``"saddlepoint"``: Lugannani--Rice saddlepoint approximation. Excellent
in deep tails.
* ``"mc"``: Monte Carlo empirical quantile / CDF. Unbiased reference.
The ``tol`` parameter (default ``1e-2``) sets the desired relative accuracy
of the result and is used **only** by ``"auto"`` mode to pick a method.
When an explicit method is specified, ``tol`` is ignored.
"""
from __future__ import annotations
from typing import Optional, Union
import numpy as np
import scipy.optimize
import scipy.stats
_VALID_METHODS = ("auto", "imhof", "ws", "saddlepoint", "mc")
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
[docs]
def weighted_chi2_cdf(
weights: np.ndarray,
t: Union[float, np.ndarray],
*,
method: str = "auto",
tol: float = 1e-2,
rtol: float = 1e-8,
n_samples: int = 100_000,
rng: Optional[np.random.Generator] = None,
) -> Union[float, np.ndarray]:
"""Evaluate $P(Q \\le t)$ for $Q = \\sum_j w_j Z_j^2$.
Args:
weights: Non-negative weights $w_j$. Length-zero or all-zero arrays
are treated as the degenerate point mass at 0.
t: Threshold(s); scalar or array. Returns matching shape.
method: One of {"auto", "imhof", "ws", "saddlepoint", "mc"}.
``"auto"`` selects saddlepoint or Imhof based on spectrum
isotropy and ``tol``.
tol: Desired relative accuracy of the result. Used only when
``method="auto"`` to decide whether saddlepoint is accurate
enough or Imhof is required. Ignored for explicit methods.
rtol: Relative quadrature tolerance for Imhof's integral.
n_samples: Sample count for Monte Carlo. Ignored by other methods.
rng: Optional NumPy generator for Monte Carlo. Defaults to
``np.random.default_rng()``.
Returns:
$P(Q \\le t)$ as a float (if ``t`` is scalar) or numpy array.
Raises:
ValueError: For unknown method, negative weights, or NaN inputs.
"""
w = _validate_weights(weights)
if method not in _VALID_METHODS:
raise ValueError(
f"Unknown method '{method}'. Choose one of {_VALID_METHODS}."
)
scalar = np.isscalar(t)
t_arr = np.atleast_1d(np.asarray(t, dtype=float))
if w.size == 0 or np.all(w == 0.0):
# Q = 0 almost surely; F(t) = 1 if t >= 0 else 0.
out = np.where(t_arr >= 0.0, 1.0, 0.0)
return float(out[0]) if scalar else out
if method == "auto":
method = _auto_select_method(w, tol)
if method == "imhof":
out = np.array([_imhof_cdf(w, float(ti), rtol=rtol) for ti in t_arr])
elif method == "ws":
out = _ws_cdf(w, t_arr)
elif method == "saddlepoint":
out = np.array([_saddlepoint_cdf(w, float(ti)) for ti in t_arr])
elif method == "mc":
out = _mc_cdf(w, t_arr, n_samples=n_samples, rng=rng)
else: # pragma: no cover - guarded above
raise AssertionError("unreachable")
return float(out[0]) if scalar else out
[docs]
def weighted_chi2_quantile(
weights: np.ndarray,
probability: float,
*,
method: str = "auto",
tol: float = 1e-2,
rtol: float = 1e-6,
n_samples: int = 100_000,
rng: Optional[np.random.Generator] = None,
) -> float:
"""Return $r$ such that $P(\\sum_j w_j Z_j^2 \\le r) = $ probability.
Args:
weights: Non-negative weights $w_j$.
probability: Target probability, strictly between 0 and 1.
method: One of {"auto", "imhof", "ws", "saddlepoint", "mc"}.
``"auto"`` (default) selects between saddlepoint and Imhof
based on spectrum isotropy and ``tol``: saddlepoint is used
when its expected error (empirically $\\approx 0.1/\\nu_{\\text{eff}}$
with $\\nu_{\\text{eff}} = (\\sum w)^2/\\sum w^2$) is comfortably
below ``tol``; Imhof is used otherwise.
tol: Desired relative accuracy of the result. Used only when
``method="auto"``; ignored for explicit method choices.
rtol: Relative tolerance for CDF root-finding (Imhof, saddlepoint).
n_samples: Sample count for Monte Carlo.
rng: Optional NumPy generator for Monte Carlo.
Returns:
The quantile $r$.
Raises:
ValueError: For invalid probability, unknown method, or negative weights.
"""
if not 0.0 < probability < 1.0:
raise ValueError("probability must lie strictly between 0 and 1.")
w = _validate_weights(weights)
if method not in _VALID_METHODS:
raise ValueError(
f"Unknown method '{method}'. Choose one of {_VALID_METHODS}."
)
if w.size == 0 or np.all(w == 0.0):
return 0.0
if method == "auto":
method = _auto_select_method(w, tol)
if method == "ws":
return float(_ws_quantile(w, probability))
if method == "mc":
return float(_mc_quantile(w, probability, n_samples=n_samples, rng=rng))
# Closed-form fast path for equal positive weights.
positive = w[w > 0.0]
if positive.size > 0 and np.allclose(
positive, positive[0], rtol=1e-12, atol=0.0
):
return float(
positive[0] * scipy.stats.chi2.ppf(probability, df=positive.size)
)
cdf_fn = (
(lambda t: _imhof_cdf(w, t, rtol=rtol))
if method == "imhof"
else (lambda t: _saddlepoint_cdf(w, t))
)
return float(_invert_cdf(cdf_fn, w, probability, rtol=rtol))
# ---------------------------------------------------------------------------
# Validation
# ---------------------------------------------------------------------------
def _validate_weights(weights: np.ndarray) -> np.ndarray:
w = np.asarray(weights, dtype=float).ravel()
if not np.all(np.isfinite(w)):
raise ValueError("weights must be finite.")
if np.any(w < 0.0):
raise ValueError("weights must be non-negative.")
return w
# ---------------------------------------------------------------------------
# Auto method selection
# ---------------------------------------------------------------------------
def _auto_select_method(weights: np.ndarray, tol: float) -> str:
"""Select quantile/CDF method based on spectrum isotropy and tolerance.
Uses the effective degrees of freedom
$\\nu_{\\text{eff}} = (\\sum_j w_j)^2 / \\sum_j w_j^2$ as a proxy for
how isotropic the weight spectrum is. Empirical calibration on
Laplacian spectra gives a conservative upper bound on the saddlepoint
relative error:
.. math::
\\varepsilon_{\\mathrm{sp}} \\approx \\frac{0.1}{\\nu_{\\text{eff}}}
A 3x safety factor is applied before trusting the saddlepoint result.
Imhof's method (exact to quadrature tolerance) is used as the fallback.
Rule-of-thumb thresholds for ``tol``:
* ``tol = 0.05`` (5 %): saddlepoint for $\\nu_{\\text{eff}} > 6$
* ``tol = 0.01`` (1 %): saddlepoint for $\\nu_{\\text{eff}} > 30$
* ``tol = 0.001`` (0.1 %): saddlepoint for $\\nu_{\\text{eff}} > 300$
Args:
weights: Validated non-negative weight vector.
tol: Desired relative accuracy of the result.
Returns:
``"saddlepoint"`` if the approximation is expected to meet ``tol``,
``"imhof"`` otherwise.
"""
s1 = float(np.sum(weights))
s2 = float(np.sum(weights ** 2))
nu_eff = s1 * s1 / s2 if s2 > 0.0 else 1.0
# Conservative: 3x safety factor on the empirical error model.
sp_expected_error = 0.1 / max(nu_eff, 1e-10)
return "saddlepoint" if sp_expected_error * 3.0 < tol else "imhof"
# ---------------------------------------------------------------------------
# Imhof's method
# ---------------------------------------------------------------------------
def _imhof_integrand_vec(
u: np.ndarray, weights: np.ndarray, t: float
) -> np.ndarray:
"""Vectorised Imhof integrand on a grid ``u`` of strictly positive points.
Works in log-space for the denominator to avoid overflow when the
spectrum is long and anisotropic.
"""
wu = weights[None, :] * u[:, None]
theta = 0.5 * (np.sum(np.arctan(wu), axis=1) - t * u)
sin_theta = np.sin(theta)
log_rho = 0.25 * np.sum(np.log1p(wu * wu), axis=1)
log_abs = np.log(np.abs(sin_theta) + 1e-300) - np.log(u) - log_rho
return np.sign(sin_theta) * np.exp(np.minimum(log_abs, 700.0))
def _imhof_cdf(weights: np.ndarray, t: float, *, rtol: float = 1e-8) -> float:
"""Imhof CDF $P(Q \\le t)$ via fixed-step trapezoidal quadrature.
A vectorised trapezoidal rule on a truncated domain handles the
oscillatory multi-scale integrands that arise from long anisotropic
spectra (e.g. inverse-Laplacian eigenvalues $\\lambda_j \\propto 1/j^2$)
far more reliably than ``scipy.integrate.quad``'s adaptive routine,
which exhausts its subdivision budget on such cases.
The step size $h$ is chosen to resolve the dominant oscillation period
$2\\pi/t$ with $\\ge 32$ samples; the truncation $U$ is set so the
integrand magnitude $|\\sin(\\theta)|/(u\\rho(u))$ is well below $rtol$
at $u = U$, exploiting the fact that for $u \\gg 1/\\min(w_j)$ all
$\\arctan(w_j u) \\to \\pi/2$ and the integrand decays as
$1/u^{1+n/2}$.
Refinement: the result of a step-$h$ trapezoidal pass is compared
against a step-$h/2$ pass; the routine terminates when the relative
Richardson-style change is below $rtol$ or the step budget is
exhausted.
"""
if t <= 0.0:
return 0.0
# Closed-form fast path for equal positive weights: Q = w * chi^2_n.
positive = weights[weights > 0.0]
if positive.size > 0 and np.allclose(
positive, positive[0], rtol=1e-12, atol=0.0
):
return float(scipy.stats.chi2.cdf(t / positive[0], df=positive.size))
mean = float(np.sum(weights))
# Truncation point. We need U large enough that the integrand tail
# integral is below rtol. The amplitude bound is
#
# |integrand(u)| <= 1 / (u * rho(u)),
# rho(u) = prod_j (1 + w_j^2 u^2)^{1/4}
#
# so the condition 1/(pi * U * rho(U)) < rtol is sufficient.
#
# Single-weight heuristic (16/w_min) massively overestimates U when N is
# large: rho grows as a *product* over all weights, so U only needs to
# reach the point where the product exceeds 1/(pi * rtol * U). For an
# N=50 decaying spectrum this is ~600 instead of ~400 000. We find U by
# doubling from an initial guess of 4/w_max until the condition is met.
U_oscillation = 64.0 * np.pi / max(t, 1e-6)
_target = np.log(1.0 / (np.pi * max(rtol, 1e-300)))
_U = max(4.0 / float(np.max(positive)), 1.0)
for _ in range(80):
_log_rho = 0.25 * float(np.sum(np.log1p((positive * _U) ** 2)))
if np.log(max(_U, 1e-300)) + _log_rho >= _target:
break
_U *= 2.0
U = max(_U, U_oscillation, 1.0)
# Step size: at least 32 points per oscillation period 2pi/t.
h_base = 2.0 * np.pi / max(t, 1e-6) / 32.0
# Iterate trapezoidal step halving for Richardson-style refinement.
integral_prev: Optional[float] = None
integral_curr = 0.0
max_iters = 6
n_steps_cap = 200_000
h = h_base
for _ in range(max_iters):
n_steps = min(int(U / h) + 1, n_steps_cap)
u_grid = np.linspace(h, n_steps * h, n_steps)
vals = _imhof_integrand_vec(u_grid, weights, t)
# u=0 contribution at the limit.
f0 = 0.5 * (mean - t)
integral_curr = h * (
0.5 * (f0 + vals[-1]) + float(np.sum(vals[:-1]))
)
if integral_prev is not None:
denom = max(abs(integral_curr), 1e-12)
if abs(integral_curr - integral_prev) / denom < rtol:
break
integral_prev = integral_curr
h *= 0.5
if int(U / h) > n_steps_cap:
break
cdf = 0.5 - integral_curr / np.pi
return float(np.clip(cdf, 0.0, 1.0))
# ---------------------------------------------------------------------------
# Welch--Satterthwaite (moment matching)
# ---------------------------------------------------------------------------
def _ws_params(weights: np.ndarray) -> tuple[float, float]:
s1 = float(np.sum(weights))
s2 = float(np.sum(weights * weights))
a = s2 / s1
nu = s1 * s1 / s2
return a, nu
def _ws_cdf(weights: np.ndarray, t: np.ndarray) -> np.ndarray:
a, nu = _ws_params(weights)
return scipy.stats.chi2.cdf(t / a, df=nu)
def _ws_quantile(weights: np.ndarray, probability: float) -> float:
a, nu = _ws_params(weights)
return a * float(scipy.stats.chi2.ppf(probability, df=nu))
# ---------------------------------------------------------------------------
# Lugannani--Rice saddlepoint
# ---------------------------------------------------------------------------
def _cgf(s: float, weights: np.ndarray) -> float:
return -0.5 * float(np.sum(np.log1p(-2.0 * s * weights)))
def _cgf_prime(s: float, weights: np.ndarray) -> float:
return float(np.sum(weights / (1.0 - 2.0 * s * weights)))
def _cgf_double_prime(s: float, weights: np.ndarray) -> float:
denom = 1.0 - 2.0 * s * weights
return 2.0 * float(np.sum((weights * weights) / (denom * denom)))
def _saddlepoint_cdf(weights: np.ndarray, t: float) -> float:
if t <= 0.0:
return 0.0
mean = float(np.sum(weights))
# Saddlepoint at the mean is exactly s_hat = 0 and the formula collapses;
# use Welch-Satterthwaite to avoid a 0/0.
if abs(t - mean) < 1e-12 * max(1.0, mean):
return float(_ws_cdf(weights, np.array([t]))[0])
w_max = float(np.max(weights))
s_max = 0.5 / w_max # s must satisfy s < 1/(2 max w_j) for convergence
epsilon = 1e-12
if t < mean:
lo, hi = -1e8, -epsilon
else:
lo, hi = epsilon, s_max * (1.0 - 1e-10)
def equation(s: float) -> float:
return _cgf_prime(s, weights) - t
# Expand the lower bound for very small t.
if t < mean:
while equation(lo) > 0.0:
lo *= 10.0
if lo < -1e30:
return 0.0
s_hat = scipy.optimize.brentq(equation, lo, hi, xtol=1e-14, rtol=1e-12)
k_hat = _cgf(s_hat, weights)
k_pp = _cgf_double_prime(s_hat, weights)
inner = 2.0 * (s_hat * t - k_hat)
if inner <= 0.0:
return float(_ws_cdf(weights, np.array([t]))[0])
w_hat = np.sign(s_hat) * np.sqrt(inner)
u_hat = s_hat * np.sqrt(k_pp)
cdf = scipy.stats.norm.cdf(w_hat) + scipy.stats.norm.pdf(w_hat) * (
1.0 / w_hat - 1.0 / u_hat
)
return float(np.clip(cdf, 0.0, 1.0))
# ---------------------------------------------------------------------------
# Monte Carlo
# ---------------------------------------------------------------------------
def _draw_q_samples(
weights: np.ndarray, n_samples: int, rng: Optional[np.random.Generator]
) -> np.ndarray:
"""Generate ``n_samples`` draws of $Q = \\sum_j w_j Z_j^2$.
Chunked to keep peak memory at roughly 8 MB regardless of len(weights).
"""
rng = np.random.default_rng() if rng is None else rng
n = max(1, int(weights.size))
target_floats = 1_000_000
chunk = max(1, target_floats // n)
samples = np.empty(n_samples, dtype=float)
cursor = 0
while cursor < n_samples:
m = min(chunk, n_samples - cursor)
z = rng.standard_normal((m, n))
np.multiply(z, z, out=z)
samples[cursor : cursor + m] = z @ weights
cursor += m
return samples
def _mc_cdf(
weights: np.ndarray,
t: np.ndarray,
*,
n_samples: int,
rng: Optional[np.random.Generator],
) -> np.ndarray:
samples = _draw_q_samples(weights, n_samples, rng)
samples_sorted = np.sort(samples)
# P(Q <= t) is the fraction of samples not exceeding t.
return np.searchsorted(samples_sorted, t, side="right") / n_samples
def _mc_quantile(
weights: np.ndarray,
probability: float,
*,
n_samples: int,
rng: Optional[np.random.Generator],
) -> float:
samples = _draw_q_samples(weights, n_samples, rng)
return float(np.quantile(samples, probability))
# ---------------------------------------------------------------------------
# CDF inversion
# ---------------------------------------------------------------------------
def _invert_cdf(
cdf_func,
weights: np.ndarray,
probability: float,
*,
rtol: float = 1e-6,
) -> float:
"""Invert ``cdf_func`` at ``probability`` using a WS-seeded brentq bracket."""
t_seed = max(_ws_quantile(weights, probability), 1e-12)
lo, hi = 0.5 * t_seed, 2.0 * t_seed
cdf_lo = cdf_func(lo)
cdf_hi = cdf_func(hi)
while cdf_lo > probability:
lo *= 0.5
if lo < 1e-300:
return 0.0
cdf_lo = cdf_func(lo)
while cdf_hi < probability:
hi *= 2.0
if hi > 1e30:
raise RuntimeError(
"Unable to bracket the CDF for inversion; weights may be "
"pathological."
)
cdf_hi = cdf_func(hi)
return scipy.optimize.brentq(
lambda t: cdf_func(t) - probability, lo, hi, rtol=rtol, xtol=rtol
)