Source code for pygeoinf.quadratic_form_quantile

"""
Weighted chi-square distribution: CDF and quantile.

This module implements numerical methods for the distribution of

    Q = sum_j w_j Z_j^2,    Z_j ~ iid N(0, 1),   w_j > 0,

which arises when calibrating Gaussian credible sets in function spaces.
With weights $w_j = \\lambda_j$ (eigenvalues of the covariance) the
distribution governs ambient norm balls; with $w_j = \\lambda_j^{1-\\theta}$
it governs weakened-covariance ellipsoids.

The mathematical background is in
``docs/agent-docs/theory/function-space-hardening.md``, section 4.

Public functions
----------------
weighted_chi2_cdf      : evaluate $P(Q \\le t)$.
weighted_chi2_quantile : invert the CDF for given probability $p$.

Both accept a ``method`` argument selecting between

* ``"auto"`` (default): Automatically selects between ``"saddlepoint"``
  and ``"imhof"`` based on spectrum isotropy (effective degrees of
  freedom $\\nu_{\\text{eff}} = (\\sum w)^2 / \\sum w^2$) and the caller's
  requested relative tolerance ``tol``. Saddlepoint is used when its
  expected error is well below ``tol``; Imhof is used otherwise.
* ``"imhof"``: Imhof's numerical inversion of the characteristic
  function. Exact to quadrature tolerance.
* ``"ws"``: Welch--Satterthwaite moment-match to a scaled $\\chi^2_\\nu$.
  Closed form; exact when all weights are equal; degrades with anisotropy.
* ``"saddlepoint"``: Lugannani--Rice saddlepoint approximation. Excellent
  in deep tails.
* ``"mc"``: Monte Carlo empirical quantile / CDF. Unbiased reference.

The ``tol`` parameter (default ``1e-2``) sets the desired relative accuracy
of the result and is used **only** by ``"auto"`` mode to pick a method.
When an explicit method is specified, ``tol`` is ignored.
"""

from __future__ import annotations

from typing import Optional, Union

import numpy as np
import scipy.optimize
import scipy.stats


_VALID_METHODS = ("auto", "imhof", "ws", "saddlepoint", "mc")


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


[docs] def weighted_chi2_cdf( weights: np.ndarray, t: Union[float, np.ndarray], *, method: str = "auto", tol: float = 1e-2, rtol: float = 1e-8, n_samples: int = 100_000, rng: Optional[np.random.Generator] = None, ) -> Union[float, np.ndarray]: """Evaluate $P(Q \\le t)$ for $Q = \\sum_j w_j Z_j^2$. Args: weights: Non-negative weights $w_j$. Length-zero or all-zero arrays are treated as the degenerate point mass at 0. t: Threshold(s); scalar or array. Returns matching shape. method: One of {"auto", "imhof", "ws", "saddlepoint", "mc"}. ``"auto"`` selects saddlepoint or Imhof based on spectrum isotropy and ``tol``. tol: Desired relative accuracy of the result. Used only when ``method="auto"`` to decide whether saddlepoint is accurate enough or Imhof is required. Ignored for explicit methods. rtol: Relative quadrature tolerance for Imhof's integral. n_samples: Sample count for Monte Carlo. Ignored by other methods. rng: Optional NumPy generator for Monte Carlo. Defaults to ``np.random.default_rng()``. Returns: $P(Q \\le t)$ as a float (if ``t`` is scalar) or numpy array. Raises: ValueError: For unknown method, negative weights, or NaN inputs. """ w = _validate_weights(weights) if method not in _VALID_METHODS: raise ValueError( f"Unknown method '{method}'. Choose one of {_VALID_METHODS}." ) scalar = np.isscalar(t) t_arr = np.atleast_1d(np.asarray(t, dtype=float)) if w.size == 0 or np.all(w == 0.0): # Q = 0 almost surely; F(t) = 1 if t >= 0 else 0. out = np.where(t_arr >= 0.0, 1.0, 0.0) return float(out[0]) if scalar else out if method == "auto": method = _auto_select_method(w, tol) if method == "imhof": out = np.array([_imhof_cdf(w, float(ti), rtol=rtol) for ti in t_arr]) elif method == "ws": out = _ws_cdf(w, t_arr) elif method == "saddlepoint": out = np.array([_saddlepoint_cdf(w, float(ti)) for ti in t_arr]) elif method == "mc": out = _mc_cdf(w, t_arr, n_samples=n_samples, rng=rng) else: # pragma: no cover - guarded above raise AssertionError("unreachable") return float(out[0]) if scalar else out
[docs] def weighted_chi2_quantile( weights: np.ndarray, probability: float, *, method: str = "auto", tol: float = 1e-2, rtol: float = 1e-6, n_samples: int = 100_000, rng: Optional[np.random.Generator] = None, ) -> float: """Return $r$ such that $P(\\sum_j w_j Z_j^2 \\le r) = $ probability. Args: weights: Non-negative weights $w_j$. probability: Target probability, strictly between 0 and 1. method: One of {"auto", "imhof", "ws", "saddlepoint", "mc"}. ``"auto"`` (default) selects between saddlepoint and Imhof based on spectrum isotropy and ``tol``: saddlepoint is used when its expected error (empirically $\\approx 0.1/\\nu_{\\text{eff}}$ with $\\nu_{\\text{eff}} = (\\sum w)^2/\\sum w^2$) is comfortably below ``tol``; Imhof is used otherwise. tol: Desired relative accuracy of the result. Used only when ``method="auto"``; ignored for explicit method choices. rtol: Relative tolerance for CDF root-finding (Imhof, saddlepoint). n_samples: Sample count for Monte Carlo. rng: Optional NumPy generator for Monte Carlo. Returns: The quantile $r$. Raises: ValueError: For invalid probability, unknown method, or negative weights. """ if not 0.0 < probability < 1.0: raise ValueError("probability must lie strictly between 0 and 1.") w = _validate_weights(weights) if method not in _VALID_METHODS: raise ValueError( f"Unknown method '{method}'. Choose one of {_VALID_METHODS}." ) if w.size == 0 or np.all(w == 0.0): return 0.0 if method == "auto": method = _auto_select_method(w, tol) if method == "ws": return float(_ws_quantile(w, probability)) if method == "mc": return float(_mc_quantile(w, probability, n_samples=n_samples, rng=rng)) # Closed-form fast path for equal positive weights. positive = w[w > 0.0] if positive.size > 0 and np.allclose( positive, positive[0], rtol=1e-12, atol=0.0 ): return float( positive[0] * scipy.stats.chi2.ppf(probability, df=positive.size) ) cdf_fn = ( (lambda t: _imhof_cdf(w, t, rtol=rtol)) if method == "imhof" else (lambda t: _saddlepoint_cdf(w, t)) ) return float(_invert_cdf(cdf_fn, w, probability, rtol=rtol))
# --------------------------------------------------------------------------- # Validation # --------------------------------------------------------------------------- def _validate_weights(weights: np.ndarray) -> np.ndarray: w = np.asarray(weights, dtype=float).ravel() if not np.all(np.isfinite(w)): raise ValueError("weights must be finite.") if np.any(w < 0.0): raise ValueError("weights must be non-negative.") return w # --------------------------------------------------------------------------- # Auto method selection # --------------------------------------------------------------------------- def _auto_select_method(weights: np.ndarray, tol: float) -> str: """Select quantile/CDF method based on spectrum isotropy and tolerance. Uses the effective degrees of freedom $\\nu_{\\text{eff}} = (\\sum_j w_j)^2 / \\sum_j w_j^2$ as a proxy for how isotropic the weight spectrum is. Empirical calibration on Laplacian spectra gives a conservative upper bound on the saddlepoint relative error: .. math:: \\varepsilon_{\\mathrm{sp}} \\approx \\frac{0.1}{\\nu_{\\text{eff}}} A 3x safety factor is applied before trusting the saddlepoint result. Imhof's method (exact to quadrature tolerance) is used as the fallback. Rule-of-thumb thresholds for ``tol``: * ``tol = 0.05`` (5 %): saddlepoint for $\\nu_{\\text{eff}} > 6$ * ``tol = 0.01`` (1 %): saddlepoint for $\\nu_{\\text{eff}} > 30$ * ``tol = 0.001`` (0.1 %): saddlepoint for $\\nu_{\\text{eff}} > 300$ Args: weights: Validated non-negative weight vector. tol: Desired relative accuracy of the result. Returns: ``"saddlepoint"`` if the approximation is expected to meet ``tol``, ``"imhof"`` otherwise. """ s1 = float(np.sum(weights)) s2 = float(np.sum(weights ** 2)) nu_eff = s1 * s1 / s2 if s2 > 0.0 else 1.0 # Conservative: 3x safety factor on the empirical error model. sp_expected_error = 0.1 / max(nu_eff, 1e-10) return "saddlepoint" if sp_expected_error * 3.0 < tol else "imhof" # --------------------------------------------------------------------------- # Imhof's method # --------------------------------------------------------------------------- def _imhof_integrand_vec( u: np.ndarray, weights: np.ndarray, t: float ) -> np.ndarray: """Vectorised Imhof integrand on a grid ``u`` of strictly positive points. Works in log-space for the denominator to avoid overflow when the spectrum is long and anisotropic. """ wu = weights[None, :] * u[:, None] theta = 0.5 * (np.sum(np.arctan(wu), axis=1) - t * u) sin_theta = np.sin(theta) log_rho = 0.25 * np.sum(np.log1p(wu * wu), axis=1) log_abs = np.log(np.abs(sin_theta) + 1e-300) - np.log(u) - log_rho return np.sign(sin_theta) * np.exp(np.minimum(log_abs, 700.0)) def _imhof_cdf(weights: np.ndarray, t: float, *, rtol: float = 1e-8) -> float: """Imhof CDF $P(Q \\le t)$ via fixed-step trapezoidal quadrature. A vectorised trapezoidal rule on a truncated domain handles the oscillatory multi-scale integrands that arise from long anisotropic spectra (e.g. inverse-Laplacian eigenvalues $\\lambda_j \\propto 1/j^2$) far more reliably than ``scipy.integrate.quad``'s adaptive routine, which exhausts its subdivision budget on such cases. The step size $h$ is chosen to resolve the dominant oscillation period $2\\pi/t$ with $\\ge 32$ samples; the truncation $U$ is set so the integrand magnitude $|\\sin(\\theta)|/(u\\rho(u))$ is well below $rtol$ at $u = U$, exploiting the fact that for $u \\gg 1/\\min(w_j)$ all $\\arctan(w_j u) \\to \\pi/2$ and the integrand decays as $1/u^{1+n/2}$. Refinement: the result of a step-$h$ trapezoidal pass is compared against a step-$h/2$ pass; the routine terminates when the relative Richardson-style change is below $rtol$ or the step budget is exhausted. """ if t <= 0.0: return 0.0 # Closed-form fast path for equal positive weights: Q = w * chi^2_n. positive = weights[weights > 0.0] if positive.size > 0 and np.allclose( positive, positive[0], rtol=1e-12, atol=0.0 ): return float(scipy.stats.chi2.cdf(t / positive[0], df=positive.size)) mean = float(np.sum(weights)) # Truncation point. We need U large enough that the integrand tail # integral is below rtol. The amplitude bound is # # |integrand(u)| <= 1 / (u * rho(u)), # rho(u) = prod_j (1 + w_j^2 u^2)^{1/4} # # so the condition 1/(pi * U * rho(U)) < rtol is sufficient. # # Single-weight heuristic (16/w_min) massively overestimates U when N is # large: rho grows as a *product* over all weights, so U only needs to # reach the point where the product exceeds 1/(pi * rtol * U). For an # N=50 decaying spectrum this is ~600 instead of ~400 000. We find U by # doubling from an initial guess of 4/w_max until the condition is met. U_oscillation = 64.0 * np.pi / max(t, 1e-6) _target = np.log(1.0 / (np.pi * max(rtol, 1e-300))) _U = max(4.0 / float(np.max(positive)), 1.0) for _ in range(80): _log_rho = 0.25 * float(np.sum(np.log1p((positive * _U) ** 2))) if np.log(max(_U, 1e-300)) + _log_rho >= _target: break _U *= 2.0 U = max(_U, U_oscillation, 1.0) # Step size: at least 32 points per oscillation period 2pi/t. h_base = 2.0 * np.pi / max(t, 1e-6) / 32.0 # Iterate trapezoidal step halving for Richardson-style refinement. integral_prev: Optional[float] = None integral_curr = 0.0 max_iters = 6 n_steps_cap = 200_000 h = h_base for _ in range(max_iters): n_steps = min(int(U / h) + 1, n_steps_cap) u_grid = np.linspace(h, n_steps * h, n_steps) vals = _imhof_integrand_vec(u_grid, weights, t) # u=0 contribution at the limit. f0 = 0.5 * (mean - t) integral_curr = h * ( 0.5 * (f0 + vals[-1]) + float(np.sum(vals[:-1])) ) if integral_prev is not None: denom = max(abs(integral_curr), 1e-12) if abs(integral_curr - integral_prev) / denom < rtol: break integral_prev = integral_curr h *= 0.5 if int(U / h) > n_steps_cap: break cdf = 0.5 - integral_curr / np.pi return float(np.clip(cdf, 0.0, 1.0)) # --------------------------------------------------------------------------- # Welch--Satterthwaite (moment matching) # --------------------------------------------------------------------------- def _ws_params(weights: np.ndarray) -> tuple[float, float]: s1 = float(np.sum(weights)) s2 = float(np.sum(weights * weights)) a = s2 / s1 nu = s1 * s1 / s2 return a, nu def _ws_cdf(weights: np.ndarray, t: np.ndarray) -> np.ndarray: a, nu = _ws_params(weights) return scipy.stats.chi2.cdf(t / a, df=nu) def _ws_quantile(weights: np.ndarray, probability: float) -> float: a, nu = _ws_params(weights) return a * float(scipy.stats.chi2.ppf(probability, df=nu)) # --------------------------------------------------------------------------- # Lugannani--Rice saddlepoint # --------------------------------------------------------------------------- def _cgf(s: float, weights: np.ndarray) -> float: return -0.5 * float(np.sum(np.log1p(-2.0 * s * weights))) def _cgf_prime(s: float, weights: np.ndarray) -> float: return float(np.sum(weights / (1.0 - 2.0 * s * weights))) def _cgf_double_prime(s: float, weights: np.ndarray) -> float: denom = 1.0 - 2.0 * s * weights return 2.0 * float(np.sum((weights * weights) / (denom * denom))) def _saddlepoint_cdf(weights: np.ndarray, t: float) -> float: if t <= 0.0: return 0.0 mean = float(np.sum(weights)) # Saddlepoint at the mean is exactly s_hat = 0 and the formula collapses; # use Welch-Satterthwaite to avoid a 0/0. if abs(t - mean) < 1e-12 * max(1.0, mean): return float(_ws_cdf(weights, np.array([t]))[0]) w_max = float(np.max(weights)) s_max = 0.5 / w_max # s must satisfy s < 1/(2 max w_j) for convergence epsilon = 1e-12 if t < mean: lo, hi = -1e8, -epsilon else: lo, hi = epsilon, s_max * (1.0 - 1e-10) def equation(s: float) -> float: return _cgf_prime(s, weights) - t # Expand the lower bound for very small t. if t < mean: while equation(lo) > 0.0: lo *= 10.0 if lo < -1e30: return 0.0 s_hat = scipy.optimize.brentq(equation, lo, hi, xtol=1e-14, rtol=1e-12) k_hat = _cgf(s_hat, weights) k_pp = _cgf_double_prime(s_hat, weights) inner = 2.0 * (s_hat * t - k_hat) if inner <= 0.0: return float(_ws_cdf(weights, np.array([t]))[0]) w_hat = np.sign(s_hat) * np.sqrt(inner) u_hat = s_hat * np.sqrt(k_pp) cdf = scipy.stats.norm.cdf(w_hat) + scipy.stats.norm.pdf(w_hat) * ( 1.0 / w_hat - 1.0 / u_hat ) return float(np.clip(cdf, 0.0, 1.0)) # --------------------------------------------------------------------------- # Monte Carlo # --------------------------------------------------------------------------- def _draw_q_samples( weights: np.ndarray, n_samples: int, rng: Optional[np.random.Generator] ) -> np.ndarray: """Generate ``n_samples`` draws of $Q = \\sum_j w_j Z_j^2$. Chunked to keep peak memory at roughly 8 MB regardless of len(weights). """ rng = np.random.default_rng() if rng is None else rng n = max(1, int(weights.size)) target_floats = 1_000_000 chunk = max(1, target_floats // n) samples = np.empty(n_samples, dtype=float) cursor = 0 while cursor < n_samples: m = min(chunk, n_samples - cursor) z = rng.standard_normal((m, n)) np.multiply(z, z, out=z) samples[cursor : cursor + m] = z @ weights cursor += m return samples def _mc_cdf( weights: np.ndarray, t: np.ndarray, *, n_samples: int, rng: Optional[np.random.Generator], ) -> np.ndarray: samples = _draw_q_samples(weights, n_samples, rng) samples_sorted = np.sort(samples) # P(Q <= t) is the fraction of samples not exceeding t. return np.searchsorted(samples_sorted, t, side="right") / n_samples def _mc_quantile( weights: np.ndarray, probability: float, *, n_samples: int, rng: Optional[np.random.Generator], ) -> float: samples = _draw_q_samples(weights, n_samples, rng) return float(np.quantile(samples, probability)) # --------------------------------------------------------------------------- # CDF inversion # --------------------------------------------------------------------------- def _invert_cdf( cdf_func, weights: np.ndarray, probability: float, *, rtol: float = 1e-6, ) -> float: """Invert ``cdf_func`` at ``probability`` using a WS-seeded brentq bracket.""" t_seed = max(_ws_quantile(weights, probability), 1e-12) lo, hi = 0.5 * t_seed, 2.0 * t_seed cdf_lo = cdf_func(lo) cdf_hi = cdf_func(hi) while cdf_lo > probability: lo *= 0.5 if lo < 1e-300: return 0.0 cdf_lo = cdf_func(lo) while cdf_hi < probability: hi *= 2.0 if hi > 1e30: raise RuntimeError( "Unable to bracket the CDF for inversion; weights may be " "pathological." ) cdf_hi = cdf_func(hi) return scipy.optimize.brentq( lambda t: cdf_func(t) - probability, lo, hi, rtol=rtol, xtol=rtol )