# Source code for pystatistics.montecarlo.solvers

"""
Solver dispatch for Monte Carlo methods.

Provides boot(), boot_ci(), and permutation_test() as the public API.
"""

from __future__ import annotations

from typing import Any, Callable, Literal, Sequence

import numpy as np
from numpy.typing import ArrayLike, NDArray

from pystatistics.montecarlo._common import BootParams
from pystatistics.montecarlo.design import BootstrapDesign, PermutationDesign
from pystatistics.montecarlo.solution import BootstrapSolution, PermutationSolution
from pystatistics.montecarlo.backends.cpu import (
    CPUBootstrapBackend,
    CPUPermutationBackend,
)


# Accepted values for the ``backend`` keyword of the public API functions.
BackendChoice = Literal['auto', 'cpu', 'gpu']


def _get_boot_backend(backend: BackendChoice):
    """Select bootstrap backend."""
    if backend in ('cpu', 'auto'):
        # auto defaults to CPU for bootstrap (GPU requires special handling)
        return CPUBootstrapBackend()

    if backend == 'gpu':
        try:
            from pystatistics.montecarlo.backends.gpu import GPUBootstrapBackend
            return GPUBootstrapBackend()
        except (ImportError, RuntimeError):
            # Fall back to CPU if GPU unavailable
            return CPUBootstrapBackend()

    raise ValueError(f"Unknown backend: {backend!r}")


def _get_perm_backend(backend: BackendChoice):
    """Select permutation backend."""
    if backend in ('cpu', 'auto'):
        return CPUPermutationBackend()

    if backend == 'gpu':
        try:
            from pystatistics.montecarlo.backends.gpu import GPUPermutationBackend
            return GPUPermutationBackend()
        except (ImportError, RuntimeError):
            return CPUPermutationBackend()

    raise ValueError(f"Unknown backend: {backend!r}")


def boot(
    data: ArrayLike,
    statistic: Callable,
    R: int = 999,
    *,
    sim: Literal["ordinary", "parametric", "balanced"] = "ordinary",
    stype: Literal["i", "f", "w"] = "i",
    strata: ArrayLike | None = None,
    ran_gen: Callable | None = None,
    mle: Any = None,
    seed: int | None = None,
    backend: BackendChoice = 'auto',
) -> BootstrapSolution:
    """
    Bootstrap resampling. Matches R's boot::boot().

    The statistic function signature depends on sim:

    - For nonparametric (sim="ordinary" or "balanced"):
      statistic(data, indices) -> array of shape (k,)
      where indices are bootstrap sample indices (stype="i"),
      frequency counts (stype="f"), or weights (stype="w").
    - For parametric (sim="parametric"):
      statistic(simulated_data) -> array of shape (k,)
      where simulated_data is generated by ran_gen(data, mle, rng).

    Args:
        data: Original data, shape (n,) or (n, p).
        statistic: Function to compute the statistic(s) of interest.
        R: Number of bootstrap replicates. Default 999.
        sim: Simulation type: "ordinary", "balanced", or "parametric".
        stype: Type of second argument to statistic: "i", "f", or "w".
        strata: Stratification vector (resampling within strata).
        ran_gen: For parametric bootstrap: fn(data, mle, rng) -> sim_data.
        mle: Parameter estimates for parametric bootstrap.
        seed: Random seed for reproducibility.
        backend: "auto", "cpu", or "gpu".

    Returns:
        BootstrapSolution with t0, t, bias, SE.

    Examples:
        >>> import numpy as np
        >>> from pystatistics.montecarlo import boot
        >>> data = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
        >>> def mean_stat(data, indices):
        ...     return np.array([np.mean(data[indices])])
        >>> result = boot(data, mean_stat, R=999, seed=42)
        >>> result.t0    # observed mean
        >>> result.bias  # bootstrap bias estimate
        >>> result.se    # bootstrap standard error
    """
    # Validate inputs and package them into an immutable design object.
    design = BootstrapDesign.for_bootstrap(
        data=data,
        statistic=statistic,
        R=R,
        sim=sim,
        stype=stype,
        strata=strata,
        ran_gen=ran_gen,
        mle=mle,
        seed=seed,
    )
    # Backend selection is independent of the design; 'auto' -> CPU.
    be = _get_boot_backend(backend)
    result = be.solve(design)
    return BootstrapSolution(_result=result, _design=design)
def boot_ci(
    boot_out: BootstrapSolution,
    *,
    conf: float | Sequence[float] = 0.95,
    type: str | Sequence[str] = "all",
    index: int = 0,
    var_t0: float | None = None,
    var_t: NDArray | None = None,
) -> BootstrapSolution:
    """
    Compute bootstrap confidence intervals. Matches R's boot::boot.ci().

    Takes a BootstrapSolution from boot() and computes confidence
    intervals using one or more methods.

    Args:
        boot_out: Result from boot().
        conf: Confidence level(s). Default 0.95.
        type: CI type(s): "normal", "basic", "perc", "bca", "stud", or "all".
            "all" computes normal, basic, percentile, and BCa (not
            studentized unless var_t is provided).
        index: Which statistic to compute CI for (0-indexed into t0).
        var_t0: Variance of the observed statistic (for normal/studentized).
        var_t: Per-replicate variance estimates, shape (R,). Required for
            studentized CI.

    Returns:
        New BootstrapSolution with CI populated.

    Examples:
        >>> result = boot(data, mean_stat, R=999, seed=42)
        >>> ci_result = boot_ci(result, type="perc")
        >>> ci_result.ci["perc"]  # shape (k, 2) for [lower, upper]
    """
    # Imported lazily to avoid a circular import at module load time.
    from pystatistics.montecarlo._ci import compute_ci

    # Normalize conf to a single float for now
    if isinstance(conf, (list, tuple)):
        conf_level = float(conf[0])
    else:
        conf_level = float(conf)

    # Normalize type
    if isinstance(type, str):
        if type == "all":
            types = ["normal", "basic", "perc", "bca"]
            # Studentized CI is only meaningful when per-replicate
            # variances are supplied.
            if var_t is not None:
                types.append("stud")
        else:
            types = [type]
    else:
        types = list(type)

    ci_dict = compute_ci(
        boot_out=boot_out,
        types=types,
        conf_level=conf_level,
        index=index,
        var_t0=var_t0,
        var_t=var_t,
    )

    # Create new BootParams with CI (params are treated as immutable,
    # so build a fresh object rather than mutating in place).
    old_params = boot_out._result.params
    new_params = BootParams(
        t0=old_params.t0,
        t=old_params.t,
        R=old_params.R,
        bias=old_params.bias,
        se=old_params.se,
        ci=ci_dict,
        ci_conf_level=conf_level,
    )

    from pystatistics.core.result import Result
    new_result = Result(
        params=new_params,
        info=boot_out._result.info,
        timing=boot_out._result.timing,
        backend_name=boot_out._result.backend_name,
        warnings=boot_out._result.warnings,
    )
    return BootstrapSolution(_result=new_result, _design=boot_out._design)
def permutation_test(
    x: ArrayLike,
    y: ArrayLike,
    statistic: Callable,
    R: int = 9999,
    *,
    alternative: Literal["two.sided", "less", "greater"] = "two.sided",
    seed: int | None = None,
    backend: BackendChoice = 'auto',
) -> PermutationSolution:
    """
    Permutation test for two groups.

    Shuffles the combined data R times, computing the test statistic on
    each permutation. P-value uses the Phipson-Smyth correction:
    (count + 1) / (R + 1).

    Args:
        x: Group 1 data.
        y: Group 2 data.
        statistic: fn(x, y) -> float. The test statistic.
        R: Number of permutations. Default 9999.
        alternative: "two.sided", "less", or "greater".
        seed: Random seed for reproducibility.
        backend: "auto", "cpu", or "gpu".

    Returns:
        PermutationSolution with observed_stat, perm_stats, p_value.

    Examples:
        >>> x = np.array([1, 2, 3, 4, 5])
        >>> y = np.array([6, 7, 8, 9, 10])
        >>> def mean_diff(x, y): return np.mean(x) - np.mean(y)
        >>> result = permutation_test(x, y, mean_diff, R=9999, seed=42)
        >>> result.p_value
    """
    # Validate inputs and package them into an immutable design object.
    design = PermutationDesign.for_permutation_test(
        x=x,
        y=y,
        statistic=statistic,
        R=R,
        alternative=alternative,
        seed=seed,
    )
    # Backend selection is independent of the design; 'auto' -> CPU.
    be = _get_perm_backend(backend)
    result = be.solve(design)
    return PermutationSolution(_result=result, _design=design)