Source code for scikit_stan.modelcore

"""Abstract classes for different model types that conform to sk-learn style."""

from collections import defaultdict
from inspect import signature
from typing import Any, Dict, List, Optional, Tuple, Union, overload

import numpy as np
from numpy.typing import ArrayLike, NDArray

from scikit_stan.utils.validation import _check_y

from .utils import check_array, check_X_y


# NOTE: This estimator class derives class methods from scikit-learn,
# which is distributed under the 3-Clause BSD License.
class CoreEstimator:
    """
    Abstract class for all estimator-type models in this package.

    Provides the scikit-learn style parameter API (``get_params`` /
    ``set_params``), shared input validation and estimator tags.
    Every constructor argument of a subclass is treated as a parameter and
    is read back from the attribute of the same name on the instance.
    """

    @classmethod
    def _get_param_names(cls) -> List[str]:
        """
        Get parameter names for the estimator.

        The names are taken from the signature of ``__init__`` (or of its
        ``deprecated_original`` attribute when present), skipping ``self``
        and any ``**kwargs`` catch-all.

        Returns
        -------
        names : list of str
            Alphabetically sorted constructor parameter names; empty when the
            class does not define its own ``__init__``.

        Raises
        ------
        RuntimeError
            If the constructor accepts ``*args``.
        """
        init = getattr(cls.__init__, "deprecated_original", cls.__init__)
        if init is object.__init__:
            # No explicit constructor, hence nothing to introspect.
            return []

        init_signature = signature(init)

        parameters = [
            p
            for p in init_signature.parameters.values()
            if p.name != "self" and p.kind != p.VAR_KEYWORD
        ]

        for p in parameters:
            if p.kind == p.VAR_POSITIONAL:
                raise RuntimeError(
                    "scikit-learn estimators should always "
                    "specify their parameters in the signature"
                    " of their __init__ (no varargs)."
                    " %s with constructor %s doesn't "
                    " follow this convention." % (cls, init_signature)
                )

        return sorted(p.name for p in parameters)

    def get_params(self, deep: bool = True) -> Dict[str, Any]:
        """
        Get parameters for this estimator.

        Parameters
        ----------
        deep : bool, default=True
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : dict
            Parameter names mapped to their values. Parameters of nested
            objects are reported as ``<component>__<parameter>``.
        """
        out: Dict[str, Any] = {}
        for key in self._get_param_names():
            value = getattr(self, key)
            # A parameter may hold an estimator *class* rather than an instance.
            # Classes expose ``get_params`` too, but only as an unbound function,
            # so calling it would raise; recurse into instances only.
            if deep and hasattr(value, "get_params") and not isinstance(value, type):
                deep_items = value.get_params().items()
                out.update((key + "__" + k, val) for k, val in deep_items)
            out[key] = value
        return out

    def set_params(self, **params: Any) -> "CoreEstimator":
        """
        Set the parameters of this estimator.

        The method works on simple estimators as well as on nested objects
        (such as :class:`~sklearn.pipeline.Pipeline`). The latter have
        parameters of the form ``<component>__<parameter>`` so that it's
        possible to update each component of a nested object.

        Parameters
        ----------
        **params : dict
            Estimator parameters.

        Returns
        -------
        self : estimator instance
            Estimator instance.

        Raises
        ------
        ValueError
            If a key (or the component part of a nested key) is not a
            parameter of this estimator.
        """
        if not params:
            return self
        valid_params = self.get_params(deep=True)

        # component name -> {sub-parameter name -> value}
        nested_params: Dict[str, Dict[str, Any]] = defaultdict(dict)
        for key, value in params.items():
            key, delim, sub_key = key.partition("__")
            if key not in valid_params:
                local_valid_params = self._get_param_names()
                raise ValueError(
                    f"Invalid parameter {key!r} for estimator {self}. "
                    f"Valid parameters are: {local_valid_params!r}."
                )

            if delim:
                nested_params[key][sub_key] = value
            else:
                setattr(self, key, value)
                # Keep the lookup table current so that nested updates below
                # target a component replaced in this same call.
                valid_params[key] = value

        for key, sub_params in nested_params.items():
            valid_params[key].set_params(**sub_params)

        return self

    @overload
    def _validate_data(
        self,
        X: ArrayLike,
        y: ArrayLike,
        ensure_X_2d: bool = True,
        allow_X_nd: bool = False,
        dtype: type = np.float64,
    ) -> Tuple[
        NDArray[Union[np.float64, np.int64]],
        NDArray[Union[np.float64, np.int64]],
    ]:
        ...

    @overload
    def _validate_data(
        self,
        X: ArrayLike,
        y: Optional[ArrayLike] = None,
        ensure_X_2d: bool = True,
        allow_X_nd: bool = False,
        dtype: type = np.float64,
    ) -> Tuple[
        NDArray[Union[np.float64, np.int64]],
        Optional[NDArray[Union[np.float64, np.int64]]],
    ]:
        ...

    # custom function adapted from sklearn's validations,
    # which are distributed under the 3-Clause BSD License.
    def _validate_data(
        self,
        X: Optional[ArrayLike] = None,
        y: Optional[ArrayLike] = None,
        ensure_X_2d: bool = True,
        allow_X_nd: bool = False,
        dtype: type = np.float64,
    ) -> Tuple[
        Optional[NDArray[Union[np.float64, np.int64]]],
        Optional[NDArray[Union[np.float64, np.int64]]],
    ]:
        """
        Input validation for standard estimators.

        Dispatches to the package validators depending on which inputs are
        supplied: ``check_array`` for X alone, ``_check_y`` for y alone and
        ``check_X_y`` when both are given.

        Parameters
        ----------
        X : array-like, optional
            Feature data to validate.
        y : array-like, optional
            Target data to validate.
        ensure_X_2d : bool, default=True
            Forwarded to ``check_array`` as ``ensure_2d``.
        allow_X_nd : bool, default=False
            Forwarded to ``check_array`` as ``allow_nd``.
        dtype : type, default=np.float64
            Data type passed on to whichever validator is used.

        Returns
        -------
        res_X, res_y : tuple
            The validated inputs; the entry for an input that was not
            supplied is None.

        Raises
        ------
        ValueError
            If neither X nor y is given.

        Notes
        -----
        ``ensure_X_2d`` and ``allow_X_nd`` only take effect when X is
        validated on its own; the joint X, y path passes just ``dtype``.
        """
        no_X, no_y = X is None, y is None

        if no_X and no_y:
            raise ValueError("""Validation should be done on X,y or both.""")
        elif not no_X and no_y:
            res_X = check_array(
                X,  # type: ignore
                ensure_2d=ensure_X_2d,
                allow_nd=allow_X_nd,
                dtype=dtype,
                allow_sparse=True,
            )
            res_y = None
        elif no_X and not no_y:
            res_y = _check_y(y, dtype=dtype)  # type:ignore
            res_X = None
        else:
            res_X, res_y = check_X_y(X, y, dtype=dtype)  # type:ignore

        return res_X, res_y

    def _more_tags(self) -> Dict[str, Any]:
        """
        Sets tags for current model that exclude certain sk-learn estimator
        checks that are not applicable to this model.

        Returns
        -------
        tags : dict
            A single ``"_xfail_checks"`` entry mapping the name of each
            excluded check to the reason it is excluded.
        """
        return {
            "_xfail_checks": {
                "check_methods_sample_order_invariance": "check is not applicable.",
                "check_methods_subset_invariance": "check is not applicable.",
                "check_fit_idempotent": """model is idempotent, but not to the required degree of
                    accuracy as this is a probabilistic setting.""",
                # NOTE: for the two 1d checks below the expected behavior is to
                # raise a ValueError; the package intends to give alternative
                # default behavior in these scenarios!
                "check_fit1d": """provided automatic cast from 1d to 2d in data validation.""",
                "check_fit2d_predict1d": """provided automatic cast from 1d to 2d in data validation
                 STILL NEEDS TO BE INVESTIGATED FOR GQ ISSUE""",
            }
        }