Source code for pyabc.transition.base

import logging
from abc import abstractmethod
from typing import Dict, Tuple, Union

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator

from ..cv import calc_cv
from ..parameters import Parameter
from .exceptions import NotEnoughParticles
from .predict_population_size import predict_population_size
from .transitionmeta import TransitionMeta

# Module-level logger, registered under the name "ABC.Transition".
logger = logging.getLogger("ABC.Transition")



[docs]
class Transition(BaseEstimator, metaclass=TransitionMeta):
    """
    Abstract Transition base class. Derive all Transitions from this class.

    Subclasses have to implement ``fit``, ``rvs_single`` and ``pdf``.

        .. note::
            This class does a little bit of meta-programming.

            The `fit`, `pdf` and `rvs` methods are automatically wrapped
            to handle the special case of no parameters.

            Hence, you can safely assume that you encounter at least one
            parameter. All the defined transitions will then automatically
            generalize to the case of no parameter.
    """

    # Passed to ``calc_cv`` by ``mean_cv``; presumably the number of
    # bootstrap repetitions -- confirm against ``calc_cv``.
    NR_BOOTSTRAP = 5
    # Parameters and weights of the last fit. They default to ``None``; the
    # ``fit`` docstring states that they are stored automatically on fitting.
    X = None
    w = None


[docs]
    @abstractmethod
    def fit(self, X: pd.DataFrame, w: np.ndarray) -> None:
        """
        Fit the density estimator (perturber) to the sampled data.

        Concrete implementations might do something like fitting a KDE.

        The parameters given as ``X`` and ``w`` are automatically stored
        in ``self.X`` and ``self.w``, so implementations do not need to
        store them themselves.

        Parameters
        ----------
        X:
            The parameters.
        w:
            The corresponding weights.
        """



[docs]
    @abstractmethod
    def rvs_single(self) -> Parameter:
        """
        Random variable sample (rvs).

        Draw a single sample from the fitted distribution. The default
        ``rvs`` implementation calls this method once per requested sample.

        Returns
        -------
        sample:
            A sample from the fitted model.
        """



[docs]
    def rvs(
        self, size: Union[None, int] = None
    ) -> Union[Parameter, pd.DataFrame]:
        """
        Sample from the density.

        Parameters
        ----------
        size:
            Number of independent samples to draw.
            If None, a single Parameter from rvs_single() is returned, if it is
            an integer >= 1, a pandas.DataFrame with the corresponding number
            of rows is returned.

        Returns
        -------
        samples: The parameter sample(s).

        Note
        ----
        This method can be overridden for efficient implementations.
        The default is to call rvs_single repeatedly (which might
        not be the most efficient way).
        """
        # ``None`` is the documented "give me one Parameter" request, so it
        # is spelled out in the annotation instead of an implicit Optional.
        if size is not None:
            # one rvs_single() draw per row of the resulting frame
            draws = [self.rvs_single() for _ in range(size)]
            return pd.DataFrame(draws)
        return self.rvs_single()



[docs]
    @abstractmethod
    def pdf(
        self, x: Union[Parameter, pd.Series, pd.DataFrame]
    ) -> Union[float, np.ndarray]:
        """
        Evaluate the probability density function (PDF) at `x`.

        Parameters
        ----------
        x:
            Parameter. If x is a Parameter or Series,
            then x should have the columns from X passed to the fit method
            as indices.
            If x is a DataFrame, then x should have the same columns as X
            passed before to the fit method. The order of the columns is not
            important.

        Returns
        -------
        density: float or np.ndarray
            Probability density at `x`. Presumably an array with one entry
            per row when `x` is a DataFrame -- confirm against the concrete
            implementation.
        """


    def score(self, X: pd.DataFrame, w: np.ndarray):
        """
        Weighted sum of the log densities of this transition at ``X``.

        Parameters
        ----------
        X:
            The points at which ``pdf`` is evaluated.
        w:
            The weights multiplied onto the log densities.

        Returns
        -------
        score:
            ``sum(log(pdf(X)) * w)``.
        """
        log_densities = np.log(self.pdf(X))
        return (log_densities * w).sum()

    def no_meaningful_particles(self) -> bool:
        """
        Tell whether there is nothing the transition could be based on.

        Returns
        -------
        True if no data is stored (``self.X`` is still the class default
        ``None``), if the stored data has no rows, or if
        ``self.no_parameters`` is set. False otherwise.
        """
        # ``X`` defaults to ``None`` on the class; calling ``len`` on it
        # would raise a TypeError instead of reporting "no particles".
        if self.X is None:
            return True
        return len(self.X) == 0 or self.no_parameters


[docs]
    def mean_cv(self, n_samples: Union[None, int] = None) -> float:
        """
        Estimate the uncertainty on the KDE.

        Parameters
        ----------
        n_samples: int, optional
            Sample size for which the CV is estimated.
            Defaults to the sample size of the last fit, i.e. ``len(self.X)``.

        Returns
        -------
        mean_cv: float
            The estimated average coefficient of variation.

        Raises
        ------
        NotEnoughParticles
            If ``no_meaningful_particles()`` returns True.

        Note
        ----

        Calling this method also sets the attributes ``test_points_``,
        ``test_weights_`` and ``variation_at_test_points_`` as a side effect.
        They hold the individual points, weights and variations that
        entered the mean.
        """
        # TODO: not sure if this is the right behaviour
        if self.no_meaningful_particles():
            raise NotEnoughParticles(n_samples)

        sample_size = len(self.X) if n_samples is None else n_samples

        # the stored particles and their weights serve as test points
        points, weights = self.X, self.w
        self.test_points_, self.test_weights_ = points, weights

        # bootstrapped coefficients of variation; this transition is handed
        # over as the only entry of the transition list
        cv, variations = calc_cv(
            sample_size,
            np.array([1]),
            self.NR_BOOTSTRAP,
            weights,
            [self],
            [points],
        )

        # keep only the entry belonging to this (single) transition
        self.variation_at_test_points_ = variations[0]

        # the cv estimates the uncertainty of sampling `n_samples` times
        # from the KDE
        return cv


    def required_nr_samples(self, coefficient_of_variation: float) -> int:
        """
        Predict the number of samples for a coefficient of variation.

        Delegates to ``predict_population_size``, passing the current
        sample size ``len(self.X)``, the requested coefficient of variation
        and ``self.mean_cv`` as the CV estimator.

        Parameters
        ----------
        coefficient_of_variation:
            The coefficient of variation handed to
            ``predict_population_size``.

        Returns
        -------
        The ``n_estimated`` attribute of the prediction result.

        Raises
        ------
        NotEnoughParticles
            If ``no_meaningful_particles()`` returns True.

        Note
        ----
        The complete prediction result is kept in ``self.cv_estimate_``.
        """
        if self.no_meaningful_particles():
            raise NotEnoughParticles

        self.cv_estimate_ = predict_population_size(
            len(self.X), coefficient_of_variation, self.mean_cv
        )
        return self.cv_estimate_.n_estimated




[docs]
class DiscreteTransition(Transition):
    """
    This is a base class for discrete transition kernels.

    It adds no members of its own to ``Transition``.
    """




[docs]
class AggregatedTransition(Transition):
    """Different transitions for different subsets of the parameters.

    The transitions are applied independently of each other, i.e. the
    transition density factorizes. Correlations between parameters must be
    handled inside a single transition, if needed.

    Parameters
    ----------
    mapping:
        The mapping of parameters (as tuples of str or single str) to the
        transition kernel to be used for those parameters. A single str
        key is treated as a one-element tuple.
    """


[docs]
    def __init__(self, mapping: Dict[Union[str, Tuple[str, ...]], Transition]):
        """
        Store the mapping with every key normalized to a tuple.

        Parameters
        ----------
        mapping:
            Parameter name(s) -> transition. A key that is a single str
            is wrapped into a one-element tuple, all other keys are kept
            as they are.
        """
        self.mapping = {
            ((keys,) if isinstance(keys, str) else keys): transition
            for keys, transition in mapping.items()
        }



[docs]
    def fit(self, X: pd.DataFrame, w: np.ndarray) -> None:
        """
        Fit every mapped transition on its own subset of the parameters.

        Parameters
        ----------
        X:
            The parameters; each transition only gets the columns named
            in its mapping key.
        w:
            The corresponding weights, passed unchanged to every transition.
        """
        for param_names, kernel in self.mapping.items():
            # restrict the data to the columns this kernel is responsible for
            kernel.fit(X[list(param_names)], w)



[docs]
    def rvs_single(self) -> Parameter:
        """
        Draw one sample by combining one draw from every mapped transition.

        Returns
        -------
        sample:
            A Parameter with one entry per column of ``self.X``. Entries
            start out as NaN and are overwritten by the values the
            individual transitions return.
        """
        # start from an all-NaN parameter over every fitted column
        combined = Parameter(dict.fromkeys(self.X.columns, np.nan))
        for kernel in self.mapping.values():
            # merge the partial sample in place
            combined.update(kernel.rvs_single())
        return combined



[docs]
    def pdf(
        self, x: Union[Parameter, pd.Series, pd.DataFrame]
    ) -> Union[float, np.ndarray]:
        """
        Evaluate the density at `x` as the product over all transitions.

        Parameters
        ----------
        x:
            The point(s) to evaluate. Every transition only sees the
            entries (Parameter) or labels/columns (Series or DataFrame)
            named in its mapping key.

        Returns
        -------
        density:
            The product of the individual transition densities, starting
            from 1.0.
        """
        # named ``density`` so the pandas alias ``pd`` is not shadowed
        density = 1.0
        for param_names, kernel in self.mapping.items():
            if not isinstance(x, Parameter):
                # series or dataframe: select by label
                restricted = x[list(param_names)]
            else:
                restricted = Parameter(
                    {name: x[name] for name in param_names}
                )
            # numpy will automatically broadcast
            density *= kernel.pdf(restricted)
        return density