Source code for pyabc.epsilon.epsilon

import logging
from typing import Callable, List, Union

import numpy as np
import pandas as pd

from ..weighted_statistics import weighted_quantile
from .base import Epsilon

logger = logging.getLogger("ABC.Epsilon")


class ConstantEpsilon(Epsilon):
    """
    Keep epsilon constant over all populations.
    This acceptance threshold scheduling strategy is most likely only
    interesting for debugging purposes.

    Parameters
    ----------
    constant_epsilon_value: float
        The epsilon value for all populations.
    """

    def __init__(self, constant_epsilon_value: float):
        super().__init__()
        self.constant_epsilon_value = constant_epsilon_value

    def get_config(self):
        config = super().get_config()
        config["constant_epsilon_value"] = self.constant_epsilon_value
        return config

    def __call__(self, t: int):
        return self.constant_epsilon_value

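A minimal usage sketch, not part of the module itself: it assumes the class is re-exported at the package top level (as in pyabc's public API) and simply shows that the returned threshold never changes across generations.

# Hypothetical usage sketch for ConstantEpsilon.
from pyabc import ConstantEpsilon  # assumed top-level re-export

eps = ConstantEpsilon(0.1)
print([eps(t) for t in range(3)])  # [0.1, 0.1, 0.1]
print(eps.get_config())            # includes "constant_epsilon_value": 0.1
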
class ListEpsilon(Epsilon):
    """
    Return epsilon values from a predefined list. For every time point
    enquired later, an epsilon value must exist in the list.

    Parameters
    ----------
    values: List[float]
        List of epsilon values. ``values[t]`` is the value for population t.
    """

    def __init__(self, values: List[float]):
        super().__init__()
        self.epsilon_values = list(values)

    def get_config(self):
        config = super().get_config()
        config["epsilon_values"] = self.epsilon_values
        return config

    def __call__(self, t: int):
        return self.epsilon_values[t]

    def __len__(self):
        return len(self.epsilon_values)

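Analogously, a small sketch of how ListEpsilon might be used (the import path is an assumption): the value at index t of the supplied list is returned for population t, and asking for a time point beyond the list raises an IndexError.

# Hypothetical usage sketch for ListEpsilon: a fixed per-generation schedule.
from pyabc import ListEpsilon  # assumed top-level re-export

eps = ListEpsilon([10.0, 5.0, 2.5, 1.0])
print(eps(0), eps(3))  # 10.0 1.0
print(len(eps))        # 4 generations covered; eps(4) would raise IndexError
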
class QuantileEpsilon(Epsilon):
    """
    Calculate epsilon as the alpha-quantile of the distances from the last
    population. This strategy works even if the posterior is multi-modal.
    Note that the acceptance threshold calculation is based on the distance
    to the observation, not on the parameters which generated data with that
    distance. If completely different parameter sets produce equally good
    samples, the distances of their samples to the ground truth data should
    be comparable.

    The idea behind weighting is that the probability p_k of obtaining a
    distance eps_k in the next generation should be proportional to the
    weight w_k of the respective particle k in the current generation.
    Both the weighted and the non-weighted median should lead to correct
    results.

    Parameters
    ----------
    initial_epsilon: Union[str, int, float]
        * If 'from_sample', then the initial quantile is calculated from
          a sample of the current population size from the prior distribution.
        * If a number is given, this number is used.
    alpha: float
        The alpha-quantile to be used, e.g. alpha=0.5 means median.
    quantile_multiplier: float
        Multiplies the quantile by that number. This also applies to the
        initial quantile if it is calculated from samples. However, it does
        **not** apply to the initial quantile if it is given as a number.
    weighted: bool
        Flag indicating whether the new epsilon should be computed using
        weighted (True, default) or non-weighted (False) distances.
    """

    def __init__(
        self,
        initial_epsilon: Union[str, int, float] = 'from_sample',
        alpha: float = 0.5,
        quantile_multiplier: float = 1,
        weighted: bool = True,
    ):
        super().__init__()
        self._initial_epsilon = initial_epsilon
        self.alpha = alpha
        self.quantile_multiplier = quantile_multiplier
        self.weighted = weighted
        self._look_up = {}

        if self.alpha > 1 or self.alpha <= 0:
            raise ValueError("It must be 0 < alpha <= 1")

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "initial_epsilon": self._initial_epsilon,
                "alpha": self.alpha,
                "quantile_multiplier": self.quantile_multiplier,
                "weighted": self.weighted,
            }
        )
        return config

    def requires_calibration(self) -> bool:
        return self._initial_epsilon == 'from_sample'

    def is_adaptive(self) -> bool:
        return True

    def initialize(
        self,
        t: int,
        get_weighted_distances: Callable[[], pd.DataFrame],
        get_all_records: Callable[[], List[dict]],
        max_nr_populations: int,
        acceptor_config: dict,
    ):
        if not self.requires_calibration():
            # safety check in __call__
            return

        # execute function
        weighted_distances = get_weighted_distances()

        # initialize epsilon
        self._update(t, weighted_distances)

        # logging
        logger.debug(f"Initial eps: {self._look_up[t]:.4e}")
    def __call__(self, t: int) -> float:
        """
        Get the epsilon value for time t, as set previously via the
        update() method.

        Returns
        -------
        eps: float
            The epsilon value for time t (raises a KeyError if it does
            not exist).
        """
        # initialize if necessary
        if not self._look_up:
            self._set_initial_value(t)

        try:
            eps = self._look_up[t]
        except KeyError as e:
            raise KeyError(
                f"The epsilon value for time {t} does not exist: {repr(e)}"
            )

        return eps

    def _set_initial_value(self, t: int):
        self._look_up = {t: self._initial_epsilon}

    def update(
        self,
        t: int,
        get_weighted_distances: Callable[[], pd.DataFrame],
        get_all_records: Callable[[], List[dict]],
        acceptance_rate: float,
        acceptor_config: dict,
    ):
        """
        Compute the quantile of the (weighted) distances of the given
        population, and use it to update epsilon.
        """
        # execute function
        weighted_distances = get_weighted_distances()

        # update epsilon
        self._update(t, weighted_distances)

        # logging
        logger.debug(f"New eps, t={t}, eps={self._look_up[t]}")

    def _update(
        self,
        t: int,
        weighted_distances: pd.DataFrame,
    ):
        """
        Here the real update happens, based on the weighted distances.
        """
        # extract distances
        distances = weighted_distances.distance.values

        # extract weights
        if self.weighted:
            weights = weighted_distances.w.values.astype(float)
            # normalize weights to 1
            weights /= weights.sum()
        else:
            len_distances = len(distances)
            weights = np.ones(len_distances) / len_distances

        # compute weighted quantile
        quantile = weighted_quantile(
            points=distances, weights=weights, alpha=self.alpha
        )

        # append to the look_up property
        self._look_up[t] = quantile * self.quantile_multiplier

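To make the update rule concrete, here is a sketch (the top-level import and the toy data are assumptions, not part of the module): a small weighted-distance table, shaped like the one returned by get_weighted_distances() during a run, is passed to update(), and the next threshold becomes the weighted alpha-quantile of those distances times quantile_multiplier.

# Hypothetical usage sketch for QuantileEpsilon with a toy population.
import pandas as pd
from pyabc import QuantileEpsilon  # assumed top-level re-export

eps = QuantileEpsilon(initial_epsilon=5.0, alpha=0.5)
print(eps(0))  # 5.0, the numeric initial epsilon

# Toy population: columns 'distance' and 'w', as produced by
# get_weighted_distances() during a pyabc run.
df = pd.DataFrame({"distance": [1.0, 2.0, 3.0, 4.0],
                   "w": [0.1, 0.2, 0.3, 0.4]})

eps.update(
    t=1,
    get_weighted_distances=lambda: df,
    get_all_records=lambda: [],  # not used by this strategy
    acceptance_rate=1.0,         # not used by this strategy
    acceptor_config={},          # not used by this strategy
)
print(eps(1))  # the weighted 0.5-quantile of the distances above
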
class MedianEpsilon(QuantileEpsilon):
    """
    Calculate epsilon as median of the distances from the last population.
    """

    def __init__(
        self,
        initial_epsilon: Union[str, int, float] = 'from_sample',
        median_multiplier: float = 1,
        weighted: bool = True,
    ):
        super().__init__(
            initial_epsilon=initial_epsilon,
            alpha=0.5,
            quantile_multiplier=median_multiplier,
            weighted=weighted,
        )
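Since MedianEpsilon only fixes alpha=0.5, it produces the same threshold as a QuantileEpsilon with that alpha when fed the same weighted distances. The sketch below illustrates this; the imports and toy data are assumptions for illustration only.

# Hypothetical sketch: MedianEpsilon is QuantileEpsilon with alpha=0.5.
import pandas as pd
from pyabc import MedianEpsilon, QuantileEpsilon  # assumed re-exports

df = pd.DataFrame({"distance": [0.5, 1.5, 2.5], "w": [1.0, 1.0, 1.0]})
kwargs = dict(
    get_weighted_distances=lambda: df,
    get_all_records=lambda: [],
    acceptance_rate=1.0,
    acceptor_config={},
)

med = MedianEpsilon(initial_epsilon=10.0)
quant = QuantileEpsilon(initial_epsilon=10.0, alpha=0.5)
med.update(t=1, **kwargs)
quant.update(t=1, **kwargs)
assert med(1) == quant(1)  # identical weighted medians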