"""
Acceptor
--------
After summary statistics of samples for given parameters have
been generated, it must be checked whether these are to be
accepted or not. This happens in the Acceptor class.
The most typical and simple way is to compute the distance
between simulated and observed summary statistics, and accept
if this distance is below some epsilon threshold. However, also
more complex acceptance criteria are possible, in particular
when the distance measure and epsilon criteria develop over
time.
"""
import logging
from typing import Callable, Union
import numpy as np
import pandas as pd
from ..distance import SCALE_LIN, Distance, StochasticKernel
from ..epsilon import Epsilon
from ..parameters import Parameter
from ..storage import save_dict_to_json
from .pdf_norm import pdf_norm_from_kernel, pdf_norm_max_found
logger = logging.getLogger("ABC.Acceptor")
class AcceptorResult(dict):
    """
    Outcome of a single acceptance evaluation.

    Behaves like a dict whose entries are also reachable as attributes.

    Parameters
    ----------
    distance:
        Distance value obtained.
    accept:
        A flag indicating the recommendation to accept or reject.
        More specifically:
        True: The distance is below the acceptance threshold.
        False: The distance is above the acceptance threshold.
    weight:
        Weight associated with the evaluation, which may need
        to be taken into account via importance sampling when
        calculating the parameter weight. Defaults to 1.0.
    """

    def __init__(self, distance: float, accept: bool, weight: float = 1.0):
        super().__init__()
        # fill the entries in a fixed order so dict iteration order is stable
        for key, value in (
            ("distance", distance),
            ("accept", accept),
            ("weight", weight),
        ):
            self[key] = value

    def __getattr__(self, key):
        # expose dict entries as attributes; a missing key becomes the
        # AttributeError that attribute access protocols expect
        if key in self:
            return self[key]
        raise AttributeError(key)

    # attribute assignment/deletion operate directly on the dict entries
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
class Acceptor:
    """
    The acceptor class encodes the acceptance step.

    This class is abstract and cannot be invoked itself.
    """

    def __init__(self):
        """Default constructor (stateless)."""

    def initialize(
        self,
        t: int,
        get_weighted_distances: Callable[[], pd.DataFrame],
        distance_function: Distance,
        x_0: dict,
    ):
        """
        Initialize the acceptor.

        Called once by the ABCSMC framework at the start, and can be used
        to calibrate the acceptor to initial statistics.
        The default implementation does nothing.

        Parameters
        ----------
        t:
            The timepoint to initialize the acceptor for.
        get_weighted_distances:
            Returns on demand the distances for initializing the acceptor.
        distance_function:
            Distance object. The acceptor should not modify it, but might
            extract some meta information.
        x_0:
            The observed summary statistics.
        """

    def update(
        self,
        t: int,
        get_weighted_distances: Callable[[], pd.DataFrame],
        prev_temp: float,
        acceptance_rate: float,
    ):
        """
        Update the acceptance criterion.

        The default implementation does nothing.

        Parameters
        ----------
        t:
            The timepoint to update the acceptor for.
        get_weighted_distances:
            The past generation's weighted distances.
        prev_temp:
            The past generation's temperature.
        acceptance_rate:
            The past generation's acceptance rate.
        """

    def __call__(
        self,
        distance_function: Distance,
        eps: Epsilon,
        x: dict,
        x_0: dict,
        t: int,
        par: Parameter,
    ):
        """
        Compute the distance between summary statistics and evaluate
        whether to accept or reject.

        All concrete implementations must implement this method.

        Parameters
        ----------
        distance_function:
            The distance function.
            The user is free to use or ignore this function.
        eps:
            The acceptance thresholds.
            The user is free to use or ignore this object.
        x:
            Current summary statistics to evaluate.
        x_0:
            The observed summary statistics.
        t:
            Time point for which to check.
        par:
            The model parameters used to simulate x.

        Returns
        -------
        An AcceptorResult.

        .. note::
            Currently, only one value encoding the distance is returned
            (and stored in the database),
            namely that at time t, even if also other distances affect the
            acceptance decision, e.g. distances from previous iterations,
            or in general if the distance is not scalar.
            This is because the last distance is likely to be most
            informative for the further process, and because in some parts
            of ABC a scalar distance value is required.
        """
        raise NotImplementedError()

    def requires_calibration(self) -> bool:
        """
        Whether the class requires an initial calibration, based on
        samples from the prior. Default: False.
        """
        return False

    def is_adaptive(self) -> bool:
        """
        Whether the class is dynamically updated after each generation,
        based on the last generation's available data. Default: False.
        """
        return False

    # pylint: disable=R0201
    def get_epsilon_config(self, t: int) -> dict:
        """
        Create a configuration object that contains all values of interest
        for the update of the Epsilon object.

        Parameters
        ----------
        t:
            The timepoint for which to get the config.

        Returns
        -------
        config:
            The relevant information.
        """
        return {}
class FunctionAcceptor(Acceptor):
    """
    Acceptor built from a plain callable.

    Parameters
    ----------
    fun:
        Callable with the same signature as the __call__ method.
    """

    def __init__(self, fun: Callable):
        super().__init__()
        self.fun = fun

    def __call__(self, distance_function, eps, x, x_0, t, par):
        # delegate the full acceptance decision to the wrapped callable
        return self.fun(distance_function, eps, x, x_0, t, par)

    @staticmethod
    def to_acceptor(maybe_acceptor: Union[Acceptor, Callable]) -> Acceptor:
        """
        Create an acceptor object from input.

        Parameters
        ----------
        maybe_acceptor:
            Either pass a full acceptor, or a callable which is then
            wrapped in a FunctionAcceptor.

        Returns
        -------
        acceptor: Acceptor
            An Acceptor object in either case.
        """
        if isinstance(maybe_acceptor, Acceptor):
            return maybe_acceptor
        return FunctionAcceptor(maybe_acceptor)
def accept_use_current_time(distance_function, eps, x, x_0, t, par):
    """
    Use only the distance function and epsilon criterion at the current time
    point to evaluate whether to accept or reject.
    """
    # a particle is accepted iff its distance is below the current threshold
    distance = distance_function(x, x_0, t, par)
    return AcceptorResult(distance=distance, accept=distance <= eps(t))
def accept_use_complete_history(distance_function, eps, x, x_0, t, par):
    """
    Use the acceptance criteria from the complete history to evaluate whether
    to accept or reject.

    This includes time points 0,...,t, as far as these are
    available. If either the distance function or the epsilon criterion cannot
    handle any time point in this interval, the resulting error is simply
    intercepted and the respective time not used for evaluation. This situation
    can frequently occur when continuing a stopped run. A different behavior
    is easy to implement.

    .. note::
        This may not work as intended or at all when using adaptive summary
        statistics.
    """
    # the current criterion is checked first, as it is most likely to fail
    distance = distance_function(x, x_0, t, par)
    accept = distance <= eps(t)

    if accept:
        # also check against all previous distances and acceptance criteria
        for t_prev in range(t):
            try:
                accept = distance_function(x, x_0, t_prev, par) <= eps(t_prev)
            except Exception:
                # this time point cannot be evaluated, skip it for now
                accept = True
                continue
            if not accept:
                break

    return AcceptorResult(distance=distance, accept=accept)
class StochasticAcceptor(Acceptor):
    """
    This acceptor implements a stochastic acceptance step based on a
    probability density, generalizing from the uniform acceptance kernel.
    A particle is accepted if for the simulated summary statistics x,
    observed summary statistics x_0 and parameters theta holds

    .. math::

       \\frac{\\text{pdf}(x_0|x,\\theta)}{c}\\geq u

    where u ~ U[0,1], and c is a normalizing constant.

    The concept is based on [#wilkinson]_. In addition, we introduce
    acceptance kernel temperation and rejection control importance sampling
    to permit a more flexible choice and adaptation of c.

    .. [#wilkinson] Wilkinson, Richard David; "Approximate Bayesian
        computation (ABC) gives exact results under the assumption of model
        error"; Statistical applications in genetics and molecular biology
        12.2 (2013): 129-141.
    """

    def __init__(
        self,
        pdf_norm_method: Callable = None,
        apply_importance_weighting: bool = True,
        log_file: str = None,
    ):
        """
        Parameters
        ----------
        pdf_norm_method:
            Function to calculate a pdf normalization (denoted `c` above).
            Shipped are `pyabc.acceptor.pdf_norm_from_kernel` to use the
            value provided by the StochasticKernel, and
            `pyabc.acceptor.pdf_norm_max_found` (default) to always use
            the maximum value among accepted particles so far.
            Note that re-weighting based on ideas from rejection control
            importance sampling to handle the normalization constant being
            insufficient, and thus avoiding an importance sampling bias,
            is included either way.
        apply_importance_weighting:
            Whether to apply weights to correct for a bias induced by
            samples exceeding the density normalization. This may be False
            usually only for testing purposes.
        log_file:
            A log file for storing data of the acceptor that are currently not
            saved in the database. The data are saved in json format and can
            be retrieved via `pyabc.storage.load_dict_from_json`.
        """
        super().__init__()
        # default to tracking the maximum density found so far
        self.pdf_norm_method = (
            pdf_norm_max_found if pdf_norm_method is None else pdf_norm_method
        )
        self.apply_importance_weighting = apply_importance_weighting
        self.log_file = log_file
        # pdf normalizations (maximum pdfs), indexed by time
        self.pdf_norms = {}
        # fields populated in initialize()
        self.x_0 = None
        self.kernel_scale = None
        self.kernel_pdf_max = None

    def requires_calibration(self) -> bool:
        # this check is rather superficial and may be improved by a re-design
        # of `pdf_norm_method`
        return self.pdf_norm_method != pdf_norm_from_kernel

    def is_adaptive(self) -> bool:
        # this check is rather superficial and may be improved by a re-design
        # of `pdf_norm_method`
        return self.pdf_norm_method != pdf_norm_from_kernel

    def initialize(
        self,
        t: int,
        get_weighted_distances: Callable[[], pd.DataFrame],
        distance_function: StochasticKernel,
        x_0: dict,
    ):
        """
        Initialize temperature and maximum pdf.
        """
        # cache kernel meta information for use in __call__
        self.x_0 = x_0
        self.kernel_scale = distance_function.ret_scale
        self.kernel_pdf_max = distance_function.pdf_max
        # compute the first pdf normalization
        self._update(t, get_weighted_distances)

    def update(
        self,
        t: int,
        get_weighted_distances: Callable[[], pd.DataFrame],
        prev_temp: float,
        acceptance_rate: float,
    ):
        self._update(t, get_weighted_distances, prev_temp, acceptance_rate)

    def _update(
        self,
        t: int,
        get_weighted_distances: Callable[[], pd.DataFrame],
        prev_temp: float = None,
        acceptance_rate: float = 1.0,
    ):
        """
        Update schemes for the upcoming time point t.
        """
        # the previous normalization is the largest value seen so far,
        # or None in the very first update
        prev_pdf_norm = (
            max(self.pdf_norms.values()) if self.pdf_norms else None
        )
        # update pdf normalization
        self.pdf_norms[t] = self.pdf_norm_method(
            kernel_val=self.kernel_pdf_max,
            get_weighted_distances=get_weighted_distances,
            prev_pdf_norm=prev_pdf_norm,
            acceptance_rate=acceptance_rate,
            prev_temp=prev_temp,
        )
        self.log(t)

    def log(self, t):
        # report the current normalization, and optionally persist all of them
        logger.debug(f"pdf_norm={self.pdf_norms[t]:.4e} for t={t}.")
        if self.log_file:
            save_dict_to_json(self.pdf_norms, self.log_file)

    def get_epsilon_config(self, t: int) -> dict:
        """
        Pack the pdf normalization and the kernel scale.
        """
        return {
            'pdf_norm': self.pdf_norms[t],
            'kernel_scale': self.kernel_scale,  # TODO Refactor
        }

    def __call__(
        self, distance_function: StochasticKernel, eps, x, x_0, t, par
    ):
        # rename for clarity: the distance function is an acceptance kernel
        kernel = distance_function
        # the epsilon object provides the temperature here
        temp = eps(t)
        # compute probability density of the observation under the kernel
        density = kernel(x, x_0, t, par)
        pdf_norm = self.pdf_norms[t]

        # tempered acceptance probability, on the kernel's return scale
        if kernel.ret_scale == SCALE_LIN:
            acc_prob = (density / pdf_norm) ** (1 / temp)
        else:  # kernel.ret_scale == SCALE_LOG
            acc_prob = np.exp((density - pdf_norm) * (1 / temp))

        # accept with probability min(1, acc_prob)
        accept = bool(acc_prob >= np.random.uniform(low=0, high=1))

        # rejection control importance weight
        if acc_prob == 0.0:
            weight = 0.0
        elif self.apply_importance_weighting:
            # equals max(1, acc_prob): corrects for densities exceeding c
            weight = acc_prob / min(1, acc_prob)
        else:
            weight = 1.0

        # note when the normalization constant was exceeded
        if pdf_norm < density:
            logger.debug(
                f"Encountered density={density:.4e} > c={pdf_norm:.4e}, "
                f"thus weight={weight:.4e}."
            )

        # return unscaled density value and the acceptance flag
        return AcceptorResult(density, accept, weight)