"""Various basic distances."""
import logging
from typing import Callable, List, Union
import numpy as np
from scipy import linalg as la
from ..population import Sample
from .base import Distance
logger = logging.getLogger("ABC.Distance")
[docs]
class DistanceWithMeasureList(Distance):
"""
Base class for distance functions with measure list.
This class is not functional on its own.
Parameters
----------
measures_to_use: Union[str, List[str]].
* If set to "all", all measures are used. This is the default.
* If a list is provided, the measures in the list are used.
* measures refers to the summary statistics.
"""
[docs]
def __init__(
self,
measures_to_use='all',
):
super().__init__()
# the measures (summary statistics) to use for distance calculation
self.measures_to_use = measures_to_use
[docs]
def initialize(
self,
t: int,
get_sample: Callable[[], Sample],
x_0: dict,
total_sims: int,
):
if self.measures_to_use == 'all':
self.measures_to_use = x_0.keys()
[docs]
def get_config(self):
config = super().get_config()
config["measures_to_use"] = self.measures_to_use
return config
[docs]
class ZScoreDistance(DistanceWithMeasureList):
"""
Calculate distance as sum of ZScore over the selected measures.
The measured Data is the reference for the ZScore.
Hence
.. math::
d(x, y) = \
\\sum_{i \\in \\text{measures}} \\left| \\frac{x_i-y_i}{y_i} \\right|
"""
[docs]
def __call__(
self,
x: dict,
x_0: dict,
t: int = None,
par: dict = None,
) -> float:
return sum(
abs((x[key] - x_0[key]) / x_0[key])
if x_0[key] != 0
else (0 if x[key] == 0 else np.inf)
for key in self.measures_to_use
) / len(self.measures_to_use)
[docs]
class PCADistance(DistanceWithMeasureList):
"""
Calculate distance in whitened coordinates.
A PCA whitening transformation :math:`X` is calculated from an initial
sample.
The distance is measured as p-norm distance in the transformed space.
I.e
.. math::
d(x,y) = \\| Wx - Wy \\|
Parameters
----------
measures_to_use: See DistanceWithMeasureList.
p: p-norm, defaults to Euclidean distance.
"""
[docs]
def __init__(self, measures_to_use='all', p: float = 2):
super().__init__(measures_to_use)
self.p: float = p
self.trafo: Union[np.ndarray, None] = None
def _dict_to_vect(self, x):
return np.asarray([x[key] for key in self.measures_to_use])
def _calculate_whitening_transformation_matrix(self, sum_stats):
# create data matrix, shape (n_sample, n_y)
x = np.asarray([self._dict_to_vect(x) for x in sum_stats])
# center
mean = np.mean(x, axis=0)
x -= mean
# covariance matrix, with bias correction
cov = (x.T @ x) / (x.shape[0] - 1)
# eigenvalues and eigenvectors
ew, ev = la.eigh(cov)
# whitening transformation
self.trafo = np.diag(1.0 / np.sqrt(ew)) @ ev.T
[docs]
def requires_calibration(self) -> bool:
return True
[docs]
def initialize(
self,
t: int,
get_sample: Callable[[], Sample],
x_0: dict,
total_sims: int,
):
super().initialize(
t=t,
get_sample=get_sample,
x_0=x_0,
total_sims=total_sims,
)
# execute function
all_sum_stats = get_sample().all_sum_stats
self._calculate_whitening_transformation_matrix(all_sum_stats)
[docs]
def __call__(
self,
x: dict,
x_0: dict,
t: int = None,
par: dict = None,
) -> float:
x_vec, x_0_vec = self._dict_to_vect(x), self._dict_to_vect(x_0)
distance = la.norm(
self.trafo @ (x_vec - x_0_vec).reshape(-1, 1), ord=self.p
)
return distance
[docs]
class RangeEstimatorDistance(DistanceWithMeasureList):
"""
Abstract base class for distance functions which estimate is based on a
range.
It defines the two template methods ``lower`` and ``upper``.
Hence
.. math::
d(x, y) = \
\\sum_{i \\in \\text{measures}} \\left | \\frac{x_i - y_i}{u_i - l_i}\
\\right |
where :math:`l_i` and :math:`u_i` are the lower and upper
margin for measure :math:`i`.
"""
[docs]
@staticmethod
def lower(parameter_list: List[float]):
"""
Calculate the lower margin form a list of parameter values.
Parameters
----------
parameter_list: List[float]
List of values of a parameter.
Returns
-------
lower_margin: float
The lower margin of the range calculated from these parameters
"""
[docs]
@staticmethod
def upper(parameter_list: List[float]):
"""
Calculate the upper margin form a list of parameter values.
Parameters
----------
parameter_list: List[float]
List of values of a parameter.
Returns
-------
upper_margin: float
The upper margin of the range calculated from these parameters
"""
[docs]
def __init__(self, measures_to_use='all'):
super().__init__(measures_to_use)
self.normalization = None
[docs]
def get_config(self):
config = super().get_config()
config["normalization"] = self.normalization
return config
def _calculate_normalization(self, sum_stats):
measures = {name: [] for name in self.measures_to_use}
for sample in sum_stats:
for measure in self.measures_to_use:
measures[measure].append(sample[measure])
self.normalization = {
measure: self.upper(measures[measure])
- self.lower(measures[measure])
for measure in self.measures_to_use
}
[docs]
def requires_calibration(self) -> bool:
return True
[docs]
def initialize(
self,
t: int,
get_sample: Callable[[], Sample],
x_0: dict,
total_sims: int,
):
super().initialize(
t=t,
get_sample=get_sample,
x_0=x_0,
total_sims=total_sims,
)
# execute function
all_sum_stats = get_sample().all_sum_stats
self._calculate_normalization(all_sum_stats)
[docs]
def __call__(
self,
x: dict,
x_0: dict,
t: int = None,
par: dict = None,
) -> float:
distance = sum(
abs((x[key] - x_0[key]) / self.normalization[key])
for key in self.measures_to_use
)
return distance
[docs]
class MinMaxDistance(RangeEstimatorDistance):
"""
Calculate upper and lower margins as max and min of the parameters.
This works surprisingly well for normalization in simple cases
"""
[docs]
@staticmethod
def upper(parameter_list):
return max(parameter_list)
[docs]
@staticmethod
def lower(parameter_list):
return min(parameter_list)
[docs]
class PercentileDistance(RangeEstimatorDistance):
"""
Calculate normalization 20% and 80% from percentiles as lower
and upper margins
"""
PERCENTILE = 20 #: The percentiles
[docs]
@staticmethod
def upper(parameter_list):
return np.percentile(
parameter_list, 100 - PercentileDistance.PERCENTILE
)
[docs]
@staticmethod
def lower(parameter_list):
return np.percentile(parameter_list, PercentileDistance.PERCENTILE)
[docs]
def get_config(self):
config = super().get_config()
config["PERCENTILE"] = self.PERCENTILE
return config