"""A discrete jump transition function."""
from typing import Union
import numpy as np
import pandas as pd
from ..parameters import Parameter
from ..random_choice import fast_random_choice
from ..random_variables import RV
from .base import DiscreteTransition
[docs]
class PerturbationKernel:
"""Parameter perturbation kernel for a discrete set of parameters.
Parameters
----------
domain:
All possible parameter values.
p_stay:
The probability to stay at a given parameter value.
"""
[docs]
def __init__(self, domain: np.ndarray, p_stay: float = 0.7):
if len(np.unique(domain)) != len(domain):
raise ValueError("The domain contains duplicates.")
self.domain = domain
if len(domain) == 1:
p_stay = 1.0
if not 0 <= p_stay <= 1:
raise ValueError("p_stay must be in [0, 1].")
self.p_stay = p_stay
self.p_move = (1 - p_stay) / (len(self.domain) - 1)
# cache a random variable (later the start index and 0 must be swapped)
indices = np.arange(len(domain))
probabilities = [p_stay, *[self.p_move] * (len(self.domain) - 1)]
self.rv = RV('rv_discrete', values=(indices, probabilities))
[docs]
def rvs(self, a: float) -> float:
"""Sample a kernel jump from parameter `a` to another parameter."""
if a not in self.domain:
raise ValueError("The parameter value is not in the domain.")
if len(self.domain) == 1:
return a
# sample from the cached random variable
ix = self.rv.rvs()
# get index of the starting parameter
ix_a = np.argmax(self.domain == a)
# in the cached rv, ix_a and 0 were swapped -> swap them again
if ix == 0:
ix = ix_a
elif ix == ix_a:
ix = 0
return self.domain[ix]
[docs]
def pdf(self, b: float, a: float) -> float:
"""Probability mass function for a jump to target `b` from source `a`."""
if a not in self.domain or b not in self.domain:
raise ValueError(
"At least one parameter value is not in the domain."
)
return self.p_stay if b == a else self.p_move
[docs]
class DiscreteJumpTransition(DiscreteTransition):
"""
Transition with positive random jump probability for discrete parameters.
Adapts base draw probabilities to the last generation's histogram and then
jumps to an arbitrary other parameter with a positive jump probability to
ensure that the prior is absolutely continuous w.r.t. the proposal.
Parameters
----------
domain, p_stay:
See the PerturbationKernel.
.. note::
This transition can only deal with a single parameter. Use an
AggregatedTransition to combine multiple parameters.
"""
[docs]
def __init__(self, domain: np.ndarray, p_stay: float = 0.7):
self.values = None
self.weights = None
self.perturbation_kernel = PerturbationKernel(
domain=domain, p_stay=p_stay
)
[docs]
def fit(self, X: pd.DataFrame, w: np.ndarray) -> None:
"""Fit starting weights to the distribution of samples."""
# this is only meant to be used with a single parameter
if len(X.columns) != 1:
raise ValueError(
"This transition can only handle a single parameter."
)
# compute a single weight per unique parameter value
x = np.array(X).flatten()
self.values = []
self.weights = []
for value in np.unique(x):
self.values.append(value)
self.weights.append(sum(w[x == value]))
self.weights = np.array(self.weights)
self.weights /= self.weights.sum()
[docs]
def rvs_single(self) -> Parameter:
"""Generate a single random variable."""
# sample a starting index
index = fast_random_choice(self.weights)
# get value at that index
value = self.values[index]
# maybe jump to another value
value = self.perturbation_kernel.rvs(value)
return Parameter({self.X.columns[0]: value})
[docs]
def pdf(
self, x: Union[Parameter, pd.Series, pd.DataFrame]
) -> Union[float, np.ndarray]:
"""Compute the probability mass function at `x`."""
# extract values
key = self.X.columns[0]
if isinstance(x, (Parameter, pd.Series)):
x = np.array([[x[key]]])
else:
x = x.to_numpy()
# compute densities
pds = np.array(
[
sum(
w * self.perturbation_kernel.pdf(par, start)
for w, start in zip(self.weights, self.values)
)
for par in x
]
)
return pds if pds.size != 1 else float(pds[0])