Source code for pyabc.random_variables.random_variables

import logging
from abc import ABC, abstractmethod
from functools import reduce
from typing import Union

from ..parameters import Parameter, ParameterStructure

rv_logger = logging.getLogger("ABC.RV")


[docs] class RVBase(ABC): """Random variable abstract base class. .. note:: Why introduce another random variable class and not just use the one's provided in ``scipy.stats``? This funny construction is done because ``scipy.stats`` distributions are not pickleable. This class is really a very thin wrapper around ``scipy.stats`` distributions to make them pickleable. It is important to be able to pickle them to execute the ABCSMC algorithm in a distributed cluster environment """
[docs] @abstractmethod def copy(self) -> "RVBase": """Copy the random variable. Returns ------- copied_rv: RVBase A copy of the random variable. """
[docs] @abstractmethod def rvs(self, *args, **kwargs) -> float: """Sample from the RV. Returns ------- sample: float A sample from the random variable. """
[docs] @abstractmethod def pmf(self, x, *args, **kwargs) -> float: """Probability mass function. Parameters ---------- x: int Probability mass at ``x``. Returns ------- mass: float The mass at ``x``. """
[docs] @abstractmethod def pdf(self, x: float, *args, **kwargs) -> float: """Probability density function. Parameters ---------- x: float Probability density at x. Returns ------- density: float Probability density at x. """
[docs] @abstractmethod def cdf(self, x: float, *args, **kwargs) -> float: """Cumulative distribution function. Parameters ---------- x: float Cumulative distribution function at x. Returns ------- density: float Cumulative distribution function at x. """
[docs] class RV(RVBase): """Concrete random variable. Parameters ---------- name: str Name of the distribution as in ``scipy.stats`` args: Arguments as in ``scipy.stats`` matching the distribution with name "name". kwargs: Keyword arguments as in ``scipy.stats`` matching the distribution with name "name". """
[docs] @classmethod def from_dictionary(cls, dictionary: dict) -> "RV": """Construct random variable from dictionary. Parameters ---------- dictionary: dict A dictionary with the keys * "name" (mandatory) * "args" (optional) * "kwargs" (optional) as in scipy.stats. .. note:: Either the "args" or the "kwargs" key has to be present. """ return cls( dictionary['type'], *dictionary.get('args', []), **dictionary.get('kwargs', {}), )
[docs] def __init__(self, name: str, *args, **kwargs): self.name = name self.args = args self.kwargs = kwargs self.distribution = None "the scipy.stats. ... distribution object" self.__setstate__(self.__getstate__())
def __getattr__(self, item): return getattr(self.distribution, item) def __getstate__(self): return self.name, self.args, self.kwargs def __setstate__(self, state): self.name = state[0] self.args = state[1] self.kwargs = state[2] import scipy.stats as st distribution = getattr(st, self.name) self.distribution = distribution(*self.args, **self.kwargs)
[docs] def copy(self): return self.__class__(self.name, *self.args, **self.kwargs)
[docs] def rvs(self, *args, **kwargs): return self.distribution.rvs(*args, **kwargs)
[docs] def pmf(self, x, *args, **kwargs): return self.distribution.pmf(x, *args, **kwargs)
[docs] def pdf(self, x, *args, **kwargs): return self.distribution.pdf(x, *args, **kwargs)
[docs] def cdf(self, x, *args, **kwargs): return self.distribution.cdf(x, *args, **kwargs)
def __repr__(self): return "<RV name={name}, args={args}, kwargs={kwargs}>".format( name=self.name, args=self.args, kwargs=self.kwargs )
[docs] class RVDecorator(RVBase): """Random variable decorator base class. Implement a decorator pattern. Further decorators should derive from this class. It stores the decorated random variable in ``self.component`` Overwrite the method ``decorator_repr`` to represent the decorator type. The decorated variable will then be automatically included in the call to ``__repr__``. Parameters ---------- component: RVBase The random variable to be decorated. """
[docs] def __init__(self, component: RVBase): self.component = component #: The decorated random variable
[docs] def rvs(self, *args, **kwargs): return self.component.rvs(*args, **kwargs)
[docs] def pmf(self, x, *args, **kwargs): return self.component.pmf(x, *args, **kwargs)
[docs] def pdf(self, x, *args, **kwargs): return self.component.pdf(x, *args, **kwargs)
[docs] def cdf(self, x, *args, **kwargs): return self.component.cdf(x, *args, **kwargs)
[docs] def copy(self): return self.__class__(self.component.copy())
[docs] def decorator_repr(self) -> str: # pylint: disable=R0201 """Represent the decorator itself. Template method. The ``__repr__`` method used ``decorator_repr`` and the ``__repr__`` of the decorated RV to build a combined representation. Returns ------- decorator_repr: str A string representing the decorator only. """ return "Decorator"
def __repr__(self): return ( "[{decorator_repr}]".format(decorator_repr=self.decorator_repr()) + self.component.__repr__() )
[docs] class LowerBoundDecorator(RVDecorator): """ Impose a strict lower bound on a random variable. Condition RV X to X > lower bound. In particular P(X = lower_bound) = 0. .. note:: Sampling is done via rejection. Up to 10000 samples are taken from the decorated RV. The first sample within the permitted range is then taken. Otherwise None is returned. Parameters ---------- component: RV The decorated random variable. lower_bound: float The lower bound. """ MAX_TRIES = 10000
[docs] def __init__(self, component: RV, lower_bound: float): if component.cdf(lower_bound) == 1: raise Exception( "LowerBoundDecorator: Conditioning on a set of measure zero." ) self.lower_bound = lower_bound super(LowerBoundDecorator, self).__init__(component)
[docs] def copy(self): return self.__class__(self.component.copy(), self.lower_bound)
[docs] def decorator_repr(self): return "Lower: X > {lower:2f}".format(lower=self.lower_bound)
[docs] def rvs(self, *args, **kwargs): for _ in range(LowerBoundDecorator.MAX_TRIES): sample = self.component.rvs() # not sure whether > is the exact opposite. but <= is consistent if not (sample <= self.lower_bound): return sample # with the other functions return None
[docs] def pdf(self, x, *args, **kwargs): if x <= self.lower_bound: return 0.0 return self.component.pdf(x) / ( 1 - self.component.cdf(self.lower_bound) )
[docs] def pmf(self, x, *args, **kwargs): if x <= self.lower_bound: return 0.0 return self.component.pmf(x) / ( 1 - self.component.cdf(self.lower_bound) )
[docs] def cdf(self, x, *args, **kwargs): if x <= self.lower_bound: return 0.0 lower_mass = self.component.cdf(self.lower_bound) return (self.component.cdf(x) - lower_mass) / (1 - lower_mass)
[docs] class DistributionBase(ABC): """Distribution of parameters for a model, abstract base class. A distribution is a collection of RVs and/or distributions. This should be used to define a prior. """
[docs] @abstractmethod def rvs(self, *args, **kwargs) -> Parameter: """Sample from joint distribution. Returns ------- parameter: Parameter A parameter which was sampled. """
[docs] @abstractmethod def pdf(self, x: Union[Parameter, dict]): """Get probability density at point `x`. Parameters ---------- x : Union[Parameter, dict] Evaluate at the given Parameter ``x``. """
[docs] class Distribution(DistributionBase, ParameterStructure): """Distribution of parameters for a model assuming independence. Essentially something like a dictionary of random variables or distributions. The variables from which the distribution is initialized are independent. """ def __repr__(self): return ( "<Distribution\n " + ",\n ".join(f"{id}={rv}" for id, rv in self.items()) + ">" )
[docs] @classmethod def from_dictionary_of_dictionaries( cls, dict_of_dicts: dict ) -> "Distribution": """Create distribution from dictionary of dictionaries. Parameters ---------- dict_of_dicts: dict The keys of the dict indicate the parameters names. The values are itself dictionaries representing scipy.stats distribution. I.e. the have the key "name" and at least one of the keys "args" or "kwargs". Returns ------- distribution: Distribution Created distribution. """ rv_dictionary = {} for key, value in dict_of_dicts.items(): rv_dictionary[key] = RV.from_dictionary(value) return cls(rv_dictionary)
[docs] def copy(self) -> "Distribution": """Copy the distribution. Returns ------- copied_distribution: Distribution A copy of the distribution. """ return self.__class__( **{key: value.copy() for key, value in self.items()} )
[docs] def update_random_variables(self, **random_variables): """Update random variables within the distribution. Parameters ---------- **random_variables: keywords are the parameters' names, the values are random variable. """ self.update(random_variables)
[docs] def get_parameter_names(self) -> list: """Get a sorted list of parameter names. Returns ------- sorted_names: list Sorted list of parameter names. """ return sorted(self.keys())
[docs] def rvs(self, *args, **kwargs) -> Parameter: """Sample from joint distribution. Returns ------- parameter: Parameter A parameter which was sampled. """ return Parameter( **{key: val.rvs(*args, **kwargs) for key, val in self.items()} )
[docs] def pdf(self, x: Union[Parameter, dict]): """Get probability density at point `x` (product of marginals). Combination of probability density functions (for continuous variables) and probability mass function (for discrete variables). Parameters ---------- x : Union[Parameter, dict] Evaluate at the given Parameter ``x``. """ # check if the parameters match if sorted(x.keys()) != sorted(self.keys()): raise Exception( "Random variable parameter mismatch. Expected: " + str(sorted(self.keys())) + " got " + str(sorted(x.keys())) ) if len(self) > 0: res = [] for key, val in x.items(): try: # works for continuous variables res.append(self[key].pdf(val)) except AttributeError: # discrete variables do not have a pdf but a pmf res.append(self[key].pmf(val)) return reduce(lambda s, t: s * t, res) else: return 1