"""Source code for pyabc.population.population: particles, populations, samples."""

import logging
from typing import Callable, Dict, List, Tuple

import numpy as np
import pandas as pd

from ..parameters import Parameter

logger = logging.getLogger("ABC.Population")


class Particle:
    """An (accepted or rejected) particle, containing the information that
    will also be stored in the database.

    Stores all summary statistics that were generated during the creation of
    this particle, and a flag indicating whether this particle was accepted
    or not.

    Parameters
    ----------
    m:
        The model index.
    parameter:
        The model specific parameter.
    weight:
        The weight of the particle. 0 <= weight <= 1.
    sum_stat:
        Model simulation.
    distance:
        Distance of simulated and measured data.
    accepted:
        True if particle was accepted, False if not.
    proposal_id:
        An identifier for the proposal the particle was generated from.
        This allows grouping particles accordingly.
    preliminary:
        Whether this particle is only preliminarily accepted. Must be False
        eventually for all particles.

    .. note::
        There are two different ways of weighting particles: First, the
        weights can be calculated as emerges from the importance sampling.
        Second, the weights of particles belonging to one model can be
        summed to, after normalization, find model probabilities. Then, the
        weights of all particles belonging to one model can be summed to
        one. Weighting is transferred to the second way in
        _normalize_weights() in order to also have access to model
        probabilities. This mode is also stored in the database. If one
        needs access to the first weighting scheme later on again, one has
        to perform backwards transformation, multiplying the weights with
        the model probabilities.
    """

    def __init__(
        self,
        m: int,
        parameter: Parameter,
        weight: float,
        sum_stat: dict,
        distance: float,
        accepted: bool = True,
        proposal_id: int = 0,
        preliminary: bool = False,
    ):
        self.m = m
        self.parameter = parameter
        self.weight = weight
        self.sum_stat = sum_stat
        self.distance = distance
        self.accepted = accepted
        self.proposal_id = proposal_id
        self.preliminary = preliminary

    def __repr__(self) -> str:
        # Added for debuggability: concise summary of the particle, omitting
        # the potentially large sum_stat and parameter payloads.
        return (
            f"{type(self).__name__}(m={self.m!r}, weight={self.weight!r}, "
            f"distance={self.distance!r}, accepted={self.accepted!r}, "
            f"proposal_id={self.proposal_id!r}, "
            f"preliminary={self.preliminary!r})"
        )
class Population:
    """A population contains a list of particles and offers standardized
    access to them.

    Upon initialization, the particle weights are checked for normalization
    and the model probabilities are computed as described in
    calculate_model_probabilities.

    Parameters
    ----------
    particles:
        Particles that constitute the accepted population.

    Raises
    ------
    AssertionError:
        If any particle is not accepted, or the total weight is not 1.
    """

    def __init__(self, particles: List[Particle]):
        self.particles = particles
        # model index -> probability; filled by
        # calculate_model_probabilities() below
        self._model_probabilities = None

        # checks
        if any(not p.accepted for p in particles):
            raise AssertionError(
                "A population should only consist of accepted particles"
            )
        if not np.isclose(total_weight := sum(p.weight for p in particles), 1):
            raise AssertionError(
                f"The population total weight {total_weight} is not normalized."
            )

        self.calculate_model_probabilities()

    def __len__(self):
        return len(self.particles)

    def calculate_model_probabilities(self) -> None:
        """Compute and cache the model probabilities.

        Computes model weights as relative sums of particles belonging to a
        model. The total weight is 1 (checked in __init__), since usually
        normalization happens already for the entire sample including
        rejected particles.
        """
        particles_by_model = self.get_particles_by_model()
        # calculate weight per model
        model_total_weights = {
            m: sum(particle.weight for particle in plist)
            for m, plist in particles_by_model.items()
        }
        # calculate total weight (should be 1, see __init__)
        population_total_weight = sum(model_total_weights.values())
        # model probabilities are weights per model divided by total weight
        self._model_probabilities = {
            m: w / population_total_weight
            for m, w in model_total_weights.items()
        }

    def update_distances(
        self,
        distance_to_ground_truth: Callable[[dict, Parameter], float],
    ) -> None:
        """Update the distances of all summary statistics of all particles
        according to the passed distance function (which is typically
        different from the distance function with which the original
        distances were computed).

        :param distance_to_ground_truth:
            Distance function to the observed summary statistics.
        """
        for particle in self.particles:
            particle.distance = distance_to_ground_truth(
                particle.sum_stat, particle.parameter
            )

    def get_model_probabilities(self) -> pd.DataFrame:
        """Get probabilities of the individual models.

        Returns
        -------
        model_probabilities: pd.DataFrame
            Data frame with the model indices as index and the
            probabilities in column 'p'.
        """
        # _model_probabilities are cached at initialization.
        # Fixed: the previous version shadowed the builtin `vars` and
        # built the index/value lists via an intermediate tuple list.
        ms = list(self._model_probabilities.keys())
        ps = list(self._model_probabilities.values())
        return pd.DataFrame({'m': ms, 'p': ps}).set_index('m')

    def get_alive_models(self) -> List:
        """Get the indices of models still present in the population.

        Returns
        -------
        alive_models:
            A list of model indices (the annotation promises a List; the
            previous implementation leaked a dict keys view).
        """
        return list(self._model_probabilities.keys())

    def nr_of_models_alive(self) -> int:
        """Get the number of models still present in the population."""
        return len(self.get_alive_models())

    def get_distribution(self, m: int) -> Tuple[pd.DataFrame, np.ndarray]:
        """Get the weighted parameter distribution of model m.

        Parameters
        ----------
        m:
            Model index.

        Returns
        -------
        parameters, weights:
            Data frame of parameters and array of weights, re-normalized
            to sum to 1 within the model.
        """
        particles = self.get_particles_by_model()[m]
        parameters = pd.DataFrame([p.parameter for p in particles])
        weights = np.array([p.weight for p in particles])
        # re-normalize within the model
        weights /= weights.sum()
        return parameters, weights

    def get_weighted_distances(self) -> pd.DataFrame:
        """Create DataFrame of (distance, weight)'s.

        The particle weights are multiplied by the model probabilities.
        The weights thus sum to 1.

        Returns
        -------
        weighted_distances: pd.DataFrame:
            A pd.DataFrame containing in column 'distance' the distances
            and in column 'w' the scaled weights.
        """
        return pd.DataFrame(
            [
                {'distance': particle.distance, 'w': particle.weight}
                for particle in self.particles
            ]
        )

    def get_weighted_sum_stats(self) -> tuple:
        """Get weights and summary statistics.

        Returns
        -------
        weights, sum_stats: 2-Tuple of lists
        """
        weights = [particle.weight for particle in self.particles]
        sum_stats = [particle.sum_stat for particle in self.particles]
        return weights, sum_stats

    def get_accepted_sum_stats(self) -> List[dict]:
        """Return a list of all accepted summary statistics."""
        return [particle.sum_stat for particle in self.particles]

    def get_for_keys(self, keys) -> dict:
        """Get dataframe of population values.

        Possible entries of keys: weight, distance, sum_stat, parameter.

        Returns
        -------
        Dictionary where the keys are associated to same-ordered lists of
        the corresponding values.

        Raises
        ------
        ValueError:
            If any key is not among the allowed ones.
        """
        # check input
        allowed_keys = ['weight', 'distance', 'parameter', 'sum_stat']
        for key in keys:
            if key not in allowed_keys:
                raise ValueError(f"Key {key} not in {allowed_keys}.")

        # the keys coincide with the particle attribute names,
        # so a simple getattr extraction suffices
        return {
            key: [getattr(particle, key) for particle in self.particles]
            for key in keys
        }

    def get_particles_by_model(self) -> Dict[int, List[Particle]]:
        """Get particles by model.

        Returns
        -------
        particles_by_model: dict
            A dictionary with the models as keys and a list of particles
            for each model as values.
        """
        particles_by_model = {}
        for particle in self.particles:
            if particle is None:
                # defensive: should not happen for a valid population
                logger.warning("Empty particle.")
                continue
            # append particle for key particle.m, creating an empty list
            # if the key does not yet exist
            particles_by_model.setdefault(particle.m, []).append(particle)
        return particles_by_model
class Sample:
    """Container for all particles generated during one sampling process.

    Always holds the accepted particles; rejected particles are kept as
    well if requested, up to a configurable maximum number.

    Parameters
    ----------
    record_rejected:
        Whether to record rejected particles as well, along with accepted
        ones.
    is_look_ahead:
        Whether this sample consists of particles generated in look-ahead
        mode.
    max_nr_rejected:
        Maximum number of rejected particles to store.
    ok:
        Whether the sampling process succeeded (usually in generating the
        requested number of particles).
    """

    def __init__(
        self,
        record_rejected: bool = False,
        max_nr_rejected: int = np.inf,
        is_look_ahead: bool = False,
        ok: bool = True,
    ):
        # particles collected so far, split by acceptance status
        self.accepted_particles: List[Particle] = []
        self.rejected_particles: List[Particle] = []
        # configuration flags
        self.record_rejected: bool = record_rejected
        self.max_nr_rejected: int = max_nr_rejected
        self.is_look_ahead: bool = is_look_ahead
        self.ok: bool = ok

    @staticmethod
    def from_population(population: Population) -> 'Sample':
        """Create a dummy sample from a population.

        Obviously, the sample will then only consist of accepted particles.

        Parameters
        ----------
        population:
            Population to create a sample from.

        Returns
        -------
        sample:
            The generated sample containing all population particles.
        """
        # a valid population may not contain rejected particles
        if any(not particle.accepted for particle in population.particles):
            raise AssertionError(
                "A population should only consist of accepted particles"
            )
        sample = Sample()
        for particle in population.particles:
            sample.append(particle)
        return sample

    @property
    def all_particles(self):
        # accepted first, then rejected
        return [*self.accepted_particles, *self.rejected_particles]

    @property
    def all_sum_stats(self):
        """Get all summary statistics.

        Returns
        -------
        all_sum_stats: List
            Concatenation of all the all_sum_stats lists of all
            particles added and accepted to this sample via append().
        """
        return [particle.sum_stat for particle in self.all_particles]

    def append(self, particle: Particle):
        """Add a new particle to the sample.

        Accepted particles are always stored; rejected ones only if
        recording is enabled and the storage cap has not been reached.

        Parameters
        ----------
        particle: Particle
            Sampled particle containing all information needed later.
        """
        if particle.accepted:
            self.accepted_particles.append(particle)
            return
        # rejected: keep only if recording is on and below the cap
        if (
            self.record_rejected
            and len(self.rejected_particles) < self.max_nr_rejected
        ):
            self.rejected_particles.append(particle)

    def __add__(self, other: "Sample"):
        # merge two samples, inheriting this sample's configuration
        merged = Sample(
            record_rejected=self.record_rejected,
            max_nr_rejected=self.max_nr_rejected,
        )
        merged.accepted_particles = [
            *self.accepted_particles,
            *other.accepted_particles,
        ]
        rejected = [
            *self.rejected_particles,
            *other.rejected_particles,
        ]
        # enforce the cap on stored rejected particles
        if len(rejected) > merged.max_nr_rejected:
            rejected = rejected[: merged.max_nr_rejected]
        merged.rejected_particles = rejected
        # the other attributes (is_look_ahead, ok) keep their defaults
        return merged

    @property
    def n_accepted(self) -> int:
        """
        Returns
        -------
        n_accepted: int
            Number of accepted particles.
        """
        return len(self.accepted_particles)

    def get_accepted_population(self) -> Population:
        """
        Returns
        -------
        population: Population
            A population of only the accepted particles.
        """
        return Population(self.accepted_particles.copy())

    def normalize_weights(self):
        """Normalize weights to sum(accepted weights) = 1.

        This is done at the end of a sampling run. Normalizing rejected
        weights by the same factor ensures that weights stay comparable.
        """
        total = sum(p.weight for p in self.accepted_particles)
        if np.isclose(total, 0.0):
            logger.warning(
                f"The total population weight {total} is "
                "close to zero, which can be numerically problematic"
            )
        if total == 0.0:
            raise AssertionError("The total population weight is zero")
        # rescale accepted and rejected particles alike
        for p in self.all_particles:
            p.weight /= total
class SampleFactory:
    """Factory for creating empty samples.

    Stores the sampling configuration so that components (such as the
    distance function and epsilon) can influence the sampling process by
    referring to this object rather than to individual samples.

    Parameters
    ----------
    record_rejected: bool
        Corresponds to Sample.record_rejected.
    """

    def __init__(
        self,
        record_rejected: bool = False,
        max_nr_rejected: int = np.inf,
    ):
        # configuration forwarded to every sample created via __call__
        self._record_rejected = record_rejected
        self._max_nr_rejected = max_nr_rejected

    def record_rejected(self, record: bool = True):
        """Switch whether to record rejected particles."""
        logger.info(f"Recording also rejected particles: {record}")
        self._record_rejected = record

    def __call__(self, is_look_ahead: bool = False):
        """Create a new empty sample."""
        sample = Sample(
            record_rejected=self._record_rejected,
            max_nr_rejected=self._max_nr_rejected,
            is_look_ahead=is_look_ahead,
        )
        return sample