Source code for pyabc.visualization.data

"""Data and summary statistics plots"""

import logging
from typing import Callable, List, Union

import matplotlib.axes
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ..storage import History

logger = logging.getLogger("ABC.Visualization")


def plot_data_callback(
    history: History,
    f_plot: Callable = None,
    f_plot_aggregated: Callable = None,
    t: int = None,
    n_sample: int = None,
    ax: matplotlib.axes.Axes = None,
    **kwargs,
):
    """
    Visualize the summary statistics stored in a history via callbacks.

    The weighted summary statistics are read from `history` and handed to
    `plot_data_callback_lowlevel`, which invokes the callbacks.

    Parameters
    ----------
    history:
        History object the weighted summary statistics are read from.
    f_plot:
        Callback plotting one summary statistic. Called as
        ``f_plot(sum_stat, weight, ax, **kwargs)``.
    f_plot_aggregated:
        Callback plotting aggregated values over all summary statistics.
        Called as ``f_plot_aggregated(sum_stats, weights, ax, **kwargs)``.
    t:
        Time point passed to ``history.get_weighted_sum_stats``.
    n_sample:
        Number of samples to hand to `f_plot`. Defaults to all.
    ax:
        Axis object for the plot. It is only forwarded to the callbacks,
        so it may also be e.g. a list of axis objects.
    **kwargs:
        Additional arguments forwarded to the plotting callbacks.

    Returns
    -------
    ax:
        Axis of the generated plot.
    """
    # The history yields the weights first and the statistics second.
    stat_weights, stats = history.get_weighted_sum_stats(t=t)

    return plot_data_callback_lowlevel(
        stats,
        stat_weights,
        f_plot=f_plot,
        f_plot_aggregated=f_plot_aggregated,
        n_sample=n_sample,
        ax=ax,
        **kwargs,
    )
def plot_data_callback_lowlevel(
    sum_stats: List,
    weights: List,
    f_plot: Callable,
    f_plot_aggregated: Callable = None,
    n_sample: int = None,
    ax=None,
    **kwargs,
):
    """
    Lowlevel interface for plot_data_callback, operating directly on
    summary statistics and weights instead of a history.

    Parameters
    ----------
    sum_stats: List
        List of summary statistics.
    weights: List
        List of corresponding (usually normalized) weights.
    f_plot:
        Callback invoked once per plotted sample as
        ``f_plot(sum_stat, weight, ax, **kwargs)``. Skipped if None.
    f_plot_aggregated:
        Callback invoked once on the full lists as
        ``f_plot_aggregated(sum_stats, weights, ax, **kwargs)``.
        Skipped if None.
    n_sample:
        Number of leading samples handed to `f_plot`. If None, the number
        of weights is used.
    ax:
        Axis object passed through to the callbacks. A new one is created
        via ``plt.subplots()`` if None.
    **kwargs:
        Additional arguments forwarded to both callbacks.

    Returns
    -------
    ax:
        The axis object that was passed to the callbacks.
    """
    if ax is None:
        _fig, ax = plt.subplots()

    if f_plot is not None:
        # Without an explicit limit, plot one entry per available weight.
        limit = len(weights) if n_sample is None else n_sample
        selected_stats = sum_stats[:limit]
        selected_weights = weights[:limit]
        for stat, stat_weight in zip(selected_stats, selected_weights):
            f_plot(stat, stat_weight, ax, **kwargs)

    # The aggregated callback always sees the full, untruncated lists.
    if f_plot_aggregated is not None:
        f_plot_aggregated(sum_stats, weights, ax, **kwargs)

    return ax
def plot_data_default(
    obs_data: dict, sim_data: dict, keys: Union[List[str], str] = None
):
    """
    Plot summary statistic data.

    One subplot is created per summary statistic, arranged on a grid that
    is as close to square as possible.

    Parameters
    ----------
    obs_data: dict
        A dictionary for the summary statistic of the observed data, where
        keys represent the summary statistic name and values represent the
        data itself. The values can be represented as pandas dataframe,
        1d numpy array, or 2d numpy array.
    sim_data: dict
        A dictionary for the summary statistic of the simulated data, where
        keys represent the summary statistic name and values represent the
        data itself. The values can be represented as pandas dataframe,
        1d numpy array, or 2d numpy array.
    keys: Union[List[str], str], optional
        Specific summary statistic keys to be used. If None, then all
        entries of `obs_data` are used.

    Returns
    -------
    arr_ax:
        Axes of the generated plot, as returned by ``plt.subplots``.

    Raises
    ------
    ValueError
        If there is no summary statistic to plot (empty `obs_data` or
        empty `keys`).
    KeyError
        If a requested key is missing from `obs_data` or `sim_data`.
    """
    # check if user specified a specific key to be printed
    if keys is None:
        keys = list(obs_data.keys())
    elif not isinstance(keys, list):
        keys = [keys]

    obs_data = {key: obs_data[key] for key in keys}
    sim_data = {key: sim_data[key] for key in keys}

    ndata = len(obs_data)
    if ndata == 0:
        # A 0x0 grid cannot be laid out; fail early with a clear message.
        raise ValueError("No summary statistics to plot.")

    # get number of rows and columns: near-square grid with the fewest
    # rows that still fit all entries
    ncols = int(np.ceil(np.sqrt(ndata)))
    nrows = -(-ndata // ncols)  # integer ceiling division

    # initialize figure
    fig, arr_ax = plt.subplots(nrows, ncols)
    # plt.subplots returns a bare Axes for a 1x1 grid and an array otherwise
    if nrows == ncols == 1:
        flat_axes = [arr_ax]
    else:
        flat_axes = arr_ax.flatten()

    # iterate over keys; both dicts were rebuilt from the same key list
    for ax, (obs_key, obs) in zip(flat_axes, obs_data.items()):
        sim = sim_data[obs_key]

        if isinstance(obs, pd.DataFrame):
            if len(obs.columns) == 1:
                # 1d: plot
                ax.plot(sim.values.flatten(), '-x', label="Simulation")
                ax.plot(obs.values.flatten(), '-x', label="Data")
                ax.set_xlabel("Index")
                ax.set_ylabel(obs.columns[0])
            else:
                # nd: scatter
                for column in obs.columns:
                    ax.scatter(
                        obs[column].values, sim[column].values, label=column
                    )
                ax.set_xlabel("Data")
                ax.set_ylabel("Simulation")
        elif isinstance(obs, np.ndarray) and obs.ndim == 1:
            # 1d: plot
            ax.plot(sim, '-x', color="C0", label='Simulation')
            ax.plot(obs, '-x', color="C1", label='Data')
            ax.set_xlabel("Index")
            ax.set_ylabel(str(obs_key))
        elif isinstance(obs, np.ndarray):
            # nd: scatter, one series per entry along the first axis
            for j, (obs_val, sim_val) in enumerate(zip(obs, sim)):
                ax.scatter(obs_val, sim_val, label=f"Coordinate {j}")
            ax.set_xlabel("Data")
            ax.set_ylabel("Simulation")
        else:
            logger.info(
                f"Data type {type(obs)} for key {obs_key} is "
                f"not supported."
            )
            # remove not needed axis
            ax.axis('off')

        # finalize axes
        ax.set_title(str(obs_key))
        ax.legend()

    # remove not needed axes (grid cells beyond the last entry)
    for ax in flat_axes[ndata:]:
        ax.axis('off')

    # finalize plot
    fig.tight_layout()

    return arr_ax