Source code for pyabc.visualization.sample

"""Sample number plots"""

from typing import TYPE_CHECKING, List, Tuple, Union

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.ticker import MaxNLocator

if TYPE_CHECKING:
    import plotly.graph_objs as go

from ..storage import History
from ..weighted_statistics import effective_sample_size
from .util import get_labels, to_lists


def _prepare_plot_sample_numbers(
    histories: Union[List[History], History],
    labels: Union[List[str], str],
):
    """Collect the per-generation sample counts of several runs in a matrix.

    Parameters
    ----------
    histories:
        The histories to read the sample counts from.
    labels:
        Labels corresponding to the histories, forwarded to `get_labels`.

    Returns
    -------
    labels:
        The labels as returned by `get_labels`.
    matrix:
        Array of shape (n_pop, n_run). Column i holds the sample counts of
        the i-th history; shorter runs are padded with zeros at the end.
    n_run:
        The number of histories.
    n_pop:
        The largest number of populations found in any history.
    """
    histories = to_lists(histories)
    labels = get_labels(labels, len(histories))
    n_run = len(histories)

    # The first entry of each history corresponds to the calibration. It is
    # kept on purpose, to be fair against methods not requiring calibration.
    counts_per_run = [
        np.array(history.get_all_populations()['samples'])
        for history in histories
    ]

    # one column per run, one row per population
    n_pop = max(len(counts) for counts in counts_per_run)
    matrix = np.zeros((n_pop, n_run))
    for i_run, counts in enumerate(counts_per_run):
        matrix[: len(counts), i_run] = counts

    return labels, matrix, n_run, n_pop


def plot_sample_numbers(
    histories: Union[List[History], History],
    labels: Union[List[str], str] = None,
    rotation: int = 0,
    title: str = "Required samples",
    size: Tuple[float, float] = None,
    ax: mpl.axes.Axes = None,
) -> mpl.axes.Axes:
    """
    Stacked bar plot of required numbers of samples over all iterations.

    Parameters
    ----------
    histories:
        The histories to plot from. History ids must be set correctly.
    labels:
        Labels corresponding to the histories. If None are provided,
        indices are used as labels.
    rotation:
        Rotation to apply to the plot's x tick labels. For longer labels,
        a tilting of 45 or even 90 can be preferable.
    title:
        Title for the plot.
    size:
        The size of the plot in inches.
    ax:
        The matplotlib axes to plot on. If None, a new figure is created.

    Returns
    -------
    ax:
        The axes the bars were drawn on.
    """
    labels, matrix, n_run, n_pop = _prepare_plot_sample_numbers(
        histories, labels
    )

    # reuse the figure of a passed axes object, otherwise create a new one
    if ax is not None:
        fig = ax.get_figure()
    else:
        fig, ax = plt.subplots()

    # Draw from the last generation down to the first, each bar resting on
    # the summed counts of all earlier generations.
    positions = np.arange(n_run)
    for i_pop in range(n_pop - 1, -1, -1):
        ax.bar(
            x=positions,
            height=matrix[i_pop, :],
            bottom=np.sum(matrix[:i_pop, :], axis=0),
            label=f"Generation {i_pop-1}",
        )

    # annotate
    ax.set_title(title)
    ax.set_xlabel("Run")
    ax.set_ylabel("Samples")
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, rotation=rotation)
    ax.legend()

    # resize only on request, but always tighten the layout
    if size is not None:
        fig.set_size_inches(size)
    fig.tight_layout()

    return ax
def plot_sample_numbers_plotly(
    histories: Union[List[History], History],
    labels: Union[List[str], str] = None,
    rotation: int = 0,
    title: str = "Required samples",
    size: Tuple[float, float] = None,
    fig: "go.Figure" = None,
) -> "go.Figure":
    """Plot sample numbers as stacked bars using plotly.

    Parameters
    ----------
    histories:
        The histories to plot from.
    labels:
        Labels corresponding to the histories.
    rotation:
        Angle applied to the x tick labels.
    title:
        Title for the plot.
    size:
        Pair whose entries are set as layout width and height.
    fig:
        The plotly figure to add to. If None, a new figure is created.

    Returns
    -------
    fig:
        The figure holding the bar traces.
    """
    import plotly.graph_objects as go

    labels, matrix, n_run, n_pop = _prepare_plot_sample_numbers(
        histories, labels
    )

    # plotly does not support None as tick text, substitute a blank
    for ix in range(n_run):
        if labels[ix] is None:
            labels[ix] = " "

    if fig is None:
        fig = go.Figure()

    # One trace per generation, last generation first. `base` lifts each
    # bar onto the summed counts of all earlier generations.
    run_positions = np.arange(n_run)
    for i_pop in range(n_pop - 1, -1, -1):
        trace = go.Bar(
            x=run_positions,
            y=matrix[i_pop, :],
            name=f"Generation {i_pop-1}",
            offsetgroup=0,
            base=np.sum(matrix[:i_pop, :], axis=0),
        )
        fig.add_trace(trace)

    # axes and title
    x_axis = go.layout.XAxis(
        tickmode="array",
        tickvals=list(range(n_run)),
        ticktext=labels,
        tickangle=rotation,
        title="Run",
    )
    y_axis = go.layout.YAxis(title="Samples")
    fig.update_layout(xaxis=x_axis, yaxis=y_axis, title=title)

    if size is not None:
        width, height = size[0], size[1]
        fig.update_layout(width=width, height=height)

    return fig
def _prepare_plot_total_sample_numbers(
    histories: Union[List[History], History],
    labels: Union[List[str], str],
    yscale: str,
):
    """Compute the total sample count of every history.

    Parameters
    ----------
    histories:
        The histories to read the sample counts from.
    labels:
        Labels corresponding to the histories, forwarded to `get_labels`.
    yscale:
        'log' applies the natural logarithm to the totals, 'log10' the
        logarithm to basis 10. Any other value leaves them unchanged.

    Returns
    -------
    samples:
        Array with one (possibly log-transformed) total per history.
    labels:
        The labels as returned by `get_labels`.
    ylabel:
        Axis label matching the applied scale.
    n_run:
        The number of histories.
    """
    histories = to_lists(histories)
    labels = get_labels(labels, len(histories))
    n_run = len(histories)

    # The first entry of each history corresponds to the calibration. It is
    # kept on purpose, to be fair against methods not requiring calibration.
    samples = np.array(
        [
            np.sum(history.get_all_populations()['samples'])
            for history in histories
        ]
    )

    # transform the values and wrap the label accordingly
    ylabel = "Total samples"
    transform = {'log': np.log, 'log10': np.log10}.get(yscale)
    if transform is not None:
        samples = transform(samples)
        ylabel = yscale + "(" + ylabel + ")"

    return samples, labels, ylabel, n_run
def plot_total_sample_numbers(
    histories: Union[List, History],
    labels: Union[List, str] = None,
    rotation: int = 0,
    title: str = "Total required samples",
    yscale: str = 'lin',
    size: tuple = None,
    ax: mpl.axes.Axes = None,
) -> mpl.axes.Axes:
    """
    Bar plot of total required sample number over all iterations, i.e.
    a single-colored bar per history, in contrast to `plot_sample_numbers`,
    which visually distinguishes iterations.

    Parameters
    ----------
    histories:
        The histories to plot from. History ids must be set correctly.
    labels:
        Labels corresponding to the histories. If None are provided,
        indices are used as labels.
    rotation:
        Rotation to apply to the plot's x tick labels. For longer labels,
        a tilting of 45 or even 90 can be preferable.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the counts. Can be one of 'lin', 'log'
        (basis e) or 'log10'
    size:
        The size of the plot in inches.
    ax:
        The matplotlib axes to plot on. If None, a new figure is created.

    Returns
    -------
    ax:
        The axes the bars were drawn on.
    """
    samples, labels, ylabel, n_run = _prepare_plot_total_sample_numbers(
        histories, labels, yscale
    )

    # reuse the figure of a passed axes object, otherwise create a new one
    if ax is not None:
        fig = ax.get_figure()
    else:
        fig, ax = plt.subplots()

    # one bar per run
    positions = np.arange(n_run)
    ax.bar(x=positions, height=samples)

    # annotate
    ax.set_title(title)
    ax.set_xlabel("Run")
    ax.set_ylabel(ylabel)
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, rotation=rotation)

    # resize only on request, but always tighten the layout
    if size is not None:
        fig.set_size_inches(size)
    fig.tight_layout()

    return ax
def plot_total_sample_numbers_plotly(
    histories: Union[List, History],
    labels: Union[List, str] = None,
    rotation: int = 0,
    title: str = "Total required samples",
    yscale: str = 'lin',
    size: tuple = None,
    fig: "go.Figure" = None,
) -> "go.Figure":
    """Plot total sample numbers using plotly.

    Plotly counterpart of `plot_total_sample_numbers`: a single bar per
    history showing the total number of samples over all iterations.

    Parameters
    ----------
    histories:
        The histories to plot from.
    labels:
        Labels corresponding to the histories.
    rotation:
        Angle applied to the x tick labels.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the counts. Can be one of 'lin', 'log'
        (basis e) or 'log10'
    size:
        Pair whose entries are set as layout width and height.
    fig:
        The plotly figure to add to. If None, a new figure is created.

    Returns
    -------
    fig:
        The figure holding the bar trace.
    """
    import plotly.graph_objects as go

    # prepare data
    samples, labels, ylabel, n_run = _prepare_plot_total_sample_numbers(
        histories, labels, yscale
    )

    # create figure
    if fig is None:
        fig = go.Figure()

    # plot bars
    fig.add_trace(
        go.Bar(
            x=np.arange(n_run),
            y=samples,
            name="Total samples",
            offsetgroup=0,
            base=0,
        )
    )

    # add labels
    fig.update_layout(
        xaxis=go.layout.XAxis(
            tickmode="array",
            tickvals=list(range(n_run)),
            ticktext=labels,
            tickangle=rotation,
            # same x axis title as the matplotlib version and as
            # `plot_sample_numbers_plotly`
            title="Run",
        ),
        yaxis=go.layout.YAxis(title=ylabel),
        title=title,
    )

    # set size
    if size is not None:
        fig.update_layout(width=size[0], height=size[1])

    return fig
def _prepare_plot_sample_numbers_trajectory(
    histories: Union[List, History],
    labels: Union[List, str],
    yscale: str,
):
    """Prepare data for plotting sample number trajectories.

    Parameters
    ----------
    histories:
        The histories to read times and sample counts from.
    labels:
        Labels corresponding to the histories, forwarded to `get_labels`.
    yscale:
        'log' applies the natural logarithm to the counts, 'log10' the
        logarithm to basis 10. Any other value leaves them unchanged.

    Returns
    -------
    samples:
        List with one array of (possibly log-transformed) sample counts
        per history.
    times:
        List with one array of population indices per history.
    labels:
        The labels as returned by `get_labels`.
    ylabel:
        Axis label matching the applied scale.
    """
    histories = to_lists(histories)
    labels = get_labels(labels, len(histories))

    # The first entry of each history corresponds to the calibration. It is
    # kept on purpose, to be fair against methods not requiring calibration.
    population_tables = [
        history.get_all_populations() for history in histories
    ]
    times = [np.array(table['t']) for table in population_tables]
    samples = [np.array(table['samples']) for table in population_tables]

    # transform the values and wrap the label accordingly
    ylabel = "Samples"
    transform = {'log': np.log, 'log10': np.log10}.get(yscale)
    if transform is not None:
        samples = [transform(sample) for sample in samples]
        ylabel = yscale + "(" + ylabel + ")"

    return samples, times, labels, ylabel
def plot_sample_numbers_trajectory(
    histories: Union[List, History],
    labels: Union[List, str] = None,
    title: str = "Required samples",
    yscale: str = 'lin',
    size: tuple = None,
    ax: mpl.axes.Axes = None,
) -> mpl.axes.Axes:
    """
    Plot of required sample number over all iterations, i.e. one trajectory
    per history.

    Parameters
    ----------
    histories:
        The histories to plot from. History ids must be set correctly.
    labels:
        Labels corresponding to the histories. If None are provided,
        indices are used as labels.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the counts. Can be one of 'lin', 'log'
        (basis e) or 'log10'
    size:
        The size of the plot in inches.
    ax:
        The matplotlib axes to plot on. If None, a new figure is created.

    Returns
    -------
    ax:
        Axis of the generated plot.
    """
    samples, times, labels, ylabel = _prepare_plot_sample_numbers_trajectory(
        histories, labels, yscale
    )

    # reuse the figure of a passed axes object, otherwise create a new one
    if ax is not None:
        fig = ax.get_figure()
    else:
        fig, ax = plt.subplots()

    # one line per history
    for t, sample, label in zip(times, samples, labels):
        ax.plot(t, sample, 'x-', label=label)

    # a legend is only drawn if at least one label is set
    if not all(lab is None for lab in labels):
        ax.legend()

    # annotate
    ax.set_title(title)
    ax.set_xlabel("Population index $t$")
    ax.set_ylabel(ylabel)

    # resize only on request, but always tighten the layout
    if size is not None:
        fig.set_size_inches(size)
    fig.tight_layout()

    return ax
def plot_sample_numbers_trajectory_plotly(
    histories: Union[List, History],
    labels: Union[List, str] = None,
    title: str = "Required samples",
    yscale: str = 'lin',
    size: tuple = None,
    fig: "go.Figure" = None,
) -> "go.Figure":
    """Plot sample number trajectories using plotly.

    Parameters
    ----------
    histories:
        The histories to plot from.
    labels:
        Labels corresponding to the histories, used as trace names.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the counts. Can be one of 'lin', 'log'
        (basis e) or 'log10'
    size:
        Pair whose entries are set as layout width and height.
    fig:
        The plotly figure to add to. If None, a new figure is created.

    Returns
    -------
    fig:
        The figure holding one trace per history.
    """
    import plotly.graph_objects as go

    samples, times, labels, ylabel = _prepare_plot_sample_numbers_trajectory(
        histories, labels, yscale
    )

    if fig is None:
        fig = go.Figure()

    # one trace per history
    for t, sample, label in zip(times, samples, labels):
        trace = go.Scatter(x=t, y=sample, mode='lines+markers', name=label)
        fig.add_trace(trace)

    # axes and title
    x_axis = go.layout.XAxis(title="Population index $t$")
    y_axis = go.layout.YAxis(title=ylabel)
    fig.update_layout(title=title, xaxis=x_axis, yaxis=y_axis)

    if size is not None:
        width, height = size[0], size[1]
        fig.update_layout(width=width, height=height)

    return fig
def _prepare_plot_acceptance_rates_trajectory(
    histories: Union[List, History],
    labels: Union[List, str],
    yscale: str,
    colors: List[str],
    normalize_by_ess: bool,
):
    """Compute the acceptance rate trajectory of every history.

    The rate of a population is its size divided by the number of samples
    that were required for it. The first entry of each history is skipped.

    Parameters
    ----------
    histories:
        The histories to read the populations from.
    labels:
        Labels corresponding to the histories, forwarded to `get_labels`.
    yscale:
        'log' applies the natural logarithm to the rates, 'log10' the
        logarithm to basis 10. Any other value leaves them unchanged.
    colors:
        One color per history. If None, a list of None values of matching
        length is returned instead.
    normalize_by_ess:
        If True, the effective sample size of the weights of each
        population is used in place of the population size.

    Returns
    -------
    rates:
        List with one array of (possibly log-transformed) rates per history.
    times:
        List with one array of population indices per history.
    labels:
        The labels as returned by `get_labels`.
    ylabel:
        Axis label matching the applied scale.
    colors:
        The colors, one entry per history.
    """
    # preprocess input
    histories = to_lists(histories)
    labels = get_labels(labels, len(histories))
    if colors is None:
        colors = [None] * len(histories)

    # extract sample numbers
    times = []
    samples = []
    pop_sizes = []
    for history in histories:
        # note: the first entry of time -1 is trivial and is thus ignored here
        h_info = history.get_all_populations()
        ts = np.array(h_info['t'])[1:]
        times.append(ts)
        if normalize_by_ess:
            ess = np.zeros(len(ts))
            # Fill by position, not by time value: with the -1 entry
            # dropped the times start at 0, so `ess[t - 1]` would write
            # the first value to `ess[-1]` and shift all others by one
            # relative to `times` and `samples`.
            for ix, t in enumerate(ts):
                w = history.get_weighted_distances(t=t)['w']
                ess[ix] = effective_sample_size(w)
            pop_sizes.append(ess)
        else:
            pop_sizes.append(
                np.array(history.get_nr_particles_per_population().values[1:])
            )
        samples.append(np.array(h_info['samples'])[1:])

    # compute acceptance rates
    rates = [
        pop_size / sample for sample, pop_size in zip(samples, pop_sizes)
    ]

    # apply scale
    ylabel = "Acceptance rate"
    if yscale == 'log':
        rates = [np.log(rate) for rate in rates]
        ylabel = "log(" + ylabel + ")"
    elif yscale == 'log10':
        rates = [np.log10(rate) for rate in rates]
        ylabel = "log10(" + ylabel + ")"

    return rates, times, labels, ylabel, colors
def plot_acceptance_rates_trajectory(
    histories: Union[List, History],
    labels: Union[List, str] = None,
    title: str = "Acceptance rates",
    yscale: str = 'lin',
    size: tuple = None,
    colors: List[str] = None,
    normalize_by_ess: bool = False,
    ax: mpl.axes.Axes = None,
) -> mpl.axes.Axes:
    """
    Plot of acceptance rates over all iterations, i.e. one trajectory
    per history.

    Parameters
    ----------
    histories:
        The histories to plot from. History ids must be set correctly.
    labels:
        Labels corresponding to the histories. If None are provided,
        indices are used as labels.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the counts. Can be one of 'lin', 'log'
        (basis e) or 'log10'
    size:
        The size of the plot in inches.
    colors:
        Colors to use for the trajectories. If None, then the matplotlib
        default values are used.
    normalize_by_ess: bool, optional (default = False)
        Indicator to use effective sample size for the acceptance rate
        in place of the population size.
    ax:
        Axis of the plot. If None, a new axis object is generated.

    Returns
    -------
    ax:
        Axis of the generated plot.
    """
    prepared = _prepare_plot_acceptance_rates_trajectory(
        histories, labels, yscale, colors, normalize_by_ess
    )
    rates, times, labels, ylabel, colors = prepared

    # reuse the figure of a passed axes object, otherwise create a new one
    if ax is not None:
        fig = ax.get_figure()
    else:
        fig, ax = plt.subplots()

    # one line per history
    for t, rate, label, color in zip(times, rates, labels, colors):
        ax.plot(t, rate, 'x-', label=label, color=color)

    # a legend is only drawn if at least one label is set
    if not all(lab is None for lab in labels):
        ax.legend()

    # annotate
    ax.set_title(title)
    ax.set_xlabel("Population index $t$")
    ax.set_ylabel(ylabel)

    # resize only on request, but always tighten the layout
    if size is not None:
        fig.set_size_inches(size)
    fig.tight_layout()

    return ax
def plot_acceptance_rates_trajectory_plotly(
    histories: Union[List, History],
    labels: Union[List, str] = None,
    title: str = "Acceptance rates",
    yscale: str = 'lin',
    size: tuple = None,
    colors: List[str] = None,
    normalize_by_ess: bool = False,
    fig: "go.Figure" = None,
) -> "go.Figure":
    """Plot acceptance rates trajectories using plotly.

    Parameters
    ----------
    histories:
        The histories to plot from.
    labels:
        Labels corresponding to the histories, used as trace names.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the rates. Can be one of 'lin', 'log'
        (basis e) or 'log10'
    size:
        Pair whose entries are set as layout width and height.
    colors:
        Line colors to use for the trajectories, one per history.
    normalize_by_ess:
        Indicator to use effective sample size for the acceptance rate
        in place of the population size.
    fig:
        The plotly figure to add to. If None, a new figure is created.

    Returns
    -------
    fig:
        The figure holding one trace per history.
    """
    import plotly.graph_objects as go

    prepared = _prepare_plot_acceptance_rates_trajectory(
        histories, labels, yscale, colors, normalize_by_ess
    )
    rates, times, labels, ylabel, colors = prepared

    if fig is None:
        fig = go.Figure()

    # one trace per history
    for t, rate, label, color in zip(times, rates, labels, colors):
        trace = go.Scatter(
            x=t,
            y=rate,
            mode='lines+markers',
            name=label,
            line={'color': color},
        )
        fig.add_trace(trace)

    # axes and title
    fig.update_layout(
        title=title,
        xaxis_title="Population index $t$",
        yaxis_title=ylabel,
    )

    if size is not None:
        width, height = size[0], size[1]
        fig.update_layout(width=width, height=height)

    return fig
def plot_lookahead_evaluations(
    sampler_df: Union[pd.DataFrame, str],
    relative: bool = False,
    fill: bool = False,
    alpha: float = None,
    t_min: int = 0,
    title: str = "Total evaluations",
    size: tuple = None,
    ax: mpl.axes.Axes = None,
):
    """Plot total vs look-ahead evaluations over the generations.

    Parameters
    ----------
    sampler_df:
        Dataframe or file as generated via
        `RedisEvalParallelSampler(log_file=...)`. The columns `t`,
        `n_lookahead` and `n_evaluated` are read.
    relative:
        Whether to normalize the total evaluations for each generation to 1.
    fill:
        If True, instead of lines, filled areas are drawn that sum up to the
        totals.
    alpha:
        Alpha value for lines or areas. If None, 0.7 is used for areas and
        1.0 for lines.
    t_min:
        The minimum generation to show. E.g. a value of 1 omits the first
        generation.
    title:
        Plot title.
    size:
        The size of the plot in inches.
    ax:
        The axis object to use. If None, a new figure is created.

    Returns
    -------
    ax:
        Axis of the generated plot.
    """
    # process input
    if isinstance(sampler_df, str):
        sampler_df = pd.read_csv(sampler_df, sep=',')
    if alpha is None:
        alpha = 0.7 if fill else 1.0

    # create figure
    if ax is None:
        fig, ax = plt.subplots()
    else:
        fig = ax.get_figure()

    # restrict to t >= t_min
    sampler_df = sampler_df[sampler_df.t >= t_min]

    # extract variables
    t = sampler_df.t
    n_la = sampler_df.n_lookahead
    n_eval = sampler_df.n_evaluated
    n_act = n_eval - n_la

    # normalize
    if relative:
        # Divide out of place: `n_la` and `n_eval` are columns of the
        # filtered frame, and in-place `/=` on them may write into that
        # frame and trigger pandas' chained-assignment warnings.
        n_la = n_la / n_eval
        n_act = n_act / n_eval
        n_eval = n_eval / n_eval

    # plot
    if fill:
        ax.fill_between(t, n_la, n_eval, alpha=alpha, label="Actual")
        ax.fill_between(t, 0, n_la, alpha=alpha, label="Look-ahead")
    else:
        ax.plot(
            t,
            n_eval,
            linestyle='--',
            marker='o',
            color='black',
            alpha=alpha,
            label="Total",
        )
        ax.plot(t, n_act, marker='o', alpha=alpha, label="Actual")
        ax.plot(t, n_la, marker='o', alpha=alpha, label="Look-ahead")

    # prettify plot
    ax.legend()
    ax.set_title(title)
    ax.set_xlabel("Population index")
    ax.set_ylabel("Evaluations")
    ax.set_ylim(bottom=0)
    # enforce integer ticks
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if size is not None:
        fig.set_size_inches(size)

    return ax
def plot_lookahead_final_acceptance_fractions(
    sampler_df: Union[pd.DataFrame, str],
    population_sizes: Union[np.ndarray, History],
    relative: bool = False,
    fill: bool = False,
    alpha: float = None,
    t_min: int = 0,
    title: str = "Composition of final acceptances",
    size: tuple = None,
    ax: mpl.axes.Axes = None,
):
    """Plot fraction of look-ahead samples in final acceptances,
    over generations.

    Parameters
    ----------
    sampler_df:
        Dataframe or file as generated via
        `RedisEvalParallelSampler(log_file=...)`. The columns `t` and
        `n_lookahead_accepted` are read.
    population_sizes:
        The sizes of the populations of accepted particles. If a History is
        passed, those values are extracted automatically, otherwise should
        be for the same time values as `sampler_df`. Array-like input is
        converted to a float array and matched to the rows of `sampler_df`
        by position.
    relative:
        Whether to normalize the total evaluations for each generation to 1.
    fill:
        If True, instead of lines, filled areas are drawn that sum up to the
        totals.
    alpha:
        Alpha value for lines or areas. If None, 0.7 is used for areas and
        1.0 for lines.
    t_min:
        The minimum generation to show. E.g. a value of 1 omits the first
        generation.
    title:
        Plot title.
    size:
        The size of the plot in inches.
    ax:
        The axis object to use. If None, a new figure is created.

    Returns
    -------
    ax:
        Axis of the generated plot.
    """
    # process input
    if isinstance(sampler_df, str):
        sampler_df = pd.read_csv(sampler_df, sep=',')
    if alpha is None:
        alpha = 0.7 if fill else 1.0

    # create figure
    if ax is None:
        fig, ax = plt.subplots()
    else:
        fig = ax.get_figure()

    # get numbers of final acceptances
    if isinstance(population_sizes, History):
        pop = population_sizes.get_all_populations()
        population_sizes = np.array(
            [pop.loc[pop.t == t, 'particles'] for t in sampler_df.t],
            dtype=float,
        ).flatten()
    else:
        # Work on a float array: integer input would make the relative
        # normalization below fail, as a true division cannot be stored
        # back into an integer array.
        population_sizes = np.asarray(population_sizes, dtype=float)

    # restrict to t >= t_min (the mask is applied by position)
    keep = (sampler_df.t >= t_min).to_numpy()
    population_sizes = population_sizes[keep]
    sampler_df = sampler_df[keep]

    # extract variables
    t = sampler_df.t
    n_la_acc = sampler_df.n_lookahead_accepted

    # actual look-ahead acceptances cannot be more than requested
    n_la_acc = np.minimum(n_la_acc, population_sizes)

    # actual acceptances are the remaining ones, as these are always later
    n_act_acc = population_sizes - n_la_acc

    # normalize (out of place, so no input buffer is overwritten)
    if relative:
        n_la_acc = n_la_acc / population_sizes
        n_act_acc = n_act_acc / population_sizes
        population_sizes = population_sizes / population_sizes

    # plot
    if fill:
        ax.fill_between(
            t, n_la_acc, population_sizes, alpha=alpha, label="Actual"
        )
        ax.fill_between(t, 0, n_la_acc, alpha=alpha, label="Look-ahead")
    else:
        ax.plot(
            t,
            population_sizes,
            linestyle='--',
            marker='o',
            color='black',
            alpha=alpha,
            label="Population size",
        )
        ax.plot(t, n_act_acc, marker='o', alpha=alpha, label="Actual")
        ax.plot(t, n_la_acc, marker='o', alpha=alpha, label="Look-ahead")

    # prettify plot
    ax.legend()
    ax.set_title(title)
    ax.set_xlabel("Population index")
    ax.set_ylabel("Final acceptances")
    ax.set_ylim(bottom=0)
    # enforce integer ticks
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if size is not None:
        fig.set_size_inches(size)

    return ax
def plot_lookahead_acceptance_rates(
    sampler_df: Union[pd.DataFrame, str],
    t_min: int = 0,
    title: str = "Acceptance rates",
    size: tuple = None,
    ax: mpl.axes.Axes = None,
):
    """Plot acceptance rates for look-ahead vs ordinary samples.

    The ratios are relative to all accepted particles, including eventually
    discarded ones.

    Parameters
    ----------
    sampler_df:
        Dataframe or file as generated via
        `RedisEvalParallelSampler(log_file=...)`.
    t_min:
        The minimum generation to show. E.g. a value of 1 omits the first
        generation.
    title:
        Plot title.
    size:
        The size of the plot in inches.
    ax:
        The axis object to use. If None, a new figure is created.

    Returns
    -------
    ax:
        Axis of the generated plot.
    """
    # a string is treated as the path of a comma-separated file
    if isinstance(sampler_df, str):
        sampler_df = pd.read_csv(sampler_df, sep=',')

    # reuse the figure of a passed axes object, otherwise create a new one
    if ax is not None:
        fig = ax.get_figure()
    else:
        fig, ax = plt.subplots()

    # keep only generations t >= t_min
    sampler_df = sampler_df[sampler_df.t >= t_min]
    t = sampler_df.t

    # counts of accepted and of evaluated samples: all, look-ahead only,
    # and the difference of both (the actual proposal)
    accepted_all = sampler_df.n_accepted
    evaluated_all = sampler_df.n_evaluated
    accepted_la = sampler_df.n_lookahead_accepted
    evaluated_la = sampler_df.n_lookahead
    accepted_act = accepted_all - accepted_la
    evaluated_act = evaluated_all - evaluated_la

    # acceptance rates
    rate_all = accepted_all / evaluated_all
    rate_act = accepted_act / evaluated_act
    rate_la = accepted_la / evaluated_la

    # plot
    ax.plot(
        t,
        rate_all,
        linestyle='--',
        marker='o',
        color='black',
        label="Combined",
    )
    ax.plot(t, rate_act, marker='o', label="Actual")
    ax.plot(t, rate_la, marker='o', label="Look-ahead")

    # prettify plot
    ax.legend()
    ax.set_title(title)
    ax.set_xlabel("Population index")
    ax.set_ylabel("Acceptance rate")
    ax.set_ylim(bottom=0)
    # enforce integer ticks
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if size is not None:
        fig.set_size_inches(size)

    return ax