"""Sample number plots"""
from typing import TYPE_CHECKING
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.ticker import MaxNLocator
if TYPE_CHECKING:
import plotly.graph_objs as go
from ..storage import History
from ..weighted_statistics import effective_sample_size
from .util import get_labels, to_lists
def _prepare_plot_sample_numbers(
    histories: list[History] | History,
    labels: list[str] | str,
):
    """Gather per-generation sample counts of all runs in one matrix.

    Parameters
    ----------
    histories:
        The histories to read from; normalized via `to_lists`
        (presumably a single History is wrapped in a list -- verify
        against `to_lists`).
    labels:
        Labels for the histories; normalized via `get_labels`.

    Returns
    -------
    labels, matrix, n_run, n_pop:
        The normalized labels, a zero-padded array of shape
        ``(n_pop, n_run)`` holding the sample counts (one column per
        history), the number of histories, and the largest number of
        populations found in any history.
    """
    histories = to_lists(histories)
    labels = get_labels(labels, len(histories))
    n_run = len(histories)

    # note: the first entry corresponds to the calibration and should
    # be included here to be fair against methods not requiring
    # calibration
    per_run_counts = [
        np.array(history.get_all_populations()['samples'])
        for history in histories
    ]
    n_pop = max(map(len, per_run_counts))

    # runs with fewer populations keep zeros in the trailing rows
    matrix = np.zeros((n_pop, n_run))
    for column, counts in enumerate(per_run_counts):
        matrix[: len(counts), column] = counts

    return labels, matrix, n_run, n_pop
[docs]
def plot_sample_numbers(
    histories: list[History] | History,
    labels: list[str] | str = None,
    rotation: int = 0,
    title: str = 'Required samples',
    size: tuple[float, float] = None,
    ax: mpl.axes.Axes = None,
) -> mpl.axes.Axes:
    """
    Stacked bar plot of required numbers of samples over all iterations.

    Parameters
    ----------
    histories:
        The histories to plot from. History ids must be set correctly.
    labels:
        Labels corresponding to the histories. If None are provided,
        indices are used as labels.
    rotation:
        Rotation to apply to the plot's x tick labels. For longer labels,
        a tilting of 45 or even 90 can be preferable.
    title:
        Title for the plot.
    size:
        The size of the plot in inches.
    ax:
        The matplotlib axes to plot on. If None, a new figure is created.

    Returns
    -------
    ax:
        The axes the bars were drawn on.
    """
    labels, matrix, n_run, n_pop = _prepare_plot_sample_numbers(
        histories, labels
    )

    # reuse the figure of a passed axes, otherwise start a new one
    if ax is not None:
        fig = ax.get_figure()
    else:
        fig, ax = plt.subplots()

    # one bar segment per generation, each stacked on the sum of all
    # earlier generations; drawn from the last generation downwards
    run_positions = np.arange(n_run)
    for i_pop in range(n_pop - 1, -1, -1):
        stacked_below = np.sum(matrix[:i_pop, :], axis=0)
        ax.bar(
            x=run_positions,
            height=matrix[i_pop, :],
            bottom=stacked_below,
            # row 0 is the calibration entry, hence the shift by one
            label=f'Generation {i_pop - 1}',
        )

    # axis decoration
    ax.set_xticks(run_positions)
    ax.set_xticklabels(labels, rotation=rotation)
    ax.set_title(title)
    ax.set_ylabel('Samples')
    ax.set_xlabel('Run')
    ax.legend()

    if size is not None:
        fig.set_size_inches(size)
    fig.tight_layout()

    return ax
[docs]
def plot_sample_numbers_plotly(
    histories: list[History] | History,
    labels: list[str] | str = None,
    rotation: int = 0,
    title: str = 'Required samples',
    size: tuple[float, float] = None,
    fig: 'go.Figure' = None,
) -> 'go.Figure':
    """Stacked bar plot of required sample numbers, drawn with plotly.

    Parameters
    ----------
    histories:
        The histories to plot from.
    labels:
        Labels corresponding to the histories. Labels that are None are
        replaced by a single blank.
    rotation:
        Passed to plotly as the tick angle of the x axis.
    title:
        Title for the plot.
    size:
        ``(width, height)`` forwarded to the plotly layout.
    fig:
        The plotly figure to add traces to. If None, a new one is created.

    Returns
    -------
    fig:
        The figure holding the bar traces.
    """
    import plotly.graph_objects as go

    labels, matrix, n_run, n_pop = _prepare_plot_sample_numbers(
        histories, labels
    )

    # none or empty values are not supported by plotly
    for pos in range(n_run):
        if labels[pos] is None:
            labels[pos] = ' '

    if fig is None:
        fig = go.Figure()

    # one trace per generation, based on the sum of earlier generations
    for i_pop in range(n_pop - 1, -1, -1):
        stacked_below = np.sum(matrix[:i_pop, :], axis=0)
        bar = go.Bar(
            x=np.arange(n_run),
            y=matrix[i_pop, :],
            name=f'Generation {i_pop - 1}',
            offsetgroup=0,
            base=stacked_below,
        )
        fig.add_trace(bar)

    # axis decoration
    x_axis = go.layout.XAxis(
        tickmode='array',
        tickvals=list(range(n_run)),
        ticktext=labels,
        tickangle=rotation,
        title='Run',
    )
    y_axis = go.layout.YAxis(title='Samples')
    fig.update_layout(xaxis=x_axis, yaxis=y_axis, title=title)

    if size is not None:
        fig.update_layout(width=size[0], height=size[1])

    return fig
def _prepare_plot_total_sample_numbers(
    histories: list[History] | History,
    labels: list[str] | str,
    yscale: str,
):
    """Sum the sample counts of every history and apply the y scale.

    Parameters
    ----------
    histories:
        The histories to read from; normalized via `to_lists`.
    labels:
        Labels for the histories; normalized via `get_labels`.
    yscale:
        'log' applies the natural logarithm, 'log10' the base-10
        logarithm; any other value leaves the totals untransformed.

    Returns
    -------
    samples, labels, ylabel, n_run:
        Array with one (possibly log-transformed) total per history, the
        normalized labels, the matching y axis label, and the number of
        histories.
    """
    histories = to_lists(histories)
    labels = get_labels(labels, len(histories))
    n_run = len(histories)

    # note: the first entry corresponds to the calibration and should
    # be included here to be fair against methods not requiring
    # calibration
    samples = np.array(
        [
            np.sum(history.get_all_populations()['samples'])
            for history in histories
        ]
    )

    # transform values and label together so they stay in sync
    ylabel = 'Total samples'
    if yscale == 'log':
        samples, ylabel = np.log(samples), f'log({ylabel})'
    elif yscale == 'log10':
        samples, ylabel = np.log10(samples), f'log10({ylabel})'

    return samples, labels, ylabel, n_run
[docs]
def plot_total_sample_numbers(
    histories: list | History,
    labels: list | str = None,
    rotation: int = 0,
    title: str = 'Total required samples',
    yscale: str = 'lin',
    size: tuple = None,
    ax: mpl.axes.Axes = None,
) -> mpl.axes.Axes:
    """
    Bar plot of total required sample number over all iterations, i.e.
    a single-colored bar per history, in contrast to `plot_sample_numbers`,
    which visually distinguishes iterations.

    Parameters
    ----------
    histories:
        The histories to plot from. History ids must be set correctly.
    labels:
        Labels corresponding to the histories. If None are provided,
        indices are used as labels.
    rotation:
        Rotation to apply to the plot's x tick labels. For longer labels,
        a tilting of 45 or even 90 can be preferable.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the counts. Can be one of 'lin', 'log'
        (basis e) or 'log10'.
    size:
        The size of the plot in inches.
    ax:
        The matplotlib axes to plot on. If None, a new figure is created.

    Returns
    -------
    ax:
        The axes the bars were drawn on.
    """
    samples, labels, ylabel, n_run = _prepare_plot_total_sample_numbers(
        histories, labels, yscale
    )

    # reuse the figure of a passed axes, otherwise start a new one
    if ax is not None:
        fig = ax.get_figure()
    else:
        fig, ax = plt.subplots()

    # one bar per run
    run_positions = np.arange(n_run)
    ax.bar(x=run_positions, height=samples)

    # axis decoration
    ax.set_xticks(run_positions)
    ax.set_xticklabels(labels, rotation=rotation)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Run')

    if size is not None:
        fig.set_size_inches(size)
    fig.tight_layout()

    return ax
[docs]
def plot_total_sample_numbers_plotly(
    histories: list | History,
    labels: list | str = None,
    rotation: int = 0,
    title: str = 'Total required samples',
    yscale: str = 'lin',
    size: tuple = None,
    fig: 'go.Figure' = None,
) -> 'go.Figure':
    """Bar plot of total required sample numbers, drawn with plotly.

    Parameters
    ----------
    histories:
        The histories to plot from.
    labels:
        Labels corresponding to the histories.
    rotation:
        Passed to plotly as the tick angle of the x axis.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the counts. Can be one of 'lin', 'log'
        (basis e) or 'log10'.
    size:
        ``(width, height)`` forwarded to the plotly layout.
    fig:
        The plotly figure to add the trace to. If None, a new one is
        created.

    Returns
    -------
    fig:
        The figure holding the bar trace.
    """
    import plotly.graph_objects as go

    # prepare data
    samples, labels, ylabel, n_run = _prepare_plot_total_sample_numbers(
        histories, labels, yscale
    )

    # create figure
    if fig is None:
        fig = go.Figure()

    # plot bars
    fig.add_trace(
        go.Bar(
            x=np.arange(n_run),
            y=samples,
            name='Total samples',
            offsetgroup=0,
            base=0,
        )
    )

    # add labels
    fig.update_layout(
        xaxis=go.layout.XAxis(
            tickmode='array',
            tickvals=list(range(n_run)),
            ticktext=labels,
            tickangle=rotation,
            # x axis title, matching the matplotlib variant's 'Run' label
            title='Run',
        ),
        yaxis=go.layout.YAxis(title=ylabel),
        title=title,
    )
    if size is not None:
        fig.update_layout(width=size[0], height=size[1])

    return fig
def _prepare_plot_sample_numbers_trajectory(
    histories: list | History,
    labels: list | str,
    yscale: str,
):
    """Collect time points and sample counts of every history.

    Parameters
    ----------
    histories:
        The histories to read from; normalized via `to_lists`.
    labels:
        Labels for the histories; normalized via `get_labels`.
    yscale:
        'log' applies the natural logarithm, 'log10' the base-10
        logarithm; any other value leaves the counts untransformed.

    Returns
    -------
    samples, times, labels, ylabel:
        Per-history arrays of (possibly log-transformed) sample counts,
        per-history arrays of the 't' values, the normalized labels, and
        the matching y axis label.
    """
    histories = to_lists(histories)
    labels = get_labels(labels, len(histories))

    # note: the first entry corresponds to the calibration and should
    # be included here to be fair against methods not requiring
    # calibration
    times, samples = [], []
    for history in histories:
        populations = history.get_all_populations()
        times.append(np.array(populations['t']))
        samples.append(np.array(populations['samples']))

    # transform values and label together so they stay in sync
    ylabel = 'Samples'
    if yscale == 'log':
        samples = list(map(np.log, samples))
        ylabel = f'log({ylabel})'
    elif yscale == 'log10':
        samples = list(map(np.log10, samples))
        ylabel = f'log10({ylabel})'

    return samples, times, labels, ylabel
[docs]
def plot_sample_numbers_trajectory(
    histories: list | History,
    labels: list | str = None,
    title: str = 'Required samples',
    yscale: str = 'lin',
    size: tuple = None,
    ax: mpl.axes.Axes = None,
) -> mpl.axes.Axes:
    """
    Plot of required sample number over all iterations, i.e. one trajectory
    per history.

    Parameters
    ----------
    histories:
        The histories to plot from. History ids must be set correctly.
    labels:
        Labels corresponding to the histories. If None are provided,
        indices are used as labels.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the counts. Can be one of 'lin', 'log'
        (basis e) or 'log10'.
    size:
        The size of the plot in inches.
    ax:
        The matplotlib axes to plot on. If None, a new figure is created.

    Returns
    -------
    ax:
        Axis of the generated plot.
    """
    samples, times, labels, ylabel = _prepare_plot_sample_numbers_trajectory(
        histories, labels, yscale
    )

    # reuse the figure of a passed axes, otherwise start a new one
    if ax is not None:
        fig = ax.get_figure()
    else:
        fig, ax = plt.subplots()

    # one line per history
    for run_times, run_samples, run_label in zip(times, samples, labels):
        ax.plot(run_times, run_samples, 'x-', label=run_label)

    # a legend only makes sense if at least one label is set
    if not all(lab is None for lab in labels):
        ax.legend()
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Population index $t$')

    if size is not None:
        fig.set_size_inches(size)
    fig.tight_layout()

    return ax
[docs]
def plot_sample_numbers_trajectory_plotly(
    histories: list | History,
    labels: list | str = None,
    title: str = 'Required samples',
    yscale: str = 'lin',
    size: tuple = None,
    fig: 'go.Figure' = None,
) -> 'go.Figure':
    """Plot sample number trajectories (one per history) using plotly.

    Parameters
    ----------
    histories:
        The histories to plot from.
    labels:
        Labels corresponding to the histories; used as trace names.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the counts. Can be one of 'lin', 'log'
        (basis e) or 'log10'.
    size:
        ``(width, height)`` forwarded to the plotly layout.
    fig:
        The plotly figure to add traces to. If None, a new one is created.

    Returns
    -------
    fig:
        The figure holding the trajectories.
    """
    import plotly.graph_objects as go

    samples, times, labels, ylabel = _prepare_plot_sample_numbers_trajectory(
        histories, labels, yscale
    )

    if fig is None:
        fig = go.Figure()

    # one scatter trace per history
    for run_times, run_samples, run_label in zip(times, samples, labels):
        trace = go.Scatter(
            x=run_times,
            y=run_samples,
            mode='lines+markers',
            name=run_label,
        )
        fig.add_trace(trace)

    # axis decoration
    x_axis = go.layout.XAxis(title='Population index $t$')
    y_axis = go.layout.YAxis(title=ylabel)
    fig.update_layout(title=title, xaxis=x_axis, yaxis=y_axis)

    if size is not None:
        fig.update_layout(width=size[0], height=size[1])

    return fig
def _prepare_plot_acceptance_rates_trajectory(
    histories: list | History,
    labels: list | str,
    yscale: str,
    colors: list[str],
    normalize_by_ess: bool,
):
    """Compute per-generation acceptance rates for every history.

    The acceptance rate of a generation is its population size (or its
    effective sample size if `normalize_by_ess` is set) divided by the
    number of samples drawn in that generation. The first population
    entry of each history is skipped.

    Parameters
    ----------
    histories:
        The histories to read from; normalized via `to_lists`.
    labels:
        Labels for the histories; normalized via `get_labels`.
    yscale:
        'log' applies the natural logarithm, 'log10' the base-10
        logarithm; any other value leaves the rates untransformed.
    colors:
        One color per history, or None, in which case a list of None of
        matching length is returned.
    normalize_by_ess:
        Whether to use the effective sample size of the weights instead
        of the population size as numerator.

    Returns
    -------
    rates, times, labels, ylabel, colors:
        Per-history arrays of (possibly log-transformed) rates,
        per-history arrays of the corresponding 't' values, the
        normalized labels, the y axis label, and the colors.
    """
    # preprocess input
    histories = to_lists(histories)
    labels = get_labels(labels, len(histories))
    if colors is None:
        colors = [None] * len(histories)

    # extract sample numbers
    times = []
    samples = []
    pop_sizes = []
    for history in histories:
        # note: the first entry of time -1 is trivial and is thus ignored here
        h_info = history.get_all_populations()
        ts = np.array(h_info['t'])[1:]
        times.append(ts)

        if normalize_by_ess:
            # Fill by position, not by `t - 1`: with times starting at 0
            # after dropping the -1 entry, `t - 1` would send t=0 to index
            # -1 (the last slot) and shift every other value by one,
            # misaligning the ESS values with `times` and `samples`.
            ess = np.zeros(len(ts))
            for ix, t in enumerate(ts):
                w = history.get_weighted_distances(t=t)['w']
                ess[ix] = effective_sample_size(w)
            pop_sizes.append(ess)
        else:
            pop_sizes.append(
                np.array(history.get_nr_particles_per_population().values[1:])
            )
        samples.append(np.array(h_info['samples'])[1:])

    # compute acceptance rates
    rates = [
        pop_size / sample for sample, pop_size in zip(samples, pop_sizes)
    ]

    # apply scale
    ylabel = 'Acceptance rate'
    if yscale == 'log':
        rates = [np.log(rate) for rate in rates]
        ylabel = 'log(' + ylabel + ')'
    elif yscale == 'log10':
        rates = [np.log10(rate) for rate in rates]
        ylabel = 'log10(' + ylabel + ')'

    return rates, times, labels, ylabel, colors
[docs]
def plot_acceptance_rates_trajectory(
    histories: list | History,
    labels: list | str = None,
    title: str = 'Acceptance rates',
    yscale: str = 'lin',
    size: tuple = None,
    colors: list[str] = None,
    normalize_by_ess: bool = False,
    ax: mpl.axes.Axes = None,
) -> mpl.axes.Axes:
    """
    Plot of acceptance rates over all iterations, i.e. one trajectory
    per history.

    Parameters
    ----------
    histories:
        The histories to plot from. History ids must be set correctly.
    labels:
        Labels corresponding to the histories. If None are provided,
        indices are used as labels.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the rates. Can be one of 'lin', 'log'
        (basis e) or 'log10'.
    size:
        The size of the plot in inches.
    colors:
        Colors to use for the trajectories. If None, then the matplotlib
        default values are used.
    normalize_by_ess: bool, optional (default = False)
        Indicator to use effective sample size for the acceptance rate in
        place of the population size.
    ax:
        Axis of the plot. If None, a new axis object is generated.

    Returns
    -------
    ax:
        Axis of the generated plot.
    """
    prepared = _prepare_plot_acceptance_rates_trajectory(
        histories, labels, yscale, colors, normalize_by_ess
    )
    rates, times, labels, ylabel, colors = prepared

    # reuse the figure of a passed axes, otherwise start a new one
    if ax is not None:
        fig = ax.get_figure()
    else:
        fig, ax = plt.subplots()

    # one line per history
    for run_times, run_rates, run_label, run_color in zip(
        times, rates, labels, colors
    ):
        ax.plot(run_times, run_rates, 'x-', label=run_label, color=run_color)

    # a legend only makes sense if at least one label is set
    if not all(lab is None for lab in labels):
        ax.legend()
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Population index $t$')

    if size is not None:
        fig.set_size_inches(size)
    fig.tight_layout()

    return ax
[docs]
def plot_acceptance_rates_trajectory_plotly(
    histories: list | History,
    labels: list | str = None,
    title: str = 'Acceptance rates',
    yscale: str = 'lin',
    size: tuple = None,
    colors: list[str] = None,
    normalize_by_ess: bool = False,
    fig: 'go.Figure' = None,
) -> 'go.Figure':
    """Plot acceptance rate trajectories (one per history) using plotly.

    Parameters
    ----------
    histories:
        The histories to plot from.
    labels:
        Labels corresponding to the histories; used as trace names.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the rates. Can be one of 'lin', 'log'
        (basis e) or 'log10'.
    size:
        ``(width, height)`` forwarded to the plotly layout.
    colors:
        Line colors for the trajectories; None entries are passed on to
        plotly unchanged.
    normalize_by_ess:
        Indicator to use effective sample size for the acceptance rate in
        place of the population size.
    fig:
        The plotly figure to add traces to. If None, a new one is created.

    Returns
    -------
    fig:
        The figure holding the trajectories.
    """
    import plotly.graph_objects as go

    prepared = _prepare_plot_acceptance_rates_trajectory(
        histories, labels, yscale, colors, normalize_by_ess
    )
    rates, times, labels, ylabel, colors = prepared

    if fig is None:
        fig = go.Figure()

    # one scatter trace per history
    for run_times, run_rates, run_label, run_color in zip(
        times, rates, labels, colors
    ):
        trace = go.Scatter(
            x=run_times,
            y=run_rates,
            mode='lines+markers',
            name=run_label,
            line={'color': run_color},
        )
        fig.add_trace(trace)

    # axis decoration
    fig.update_layout(
        title=title,
        xaxis_title='Population index $t$',
        yaxis_title=ylabel,
    )

    if size is not None:
        fig.update_layout(width=size[0], height=size[1])

    return fig
[docs]
def plot_lookahead_evaluations(
    sampler_df: pd.DataFrame | str,
    relative: bool = False,
    fill: bool = False,
    alpha: float = None,
    t_min: int = 0,
    title: str = 'Total evaluations',
    size: tuple = None,
    ax: mpl.axes.Axes = None,
):
    """Plot total vs look-ahead evaluations over the generations.

    Parameters
    ----------
    sampler_df:
        Dataframe or file as generated via
        `RedisEvalParallelSampler(log_file=...)`. The columns `t`,
        `n_lookahead` and `n_evaluated` are read.
    relative:
        Whether to normalize the total evaluations for each generation to 1.
    fill:
        If True, instead of lines, filled areas are drawn that sum up to the
        totals.
    alpha:
        Alpha value for lines or areas. Defaults to 0.7 for filled areas
        and 1.0 for lines.
    t_min:
        The minimum generation to show. E.g. a value of 1 omits the first
        generation.
    title:
        Plot title.
    size:
        The size of the plot in inches.
    ax:
        The axis object to use. If None, a new figure is created.

    Returns
    -------
    ax: Axis of the generated plot.
    """
    # process input
    if isinstance(sampler_df, str):
        sampler_df = pd.read_csv(sampler_df, sep=',')
    if alpha is None:
        alpha = 0.7 if fill else 1.0

    # create figure
    if ax is None:
        fig, ax = plt.subplots()
    else:
        fig = ax.get_figure()

    # restrict to t >= t_min
    sampler_df = sampler_df[sampler_df.t >= t_min]

    # extract variables
    t = sampler_df.t
    n_la = sampler_df.n_lookahead
    n_eval = sampler_df.n_evaluated
    n_act = n_eval - n_la

    # normalize
    if relative:
        # Rebind instead of using `/=`: `n_la` and `n_eval` are columns of
        # `sampler_df`, and in-place division would write through to the
        # frame and make the result depend on the statement order.
        n_la = n_la / n_eval
        n_act = n_act / n_eval
        n_eval = n_eval / n_eval

    # plot
    if fill:
        ax.fill_between(t, n_la, n_eval, alpha=alpha, label='Actual')
        ax.fill_between(t, 0, n_la, alpha=alpha, label='Look-ahead')
    else:
        ax.plot(
            t,
            n_eval,
            linestyle='--',
            marker='o',
            color='black',
            alpha=alpha,
            label='Total',
        )
        ax.plot(t, n_act, marker='o', alpha=alpha, label='Actual')
        ax.plot(t, n_la, marker='o', alpha=alpha, label='Look-ahead')

    # prettify plot
    ax.legend()
    ax.set_title(title)
    ax.set_xlabel('Population index')
    ax.set_ylabel('Evaluations')
    ax.set_ylim(bottom=0)
    # enforce integer ticks
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if size is not None:
        fig.set_size_inches(size)

    return ax
[docs]
def plot_lookahead_final_acceptance_fractions(
    sampler_df: pd.DataFrame | str,
    population_sizes: np.ndarray | History,
    relative: bool = False,
    fill: bool = False,
    alpha: float = None,
    t_min: int = 0,
    title: str = 'Composition of final acceptances',
    size: tuple = None,
    ax: mpl.axes.Axes = None,
):
    """Plot fraction of look-ahead samples in final acceptances,
    over generations.

    Parameters
    ----------
    sampler_df:
        Dataframe or file as generated via
        `RedisEvalParallelSampler(log_file=...)`. The columns `t` and
        `n_lookahead_accepted` are read.
    population_sizes:
        The sizes of the populations of accepted particles. If a History is
        passed, those values are extracted automatically, otherwise should
        be for the same time values as `sampler_df`. Array input is
        converted to float, so integer arrays are accepted as well.
    relative:
        Whether to normalize the acceptances of each generation by the
        population size, so that the population size line equals 1.
    fill:
        If True, instead of lines, filled areas are drawn that sum up to the
        totals.
    alpha:
        Alpha value for lines or areas. Defaults to 0.7 for filled areas
        and 1.0 for lines.
    t_min:
        The minimum generation to show. E.g. a value of 1 omits the first
        generation.
    title:
        Plot title.
    size:
        The size of the plot in inches.
    ax:
        The axis object to use. If None, a new figure is created.

    Returns
    -------
    ax: Axis of the generated plot.
    """
    # process input
    if isinstance(sampler_df, str):
        sampler_df = pd.read_csv(sampler_df, sep=',')
    if alpha is None:
        alpha = 0.7 if fill else 1.0

    # create figure
    if ax is None:
        fig, ax = plt.subplots()
    else:
        fig = ax.get_figure()

    # get numbers of final acceptances
    if isinstance(population_sizes, History):
        pop = population_sizes.get_all_populations()
        population_sizes = np.array(
            [pop.loc[pop.t == t, 'particles'] for t in sampler_df.t],
            dtype=float,
        ).flatten()
    else:
        # Work on floats: an integer array cannot hold the result of the
        # normalization below.
        population_sizes = np.asarray(population_sizes, dtype=float)

    # restrict to t >= t_min
    keep = sampler_df.t >= t_min
    population_sizes = population_sizes[keep.to_numpy()]
    sampler_df = sampler_df[keep]

    # extract variables
    t = sampler_df.t
    n_la_acc = sampler_df.n_lookahead_accepted

    # actual look-ahead acceptances cannot be more than requested
    n_la_acc = np.minimum(n_la_acc, population_sizes)

    # actual acceptances are the remaining ones, as these are always later
    n_act_acc = population_sizes - n_la_acc

    # normalize
    if relative:
        # Rebind instead of using `/=`: in-place true division raises a
        # casting error on integer arrays and would mutate input data.
        n_la_acc = n_la_acc / population_sizes
        n_act_acc = n_act_acc / population_sizes
        population_sizes = population_sizes / population_sizes

    # plot
    if fill:
        ax.fill_between(
            t, n_la_acc, population_sizes, alpha=alpha, label='Actual'
        )
        ax.fill_between(t, 0, n_la_acc, alpha=alpha, label='Look-ahead')
    else:
        ax.plot(
            t,
            population_sizes,
            linestyle='--',
            marker='o',
            color='black',
            alpha=alpha,
            label='Population size',
        )
        ax.plot(t, n_act_acc, marker='o', alpha=alpha, label='Actual')
        ax.plot(t, n_la_acc, marker='o', alpha=alpha, label='Look-ahead')

    # prettify plot
    ax.legend()
    ax.set_title(title)
    ax.set_xlabel('Population index')
    ax.set_ylabel('Final acceptances')
    ax.set_ylim(bottom=0)
    # enforce integer ticks
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if size is not None:
        fig.set_size_inches(size)

    return ax
[docs]
def plot_lookahead_acceptance_rates(
    sampler_df: pd.DataFrame | str,
    t_min: int = 0,
    title: str = 'Acceptance rates',
    size: tuple = None,
    ax: mpl.axes.Axes = None,
):
    """Plot acceptance rates for look-ahead vs ordinary samples.

    The ratios are relative to all accepted particles, including eventually
    discarded ones.

    Parameters
    ----------
    sampler_df:
        Dataframe or file as generated via
        `RedisEvalParallelSampler(log_file=...)`. The columns `t`,
        `n_lookahead_accepted`, `n_lookahead`, `n_accepted` and
        `n_evaluated` are read.
    t_min:
        The minimum generation to show. E.g. a value of 1 omits the first
        generation.
    title:
        Plot title.
    size:
        The size of the plot in inches.
    ax:
        The axis object to use. If None, a new figure is created.

    Returns
    -------
    ax: Axis of the generated plot.
    """
    # a string is treated as the path of a comma-separated log file
    if isinstance(sampler_df, str):
        sampler_df = pd.read_csv(sampler_df, sep=',')

    # reuse the figure of a passed axes, otherwise start a new one
    if ax is not None:
        fig = ax.get_figure()
    else:
        fig, ax = plt.subplots()

    # only keep generations from t_min onwards
    shown = sampler_df[sampler_df.t >= t_min]
    generations = shown.t

    # look-ahead acceptances and evaluations
    accepted_lookahead = shown.n_lookahead_accepted
    evaluated_lookahead = shown.n_lookahead
    # acceptances and evaluations over all proposals
    accepted_total = shown.n_accepted
    evaluated_total = shown.n_evaluated
    # the remainder stems from the actual proposal
    accepted_actual = accepted_total - accepted_lookahead
    evaluated_actual = evaluated_total - evaluated_lookahead

    ax.plot(
        generations,
        accepted_total / evaluated_total,
        linestyle='--',
        marker='o',
        color='black',
        label='Combined',
    )
    ax.plot(
        generations,
        accepted_actual / evaluated_actual,
        marker='o',
        label='Actual',
    )
    ax.plot(
        generations,
        accepted_lookahead / evaluated_lookahead,
        marker='o',
        label='Look-ahead',
    )

    # decoration
    ax.legend()
    ax.set_title(title)
    ax.set_xlabel('Population index')
    ax.set_ylabel('Acceptance rate')
    ax.set_ylim(bottom=0)
    # generations are integers, so only integer ticks are meaningful
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if size is not None:
        fig.set_size_inches(size)

    return ax