"""Sample number plots"""
from typing import TYPE_CHECKING, List, Tuple, Union
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.ticker import MaxNLocator
if TYPE_CHECKING:
import plotly.graph_objs as go
from ..storage import History
from ..weighted_statistics import effective_sample_size
from .util import get_labels, to_lists
def _prepare_plot_sample_numbers(
    histories: Union[List[History], History],
    labels: Union[List[str], str],
):
    """Collect the per-population sample counts of all runs in one matrix.

    Parameters
    ----------
    histories:
        A single history or a list of histories to read from.
    labels:
        Labels for the histories; normalized via `get_labels`.

    Returns
    -------
    labels, matrix, n_run, n_pop:
        The normalized labels, an array of shape (n_pop, n_run) with the
        sample counts (runs with fewer populations are padded with zeros),
        the number of runs, and the largest number of populations.
    """
    histories = to_lists(histories)
    n_run = len(histories)
    labels = get_labels(labels, n_run)

    # The first entry corresponds to the calibration. It is kept on purpose
    # to be fair against methods that do not require calibration.
    counts_per_run = [
        np.array(history.get_all_populations()['samples'])
        for history in histories
    ]

    # one column per run, one row per population
    n_pop = max(map(len, counts_per_run))
    matrix = np.zeros((n_pop, n_run))
    for column, counts in enumerate(counts_per_run):
        matrix[: len(counts), column] = counts

    return labels, matrix, n_run, n_pop
[docs]
def plot_sample_numbers(
    histories: Union[List[History], History],
    labels: Union[List[str], str] = None,
    rotation: int = 0,
    title: str = "Required samples",
    size: Tuple[float, float] = None,
    ax: mpl.axes.Axes = None,
) -> mpl.axes.Axes:
    """
    Stacked bar plot of required numbers of samples over all iterations.

    Parameters
    ----------
    histories:
        The histories to plot from. History ids must be set correctly.
    labels:
        Labels corresponding to the histories. If None are provided,
        indices are used as labels.
    rotation:
        Rotation to apply to the plot's x tick labels. For longer labels,
        a tilting of 45 or even 90 can be preferable.
    title:
        Title for the plot.
    size:
        The size of the plot in inches.
    ax:
        The matplotlib axes to plot on. If None, a new figure is created.

    Returns
    -------
    ax:
        A reference to the axis object created.
    """
    labels, matrix, n_run, n_pop = _prepare_plot_sample_numbers(
        histories, labels
    )

    # reuse the given axes, or open a fresh figure
    if ax is None:
        _, ax = plt.subplots()
    fig = ax.get_figure()

    # Draw from the topmost generation downwards; each bar segment sits on
    # top of the summed counts of all earlier generations.
    run_positions = np.arange(n_run)
    for i_pop in range(n_pop - 1, -1, -1):
        offset = np.sum(matrix[:i_pop, :], axis=0)
        ax.bar(
            x=run_positions,
            height=matrix[i_pop, :],
            bottom=offset,
            label=f"Generation {i_pop-1}",
        )

    # axis decoration
    ax.set_xticks(run_positions)
    ax.set_xticklabels(labels, rotation=rotation)
    ax.set_title(title)
    ax.set_ylabel("Samples")
    ax.set_xlabel("Run")
    ax.legend()

    # figure geometry
    if size is not None:
        fig.set_size_inches(size)
    fig.tight_layout()

    return ax
[docs]
def plot_sample_numbers_plotly(
    histories: Union[List[History], History],
    labels: Union[List[str], str] = None,
    rotation: int = 0,
    title: str = "Required samples",
    size: Tuple[float, float] = None,
    fig: "go.Figure" = None,
) -> "go.Figure":
    """Plot sample numbers using plotly.

    Stacked bar plot of required numbers of samples over all iterations.

    Parameters
    ----------
    histories:
        The histories to plot from.
    labels:
        Labels corresponding to the histories.
    rotation:
        Angle applied to the x tick labels.
    title:
        Title for the plot.
    size:
        Pair whose entries are passed as layout width and height.
    fig:
        The plotly figure to add traces to. If None, a new one is created.

    Returns
    -------
    fig:
        The figure holding the bar traces.
    """
    import plotly.graph_objects as go

    # prepare data
    labels, matrix, n_run, n_pop = _prepare_plot_sample_numbers(
        histories, labels
    )

    # None or empty values are not supported by plotly. Build a new list
    # instead of assigning into `labels`, which may be the very list object
    # the caller passed in (NOTE(review): depends on `get_labels`; confirm).
    tick_labels = [" " if label is None else label for label in labels]

    # create figure
    if fig is None:
        fig = go.Figure()

    # plot bars, each generation stacked on the sum of all earlier ones
    for i_pop in reversed(range(n_pop)):
        fig.add_trace(
            go.Bar(
                x=np.arange(n_run),
                y=matrix[i_pop, :],
                name=f"Generation {i_pop-1}",
                offsetgroup=0,
                base=np.sum(matrix[:i_pop, :], axis=0),
            )
        )

    # add labels
    fig.update_layout(
        xaxis=go.layout.XAxis(
            tickmode="array",
            tickvals=list(range(n_run)),
            ticktext=tick_labels,
            tickangle=rotation,
            title="Run",
        ),
        yaxis=go.layout.YAxis(title="Samples"),
        title=title,
    )
    if size is not None:
        fig.update_layout(width=size[0], height=size[1])

    return fig
def _prepare_plot_total_sample_numbers(
    histories: Union[List[History], History],
    labels: Union[List[str], str],
    yscale: str,
):
    """Sum up the sample counts of each run and apply the requested scale.

    Parameters
    ----------
    histories:
        A single history or a list of histories to read from.
    labels:
        Labels for the histories; normalized via `get_labels`.
    yscale:
        'log' applies the natural logarithm, 'log10' the decadic one.
        Any other value leaves the totals untouched.

    Returns
    -------
    samples, labels, ylabel, n_run:
        Array with one (possibly transformed) total per run, the normalized
        labels, the matching y axis label, and the number of runs.
    """
    histories = to_lists(histories)
    labels = get_labels(labels, len(histories))
    n_run = len(histories)

    # The first entry corresponds to the calibration. It is included on
    # purpose to be fair against methods that do not require calibration.
    samples = np.array(
        [
            np.sum(history.get_all_populations()['samples'])
            for history in histories
        ]
    )

    # transform values and wrap the axis label accordingly
    ylabel = "Total samples"
    for scale_name, transform in (('log', np.log), ('log10', np.log10)):
        if yscale == scale_name:
            samples = transform(samples)
            ylabel = f"{scale_name}({ylabel})"
            break

    return samples, labels, ylabel, n_run
[docs]
def plot_total_sample_numbers(
    histories: Union[List, History],
    labels: Union[List, str] = None,
    rotation: int = 0,
    title: str = "Total required samples",
    yscale: str = 'lin',
    size: tuple = None,
    ax: mpl.axes.Axes = None,
) -> mpl.axes.Axes:
    """
    Bar plot of total required sample number over all iterations, i.e.
    a single-colored bar per history, in contrast to `plot_sample_numbers`,
    which visually distinguishes iterations.

    Parameters
    ----------
    histories:
        The histories to plot from. History ids must be set correctly.
    labels:
        Labels corresponding to the histories. If None are provided,
        indices are used as labels.
    rotation:
        Rotation to apply to the plot's x tick labels. For longer labels,
        a tilting of 45 or even 90 can be preferable.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the counts. Can be one of 'lin', 'log'
        (basis e) or 'log10'
    size:
        The size of the plot in inches.
    ax:
        The matplotlib axes to plot on. If None, a new figure is created.

    Returns
    -------
    ax:
        A reference to the axis object created.
    """
    totals, labels, ylabel, n_run = _prepare_plot_total_sample_numbers(
        histories, labels, yscale
    )

    # reuse the given axes, or open a fresh figure
    if ax is None:
        _, ax = plt.subplots()
    fig = ax.get_figure()

    # one bar per run
    run_positions = np.arange(n_run)
    ax.bar(x=run_positions, height=totals)

    # axis decoration
    ax.set_xticks(run_positions)
    ax.set_xticklabels(labels, rotation=rotation)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel("Run")

    # figure geometry
    if size is not None:
        fig.set_size_inches(size)
    fig.tight_layout()

    return ax
[docs]
def plot_total_sample_numbers_plotly(
    histories: Union[List, History],
    labels: Union[List, str] = None,
    rotation: int = 0,
    title: str = "Total required samples",
    yscale: str = 'lin',
    size: tuple = None,
    fig: "go.Figure" = None,
) -> "go.Figure":
    """Plot total sample numbers using plotly.

    Bar plot with a single bar per history showing the total number of
    samples over all iterations.

    Parameters
    ----------
    histories:
        The histories to plot from.
    labels:
        Labels corresponding to the histories.
    rotation:
        Angle applied to the x tick labels.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the counts. Can be one of 'lin', 'log'
        (basis e) or 'log10'.
    size:
        Pair whose entries are passed as layout width and height.
    fig:
        The plotly figure to add the trace to. If None, a new one is
        created.

    Returns
    -------
    fig:
        The figure holding the bar trace.
    """
    import plotly.graph_objects as go

    # prepare data
    samples, labels, ylabel, n_run = _prepare_plot_total_sample_numbers(
        histories, labels, yscale
    )

    # None values are not supported by plotly as tick texts; same guard as
    # in the stacked plotly plot, applied to a copy of the labels.
    tick_labels = [" " if label is None else label for label in labels]

    # create figure
    if fig is None:
        fig = go.Figure()

    # plot bars
    fig.add_trace(
        go.Bar(
            x=np.arange(n_run),
            y=samples,
            name="Total samples",
            offsetgroup=0,
            base=0,
        )
    )

    # add labels; the x axis title matches the matplotlib counterpart
    fig.update_layout(
        xaxis=go.layout.XAxis(
            tickmode="array",
            tickvals=list(range(n_run)),
            ticktext=tick_labels,
            tickangle=rotation,
            title="Run",
        ),
        yaxis=go.layout.YAxis(title=ylabel),
        title=title,
    )
    if size is not None:
        fig.update_layout(width=size[0], height=size[1])

    return fig
def _prepare_plot_sample_numbers_trajectory(
    histories: Union[List, History],
    labels: Union[List, str],
    yscale: str,
):
    """Prepare data for plotting sample number trajectories.

    Parameters
    ----------
    histories:
        A single history or a list of histories to read from.
    labels:
        Labels for the histories; normalized via `get_labels`.
    yscale:
        'log' applies the natural logarithm, 'log10' the decadic one.
        Any other value leaves the counts untouched.

    Returns
    -------
    samples, times, labels, ylabel:
        Per-history arrays of (possibly transformed) sample counts, the
        per-history arrays of time points, the normalized labels, and the
        matching y axis label.
    """
    histories = to_lists(histories)
    labels = get_labels(labels, len(histories))

    # The first entry corresponds to the calibration. It is included on
    # purpose to be fair against methods that do not require calibration.
    times, samples = [], []
    for history in histories:
        populations = history.get_all_populations()
        times.append(np.array(populations['t']))
        samples.append(np.array(populations['samples']))

    # transform values and wrap the axis label accordingly
    ylabel = "Samples"
    for scale_name, transform in (('log', np.log), ('log10', np.log10)):
        if yscale == scale_name:
            samples = [transform(sample) for sample in samples]
            ylabel = f"{scale_name}({ylabel})"
            break

    return samples, times, labels, ylabel
[docs]
def plot_sample_numbers_trajectory(
    histories: Union[List, History],
    labels: Union[List, str] = None,
    title: str = "Required samples",
    yscale: str = 'lin',
    size: tuple = None,
    ax: mpl.axes.Axes = None,
) -> mpl.axes.Axes:
    """
    Plot of required sample number over all iterations, i.e. one trajectory
    per history.

    Parameters
    ----------
    histories:
        The histories to plot from. History ids must be set correctly.
    labels:
        Labels corresponding to the histories. If None are provided,
        indices are used as labels.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the counts. Can be one of 'lin', 'log'
        (basis e) or 'log10'
    size:
        The size of the plot in inches.
    ax:
        The matplotlib axes to plot on. If None, a new figure is created.

    Returns
    -------
    ax:
        Axis of the generated plot.
    """
    samples, times, labels, ylabel = _prepare_plot_sample_numbers_trajectory(
        histories, labels, yscale
    )

    # reuse the given axes, or open a fresh figure
    if ax is None:
        _, ax = plt.subplots()
    fig = ax.get_figure()

    # one line per history
    for run_times, run_samples, run_label in zip(times, samples, labels):
        ax.plot(run_times, run_samples, 'x-', label=run_label)

    # a legend only makes sense if at least one label is set
    has_label = any(lab is not None for lab in labels)
    if has_label:
        ax.legend()
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel("Population index $t$")

    # figure geometry
    if size is not None:
        fig.set_size_inches(size)
    fig.tight_layout()

    return ax
[docs]
def plot_sample_numbers_trajectory_plotly(
    histories: Union[List, History],
    labels: Union[List, str] = None,
    title: str = "Required samples",
    yscale: str = 'lin',
    size: tuple = None,
    fig: "go.Figure" = None,
) -> "go.Figure":
    """Plot sample number trajectories using plotly.

    Adds one scatter trace (lines and markers) per history to `fig`, or to
    a newly created figure if `fig` is None, and returns the figure. If
    `size` is given, its two entries are used as layout width and height.
    """
    import plotly.graph_objects as go

    samples, times, labels, ylabel = _prepare_plot_sample_numbers_trajectory(
        histories, labels, yscale
    )

    if fig is None:
        fig = go.Figure()

    # one trace per history
    for run_times, run_samples, run_label in zip(times, samples, labels):
        trace = go.Scatter(
            x=run_times,
            y=run_samples,
            mode='lines+markers',
            name=run_label,
        )
        fig.add_trace(trace)

    # titles
    x_axis = go.layout.XAxis(title="Population index $t$")
    y_axis = go.layout.YAxis(title=ylabel)
    fig.update_layout(title=title, xaxis=x_axis, yaxis=y_axis)

    if size is not None:
        width, height = size[0], size[1]
        fig.update_layout(width=width, height=height)

    return fig
def _prepare_plot_acceptance_rates_trajectory(
    histories: Union[List, History],
    labels: Union[List, str],
    yscale: str,
    colors: List[str],
    normalize_by_ess: bool,
):
    """Compute acceptance rate trajectories for one or more histories.

    The rate per population is the population size (or, if
    `normalize_by_ess` is set, the effective sample size of the population
    weights) divided by the number of samples.

    Parameters
    ----------
    histories:
        A single history or a list of histories to read from.
    labels:
        Labels for the histories; normalized via `get_labels`.
    yscale:
        'log' applies the natural logarithm, 'log10' the decadic one.
        Any other value leaves the rates untouched.
    colors:
        One color per history. If None, a list of None values is used.
    normalize_by_ess:
        Whether to use the effective sample size in place of the
        population size.

    Returns
    -------
    rates, times, labels, ylabel, colors:
        Per-history arrays of (possibly transformed) rates, per-history
        arrays of time points, the normalized labels, the matching y axis
        label, and the colors.
    """
    # preprocess input
    histories = to_lists(histories)
    labels = get_labels(labels, len(histories))
    if colors is None:
        colors = [None] * len(histories)

    # extract sample numbers
    times = []
    samples = []
    pop_sizes = []
    for history in histories:
        # note: the first entry of time -1 is trivial and is thus ignored here
        h_info = history.get_all_populations()
        ts = np.array(h_info['t'])[1:]
        times.append(ts)
        if normalize_by_ess:
            # Fill by position rather than by `t - 1`: the retained times
            # start at 0, so `t - 1` would be -1 for the first entry and
            # wrap around to the last slot, misaligning the effective
            # sample sizes with `samples` by one population.
            ess = np.zeros(len(ts))
            for ix, t in enumerate(ts):
                w = history.get_weighted_distances(t=t)['w']
                ess[ix] = effective_sample_size(w)
            pop_sizes.append(ess)
        else:
            pop_sizes.append(
                np.array(history.get_nr_particles_per_population().values[1:])
            )
        samples.append(np.array(h_info['samples'])[1:])

    # compute acceptance rates
    rates = [
        pop_size / sample for sample, pop_size in zip(samples, pop_sizes)
    ]

    # apply scale
    ylabel = "Acceptance rate"
    if yscale == 'log':
        rates = [np.log(rate) for rate in rates]
        ylabel = "log(" + ylabel + ")"
    elif yscale == 'log10':
        rates = [np.log10(rate) for rate in rates]
        ylabel = "log10(" + ylabel + ")"

    return rates, times, labels, ylabel, colors
[docs]
def plot_acceptance_rates_trajectory(
    histories: Union[List, History],
    labels: Union[List, str] = None,
    title: str = "Acceptance rates",
    yscale: str = 'lin',
    size: tuple = None,
    colors: List[str] = None,
    normalize_by_ess: bool = False,
    ax: mpl.axes.Axes = None,
) -> mpl.axes.Axes:
    """
    Plot of acceptance rates over all iterations, i.e. one trajectory
    per history.

    Parameters
    ----------
    histories:
        The histories to plot from. History ids must be set correctly.
    labels:
        Labels corresponding to the histories. If None are provided,
        indices are used as labels.
    title:
        Title for the plot.
    yscale:
        The scale on which to plot the counts. Can be one of 'lin', 'log'
        (basis e) or 'log10'
    size:
        The size of the plot in inches.
    colors:
        Colors to use for the trajectories. If None, then the matplotlib
        default values are used.
    normalize_by_ess: bool, optional (default = False)
        Indicator to use effective sample size for the acceptance rate in
        place of the population size.
    ax:
        Axis of the plot. If None, a new axis object is generated.

    Returns
    -------
    ax:
        Axis of the generated plot.
    """
    prepared = _prepare_plot_acceptance_rates_trajectory(
        histories, labels, yscale, colors, normalize_by_ess
    )
    rates, times, labels, ylabel, colors = prepared

    # reuse the given axes, or open a fresh figure
    if ax is None:
        _, ax = plt.subplots()
    fig = ax.get_figure()

    # one line per history
    for run_times, run_rates, run_label, run_color in zip(
        times, rates, labels, colors
    ):
        ax.plot(run_times, run_rates, 'x-', label=run_label, color=run_color)

    # a legend only makes sense if at least one label is set
    has_label = any(lab is not None for lab in labels)
    if has_label:
        ax.legend()
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel("Population index $t$")

    # figure geometry
    if size is not None:
        fig.set_size_inches(size)
    fig.tight_layout()

    return ax
[docs]
def plot_acceptance_rates_trajectory_plotly(
    histories: Union[List, History],
    labels: Union[List, str] = None,
    title: str = "Acceptance rates",
    yscale: str = 'lin',
    size: tuple = None,
    colors: List[str] = None,
    normalize_by_ess: bool = False,
    fig: "go.Figure" = None,
) -> "go.Figure":
    """Plot acceptance rates trajectories using plotly.

    Adds one scatter trace (lines and markers) per history to `fig`, or to
    a newly created figure if `fig` is None, and returns the figure. If
    `size` is given, its two entries are used as layout width and height.
    """
    import plotly.graph_objects as go

    prepared = _prepare_plot_acceptance_rates_trajectory(
        histories, labels, yscale, colors, normalize_by_ess
    )
    rates, times, labels, ylabel, colors = prepared

    if fig is None:
        fig = go.Figure()

    # one trace per history
    for run_times, run_rates, run_label, run_color in zip(
        times, rates, labels, colors
    ):
        trace = go.Scatter(
            x=run_times,
            y=run_rates,
            mode='lines+markers',
            name=run_label,
            line={'color': run_color},
        )
        fig.add_trace(trace)

    # titles
    fig.update_layout(
        title=title,
        xaxis_title="Population index $t$",
        yaxis_title=ylabel,
    )

    # figure geometry
    if size is not None:
        width, height = size[0], size[1]
        fig.update_layout(width=width, height=height)

    return fig
[docs]
def plot_lookahead_evaluations(
    sampler_df: Union[pd.DataFrame, str],
    relative: bool = False,
    fill: bool = False,
    alpha: float = None,
    t_min: int = 0,
    title: str = "Total evaluations",
    size: tuple = None,
    ax: mpl.axes.Axes = None,
):
    """Plot total vs look-ahead evaluations over the generations.

    Parameters
    ----------
    sampler_df:
        Dataframe or file as generated via
        `RedisEvalParallelSampler(log_file=...)`.
    relative:
        Whether to normalize the total evaluations for each generation to 1.
    fill:
        If True, instead of lines, filled areas are drawn that sum up to the
        totals.
    alpha:
        Alpha value for lines or areas. If None, 0.7 is used for areas and
        1.0 for lines.
    t_min:
        The minimum generation to show. E.g. a value of 1 omits the first
        generation.
    title:
        Plot title.
    size:
        The size of the plot in inches.
    ax:
        The axis object to use.

    Returns
    -------
    ax: Axis of the generated plot.
    """
    # a string is interpreted as path to a csv file
    if isinstance(sampler_df, str):
        sampler_df = pd.read_csv(sampler_df, sep=',')

    # areas overlap less harshly when slightly transparent
    if alpha is None:
        if fill:
            alpha = 0.7
        else:
            alpha = 1.0

    # reuse the given axes, or open a fresh figure
    if ax is None:
        _, ax = plt.subplots()
    fig = ax.get_figure()

    # keep only generations from t_min onwards
    keep = sampler_df.t >= t_min
    sampler_df = sampler_df[keep]

    # per-generation counts; "actual" is everything that is not look-ahead
    t = sampler_df.t
    n_la = sampler_df.n_lookahead
    n_eval = sampler_df.n_evaluated
    n_act = n_eval - n_la

    # express everything as fraction of the total (total itself last)
    if relative:
        n_la /= n_eval
        n_act /= n_eval
        n_eval /= n_eval

    if not fill:
        ax.plot(
            t,
            n_eval,
            linestyle='--',
            marker='o',
            color='black',
            alpha=alpha,
            label="Total",
        )
        ax.plot(t, n_act, marker='o', alpha=alpha, label="Actual")
        ax.plot(t, n_la, marker='o', alpha=alpha, label="Look-ahead")
    else:
        ax.fill_between(t, n_la, n_eval, alpha=alpha, label="Actual")
        ax.fill_between(t, 0, n_la, alpha=alpha, label="Look-ahead")

    # decoration
    ax.legend()
    ax.set_title(title)
    ax.set_xlabel("Population index")
    ax.set_ylabel("Evaluations")
    ax.set_ylim(bottom=0)
    # population indices are integers, so use integer ticks
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if size is not None:
        fig.set_size_inches(size)

    return ax
[docs]
def plot_lookahead_final_acceptance_fractions(
    sampler_df: Union[pd.DataFrame, str],
    population_sizes: Union[np.ndarray, History],
    relative: bool = False,
    fill: bool = False,
    alpha: float = None,
    t_min: int = 0,
    title: str = "Composition of final acceptances",
    size: tuple = None,
    ax: mpl.axes.Axes = None,
):
    """Plot fraction of look-ahead samples in final acceptances,
    over generations.

    Parameters
    ----------
    sampler_df:
        Dataframe or file as generated via
        `RedisEvalParallelSampler(log_file=...)`.
    population_sizes:
        The sizes of the populations of accepted particles. If a History is
        passed, those values are extracted automatically, otherwise should
        be for the same time values as `sampler_df`. The passed array is
        not modified.
    relative:
        Whether to normalize the population size for each generation to 1.
    fill:
        If True, instead of lines, filled areas are drawn that sum up to the
        totals.
    alpha:
        Alpha value for lines or areas. If None, 0.7 is used for areas and
        1.0 for lines.
    t_min:
        The minimum generation to show. E.g. a value of 1 omits the first
        generation.
    title:
        Plot title.
    size:
        The size of the plot in inches.
    ax:
        The axis object to use.

    Returns
    -------
    ax: Axis of the generated plot.
    """
    # process input
    if isinstance(sampler_df, str):
        sampler_df = pd.read_csv(sampler_df, sep=',')
    if alpha is None:
        alpha = 0.7 if fill else 1.0

    # create figure
    if ax is None:
        fig, ax = plt.subplots()
    else:
        fig = ax.get_figure()

    # get numbers of final acceptances
    if isinstance(population_sizes, History):
        pop = population_sizes.get_all_populations()
        population_sizes = np.array(
            [pop.loc[pop.t == t, 'particles'] for t in sampler_df.t],
            dtype=float,
        ).flatten()

    # Work on floats throughout: sizes are naturally passed as integers, and
    # normalizing an integer array in place raises a casting error in numpy.
    population_sizes = np.asarray(population_sizes, dtype=float)

    # restrict to t >= t_min
    keep = np.asarray(sampler_df.t >= t_min)
    population_sizes = population_sizes[keep]
    sampler_df = sampler_df[keep]

    # extract variables
    t = sampler_df.t
    n_la_acc = sampler_df.n_lookahead_accepted
    # actual look-ahead acceptances cannot be more than requested
    n_la_acc = np.minimum(n_la_acc, population_sizes)
    # actual acceptances are the remaining ones, as these are always later
    n_act_acc = population_sizes - n_la_acc

    # normalize (out of place, so no input or frame column is written to)
    if relative:
        n_la_acc = n_la_acc / population_sizes
        n_act_acc = n_act_acc / population_sizes
        population_sizes = population_sizes / population_sizes

    # plot
    if fill:
        ax.fill_between(
            t, n_la_acc, population_sizes, alpha=alpha, label="Actual"
        )
        ax.fill_between(t, 0, n_la_acc, alpha=alpha, label="Look-ahead")
    else:
        ax.plot(
            t,
            population_sizes,
            linestyle='--',
            marker='o',
            color='black',
            alpha=alpha,
            label="Population size",
        )
        ax.plot(t, n_act_acc, marker='o', alpha=alpha, label="Actual")
        ax.plot(t, n_la_acc, marker='o', alpha=alpha, label="Look-ahead")

    # prettify plot
    ax.legend()
    ax.set_title(title)
    ax.set_xlabel("Population index")
    ax.set_ylabel("Final acceptances")
    ax.set_ylim(bottom=0)
    # enforce integer ticks
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if size is not None:
        fig.set_size_inches(size)

    return ax
[docs]
def plot_lookahead_acceptance_rates(
    sampler_df: Union[pd.DataFrame, str],
    t_min: int = 0,
    title: str = "Acceptance rates",
    size: tuple = None,
    ax: mpl.axes.Axes = None,
):
    """Plot acceptance rates for look-ahead vs ordinary samples.

    The ratios are relative to all accepted particles, including eventually
    discarded ones.

    Parameters
    ----------
    sampler_df:
        Dataframe or file as generated via
        `RedisEvalParallelSampler(log_file=...)`.
    t_min:
        The minimum generation to show. E.g. a value of 1 omits the first
        generation.
    title:
        Plot title.
    size:
        The size of the plot in inches.
    ax:
        The axis object to use.

    Returns
    -------
    ax: Axis of the generated plot.
    """
    # a string is interpreted as path to a csv file
    if isinstance(sampler_df, str):
        sampler_df = pd.read_csv(sampler_df, sep=',')

    # reuse the given axes, or open a fresh figure
    if ax is None:
        _, ax = plt.subplots()
    fig = ax.get_figure()

    # keep only generations from t_min onwards
    keep = sampler_df.t >= t_min
    sampler_df = sampler_df[keep]

    t = sampler_df.t

    # acceptances: look-ahead, overall, and the remainder (actual proposal)
    n_la_acc = sampler_df.n_lookahead_accepted
    n_all_acc = sampler_df.n_accepted
    n_act_acc = n_all_acc - n_la_acc

    # evaluations: look-ahead, overall, and the remainder (actual proposal)
    n_la = sampler_df.n_lookahead
    n_all = sampler_df.n_evaluated
    n_act = n_all - n_la

    # acceptance rate = acceptances per evaluation, for each category
    rate_combined = n_all_acc / n_all
    rate_actual = n_act_acc / n_act
    rate_lookahead = n_la_acc / n_la

    ax.plot(
        t,
        rate_combined,
        linestyle='--',
        marker='o',
        color='black',
        label="Combined",
    )
    ax.plot(t, rate_actual, marker='o', label="Actual")
    ax.plot(t, rate_lookahead, marker='o', label="Look-ahead")

    # decoration
    ax.legend()
    ax.set_title(title)
    ax.set_xlabel("Population index")
    ax.set_ylabel("Acceptance rate")
    ax.set_ylim(bottom=0)
    # population indices are integers, so use integer ticks
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if size is not None:
        fig.set_size_inches(size)

    return ax