"""Kernel density estimate plots

To plot densities from the weighted importance samples, the visualization
routines employ a kernel density estimate. Note that this can "over-smooth",
so that local structure is lost. If this could be the case, it makes sense
to reduce the `scaling` of the default MultivariateNormalTransition() that
is used for the `kde` argument, or to replace it by a GridSearchCV() to
automatically find a visually good level of smoothness.
"""
from typing import TYPE_CHECKING
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ..storage import History
from ..transition import MultivariateNormalTransition
from .util import format_plot_matrix, format_plot_matrix_plotly
if TYPE_CHECKING:
import plotly.graph_objs as go
def kde_1d(df, w, x, xmin=None, xmax=None, numx=50, kde=None):
    """
    Evaluate a weighted 1-dimensional kernel density estimate on a grid.

    For example, a results distribution might be obtained from the history
    class and plotted as follows::

        df, w = history.get_distribution(0)
        x, pdf = kde_1d(df, w, "x")
        plt.plot(x, pdf)

    Parameters
    ----------
    df: Pandas Dataframe
        The rows are the observations, the columns the variables.
    w: np.ndarray
        The corresponding weights.
    x: str
        The variable (column of `df`) for the x-axis.
    xmin: float, optional
        The lower limit of the evaluation grid.
        If left empty, it is set to the minimum of the observations of the
        variable to be plotted as x.
    xmax: float, optional
        The upper limit of the evaluation grid.
        If left empty, it is set to the maximum of the observations of the
        variable to be plotted as x.
    numx: int, optional
        The number of equally spaced grid points in x direction.
        Defaults to 50.
    kde: pyabc.Transition, optional
        The kernel density estimator to use for creating a smooth density
        from the sample. It is fitted on ``df[[x]]`` and `w` here.
        If None, ``MultivariateNormalTransition(scaling=1)`` is used.

    Returns
    -------
    x, pdf: (np.ndarray, np.ndarray)
        The grid points and whatever ``kde.pdf`` returns for them.
        These can be passed for plotting, for example as
        plt.plot(x, pdf)
    """
    estimator = MultivariateNormalTransition(scaling=1) if kde is None else kde
    # Fit on the single requested column only (kept as a DataFrame).
    estimator.fit(df[[x]], w)

    # Missing limits fall back to the range covered by the observations.
    lower = df[x].min() if xmin is None else xmin
    upper = df[x].max() if xmax is None else xmax

    grid = np.linspace(lower, upper, num=numx)
    density = estimator.pdf(pd.DataFrame({x: grid}))
    return grid, density
[docs]
def plot_kde_1d_highlevel(
    history: History,
    x: str,
    m: int = 0,
    t: int = None,
    xmin=None,
    xmax=None,
    numx=50,
    ax: mpl.axes.Axes = None,
    size=None,
    title: str = None,
    refval=None,
    refval_color='C1',
    kde=None,
    xname: str = None,
    **kwargs,
) -> mpl.axes.Axes:
    """
    Plot 1d kernel density estimate of parameter samples from a history.

    The samples and weights are read via ``history.get_distribution`` and
    everything else is handed on to `plot_kde_1d`.

    Parameters
    ----------
    history: History
        History to extract data from.
    x: str
        The variable for the x-axis.
    m: int, optional
        Id of the model to plot for.
    t: int, optional
        Time point to plot for. Defaults to last time point.
    xmin: float, optional
        The lower limit in x for the density grid.
        If left empty, it is set to the minimum of the observations of the
        variable to be plotted as x.
    xmax: float, optional
        The upper limit in x for the density grid.
        If left empty, it is set to the maximum of the observations of the
        variable to be plotted as x.
    numx: int, optional
        The number of grid points in x direction.
        Defaults to 50.
    ax: matplotlib.axes.Axes, optional
        The axis object to use.
    size: 2-Tuple of float, optional
        Size of the plot in inches.
    title: str, optional
        Title for the plot. Defaults to None.
    refval: dict, optional
        A reference value for x (as refval[x]: float).
        If not None, the value will be highlighted in the plot.
        Default: None.
    refval_color: str, optional
        Color to use for the reference value.
    kde: pyabc.Transition, optional
        The kernel density estimator to use for creating a smooth density
        from the sample. If None, the default of `kde_1d` applies, i.e.
        ``MultivariateNormalTransition(scaling=1)``.
    xname: str, optional
        Parameter name used as axis label. Defaults to `x`.
    **kwargs:
        Forwarded unchanged to `plot_kde_1d`.

    Returns
    -------
    ax: matplotlib axis
        Axis of the plot.
    """
    samples, weights = history.get_distribution(m=m, t=t)
    return plot_kde_1d(
        samples,
        weights,
        x,
        xmin=xmin,
        xmax=xmax,
        numx=numx,
        ax=ax,
        size=size,
        title=title,
        refval=refval,
        refval_color=refval_color,
        kde=kde,
        xname=xname,
        **kwargs,
    )
def plot_kde_1d_highlevel_plotly(
    history: History,
    x: str,
    m: int = 0,
    t: int = None,
    xmin=None,
    xmax=None,
    numx: int = 50,
    fig: "go.Figure" = None,
    row: int = 1,
    col: int = 1,
    size=None,
    title: str = None,
    refval=None,
    refval_color='gray',
    marker_color=None,
    kde=None,
    xname: str = None,
    **kwargs,
):
    """
    Plot 1d kernel density estimate of parameter samples using plotly.

    Reads samples and weights for model `m` at time `t` via
    ``history.get_distribution`` and forwards them, together with all
    remaining arguments, to `plot_kde_1d_plotly`.

    Parameters
    ----------
    history: History
        History to extract data from.
    x: str
        The variable for the x-axis.
    m: int, optional
        Id of the model to plot for.
    t: int, optional
        Time point to plot for.
    **kwargs:
        Forwarded unchanged to `plot_kde_1d_plotly`.

    All other parameters are passed through to `plot_kde_1d_plotly` by name.

    Returns
    -------
    The return value of `plot_kde_1d_plotly` (the plotly figure).
    """
    samples, weights = history.get_distribution(m=m, t=t)
    forwarded = {
        "df": samples,
        "w": weights,
        "x": x,
        "xmin": xmin,
        "xmax": xmax,
        "numx": numx,
        "fig": fig,
        "row": row,
        "col": col,
        "size": size,
        "title": title,
        "refval": refval,
        "refval_color": refval_color,
        "kde": kde,
        "xname": xname,
        "marker_color": marker_color,
    }
    return plot_kde_1d_plotly(**forwarded, **kwargs)
[docs]
def plot_kde_1d(
    df,
    w,
    x,
    xmin=None,
    xmax=None,
    numx=50,
    ax: mpl.axes.Axes = None,
    size=None,
    title: str = None,
    refval=None,
    refval_color='C1',
    kde=None,
    xname: str = None,
    **kwargs,
) -> mpl.axes.Axes:
    """
    Lowlevel interface for plot_kde_1d_highlevel (see there for the remaining
    parameters).

    The density is computed with `kde_1d` and drawn as a line on `ax`.

    Parameters
    ----------
    df: pandas.DataFrame
        The rows are the observations, the columns the variables.
    w: pandas.DataFrame
        The corresponding weights.
    **kwargs:
        Forwarded to ``ax.plot``.

    Returns
    -------
    ax: matplotlib axis
        Axis of the plot.
    """
    grid, density = kde_1d(
        df, w, x, xmin=xmin, xmax=xmax, numx=numx, kde=kde
    )

    label = x if xname is None else xname
    if ax is None:
        ax = plt.subplots()[1]

    ax.plot(grid, density, **kwargs)
    # TODO: the y-axis lower bound is not forced to include 0; an earlier
    #  attempt via ax.set_ylim(bottom=...) fixed the upper bound inadequately.
    ax.set_xlabel(label)
    ax.set_ylabel("Posterior")
    # Uses the limits as passed in; None leaves that side unchanged.
    ax.set_xlim(xmin, xmax)

    if title is not None:
        ax.set_title(title)

    if refval is not None:
        # mark the reference value by a dotted vertical line
        ax.axvline(refval[x], color=refval_color, linestyle='dotted')

    if size is not None:
        ax.get_figure().set_size_inches(size)

    return ax
[docs]
def plot_kde_1d_plotly(
    df,
    w,
    x,
    xmin=None,
    xmax=None,
    numx=50,
    fig: "go.Figure" = None,
    row: int = 1,
    col: int = 1,
    size=None,
    title: str = None,
    refval=None,
    refval_color='gray',
    marker_color=None,
    kde=None,
    xname: str = None,
    **kwargs,
) -> "go.Figure":
    """
    Plot a 1d kernel density estimate of parameter samples using plotly.

    Parameters
    ----------
    df: pandas.DataFrame
        The rows are the observations, the columns the variables.
    w:
        The corresponding weights.
    x: str
        The variable for the x-axis.
    xmin, xmax: float, optional
        Limits of the density grid (see `kde_1d`); also used as the x-axis
        range of the subplot.
    numx: int, optional
        The number of grid points in x direction. Defaults to 50.
    fig: plotly.graph_objects.Figure, optional
        Figure to draw into. If None, a new 1x1 subplot figure is created.
    row, col: int, optional
        Subplot position the trace is added to. Default: 1, 1.
    size: 2-Tuple, optional
        Forwarded as layout ``width=size[0]`` and ``height=size[1]``.
    title: str, optional
        Title for the figure. Defaults to None.
    refval: dict, optional
        A reference value for x (as refval[x]); drawn as a dashed vertical
        line if not None.
    refval_color: str, optional
        Color to use for the reference value.
    marker_color: optional
        Color of the density trace. Defaults to the first entry of plotly's
        ``DEFAULT_PLOTLY_COLORS``.
    kde: pyabc.Transition, optional
        The kernel density estimator, handed on to `kde_1d`.
    xname: str, optional
        Name used for the trace and the x-axis title. Defaults to `x`.
    **kwargs:
        Forwarded to ``go.Scatter``.

    Returns
    -------
    fig: plotly.graph_objects.Figure
        The figure containing the plot.
    """
    import plotly.graph_objects as go
    from plotly.colors import DEFAULT_PLOTLY_COLORS
    from plotly.subplots import make_subplots

    grid, density = kde_1d(
        df, w, x, xmin=xmin, xmax=xmax, numx=numx, kde=kde
    )

    label = x if xname is None else xname
    if fig is None:
        fig = make_subplots(rows=1, cols=1)
    trace_color = (
        DEFAULT_PLOTLY_COLORS[0] if marker_color is None else marker_color
    )

    # density curve
    density_trace = go.Scatter(
        x=grid,
        y=density,
        name=label,
        marker_color=trace_color,
        **kwargs,
    )
    fig.add_trace(density_trace, row=row, col=col)

    fig.update_xaxes(title_text=label, range=[xmin, xmax], row=row, col=col)

    # vertical line for the reference value
    if refval is not None:
        fig.add_vline(
            x=refval[x],
            line_dash="dash",
            line_color=refval_color,
            row=row,
            col=col,
        )

    if size is not None:
        fig.update_layout(width=size[0], height=size[1])

    if title is not None:
        fig.update_layout(title_text=title)

    return fig
def kde_2d(
    df,
    w,
    x,
    y,
    xmin=None,
    xmax=None,
    ymin=None,
    ymax=None,
    numx=50,
    numy=50,
    kde=None,
):
    """
    Evaluate a weighted 2-dimensional kernel density estimate on a grid.

    For example, a results distribution might be obtained from the history
    class and plotted as follows::

        df, w = history.get_distribution(0)
        X, Y, PDF = kde_2d(df, w, "x", "y")
        plt.pcolormesh(X, Y, PDF)

    Parameters
    ----------
    df: Pandas Dataframe
        The rows are the observations, the columns the variables.
    w:
        The corresponding weights.
    x: str
        The variable for the x-axis.
    y: str
        The variable for the y-axis.
    xmin: float, optional
        The lower limit in x for the grid.
        If left empty, it is set to the minimum of the observations of the
        variable to be plotted as x.
    xmax: float, optional
        The upper limit in x for the grid.
        If left empty, it is set to the maximum of the observations of the
        variable to be plotted as x.
    ymin: float, optional
        The lower limit in y for the grid.
        If left empty, it is set to the minimum of the observations of the
        variable to be plotted as y.
    ymax: float, optional
        The upper limit in y for the grid.
        If left empty, it is set to the maximum of the observations of the
        variable to be plotted as y.
    numx: int, optional
        The number of grid points in x direction.
        Defaults to 50.
    numy: int, optional
        The number of grid points in y direction.
        Defaults to 50.
    kde: pyabc.Transition, optional
        The kernel density estimator to use for creating a smooth density
        from the sample. It is fitted on ``df[[x, y]]`` and `w` here.
        If None, ``MultivariateNormalTransition(scaling=1)`` is used.

    Returns
    -------
    X, Y, PDF: (np.ndarray, np.ndarray, np.ndarray)
        The meshgrid coordinates X and Y, and the densities at these points
        reshaped to the shape of X.
        These can be passed for plotting, for example as
        plt.pcolormesh(X, Y, PDF)
    """
    estimator = MultivariateNormalTransition(scaling=1) if kde is None else kde
    estimator.fit(df[[x, y]], w)

    # Missing limits fall back to the range covered by the observations.
    x_lower = df[x].min() if xmin is None else xmin
    x_upper = df[x].max() if xmax is None else xmax
    y_lower = df[y].min() if ymin is None else ymin
    y_upper = df[y].max() if ymax is None else ymax

    x_axis = np.linspace(x_lower, x_upper, num=numx)
    y_axis = np.linspace(y_lower, y_upper, num=numy)
    grid_x, grid_y = np.meshgrid(x_axis, y_axis)

    # The estimator is queried on a flat list of points, then the result is
    # folded back into grid shape.
    points = pd.DataFrame({x: grid_x.flatten(), y: grid_y.flatten()})
    density = estimator.pdf(points).reshape(grid_x.shape)
    return grid_x, grid_y, density
[docs]
def plot_kde_2d_highlevel(
    history: History,
    x: str,
    y: str,
    m: int = 0,
    t: int = None,
    xmin: float = None,
    xmax: float = None,
    ymin: float = None,
    ymax: float = None,
    numx: int = 50,
    numy: int = 50,
    ax: mpl.axes.Axes = None,
    size=None,
    colorbar=True,
    title: str = None,
    refval=None,
    refval_color='C1',
    kde=None,
    xname: str = None,
    yname: str = None,
    **kwargs,
) -> mpl.axes.Axes:
    """
    Plot 2d kernel density estimate of parameter samples from a history.

    The samples and weights are read via ``history.get_distribution`` and
    everything else is handed on to `plot_kde_2d`.

    Parameters
    ----------
    history: History
        History to extract data from.
    x: str
        The variable for the x-axis.
    y: str
        The variable for the y-axis.
    m: int, optional
        Id of the model to plot for.
    t: int, optional
        Time point to plot for. Defaults to last time point.
    xmin: float, optional
        The lower limit in x for the density grid.
        If left empty, it is set to the minimum of the observations of the
        variable to be plotted as x.
    xmax: float, optional
        The upper limit in x for the density grid.
        If left empty, it is set to the maximum of the observations of the
        variable to be plotted as x.
    ymin: float, optional
        The lower limit in y for the density grid.
        If left empty, it is set to the minimum of the observations of the
        variable to be plotted as y.
    ymax: float, optional
        The upper limit in y for the density grid.
        If left empty, it is set to the maximum of the observations of the
        variable to be plotted as y.
    numx: int, optional
        The number of grid points in x direction.
        Defaults to 50.
    numy: int, optional
        The number of grid points in y direction.
        Defaults to 50.
    ax: matplotlib.axes.Axes, optional
        The axis object to use.
    size: 2-Tuple of float
        Size of the plot in inches.
    colorbar: bool, optional
        Whether to plot a colorbar. Defaults to True.
    title: str, optional
        Title for the plot. Defaults to None.
    refval: dict, optional
        A reference parameter to be shown in the plots. Default: None.
    refval_color: str, optional
        Color to use for the reference value.
    kde: pyabc.Transition, optional
        The kernel density estimator to use for creating a smooth density
        from the sample. If None, the default of `kde_2d` applies, i.e.
        ``MultivariateNormalTransition(scaling=1)``.
    xname: str, optional
        Parameter name for the x-axis. Defaults to `x`.
    yname: str, optional
        Parameter name for the y-axis. Defaults to `y`.
    **kwargs:
        Forwarded unchanged to `plot_kde_2d`.

    Returns
    -------
    ax: matplotlib axis
        Axis of the plot.
    """
    samples, weights = history.get_distribution(m=m, t=t)
    return plot_kde_2d(
        samples,
        weights,
        x,
        y,
        xmin=xmin,
        xmax=xmax,
        ymin=ymin,
        ymax=ymax,
        numx=numx,
        numy=numy,
        ax=ax,
        size=size,
        colorbar=colorbar,
        title=title,
        refval=refval,
        refval_color=refval_color,
        kde=kde,
        xname=xname,
        yname=yname,
        **kwargs,
    )
[docs]
def plot_kde_2d_highlevel_plotly(
    history: History,
    x: str,
    y: str,
    m: int = 0,
    t: int = None,
    xmin: float = None,
    xmax: float = None,
    ymin: float = None,
    ymax: float = None,
    numx: int = 50,
    numy: int = 50,
    fig: "go.Figure" = None,
    row: int = 1,
    col: int = 1,
    size=None,
    showscale=True,
    showlegend=True,
    title: str = None,
    refval=None,
    refval_color='gray',
    kde=None,
    xname: str = None,
    yname: str = None,
    **kwargs,
) -> "go.Figure":
    """
    Plot 2d kernel density estimate of parameter samples using plotly.

    Reads samples and weights for model `m` at time `t` via
    ``history.get_distribution`` and forwards them, together with all
    remaining arguments, to `plot_kde_2d_plotly`.

    Parameters
    ----------
    history: History
        History to extract data from.
    x: str
        The variable for the x-axis.
    y: str
        The variable for the y-axis.
    m: int, optional
        Id of the model to plot for.
    t: int, optional
        Time point to plot for.
    **kwargs:
        Forwarded unchanged to `plot_kde_2d_plotly`.

    All other parameters are passed through to `plot_kde_2d_plotly`.

    Returns
    -------
    fig: plotly.graph_objects.Figure
        The figure returned by `plot_kde_2d_plotly`.
    """
    samples, weights = history.get_distribution(m=m, t=t)
    return plot_kde_2d_plotly(
        samples,
        weights,
        x,
        y,
        xmin=xmin,
        xmax=xmax,
        ymin=ymin,
        ymax=ymax,
        numx=numx,
        numy=numy,
        fig=fig,
        row=row,
        col=col,
        size=size,
        showscale=showscale,
        showlegend=showlegend,
        title=title,
        refval=refval,
        refval_color=refval_color,
        kde=kde,
        xname=xname,
        yname=yname,
        **kwargs,
    )
[docs]
def plot_kde_2d(
    df,
    w,
    x,
    y,
    xmin=None,
    xmax=None,
    ymin=None,
    ymax=None,
    numx=50,
    numy=50,
    ax: mpl.axes.Axes = None,
    size=None,
    colorbar=True,
    title: str = None,
    refval=None,
    refval_color='C1',
    kde=None,
    xname: str = None,
    yname: str = None,
    **kwargs,
) -> mpl.axes.Axes:
    """
    Plot a 2d kernel density estimate of parameter samples.

    The density is computed with `kde_2d` and drawn with ``ax.pcolormesh``.

    Parameters
    ----------
    df: Pandas Dataframe
        The rows are the observations, the columns the variables.
    w:
        The corresponding weights.
    **kwargs:
        Forwarded to ``ax.pcolormesh``.

    For the other parameters, see `plot_kde_2d_highlevel`.

    Returns
    -------
    ax: matplotlib axis
        Axis of the plot.
    """
    grid_x, grid_y, density = kde_2d(
        df,
        w,
        x,
        y,
        xmin=xmin,
        xmax=xmax,
        ymin=ymin,
        ymax=ymax,
        numx=numx,
        numy=numy,
        kde=kde,
    )

    x_label = x if xname is None else xname
    y_label = y if yname is None else yname
    if ax is None:
        ax = plt.subplots()[1]

    mesh = ax.pcolormesh(grid_x, grid_y, density, shading='auto', **kwargs)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    if title is not None:
        ax.set_title(title)

    if colorbar:
        plt.colorbar(mesh, ax=ax)

    if refval is not None:
        # mark the reference point on top of the density
        ax.scatter([refval[x]], [refval[y]], color=refval_color)

    if size is not None:
        ax.get_figure().set_size_inches(size)

    return ax
[docs]
def plot_kde_2d_plotly(
    df,
    w,
    x,
    y,
    xmin=None,
    xmax=None,
    ymin=None,
    ymax=None,
    numx=50,
    numy=50,
    fig: "go.Figure" = None,
    row: int = 1,
    col: int = 1,
    size=None,
    showscale=True,
    showlegend=True,
    title: str = None,
    refval=None,
    refval_color='gray',
    kde=None,
    xname: str = None,
    yname: str = None,
    **kwargs,
):
    """
    Plot a 2d kernel density estimate of parameter samples using plotly.

    The density is computed with `kde_2d` and drawn as a heatmap.

    Parameters
    ----------
    df: pandas.DataFrame
        The rows are the observations, the columns the variables.
    w:
        The corresponding weights.
    x: str
        The variable for the x-axis.
    y: str
        The variable for the y-axis.
    xmin, xmax, ymin, ymax: float, optional
        Limits of the density grid, see `kde_2d`.
    numx, numy: int, optional
        Number of grid points per direction. Default: 50.
    fig: plotly.graph_objects.Figure, optional
        Figure to draw into. If None, a new 1x1 subplot figure is created.
    row, col: int, optional
        Subplot position the traces are added to. Default: 1, 1.
    size: 2-Tuple, optional
        Figure size as ``(width, height)``, applied to the figure layout.
        This is the same order as used by `plot_kde_1d_plotly`.
    showscale: bool, optional
        Forwarded to ``go.Heatmap``. Default: True.
    showlegend: bool, optional
        Forwarded to ``go.Heatmap``. Default: True.
    title: str, optional
        Title for the figure. Defaults to None.
    refval: dict, optional
        A reference parameter; ``(refval[x], refval[y])`` is drawn as a
        marker if not None.
    refval_color: str, optional
        Color to use for the reference value.
    kde: pyabc.Transition, optional
        The kernel density estimator, handed on to `kde_2d`.
    xname, yname: str, optional
        Axis titles. Default to `x` and `y`.
    **kwargs:
        Forwarded to ``go.Heatmap``.

    Returns
    -------
    fig: plotly.graph_objects.Figure
        The figure containing the plot.
    """
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots

    X, Y, PDF = kde_2d(
        df,
        w,
        x,
        y,
        xmin=xmin,
        xmax=xmax,
        ymin=ymin,
        ymax=ymax,
        numx=numx,
        numy=numy,
        kde=kde,
    )
    if xname is None:
        xname = x
    if yname is None:
        yname = y
    if fig is None:
        fig = make_subplots(rows=1, cols=1)

    fig.add_trace(
        go.Heatmap(
            # first row / first column of the meshgrid are the axis values
            x=X[0],
            y=Y[:, 0],
            z=PDF,
            showscale=showscale,
            showlegend=showlegend,
            name="Posterior",
            **kwargs,
        ),
        row=row,
        col=col,
    )
    fig.update_xaxes(title_text=xname, row=row, col=col)
    fig.update_yaxes(title_text=yname, row=row, col=col)

    # title
    if title is not None:
        # The title is a property of the whole figure layout. `row`/`col`
        # are not layout properties and must not be passed here, otherwise
        # plotly rejects the call.
        fig.update_layout(title_text=title)

    # reference value
    if refval is not None:
        fig.add_trace(
            go.Scatter(
                x=[refval[x]],
                y=[refval[y]],
                mode='markers',
                marker={'color': refval_color},
                name='Refval',
            ),
            row=row,
            col=col,
        )

    # size, as (width, height) like in plot_kde_1d_plotly
    if size is not None:
        fig.update_layout(width=size[0], height=size[1])

    return fig
[docs]
def plot_kde_matrix_highlevel(
    history,
    m: int = 0,
    t: int = None,
    limits=None,
    colorbar: bool = True,
    height: float = 2.5,
    numx: int = 50,
    numy: int = 50,
    refval=None,
    refval_color='C1',
    kde=None,
    names: dict = None,
    arr_ax=None,
):
    """
    Plot a KDE matrix for 1- and 2-dim marginals of the parameter samples.

    The samples and weights are read via ``history.get_distribution`` and
    everything else is handed on to `plot_kde_matrix`.

    Parameters
    ----------
    history: History
        History to extract data from.
    m: int, optional
        Id of the model to plot for.
    t: int, optional
        Time point to plot for. Defaults to last time point.
    limits: dictionary, optional
        Dictionary of the form ``{"name": (lower_limit, upper_limit)}``.
    colorbar: bool
        Whether to plot the colorbars or not.
    height: float, optional
        Height of each subplot in inches. Default: 2.5.
    numx: int, optional
        The number of grid points in x direction.
        Defaults to 50.
    numy: int, optional
        The number of grid points in y direction.
        Defaults to 50.
    refval: dict, optional
        A reference parameter to be shown in the plots (e.g. the
        underlying ground truth parameter used to simulate the data
        for testing purposes). Default: None.
    refval_color: str, optional
        Color to use for the reference value.
    kde: pyabc.Transition, optional
        The kernel density estimator to use for creating a smooth density
        from the sample. If None, the defaults of `kde_1d` and `kde_2d`
        apply, i.e. ``MultivariateNormalTransition(scaling=1)``.
    names: dict, optional
        Mapping from parameter ids to the names to display.
    arr_ax:
        Array of axes objects to use.

    Returns
    -------
    arr_ax:
        Array of the generated plots' axes.
    """
    samples, weights = history.get_distribution(m=m, t=t)
    return plot_kde_matrix(
        samples,
        weights,
        limits=limits,
        colorbar=colorbar,
        height=height,
        numx=numx,
        numy=numy,
        refval=refval,
        refval_color=refval_color,
        kde=kde,
        names=names,
        arr_ax=arr_ax,
    )
[docs]
def plot_kde_matrix_highlevel_plotly(
    history,
    m: int = 0,
    t: int = None,
    limits=None,
    height: float = 30,
    numx: int = 50,
    numy: int = 50,
    refval=None,
    refval_color='gray',
    marker_color=None,
    kde=None,
    names: dict = None,
    title: str = "Univariate and bivariate distributions using KDE",
) -> "go.Figure":
    """
    Plot a KDE matrix for 1- and 2-dim marginals of the parameter samples,
    using plotly.

    The samples and weights are read via ``history.get_distribution`` and
    everything else is handed on to `plot_kde_matrix_plotly`.

    Parameters
    ----------
    history: History
        History to extract data from.
    m: int, optional
        Id of the model to plot for.
    t: int, optional
        Time point to plot for.
    limits: dictionary, optional
        Dictionary of the form ``{"name": (lower_limit, upper_limit)}``.
    height: float, optional
        Height of each subplot; the figure is ``height * n_par`` in both
        directions. Default: 30.
    numx: int, optional
        The number of grid points in x direction. Defaults to 50.
    numy: int, optional
        The number of grid points in y direction. Defaults to 50.
    refval: dict, optional
        A reference parameter to be shown in the plots. Default: None.
    refval_color: str, optional
        Color to use for the reference value.
    marker_color: optional
        Color of the 1d density traces on the diagonal.
    kde: pyabc.Transition, optional
        The kernel density estimator to use.
    names: dict, optional
        Mapping from parameter ids to the names to display.
    title: str, optional
        Title of the figure.

    Returns
    -------
    fig: plotly.graph_objects.Figure
        The figure returned by `plot_kde_matrix_plotly`.
    """
    df, w = history.get_distribution(m=m, t=t)
    return plot_kde_matrix_plotly(
        df=df,
        w=w,
        limits=limits,
        height=height,
        numx=numx,
        numy=numy,
        refval=refval,
        refval_color=refval_color,
        # forward the caller's choice instead of silently dropping it
        marker_color=marker_color,
        kde=kde,
        names=names,
        title=title,
    )
[docs]
def plot_kde_matrix(
    df,
    w,
    limits=None,
    colorbar=True,
    height=2.5,
    numx=50,
    numy=50,
    refval=None,
    refval_color='C1',
    kde=None,
    names: dict = None,
    arr_ax=None,
):
    """
    Plot a KDE matrix for 1- and 2-dim marginals of the parameter samples.

    The diagonal shows 1d density estimates, the lower triangle 2d density
    estimates and the upper triangle scatter plots of the samples.

    Parameters
    ----------
    df: Pandas Dataframe
        The rows are the observations, the columns the variables.
    w: np.ndarray
        The corresponding weights.
    limits: dictionary, optional
        Dictionary of the form ``{"name": (lower_limit, upper_limit)}``.
        Parameters without an entry get no explicit limits.
    colorbar: bool, optional
        Whether to plot the colorbars of the 2d panels. Default: True.
    height: float, optional
        Height of each subplot in inches. Default: 2.5.
    numx: int, optional
        The number of grid points in x direction. Defaults to 50.
    numy: int, optional
        The number of grid points in y direction. Defaults to 50.
    refval: dict, optional
        A reference parameter to be shown in the plots. Default: None.
    refval_color: str, optional
        Color to use for the reference value.
    kde: pyabc.Transition, optional
        The kernel density estimator to use for all panels.
    names: dict, optional
        Mapping from parameter ids (column names of `df`) to the names to
        display. Defaults to the parameter ids themselves.
    arr_ax: optional
        Array of axes objects to use, indexable as ``arr_ax[i, j]``.
        If None, a new ``n_par x n_par`` figure is created.

    Returns
    -------
    arr_ax:
        Array of the generated plots' axes.
    """
    n_par = df.shape[1]
    par_ids = list(df.columns.values)

    if names is None:
        names = {key: key for key in par_ids}

    if arr_ax is None:
        fig, arr_ax = plt.subplots(
            nrows=n_par,
            ncols=n_par,
            sharex=False,
            sharey=False,
            # Always return a 2d array of axes. Without this, a single
            # parameter yields a bare Axes and arr_ax[i, j] fails below.
            squeeze=False,
            figsize=(height * n_par, height * n_par),
        )
    else:
        fig = arr_ax[0, 0].get_figure()

    if limits is None:
        limits = {}
    default = (None, None)

    def hist_2d(x, y, ax):
        """Draw the 2d density of the columns `x` and `y` on `ax`."""
        pair = pd.concat((x, y), axis=1)
        plot_kde_2d(
            pair,
            w,
            x.name,
            y.name,
            xmin=limits.get(x.name, default)[0],
            xmax=limits.get(x.name, default)[1],
            ymin=limits.get(y.name, default)[0],
            ymax=limits.get(y.name, default)[1],
            numx=numx,
            numy=numy,
            ax=ax,
            title=None,
            colorbar=colorbar,
            refval=refval,
            refval_color=refval_color,
            kde=kde,
            xname=names[x.name],
            yname=names[y.name],
        )

    def scatter(x, y, ax):
        """Draw the raw samples of `x` vs. `y` on `ax`.

        All points are drawn in plain black; the weights are not reflected
        in the scatter plot.
        """
        ax.scatter(x, y, color="k")
        if refval is not None:
            ax.scatter([refval[x.name]], [refval[y.name]], color=refval_color)
        ax.set_xlim(*limits.get(x.name, default))
        ax.set_ylim(*limits.get(y.name, default))

    def hist_1d(x, ax):
        """Draw the 1d density of the column `x` on `ax`."""
        single = pd.concat((x,), axis=1)
        plot_kde_1d(
            single,
            w,
            x.name,
            xmin=limits.get(x.name, default)[0],
            xmax=limits.get(x.name, default)[1],
            numx=numx,
            ax=ax,
            refval=refval,
            refval_color=refval_color,
            kde=kde,
            # use the display name, like the 2d panels do
            xname=names[x.name],
        )

    # fill all subplots
    for i, y_name in enumerate(par_ids):
        y = df[y_name]

        # diagonal
        hist_1d(y, arr_ax[i, i])

        for j, x_name in enumerate(par_ids[:i]):
            x = df[x_name]

            # lower
            hist_2d(x, y, arr_ax[i, j])

            # upper
            scatter(y, x, arr_ax[j, i])

    # format
    format_plot_matrix(arr_ax, [names[key] for key in par_ids])

    # adjust subplots to fit
    fig.tight_layout()

    return arr_ax
[docs]
def plot_kde_matrix_plotly(
    df,
    w,
    limits=None,
    height=100,
    numx=50,
    numy=50,
    refval=None,
    refval_color='gray',
    marker_color=None,
    kde=None,
    names: dict = None,
    title: str = "Univariate and bivariate distributions using KDE",
) -> "go.Figure":
    """
    Plot a KDE matrix for 1- and 2-dim marginals of the parameter samples,
    using plotly.

    The diagonal shows 1d density estimates, the lower triangle 2d density
    estimates and the upper triangle scatter plots of the samples.

    Parameters
    ----------
    df: Pandas Dataframe
        The rows are the observations, the columns the variables.
    w:
        The corresponding weights.
    limits: dictionary, optional
        Dictionary of the form ``{"name": (lower_limit, upper_limit)}``.
        Parameters without an entry get no explicit limits.
    height: float, optional
        Height of each subplot; the figure layout is set to
        ``height * n_par`` in both directions. If None, the layout size is
        left untouched. Default: 100.
    numx: int, optional
        The number of grid points in x direction. Defaults to 50.
    numy: int, optional
        The number of grid points in y direction. Defaults to 50.
    refval: dict, optional
        A reference parameter to be shown in the plots. Default: None.
    refval_color: str, optional
        Color to use for the reference value.
    marker_color: optional
        Color of the 1d density traces on the diagonal.
    kde: pyabc.Transition, optional
        The kernel density estimator to use for all panels.
    names: dict, optional
        Mapping from parameter ids (column names of `df`) to the names to
        display. Defaults to the parameter ids themselves.
    title: str, optional
        Title of the figure.

    Returns
    -------
    fig: plotly.graph_objects.Figure
        The generated figure.
    """
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots

    n_par = df.shape[1]
    par_ids = list(df.columns.values)

    if names is None:
        names = {key: key for key in par_ids}

    # create figure
    fig = make_subplots(
        rows=n_par,
        cols=n_par,
        shared_xaxes=False,
        shared_yaxes=False,
    )

    # set size
    if height is not None:
        fig.update_layout(
            height=height * n_par,
            width=height * n_par,
        )

    if limits is None:
        limits = {}
    default = (None, None)

    def hist_2d(x, y, fig, row, col):
        """Draw the 2d density of the columns `x` and `y` at (row, col)."""
        pair = pd.concat((x, y), axis=1)
        plot_kde_2d_plotly(
            pair,
            w,
            x.name,
            y.name,
            xmin=limits.get(x.name, default)[0],
            xmax=limits.get(x.name, default)[1],
            ymin=limits.get(y.name, default)[0],
            ymax=limits.get(y.name, default)[1],
            numx=numx,
            numy=numy,
            fig=fig,
            row=row,
            col=col,
            title=None,
            showscale=False,
            showlegend=False,
            refval=refval,
            refval_color=refval_color,
            kde=kde,
            xname=names[x.name],
            yname=names[y.name],
        )

    def scatter(x, y, fig, row, col):
        """Draw the raw samples of `x` vs. `y` at (row, col).

        All points are drawn in plain black; the weights are not reflected
        in the scatter plot.
        """
        fig.add_trace(
            go.Scatter(
                x=x,
                y=y,
                mode="markers",
                marker={'color': "black"},
            ),
            row=row,
            col=col,
        )
        if refval is not None:
            fig.add_trace(
                go.Scatter(
                    x=[refval[x.name]],
                    y=[refval[y.name]],
                    mode="markers",
                    marker={'color': refval_color},
                ),
                row=row,
                col=col,
            )
        fig.update_xaxes(range=limits.get(x.name, default), row=row, col=col)
        fig.update_yaxes(range=limits.get(y.name, default), row=row, col=col)

    def hist_1d(x, fig, row, col):
        """Draw the 1d density of the column `x` at (row, col)."""
        single = pd.concat((x,), axis=1)
        plot_kde_1d_plotly(
            single,
            w,
            x.name,
            xmin=limits.get(x.name, default)[0],
            xmax=limits.get(x.name, default)[1],
            numx=numx,
            fig=fig,
            row=row,
            col=col,
            refval=refval,
            refval_color=refval_color,
            marker_color=marker_color,
            kde=kde,
            # use the display name, like the 2d panels do
            xname=names[x.name],
        )

    # fill all subplots (plotly rows/cols are 1-based)
    for i, y_name in enumerate(par_ids):
        y = df[y_name]

        # diagonal
        hist_1d(y, fig, row=i + 1, col=i + 1)

        for j, x_name in enumerate(par_ids[:i]):
            x = df[x_name]

            # lower
            hist_2d(x, y, fig, row=i + 1, col=j + 1)

            # upper
            scatter(y, x, fig, row=j + 1, col=i + 1)

    # title
    fig.update_layout(title=title)

    # format
    format_plot_matrix_plotly(fig, [names[key] for key in par_ids])

    return fig