Source code for pyabc.visualization.contour

"""Contour plots.

To plot contours from the weighted importance samples, the visualization
routines employ a kernel density estimate. Note that this can over-smooth
the density, so that local structure is lost. If this could be the case, it
makes sense to pass a custom `kde` argument: either reduce the `scaling` of
the default MultivariateNormalTransition(), or replace it by a GridSearchCV()
to automatically find a visually good level of smoothness.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ..storage import History
from .kde import kde_2d, plot_kde_1d
from .util import format_plot_matrix


def plot_contour_2d(
    history: History,
    x: str,
    y: str,
    m: int = 0,
    t: int = None,
    xmin: float = None,
    xmax: float = None,
    ymin: float = None,
    ymax: float = None,
    numx: int = 50,
    numy: int = 50,
    ax=None,
    size=None,
    title: str = None,
    refval=None,
    refval_color='C1',
    kde=None,
    xname: str = None,
    yname: str = None,
    show_clabel: bool = False,
    show_legend: bool = False,
    clabel_kwargs: dict = None,
    **kwargs,
):
    """
    Plot 2d contour of parameter samples.

    Fetches the weighted sample for model `m` at time `t` from `history`
    and forwards it, together with all plotting options, to
    `plot_contour_2d_lowlevel`.

    Parameters
    ----------
    history: History
        History to extract data from.
    x: str
        The variable for the x-axis.
    y: str
        The variable for the y-axis.
    m: int, optional
        Id of the model to plot for.
    t: int, optional
        Time point to plot for. Defaults to last time point.
    xmin: float, optional
        The lower limit in x for the plot. If left empty, it is set to the
        minimum of the observations of the variable to be plotted as x.
    xmax: float, optional
        The upper limit in x for the plot. If left empty, it is set to the
        maximum of the observations of the variable to be plotted as x.
    ymin: float, optional
        The lower limit in y for the plot. If left empty, it is set to the
        minimum of the observations of the variable to be plotted as y.
    ymax: float, optional
        The upper limit in y for the plot. If left empty, it is set to the
        maximum of the observations of the variable to be plotted as y.
    numx: int, optional
        The number of bins in x direction.
    numy: int, optional
        The number of bins in y direction.
    ax: matplotlib.axes.Axes, optional
        The axis object to use.
    size: 2-Tuple of float
        Size of the plot in inches.
    title: str, optional
        Title for the plot. Defaults to None.
    refval: dict, optional
        A reference parameter to be shown in the plots. Default: None.
    refval_color: str, optional
        Color to use for the reference value.
    kde: pyabc.Transition, optional
        The kernel density estimator to use for creating a smooth density
        from the sample. If None, a multivariate normal kde with
        cross-validated scaling is used.
    xname:
        Parameter name for the x-axis.
    yname:
        Parameter name for the y-axis.
    show_clabel:
        Whether to show in-line contour labels.
    show_legend:
        Whether to show line labels in form of a legend.
    clabel_kwargs:
        Arguments to pass to `matplotlib.contour.ContourLabeler.clabel`.
        E.g.: "inline", "inline_spacing".

    Additional keyword arguments are passed to
    `matplotlib.axes.Axes.contour`.

    Returns
    -------
    ax: matplotlib axis
        Axis of the plot.
    """
    samples, weights = history.get_distribution(m=m, t=t)

    # options controlling the evaluation grid of the density estimate
    grid_options = {
        "xmin": xmin,
        "xmax": xmax,
        "ymin": ymin,
        "ymax": ymax,
        "numx": numx,
        "numy": numy,
        "kde": kde,
    }
    # options controlling the appearance of the axis
    decoration_options = {
        "ax": ax,
        "size": size,
        "title": title,
        "refval": refval,
        "refval_color": refval_color,
        "xname": xname,
        "yname": yname,
        "show_clabel": show_clabel,
        "show_legend": show_legend,
        "clabel_kwargs": clabel_kwargs,
    }

    return plot_contour_2d_lowlevel(
        df=samples,
        w=weights,
        x=x,
        y=y,
        **grid_options,
        **decoration_options,
        **kwargs,
    )
def plot_contour_2d_lowlevel(
    df,
    w,
    x,
    y,
    xmin=None,
    xmax=None,
    ymin=None,
    ymax=None,
    numx=50,
    numy=50,
    ax=None,
    size=None,
    title: str = None,
    refval=None,
    refval_color='C1',
    kde=None,
    xname: str = None,
    yname: str = None,
    show_clabel: bool = False,
    show_legend: bool = False,
    clabel_kwargs: dict = None,
    **kwargs,
):
    """
    Plot a 2d contour of parameter samples.

    The density is obtained from `kde_2d` and drawn via
    `matplotlib.axes.Axes.contour`; labels, legend, title, reference value
    and figure size are applied afterwards on request.

    Parameters
    ----------
    df: Pandas Dataframe
        The rows are the observations, the columns the variables
    w:
        The corresponding weights.

    For the other parameters, see `plot_contour_2d`.

    Returns
    -------
    ax: matplotlib axis
        Axis of the plot.
    """
    # evaluate the density estimate first, so that no figure is created
    # if the estimation fails
    grid_x, grid_y, density = kde_2d(
        df=df,
        w=w,
        x=x,
        y=y,
        xmin=xmin,
        xmax=xmax,
        ymin=ymin,
        ymax=ymax,
        numx=numx,
        numy=numy,
        kde=kde,
    )

    # axis labels fall back to the variable ids
    x_label = x if xname is None else xname
    y_label = y if yname is None else yname

    if ax is None:
        ax = plt.subplots()[1]

    # plot contours
    contour_set = ax.contour(grid_x, grid_y, density, **kwargs)

    # show in-line contour labels
    if show_clabel:
        label_options = {} if clabel_kwargs is None else clabel_kwargs
        ax.clabel(contour_set, **label_options)

    # show legend
    if show_legend:
        artists, texts = contour_set.legend_elements("pdf")
        ax.legend(artists, texts)

    # show title
    if title is not None:
        ax.set_title(title)

    # show reference value
    if refval is not None:
        ax.scatter([refval[x]], [refval[y]], color=refval_color)

    # label axes
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    # set size
    if size is not None:
        figure = ax.get_figure()
        figure.set_size_inches(size)

    return ax
def plot_contour_matrix(
    history,
    m: int = 0,
    t: int = None,
    limits: dict = None,
    height: float = 2.5,
    numx: int = 50,
    numy: int = 50,
    refval: dict = None,
    refval_color='C1',
    kde=None,
    names: dict = None,
    show_clabel: bool = False,
    show_legend: bool = False,
    clabel_kwargs: dict = None,
    arr_ax=None,
    **kwargs,
):
    """
    Plot a contour matrix for 1- and 2-dim marginals of the parameter samples.

    Fetches the weighted sample for model `m` at time `t` from `history`
    and forwards it, together with all plotting options, to
    `plot_contour_matrix_lowlevel`.

    Parameters
    ----------
    history: History
        History to extract data from.
    m: int, optional
        Id of the model to plot for.
    t: int, optional
        Time point to plot for. Defaults to last time point.
    limits: dictionary, optional
        Dictionary of the form ``{"name": (lower_limit, upper_limit)}``.
    height: float, optional
        Height of each subplot in inches. Default: 2.5.
    numx: int, optional
        The number of bins in x direction. Defaults to 50.
    numy: int, optional
        The number of bins in y direction. Defaults to 50.
    refval: dict, optional
        A reference parameter to be shown in the plots (e.g. the
        underlying ground truth parameter used to simulate the data
        for testing purposes). Default: None.
    refval_color: str, optional
        Color to use for the reference value.
    kde: pyabc.Transition, optional
        The kernel density estimator to use for creating a smooth density
        from the sample. If None, a multivariate normal kde with
        cross-validated scaling is used.
    names:
        Parameter names to use.
    show_clabel:
        Whether to show in-line contour labels.
    show_legend:
        Whether to show line labels in form of a legend.
    clabel_kwargs:
        Arguments to pass to `matplotlib.contour.ContourLabeler.clabel`.
        E.g.: "inline", "inline_spacing".
    arr_ax:
        Array of axes objects to use.

    Additional keyword arguments are forwarded unchanged to
    `plot_contour_matrix_lowlevel`.

    Returns
    -------
    arr_ax:
        Array of the generated plots' axes.
    """
    samples, weights = history.get_distribution(m=m, t=t)

    # gather all plotting options once, then hand them over in one go
    matrix_options = {
        "limits": limits,
        "height": height,
        "numx": numx,
        "numy": numy,
        "refval": refval,
        "refval_color": refval_color,
        "kde": kde,
        "names": names,
        "show_clabel": show_clabel,
        "show_legend": show_legend,
        "clabel_kwargs": clabel_kwargs,
        "arr_ax": arr_ax,
    }

    return plot_contour_matrix_lowlevel(
        df=samples,
        w=weights,
        **matrix_options,
        **kwargs,
    )
def plot_contour_matrix_lowlevel(
    df: pd.DataFrame,
    w: np.ndarray,
    limits: dict = None,
    height: float = 2.5,
    numx: int = 50,
    numy: int = 50,
    refval: dict = None,
    refval_color='C1',
    kde=None,
    names: dict = None,
    show_clabel: bool = False,
    show_legend: bool = False,
    clabel_kwargs: dict = None,
    arr_ax=None,
    **kwargs,
):
    """
    Plot a contour matrix for 1- and 2-dim marginals of the parameter samples.

    The diagonal shows 1d density estimates (`plot_kde_1d`), the lower
    triangle 2d contours (`plot_contour_2d_lowlevel`), and the upper
    triangle plain scatter plots of the samples.

    Parameters
    ----------
    df: Pandas Dataframe
        The rows are the observations, the columns the variables.
    w: np.ndarray
        The corresponding weights.
    arr_ax:
        Array of axes objects to use, indexed as ``arr_ax[row, col]``.
        If None, a new ``n_par x n_par`` grid of axes is created.

    Other parameters: See plot_contour_matrix. Additional keyword
    arguments are passed on to `plot_contour_2d_lowlevel`.

    Returns
    -------
    arr_ax:
        Array of the generated plots' axes. When created here, this is
        always a 2-dimensional array, also for a single parameter.
    """
    n_par = df.shape[1]
    par_ids = list(df.columns.values)

    # by default, label every parameter with its own id
    if names is None:
        names = {key: key for key in par_ids}

    if arr_ax is None:
        # squeeze=False keeps the result a 2d array even for n_par == 1;
        # otherwise a bare Axes is returned and arr_ax[i, i] below fails
        fig, arr_ax = plt.subplots(
            nrows=n_par,
            ncols=n_par,
            sharex=False,
            sharey=False,
            squeeze=False,
            figsize=(height * n_par, height * n_par),
        )
    else:
        fig = arr_ax[0, 0].get_figure()

    if limits is None:
        limits = {}
    # fallback for parameters without user-specified limits
    default = (None, None)

    def hist_2d(x, y, ax):
        """Draw the 2d contour of the pair (x, y) on `ax`."""
        pair_df = pd.concat((x, y), axis=1)
        plot_contour_2d_lowlevel(
            pair_df,
            w,
            x.name,
            y.name,
            xmin=limits.get(x.name, default)[0],
            xmax=limits.get(x.name, default)[1],
            ymin=limits.get(y.name, default)[0],
            ymax=limits.get(y.name, default)[1],
            numx=numx,
            numy=numy,
            ax=ax,
            title=None,
            refval=refval,
            refval_color=refval_color,
            kde=kde,
            xname=names[x.name],
            yname=names[y.name],
            show_clabel=show_clabel,
            show_legend=show_legend,
            clabel_kwargs=clabel_kwargs,
            **kwargs,
        )

    def scatter(x, y, ax):
        """Draw the raw samples of the pair (x, y) on `ax`."""
        # NOTE: all points are drawn in uniform black; the sample weights
        # are not reflected here (a weight-based color array used to be
        # computed at this point but was never passed to the plot)
        ax.scatter(x, y, color="k")
        if refval is not None:
            ax.scatter([refval[x.name]], [refval[y.name]], color=refval_color)
        ax.set_xlim(*limits.get(x.name, default))
        ax.set_ylim(*limits.get(y.name, default))

    def hist_1d(x, ax):
        """Draw the 1d density estimate of x on `ax`."""
        single_df = pd.concat((x,), axis=1)
        plot_kde_1d(
            single_df,
            w,
            x.name,
            xmin=limits.get(x.name, default)[0],
            xmax=limits.get(x.name, default)[1],
            numx=numx,
            ax=ax,
            refval=refval,
            refval_color=refval_color,
            kde=kde,
            # use the display name, consistent with the 2d plots
            xname=names[x.name],
        )

    # fill all subplots
    for i, y_name in enumerate(par_ids):
        y = df[y_name]

        # diagonal
        hist_1d(y, arr_ax[i, i])

        for j in range(i):
            x = df[par_ids[j]]

            # lower
            hist_2d(x, y, arr_ax[i, j])

            # upper
            scatter(y, x, arr_ax[j, i])

    # format
    format_plot_matrix(arr_ax, [names[key] for key in par_ids])

    # adjust subplots to fit
    fig.tight_layout()

    return arr_ax