Source code for arviz_plots.plots.trace_dist_plot

"""TraceDist plot code."""
from collections.abc import Mapping, Sequence
from copy import copy
from importlib import import_module
from typing import Any, Literal

import numpy as np
import xarray as xr
from arviz_base import rcParams
from arviz_base.labels import BaseLabeller

from arviz_plots.plot_collection import PlotCollection
from arviz_plots.plots.dist_plot import plot_dist
from arviz_plots.plots.trace_plot import plot_trace
from arviz_plots.plots.utils import (
    filter_aes,
    get_contrast_colors,
    get_group,
    process_group_variables_coords,
    set_grid_layout,
)
from arviz_plots.visuals import labelled_x, labelled_y, ticklabel_props, trace_rug


def plot_trace_dist(
    dt,
    var_names=None,
    filter_vars=None,
    group="posterior",
    coords=None,
    sample_dims=None,
    compact=True,
    combined=False,
    kind=None,
    plot_collection=None,
    backend=None,
    labeller=None,
    aes_by_visuals: Mapping[
        Literal[
            "dist",
            "trace",
            "divergence",
            "label",
            "ticklabels",
            "xlabel_trace",
        ],
        Sequence[str],
    ] = None,
    visuals: Mapping[
        Literal[
            "dist",
            "trace",
            "divergence",
            "label",
            "ticklabels",
            "xlabel_trace",
            "remove_axis",
        ],
        Mapping[str, Any] | Literal[False],
    ] = None,
    stats: Mapping[Literal["dist"], Mapping[str, Any] | xr.Dataset] = None,
    **pc_kwargs,
):
    """Plot 1D marginal distributions and iteration versus sampled values.

    Parameters
    ----------
    dt : DataTree
        Input data
    var_names : str or list of str, optional
        One or more variables to be plotted.
        Prefix the variables by ~ when you want to exclude them from the plot.
    filter_vars : {None, "like", "regex"}, optional, default=None
        If None (default), interpret var_names as the real variables names.
        If "like", interpret var_names as substrings of the real variables names.
        If "regex", interpret var_names as regular expressions on the real variables names.
    group : str, default "posterior"
        Group to be plotted.
    coords : dict, optional
        Coordinate values used to subset the data before plotting.
    sample_dims : str or sequence of hashable, optional
        Dimensions to reduce unless mapped to an aesthetic.
        Defaults to ``rcParams["data.sample_dims"]``
    compact : bool, default True
        Plot multidimensional variables in a single :term:`plot`.
    combined : bool, default False
        Whether to use the data from all chains to generate a single distribution
        representation (True) or one per chain (False).
        Forced to True when the "chain" dimension is not present.
    kind : {"kde", "hist", "dot", "ecdf"}, optional
        How to represent the marginal distribution.
        Defaults to ``rcParams["plot.density_kind"]``
    plot_collection : PlotCollection, optional
    backend : {"matplotlib", "bokeh"}, optional
    labeller : labeller, optional
    aes_by_visuals : mapping, optional
        Mapping of visuals to aesthetics that should use their mapping in `plot_collection`
        when plotted. The defaults depend on the combination of `compact` and `combined`,
        see the examples section for an illustrated description.
        Valid keys are the same as for `visuals`.

    visuals : mapping of {str : mapping or False}, optional
        Valid keys are:

        * dist -> depending on the value of `kind` passed to:

          * "kde" -> passed to :func:`~arviz_plots.visuals.line_xy`
          * "ecdf" -> passed to :func:`~arviz_plots.visuals.ecdf_line`
          * "hist" -> passed to :func:`~arviz_plots.visuals.hist`

        * "trace" -> passed to :func:`~.visuals.line`
        * "divergence" -> passed to :func:`~.visuals.trace_rug`
        * "label" -> :func:`~.visuals.labelled_x` and :func:`~.visuals.labelled_y`
        * "ticklabels" -> :func:`~.visuals.ticklabel_props`
        * "xlabel_trace" -> :func:`~.visuals.labelled_x`
        * remove_axis -> not passed anywhere, can only be ``False`` to skip calling this function

    stats : mapping, optional
        Valid keys are:

        * dist -> passed to kde, ecdf, ...

    pc_kwargs : mapping
        Passed to :class:`arviz_plots.PlotCollection`

    Returns
    -------
    PlotCollection

    Raises
    ------
    ValueError
        If the color or linestyle cycle has fewer than 3 values, or if
        ``combined=True`` while an aesthetic used by the "dist" visual
        is mapped to the "chain" dimension.

    Examples
    --------
    The following examples focus on behaviour specific to ``plot_trace_dist``.
    For a general introduction to batteries-included functions like this one and common
    usage examples see :ref:`plots_intro`

    Default plot_trace_dist (``compact=True`` and ``combined=False``). In this case,
    the multiple coordinate values are overlaid on the same plot for multidimensional values;
    by default, the color is mapped to all dimensions of each variable (but `sample_dims`)
    to allow distinguishing the different coordinate values.

    As ``combined=False`` each chain is also being plotted, overlaying them on their
    corresponding plots; as the color property is already taken, the chain information
    is encoded in the linestyle as default.

    Both mappings are applied to the trace and dist elements.

    .. plot::
        :context: close-figs

        >>> from arviz_plots import plot_trace_dist, style
        >>> style.use("arviz-variat")
        >>> from arviz_base import load_arviz_data
        >>> centered = load_arviz_data('centered_eight')
        >>> coords = {"school": ["Choate", "Deerfield", "Hotchkiss"]}
        >>> pc = plot_trace_dist(centered, coords=coords, compact=True, combined=False)
        >>> pc.add_legend(["__variable__", "school"])

    plot_trace_dist with ``compact=True`` and ``combined=True``. The aesthetic mappings
    stay the same as in the previous case, but now the linestyle property mapping
    is only taken into account for the trace as in the left column, we use the data
    from all chains to generate a single distribution representation for each
    variable+coordinate value combination.

    Similarly to the first case, this default and now only mapping is applied to both
    the trace and the dist elements.

    .. plot::
        :context: close-figs

        >>> pc = plot_trace_dist(centered, coords=coords, compact=True, combined=True)
        >>> pc.add_legend(["__variable__", "school"])

    When ``compact=False``, each variable and coordinate value gets its own plot,
    and so the color property is no longer used to encode this information.
    Instead, it is now used to encode the chain information.

    .. plot::
        :context: close-figs

        >>> pc = plot_trace_dist(centered, coords=coords, compact=False, combined=False)

    Similarly to the other ``combined=True`` case, the aesthetics stay the same as with
    ``combined=False``, but they are ignored by default when plotting on the left column.

    .. plot::
        :context: close-figs

        >>> pc = plot_trace_dist(centered, coords=coords, compact=False, combined=True)
        >>> pc.add_legend("chain")

    """
    # ---- resolve defaults -------------------------------------------------
    if sample_dims is None:
        sample_dims = rcParams["data.sample_dims"]
    if isinstance(sample_dims, str):
        sample_dims = [sample_dims]
    if kind is None:
        kind = rcParams["plot.density_kind"]
    if stats is None:
        stats = {}
    if visuals is None:
        visuals = {}

    distribution = process_group_variables_coords(
        dt, group=group, var_names=var_names, filter_vars=filter_vars, coords=coords
    )
    has_chain = "chain" in distribution.dims

    # without a chain dimension there is nothing to keep separate
    if not combined and not has_chain:
        combined = True

    if backend is None:
        if plot_collection is None:
            backend = rcParams["plot.backend"]
        else:
            backend = plot_collection.backend

    plot_bknd = import_module(f".backend.{backend}", package="arviz_plots")
    bg_color = plot_bknd.get_background_color()
    contrast_color = get_contrast_colors(bg_color=bg_color)

    # ---- aesthetic cycles -------------------------------------------------
    color_cycle = pc_kwargs.get("color", plot_bknd.get_default_aes("color", 10, {}))
    if len(color_cycle) <= 2:
        raise ValueError(
            f"Not enough values provided for color cycle, got {color_cycle} "
            "but at least 3 are needed"
        )
    linestyle_cycle = pc_kwargs.get("linestyle", plot_bknd.get_default_aes("linestyle", 4, {}))
    if len(linestyle_cycle) <= 2:
        raise ValueError(
            f"Not enough values provided for linestyle cycle, got {linestyle_cycle} "
            "but at least 3 are needed"
        )

    # When chains are combined, the first element of the cycle that would
    # otherwise encode the chain is set aside as a "neutral" value for the
    # combined distribution, and the per-chain cycle starts at the second one.
    if not compact and combined:
        neutral_color = color_cycle[0]
        if has_chain:
            pc_kwargs["color"] = color_cycle[1:]
    else:
        neutral_color = False
    if compact and combined:
        neutral_linestyle = linestyle_cycle[0]
        if has_chain:
            pc_kwargs["linestyle"] = linestyle_cycle[1:]
    else:
        neutral_linestyle = False

    # ---- build the PlotCollection if none was given -----------------------
    if plot_collection is None:
        # copy so the caller's dictionaries are not modified
        pc_kwargs["figure_kwargs"] = pc_kwargs.get("figure_kwargs", {}).copy()
        pc_kwargs["aes"] = pc_kwargs.get("aes", {}).copy()
        pc_kwargs.setdefault("cols", ["column"])
        aux_dim_list = [dim for dim in distribution.dims if dim not in sample_dims]
        if compact:
            pc_kwargs["rows"] = ["__variable__"]
            pc_kwargs["aes"].setdefault("color", ["__variable__"] + aux_dim_list)
            if has_chain:
                pc_kwargs["aes"].setdefault("overlay", ["__variable__", "chain"] + aux_dim_list)
                pc_kwargs["aes"].setdefault("linestyle", ["chain"])
            else:
                pc_kwargs["aes"].setdefault("overlay", ["__variable__"] + aux_dim_list)
        else:
            pc_kwargs.setdefault("rows", ["__variable__"] + aux_dim_list)
            if has_chain:
                pc_kwargs["aes"].setdefault("color", ["chain"])
                pc_kwargs["aes"].setdefault("overlay", ["chain"])

        pc_kwargs = set_grid_layout(pc_kwargs, plot_bknd, distribution, num_cols=2)

        # a length-2 "column" dimension is added so each row gets a dist and a trace plot
        plot_collection = PlotCollection.grid(
            distribution.expand_dims(column=2).assign_coords(column=["dist", "trace"]),
            backend=backend,
            **pc_kwargs,
        )

    # ---- aesthetics per visual --------------------------------------------
    if aes_by_visuals is None:
        aes_by_visuals = {}
    else:
        aes_by_visuals = aes_by_visuals.copy()

    if combined and has_chain:
        # the aesthetic that encodes the chain must not apply to the combined dist
        chain_aes = "linestyle" if compact else "color"
        aes_by_visuals["dist"] = aes_by_visuals.get(
            "dist", plot_collection.aes_set.difference({"overlay", chain_aes})
        )
    else:
        aes_by_visuals["dist"] = {"overlay"}.union(
            aes_by_visuals.get("dist", plot_collection.aes_set)
        )
    aes_by_visuals["trace"] = {"overlay"}.union(
        aes_by_visuals.get("trace", plot_collection.aes_set)
    )
    aes_by_visuals["divergence"] = {"overlay"}.union(aes_by_visuals.get("divergence", {}))

    if combined and has_chain:
        # "aes" is only guaranteed to be in pc_kwargs when the PlotCollection was
        # created above; with a user supplied plot_collection fall back to an
        # empty mapping instead of raising KeyError.
        chain_mapped_to_aes = {
            aes_key
            for aes_key, aes_dims in pc_kwargs.get("aes", {}).items()
            if "chain" in aes_dims
        }.intersection(aes_by_visuals["dist"])
        if chain_mapped_to_aes:
            raise ValueError(
                f"Found properties {chain_mapped_to_aes} mapped to the chain dimension, "
                "but combined=True. Set combined=False or modify the aesthetic mappings"
            )

    if labeller is None:
        labeller = BaseLabeller()

    _, dist_aes, _ = filter_aes(plot_collection, aes_by_visuals, "dist", sample_dims)

    # ---- dist column ------------------------------------------------------
    # only the distribution itself is wanted from plot_dist, disable its other visuals
    visuals_dist = {
        key: False
        for key in ("credible_interval", "point_estimate", "point_estimate_text", "title")
    }
    dist_kwargs = copy(visuals.get("dist", {}))
    if dist_kwargs is not False:
        if neutral_color and "color" not in dist_aes:
            dist_kwargs.setdefault("color", neutral_color)
        if neutral_linestyle and "linestyle" not in dist_aes:
            dist_kwargs.setdefault("linestyle", neutral_linestyle)
    visuals_dist["dist"] = dist_kwargs
    if "remove_axis" in visuals:
        visuals_dist["remove_axis"] = visuals["remove_axis"]

    plot_collection.coords = {"column": "dist"}
    plot_dist(
        dt,
        var_names=var_names,
        filter_vars=filter_vars,
        group=group,
        coords=coords,
        sample_dims=sample_dims,
        kind=kind,
        plot_collection=plot_collection,
        labeller=labeller,
        aes_by_visuals={key: value for key, value in aes_by_visuals.items() if key == "dist"},
        visuals=visuals_dist,
        stats=stats,
    )
    plot_collection.coords = None

    # ---- trace column -----------------------------------------------------
    trace_kwargs = copy(visuals.get("trace", {}))
    div_kwargs = copy(visuals.get("divergence", {}))
    xlabel_kwargs = copy(visuals.get("xlabel_trace", {}))
    visuals_trace = {"trace": trace_kwargs, "divergence": div_kwargs, "xlabel": xlabel_kwargs}
    visuals_trace["title"] = False
    visuals_trace["ticklabels"] = False
    # Forward the aesthetic mappings (not the plotting kwargs) to plot_trace,
    # translating "xlabel_trace" to the "xlabel" key used in visuals_trace.
    aes_by_visuals_trace = {
        key.replace("_trace", ""): value
        for key, value in aes_by_visuals.items()
        if key in {"trace", "divergence", "xlabel_trace"}
    }

    plot_collection.coords = {"column": "trace"}
    plot_trace(
        dt,
        var_names=var_names,
        filter_vars=filter_vars,
        group=group,
        coords=coords,
        sample_dims=sample_dims,
        plot_collection=plot_collection,
        labeller=labeller,
        aes_by_visuals=aes_by_visuals_trace,
        visuals=visuals_trace,
    )
    plot_collection.coords = None
    if xlabel_kwargs is not False:
        plot_collection.rename_visuals(xlabel="xlabel_trace")

    # ---- divergences on the dist column -----------------------------------
    sample_stats = get_group(dt, "sample_stats", allow_missing=True)
    if (
        div_kwargs is not False
        and sample_stats is not None
        and "diverging" in sample_stats.data_vars
        and np.any(sample_stats.diverging)
    ):
        # rename divergences visual from plot_trace to avoid clashing
        plot_collection.rename_visuals(divergence="divergence_trace")
        divergence_mask = dt.sample_stats.diverging
        _, div_aes, div_ignore = filter_aes(
            plot_collection, aes_by_visuals, "divergence", sample_dims
        )
        if "color" not in div_aes:
            div_kwargs.setdefault("color", contrast_color)
        if "marker" not in div_aes:
            div_kwargs.setdefault("marker", "|")
        if "size" not in div_aes:
            div_kwargs.setdefault("size", 30)

        plot_collection.map(
            trace_rug,
            "divergence_dist",
            data=distribution,
            ignore_aes=div_ignore,
            xname=False,
            y=0,
            coords={"column": "dist"},
            mask=divergence_mask,
            **div_kwargs,
        )

    # ---- labels -----------------------------------------------------------
    # Add varnames as x and y labels
    _, labels_aes, labels_ignore = filter_aes(plot_collection, aes_by_visuals, "label", sample_dims)
    label_kwargs = copy(visuals.get("label", {}))

    if label_kwargs is not False:
        if "color" not in labels_aes:
            label_kwargs.setdefault("color", contrast_color)

        plot_collection.map(
            labelled_x,
            "xlabel_dist",
            ignore_aes=labels_ignore,
            coords={"column": "dist"},
            subset_info=True,
            labeller=labeller,
            store_artist=backend == "none",
            **label_kwargs,
        )

        plot_collection.map(
            labelled_y,
            "ylabel_trace",
            ignore_aes=labels_ignore,
            coords={"column": "trace"},
            subset_info=True,
            labeller=labeller,
            store_artist=backend == "none",
            **label_kwargs,
        )

    # ---- tick labels ------------------------------------------------------
    # Adjust tick labels
    ticklabels_kwargs = copy(visuals.get("ticklabels", {}))
    if ticklabels_kwargs is not False:
        _, _, ticklabels_ignore = filter_aes(
            plot_collection, aes_by_visuals, "ticklabels", sample_dims
        )
        plot_collection.map(
            ticklabel_props,
            "ticklabels",
            ignore_aes=ticklabels_ignore,
            axis="both",
            store_artist=backend == "none",
            **ticklabels_kwargs,
        )

    return plot_collection