"""TraceDist plot code."""
from collections.abc import Mapping, Sequence
from copy import copy
from importlib import import_module
from typing import Any, Literal
import numpy as np
import xarray as xr
from arviz_base import rcParams
from arviz_base.labels import BaseLabeller
from arviz_plots.plot_collection import PlotCollection
from arviz_plots.plots.dist_plot import plot_dist
from arviz_plots.plots.trace_plot import plot_trace
from arviz_plots.plots.utils import (
filter_aes,
get_contrast_colors,
get_group,
process_group_variables_coords,
set_grid_layout,
)
from arviz_plots.visuals import labelled_x, labelled_y, ticklabel_props, trace_rug
[docs]
def plot_trace_dist(
    dt,
    var_names=None,
    filter_vars=None,
    group="posterior",
    coords=None,
    sample_dims=None,
    compact=True,
    combined=False,
    kind=None,
    plot_collection=None,
    backend=None,
    labeller=None,
    aes_by_visuals: Mapping[
        Literal[
            "dist",
            "trace",
            "divergence",
            "label",
            "ticklabels",
            "xlabel_trace",
        ],
        Sequence[str],
    ] = None,
    visuals: Mapping[
        Literal[
            "dist",
            "trace",
            "divergence",
            "label",
            "ticklabels",
            "xlabel_trace",
            "remove_axis",
        ],
        Mapping[str, Any] | Literal[False],
    ] = None,
    stats: Mapping[Literal["dist"], Mapping[str, Any] | xr.Dataset] = None,
    **pc_kwargs,
):
    """Plot 1D marginal distributions and iteration versus sampled values.

    The figure has two columns: the left one ("dist") shows the marginal
    distribution of each variable and the right one ("trace") shows the
    sampled values against the iteration.

    Parameters
    ----------
    dt : DataTree
        Input data
    var_names : str or list of str, optional
        One or more variables to be plotted.
        Prefix the variables by ~ when you want to exclude them from the plot.
    filter_vars : {None, "like", "regex"}, optional, default=None
        If None (default), interpret var_names as the real variables names.
        If "like", interpret var_names as substrings of the real variables names.
        If "regex", interpret var_names as regular expressions on the real variables names.
    group : str, default "posterior"
        Group to be plotted.
    coords : dict, optional
        Coordinate selection forwarded, together with `var_names` and `filter_vars`,
        to the data subsetting step and to the underlying dist and trace plots.
    sample_dims : str or sequence of hashable, optional
        Dimensions to reduce unless mapped to an aesthetic.
        Defaults to ``rcParams["data.sample_dims"]``
    compact : bool, default True
        Plot multidimensional variables in a single :term:`plot`.
    combined : bool, default False
        Whether to use the data from all chains to generate a single distribution
        representation in the "dist" column instead of one per chain.
        Treated as True when the "chain" dimension is not present.
    kind : {"kde", "hist", "dot", "ecdf"}, optional
        How to represent the marginal distribution.
        Defaults to ``rcParams["plot.density_kind"]``
    plot_collection : PlotCollection, optional
        Existing collection to draw on. If omitted, a new two-column grid is created.
    backend : {"matplotlib", "bokeh"}, optional
        Defaults to the backend of `plot_collection` when one is given and to
        ``rcParams["plot.backend"]`` otherwise.
    labeller : labeller, optional
        Defaults to a :class:`~arviz_base.labels.BaseLabeller` instance.
    aes_by_visuals : mapping, optional
        Mapping of visuals to aesthetics that should use their mapping in `plot_collection` when
        plotted. The defaults depend on the combination of `compact` and `combined`,
        see the examples section for an illustrated description.
        Valid keys are the same as for `visuals`.
    visuals : mapping of {str : mapping or False}, optional
        Valid keys are:

        * dist -> depending on the value of `kind` passed to:

          * "kde" -> passed to :func:`~arviz_plots.visuals.line_xy`
          * "ecdf" -> passed to :func:`~arviz_plots.visuals.ecdf_line`
          * "hist" -> passed to :func:`~arviz_plots.visuals.hist`

        * trace -> passed to :func:`~.visuals.line`
        * divergence -> passed to :func:`~.visuals.trace_rug`
        * label -> :func:`~.visuals.labelled_x` and :func:`~.visuals.labelled_y`
        * ticklabels -> :func:`~.visuals.ticklabel_props`
        * xlabel_trace -> :func:`~.visuals.labelled_x`
        * remove_axis -> not passed anywhere, can only be ``False`` to skip calling this function

    stats : mapping, optional
        Valid keys are:

        * dist -> passed to kde, ecdf, ...

    **pc_kwargs
        Passed to :class:`arviz_plots.PlotCollection`

    Returns
    -------
    PlotCollection

    Raises
    ------
    ValueError
        If fewer than 3 values are available for the color or linestyle cycles,
        or if ``combined=True`` while an aesthetic used by the "dist" visual is
        mapped to the "chain" dimension.

    Examples
    --------
    The following examples focus on behaviour specific to ``plot_trace_dist``.
    For a general introduction to batteries-included functions like this one and common
    usage examples see :ref:`plots_intro`

    Default plot_trace_dist (``compact=True`` and ``combined=False``). In this case,
    the multiple coordinate values are overlaid on the same plot for multidimensional values;
    by default, the color is mapped to all dimensions of each variable (but `sample_dims`)
    to allow distinguishing the different coordinate values.

    As ``combined=False`` each chain is also being plotted, overlaying them on their
    corresponding plots; as the color property is already taken, the chain information
    is encoded in the linestyle as default.

    Both mappings are applied to the trace and dist elements.

    .. plot::
        :context: close-figs

        >>> from arviz_plots import plot_trace_dist, style
        >>> style.use("arviz-variat")
        >>> from arviz_base import load_arviz_data
        >>> centered = load_arviz_data('centered_eight')
        >>> coords = {"school": ["Choate", "Deerfield", "Hotchkiss"]}
        >>> pc = plot_trace_dist(centered, coords=coords, compact=True, combined=False)
        >>> pc.add_legend(["__variable__", "school"])

    plot_trace_dist with ``compact=True`` and ``combined=True``. The aesthetic mappings
    stay the same as in the previous case, but now the linestyle property mapping
    is only taken into account for the trace as in the left column, we use
    the data from all chains to generate a single distribution representation
    for each variable+coordinate value combination.

    Similarly to the first case, this default and now only mapping is applied to both
    the trace and the dist elements.

    .. plot::
        :context: close-figs

        >>> pc = plot_trace_dist(centered, coords=coords, compact=True, combined=True)
        >>> pc.add_legend(["__variable__", "school"])

    When ``compact=False``, each variable and coordinate value gets its own plot,
    and so the color property is no longer used to encode this information.
    Instead, it is now used to encode the chain information.

    .. plot::
        :context: close-figs

        >>> pc = plot_trace_dist(centered, coords=coords, compact=False, combined=False)

    Similarly to the other ``combined=True`` case, the aesthetics stay the same
    as with ``combined=False``, but they are ignored by default when plotting
    on the left column.

    .. plot::
        :context: close-figs

        >>> pc = plot_trace_dist(centered, coords=coords, compact=False, combined=True)
        >>> pc.add_legend("chain")

    """
    # ------------------------------------------------------------------ defaults
    if sample_dims is None:
        sample_dims = rcParams["data.sample_dims"]
    if isinstance(sample_dims, str):
        sample_dims = [sample_dims]
    if kind is None:
        kind = rcParams["plot.density_kind"]
    if stats is None:
        stats = {}
    if visuals is None:
        visuals = {}

    distribution = process_group_variables_coords(
        dt, group=group, var_names=var_names, filter_vars=filter_vars, coords=coords
    )
    has_chain = "chain" in distribution.dims

    # Without a chain dimension there is nothing to split by, so behave as combined.
    if not combined and not has_chain:
        combined = True

    if backend is None:
        if plot_collection is None:
            backend = rcParams["plot.backend"]
        else:
            backend = plot_collection.backend

    plot_bknd = import_module(f".backend.{backend}", package="arviz_plots")
    bg_color = plot_bknd.get_background_color()
    contrast_color = get_contrast_colors(bg_color=bg_color)

    # ------------------------------------------------- color / linestyle cycles
    color_cycle = pc_kwargs.get("color", plot_bknd.get_default_aes("color", 10, {}))
    if len(color_cycle) <= 2:
        raise ValueError(
            f"Not enough values provided for color cycle, got {color_cycle} "
            "but at least 3 are needed"
        )
    linestyle_cycle = pc_kwargs.get("linestyle", plot_bknd.get_default_aes("linestyle", 4, {}))
    if len(linestyle_cycle) <= 2:
        raise ValueError(
            f"Not enough values provided for linestyle cycle, got {linestyle_cycle} "
            "but at least 3 are needed"
        )

    # When chains are combined, the first element of the relevant cycle is reserved
    # as a "neutral" value for the combined distribution and the remaining values
    # are left for the per-chain traces.
    if not compact and combined:
        neutral_color = color_cycle[0]
        if has_chain:
            pc_kwargs["color"] = color_cycle[1:]
    else:
        neutral_color = False
    if compact and combined:
        neutral_linestyle = linestyle_cycle[0]
        if has_chain:
            pc_kwargs["linestyle"] = linestyle_cycle[1:]
    else:
        neutral_linestyle = False

    # ------------------------------------------------------ plot collection setup
    if plot_collection is None:
        # copy nested containers so the caller's objects are never modified
        pc_kwargs["figure_kwargs"] = pc_kwargs.get("figure_kwargs", {}).copy()
        pc_kwargs["aes"] = pc_kwargs.get("aes", {}).copy()
        pc_kwargs.setdefault("cols", ["column"])
        aux_dim_list = [dim for dim in distribution.dims if dim not in sample_dims]
        if compact:
            # setdefault (not assignment) so a caller-supplied ``rows`` is respected,
            # consistently with ``cols`` above and with the non-compact branch below
            pc_kwargs.setdefault("rows", ["__variable__"])
        else:
            pc_kwargs.setdefault("rows", ["__variable__"] + aux_dim_list)
            aux_dim_list = [dim for dim in pc_kwargs["rows"] if dim != "__variable__"]
        if compact:
            pc_kwargs["aes"].setdefault("color", ["__variable__"] + aux_dim_list)
            if has_chain:
                pc_kwargs["aes"].setdefault("overlay", ["__variable__", "chain"] + aux_dim_list)
                pc_kwargs["aes"].setdefault("linestyle", ["chain"])
            else:
                pc_kwargs["aes"].setdefault("overlay", ["__variable__"] + aux_dim_list)
        elif has_chain:
            pc_kwargs["aes"].setdefault("color", ["chain"])
            pc_kwargs["aes"].setdefault("overlay", ["chain"])
        pc_kwargs = set_grid_layout(pc_kwargs, plot_bknd, distribution, num_cols=2)
        # The artificial "column" dimension generates the dist/trace columns of the grid.
        plot_collection = PlotCollection.grid(
            distribution.expand_dims(column=2).assign_coords(column=["dist", "trace"]),
            backend=backend,
            **pc_kwargs,
        )

    # --------------------------------------------------- aesthetics per visual
    if aes_by_visuals is None:
        aes_by_visuals = {}
    else:
        aes_by_visuals = aes_by_visuals.copy()

    if combined and has_chain:
        # the combined distribution must not be split by the chain encoding aesthetic
        chain_aes = "linestyle" if compact else "color"
        aes_by_visuals["dist"] = aes_by_visuals.get(
            "dist", plot_collection.aes_set.difference({"overlay", chain_aes})
        )
    else:
        aes_by_visuals["dist"] = {"overlay"}.union(
            aes_by_visuals.get("dist", plot_collection.aes_set)
        )
    aes_by_visuals["trace"] = {"overlay"}.union(
        aes_by_visuals.get("trace", plot_collection.aes_set)
    )
    aes_by_visuals["divergence"] = {"overlay"}.union(aes_by_visuals.get("divergence", {}))

    if combined and has_chain:
        # ``pc_kwargs["aes"]`` is only created above when the PlotCollection is built
        # here; with a user provided ``plot_collection`` fall back to an empty mapping
        # instead of failing with a KeyError.
        chain_mapped_to_aes = {
            aes_key
            for aes_key, aes_dims in pc_kwargs.get("aes", {}).items()
            if "chain" in aes_dims
        }.intersection(aes_by_visuals["dist"])
        if chain_mapped_to_aes:
            raise ValueError(
                f"Found properties {chain_mapped_to_aes} mapped to the chain dimension, "
                "but combined=True. Set combined=False or modify the aesthetic mappings"
            )

    if labeller is None:
        labeller = BaseLabeller()

    # --------------------------------------------------------------- dist column
    _, dist_aes, _ = filter_aes(plot_collection, aes_by_visuals, "dist", sample_dims)

    # disable every plot_dist visual that has no place in this composite figure
    visuals_dist = {
        key: False
        for key in ("credible_interval", "point_estimate", "point_estimate_text", "title")
    }
    dist_kwargs = copy(visuals.get("dist", {}))
    if dist_kwargs is not False:
        if neutral_color and "color" not in dist_aes:
            dist_kwargs.setdefault("color", neutral_color)
        if neutral_linestyle and "linestyle" not in dist_aes:
            dist_kwargs.setdefault("linestyle", neutral_linestyle)
    visuals_dist["dist"] = dist_kwargs
    if "remove_axis" in visuals:
        visuals_dist["remove_axis"] = visuals["remove_axis"]

    # restrict the collection to the left column while plot_dist draws on it
    plot_collection.coords = {"column": "dist"}
    plot_dist(
        dt,
        var_names=var_names,
        filter_vars=filter_vars,
        group=group,
        coords=coords,
        sample_dims=sample_dims,
        kind=kind,
        plot_collection=plot_collection,
        labeller=labeller,
        aes_by_visuals={key: value for key, value in aes_by_visuals.items() if key == "dist"},
        visuals=visuals_dist,
        stats=stats,
    )
    plot_collection.coords = None

    # -------------------------------------------------------------- trace column
    trace_kwargs = copy(visuals.get("trace", {}))
    div_kwargs = copy(visuals.get("divergence", {}))
    xlabel_kwargs = copy(visuals.get("xlabel_trace", {}))
    visuals_trace = {"trace": trace_kwargs, "divergence": div_kwargs, "xlabel": xlabel_kwargs}
    visuals_trace["title"] = False
    visuals_trace["ticklabels"] = False

    # Translate this function's visual names into the ones handed to plot_trace
    # (same names used as keys of ``visuals_trace``). The aesthetic mappings must come
    # from ``aes_by_visuals``; ``visuals`` holds artist keyword arguments instead.
    trace_visual_names = {"trace": "trace", "divergence": "divergence", "xlabel_trace": "xlabel"}
    aes_by_visuals_trace = {
        trace_visual_names[key]: value
        for key, value in aes_by_visuals.items()
        if key in trace_visual_names
    }

    plot_collection.coords = {"column": "trace"}
    plot_trace(
        dt,
        var_names=var_names,
        filter_vars=filter_vars,
        group=group,
        coords=coords,
        sample_dims=sample_dims,
        plot_collection=plot_collection,
        labeller=labeller,
        aes_by_visuals=aes_by_visuals_trace,
        visuals=visuals_trace,
    )
    plot_collection.coords = None
    if xlabel_kwargs is not False:
        plot_collection.rename_visuals(xlabel="xlabel_trace")

    # --------------------------------------------------------------- divergences
    sample_stats = get_group(dt, "sample_stats", allow_missing=True)
    if (
        div_kwargs is not False
        and sample_stats is not None
        and "diverging" in sample_stats.data_vars
        and np.any(sample_stats.diverging)
    ):
        # rename divergences visual from plot_trace to avoid clashing
        plot_collection.rename_visuals(divergence="divergence_trace")
        divergence_mask = dt.sample_stats.diverging
        _, div_aes, div_ignore = filter_aes(
            plot_collection, aes_by_visuals, "divergence", sample_dims
        )
        if "color" not in div_aes:
            div_kwargs.setdefault("color", contrast_color)
        if "marker" not in div_aes:
            div_kwargs.setdefault("marker", "|")
        if "size" not in div_aes:
            div_kwargs.setdefault("size", 30)
        plot_collection.map(
            trace_rug,
            "divergence_dist",
            data=distribution,
            ignore_aes=div_ignore,
            xname=False,
            y=0,
            coords={"column": "dist"},
            mask=divergence_mask,
            **div_kwargs,
        )

    # -------------------------------------------------------------------- labels
    # Add varnames as x labels (dist column) and y labels (trace column)
    _, labels_aes, labels_ignore = filter_aes(plot_collection, aes_by_visuals, "label", sample_dims)
    label_kwargs = copy(visuals.get("label", {}))
    if label_kwargs is not False:
        if "color" not in labels_aes:
            label_kwargs.setdefault("color", contrast_color)
        plot_collection.map(
            labelled_x,
            "xlabel_dist",
            ignore_aes=labels_ignore,
            coords={"column": "dist"},
            subset_info=True,
            labeller=labeller,
            store_artist=backend == "none",
            **label_kwargs,
        )
        plot_collection.map(
            labelled_y,
            "ylabel_trace",
            ignore_aes=labels_ignore,
            coords={"column": "trace"},
            subset_info=True,
            labeller=labeller,
            store_artist=backend == "none",
            **label_kwargs,
        )

    # --------------------------------------------------------------- tick labels
    ticklabels_kwargs = copy(visuals.get("ticklabels", {}))
    if ticklabels_kwargs is not False:
        _, _, ticklabels_ignore = filter_aes(
            plot_collection, aes_by_visuals, "ticklabels", sample_dims
        )
        plot_collection.map(
            ticklabel_props,
            "ticklabels",
            ignore_aes=ticklabels_ignore,
            axis="both",
            store_artist=backend == "none",
            **ticklabels_kwargs,
        )

    return plot_collection