# Source code for noggin.utils

from collections import OrderedDict, defaultdict, namedtuple
from pathlib import Path
from typing import Dict, Optional, Tuple, Union

import numpy as np
from custom_inherit import doc_inherit

from noggin.logger import LiveLogger
from noggin.plotter import LivePlot
from noggin.typing import Axes, Figure, LiveMetrics, Metrics, ValidColor, ndarray

# Names exported by ``from noggin.utils import *``.
# NOTE(review): `plot_logger` is defined in this module but is not listed
# here -- confirm whether that omission is intentional.
__all__ = ["create_plot", "save_metrics", "load_metrics"]


# `doc_inherit` combines the docstring below with that of `LivePlot.__init__`
# (numpy style); presumably that is where the parameter descriptions come from,
# which is why no "Parameters" section is written here.
@doc_inherit(LivePlot.__init__, style="numpy")
def create_plot(
    metrics: Metrics,
    max_fraction_spent_plotting: float = 0.05,
    last_n_batches: Optional[int] = None,
    nrows: Optional[int] = None,
    ncols: int = 1,
    figsize: Optional[Tuple[int, int]] = None,
) -> Tuple[LivePlot, Figure, ndarray]:
    """Build a live-plotter along with the matplotlib figure/axes it draws on.

    The plotter publishes "live" training/testing metric data, at a batch
    and epoch level, to the figure.

    Returns
    -------
    Tuple[liveplot.LivePlot, matplotlib.figure.Figure, numpy.ndarray(matplotlib.axes.Axes)]
        (LivePlot-instance, figure, array-of-axes)


    Examples
    --------
    Creating a live plot in a Jupyter notebook

    >>> %matplotlib notebook
    >>> import numpy as np
    >>> from noggin import create_plot, save_metrics
    >>> metrics = ["accuracy", "loss"]
    >>> plotter, fig, ax = create_plot(metrics)
    >>> for i, x in enumerate(np.linspace(0, 10, 100)):
    ...     # training
    ...     x += np.random.rand(1)*5
    ...     batch_metrics = {"accuracy": x**2, "loss": 1/x**.5}
    ...     plotter.set_train_batch(batch_metrics, batch_size=1, plot=True)
    ...
    ...     # cue training epoch
    ...     if i%10 == 0 and i > 0:
    ...         plotter.plot_train_epoch()
    ...
    ...         # cue test-time computations
    ...         for x in np.linspace(0, 10, 5):
    ...             x += (np.random.rand(1) - 0.5)*5
    ...             test_metrics = {"accuracy": x**2}
    ...             plotter.set_test_batch(test_metrics, batch_size=1)
    ...         plotter.plot_test_epoch()
    ...
    >>> plotter.plot()  # ensures final data gets plotted

    Saving the logged metrics

    >>> save_metrics("./metrics.npz", plotter) # save metrics to numpy-archive
    """
    # Everything except `metrics` is forwarded to LivePlot by keyword.
    plot_options = dict(
        max_fraction_spent_plotting=max_fraction_spent_plotting,
        last_n_batches=last_n_batches,
        figsize=figsize,
        ncols=ncols,
        nrows=nrows,
    )
    plotter = LivePlot(metrics, **plot_options)

    # `plot_objects` is unpacked as the (figure, axes) pair owned by the plotter.
    figure, axes = plotter.plot_objects
    return plotter, figure, axes


def _separator_round_trips(sep, fields):
    """Return True if each triple in `fields`, joined on `sep`, splits back into itself."""
    return all(sep.join(parts).split(sep) == list(parts) for parts in fields)


def save_metrics(
    path: Union[str, Path],
    liveplot: Optional[Union[LivePlot, LiveLogger]] = None,
    *,
    train_metrics: Optional[LiveMetrics] = None,
    test_metrics: Optional[LiveMetrics] = None
) -> None:
    """Save live-plot metrics to a numpy zipped-archive (.npz).

    A LivePlot/LiveLogger instance can be supplied, or train/test metrics can
    be passed explicitly to the function. When ``liveplot`` is given, its
    metrics are used and any explicitly passed metrics are ignored.

    Each array is stored under the key ``<type><sep><metric-name><sep><data-key>``
    where ``<type>`` is ``'train'`` or ``'test'``. The archive additionally
    stores ``train_order`` and ``test_order`` (the metric names, in order) and
    ``sep`` (the separator that was used to build the keys).

    Parameters
    ----------
    path : PathLike
        The file-path used to save the archive. E.g. 'path/to/saved_metrics.npz'.
        The file is opened at exactly this path.

    liveplot : Optional[Union[noggin.LivePlot, noggin.LiveLogger]]
        The instance whose ``train_metrics`` and ``test_metrics`` will be saved.

    train_metrics : Optional[OrderedDict[str, Dict[str, numpy.ndarray]]]

        '<metric-name>' -> {'batch_data'   -> array,
                            'epoch_domain' -> array,
                            'epoch_data'   -> array}

    test_metrics : Optional[OrderedDict[str, Dict[str, numpy.ndarray]]]

        '<metric-name>' -> {'batch_data'   -> array,
                            'epoch_domain' -> array,
                            'epoch_data'   -> array}
    """
    if liveplot is not None:
        train_metrics = liveplot.train_metrics
        test_metrics = liveplot.test_metrics
    else:
        if train_metrics is None:
            train_metrics = {}

        if test_metrics is None:
            test_metrics = {}

    # Flatten to ((type, metric-name, data-key), value) pairs.
    entries = []
    for type_, metric_group in (("train", train_metrics), ("test", test_metrics)):
        for name, metric in metric_group.items():
            for key, value in metric.items():
                entries.append(((type_, name, key), value))
    fields = [parts for parts, _ in entries]

    # Start from the shortest run of ';' that does not occur in the metric names.
    sep = ";"
    names = "".join(tuple(train_metrics) + tuple(test_metrics))
    while sep in names:
        sep += ";"

    # A run of ';' is still ambiguous when a name *ends* in ';' (the name's
    # trailing ';' merges with the separator). In that case fall back to a
    # separator of the form ';...;|': it cannot overlap itself, so once it is
    # long enough to not occur inside any field every key decodes uniquely.
    if not _separator_round_trips(sep, fields):
        sep = ";|"
        while not _separator_round_trips(sep, fields):
            sep = ";" + sep

    # train/test;metric_name;metric_data -> array
    save_dict = {sep.join(parts): value for parts, value in entries}

    with open(path, "wb") as f:
        np.savez(
            f,
            train_order=list(train_metrics),
            test_order=list(test_metrics),
            sep=sep,
            **save_dict
        )


# Return type of `load_metrics`: unpacks like a plain (train, test) 2-tuple and
# also exposes the two mappings as `.train` / `.test`.
metrics = namedtuple("metrics", ["train", "test"])


def load_metrics(path: Union[str, Path]) -> Tuple[LiveMetrics, LiveMetrics]:
    """Load noggin metrics from a numpy archive.

    The archive is expected to contain the entries ``train_order``,
    ``test_order`` and ``sep``; every other entry must be keyed as
    ``<type><sep><metric-name><sep><data-key>``.

    Parameters
    ----------
    path : PathLike
        Path to numpy archive.

    Returns
    -------
    Tuple[OrderedDict[str, Dict[str, numpy.ndarray]], OrderedDict[str, Dict[str, numpy.ndarray]]]
        (train-metrics, test-metrics), as a named tuple with fields ``train``
        and ``test``. Metric names are built-in ``str`` objects, ordered as
        recorded in ``train_order`` / ``test_order``. Zero-dimensional entries
        are returned as Python scalars rather than arrays.

    Raises
    ------
    KeyError
        If ``train_order``, ``test_order`` or ``sep`` is missing from the archive.
    ValueError
        If a data key does not split into exactly three ``sep``-separated parts.
    """
    with np.load(path) as archive:
        data_dict = dict(archive)

    # `.tolist()` yields built-in `str` names; iterating the array directly
    # would produce `numpy.str_` objects and leak them into the result's keys.
    train_order = data_dict.pop("train_order").tolist()
    test_order = data_dict.pop("test_order").tolist()
    sep = data_dict.pop("sep").item()

    # type ('train'/'test') -> metric-name -> data-key -> value
    out = defaultdict(lambda: defaultdict(dict))
    for key, value in data_dict.items():
        if value.ndim == 0:
            value = value.item()
        type_, metric_name, data_type = key.split(sep)
        out[type_][metric_name][data_type] = value

    # A name listed in the order arrays but without stored data maps to {}.
    train_metrics = OrderedDict((name, dict(out["train"][name])) for name in train_order)
    test_metrics = OrderedDict((name, dict(out["test"][name])) for name in test_order)
    return metrics(train_metrics, test_metrics)


def plot_logger(
    logger: LiveLogger,
    plot_batches: bool = True,
    last_n_batches: Optional[int] = None,
    colors: Optional[Dict[str, Union[ValidColor, Dict[str, ValidColor]]]] = None,
    nrows: Optional[int] = None,
    ncols: int = 1,
    figsize: Optional[Tuple[int, int]] = None,
) -> Tuple[LivePlot, Figure, Union[Axes, np.ndarray]]:
    """Draw the data held by a :class:`~noggin.logger.LiveLogger` instance.

    A :class:`~noggin.plotter.LivePlot` is configured from the arguments, its
    dictionary form is overwritten with the logger's dictionary form, and a
    plotter rebuilt from that combined dictionary is plotted and returned.

    Parameters
    ----------
    logger : LiveLogger
        The logger whose train/test-split batch/epoch-level data will be plotted.

    plot_batches : bool, optional (default=True)
        If ``True`` include batch-level data in plot.

    last_n_batches : Optional[int]
        The maximum number of batches to be plotted at any given time.
        If ``None``, all of the data will be plotted.

    colors : Optional[Dict[str, Union[ValidColor, Dict[str, ValidColor]]]]
        ``colors`` can be a dictionary, specifying the colors used to plot
        the metrics. Two mappings are valid:
            - '<metric-name>' -> color-value  (specifies train-metric color only)
            - '<metric-name>' -> {'train'/'test' : color-value}
        If ``None``, default colors are used in the plot.

    nrows : Optional[int]
        Number of rows of the subplot grid. Metrics are added in
        row-major order to fill the grid.

    ncols : int, optional, default: 1
        Number of columns of the subplot grid. Metrics are added in
        row-major order to fill the grid.

    figsize : Optional[Sequence[float, float]]
        Specifies the width and height, respectively, of the figure.

    Returns
    -------
    Tuple[LivePlot, Figure, Union[Axes, np.ndarray]]
        The resulting plotter, matplotlib-figure, and axis (or array of axes)

    Raises
    ------
    TypeError
        If ``logger`` is not a ``LiveLogger`` instance.
    """
    if not isinstance(logger, LiveLogger):
        message = "`logger` must be an instance of `noggin.LiveLogger`, got {}"
        raise TypeError(message.format(logger))

    # Every metric seen in either split, de-duplicated and in sorted order.
    train_names = set(logger.train_metrics.keys())
    metric_names = sorted(train_names.union(logger.test_metrics.keys()))

    # Template plotter that carries the display configuration.
    template = LivePlot(
        metric_names,
        max_fraction_spent_plotting=0.0,
        last_n_batches=last_n_batches,
        nrows=nrows,
        ncols=ncols,
        figsize=figsize,
    )
    template.last_n_batches = last_n_batches
    if colors is not None:
        template.metric_colors = colors

    # Entries from the logger's dictionary take precedence over the template's.
    state = template.to_dict()
    state.update(logger.to_dict())

    populated = LivePlot.from_dict(state)
    populated.plot(plot_batches=plot_batches)

    figure, axes = populated.plot_objects
    return populated, figure, axes