Source code for batman.visualization.uncertainty

"""
Uncertainty visualization tools
-------------------------------

It regoups various functions for graph visualizations.

* :func:`kernel_smoothing`,
* :func:`pdf`,
* :func:`sobol`,
* :func:`corr_cov`.
"""
import os
import numpy as np
import openturns as ot
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KernelDensity
from sklearn.model_selection import GridSearchCV
from scipy.optimize import differential_evolution
from matplotlib import cm
import matplotlib.pyplot as plt
import batman as bat
from ..input_output import formater


[docs]def kernel_smoothing(data, optimize=False):
    """Create gaussian kernel.

    The optimization option could lead to longer computation of the PDF.

    :param array_like data: output sample to draw a PDF from
      (n_samples, n_features).
    :param bool optimize: use global optimization of grid search.
    :return: gaussian kernel.
    :rtype: :class:`sklearn.neighbors.KernelDensity`.
    """
    n_samples, dim = data.shape
    cv = n_samples if n_samples < 50 else 50
    var = np.std(data, ddof=1)
    scott = n_samples ** (-1. / (dim + 4)) * var

    if optimize:
        def bw_score(bw):
            """Get the cross validation score for a given bandwidth."""
            score = cross_val_score(KernelDensity(bandwidth=bw),
                                    data, cv=cv, n_jobs=-1)
            return - score.mean()

        bounds = [(0.1 * var, 5. * var)]
        results = differential_evolution(bw_score, bounds, maxiter=5)
        bw = results.x
        ks_gaussian = KernelDensity(bandwidth=bw)
        ks_gaussian.fit(data)
    else:
        silverman = (n_samples * (dim + 2) / 4.) ** (-1. / (dim + 4)) * var
        bandwidth = np.hstack([np.logspace(-1, 1.0, 10) * var,
                               scott, silverman])
        grid = GridSearchCV(KernelDensity(),
                            {'bandwidth': bandwidth},
                            cv=cv, n_jobs=-1)  # n-fold cross-validation
        grid.fit(data)
        ks_gaussian = grid.best_estimator_

    return ks_gaussian


[docs]def pdf(data, xdata=None, xlabel=None, flabel=None, moments=False,
        ticks_nbr=10, range_cbar=None, fname=None):
    """Plot PDF in 1D or 2D.

    :param nd_array/dict data: array of shape (n_samples, n_features)
        or a dictionary with the following:

        - **bounds** (array_like) -- first line is mins and
          second line is maxs (2, n_features).
        - **model** (:class:`batman.surrogate.SurrogateModel`/str) --
          path to the surrogate data.
        - **method** (str) -- surrogate model method.
        - **dist** (:class:`openturns.ComposedDistribution`) --
          joint distribution.

    :param array_like xdata: 1D discretization of the function (n_features,).
    :param str xlabel: label of the discretization parameter.
    :param str flabel: name of the quantity of interest.
    :param bool moments: whether to plot moments along with PDF if dim > 1.
    :param int ticks_nbr: number of color isolines for response surfaces.
    :param array_like range_cbar: Minimum and maximum values for output
      function (2 values).
    :param str fname: whether to export to filename or display the figures.
    :returns: figure.
    :rtype: Matplotlib figure instances, Matplotlib AxesSubplot instances.
    """
    xlabel = 'x' if xlabel is None else xlabel
    flabel = 'F' if flabel is None else flabel

    dx = 100
    if isinstance(data, dict):
        try:
            f = bat.surrogate.SurrogateModel(data['method'], data['bounds'])
            f.read(data['model'])
        except (AttributeError, TypeError):
            f = data['model']
        sample = np.array(ot.LHSExperiment(data['dist'], 500).generate())
        z_array, _ = f(sample)
    else:
        z_array = np.asarray(data)

    # Compute PDF
    output_len = z_array.shape[1]
    if output_len > 1:
        z_array = z_array[:199]  # Reduce the number of sample to use
        pdf = []
        ydata = []
        for i in range(output_len):
            ks_gaussian = kernel_smoothing(z_array[:, i].reshape(-1, 1), False)
            xpdf = np.linspace(min(z_array[:, i]),
                               max(z_array[:, i]), dx).reshape(-1, 1)
            pdf.append(np.exp(ks_gaussian.score_samples(xpdf)))
            ydata.append(xpdf.flatten())
        pdf = np.array(pdf).T
        if xdata is None:
            xdata = np.linspace(0, 1, output_len)
        xdata = np.tile(xdata, dx).reshape(-1, output_len)
        ydata = np.array(ydata).T
    else:
        z_array = z_array.reshape(-1, 1)
        ks_gaussian = kernel_smoothing(z_array, True)
        xdata = np.linspace(min(z_array), max(z_array), dx).reshape(-1, 1)
        pdf = np.exp(ks_gaussian.score_samples(xdata))

    # Get moments
    if moments:
        mean = np.mean(z_array, axis=0)
        sd = np.std(z_array, axis=0)
        sd_min = mean - sd
        sd_max = mean + sd
        min_ = np.min(z_array, axis=0)
        max_ = np.max(z_array, axis=0)

    # Plotting
    fig = plt.figure('PDF')
    ax = fig.add_subplot(111)
    ax.tick_params(axis='x', labelsize=26)
    ax.tick_params(axis='y', labelsize=26)
    if output_len > 1:
        if range_cbar is None:
            max_pdf = np.percentile(pdf, 97) if np.max(pdf) < 1 else 1
            min_pdf = np.percentile(pdf, 3) if 0 < np.min(pdf) < max_pdf else 0
        else:
            min_pdf, max_pdf = range_cbar
        ticks = np.linspace(min_pdf, max_pdf, num=ticks_nbr)
        bound_pdf = np.linspace(min_pdf, max_pdf, 50, endpoint=True)
        cax = ax.contourf(xdata, ydata, pdf, bound_pdf,
                          cmap=cm.viridis, extend="max")
        cbar = fig.colorbar(cax, shrink=0.5, ticks=ticks)
        cbar.set_label(r"PDF")
        ax.set_xlabel(xlabel, fontsize=26)
        ax.set_ylabel(flabel, fontsize=26)
        if moments:
            ax.plot(xdata[0], sd_min, color='k', ls='-.',
                    linewidth=2, label="Standard Deviation")
            ax.plot(xdata[0], mean, color='k', ls='-', linewidth=2, label="Mean")
            ax.plot(xdata[0], sd_max, color='k', ls='-.', linewidth=2, label=None)
            ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
    else:
        ax.plot(xdata, pdf, color='k', ls='-', linewidth=3, label=None)
        ax.hist(z_array, 30, fc='gray', histtype='stepfilled',
                alpha=0.2, density=True)
        z_delta = np.max(pdf) * 5e-2
        ax.plot(z_array[:199, 0],
                -z_delta - z_delta * np.random.random(z_array[:199].shape[0]),
                '+k', label=None)
        ax.set_xlabel(flabel, fontsize=26)
        ax.set_ylabel("PDF", fontsize=26)

    if fname is not None:
        # Write PDF to file
        xdata_flattened = xdata.flatten('C')
        pdf = pdf.flatten('F')
        names = ['output', 'PDF']
        if output_len > 1:
            ydata = np.array(ydata).flatten('C')
            names = ['x'] + names
            data = np.array([xdata_flattened, ydata, pdf])
        else:
            data = np.array([xdata_flattened, pdf])

        io = formater('json')
        filename, _ = os.path.splitext(fname)
        io.write(filename + '.json', data, names)

        # Write moments to file
        if moments:
            data = np.array([min_, sd_min, mean, sd_max, max_])
            names = ['Min', 'SD_min', 'Mean', 'SD_max', 'Max']
            if output_len != 1:
                names = ['x'] + names
                data = np.append(xdata[0], data)

            io.write(filename + '-moment.json', data, names)

    bat.visualization.save_show(fname, [fig])

    return fig


[docs]def sobol(sobols, conf=None, plabels=None, xdata=None, xlabel='x', fname=None):
    """Plot total Sobol' indices.

    If `len(sobols)>2` map indices are also plotted along with aggregated
    indices.

    :param array_like sobols: `[first (n_params), total (n_params),
        first (xdata, n_params), total (xdata, n_params)]`.
    :param float/array_like conf: relative error around indices. If float,
        same error is applied for all parameters. Otherwise shape
        ([min, n_features], [max, n_features]).
    :param list(str) plabels: parameters' names.
    :param array_like xdata: 1D discretization of the function (n_features,).
    :param str xlabel: label of the discretization parameter.
    :param str fname: wether to export to filename or display the figures.
    :returns: figure.
    :rtype: Matplotlib figure instances, Matplotlib AxesSubplot instances.
    """
    p_len = len(sobols[0])
    if plabels is None:
        plabels = ['x' + str(i) for i in range(p_len)]
    objects = [[r"$S_{" + p + r"}$", r"$S_{T_{" + p + r"}}$"]
               for i, p in enumerate(plabels)]
    color = [[cm.Pastel2(i), cm.Pastel2(i)]
             for i, p in enumerate(plabels)]

    s_lst = [item for sublist in objects for item in sublist]
    color = [item for sublist in color for item in sublist]
    y_pos = np.arange(2 * p_len)

    figures = []
    fig = plt.figure('Aggregated Indices')
    ax = fig.add_subplot(111)
    figures.append(fig)
    ax.bar(y_pos, np.array(sobols[:2]).flatten('F'),
           yerr=conf, align='center', alpha=0.5, color=color)
    ax.set_xticks(y_pos, s_lst)
    ax.tick_params(axis='x', labelsize=20)
    ax.tick_params(axis='y', labelsize=20)
    ax.set_ylabel("Sobol' aggregated indices", fontsize=20)
    ax.set_xlabel("Input parameters", fontsize=20)

    if len(sobols) > 2:
        n_xdata = len(sobols[3])
        if xdata is None:
            xdata = np.linspace(0, 1, n_xdata)
        fig = plt.figure('Sensitivity Map')
        ax = fig.add_subplot(111)
        figures.append(fig)
        sobols = np.hstack(sobols[2:]).T
        s_lst = np.array(objects).T.flatten('C').tolist()
        for sobol, label in zip(sobols, s_lst):
            ax.plot(xdata, sobol, linewidth=3, label=label)
        ax.set_xlabel(xlabel, fontsize=26)
        ax.set_ylabel(r"Indices", fontsize=26)
        ax.set_ylim(-0.1, 1.1)
        ax.tick_params(axis='x', labelsize=23)
        ax.tick_params(axis='y', labelsize=23)
        ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")

    bat.visualization.save_show(fname, figures)

    return figures


[docs]def corr_cov(data, sample, xdata, xlabel='x', plabels=None, interpolation=None,
             fname=None):
    """Correlation and covariance matrices.

    Compute the covariance regarding YY and XY as well as the correlation
    regarding YY.

    :param array_like data: function evaluations (n_samples, n_features).
    :param array_like sample: sample (n_samples, n_featrues).
    :param array_like xdata: 1D discretization of the function (n_features,).
    :param str xlabel: label of the discretization parameter.
    :param list(str) plabels: parameters' labels.
    :param str interpolation: If None, does not interpolate correlation and
        covariance matrices (YY). Otherwize use Matplotlib methods from
        `imshow` such as `['bilinear', 'lanczos', 'spline16', 'hermite', ...]`.
    :param str fname: wether to export to filename or display the figures.
    :returns: figure.
    :rtype: Matplotlib figure instances, Matplotlib AxesSubplot instances.
    """
    p_len = np.asarray(sample).shape[1]
    data_len = np.asarray(data).shape[1]
    data = ot.Sample(data)

    corr_yy = np.array(data.computePearsonCorrelation())
    cov_yy = np.array(data.computeCovariance())
    cov_matrix_xy = np.dot((np.mean(sample) - sample).T,
                           np.mean(data, axis=0) - data) / (len(sample) - 1)

    x_2d_yy, y_2d_yy = np.meshgrid(xdata, xdata)
    x_2d_xy, y_2d_xy = np.meshgrid(xdata, np.arange(p_len))

    c_map = cm.viridis

    figures, axs = [], []

    # Covariance matrix YY
    fig, ax = plt.subplots()
    figures.append(fig)
    axs.append(ax)
    cax = ax.imshow(cov_yy, cmap=c_map, interpolation=interpolation, origin='lower')
    cbar = fig.colorbar(cax)
    cbar.set_label(r"Covariance", size=26)
    cbar.ax.tick_params(labelsize=23)
    ax.set_xlabel(xlabel, fontsize=26)
    ax.set_ylabel(xlabel, fontsize=26)
    ax.tick_params(axis='x', labelsize=23)
    ax.tick_params(axis='y', labelsize=23)

    # Correlation matrix YY
    fig, ax = plt.subplots()
    figures.append(fig)
    cax = ax.imshow(corr_yy, cmap=c_map, interpolation=interpolation, origin='lower')
    cbar = fig.colorbar(cax)
    cbar.set_label(r"Correlation", size=26)
    cbar.ax.tick_params(labelsize=23)
    ax.set_xlabel(xlabel, fontsize=26)
    ax.set_ylabel(xlabel, fontsize=26)
    ax.tick_params(axis='x', labelsize=23)
    ax.tick_params(axis='y', labelsize=23)

    if plabels is None:
        plabels = ['x' + str(i) for i in range(p_len + 1)]
    else:
        plabels.insert(0, 0)

    # Covariance matrix XY
    fig, ax = plt.subplots()
    figures.append(fig)
    axs.append(ax)
    cax = ax.imshow(cov_matrix_xy, cmap=c_map, interpolation='nearest')
    ax.set_yticklabels(plabels, fontsize=6)
    cbar = fig.colorbar(cax)
    cbar.set_label(r"Covariance", size=26)
    cbar.ax.tick_params(labelsize=23)
    ax.set_xlabel(xlabel, fontsize=26)
    ax.set_ylabel('Input parameters', fontsize=26)
    ax.tick_params(axis='x', labelsize=23)
    ax.tick_params(axis='y', labelsize=23)

    if fname is not None:
        data = np.append(x_2d_yy, [y_2d_yy, corr_yy, cov_yy])
        names = ['x', 'y', 'Correlation-YY', 'Covariance']
        io = formater('json')
        io.write(fname.split('.')[0] + '-correlation_covariance.json', data, names)

        data = np.append(x_2d_xy, [y_2d_xy, cov_matrix_xy])
        names = ['x', 'y', 'Correlation-XY']
        io.write(fname.split('.')[0] + '-correlation_XY.dat', data, names)

    bat.visualization.save_show(fname, figures)

    return figures, axs