Source code for psynlig.pca.loadings

# Copyright (c) 2020, Anders Lervik.
# Distributed under the MIT License. See LICENSE for more info.
"""A module defining plots for contributions to principal components."""
from matplotlib import pyplot as plt
import matplotlib.patheffects as path_effects
from mpl_toolkits.mplot3d import Axes3D  # pylint: disable=unused-import
import numpy as np
from adjustText import adjust_text
from psynlig.colors import generate_colors
from psynlig.heatmap import plot_annotated_heatmap
from psynlig.common import (
    MARKERS,
    set_origin_axes,
    get_selector,
    get_text_settings,
)


def pca_1d_loadings(pca, xvars, select_components=None, plot_type='line',
                    cmap=None, text_settings=None):
    """Plot the loadings from a PCA in a 1D plot.

    One figure is created for each selected principal component.

    Parameters
    ----------
    pca : object like :class:`sklearn.decomposition._pca.PCA`
        The results from a PCA analysis.
    xvars : list of strings
        Labels for the original variables.
    select_components : set of integers, optional
        This variable can be used to select the principal components
        we will create plot for. Note that the principal component
        numbering will here start from 1 (and not 0). If this is not
        given, all will be plotted.
    plot_type : string, optional
        Select the kind of plot we will be making. The value is
        compared case-insensitively. Possible values are:

        * ``line``: For generating a 1D line with contributions.

        * ``bar``: For generating a bar plot of the contributions.

        * ``bar-square``: For generating a bar plot of the squared
          contributions.

        * ``bar-absolute``: For generating a bar plot of the absolute
          value of contributions.

        Any value that does not start with ``bar``, and any value
        without a ``lower`` method (for instance ``None``), gives the
        line plot.
    cmap : string or object like :class:`matplotlib.colors.Colormap`, optional
        A colormap to use for the components/variables.
    text_settings : dict, optional
        Additional settings for creating the text. Only used for the
        line plot.

    Returns
    -------
    figures : list of objects like :class:`matplotlib.figure.Figure`
        The figures containing the plots.
    axes : list of objects like :class:`matplotlib.axes.Axes`
        The axes containing the plots.

    """
    # Decide on the kind of plot once, and keep the ``try`` limited to
    # the inspection of ``plot_type``. Wrapping the plotting calls as
    # well would hide unrelated AttributeErrors raised while drawing
    # and then draw a line plot on top of a partially drawn axes.
    try:
        kind = plot_type.lower()
        as_bars = kind.startswith('bar')
    except AttributeError:
        kind, as_bars = 'line', False

    figures = []
    axes = []
    components = pca.n_components_
    colors = generate_colors(len(xvars), cmap=cmap)
    selector = get_selector(components, select_components, 1)
    for i in selector:
        fig, axi = plt.subplots(constrained_layout=True)
        axi.set_title('Loading coefficients for PC{}'.format(i + 1))
        coefficients = np.transpose(pca.components_[i, :])
        if as_bars:
            pca_loadings_bar(axi, coefficients, xvars, plot_type=kind)
        else:
            _pca_1d_loadings_component(
                axi,
                coefficients,
                xvars,
                colors,
                text_settings=text_settings,
            )
        figures.append(fig)
        axes.append(axi)
    return figures, axes
def _pca_1d_loadings_component(axi, coefficients, xvars, colors,
                               text_settings=None):
    """Draw the loadings of one principal component along a line.

    Every coefficient is drawn as a marker at ``y = 0``. Its label is
    placed below the line for even indices and above it for odd
    indices, one step further out for each new label on that side, and
    a vertical stem connects the marker and the label.

    Parameters
    ----------
    axi : object like :class:`matplotlib.axes.Axes`
        The plot we will add the loadings to.
    coefficients : object like :class:`numpy.ndarray`
        The coefficients we are to show.
    xvars : list of strings
        Labels for the original variables.
    colors : list of floats or strings
        The colors used for the different labels.
    text_settings : dict, optional
        Additional settings for creating the text.

    """
    below, above = 0, 0
    for idx, loading in enumerate(coefficients):
        name = xvars[idx]
        shade = colors[idx]
        # The marker sits on the line itself:
        axi.scatter(
            loading,
            0,
            s=200,
            label=name,
            marker=MARKERS[idx % len(MARKERS)],
            color=shade,
            zorder=4,
        )
        # Alternate the side of the label and push it one step
        # further away for each label already placed on that side:
        if idx % 2 == 0:
            below += 1
            height, anchor = -2 - below, 'top'
        else:
            above += 1
            height, anchor = 2 + above, 'bottom'
        # The label:
        settings, outline = get_text_settings(
            text_settings,
            default={
                'fontsize': 'large',
                'horizontalalignment': 'center',
                'backgroundcolor': 'white',
            },
        )
        label = axi.text(
            loading,
            height,
            name,
            color=shade,
            verticalalignment=anchor,
            zorder=4,
            **settings,
        )
        if outline:
            stroke = path_effects.Stroke(**outline)
            label.set_path_effects([stroke, path_effects.Normal()])
        # The stem between the marker and the label:
        axi.plot(
            [loading, loading],
            [0, height],
            color=shade,
            lw=3,
            zorder=0,
        )
    # Style the axes: keep only a bottom spine, placed at y = 0,
    # and add one unit of padding above and below the labels.
    lowest = np.min(axi.get_ylim())
    highest = np.max(axi.get_ylim())
    axi.set_xlim(-1, 1)
    axi.set_ylim(lowest - 1, highest + 1)
    for side in ('left', 'right', 'top'):
        axi.spines[side].set_visible(False)
    axi.get_yaxis().set_visible(False)
    axi.spines['bottom'].set_position('zero')
    ticks = [-1, -0.5, 0.0, 0.5, 1]
    axi.set_xticks(ticks)
    axi.set_xticklabels(list(ticks))
def pca_loadings_bar(axi, coefficients, xvars, plot_type='bar'):
    """Plot the loadings for a single component in a bar plot.

    Parameters
    ----------
    axi : object like :class:`matplotlib.axes.Axes`
        The plot we will add the loadings to.
    coefficients : object like :class:`numpy.ndarray`
        The coefficients we are to show. Other sequences of numbers
        (for instance a list) are converted with :func:`numpy.asarray`.
    xvars : list of strings
        Labels for the original variables.
    plot_type : string, optional
        Selects the type of plot we are making. The value is compared
        case-insensitively:

        * ``bar-square``: The squared coefficients are shown.

        * ``bar-absolute``: The absolute values of the coefficients
          are shown.

        Any other value shows the coefficients as they are.

    """
    # Convert up front so that the arithmetic below also works for
    # plain Python sequences (``[...] ** 2`` raises a TypeError).
    values = np.asarray(coefficients)
    # Callers such as pca_1d_loadings already lowercase the plot type;
    # do the same here so that direct calls behave identically.
    kind = plot_type.lower() if isinstance(plot_type, str) else plot_type
    if kind == 'bar-square':
        yval = values**2
        ylabel = 'Squared coefficients'
    elif kind == 'bar-absolute':
        yval = np.abs(values)
        ylabel = 'Absolute value of coefficients'
    else:
        yval = values
        ylabel = 'Coefficient'
    xpos = range(len(values))
    axi.set_ylabel(ylabel)
    # Reference line at zero:
    axi.axhline(y=0, ls=':', color='#262626')
    axi.bar(xpos, yval)
    axi.set_xticks(xpos)
    axi.set_xticklabels(
        xvars,
        rotation='vertical',
    )
    axi.set_xlabel('Variables')
def pca_loadings_map(pca, xvars, val_fmt='{x:.2f}', bubble=False,
                     annotate=True, textcolors=None, plot_style=None,
                     **kwargs):
    """Show contributions from variables to PC's in a heat map.

    Parameters
    ----------
    pca : object like :class:`sklearn.decomposition._pca.PCA`
        The results from a PCA analysis.
    xvars : list of strings
        The labels for the original variables.
    val_fmt : string, optional
        The format of the annotations inside the heat map.
    bubble : boolean, optional
        If True, we will draw bubbles to indicate the size of the
        given data points.
    annotate : boolean, optional
        If True, we will annotate the plot with values for the
        contributions.
    textcolors : list of strings, optional
        Colors used for the text. The number of colors provided
        defines a binning for the data values, and values are
        colored with the corresponding color. If no colors are
        provided, all are colored black.
    plot_style : string, optional
        Determines how the coefficients are plotted. The value is
        compared case-insensitively:

        * ``absolute``: The absolute value of the coefficients will
          be plotted.

        * ``squared``: The squared value of the coefficients will be
          plotted.

        Otherwise (including when ``None`` is given), the actual value
        of the coefficients will be used.
    **kwargs : dict, optional
        Arguments used for drawing the heat map.

    Returns
    -------
    fig : object like :class:`matplotlib.figure.Figure`
        The figure in which the heatmap is plotted.
    axi : object like :class:`matplotlib.axes.Axes`
        The axis to which the heatmap is added.

    """
    # rows: PC, columns: variables
    components = pca.components_
    label = 'Coefficients'
    comp = ['PC{}'.format(i + 1) for i in range(pca.n_components_)]
    # Only the inspection of ``plot_style`` is allowed to fail (e.g.
    # for ``None``); errors from the transformations below are real
    # errors and must not be silenced.
    try:
        style = plot_style.lower()
    except AttributeError:
        style = None
    if style == 'absolute':
        components = np.abs(components)
        label = 'Absolute coefficients'
    elif style == 'squared':
        components = components**2
        label = 'Squared coefficients'
    # The transpose is passed on, with the variable labels first and
    # the principal component labels second.
    fig1, ax1 = plot_annotated_heatmap(
        components.T,
        xvars,
        comp,
        cbarlabel=label,
        val_fmt=val_fmt,
        annotate=annotate,
        bubble=bubble,
        textcolors=textcolors,
        **kwargs
    )
    return fig1, ax1
def pca_2d_loadings(pca, xvars, select_components=None, adjust_labels=False,
                    cmap=None, style='box', text_settings=None):
    """Show loadings for two principal components.

    One figure is created for each selected pair of principal
    components.

    Parameters
    ----------
    pca : object like :class:`sklearn.decomposition._pca.PCA`
        The results from a PCA analysis.
    xvars : list of strings
        Labels for the original variables.
    select_components : set of tuples of integers, optional
        This variable can be used to select the principal components
        we will create plot for. Note that the principal component
        numbering will here start from 1 (and not 0). If this is not
        given, all will be plotted.
    adjust_labels : boolean, optional
        If this is True, we will try to optimize the position of the
        labels so that they won't overlap.
    cmap : string or object like :class:`matplotlib.colors.Colormap`, optional
        A colormap to use for the components/variables.
    style : string, optional
        This option changes the styling of the plot. For style ``box``,
        we show the axes as a normal matplotlib figure with inserted
        lines showing x=0 and y=0. For the style ``center`` we place
        the x-axis and y-axis at the origin. Any other value leaves
        the axes without extra styling.
    text_settings : dict, optional
        Additional settings for creating the text.

    Returns
    -------
    figures : list of objects like :class:`matplotlib.figure.Figure`
        The figures containing the plots.
    axes : list of objects like :class:`matplotlib.axes.Axes`
        The axes containing the plots.

    Raises
    ------
    ValueError
        If the PCA has fewer than two principal components.

    """
    components = pca.n_components_
    if components < 2:
        raise ValueError('Too few (< 2) principal components for a 2D plot!')
    figures, axes = [], []
    colors = generate_colors(len(xvars), cmap=cmap)
    for first, second in get_selector(components, select_components, 2):
        fig, axi = plt.subplots(constrained_layout=True)
        loadings_x = np.transpose(pca.components_[first, :])
        loadings_y = np.transpose(pca.components_[second, :])
        _pca_2d_loadings_component(
            axi,
            loadings_x,
            loadings_y,
            xvars,
            colors,
            adjust_labels=adjust_labels,
            text_settings=text_settings
        )
        axi.set(
            xlabel='Principal component {}'.format(first + 1),
            ylabel='Principal component {}'.format(second + 1),
        )
        if style == 'center':
            # Axes through the origin, labelled with short PC names:
            set_origin_axes(
                axi,
                'PC{}'.format(first + 1),
                'PC{}'.format(second + 1),
                fontsize='x-large',
            )
            axi.set_xticks([-1, -0.5, 0.5, 1])
            axi.set_yticks([-1, -0.5, 0.5, 1])
        elif style == 'box':
            # Regular frame with dotted guide lines for y=0 and x=0:
            axi.axhline(y=0, ls=':', color='#262626', alpha=0.6)
            axi.axvline(x=0, ls=':', color='#262626', alpha=0.6)
        # Any other style: no extra styling is applied.
        figures.append(fig)
        axes.append(axi)
    return figures, axes
def _pca_2d_loadings_component(axi, coefficients1, coefficients2, xvars,
                               colors, adjust_labels=False,
                               text_settings=None):
    """Draw the loadings for a pair of components in a 2D plot.

    Each variable is drawn as a marker at the position given by its
    two coefficients and, unless the text settings contain a false
    ``show`` entry, labelled at that position. Both axes are limited
    to the range from -1 to 1.

    Parameters
    ----------
    axi : object like :class:`matplotlib.axes.Axes`
        The plot we will add the loadings to.
    coefficients1 : object like :class:`numpy.ndarray`
        The coefficients for the first principal component.
    coefficients2 : object like :class:`numpy.ndarray`
        The coefficients for the second principal component.
    xvars : list of strings
        Labels for the original variables.
    colors : list of floats or strings
        The colors used for the different labels.
    adjust_labels : boolean, optional
        If this is True, we will try to optimize the position of the
        labels so that they won't overlap.
    text_settings : dict, optional
        Additional settings for creating the text.

    """
    labels = []
    markers = []
    axi.set_aspect('equal')
    pairs = zip(coefficients1, coefficients2)
    for idx, (xval, yval) in enumerate(pairs):
        name = xvars[idx]
        shade = colors[idx]
        markers.append(
            axi.scatter(
                xval,
                yval,
                s=200,
                label=name,
                marker=MARKERS[idx % len(MARKERS)],
                color=shade,
            )
        )
        settings, outline = get_text_settings(
            text_settings,
            default={
                'fontsize': 'large',
            },
        )
        if not settings.get('show', True):
            # Labels are switched off; only the marker is drawn.
            continue
        label = axi.text(
            xval,
            yval,
            name,
            color=shade,
            **settings,
        )
        if outline:
            stroke = path_effects.Stroke(**outline)
            label.set_path_effects([stroke, path_effects.Normal()])
        labels.append(label)
    if adjust_labels and labels:
        # Let adjustText move the labels, treating the markers as
        # additional objects for the adjustment:
        adjust_text(
            labels,
            add_objects=markers,
            expand_objects=(1.2, 1.2),
            expand_text=(1.2, 1.2),
            expand_points=(1.2, 1.2),
            force_text=(0.25, 0.25),
            force_points=(0.5, 0.5),
            force_objects=(0.25, 0.25),
        )
    axi.set_xlim(-1, 1)
    axi.set_ylim(-1, 1)
def pca_3d_loadings(pca, xvars, select_components=None, cmap=None,
                    text_settings=None):
    """Show contributions to three principal components.

    One figure with a 3D axes is created for each selected triplet of
    principal components.

    Parameters
    ----------
    pca : object like :class:`sklearn.decomposition._pca.PCA`
        The results from a PCA analysis.
    xvars : list of strings
        Labels for the original variables.
    select_components : set of tuples of integers, optional
        This variable can be used to select the principal components
        we will create plot for. Note that the principal component
        numbering will here start from 1 (and not 0). If this is not
        given, all will be plotted.
    cmap : string or object like :class:`matplotlib.colors.Colormap`, optional
        A colormap to use for the components/variables.
    text_settings : dict, optional
        Additional settings for creating the text.

    Returns
    -------
    figures : list of objects like :class:`matplotlib.figure.Figure`
        The figures containing the plots.
    axes : list of objects like :class:`matplotlib.axes.Axes`
        The axes containing the plots.

    Raises
    ------
    ValueError
        If the PCA has fewer than three principal components.

    """
    components = pca.n_components_
    if components < 3:
        raise ValueError('Too few (< 3) principal components for a 3D plot!')
    figures, axes = [], []
    colors = generate_colors(len(xvars), cmap=cmap)
    axis_label = 'Principal component {}'
    for first, second, third in get_selector(components, select_components, 3):
        fig = plt.figure(constrained_layout=True)
        axi = fig.add_subplot(111, projection='3d')
        axi.set_xlabel(axis_label.format(first + 1), labelpad=15)
        axi.set_ylabel(axis_label.format(second + 1), labelpad=15)
        axi.set_zlabel(axis_label.format(third + 1), labelpad=15)
        # One row of pca.components_ for each of the three axes:
        loadings_x = np.transpose(pca.components_[first, :])
        loadings_y = np.transpose(pca.components_[second, :])
        loadings_z = np.transpose(pca.components_[third, :])
        _pca_3d_loadings_component(
            axi,
            loadings_x,
            loadings_y,
            loadings_z,
            xvars,
            colors,
            text_settings=text_settings,
        )
        figures.append(fig)
        axes.append(axi)
    return figures, axes
def _pca_3d_loadings_component(axi, coefficients1, coefficients2,
                               coefficients3, xvars, colors,
                               text_settings=None):
    """Draw the loadings for three components in a 3D plot.

    Each variable is drawn as a marker at the position given by its
    three coefficients and, unless the text settings contain a false
    ``show`` entry, labelled with a small offset from the marker. All
    three axes are limited to the range from -1 to 1 and dotted lines
    are drawn through the origin.

    Parameters
    ----------
    axi : object like :class:`matplotlib.axes.Axes`
        The plot we will add the loadings to.
    coefficients1 : object like :class:`numpy.ndarray`
        The coefficients for the first principal component.
    coefficients2 : object like :class:`numpy.ndarray`
        The coefficients for the second principal component.
    coefficients3 : object like :class:`numpy.ndarray`
        The coefficients for the third principal component.
    xvars : list of strings
        Labels for the original variables.
    colors : list of floats or strings
        The colors used for the different labels.
    text_settings : dict, optional
        Additional settings for creating the text.

    """
    triplets = zip(coefficients1, coefficients2, coefficients3)
    for idx, (xval, yval, zval) in enumerate(triplets):
        name = xvars[idx]
        shade = colors[idx]
        axi.scatter(
            xval,
            yval,
            zval,
            s=200,
            label=name,
            marker=MARKERS[idx % len(MARKERS)],
            color=shade,
        )
        settings, outline = get_text_settings(
            text_settings,
            default={'fontsize': 'xx-large'},
        )
        if not settings.get('show', True):
            # Labels are switched off; only the marker is drawn.
            continue
        # Shift the label a little away from the marker:
        label = axi.text(
            xval + 0.02,
            yval + 0.02,
            zval + 0.02,
            name,
            color=shade,
            **settings,
        )
        if outline:
            stroke = path_effects.Stroke(**outline)
            label.set_path_effects([stroke, path_effects.Normal()])
    axi.set_xlim(-1, 1)
    axi.set_ylim(-1, 1)
    axi.set_zlim(-1, 1)
    # Dotted guide lines through the origin. Only the first line is
    # given an explicit line width.
    guide = {'ls': ':', 'color': '#262626', 'alpha': 0.8}
    axi.plot([-1, 1], [0, 0], **guide, lw=3)
    axi.plot([0, 0], [-1, 1], **guide)
    axi.plot([0, 0], [0, 0], [-1, 1], **guide)