Source code for fee.visualize.pca_components
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
class PCAComponents:
    """`PCAComponents` Class

    Plots, as a bar graph, the explained-variance ratio of the leading
    principal components of a direction in `E` defined by word pairs.
    """

    def __init__(self, E):
        """Plot the PCA principal component bar graph for some direction
        of `E` computed using a list of pairs of words.

        Args:
            E (WE class object): Word embeddings object. Only its
                ``v(word)`` vector lookup is used by this class.
        """
        self.E = E

    def _centered_pair_matrix(self, pairs):
        """Build the matrix of pair-centered word vectors.

        For every pair ``(a, b)`` the midpoint of the two word vectors is
        subtracted from each of them, and both centered vectors are
        appended as consecutive rows.

        Args:
            pairs (list): A list of pair (tuple/list) of words.

        Returns:
            numpy.ndarray: Array built from ``2 * len(pairs)`` centered
            vectors, in pair order.
        """
        rows = []
        for a, b in pairs:
            # Look each word up once; the vectors are reused below.
            vec_a = self.E.v(a)
            vec_b = self.E.v(b)
            center = (vec_a + vec_b) / 2
            rows.append(vec_a - center)
            rows.append(vec_b - center)
        return np.array(rows)

    def PlotPCA(self, pairs, title, num_components, dpi, figsize):
        """Main `PCAComponents` visualization driver function

        Fits a PCA on the pair-centered word vectors and shows a bar
        graph of ``explained_variance_ratio_`` per component.

        Args:
            pairs (list): A list of pair (tuple/list) of words. The
                          direction is computed by PCA of set of
                          differences of these words.
            title (str): title of the plot; no title is set when `None`
            num_components (int): number of principal components
            dpi (int): dpi of the figures
            figsize (tuple): size of figures, passed unchanged to
                             `plt.figure` (matplotlib expects
                             (width, height) in inches)
        """
        # Do all the work that can fail (word lookup, PCA fit) before a
        # figure is opened, so an error does not leave an empty figure.
        matrix = self._centered_pair_matrix(pairs)
        pca = PCA(n_components=num_components)
        pca.fit(matrix)

        plt.figure(figsize=figsize, dpi=dpi)
        plt.bar(range(num_components), pca.explained_variance_ratio_)
        if title is not None:
            plt.title(title)
        plt.show()

    def run(self, pairs, title=None, num_components=10, dpi=300,
            figsize=(8, 5)):
        """Run the `PCAComponents` visualization

        Args:
            pairs (list): A list of pair (tuple/list) of words. The
                          direction is computed by PCA of set of
                          differences of these words.
            title (str): title of the plot
            num_components (int): number of principal components
            dpi (int): dpi of the figures
            figsize (tuple): size of figures, passed unchanged to
                             `plt.figure` (matplotlib expects
                             (width, height) in inches)

        Raises:
            AssertionError: if there are fewer pairs than
                `num_components`.
        """
        # Raised explicitly (rather than via `assert`) so the check is
        # still enforced when Python runs with -O; the exception type is
        # kept for callers that already catch AssertionError.
        if len(pairs) < num_components:
            raise AssertionError(
                f"# pairs ({len(pairs)}) should be greater than or equal "
                f"to the number of components ({num_components})."
            )
        self.PlotPCA(pairs, title, num_components, dpi, figsize)