Source code for fee.visualize.pca_components

import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

class PCAComponents():
    """Bar-chart visualization of PCA explained-variance ratios.

    For a list of word pairs, each word vector is centered on the midpoint
    of its pair, PCA is fitted on the centered vectors, and the explained
    variance ratio of each principal component is drawn as a bar plot.
    """
    def __init__(self, E):
        """Store the embedding object used to look up word vectors.

        Args:
            E (WE class object): Word embeddings object. The only member
                used by this class is ``E.v(word)``, whose result must
                support ``+``, ``-`` and division by a scalar (presumably a
                NumPy vector -- confirm against the WE class).

        """
        self.E = E

    def PlotPCA(self, pairs, title, num_components, dpi, figsize):
        """Main `PCAComponents` visualization driver function.

        Builds the matrix of pair-centered word vectors, fits a PCA with
        `num_components` components on it and shows a bar plot of the
        explained variance ratios. No validation of `num_components`
        against `len(pairs)` is done here; `run` performs that check.

        Args:
            pairs (list): A list of pairs (tuple/list) of words. Every
                        pair contributes two rows to the PCA input: each
                        word vector minus the pair's midpoint.
            title (str): title of the plot; no title is set when `None`.
            num_components (int): number of principal components.
            dpi (int): dpi of the figure.
            figsize (tuple): size of the figure, passed unchanged to
                        ``plt.figure(figsize=...)`` (matplotlib interprets
                        it as (width, height) in inches).

        """
        plt.figure(figsize=figsize, dpi=dpi)
        matrix = []
        for a, b in pairs:
            # Look each vector up once per pair instead of twice.
            vec_a, vec_b = self.E.v(a), self.E.v(b)
            center = (vec_a + vec_b) / 2
            matrix.append(vec_a - center)
            matrix.append(vec_b - center)
        matrix = np.array(matrix)
        pca = PCA(n_components=num_components)
        pca.fit(matrix)
        plt.bar(range(num_components), pca.explained_variance_ratio_)
        if title is not None:
            plt.title(title)
        plt.show()

    def run(self, pairs, title=None, num_components=10, dpi=300,
            figsize=(8, 5)):
        """Run the `PCAComponents` visualization.

        Checks that there are enough pairs for the requested number of
        components and then delegates to `PlotPCA`.

        Args:
            pairs (list): A list of pairs (tuple/list) of words. The
                        direction is computed by PCA of the set of
                        differences of these words.
            title (str): title of the plot; no title is set when `None`.
            num_components (int): number of principal components.
            dpi (int): dpi of the figure.
            figsize (tuple): size of the figure, passed unchanged to
                        ``plt.figure(figsize=...)`` (matplotlib interprets
                        it as (width, height) in inches).

        Raises:
            ValueError: if fewer pairs than `num_components` are given.

        """
        # The two rows produced by a pair are mirror images of each other
        # (a - center == -(b - center)), so each pair adds at most one
        # independent direction to the PCA input.
        # Raised explicitly rather than asserted so the check is still
        # enforced when Python runs with assertions disabled (-O).
        if len(pairs) < num_components:
            raise ValueError(
                f"# pairs ({len(pairs)}) should be greater than or equal to "
                f"the number of components ({num_components})."
            )
        self.PlotPCA(pairs, title, num_components, dpi, figsize)