Source code for fee.visualize.neighbour_bias_wordcloud

import numpy as np
import matplotlib.pyplot as plt
from ..utils import get_g, cosine
import matplotlib as mpl
from wordcloud import WordCloud   


class NBWordCloud:
    """`NBWordCloud` Class

    Draws a word cloud of the nearest neighbours of a word, where each
    neighbour's size is driven by its bias-by-projection score (cosine
    with the gender direction `g`).
    """

    def __init__(self, E, g=None, random_state=42):
        """WordCloud for the neighbourhood of a word.

        The size of neighbouring words is directly proportional to the
        bias by projection of these words.

        Args:
            E (WE class object): Word embeddings object
            g (np.array): gender direction; computed with `get_g(E)`
                when not given
            random_state (int): seed forwarded to `WordCloud` for
                reproducibility
        """
        self.E = E
        if g is None:
            g = get_g(E)
        self.g = g
        self.random_state = random_state

    def get_neighbours(self, word, n=100):
        """Return the words closest to `word` by dot product.

        The top-ranked entry is skipped (it is assumed to be `word`
        itself), so at most `n - 1` words are returned, ordered from
        most to least similar.

        Args:
            word (str): query word
            n (int): number of top-ranked entries to inspect,
                including the skipped first one

        Returns:
            list: neighbouring words, most similar first
        """
        similarities = self.E.vecs.dot(self.E.v(word))
        # argsort is ascending: take the last n, drop the very last
        # (highest-scoring) entry, then flip to descending order.
        ns_idx = np.argsort(similarities)[-n:-1][::-1]
        return [self.E.words[i] for i in ns_idx]

    def bias_by_projection_sort(self, words):
        """Score each word against the gender direction and sort.

        Args:
            words (iterable of str): words to score

        Returns:
            dict: word -> `cosine(E.v(word), g)`, with insertion order
            following ascending score
        """
        bias_dict = {w: cosine(self.E.v(w), self.g) for w in words}
        return dict(sorted(bias_dict.items(), key=lambda item: item[1]))

    def bias_score_to_freq(self, d):
        """Convert bias scores to integer pseudo-frequencies.

        Every value is multiplied by 1000 and truncated with `int`.
        The dictionary is modified in place and also returned.

        Args:
            d (dict): word -> bias score

        Returns:
            dict: the same dictionary object, now word -> int
        """
        # NOTE(review): scores are presumably cosine similarities and
        # may be negative, which yields negative "frequencies" --
        # confirm this is what the word cloud should receive.
        for k in d:
            d[k] = int(d[k] * 1000)
        return d

    def visualize(self, freq_dict, title, figsize, dpi, width, height):
        """Main `NBWordCloud` visualization driver function

        Args:
            freq_dict (dict): dictionary to map size of each word
            title (str): title of the plot (skipped when None)
            figsize (tuple): size of the figure as (width, height) in
                inches, passed to `plt.figure`
            dpi (int): dpi of the figures
            width (int): width of the wordcloud image
            height (int): height of the wordcloud image

        Returns:
            bool: always True, once the figure has been shown
        """
        # Forward the stored seed so the layout is reproducible, as
        # promised by the constructor's `random_state` argument.
        wordcloud = WordCloud(
            width=width,
            height=height,
            random_state=self.random_state,
        ).generate_from_frequencies(freq_dict)
        plt.figure(figsize=figsize, dpi=dpi)
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        if title is not None:
            plt.title(title)
        plt.show()
        return True

    def run(self, word, title=None, n=100, dpi=300, figsize=(8, 5),
            width=800, height=500):
        """Run the `NBWordCloud` visualization

        Args:
            word (str): word to compute neighbours of and make this plot
            title (str): title of the plot
            n (int): number of neighbours to consider
            figsize (tuple): size of the figure as (width, height) in
                inches
            dpi (int): dpi of the figures
            width (int): width of the wordcloud image
            height (int): height of the wordcloud image
        """
        n += 1  # the first neighbour is the word itself
        neighbours = self.get_neighbours(word, n)
        neighbours_sorted_dict = self.bias_by_projection_sort(neighbours)
        neighbours_with_freq = self.bias_score_to_freq(neighbours_sorted_dict)
        self.visualize(neighbours_with_freq, title, figsize, dpi, width, height)