Source code for fee.reports.biased_neighbours

import numpy as np
from ..utils import get_g, cosine
import pandas as pd

class NeighboursAnalysis():
    """`NeighboursAnalysis` report"""

    def __init__(self, E, g=None, random_state=42):
        """Analyze neighbours of a word in the embedding and their bias
        by projection onto the gender direction.

        Args:
            E (WE class object): Word embeddings object. This class reads
                `E.vecs`, `E.words` and calls `E.v(word)`.
            g (np.array): gender direction; when None it is obtained
                from `get_g(E)`.
            random_state (int): random seed for reproduction (stored on
                the instance; not used by the methods of this class).
        """
        self.E = E
        self.g = get_g(E) if g is None else g
        self.random_state = random_state

    def get_neighbours(self, word, n=100):
        """Compute list of `n` neighbours for `word`

        Neighbours are ranked by the dot product between every row of
        `E.vecs` and `E.v(word)`, highest first.
        NOTE(review): this equals a cosine-similarity ranking only if the
        embedding vectors are unit-normalised -- confirm against the WE
        class.

        Args:
            word (str): Word to compute neighbours for
            n (int): number of neighbours to compute

        Returns:
            list: up to `n` words from `E.words`, most similar first,
            never containing `word` itself. Empty when `n <= 0`.
        """
        if n <= 0:
            return []
        scores = self.E.vecs.dot(self.E.v(word))
        # Fetch one extra candidate: `word` usually ranks among its own
        # top matches, and removing it must still leave `n` entries.
        candidates = np.argsort(scores)[::-1][:n + 1]
        # Drop the query word by identity instead of assuming it is the
        # single highest-scoring entry (not guaranteed for raw dot
        # products on vectors of differing length).
        neighbours = [
            self.E.words[i] for i in candidates if self.E.words[i] != word
        ]
        return neighbours[:n]

    def print_neighbours(self, words, n):
        """Build the report table for `words`, most biased first

        Despite its name this method does not print; it returns the
        dataframe and leaves printing to the caller.

        Args:
            words (list): List of neighbours
            n (int): maximum number of rows to keep in the report

        Returns:
            pd.DataFrame: columns "Neighbour" and "Bias by projection"
            (the value of `cosine(E.v(word), g)`), sorted by that value
            in descending order. Repeated words appear once.
        """
        bias_by_word = {w: cosine(self.E.v(w), self.g) for w in words}
        # Ascending stable sort followed by a reversal keeps the row order
        # among equal bias values identical to the original implementation.
        ranked = sorted(bias_by_word.items(), key=lambda item: item[1])
        ranked = ranked[::-1][:n]
        return pd.DataFrame({
            "Neighbour": [w for w, _ in ranked],
            "Bias by projection": [bias for _, bias in ranked],
        })

    def generate(self, word, n=100, ret_report=True):
        """Generate the report for neighbours of word

        Args:
            word (str): Word to generate report for
            n (int): number of neighbours to compute
            ret_report (bool): return or print the report dataframe

        Returns:
            pd.DataFrame or None: the report when `ret_report` is true;
            otherwise the report is printed and None is returned.
        """
        neighbours = self.get_neighbours(word, n)
        report_df = self.print_neighbours(neighbours, n)
        if ret_report:
            return report_df
        print(report_df)
        return None