Source code for fee.metrics.proximity_bias

import numpy as np
from ..utils import get_g, get_pair_idb
from tqdm import tqdm
from collections import defaultdict

def _bias_ratio(vals, l, thresh):
    """Return the share of entries in `vals` that are strictly above `thresh`.

    Args:
        vals (np.array): Scores to compare against the threshold.
        l (int): Denominator of the ratio; the caller decides what the
            count of above-threshold entries is measured against.
        thresh (float): Strict lower bound. Values equal to `thresh`
            are not counted.

    Returns:
        float: Number of entries greater than `thresh`, divided by `l`.
    """
    above = vals > thresh
    n_above = int(np.count_nonzero(above))
    return n_above / l

def _get_nbs_i(E, word, n):
    """Return the row indices of the `n` highest-scoring vectors for `word`.

    The score of each row of `E.vecs` is its dot product with
    `E.v(word)`. This matches cosine similarity only if the stored
    vectors are unit length -- NOTE(review): not checked here, confirm
    against the embedding class.

    Args:
        E: Embedding object exposing `vecs` (matrix of vectors) and
            `v(word)` (vector lookup).
        word (str): Query word.
        n (int): Number of indices to return.

    Returns:
        np.array: Indices into `E.vecs`, ordered from highest score to
        lowest.
    """
    query = E.v(word)
    scores = E.vecs.dot(query)
    ascending = np.argsort(scores)
    # The tail of the ascending order holds the best matches; flip it so
    # the best match comes first.
    top_n = ascending[-n:]
    return top_n[::-1]

def _prox_bias(word, E, g=None, thresh=0.05, n=100):
    """Compute the proximity bias of a single word.

    The top `n` neighbours of `word` are looked up with `_get_nbs_i`,
    each one is scored against `word` with `get_pair_idb`, and the
    fraction of scores above `thresh` is returned.

    Args:
        word (str): Word to evaluate.
        E (WE class object): Word embeddings object.
        g (np.array): Gender direction, forwarded to `get_pair_idb`.
        thresh (float): Score above which a neighbour counts as biased.
        n (int): Number of neighbours requested.

    Returns:
        float: Ratio of biased neighbours among the neighbours that were
        actually scored; 0.0 when there are none.
    """
    neighbours_indices = _get_nbs_i(E, word, n)
    values = np.array(
        [float(get_pair_idb(word, E.vecs[n_i], g, E))
         for n_i in neighbours_indices]
    )
    if len(values) == 0:
        # No neighbours at all: nothing can be biased.
        return 0.0
    # Divide by the number of neighbours actually scored rather than the
    # requested `n`; they differ when the vocabulary holds fewer than
    # `n` vectors, and dividing by `n` would understate the ratio.
    return _bias_ratio(values, len(values), thresh)

class ProxBias():
    """Proximity Bias Metric Class
    """

    def __init__(self, E, g=None, thresh=0.05, n=100):
        """
        Args:
            E (WE class object): Word embeddings object.

        kwargs:
            g (np.array): Gender direction. When omitted it is obtained
                from `get_g(E)`.
            thresh (float): The minimum indirect bias threshold, above
                which the association between a word and its neighbour
                is considered biased.
            n (int): Top `n` neighbours according to the cosine
                similarity.
        """
        if g is None:
            g = get_g(E)
        self.g = g
        self.E = E
        self.thresh = thresh
        self.n = n

    def compute(self, words):
        """
        Args:
            words (str or list/tuple of str): A word or a collection of
                words to compute the ProxBias for.

        Returns:
            The average proximity bias for the given `words`.
            Proximity bias is, in simple terms, the ratio of biased
            neighbours according to indirect bias with respect to a
            word. An empty collection yields NaN, because the mean of
            an empty sequence is undefined.
        """
        # A lone word is wrapped so that it is handled like a one-item
        # collection; tuples are accepted alongside lists.
        if not isinstance(words, (list, tuple)):
            words = [words]
        pb = np.mean(
            [_prox_bias(w, self.E, self.g, self.thresh, self.n)
             for w in words]
        )
        return pb