Source code for fee.metrics.proximity_bias
import numpy as np
from ..utils import get_g, get_pair_idb
from tqdm import tqdm
from collections import defaultdict
def _bias_ratio(vals, l, thresh):
    """Return the share of scores in `vals` that lie above `thresh`.

    Args:
        vals (np.array): Scores to inspect.
        l (int): Denominator of the ratio.
        thresh (float): Only scores strictly greater than this value are
            counted.

    Returns:
        float: Number of scores above `thresh` divided by `l`.
    """
    # Count the True entries of the mask directly rather than measuring the
    # length of a filtered copy of the array.
    n_above = np.count_nonzero(vals > thresh)
    return n_above / l
def _get_nbs_i(E, word, n):
    """Return the indices of the `n` rows of `E.vecs` most similar to `word`.

    Similarity is the dot product between every row of `E.vecs` and
    `E.v(word)`. NOTE(review): this equals cosine similarity only if the
    embeddings are unit-normalised, which is not checked here.

    Args:
        E (WE class object): Word embeddings object; its `vecs` attribute
            and `v` method are used.
        word (str): Query word.
        n (int): Number of neighbours to return.

    Returns:
        np.array: At most `n` row indices, ordered from most to least
            similar. The row of `word` itself is not filtered out. Empty
            when `n` is not positive.
    """
    if n <= 0:
        # `order[-0:]` is `order[0:]`, i.e. the entire vocabulary, so a
        # non-positive `n` has to be handled before slicing.
        return np.empty(0, dtype=np.intp)
    similarities = E.vecs.dot(E.v(word))
    ascending = np.argsort(similarities)
    # Take the `n` largest scores and flip them into descending order.
    return ascending[-n:][::-1]
def _prox_bias(word, E, g=None, thresh=0.05, n=100):
    """Compute the proximity bias of a single word.

    The nearest neighbours of `word` are looked up with `_get_nbs_i`, the
    indirect bias between `word` and each neighbour vector is obtained from
    `get_pair_idb`, and the fraction of scores above `thresh` is returned.

    Args:
        word (str): Word to evaluate.
        E (WE class object): Word embeddings object.
        g (np.array): Gender direction. When omitted it is derived from `E`
            with `get_g`, the same fallback `ProxBias.__init__` uses.
        thresh (float): Indirect bias value above which a neighbour counts
            as biased.
        n (int): Number of nearest neighbours to inspect; also used as the
            denominator of the ratio.

    Returns:
        float: Number of neighbours whose indirect bias exceeds `thresh`,
            divided by `n`.
    """
    if g is None:
        # Without this the `None` default would be handed straight to
        # `get_pair_idb` instead of an actual direction vector.
        g = get_g(E)
    scores = np.array([
        float(get_pair_idb(word, E.vecs[idx], g, E))
        for idx in _get_nbs_i(E, word, n)
    ])
    return _bias_ratio(scores, n, thresh)
class ProxBias:
    """Proximity Bias Metric Class.

    Stores an embeddings object together with the parameters of the metric
    and averages the per-word proximity bias over the words passed to
    `compute`.
    """

    def __init__(self, E, g=None, thresh=0.05, n=100):
        """
        Args:
            E (WE class object): Word embeddings object.
        kwargs:
            g (np.array): Gender direction. Derived from `E` with `get_g`
                when not given.
            thresh (float): The minimum indirect bias threshold, above
                which the association between a word and its
                neighbour is considered biased.
            n (int): Number of top neighbours (by similarity) to inspect
                for every word.
        """
        self.E = E
        self.g = get_g(E) if g is None else g
        self.thresh = thresh
        self.n = n

    def compute(self, words):
        """
        Args:
            words (str or list/tuple of str): A word, or a sequence of
                words, to compute the ProxBias for.

        Returns:
            The average proximity bias for the given `words`.
            Proximity bias is, in simple terms, the ratio of biased
            neighbours of a word according to indirect bias.
        """
        # Anything that is not a list or tuple is treated as one single word.
        # Tuples are accepted so that a tuple of words is not mistaken for
        # a single lookup key.
        if not isinstance(words, (list, tuple)):
            words = [words]
        per_word = [
            _prox_bias(w, self.E, self.g, self.thresh, self.n)
            for w in words
        ]
        return np.mean(per_word)