Source code for fee.metrics.pmn
import numpy as np
from ..utils import get_g, get_pair_idb
from tqdm import tqdm
from collections import defaultdict
def _get_nbs_i(E, word, n):
    """Return the indices of the `n` nearest neighbours of `word`.

    Similarity is the dot product between every row of ``E.vecs`` and
    the vector returned by ``E.v(word)``. This equals cosine similarity
    only if the embeddings are unit-normalised -- presumably they are;
    verify against the embedding loader. The query word is not filtered
    out of the result.

    Args:
        E (WE class object): Word embeddings object exposing ``vecs``
            and ``v(word)``.
        word (str): Query word.
        n (int): Number of neighbours to return.

    Returns:
        np.ndarray: Row indices into ``E.vecs``, ordered from most to
        least similar. Holds at most `n` entries (fewer when the
        vocabulary is smaller) and is empty when ``n <= 0``.
    """
    similarities = E.vecs.dot(E.v(word))
    # Rank best-first and then truncate. Slicing the ascending order
    # with ``[-n:]`` would hand back the whole vocabulary for ``n == 0``
    # because ``-0 == 0``.
    ranked = np.argsort(similarities)[::-1]
    return ranked[:max(n, 0)]
def _pmb(word, E, g, n):
    """Return the percentage of male neighbours of `word`.

    The neighbours are the indices returned by ``_get_nbs_i(E, word, n)``.
    A neighbour counts as male when the dot product of its vector with
    `g` is negative.

    Args:
        word (str): Query word.
        E (WE class object): Word embeddings object.
        g (np.array): Gender direction.
        n (int): Number of top neighbours to inspect.

    Returns:
        float: Percentage (0-100) of the inspected neighbours that are
        male.

    Raises:
        ValueError: If `n` is not positive.
    """
    if n <= 0:
        raise ValueError("n must be a positive integer, got {!r}".format(n))
    neighbours_indices = _get_nbs_i(E, word, n)
    # males have negative direct bias
    male_neighbours = sum(
        1 for n_i in neighbours_indices if E.vecs[n_i].dot(g) < 0
    )
    # Normalise by the number of neighbours actually returned: when the
    # vocabulary holds fewer than `n` words, dividing by `n` would
    # under-report the percentage.
    return 100 * male_neighbours / len(neighbours_indices)
class PMN():
    """ The class that computes the Percentage of Male Neighbours (PMN)
        in the top n neighbours for a word.
    """

    def __init__(self, E, g=None, n=100):
        """
        Args:
            E (WE class object): Word embeddings object.

        kwargs:
            g (np.array): Gender direction. When ``None`` it is
                obtained from ``get_g(E)``.
            n (int): Number of top neighbours inspected for each word.

        """
        if g is None:
            g = get_g(E)
        self.g = g
        self.E = E
        self.n = n

    def compute(self, words):
        """
        Args:
            words (str or iterable of str): A word, or a collection of
                words (list, tuple, ...), to compute the PMN for.

        Return:
            The percentage of male neighbours, averaged over all the
            given words with ``np.mean``. Note that the remaining
            percentage of neighbours can be considered to be female.
            An empty collection is passed to ``np.mean`` unchanged, so
            the result is then ``nan``.
        """
        # Only a bare string is a single word. Testing for `list` here
        # would wrap tuples and other iterables of words as if they
        # were one word.
        if isinstance(words, str):
            words = [words]
        return np.mean([_pmb(w, self.E, self.g, self.n) for w in words])