Source code for fee.debias.hsr_debias

import numpy as np

[docs]class HSRDebias(): """HSR Debiasing class (Half Sibling Regression) """ def __init__(self, E): """HSR debiasing method class. Args: E (WE class object): Word embeddings object. """ self.E = E
[docs] def subset(self, words): """Create subset such that the words exist in vocabulary. Args: words (list): list of words to debias. """ sub = [] for w in words: try: sub.append(self.E.v(w)) except: continue return np.array(sub).T
[docs] def hsr(self, gender_vecs, nongender_vecs, nongender_list, alpha): """Half Sibling Regression method Args: gender_vecs (np.array): 2D numpy array of gendered words nongender_vecs (np.array): 2D numpy array of non-gendered words nongender_list (list): list of nongender words. alpha (float): alpha hyperparameter. """ W = np.linalg.inv(gender_vecs.T @ gender_vecs + alpha * np.eye(gender_vecs.shape[1])) @ gender_vecs.T @ nongender_vecs preds = gender_vecs @ np.array(W) deb_vecs = nongender_vecs - preds for i, w in enumerate(nongender_list): self.E.vecs[self.E.index[w]] = deb_vecs[:, i] return self.E
[docs] def run(self, gender_list, nongender_list=None, alpha=60): """Run the Half Sibling Regression method Args: gender_list (list): list of gendered words. nongender_list (list): list of nongendered words. alpha (float): alpha hyperparameter. """ if nongender_list is None: nongender_list = list(set(self.E.words) - set(gender_list)) gender_vecs = self.subset(gender_list) nongender_vecs = self.subset(nongender_list) return self.hsr(gender_vecs, nongender_vecs, nongender_list, alpha)