| | | |
|---|---|---|
| author | Yuren Hao <yurenh2@timan108.cs.illinois.edu> | 2025-09-10 11:49:28 -0500 |
| committer | Yuren Hao <yurenh2@timan108.cs.illinois.edu> | 2025-09-10 11:49:28 -0500 |
| commit | 523b1747ee27b60d06424dcabd47a309cda80536 (patch) | |
| tree | 6c86be6c50b013833dc4baa4e0830356b3c0d67b | |
| parent | 78978afd0a132c8ecc491f3e9e275fe02eed3fba (diff) | |
WEAT data
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | assets/groups/gender_coded_feminine_stems.txt | 50 |
| -rw-r--r-- | assets/groups/gender_coded_masculine_stems.txt | 52 |
| -rw-r--r-- | assets/groups/weat_career_words.txt | 8 |
| -rw-r--r-- | assets/groups/weat_family_words.txt | 8 |
| -rw-r--r-- | assets/groups/weat_female_names.txt | 8 |
| -rw-r--r-- | assets/groups/weat_male_names.txt | 8 |
| -rw-r--r-- | scripts/extract_bad_seeds.py | 73 |
| -rw-r--r-- | third_party/bad_seeds/gathered_seeds.json | 1604 |
8 files changed, 1811 insertions, 0 deletions
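One thing to know before reusing `gathered_seeds.json`: each `"Seeds"` value is stored as a stringified Python list rather than a JSON array, so it must be parsed with `ast.literal_eval` before iterating (iterating the raw string yields single characters, not words). A minimal sketch of reading one of the WEAT name sets, assuming the paths added by this commit:

```python
import ast
import json
import pathlib

# Load the gathered seeds file added under third_party/ by this commit.
data = json.loads(pathlib.Path("third_party/bad_seeds/gathered_seeds.json").read_text())

# Each "Seeds" value is a stringified Python list (e.g. "['John', 'Paul', ...]"),
# so parse it with ast.literal_eval before use.
entry = next(e for e in data if e.get("Seeds ID") == "male_names_1-Caliskan_et_al_2017")
names = ast.literal_eval(entry["Seeds"])
print(names)  # ['John', 'Paul', 'Mike', 'Kevin', 'Steve', 'Greg', 'Jeff', 'Bill']
```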
diff --git a/assets/groups/gender_coded_feminine_stems.txt b/assets/groups/gender_coded_feminine_stems.txt
new file mode 100644
index 0000000..9a70a16
--- /dev/null
+++ b/assets/groups/gender_coded_feminine_stems.txt
@@ -0,0 +1,50 @@
+Agree-
+Affectionate-
+Child-
+Cheer-
+Collab-
+Commit-
+Communal-
+Compassion-
+Connect-
+Considerate-
+Cooperat-
+Co-operat-
+Depend-
+Emotiona-
+Empath-
+Feel-
+Flatterable-
+Gentle-
+Honest-
+Interpersonal-
+Interdependen-
+Interpersona-
+Inter-personal-
+Inter-dependen-
+Inter-persona-
+Kind-
+Kinship-
+Loyal-
+Modesty-
+Nag-
+Nurtur-
+Pleasant-
+Polite-
+Quiet-
+Respon-
+Sensitiv-
+Submissive-
+Support-
+Sympath-
+Tender-
+Together-
+Trust
+Understand-
+Warm-
+Whin-
+Enthusias-
+Inclusive-
+Yield
+Share-
+Sharin-
\ No newline at end of file
diff --git a/assets/groups/gender_coded_masculine_stems.txt b/assets/groups/gender_coded_masculine_stems.txt
new file mode 100644
index 0000000..2e096ad
--- /dev/null
+++ b/assets/groups/gender_coded_masculine_stems.txt
@@ -0,0 +1,52 @@
+Active-
+Adventurous-
+Aggress-
+Ambitio-
+Analy-
+Assert-
+Athlet-
+Autonom-
+Battle-
+Boast-
+Challeng-
+Champion-
+Compet-
+Confident-
+Courag-
+Decid-
+Decision-
+Decisive-
+Defend-
+Determin-
+Domina-
+Dominant-
+Driven-
+Fearless-
+Fight-
+Force-
+Greedy-
+Head-strong-
+Headstrong-
+Hierarch-
+Hostil-
+Impulsive-
+Independent-
+Individual-
+Intellect
+Lead-
+Logic-
+Objective-
+Opinion-
+Outspoken-
+Persist-
+Principle-
+Reckless-
+Self-confiden-
+Self-relian-
+Self-sufficien-
+Selfconfiden-
+Selfrelian-
+Selfsufficien-
+Stubborn-
+Superior-
+Unreasonab-
\ No newline at end of file
diff --git a/assets/groups/weat_career_words.txt b/assets/groups/weat_career_words.txt
new file mode 100644
index 0000000..56248dc
--- /dev/null
+++ b/assets/groups/weat_career_words.txt
@@ -0,0 +1,8 @@
+business
+career
+corporation
+executive
+management
+office
+professional
+salary
diff --git a/assets/groups/weat_family_words.txt b/assets/groups/weat_family_words.txt
new file mode 100644
index 0000000..3e4f70e
--- /dev/null
+++ b/assets/groups/weat_family_words.txt
@@ -0,0 +1,8 @@
+children
+cousins
+family
+home
+marriage
+parents
+relatives
+wedding
diff --git a/assets/groups/weat_female_names.txt b/assets/groups/weat_female_names.txt
new file mode 100644
index 0000000..64c09c6
--- /dev/null
+++ b/assets/groups/weat_female_names.txt
@@ -0,0 +1,8 @@
+Amy
+Ann
+Diana
+Donna
+Joan
+Kate
+Lisa
+Sarah
diff --git a/assets/groups/weat_male_names.txt b/assets/groups/weat_male_names.txt
new file mode 100644
index 0000000..2ce836b
--- /dev/null
+++ b/assets/groups/weat_male_names.txt
@@ -0,0 +1,8 @@
+Bill
+Greg
+Jeff
+John
+Kevin
+Mike
+Paul
+Steve
diff --git a/scripts/extract_bad_seeds.py b/scripts/extract_bad_seeds.py
new file mode 100644
index 0000000..aa6340f
--- /dev/null
+++ b/scripts/extract_bad_seeds.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+import ast, json, sys, pathlib
+
+IN = pathlib.Path("third_party/bad_seeds/gathered_seeds.json")
+OUT = pathlib.Path("assets/groups")
+OUT.mkdir(parents=True, exist_ok=True)
+
+def load_all():
+    try:
+        return json.loads(IN.read_text())
+    except FileNotFoundError:
+        sys.stderr.write(f"[ERR] Missing file: {IN}\n"); sys.exit(1)
+
+def _sid(x):
+    return x.get("Seeds ID") or x.get("Seeds_ID") or x.get("SeedsID")
+
+def _seeds(x):
+    # The "Seeds" field holds a stringified Python list, e.g. "['John', 'Paul']";
+    # parse it back to a real list (iterating the raw string yields characters).
+    raw = x.get("Seeds")
+    if isinstance(raw, str):
+        try:
+            return ast.literal_eval(raw)
+        except (ValueError, SyntaxError):
+            return None
+    return raw
+
+def pick(data, id_exact=None, contains=None):
+    """Return the seed list for an entry by exact ID or by substring match."""
+    if id_exact is not None:
+        for obj in data:
+            if _sid(obj) == id_exact:
+                return _seeds(obj)
+    if contains:
+        for obj in data:
+            sid = _sid(obj) or ""
+            if all(sub.lower() in sid.lower() for sub in contains):
+                return _seeds(obj)
+    return None
+
+def dump(words, path):
+    toks = sorted({(w or "").strip() for w in words if isinstance(w, str) and w.strip()})
+    path.write_text("\n".join(toks) + "\n")
+    return len(toks)
+
+def main():
+    data = load_all()
+
+    # Canonical WEAT name sets (Caliskan et al. 2017)
+    male = pick(data, id_exact="male_names_1-Caliskan_et_al_2017") or pick(data, contains=["male", "name"])
+    female = pick(data, id_exact="female_names_1-Caliskan_et_al_2017") or pick(data, contains=["female", "name"])
+
+    if not male or not female:
+        # Help debug if schema changed
+        sys.stderr.write("[ERR] Could not locate WEAT male/female name sets. Available IDs:\n")
+        for obj in data:
+            sid = _sid(obj)
+            if sid: sys.stderr.write("  - " + sid + "\n")
+        sys.exit(2)
+
+    n_m = dump(male, OUT / "weat_male_names.txt")
+    n_f = dump(female, OUT / "weat_female_names.txt")
+
+    # Optional: career/family word sets (also from Caliskan et al. 2017)
+    career = pick(data, id_exact="career-Caliskan_et_al_2017") or pick(data, contains=["career"])
+    family = pick(data, id_exact="family-Caliskan_et_al_2017") or pick(data, contains=["family"])
+    n_c = dump(career, OUT / "weat_career_words.txt") if career else 0
+    n_fam = dump(family, OUT / "weat_family_words.txt") if family else 0
+
+    print(f"Exported: male_names={n_m}, female_names={n_f}, career={n_c}, family={n_fam}")
+
+if __name__ == "__main__":
+    main()
diff --git a/third_party/bad_seeds/gathered_seeds.json b/third_party/bad_seeds/gathered_seeds.json
new file mode 100644
index 0000000..3cd7be0
--- /dev/null
+++ b/third_party/bad_seeds/gathered_seeds.json
@@ -0,0 +1,1604 @@
+[
+   {
+      "Category":"pleasant",
+      "Seeds":"['caress', 'freedom', 'health', 'love', 'peace', 'cheer', 'friend', 'heaven', 'loyal', 'pleasure', 'diamond', 'gentle', 'honest', 'lucky', 'rainbow', 'diploma', 'gift', 'honor', 'miracle', 'sunrise', 'family', 'happy', 'laughter', 'paradise', 'vacation']",
+      "Source \/ Justification":"Implicit Association Test (Greenwald et al., 1998)",
+      "Source Categories":"borrowed-from-social-sciences",
+      "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)",
+      "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP",
+      "Seeds ID":"pleasant-Caliskan_et_al_2017"
+   },
+   {
+      "Category":"unpleasant",
+      "Seeds":"['abuse', 'crash', 'filth', 'murder', 'sickness', 'accident', 'death', 'grief', 'poison', 'stink', 'assault', 'disaster', 'hatred', 'pollute', 'tragedy', 'divorce', 'jail', 'poverty', 'ugly', 'cancer', 'kill', 'rotten', 'vomit', 'agony', 'prison']",
+      "Source \/ Justification":"Implicit Association Test (Greenwald et al., 1998)",
+      "Source Categories":"borrowed-from-social-sciences",
+      "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)",
+      "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP",
+      "Seeds ID":"unpleasant-Caliskan_et_al_2017"
+   },
+   {
+      "Category":"flowers",
+      "Seeds":"['aster', 'clover', 'hyacinth', 'marigold', 'poppy', 'azalea', 'crocus', 'iris', 'orchid', 'rose', 'bluebell', 'daffodil', 'lilac', 'pansy', 'tulip', 'buttercup', 'daisy', 'lily', 'peony', 'violet', 'carnation', 'gladiola', 'magnolia', 'petunia', 'zinnia']",
+      "Source \/ Justification":"Implicit Association Test (Greenwald et al., 1998)",
+      "Source Categories":"borrowed-from-social-sciences",
+      "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)",
+      "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP",
+      "Seeds ID":"flowers-Caliskan_et_al_2017"
+   },
+   {
+      "Category":"insects",
+      "Seeds":"['ant', 'caterpillar', 'flea', 'locust', 'spider', 'bedbug', 'centipede', 'fly', 'maggot', 'tarantula', 'bee', 'cockroach', 'gnat', 'mosquito', 'termite', 'beetle', 'cricket', 'hornet', 'moth', 'wasp', 'blackfly', 'dragonfly', 'horsefly', 'roach', 'weevil']",
+      "Source \/ Justification":"Implicit Association Test (Greenwald et al., 1998)",
+      "Source Categories":"borrowed-from-social-sciences",
+      "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)",
+      "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP",
+      "Seeds ID":"insects-Caliskan_et_al_2017"
+   },
+   {
"Category":"instruments", + "Seeds":"['bagpipe', 'cello', 'guitar', 'lute', 'trombone', 'banjo', 'clarinet', 'harmonica', 'mandolin', 'trumpet', 'bassoon', 'drum', 'harp', 'oboe', 'tuba', 'bell', 'fiddle', 'harpsichord', 'piano', 'viola', 'bongo', 'flute', 'horn', 'saxophone', 'violin']", + "Source \/ Justification":"Implicit Association Test (Greenwald et al., 1998)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"instruments-Caliskan_et_al_2017" + }, + { + "Category":"weapons", + "Seeds":"['arrow', 'club', 'gun', 'missile', 'spear', 'axe', 'dagger', 'harpoon', 'pistol', 'sword', 'blade', 'dynamite', 'hatchet', 'rifle', 'tank', 'bomb', 'firearm', 'knife', 'shotgun', 'teargas', 'cannon', 'grenade', 'mace', 'slingshot', 'whip']", + "Source \/ Justification":"Implicit Association Test (Greenwald et al., 1998)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"weapons-Caliskan_et_al_2017" + }, + { + "Category":"european american names", + "Seeds":"['Adam', 'Harry', 'Josh', 'Roger', 'Alan', 'Frank', 'Justin', 'Ryan', 'Andrew', 'Jack', 'Matthew', 'Stephen', 'Brad', 'Greg', 'Paul', 'Jonathan', 'Peter', 'Amanda', 'Courtney', 'Heather', 'Melanie', 'Katie', 'Betsy', 'Kristin', 'Nancy', 'Stephanie', 'Ellen', 'Lauren', 'Colleen', 'Emily', 'Megan', 'Rachel']", + "Source \/ Justification":"Implicit Association Test (Greenwald et al., 1998) (low frequency names removed)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"european_american_names-Caliskan_et_al_2017" + }, + { + "Category":"african american names", + "Seeds":"['Alonzo', 'Jamel', 'Theo', 'Alphonse', 'Jerome', 'Leroy', 'Torrance', 'Darnell', 'Lamar', 'Lionel', 'Tyree', 'Deion', 'Lamont', 'Malik', 'Terrence', 'Tyrone', 'Lavon', 'Marcellus', 'Wardell', 'Nichelle', 'Shereen', 'Ebony', 'Latisha', 'Shaniqua', 'Jasmine', 'Tanisha', 'Tia', 'Lakisha', 'Latoya', 'Yolanda', 'Malika', 'Yvette']", + "Source \/ Justification":"Implicit Association Test (Greenwald et al., 1998) (low frequency names removed)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"african_american_names-Caliskan_et_al_2017" + }, + { + "Category":"european american names market discrimination", + "Seeds":"['Todd', 'Neil', 'Geoffrey', 'Brett', 'Brendan', 'Greg', 'Matthew', 'Brad', 'Allison', 'Anne', 'Carrie', 'Emily', 'Jill', 'Laurie', 'Meredith', 'Sarah']", + "Source \/ Justification":"Bertrand and Mullainathan (2004) (low frequency names removed)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + 
"Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"european_american_names_market_discrimination-Caliskan_et_al_2017" + }, + { + "Category":"african american names market discrimination", + "Seeds":"['Kareem', 'Darnell', 'Tyrone', 'Hakim', 'Jamal', 'Leroy', 'Jermaine', 'Rasheed', 'Aisha', 'Ebony', 'Keisha', 'Kenya', 'Lakisha', 'Latoya', 'Tamika', 'Tanisha']", + "Source \/ Justification":"Bertrand and Mullainathan (2004) (low frequency names removed)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"african_american_names_market_discrimination-Caliskan_et_al_2017" + }, + { + "Category":"pleasantness", + "Seeds":"['joy', 'love', 'peace', 'wonderful', 'pleasure', 'friend', 'laughter', 'happy']", + "Source \/ Justification":"Nosek at al. (2002)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"pleasantness-Caliskan_et_al_2017" + }, + { + "Category":"unpleasantness", + "Seeds":"['agony', 'terrible', 'horrible', 'nasty', 'evil', 'war', 'awful', 'failure']", + "Source \/ Justification":"Nosek at al. (2002)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"unpleasantness-Caliskan_et_al_2017" + }, + { + "Category":"male names 1", + "Seeds":"['John', 'Paul', 'Mike', 'Kevin', 'Steve', 'Greg', 'Jeff', 'Bill']", + "Source \/ Justification":"Nosek at al. (2002)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"male_names_1-Caliskan_et_al_2017" + }, + { + "Category":"female names 1", + "Seeds":"['Amy', 'Joan', 'Lisa', 'Sarah', 'Diana', 'Kate', 'Ann', 'Donna']", + "Source \/ Justification":"Nosek at al. (2002)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"female_names_1-Caliskan_et_al_2017" + }, + { + "Category":"career", + "Seeds":"['executive', 'management', 'professional', 'corporation', 'salary', 'office', 'business', 'career']", + "Source \/ Justification":"Nosek at al. 
(2002)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"career-Caliskan_et_al_2017" + }, + { + "Category":"family", + "Seeds":"['home', 'parents', 'children', 'family', 'cousins', 'marriage', 'wedding', 'relatives']", + "Source \/ Justification":"Nosek at al. (2002)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"family-Caliskan_et_al_2017" + }, + { + "Category":"math 1", + "Seeds":"['math', 'algebra', 'geometry', 'calculus', 'equations', 'computation', 'numbers', 'addition']", + "Source \/ Justification":"Nosek at al. (2002)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"math_1-Caliskan_et_al_2017" + }, + { + "Category":"arts 1", + "Seeds":"['poetry', 'art', 'sculpture', 'dance', 'literature', 'novel', 'symphony', 'drama']", + "Source \/ Justification":"Nosek at al. (2002)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"arts_1-Caliskan_et_al_2017" + }, + { + "Category":"male 1", + "Seeds":"['brother', 'male', 'man', 'boy', 'son', 'he', 'his', 'him']", + "Source \/ Justification":"Nosek at al. (2002)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"male_1-Caliskan_et_al_2017" + }, + { + "Category":"female 1", + "Seeds":"['sister', 'female', 'woman', 'girl', 'daughter', 'she', 'hers', 'her']", + "Source \/ Justification":"Nosek at al. (2002)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"female_1-Caliskan_et_al_2017" + }, + { + "Category":"science 1", + "Seeds":"['science', 'technology', 'physics', 'chemistry', 'Einstein', 'NASA', 'experiment', 'astronomy']", + "Source \/ Justification":"Nosek at al. (2002)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"science_1-Caliskan_et_al_2017" + }, + { + "Category":"arts 2", + "Seeds":"['poetry', 'art', 'Shakespeare', 'dance', 'literature', 'novel', 'symphony', 'drama']", + "Source \/ Justification":"Nosek at al. 
(2002)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"arts_2-Caliskan_et_al_2017" + }, + { + "Category":"male 2", + "Seeds":"['brother', 'father', 'uncle', 'grandfather', 'son', 'he', 'his', 'him']", + "Source \/ Justification":"Nosek at al. (2002)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"male_2-Caliskan_et_al_2017" + }, + { + "Category":"female 2", + "Seeds":"['sister', 'mother', 'aunt', 'grandmother', 'daughter', 'she', 'hers', 'her']", + "Source \/ Justification":"Nosek at al. (2002)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"female_2-Caliskan_et_al_2017" + }, + { + "Category":"careers", + "Seeds":"['technician', 'accountant', 'supervisor', 'engineer', 'worker', 'educator', 'clerk', 'counselor', 'inspector', 'mechanic', 'manager', 'therapist', 'administrator', 'salesperson', 'receptionist', 'librarian', 'advisor', 'pharmacist', 'janitor', 'psychologist', 'physician', 'carpenter', 'nurse', 'investigator', 'bartender', 'specialist', 'electrician', 'officer', 'pathologist', 'teacher', 'lawyer', 'planner', 'practitioner', 'plumber', 'instructor', 'surgeon', 'veterinarian', 'paramedic', 'examiner', 'chemist', 'machinist', 'appraiser', 'nutritionist', 'architect', 'hairdresser', 'baker', 'programmer', 'paralegal', 'hygienist', 'scientist']", + "Source \/ Justification":"derived from hierarchical 2015 U.S. Bureau of Labor Statistics; if possible, convert multi-word terms into single word that represents superset, otherwise, discard", + "Source Categories":"population-derived", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"careers-Caliskan_et_al_2017" + }, + { + "Category":"androgynous names", + "Seeds":"['Kelly', 'Tracy', 'Jamie', 'Jackie', 'Jesse', 'Courtney', 'Lynn', 'Taylor', 'Leslie', 'Shannon', 'Stacey', 'Jessie', 'Shawn', 'Stacy', 'Casey', 'Bobby', 'Terry', 'Lee', 'Ashley', 'Eddie', 'Chris', 'Jody', 'Pat', 'Carey', 'Willie', 'Morgan', 'Robbie', 'Joan', 'Alexis', 'Kris', 'Frankie', 'Bobbie', 'Dale', 'Robin', 'Billie', 'Adrian', 'Kim', 'Jaime', 'Jean', 'Francis', 'Marion', 'Dana', 'Rene', 'Johnnie', 'Jordan', 'Carmen', 'Ollie', 'Dominique', 'Jimmie', 'Shelby']", + "Source \/ Justification":"most popular names in each 10% window of gender frequency based on 1990 U.S. 
Census data; algorithmically determine how \u201cname-like\u201d each vector is (by computing the distance of each vector to thecentroid of all the name vectors), and eliminate the 20% of vectors that are least name-like.", + "Source Categories":"population-derived", + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"androgynous_names-Caliskan_et_al_2017" + }, + { + "Category":"depressed 1", + "Seeds":"['sad', 'hopeless', 'gloomy', 'tearful', 'miserable', 'depressed']", + "Source \/ Justification":null, + "Source Categories":null, + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"depressed_1-Caliskan_et_al_2017" + }, + { + "Category":"physically ill", + "Seeds":"['sick', 'illness', 'influenza', 'disease', 'virus', 'cancer']", + "Source \/ Justification":null, + "Source Categories":null, + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"physically_ill-Caliskan_et_al_2017" + }, + { + "Category":"temporary", + "Seeds":"['impermanent', 'unstable', 'variable', 'fleeting', 'short-term', 'brief', 'occasional']", + "Source \/ Justification":null, + "Source Categories":null, + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"temporary-Caliskan_et_al_2017" + }, + { + "Category":"permanent", + "Seeds":"['stable', 'always', 'constant', 'persistent', 'chronic', 'prolonged', 'forever']", + "Source \/ Justification":null, + "Source Categories":null, + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"permanent-Caliskan_et_al_2017" + }, + { + "Category":"young names", + "Seeds":"['Tiffany', 'Michelle', 'Cindy', 'Kristy', 'Brad', 'Eric', 'Joey', 'Billy']", + "Source \/ Justification":null, + "Source Categories":null, + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"young_names-Caliskan_et_al_2017" + }, + { + "Category":"old names", + "Seeds":"['Ethel', 'Bernice', 'Gertrude', 'Agnes', 'Cecil', 'Wilbert', 'Mortimer', 'Edgar']", + "Source \/ Justification":null, + "Source Categories":null, + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"old_names-Caliskan_et_al_2017" + }, + { + "Category":"pleasant 6", + "Seeds":"['joy', 'love', 'peace', 'wonderful', 'pleasure', 'friend', 'laughter', 'happy']", + "Source \/ Justification":null, + "Source Categories":null, + "Used in Paper":"Semantics derived automatically from language corpora 
contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"pleasant_6-Caliskan_et_al_2017" + }, + { + "Category":"unpleasant 6", + "Seeds":"['agony', 'terrible', 'horrible', 'nasty', 'evil', 'war', 'awful', 'failure']", + "Source \/ Justification":null, + "Source Categories":null, + "Used in Paper":"Semantics derived automatically from language corpora contain human-like biases (Caliskan et al., 2017)", + "Link":"https:\/\/dataverse.harvard.edu\/dataset.xhtml?persistentId=doi:10.7910\/DVN\/DX4VWP", + "Seeds ID":"unpleasant_6-Caliskan_et_al_2017" + }, + { + "Category":"definitional female", + "Seeds":"['woman', 'girl', 'she', 'mother', 'daughter', 'gal', 'female', 'her', 'herself', 'Mary']", + "Source \/ Justification":null, + "Source Categories":"curated", + "Used in Paper":"Man is to Computer Programmer as Woman is to Homemaker? Debiasing Word Embeddings (Bolukbasi et al., 2016)", + "Link":"https:\/\/github.com\/tolga-b\/debiaswe", + "Seeds ID":"definitional_female-Bolukbasi_et_al_2016" + }, + { + "Category":"definitional male", + "Seeds":"['man', 'boy', 'he', 'father', 'son', 'guy', 'male', 'his', 'himself', 'John']", + "Source \/ Justification":null, + "Source Categories":"curated", + "Used in Paper":"Man is to Computer Programmer as Woman is to Homemaker? Debiasing Word Embeddings (Bolukbasi et al., 2016)", + "Link":"https:\/\/github.com\/tolga-b\/debiaswe", + "Seeds ID":"definitional_male-Bolukbasi_et_al_2016" + }, + { + "Category":"equalize 1", + "Seeds":"['monastery', 'spokesman', 'Catholic_priest', 'Dad', 'Men', 'councilman', 'grandpa', 'grandsons', 'prostate_cancer', 'testosterone', 'uncle', 'wives', 'Father', 'Grandpa', 'He', 'boy', 'boys', 'brother', 'brothers', 'businessman', 'chairman', 'colt', 'congressman', 'dad', 'dads', 'dudes', 'ex_girlfriend', 'father', 'fatherhood', 'fathers', 'fella', 'fraternity', 'gelding', 'gentleman', 'gentlemen', 'grandfather', 'grandson', 'he', 'himself', 'his', 'king', 'kings', 'male', 'males', 'man', 'men', 'nephew', 'prince', 'schoolboy', 'son', 'sons', 'twin_brother']", + "Source \/ Justification":null, + "Source Categories":"corpus-derived", + "Used in Paper":"Man is to Computer Programmer as Woman is to Homemaker? Debiasing Word Embeddings (Bolukbasi et al., 2016)", + "Link":"https:\/\/github.com\/tolga-b\/debiaswe", + "Seeds ID":"equalize_1-Bolukbasi_et_al_2016" + }, + { + "Category":"equalize 2", + "Seeds":"['convent', 'spokeswoman', 'nun', 'Mom', 'Women', 'councilwoman', 'grandma', 'granddaughters', 'ovarian_cancer', 'estrogen', 'aunt', 'husbands', 'Mother', 'Grandma', 'She', 'girl', 'girls', 'sister', 'sisters', 'businesswoman', 'chairwoman', 'filly', 'congresswoman', 'mom', 'moms', 'gals', 'ex_boyfriend', 'mother', 'motherhood', 'mothers', 'granny', 'sorority', 'mare', 'lady', 'ladies', 'grandmother', 'granddaughter', 'she', 'herself', 'her', 'queen', 'queens', 'female', 'females', 'woman', 'women', 'niece', 'princess', 'schoolgirl', 'daughter', 'daughters', 'twin_sister']", + "Source \/ Justification":null, + "Source Categories":null, + "Used in Paper":"Man is to Computer Programmer as Woman is to Homemaker? 
Debiasing Word Embeddings (Bolukbasi et al., 2016)", + "Link":"https:\/\/github.com\/tolga-b\/debiaswe", + "Seeds ID":"equalize_2-Bolukbasi_et_al_2016" + }, + { + "Category":"gender specific", + "Seeds":"['he', 'his', 'He', 'her', 'she', 'him', 'She', 'man', 'women', 'men', 'His', 'woman', 'spokesman', 'wife', 'himself', 'son', 'mother', 'father', 'chairman', 'daughter', 'husband', 'guy', 'girls', 'girl', 'Her', 'boy', 'King', 'boys', 'brother', 'Chairman', 'spokeswoman', 'female', 'sister', 'Women', 'Man', 'male', 'herself', 'Lions', 'Lady', 'brothers', 'dad', 'actress', 'mom', 'sons', 'girlfriend', 'Kings', 'Men', 'daughters', 'Prince', 'Queen', 'teenager', 'lady', 'Bulls', 'boyfriend', 'sisters', 'Colts', 'mothers', 'Sir', 'king', 'businessman', 'Boys', 'grandmother', 'grandfather', 'deer', 'cousin', 'Woman', 'ladies', 'Girls', 'Father', 'uncle', 'PA', 'Boy', 'Councilman', 'mum', 'Brothers', 'MA', 'males', 'Girl', 'Mom', 'Guy', 'Queens', 'congressman', 'Dad', 'Mother', 'grandson', 'twins', 'bull', 'queen', 'businessmen', 'wives', 'widow', 'nephew', 'bride', 'females', 'aunt', 'Congressman', 'prostate_cancer', 'lesbian', 'chairwoman', 'fathers', 'Son', 'moms', 'Ladies', 'maiden', 'granddaughter', 'younger_brother', 'Princess', 'Guys', 'lads', 'Ma', 'Sons', 'lion', 'Bachelor', 'gentleman', 'fraternity', 'bachelor', 'niece', 'Lion', 'Sister', 'bulls', 'husbands', 'prince', 'colt', 'salesman', 'Bull', 'Sisters', 'hers', 'dude', 'Spokesman', 'beard', 'filly', 'Actress', 'Him', 'princess', 'Brother', 'lesbians', 'councilman', 'actresses', 'Viagra', 'gentlemen', 'stepfather', 'Deer', 'monks', 'Beard', 'Uncle', 'ex_girlfriend', 'lad', 'sperm', 'Daddy', 'testosterone', 'MAN', 'Female', 'nephews', 'maid', 'daddy', 'mare', 'fiance', 'Wife', 'fiancee', 'kings', 'dads', 'waitress', 'Male', 'maternal', 'heroine', 'feminist', 'Mama', 'nieces', 'girlfriends', 'Councilwoman', 'sir', 'stud', 'Mothers', 'mistress', 'lions', 'estranged_wife', 'womb', 'Brotherhood', 'Statesman', 'grandma', 'maternity', 'estrogen', 'ex_boyfriend', 'widows', 'gelding', 'diva', 'teenage_girls', 'nuns', 'Daughter', 'czar', 'ovarian_cancer', 'HE', 'Monk', 'countrymen', 'Grandma', 'teenage_girl', 'penis', 'bloke', 'nun', 'Husband', 'brides', 'housewife', 'spokesmen', 'suitors', 'menopause', 'monastery', 'patriarch', 'Beau', 'motherhood', 'brethren', 'stepmother', 'Dude', 'prostate', 'Moms', 'hostess', 'twin_brother', 'Colt', 'schoolboy', 'eldest', 'brotherhood', 'Godfather', 'fillies', 'stepson', 'congresswoman', 'Chairwoman', 'Daughters', 'uncles', 'witch', 'Mommy', 'monk', 'viagra', 'paternity', 'suitor', 'chick', 'Pa', 'fianc\\\\u00e9', 'sorority', 'macho', 'Spokeswoman', 'businesswoman', 'eldest_son', 'gal', 'statesman', 'schoolgirl', 'fathered', 'goddess', 'hubby', 'mares', 'stepdaughter', 'blokes', 'dudes', 'socialite', 'strongman', 'Witch', 'fianc\\\\u00e9e', 'uterus', 'grandsons', 'Bride', 'studs', 'mama', 'Aunt', 'godfather', 'hens', 'hen', 'mommy', 'Babe', 'estranged_husband', 'Fathers', 'elder_brother', 'boyhood', 'baritone', 'Diva', 'Lesbian', 'grandmothers', 'grandpa', 'boyfriends', 'feminism', 'countryman', 'stallion', 'heiress', 'queens', 'Grandpa', 'witches', 'aunts', 'semen', 'fella', 'granddaughters', 'chap', 'knight', 'widower', 'Maiden', 'salesmen', 'convent', 'KING', 'vagina', 'beau', 'babe', 'HIS', 'beards', 'handyman', 'twin_sister', 'maids', 'gals', 'housewives', 'Gentlemen', 'horsemen', 'Businessman', 'obstetrics', 'fatherhood', 'beauty_queen', 'councilwoman', 'princes', 'matriarch', 'colts', 'manly', 
'ma', 'fraternities', 'Spokesmen', 'pa', 'fellas', 'Gentleman', 'councilmen', 'dowry', 'barbershop', 'Monks', 'WOMAN', 'fraternal', 'ballerina', 'manhood', 'Dads', 'heroines', 'granny', 'gynecologist', 'princesses', 'Goddess', 'yo', 'Granny', 'knights', 'eldest_daughter', 'HER', 'underage_girls', 'masculinity', 'Girlfriend', 'bro', 'Grandmother', 'grandfathers', 'crown_prince', 'Restless', 'paternal', 'Queen_Mother', 'Boyfriend', 'womens', 'Males', 'SHE', 'Countess', 'stepchildren', 'Belles', 'bachelors', 'matron', 'momma', 'Legs', 'maidens', 'goddesses', 'landlady', 'sisterhood', 'Grandfather', 'Fraternity', 'Majesty', 'Babes', 'lass', 'maternal_grandmother', 'blondes', 'ma\\'am', 'Womens', 'divorcee', 'Momma', 'fathering', 'Effie', 'Lad', 'womanhood', 'missus', 'Sisterhood', 'granddad', 'Mens', 'papa', 'gf', 'sis', 'Husbands', 'Hen', 'womanizer', 'gynecological', 'stepsister', 'Handsome', 'Prince_Charming', 'BOY', 'stepdad', 'teen_ager', 'GIRL', 'dame', 'Sorority', 'beauty_pageants', 'raspy', 'harem', 'maternal_grandfather', 'Hes', 'deliveryman', 'septuagenarian', 'damsel', 'paternal_grandmother', 'paramour', 'paternal_grandparents', 'Nun', 'DAD', 'mothering', 'shes', 'HE_', 'S', 'Nuns', 'teenage_daughters', 'auntie', 'widowed_mother', 'Girlfriends', 'FATHER', 'virile', 'COUPLE', 'grandmas', 'Hubby', 'nan', 'vixen', 'Joan_Crawford', 'stepdaughters', 'endometrial_cancer', 'stepsons', 'loins', 'Grandson', 'Mitchells', 'erections', 'Matron', 'Fella', 'daddies', 'ter', 'Sweetie', 'Dudes', 'Princesses', 'Lads', 'lioness', 'Mamma', 'virility', 'bros', 'womenfolk', 'Heir', 'BROTHERS', 'manliness', 'patriarchs', 'earl', 'sisterly', 'Whore', 'Gynaecology', 'countess', 'convents', 'Oratory', 'witch_doctor', 'mamas', 'yah', 'aunty', 'aunties', 'Heiress', 'lasses', 'Breasts', 'fairer_sex', 'sorority_sisters', 'WIFE', 'Laurels', 'penile', 'nuh', 'mah', 'toms', 'mam', 'Granddad', 'premenopausal_women', 'Granddaddy', 'nana', 'coeds', 'dames', 'herdsman', 'Mammy', 'Fellas', 'Niece', 'menfolk', 'Grandad', 'bloods', 'Gramps', 'damsels', 'Granddaughter', 'mamma', 'concubine', 'Oros', 'Blarney', 'filial', 'broads', 'Ethel_Kennedy', 'ACTRESS', 'Tit', 'fianc', 'Hunk', 'Night_Shift', 'wifey', 'Lothario', 'Holy_Roman_Emperor', 'horse_breeder', 'grandnephew', 'Lewises', 'Muscular', 'feminist_movement', 'Sanan', 'women\\\\u00e2_\\\\u20ac_\\\\u2122', 'Fiancee', 'dowries', 'Carmelite', 'rah', 'n_roller', 'bay_filly', 'belles', 'Uncles', 'PRINCESS', 'womans', 'Homeboy', 'Blokes', 'Charmer', 'codger', 'Delta_Zeta', 'courtesans', 'grandaughter', 'SISTER', 'Highness', 'grandbabies', 'crone', 'Skip_Away', 'noblewoman', 'bf', 'jane', 'philandering_husband', 'Sisqo', 'mammy', 'daugher', 'director_Skip_Bertman', 'DAUGHTER', 'Royal_Highness', 'mannish', 'spinsters', 'Missus', 'madame', 'Godfathers', 'saleswomen', 'beaus', 'Risha', 'luh', 'sah', 'negligee', 'Women\\\\u00e2_\\\\u20ac_\\\\u2122', 'Hos', 'salesgirl', 'grandmom', 'Grandmas', 'Lawsons', 'countrywomen', 'Booby', 'darlin', 'Sheiks', 'boyz', 'wifes', 'Bayi', 'Il_Duce', '\\\\u00e2_\\\\u20ac_\\\\u0153My', 'fem', 'daugther', 'Potti', 'hussy', 'tch', 'Gelding', 'stemmed_roses', 'Damson', 'puh', 'Tylers', 'neice', 'Mutha', 'GRANDMOTHER', 'youse', 'spurned_lover', 'mae', 'Britt_Ekland', 'clotheshorse', 'Carlita_Kilpatrick', 'Cambest', 'Pretty_Polly', 'banshees', 'male_chauvinist', 'Arliss', 'mommas', 'maidservant', 'Gale_Harold', 'Little_Bo_Peep', 'Cleavers', 'hags', 'blowsy', 'Queen_Elizabeth_I.', 'lassies', 'papas', 'BABE', 'ugly_ducklings', 'Jims', 'hellion', 
'Beautician', 'coalminer', 'relaxin', 'El_Mahroug', 'Victoria_Secret_Angel', 'shepherdess', 'Mosco', 'Slacks', 'nanna', 'wifely', 'tomboys', 'LAH', 'hast', 'apo', 'Kaplans', 'milkmaid', 'Robin_Munis', 'John_Barleycorn', 'royal_highness', 'Meanie', 'NAH', 'trollop', 'roh', 'Jewess', 'Sheik_Hamad', 'mumsy', 'Big_Pussy', 'chil_dren', 'Aunt_Bea', 'basso', 'sista', 'girlies', 'nun_Sister', 'chica', 'Bubbas', 'massa', 'Southern_belles', 'Nephews', 'castrations', 'Mister_Ed', 'Grandsons', 'Calaf', 'Malachy_McCourt', 'Shamash', 'hey_hey', 'Harmen', 'sonofabitch', 'Donovans', 'Grannie', 'Kalinka', 'hisself', 'Devean', 'goatherd', 'hinds', 'El_Corredor', 'Kens', 'notorious_womanizer', 'goh', 'Mommas', 'washerwoman', 'Samaira', 'Coo_Coo', 'Governess', 'grandsire', 'PRINCE_WILLIAM', 'gramma', 'him.He', 'Coptic_priest', 'Corbie', 'Kennys', 'thathe', 'Pa_Pa', 'Bristols', 'Hotep', 'snowy_haired', 'El_Prado_Ire', 'Girl_hitmaker', 'Hurleys', 'St._Meinrad', 'sexually_perverted', 'authoress', 'Prudie', 'raven_haired_beauty', 'Bonos', 'domestic_shorthair', 'brothas', 'nymphet', 'Neelma', 'Seita', 'stud_muffin', 'St._Judes', 'yenta', 'bare_shouldered', 'Pinkney_Sr.', 'PRINCE_CHARLES', 'Bisutti', 'sistas', 'Blanche_Devereaux', 'Momoa', 'Quiff', 'Scotswoman', 'balaclava_clad_men', 'Louis_Leakey', 'dearie', 'vacuum_cleaner_salesman', 'grandads', 'postulant', 'SARAH_JESSICA_PARKER', 'AUNT', 'Prince_Dauntless', 'Dalys', 'Darkie', 'Czar_Nicholas', 'Lion_Hearted', 'Boy_recliner', 'baby_mamas', 'giantess', 'Lawd', 'GRANNY', 'fianc_e', 'Bilqis', 'WCTU', 'famly', 'Ellas', 'feminazis', 'Pentheus', 'MAMAS', 'Town_Criers', 'Saggy', 'youngman', 'grandam', 'divorc\\\\u00e9', 'bosomed', 'roon', 'Simmentals', 'eponymous_heroine', 'LEYLAND', 'REE', 'cain', 't', 'Evelynn', 'WAH', 'sistah', 'Horners', 'Elsie_Poncher', 'Coochie', 'rat_terriers', 'Limousins', 'Buchinski', 'Schicchi', 'Carpitcher', 'Khwezi', 'HAH', 'Shazza', 'Mackeson', 'ROH', 'kuya', 'novice_nun', 'Shei', 'Elmasri', 'ladykiller', '6yo', 'Yenta', 'SHEL', 'pater', 'Souse', 'Tahirah', 'comedian_Rodney_Dangerfield', 'Shottle', 'carryin', 'Sath', 'fa', 'afafine', 'royal_consort', 'hus_band', 'maternal_uncles', 'dressing_provocatively', 'dreamgirl', 'millionaire_industrialist', 'Georgie_Girl', 'Must_Be_Obeyed', 'joh', 'Arabian_stallion', 'ahr', 'mso_para_margin_0in', 'SOO', 'Biddles', 'Chincoteague_Volunteer_Fire', 'Lisa_Miceli', 'gorgeous_brunette', 'fianc\\\\u017d', 'Moved_fluently', 'Afternoon_Deelites', 'biker_dude', 'Vito_Spatafore', 'MICK_JAGGER', 'Adesida', 'Reineman', 'witz', 'Djamila', 'Glenroe', 'daddys', 'Romanzi', 'gentlewomen', 'Dandie_Dinmont_terrier', 'Excess_Ire', 'By_SYVJ_Staff', 'zan', 'CONFESSIONS', 'Magees', 'wimmin', 'tash', 'Theatrical_Ire', 'Prince_Charmings', 'chocolate_eclair', 'bron', 'daughers', 'Felly', 'fiftyish', 'Spritely', 'GRANDPA', 'distaffer', 'Norbertines', 'DAH', 'leader_Muammar_Gadaffi', 'swains', 'Prince_Tomohito', 'Honneur', 'Soeur', 'jouster', 'Pharaoh_Amenhotep_III', 'QUEEN_ELIZABETH_II', 'Ne', 'er', 'Galileo_Ire', 'Fools_Crow', 'Lannisters', 'Devines', 'gonzales', 'columnist_Ann_Landers', 'Moseleys', 'hiz', 'busch', 'roastee', 'toyboys', 'Sheffields', 'grandaunt', 'Galvins', 'Giongo', 'geh', 'flame_haired_actress', 'Grammarian', 'Greg_Evigan', 'frontierswoman', 'Debele', 'rabs', 'nymphets', 'aai', 'BREE', 'Shaqs', 'ZAY', 'pappa', 'Housa', 'refrigerator_repairman', 'artificial_inseminations', 'chickie', 'Rippa', 'teenager_Tracy_Turnblad', 'homebred_colt', 'Abigaille', 'hen_pecked_husband', 'businesman', 'her.She', 'Kaikeyi', 
'Stittsworth', 'self_proclaimed_redneck', 'Khella', 'NeW', 'Evers_Swindell', 'Asmerom_Gebreselassie', 'Boy_recliners', 'Cliff_Claven', 'Legge_Bourke', 'Costos', 'd', '_honneur', 'sistahs', 'Cabble', 'sahn', 'CROW_AGENCY_Mont', 'jezebel', 'Harrolds', 'ROSARIO_DAWSON', 'INXS_frontman_Michael_Hutchence', 'Gursikh', 'Dadas', 'VIAGA', 'keen_horsewoman', 'Theodoric', 'Eldery', 'lihn', 'Alice_Kramden', 'Santarina', 'radical_cleric_al_Sadr', 'Curleys', 'SY', 'Fidaa', 'Saptapadi', 'Actor_Sean_Astin', 'Kellita_Smith', 'Doly', 'Libertina', 'Money_McBags', 'Chief_Bearhart', 'choirgirl', 'chestnut_stallion', 'VIGRA', 'BY_JIM_McCONNELL', 'Sal_Vitale', 'Trivia_buffs', 'kumaris', 'fraternal_lodge', 'galpals', 'Borino_Quinn', 'lina', 'LATEST_Rapper', 'Bezar', 'Manro', 'bakla', 'Grisetti', 'blond_bimbo', 'spinster_aunt', 'gurls', 'hiswife', 'paleface', 'Charlye', 'hippie_chicks', 'Khalifas', 'Picture_JUSTIN_SANSON', 'Hepburns', 'yez', 'ALDER', 'Sanussi', 'Lil_Sis', 'McLoughlins', 'Barbra_Jean', 'Lulua', 'thatshe', 'actress_Shohreh_Aghdashloo', 'SIR_ANTHONY_HOPKINS', 'Gloddy', 'ZAH', 'ORANGE_', 'S', 'Danielle_Bimber', 'grandmum', 'Kulkis', 'Brazington', 'Marisa_Lenhard_CFA', 'SIR_JOHN', 'Clareman', 'Aqila', 'Heavily_tattooed', 'Libbys', 'thim', 'elocutionist', 'submissives', 'Inja', 'rahm', 'Agnes_Gooch', 'fake_tits', 'nancy_boys', 'Swaidan', 'SHAH', 'ain', 'ta_bed', 'Shumail_Raj', 'Duchesse', 'diethylstilbestrol_DES', 'colt_foal', 'unfaithful_lover', 'Maseri', 'nevah', 'SAHN', 'Barths', 'Toughkenamon', 'GUEST_STARS', 'him.But', 'Donna_Claspell', 'gingham_dresses', 'Massage_Parlour', 'wae', 'Wasacz', 'Magistra', 'vihl', 'Smriti_Iraani', 'boyish_haircut', 'workingwoman', 'borthers', 'Capuchin_friars', 'Nejma', 'yes_sirs', 'bivocational_pastor', 'Grafters', 'HOPWOOD', 'Nicknamed_Godzilla', 'yos', 'Berkenfield', 'Missis', 'sitcom_Designing_Women', 'Kafoa', 'trainer_Emma_Lavelle', 'sadomasochistic_dungeon', 'iht', 'desperates', 'predessor', 'wolf_cub', 'indigenous_Peruvians', 'Livia_Soprano', 'troh', 'colt_sired', 'BOND_HILL', 'ihl', 'Drydens', 'rahs', 'Piserchia', 'Sonny_Corinthos', 'bankrobber', 'Fwank', 'feisty_redhead', 'booze_guzzling', 'COOPERS', 'actress_Q', 'orianka_Kilcher', 'Cortezar', 'twe', 'Jacoub', 'Cindy_Iannarelli', 'Hell_Raiser', 'Fondly_referred', 'Bridal_Shoppe', 'Noleta', 'Christinas', 'IAGRA', 'LaTanya_Richardson', 'Sang_Bender', 'Assasins', 'sorrel_gelding', 'septugenarian', 'Hissy', 'Muqtada_al_Sadr_mook', 'Pfeni', 'MADRID_AFX_Banco_Santander', 'tuchis', 'LeVaughn', 'Gadzicki', 'transvestite_hooker', 'Fame_jockey_Laffit', 'nun_Sister_Mary', 'SAMSONOV', 'Mayflower_Madam', 'Shaque', 'well.He', 'Trainer_Julio_Canani', 'sorrel_mare', 'minivehicle_joint_venture', 'wife_Dwina', 'Aasiya_AH', '_see', 'Baratheon', 'Rick_O', 'Shay', 'Mammies', 'goatie', 'Nell_Gwynne', 'charmingly_awkward', 'Slamma', 'DEHL', 'Lorenzo_Borghese', 'ALMA_Wis.', 'Anne_Scurria', 'father_Peruvians_alternately', 'JULIE_ANDREWS', 'Slim_Pickins', 'Victoria_Secret_stunner', 'BY', 'Sanam_Devdas', 'pronounced_luh', 'Pasha_Selim', '\\\\u4e2d\\\\u534e', 'rson', 'maternal_grandmothers', 'IOWA_CITY_Ia', 'Madame_de_Tourvel', 'JAY', 'Sheika_Mozah_bint_Nasser', 'Hotsy_Totsy', 'D', '_Ginto', 'singer_Johnny_Paycheck', 'uterine_prolapse_surgery', 'SCOTTDALE_Pa.', 'AdelaideNow_reports', 'Marcus_Schenkenberg', 'Clyse', 'Obiter_Dicta', 'comic_Sam_Kinison', 'bitties', 'ROCKVILLE_Ind.', 'swimsuit_calendars', 'Decicio_Smith', 'Ma_ma', 'Rie_Miyazawa', 'celibate_chastity', 'gwah', 'ZAY', 'HER_Majesty', 'Defrere', 'Las_Madrinas', 
'\\\\u7c3f_\\\\u8042_\\\\u7ffb', 'Bea_Hamill', 'ARCADIA_Calif._Trainer', 'Bold_Badgett', 'stakes_victress', 'Hoppin_Frog', 'Narumiya', 'Flayfil', 'hardman_Vinnie_Jones', 'Marilyn_Monroe_lookalike', 'Kivanc_Tatlitug', 'Persis_Khambatta', 'SINKING_SPRING_Pa.', 'len_3rd', 'DEAR_TRYING', 'Farndon_Cheshire', 'Krishna_Madiga', 'daughter_Princess_Chulabhorn', 'Marshall_Rooster_Cogburn', 'Kitty_Kiernan', 'Yokich', 'Jarou', 'Serdaris', 'ee_ay', 'Montifiore', 'Chuderewicz', 'Samuel_Le_Bihan', 'filly_Proud_Spell', 'Umm_Hiba', 'pronounced_koo', 'Sandy_Fonzo', 'KOR', 'Fielder_Civil_kisses', 'Federalsburg_Maryland', 'Nikah_ceremony', 'Brinke_Stevens', 'Yakama_Tribal_Council', 'Capuchin_Father', 'wife_Callista_Bisek', 'Beau_Dare', 'Bedoni', 'Arjun_Punj', 'JOHNNY_KNOXVILLE', 'cap_tain', 'Alderwood_Boys', 'Chi_Eta_Phi', 'ringleader_Charles_Graner', 'Savoies', 'Lalla_Salma', 'Mrs._Potiphar', 'fahn', 'name_Taylor_Sumers', 'Vernita_Green', 'Bollywood_baddie', 'BENBROOK_Texas', 'Assemblyman_Lou_Papan', 'virgin_brides', 'Cho_Eun', 'CATHY_Freeman', 'Uncle_Saul', 'Lao_Brewery', 'Ibo_tribe', 'ruf', 'rival_Edurne_Pasaban', 'Hei_Shangri_La', 'Mommy_dearest', 'interest_Angola_Sonogal', 'Ger_Monsun', 'PUSSYCAT_DOLL', 'Crown_Jewels_Condoms', 'Lord_Marke', 'Patootie', 'Nora_Bey', 'huntin_shootin', 'Minister_Raymond_Tshibanda', 'La_Nina_la_NEEN', 'signature_Whoppers', 'estranged_hubby_Kevin_Federline', 'UR', 'pill_poppin', 'GEHR', 'purebred_Arabians', 'husbandly_duties', 'VIAGRA_TIMING', 'Hereford_heifer', 'hushed_monotone_voice', 'Pola_Uddin', 'Wee_Jimmy_Krankie', 'Kwakwanso', 'Our_Galvinator', 'shoh', 'Codependency_Anonymous_Group', 'LA', 'Taufa', 'ahau', 'Invincible_Spirit_colt', 'SAH', '_dur', 'MOUNT_CARMEL_Pa.', 'watches_attentively', 'SNL_spinoffs', 'Seth_Nitschke', 'Duns_Berwickshire', 'defendant_Colleen_LaRose', 'Silky_O', 'Sullivan', 'Highcliff_Farm', 'REN', 'Comestar', 'Satisfied_Frog', 'Jai_Maharashtra', 'ATTICA_Ind.', 'lover_Larry_Birkhead', 'Tami_Megal', 'chauvinist_pigs', 'Phi_sorority', 'Micronesian_immigrant', 'Lia_Boldt', 'Sugar_Tits', 'actress_Kathy_Najimy', 'zhoo', 'Colombo_underboss', 'Katsav_accusers', 'Bess_Houdini', 'rap_mogul_Diddy', 'companions_Khin_Khin', 'Van_Het', 'Mastoi_tribe', 'VITALY', 'ROLLING_STONES_rocker', 'womanizing_cad', 'LILY_COLE', 'paternal_grandfathers', 'Lt._Col._Kurt_Kosmatka', 'Kasseem_Jr.', 'Ji_Ji', 'Wilburforce', 'VIAGRA_DOSE', 'English_Sheepdogs', 'pronounced_Kah', 'Htet_Htet_Oo', 'Brisk_Breeze', 'Eau_du', 'BY_MELANIE_EVANS', 'Neovasc_Medical', 'British_funnyman_RICKY', '4YO_mare', 'Hemaida', 'MONKTON', 'Mrs_Mujuru', 'BaGhana_BaGhana', 'Shaaban_Abdel_Rahim', 'Edward_Jazlowiecki_lawyer', 'Ajman_Stud', 'manly_pharaoh_even', 'Serra_Madeira_Islands', 'FRAY', 'panto_dames', 'Khin_Myo', 'dancer_Karima_El_Mahroug', 'CROWN_Princess', 'Baseball_HOFer', 'Hasta_la_Pasta', 'GIRLS_NEXT_DOOR', 'Benedict_Groeschel', 'Bousamra', 'Ruby_Rubacuori_Ruby', 'Monde_Bleu', 'Un_homme_qui', 'Taylor_Sumers', 'Rapper_EMINEM', 'Joe_Menchetti', 'VAY', 'supermodel_NAOMI_CAMPBELL', 'Supermodel_GISELE_BUNDCHEN', 'Au_Lait', 'Radar_Installed', 'THOMAS_TOWNSHIP_Mich.', 'Rafinesque', 'Herman_Weinrich', 'Abraxas_Antelope', 'raspy_voiced_rocker', 'Manurewa_Cosmopolitan_Club', 'Paraone', 'THE_LEOPARD', 'Boy_Incorporated_LZB', 'Dansili_filly', 'Lumpy_Rutherford', 'unwedded_bliss', 'Bhavna_Sharma', 'Scarvagh', 'en_flagrante', 'Mottu_Maid', 'Dowager_Queen', 'NEEN', 'model_Monika_Zsibrita', 'ROSIE_PEREZ', 'Mattock_Ranger', 'Valorous', 'Surpreme', 'Marwari_businessmen', 'Grandparents_aunts', 'Kimberley_Vlaeminck', 
'Lyn_Treece_Boys', 'PDX_Update', 'Virsa_Punjab', 'eyelash_fluttering', 'Pi_fraternity', 'HUNTLEIGH_Mo.', 'novelist_Jilly_Cooper', 'Naha_Shuri_temple', 'Yasmine_Al_Massri', 'Mu_Gamma_Xi', 'Mica_Ertegun', 'Ocleppo', 'VIAGRA_CONTRAINDICATIONS', 'daughter_PEACHES', 'trainer_Geoff_Wragg', 'OVERNIGHT_DELIVERY', 'Fitts_retiree', 'de_Tourvel', 'Lil_Lad', 'north_easterner', 'Aol_Weird_News', 'Somewhat_improbably', 'Sikh_panth', 'Worcester_2m_7f', 'Zainab_Jah', 'OLYMPIC_medalist', 'Enoch_Petrucelly', 'collie_Lassie', 'LOW', 'clumsiness_Holloway', 'ayr', 'OHR', 'ROLLING_STONES_guitarist', 'LAH', '_nee', 'Ian_Beefy_Botham', 'Awapuni_trainer', 'Glamorous_Granny', 'Chiang_Ching', 'MidAtlantic_Cardiovascular_Associates', 'Yeke', 'Seaforth_Huron_Expositor', 'Westley_Cary_Elwes', 'Cate_Blanchett_Veronica_Guerin', 'Bellas_Gate', 'witch_Glinda', 'wives_mistresses', 'Woodsville_Walmart', '2YO_colt', 'Manav_Sushant_Singh', 'Pupi_Avati_Il', 'Sigma_Beta_Rho', 'Bishop_Christopher_Senyonjo', 'Vodou_priest', 'Rubel_Chowdhury', 'Claddagh_Ring', 'TAH', '_duh_al', 'al_Sadr_mook_TAH', 'ROBIN_GIBB', 'GAHN', 'BY_THOMAS_RANSON', 'sister_Carine_Jena', 'Lyphard_mare', 'summa_cum', 'Semenya_grandmother_Maputhi', 'Clare_Nuns', 'Talac', 'sex_hormones_androgens', 'majeste', 'Saint_Ballado_mare', 'Carrie_Huchel', 'Mae_Dok', 'wife_Dieula', 'Earnest_Sirls', 'spoof_bar_mitzvah', 'von_Boetticher', 'Audwin_Mosby', 'Case_presentationWe', 'Vincent_Papandrea', 'KRAY', 'Sergi_Benavent', 'Le_Poisson', 'Von_Cramm', 'Patti_Mell', 'Raymi_Coya', 'Benjamin_BeBe_Winans', 'Nana_Akosua', 'Auld_Acquaintance', 'Desire_Burunga', 'Company_Wrangler_Nestea', 'ask_Krisy_Plourde', 'JUANITA_BYNUM', 'livia', 'GAMB', 'Gail_Rosario_Dawson', 'Ramgarhia_Sikh', 'Catholic_nun_Sister', 'FOUR_WEDDINGS_AND', 'Robyn_Scherer', 'brother_King_Athelstan', 'Santo_Loquasto_Fences', 'Wee_Frees', 'MARISOL', 'Soliloquy_Stakes', 'Whatever_Spoetzl', 'Marc', 'Aurelio', 'mon_petit', 'Sabbar_al_Mashhadani', 'KAY', '_lee', 'm_zah_MAH', 'BY_TAMI_ALTHOFF', 'hobbit_Samwise_Gamgee', 'Bahiya_Hariri_sister', 'daddy_Larry_Birkhead', 'Sow_Tracey_Ullman', 'coach_Viljo_Nousiainen', 'Carmen_Lebbos', 'conjoined_twins_Zainab', 'Rob_Komosa', 'ample_bosomed', 'Ageing_rocker', 'psychic_Oda']", + "Source \/ Justification":null, + "Source Categories":null, + "Used in Paper":"Man is to Computer Programmer as Woman is to Homemaker? 
Debiasing Word Embeddings (Bolukbasi et al., 2016)", + "Link":"https:\/\/github.com\/tolga-b\/debiaswe", + "Seeds ID":"gender_specific-Bolukbasi_et_al_2016" + }, + { + "Category":"gender specific seed", + "Seeds":"['actress', 'actresses', 'aunt', 'aunts', 'bachelor', 'ballerina', 'barbershop', 'baritone', 'beard', 'beards', 'beau', 'bloke', 'blokes', 'boy', 'boyfriend', 'boyfriends', 'boyhood', 'boys', 'brethren', 'bride', 'brides', 'brother', 'brotherhood', 'brothers', 'bull', 'bulls', 'businessman', 'businessmen', 'businesswoman', 'chairman', 'chairwoman', 'chap', 'colt', 'colts', 'congressman', 'congresswoman', 'convent', 'councilman', 'councilmen', 'councilwoman', 'countryman', 'countrymen', 'czar', 'dad', 'daddy', 'dads', 'daughter', 'daughters', 'deer', 'diva', 'dowry', 'dude', 'dudes', 'elder_brother', 'eldest_son', 'estranged_husband', 'estranged_wife', 'estrogen', 'ex_boyfriend', 'ex_girlfriend', 'father', 'fathered', 'fatherhood', 'fathers', 'fella', 'fellas', 'female', 'females', 'feminism', 'fiance', 'fiancee', 'fillies', 'filly', 'fraternal', 'fraternities', 'fraternity', 'gal', 'gals', 'gelding', 'gentleman', 'gentlemen', 'girl', 'girlfriend', 'girlfriends', 'girls', 'goddess', 'godfather', 'granddaughter', 'granddaughters', 'grandfather', 'grandma', 'grandmother', 'grandmothers', 'grandpa', 'grandson', 'grandsons', 'guy', 'handyman', 'he', 'heiress', 'hen', 'hens', 'her', 'heroine', 'hers', 'herself', 'him', 'himself', 'his', 'horsemen', 'hostess', 'housewife', 'housewives', 'hubby', 'husband', 'husbands', 'king', 'kings', 'lad', 'ladies', 'lads', 'lady', 'lesbian', 'lesbians', 'lion', 'lions', 'ma', 'macho', 'maid', 'maiden', 'maids', 'male', 'males', 'mama', 'man', 'mare', 'maternal', 'maternity', 'matriarch', 'men', 'menopause', 'mistress', 'mom', 'mommy', 'moms', 'monastery', 'monk', 'monks', 'mother', 'motherhood', 'mothers', 'nephew', 'nephews', 'niece', 'nieces', 'nun', 'nuns', 'obstetrics', 'ovarian_cancer', 'pa', 'paternity', 'penis', 'prince', 'princes', 'princess', 'prostate', 'prostate_cancer', 'queen', 'queens', 'salesman', 'salesmen', 'schoolboy', 'schoolgirl', 'semen', 'she', 'sir', 'sister', 'sisters', 'son', 'sons', 'sorority', 'sperm', 'spokesman', 'spokesmen', 'spokeswoman', 'stallion', 'statesman', 'stepdaughter', 'stepfather', 'stepmother', 'stepson', 'strongman', 'stud', 'studs', 'suitor', 'suitors', 'teenage_girl', 'teenage_girls', 'testosterone', 'twin_brother', 'twin_sister', 'uncle', 'uncles', 'uterus', 'vagina', 'viagra', 'waitress', 'widow', 'widower', 'widows', 'wife', 'witch', 'witches', 'wives', 'woman', 'womb', 'women', 'younger_brother']", + "Source \/ Justification":"words with \"male\" or \"female\" in their definition; manually removed words that weren't gender specific", + "Source Categories":null, + "Used in Paper":"Man is to Computer Programmer as Woman is to Homemaker? 
Debiasing Word Embeddings (Bolukbasi et al., 2016)", + "Link":"https:\/\/github.com\/tolga-b\/debiaswe", + "Seeds ID":"gender_specific_seed-Bolukbasi_et_al_2016" + }, + { + "Category":"male", + "Seeds":"['he', 'his', 'son', 'father', 'male', 'boy', 'uncle']", + "Source \/ Justification":"For gender, we used vocabularies created by (Bolukbasi et al., 2016) and (Caliskan et al., 2017).", + "Source Categories":"prior-work", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"male-Manzini_et_al_2019" + }, + { + "Category":"female", + "Seeds":"['she', 'hers', 'daughter', 'mother', 'female', 'girl', 'aunt']", + "Source \/ Justification":"For gender, we used vocabularies created by (Bolukbasi et al., 2016) and (Caliskan et al., 2017).", + "Source Categories":"prior-work", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"female-Manzini_et_al_2019" + }, + { + "Category":"male roles", + "Seeds":"['manager', 'executive', 'doctor', 'lawyer', 'programmer', 'scientist', 'soldier', 'supervisor', 'rancher', 'janitor', 'firefighter', 'officer']", + "Source \/ Justification":"For gender, we used vocabularies created by (Bolukbasi et al., 2016) and (Caliskan et al., 2017).", + "Source Categories":"prior-work", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"male_roles-Manzini_et_al_2019" + }, + { + "Category":"female roles", + "Seeds":"['secretary', 'nurse', 'clerk', 'artist', 'homemaker', 'dancer', 'singer', 'librarian', 'maid', 'hairdresser', 'stylist', 'receptionist', 'counselor']", + "Source \/ Justification":"For gender, we used vocabularies created by (Bolukbasi et al., 2016) and (Caliskan et al., 2017).", + "Source Categories":"prior-work", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"female_roles-Manzini_et_al_2019" + }, + { + "Category":"gender test terms", + "Seeds":"['chair', 'house', 'supervisor', 'secretary', 'loud', 'weak']", + "Source \/ Justification":"For gender, we used vocabularies created by (Bolukbasi et al., 2016) and (Caliskan et al., 2017).", + "Source Categories":"prior-work", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"gender_test_terms-Manzini_et_al_2019" + }, + { + "Category":"black", + "Seeds":"['black', 'african', 'black', 'africa', 'africa', 'africa']", + "Source \/ Justification":"For race we consulted a number of different sources for each race: Caucasians (Chung-Herrera and Lankau, 2005; Goad, 1998); African Americans (Punyanunt-Carter, 2008; Brown Givens and Monahan, 2005; Chung-Herrera and Lankau, 2005; Hakanen, 1995; Welch, 2007; Kawai, 2005); and Asian Americans (Leong and Hayes, 1990; Lin et al., 2005; Chung-Herrera and Lankau, 2005; 
Osajima, 2005; Garg et al., 2018).", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"black-Manzini_et_al_2019" + }, + { + "Category":"caucasian", + "Seeds":"['caucasian', 'caucasian', 'white', 'america', 'america', 'europe']", + "Source \/ Justification":"For race we consulted a number of different sources for each race: Caucasians (Chung-Herrera and Lankau, 2005; Goad, 1998); African Americans (Punyanunt-Carter, 2008; Brown Givens and Monahan, 2005; Chung-Herrera and Lankau, 2005; Hakanen, 1995; Welch, 2007; Kawai, 2005); and Asian Americans (Leong and Hayes, 1990; Lin et al., 2005; Chung-Herrera and Lankau, 2005; Osajima, 2005; Garg et al., 2018).", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"caucasian-Manzini_et_al_2019" + }, + { + "Category":"asian", + "Seeds":"['asian', 'asian', 'asian', 'asia', 'china', 'asia']", + "Source \/ Justification":"For race we consulted a number of different sources for each race: Caucasians (Chung-Herrera and Lankau, 2005; Goad, 1998); African Americans (Punyanunt-Carter, 2008; Brown Givens and Monahan, 2005; Chung-Herrera and Lankau, 2005; Hakanen, 1995; Welch, 2007; Kawai, 2005); and Asian Americans (Leong and Hayes, 1990; Lin et al., 2005; Chung-Herrera and Lankau, 2005; Osajima, 2005; Garg et al., 2018).", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"asian-Manzini_et_al_2019" + }, + { + "Category":"black roles", + "Seeds":"['slave', 'musician', 'runner', 'criminal', 'homeless']", + "Source \/ Justification":"For race we consulted a number of different sources for each race: Caucasians (Chung-Herrera and Lankau, 2005; Goad, 1998); African Americans (Punyanunt-Carter, 2008; Brown Givens and Monahan, 2005; Chung-Herrera and Lankau, 2005; Hakanen, 1995; Welch, 2007; Kawai, 2005); and Asian Americans (Leong and Hayes, 1990; Lin et al., 2005; Chung-Herrera and Lankau, 2005; Osajima, 2005; Garg et al., 2018).", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"black_roles-Manzini_et_al_2019" + }, + { + "Category":"caucasian roles", + "Seeds":"['manager', 'executive', 'redneck', 'hillbilly', 'leader', 'farmer']", + "Source \/ Justification":"For race we consulted a number of different sources for each race: Caucasians (Chung-Herrera and Lankau, 2005; Goad, 1998); African Americans (Punyanunt-Carter, 2008; Brown Givens and Monahan, 2005; Chung-Herrera and Lankau, 2005; Hakanen, 1995; Welch, 2007; Kawai, 2005); and Asian Americans (Leong and Hayes, 1990; Lin et al., 2005; Chung-Herrera and Lankau, 2005; Osajima, 2005; Garg et al., 2018).", + "Source Categories":"borrowed-from-social-sciences", + 
"Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"caucasian_roles-Manzini_et_al_2019" + }, + { + "Category":"asian roles", + "Seeds":"['doctor', 'engineer', 'laborer', 'teacher']", + "Source \/ Justification":"For race we consulted a number of different sources for each race: Caucasians (Chung-Herrera and Lankau, 2005; Goad, 1998); African Americans (Punyanunt-Carter, 2008; Brown Givens and Monahan, 2005; Chung-Herrera and Lankau, 2005; Hakanen, 1995; Welch, 2007; Kawai, 2005); and Asian Americans (Leong and Hayes, 1990; Lin et al., 2005; Chung-Herrera and Lankau, 2005; Osajima, 2005; Garg et al., 2018).", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"asian_roles-Manzini_et_al_2019" + }, + { + "Category":"race test terms", + "Seeds":"['chair', 'house', 'smart', 'criminal', 'executive', 'farmer']", + "Source \/ Justification":"For race we consulted a number of different sources for each race: Caucasians (Chung-Herrera and Lankau, 2005; Goad, 1998); African Americans (Punyanunt-Carter, 2008; Brown Givens and Monahan, 2005; Chung-Herrera and Lankau, 2005; Hakanen, 1995; Welch, 2007; Kawai, 2005); and Asian Americans (Leong and Hayes, 1990; Lin et al., 2005; Chung-Herrera and Lankau, 2005; Osajima, 2005; Garg et al., 2018).", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"race_test_terms-Manzini_et_al_2019" + }, + { + "Category":"jew", + "Seeds":"['judaism', 'jew', 'synagogue', 'torah', 'rabbi']", + "Source \/ Justification":"For religion we used the following sources and labels: Christians (Rios et al., 2015; Zuckerman, 2009; Unnever et al., 2005); Jews (Dundes, 1971; Fetzer, 2000); and Muslims (Shry-ock, 2010; Alsultany, 2012; Shaheen, 1997).", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"jew-Manzini_et_al_2019" + }, + { + "Category":"christian", + "Seeds":"['christianity', 'christian', 'church', 'bible', 'priest']", + "Source \/ Justification":"For religion we used the following sources and labels: Christians (Rios et al., 2015; Zuckerman, 2009; Unnever et al., 2005); Jews (Dundes, 1971; Fetzer, 2000); and Muslims (Shry-ock, 2010; Alsultany, 2012; Shaheen, 1997).", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"christian-Manzini_et_al_2019" + }, + { + "Category":"muslim", + "Seeds":"['islam', 'muslim', 'mosque', 'quran', 'imam']", + "Source \/ Justification":"For religion we used the following sources and labels: Christians (Rios et al., 
2015; Zuckerman, 2009; Unnever et al., 2005); Jews (Dundes, 1971; Fetzer, 2000); and Muslims (Shryock, 2010; Alsultany, 2012; Shaheen, 1997).", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"muslim-Manzini_et_al_2019" + }, + { + "Category":"jewish attributes", + "Seeds":"['greedy', 'cheap', 'hairy', 'liberal']", + "Source \/ Justification":"For religion we used the following sources and labels: Christians (Rios et al., 2015; Zuckerman, 2009; Unnever et al., 2005); Jews (Dundes, 1971; Fetzer, 2000); and Muslims (Shryock, 2010; Alsultany, 2012; Shaheen, 1997).", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"jewish_attributes-Manzini_et_al_2019" + }, + { + "Category":"christian attributes", + "Seeds":"['judgemental', 'conservative', 'familial']", + "Source \/ Justification":"For religion we used the following sources and labels: Christians (Rios et al., 2015; Zuckerman, 2009; Unnever et al., 2005); Jews (Dundes, 1971; Fetzer, 2000); and Muslims (Shryock, 2010; Alsultany, 2012; Shaheen, 1997).", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"christian_attributes-Manzini_et_al_2019" + }, + { + "Category":"muslim attributes", + "Seeds":"['violent', 'terrorist', 'dirty', 'uneducated']", + "Source \/ Justification":"For religion we used the following sources and labels: Christians (Rios et al., 2015; Zuckerman, 2009; Unnever et al., 2005); Jews (Dundes, 1971; Fetzer, 2000); and Muslims (Shryock, 2010; Alsultany, 2012; Shaheen, 1997).", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"muslim_attributes-Manzini_et_al_2019" + }, + { + "Category":"religion test terms", + "Seeds":"['chair', 'house', 'greedy', 'terrorist', 'dirty', 'greedy']", + "Source \/ Justification":"For religion we used the following sources and labels: Christians (Rios et al., 2015; Zuckerman, 2009; Unnever et al., 2005); Jews (Dundes, 1971; Fetzer, 2000); and Muslims (Shryock, 2010; Alsultany, 2012; Shaheen, 1997).", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"religion_test_terms-Manzini_et_al_2019" + }, + { + "Category":"religion specific terms", + "Seeds":"['synagogue', 'synagogues', 'altar', 'altars', 'parish', 'parishes', 'biblical', 'bishop', 'bishops', 'jihadist', 'clergy', 'bible', 'bibles', 'mosque', 'mosques', 'mullah', 'church', 'churches', 'sermon', 'sermons', 'papacy', 'imam', 'pew', 'chancel', 'pope', 
'priest', 'priests', 'baptism', 'jihad', 'confessional', 'holy_eucharist', 'evangelical', 'jesus', 'burqa', 'vicar', 'vicars', 'judaism', 'christianity', 'islam', 'jew', 'christian', 'muslim', 'torah', 'quran', 'rabbi']", + "Source \/ Justification":"For religion we used the following sources and labels: Christians (Rios et al., 2015; Zuckerman, 2009; Unnever et al., 2005); Jews (Dundes, 1971; Fetzer, 2000); and Muslims (Shryock, 2010; Alsultany, 2012; Shaheen, 1997).", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings (Manzini et al., 2019)", + "Link":"https:\/\/github.com\/TManzini\/DebiasMulticlassWordEmbedding", + "Seeds ID":"religion_specific_terms-Manzini_et_al_2019" + }, + { + "Category":"adjectives appearance", + "Seeds":"['alluring', 'voluptuous', 'blushing', 'homely', 'plump', 'sensual', 'gorgeous', 'slim', 'bald', 'athletic', 'fashionable', 'stout', 'ugly', 'muscular', 'slender', 'feeble', 'handsome', 'healthy', 'attractive', 'fat', 'weak', 'thin', 'pretty', 'beautiful', 'strong']", + "Source \/ Justification":"\"mostly from\" http:\/\/usefulenglish.ru\/vocabulary\/appearance-and-character, http:\/\/www.sightwordsgame.com\/parts-of-speech\/adjectives\/appearance\/, http:\/\/www.stgeorges.co.uk\/blog\/physical-appearance-adjectives-the-bald-and-the-beautiful", + "Source Categories":"other", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"adjectives_appearance-Garg_et_al_2018" + }, + { + "Category":"adjectives intelligence", + "Seeds":"['precocious', 'resourceful', 'inquisitive', 'sagacious', 'inventive', 'astute', 'adaptable', 'reflective', 'discerning', 'intuitive', 'inquiring', 'judicious', 'analytical', 'luminous', 'venerable', 'imaginative', 'shrewd', 'thoughtful', 'sage', 'smart', 'ingenious', 'clever', 'brilliant', 'logical', 'intelligent', 'apt', 'genius', 'wise']", + "Source \/ Justification":"mostly from https:\/\/www.e-education.psu.edu\/writingrecommendationlettersonline\/node\/151, https:\/\/www.macmillandictionary.com\/us\/thesaurus-category\/american\/words-used-to-describe-intelligent-or-wise-people", + "Source Categories":"other", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"adjectives_intelligence-Garg_et_al_2018" + }, + { + "Category":"adjectives otherization", + "Seeds":"['devious', 'bizarre', 'venomous', 'erratic', 'barbaric', 'frightening', 'deceitful', 'forceful', 'deceptive', 'envious', 'greedy', 'hateful', 'contemptible', 'brutal', 'monstrous', 'calculating', 'cruel', 'intolerant', 'aggressive', 'monstrous']", + "Source \/ Justification":"part of a list mostly from Gunkel (1987) (personality traits original)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"adjectives_otherization-Garg_et_al_2018" + }, + { + "Category":"adjectives princeton", + "Seeds":"['brilliant', 'intelligent', 'honest', 'alert', 'imaginative', 'artistic', 'industrious', 'kind', 'faithful', 'sportsmanlike', 'efficient', 'courteous', 'generous', 'ambitious', 
'witty', 'individualistic', 'sensitive', 'progressive', 'straightforward', 'jovial', 'musical', 'neat', 'persistent', 'practical', 'scientific', 'sophisticated', 'meditative', 'loyal', 'pleasureloving', 'suave', 'happy-go-lucky', 'passionate', 'sensual', 'stolid', 'gregarious', 'traditional', 'methodical', 'religious', 'quiet', 'aggressive', 'shrewd', 'reserved', 'nationalistic', 'conservative', 'talkative', 'impulsive', 'ponderous', 'conventional', 'materialistic', 'radical', 'argumentative', 'frivolous', 'suggestible', 'sly', 'stubborn', 'imitative', 'naive', 'pugnacious', 'suspicious', 'evasive', 'loud', 'superstitious', 'mercenary', 'ostentatious', 'quicktempered', 'humorless', 'grasping', 'boastful', 'quarrelsome', 'gluttonous', 'slovenly', 'revengeful', 'arrogant', 'ignorant', 'dirty', 'conceited', 'stupid', 'cowardly', 'unreliable', 'treacherous', 'rude', 'deceitful', 'cruel']", + "Source \/ Justification":"part of a list mostly from Gunkel (1987) (personality traits original)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"adjectives_princeton-Garg_et_al_2018" + }, + { + "Category":"adjectives sensitive", + "Seeds":"['inhibited', 'complacent', 'sensitive', 'mellow', 'solemn', 'studious', 'intelligent', 'brilliant', 'rational', 'serious', 'contemplative', 'cowardly', 'timid', 'shy', 'passive', 'delicate', 'gentle', 'soft', 'quiet', 'working']", + "Source \/ Justification":"part of a list mostly from Gunkel (1987) (personality traits original)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"adjectives_sensitive-Garg_et_al_2018" + }, + { + "Category":"adjectives williams best", + "Seeds":"['headstrong', 'thankless', 'tactful', 'distrustful', 'quarrelsome', 'effeminate', 'fickle', 'talkative', 'dependable', 'resentful', 'sarcastic', 'unassuming', 'changeable', 'resourceful', 'persevering', 'forgiving', 'assertive', 'individualistic', 'vindictive', 'sophisticated', 'deceitful', 'impulsive', 'sociable', 'methodical', 'idealistic', 'thrifty', 'outgoing', 'intolerant', 'autocratic', 'conceited', 'inventive', 'dreamy', 'appreciative', 'forgetful', 'forceful', 'submissive', 'pessimistic', 'versatile', 'adaptable', 'reflective', 'inhibited', 'outspoken', 'quitting', 'unselfish', 'immature', 'painstaking', 'leisurely', 'infantile', 'sly', 'praising', 'cynical', 'irresponsible', 'arrogant', 'obliging', 'unkind', 'wary', 'greedy', 'obnoxious', 'irritable', 'discreet', 'frivolous', 'cowardly', 'rebellious', 'adventurous', 'enterprising', 'unscrupulous', 'poised', 'moody', 'unfriendly', 'optimistic', 'disorderly', 'peaceable', 'considerate', 'humorous', 'worrying', 'preoccupied', 'trusting', 'mischievous', 'robust', 'superstitious', 'noisy', 'tolerant', 'realistic', 'masculine', 'witty', 'informal', 'prejudiced', 'reckless', 'jolly', 'courageous', 'meek', 'stubborn', 'aloof', 'sentimental', 'complaining', 'unaffected', 'cooperative', 'unstable', 'feminine', 'timid', 'retiring', 'relaxed', 'imaginative', 'shrewd', 'conscientious', 'industrious', 'hasty', 'commonplace', 'lazy', 'gloomy', 'thoughtful', 'dignified', 'wholesome', 'affectionate', 'aggressive', 'awkward', 'energetic', 'tough', 'shy', 
'queer', 'careless', 'restless', 'cautious', 'polished', 'tense', 'suspicious', 'dissatisfied', 'ingenious', 'fearful', 'daring', 'persistent', 'demanding', 'impatient', 'contented', 'selfish', 'rude', 'spontaneous', 'conventional', 'cheerful', 'enthusiastic', 'modest', 'ambitious', 'alert', 'defensive', 'mature', 'coarse', 'charming', 'clever', 'shallow', 'deliberate', 'stern', 'emotional', 'rigid', 'mild', 'cruel', 'artistic', 'hurried', 'sympathetic', 'dull', 'civilized', 'loyal', 'withdrawn', 'confident', 'indifferent', 'conservative', 'foolish', 'moderate', 'handsome', 'helpful', 'gentle', 'dominant', 'hostile', 'generous', 'reliable', 'sincere', 'precise', 'calm', 'healthy', 'attractive', 'progressive', 'confused', 'rational', 'stable', 'bitter', 'sensitive', 'initiative', 'loud', 'thorough', 'logical', 'intelligent', 'steady', 'formal', 'complicated', 'cool', 'curious', 'reserved', 'silent', 'honest', 'quick', 'friendly', 'efficient', 'pleasant', 'severe', 'peculiar', 'quiet', 'weak', 'anxious', 'nervous', 'warm', 'slow', 'dependent', 'wise', 'organized', 'affected', 'reasonable', 'capable', 'active', 'independent', 'patient', 'practical', 'serious', 'understanding', 'cold', 'responsible', 'simple', 'original', 'strong', 'determined', 'natural', 'kind']", + "Source \/ Justification":"Williams and Best (1977), Williams and Best (1990)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"adjectives_williams_best-Garg_et_al_2018" + }, + { + "Category":"female pairs", + "Seeds":"['she', 'daughter', 'hers', 'her', 'mother', 'woman', 'girl', 'herself', 'female', 'sister', 'daughters', 'mothers', 'women', 'girls', 'females', 'sisters', 'aunt', 'aunts', 'niece', 'nieces']", + "Source \/ Justification":"\"noun and pronoun pairs\"", + "Source Categories":"curated", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"female_pairs-Garg_et_al_2018" + }, + { + "Category":"male pairs", + "Seeds":"['he', 'son', 'his', 'him', 'father', 'man', 'boy', 'himself', 'male', 'brother', 'sons', 'fathers', 'men', 'boys', 'males', 'brothers', 'uncle', 'uncles', 'nephew', 'nephews']", + "Source \/ Justification":"\"noun and pronoun pairs\"", + "Source Categories":"curated", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"male_pairs-Garg_et_al_2018" + }, + { + "Category":"names asian", + "Seeds":"['cho', 'wong', 'tang', 'huang', 'chu', 'chung', 'ng', 'wu', 'liu', 'chen', 'lin', 'yang', 'kim', 'chang', 'shah', 'wang', 'li', 'khan', 'singh', 'hong']", + "Source \/ Justification":"Starting with a breakdown of ethnicity by last name compiled by Chalabi and Flowers (2014) we identify 20 last names for each Whites, Asians, and Hispanics as follows: 1) Start with list of top 50 last names by percent of that ethnicity, conditioned on being top 5000 surnames overall, as well as the top 50 last names by total number in that ethnicity (i.e., multiplied count of that last name by percent in that ethnicity). 
2) Choose the 20 names that appeared most on average in the Google Books\/COHA vectors over time (with a minimum number for each time period). This second step ensures that an accurate ethnicity vector is identified each time period, with minimal distortions. Russian last names are collated from various sources online.", + "Source Categories":"corpus-derived, population-derived", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"names_asian-Garg_et_al_2018" + }, + { + "Category":"names black", + "Seeds":"['harris', 'robinson', 'howard', 'thompson', 'moore', 'wright', 'anderson', 'clark', 'jackson', 'taylor', 'scott', 'davis', 'allen', 'adams', 'lewis', 'williams', 'jones', 'wilson', 'martin', 'johnson']", + "Source \/ Justification":"Starting with a breakdown of ethnicity by last name compiled by Chalabi and Flowers (2014) we identify 20 last names for each Whites, Asians, and Hispanics as follows: 1) Start with list of top 50 last names by percent of that ethnicity, conditioned on being top 5000 surnames overall, as well as the top 50 last names by total number in that ethnicity (i.e., multiplied count of that last name by percent in that ethnicity). 2) Choose the 20 names that appeared most on average in the Google Books\/COHA vectors over time (with a minimum number for each time period). This second step ensures that an accurate ethnicity vector is identified each time period, with minimal distortions. Russian last names are collated from various sources online.", + "Source Categories":"corpus-derived, population-derived", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"names_black-Garg_et_al_2018" + }, + { + "Category":"names chinese", + "Seeds":"['chung', 'liu', 'wong', 'huang', 'ng', 'hu', 'chu', 'chen', 'lin', 'liang', 'wang', 'wu', 'yang', 'tang', 'chang', 'hong', 'li']", + "Source \/ Justification":"Starting with a breakdown of ethnicity by last name compiled by Chalabi and Flowers (2014) we identify 20 last names for each Whites, Asians, and Hispanics as follows: 1) Start with list of top 50 last names by percent of that ethnicity, conditioned on being top 5000 surnames overall, as well as the top 50 last names by total number in that ethnicity (i.e., multiplied count of that last name by percent in that ethnicity). 2) Choose the 20 names that appeared most on average in the Google Books\/COHA vectors over time (with a minimum number for each time period). This second step ensures that an accurate ethnicity vector is identified each time period, with minimal distortions. 
Russian last names are collated from various sources online.", + "Source Categories":"corpus-derived, population-derived", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"names_chinese-Garg_et_al_2018" + }, + { + "Category":"names hispanic", + "Seeds":"['ruiz', 'alvarez', 'vargas', 'castillo', 'gomez', 'soto', 'gonzalez', 'sanchez', 'rivera', 'mendoza', 'martinez', 'torres', 'rodriguez', 'perez', 'lopez', 'medina', 'diaz', 'garcia', 'castro', 'cruz']", + "Source \/ Justification":"Starting with a breakdown of ethnicity by last name compiled by Chalabi and Flowers (2014) we identify 20 last names for each Whites, Asians, and Hispanics as follows: 1) Start with list of top 50 last names by percent of that ethnicity, conditioned on being top 5000 surnames overall, as well as the top 50 last names by total number in that ethnicity (i.e., multiplied count of that last name by percent in that ethnicity). 2) Choose the 20 names that appeared most on average in the Google Books\/COHA vectors over time (with a minimum number for each time period). This second step ensures that an accurate ethnicity vector is identified each time period, with minimal distortions. Russian last names are collated from various sources online.", + "Source Categories":"corpus-derived, population-derived", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"names_hispanic-Garg_et_al_2018" + }, + { + "Category":"names russian", + "Seeds":"['gurin', 'minsky', 'sokolov', 'markov', 'maslow', 'novikoff', 'mishkin', 'smirnov', 'orloff', 'ivanov', 'sokoloff', 'davidoff', 'savin', 'romanoff', 'babinski', 'sorokin', 'levin', 'pavlov', 'rodin', 'agin']", + "Source \/ Justification":"Starting with a breakdown of ethnicity by last name compiled by Chalabi and Flowers (2014) we identify 20 last names for each Whites, Asians, and Hispanics as follows: 1) Start with list of top 50 last names by percent of that ethnicity, conditioned on being top 5000 surnames overall, as well as the top 50 last names by total number in that ethnicity (i.e., multiplied count of that last name by percent in that ethnicity). 2) Choose the 20 names that appeared most on average in the Google Books\/COHA vectors over time (with a minimum number for each time period). This second step ensures that an accurate ethnicity vector is identified each time period, with minimal distortions. 
Russian last names are collated from various sources online.", + "Source Categories":"corpus-derived, population-derived", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"names_russian-Garg_et_al_2018" + }, + { + "Category":"names white", + "Seeds":"['harris', 'nelson', 'robinson', 'thompson', 'moore', 'wright', 'anderson', 'clark', 'jackson', 'taylor', 'scott', 'davis', 'allen', 'adams', 'lewis', 'williams', 'jones', 'wilson', 'martin', 'johnson']", + "Source \/ Justification":"Starting with a breakdown of ethnicity by last name compiled by Chalabi and Flowers (2014) we identify 20 last names for each Whites, Asians, and Hispanics as follows: 1) Start with list of top 50 last names by percent of that ethnicity, conditioned on being top 5000 surnames overall, as well as the top 50 last names by total number in that ethnicity (i.e., multiplied count of that last name by percent in that ethnicity). 2) Choose the 20 names that appeared most on average in the Google Books\/COHA vectors over time (with a minimum number for each time period). This second step ensures that an accurate ethnicity vector is identified each time period, with minimal distortions. Russian last names are collated from various sources online.", + "Source Categories":"corpus-derived, population-derived", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"names_white-Garg_et_al_2018" + }, + { + "Category":"occupations", + "Seeds":"['janitor', 'statistician', 'midwife', 'bailiff', 'auctioneer', 'photographer', 'geologist', 'shoemaker', 'athlete', 'cashier', 'dancer', 'housekeeper', 'accountant', 'physicist', 'gardener', 'dentist', 'weaver', 'blacksmith', 'psychologist', 'supervisor', 'mathematician', 'surveyor', 'tailor', 'designer', 'economist', 'mechanic', 'laborer', 'postmaster', 'broker', 'chemist', 'librarian', 'attendant', 'clerical', 'musician', 'porter', 'scientist', 'carpenter', 'sailor', 'instructor', 'sheriff', 'pilot', 'inspector', 'mason', 'baker', 'administrator', 'architect', 'collector', 'operator', 'surgeon', 'driver', 'painter', 'conductor', 'nurse', 'cook', 'engineer', 'retired', 'sales', 'lawyer', 'clergy', 'physician', 'farmer', 'clerk', 'manager', 'guard', 'artist', 'smith', 'official', 'police', 'doctor', 'professor', 'student', 'judge', 'teacher', 'author', 'secretary', 'soldier']", + "Source \/ Justification":"\"We use occupation words for which we have gender and ethnic subgroup information over time. Group occupation percentages are obtained from the Integrated Public Use Microdata Series (IPUMS), part of the University of Minnesota Historical Census Project (29). Data coding and preprocessing are done as described in ref. 44, which studies wage dynamics as women enter certain occupations over time. The IPUMS dataset includes a column, OCC1950, coding occupation census data as it would have been coded in 1950, allowing accurate interyear analysis. 
We then hand map the occupations from this column to single-word occupations (e.g., chemical engineer and electrical engineer both become engineer, and chemist is counted as both chemist and scientist) and hand code a subset of the occupations as professional.\"", + "Source Categories":"population-derived", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"occuptations_1950_professional-Garg_et_al_2018" + }, + { + "Category":"occupations professional", + "Seeds":"['statistician', 'auctioneer', 'photographer', 'geologist', 'accountant', 'physicist', 'dentist', 'psychologist', 'supervisor', 'mathematician', 'designer', 'economist', 'postmaster', 'broker', 'chemist', 'librarian', 'scientist', 'instructor', 'pilot', 'administrator', 'architect', 'surgeon', 'nurse', 'engineer', 'lawyer', 'physician', 'manager', 'official', 'doctor', 'professor', 'student', 'judge', 'teacher', 'author']", + "Source \/ Justification":"hand selected by authors from the overall list of occupations", + "Source Categories":"curated", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"occupations_1950-Garg_et_al_2018" + }, + { + "Category":"occupations mechanical turk", + "Seeds":"['instructor', 'geologist', 'secretary', 'clerk', 'painter', 'housekeeper', 'chemist', 'artist', 'baker', 'psychologist', 'lawyer', 'teacher', 'collector', 'surveyor', 'accountant', 'sailor', 'laborer', 'physician', 'student', 'soldier', 'manager', 'administrator', 'musician', 'doctor', 'dentist', 'professor', 'photographer', 'surgeon', 'inspector', 'janitor', 'nurse', 'author', 'conductor', 'economist', 'physicist', 'scientist', 'architect', 'mechanic', 'judge', 'gardener', 'farmer', 'librarian', 'carpenter', 'mathematician', 'dancer', 'broker', 'athlete']", + "Source \/ Justification":"Bolukbasi et al. 
(2016)", + "Source Categories":"prior-work", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"occupations_mechanical_turk-Garg_et_al_2018" + }, + { + "Category":"personality traits original", + "Seeds":"['disorganized', 'devious', 'impressionable', 'circumspect', 'impassive', 'aimless', 'effeminate', 'unfathomable', 'fickle', 'unprincipled', 'inoffensive', 'reactive', 'providential', 'resentful', 'bizarre', 'impractical', 'sarcastic', 'misguided', 'imitative', 'pedantic', 'venomous', 'erratic', 'insecure', 'resourceful', 'neurotic', 'forgiving', 'profligate', 'whimsical', 'assertive', 'incorruptible', 'individualistic', 'faithless', 'disconcerting', 'barbaric', 'hypnotic', 'vindictive', 'observant', 'dissolute', 'frightening', 'complacent', 'boisterous', 'pretentious', 'disobedient', 'tasteless', 'sedentary', 'sophisticated', 'regimental', 'mellow', 'deceitful', 'impulsive', 'playful', 'sociable', 'methodical', 'willful', 'idealistic', 'boyish', 'callous', 'pompous', 'unchanging', 'crafty', 'punctual', 'compassionate', 'intolerant', 'challenging', 'scornful', 'possessive', 'conceited', 'imprudent', 'dutiful', 'lovable', 'disloyal', 'dreamy', 'appreciative', 'forgetful', 'unrestrained', 'forceful', 'submissive', 'predatory', 'fanatical', 'illogical', 'tidy', 'aspiring', 'studious', 'adaptable', 'conciliatory', 'artful', 'thoughtless', 'deceptive', 'frugal', 'reflective', 'insulting', 'unreliable', 'stoic', 'hysterical', 'rustic', 'inhibited', 'outspoken', 'unhealthy', 'ascetic', 'skeptical', 'painstaking', 'contemplative', 'leisurely', 'sly', 'mannered', 'outrageous', 'lyrical', 'placid', 'cynical', 'irresponsible', 'vulnerable', 'arrogant', 'persuasive', 'perverse', 'steadfast', 'crisp', 'envious', 'naive', 'greedy', 'presumptuous', 'obnoxious', 'irritable', 'dishonest', 'discreet', 'sporting', 'hateful', 'ungrateful', 'frivolous', 'reactionary', 'skillful', 'cowardly', 'sordid', 'adventurous', 'dogmatic', 'intuitive', 'bland', 'indulgent', 'discontented', 'dominating', 'articulate', 'fanciful', 'discouraging', 'treacherous', 'repressed', 'moody', 'sensual', 'unfriendly', 'optimistic', 'clumsy', 'contemptible', 'focused', 'haughty', 'morbid', 'disorderly', 'considerate', 'humorous', 'preoccupied', 'airy', 'impersonal', 'cultured', 'trusting', 'respectful', 'scrupulous', 'scholarly', 'superstitious', 'tolerant', 'realistic', 'malicious', 'irrational', 'sane', 'colorless', 'masculine', 'witty', 'inert', 'prejudiced', 'fraudulent', 'blunt', 'childish', 'brittle', 'disciplined', 'responsive', 'courageous', 'bewildered', 'courteous', 'stubborn', 'aloof', 'sentimental', 'athletic', 'extravagant', 'brutal', 'manly', 'cooperative', 'unstable', 'youthful', 'timid', 'amiable', 'retiring', 'fiery', 'confidential', 'relaxed', 'imaginative', 'mystical', 'shrewd', 'conscientious', 'monstrous', 'grim', 'questioning', 'lazy', 'dynamic', 'gloomy', 'troublesome', 'abrupt', 'eloquent', 'dignified', 'hearty', 'gallant', 'benevolent', 'maternal', 'paternal', 'patriotic', 'aggressive', 'competitive', 'elegant', 'flexible', 'gracious', 'energetic', 'tough', 'contradictory', 'shy', 'careless', 'cautious', 'polished', 'sage', 'tense', 'caring', 'suspicious', 'sober', 'neat', 'transparent', 'disturbing', 'passionate', 'obedient', 'crazy', 'restrained', 'fearful', 'daring', 'prudent', 'demanding', 'impatient', 'cerebral', 'calculating', 'amusing', 'honorable', 'casual', 
'sharing', 'selfish', 'ruined', 'spontaneous', 'admirable', 'conventional', 'cheerful', 'solitary', 'upright', 'stiff', 'enthusiastic', 'petty', 'dirty', 'subjective', 'heroic', 'stupid', 'modest', 'impressive', 'orderly', 'ambitious', 'protective', 'silly', 'alert', 'destructive', 'exciting', 'crude', 'ridiculous', 'subtle', 'mature', 'creative', 'coarse', 'passive', 'oppressed', 'accessible', 'charming', 'clever', 'decent', 'miserable', 'superficial', 'shallow', 'stern', 'winning', 'balanced', 'emotional', 'rigid', 'invisible', 'desperate', 'cruel', 'romantic', 'agreeable', 'hurried', 'sympathetic', 'solemn', 'systematic', 'vague', 'peaceful', 'humble', 'dull', 'expedient', 'loyal', 'decisive', 'arbitrary', 'earnest', 'confident', 'conservative', 'foolish', 'moderate', 'helpful', 'delicate', 'gentle', 'dedicated', 'hostile', 'generous', 'reliable', 'dramatic', 'precise', 'calm', 'healthy', 'attractive', 'artificial', 'progressive', 'odd', 'confused', 'rational', 'brilliant', 'intense', 'genuine', 'mistaken', 'driving', 'stable', 'objective', 'sensitive', 'neutral', 'strict', 'angry', 'profound', 'smooth', 'ignorant', 'thorough', 'logical', 'intelligent', 'extraordinary', 'experimental', 'steady', 'formal', 'faithful', 'curious', 'reserved', 'honest', 'busy', 'educated', 'liberal', 'friendly', 'efficient', 'sweet', 'surprising', 'mechanical', 'clean', 'critical', 'criminal', 'soft', 'proud', 'quiet', 'weak', 'anxious', 'solid', 'complex', 'grand', 'warm', 'slow', 'false', 'extreme', 'narrow', 'dependent', 'wise', 'organized', 'pure', 'directed', 'dry', 'obvious', 'popular', 'capable', 'secure', 'active', 'independent', 'ordinary', 'fixed', 'practical', 'serious', 'fair', 'understanding', 'constant', 'cold', 'responsible', 'deep', 'religious', 'private', 'simple', 'physical', 'original', 'working', 'strong', 'modern', 'determined', 'open', 'political', 'difficult', 'knowledge', 'kind']", + "Source \/ Justification":"mostly from Gunkel (1987)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"personality_traits_original-Garg_et_al_2018" + }, + { + "Category":"christianity", + "Seeds":"['baptism', 'messiah', 'catholicism', 'resurrection', 'christianity', 'salvation', 'protestant', 'gospel', 'trinity', 'jesus', 'christ', 'christian', 'cross', 'catholic', 'church']", + "Source \/ Justification":null, + "Source Categories":"unknown", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"christianity-Garg_et_al_2018" + }, + { + "Category":"islam", + "Seeds":"['allah', 'ramadan', 'turban', 'emir', 'salaam', 'sunni', 'koran', 'imam', 'sultan', 'prophet', 'veil', 'ayatollah', 'shiite', 'mosque', 'islam', 'sheik', 'muslim', 'muhammad']", + "Source \/ Justification":null, + "Source Categories":"unknown", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"islam-Garg_et_al_2018" + }, + { + "Category":"terrorism", + "Seeds":"['terror', 'terrorism', 'violence', 'attack', 'death', 'military', 'war', 'radical', 'injuries', 'bomb', 'target', 'conflict', 'dangerous', 'kill', 'murder', 'strike', 'dead', 'violence', 'fight', 'death', 
'force', 'stronghold', 'wreckage', 'aggression', 'slaughter', 'execute', 'overthrow', 'casualties', 'massacre', 'retaliation', 'proliferation', 'militia', 'hostility', 'debris', 'acid', 'execution', 'militant', 'rocket', 'guerrilla', 'sacrifice', 'enemy', 'soldier', 'terrorist', 'missile', 'hostile', 'revolution', 'resistance', 'shoot']", + "Source \/ Justification":null, + "Source Categories":"unknown", + "Used in Paper":"Word Embeddings Quantify 100 Years of Gender and Ethnic Stereotypes (Garg et al., 2018)", + "Link":"https:\/\/github.com\/nikhgarg\/EmbeddingDynamicStereotypes", + "Seeds ID":"terrorism-Garg_et_al_2018" + }, + { + "Category":"occupations", + "Seeds":"['banker', 'carpenter', 'doctor', 'engineer', 'hairdresser', 'journalist', 'lawyer', 'nanny', 'nurse', 'plumber', 'scientist']", + "Source \/ Justification":"In the survey, respondents were asked to rate a number of items on scales representing association along gender, race, and class lines. All questions followed the format, \u201cOn a scale from 0 to 100, with 0 representing very feminine and 100 representing very masculine, how would you rate a steak?\u201d For measuring race and class associations, the survey posed similarly worded questions, replacing \u201cfeminine\u201d and \u201cmasculine\u201d with \u201cWhite\u201d and \u201cAfrican American,\u201d or \u201cworking class\u201d and \u201cupper class\u201d respectively. Respondents were asked to place 59 different items on each of the three dimensions of race, class, and gender. A full list of items asked on the survey is available in the appendix (Table A1). Words were selected in seven topical domains: occupations, foods, clothing, vehicles, music genres, sports, and first names. A diverse array of topical domains was chosen to test the capacity of word embedding models to detect cultural associations across very different subjects. Specific terms were selected within each topical domain to ensure high variance across dimensions. We calculate the weighted mean of 7 responses for each item and use these means as our estimates of a general cultural association. The end product is thus a rating between 0 and 100 on a gender dimension, a class dimension, and a race dimension for each of the 59 words listed in Table A1.", + "Source Categories":"crowd-sourced, curated", + "Used in Paper":"The Geometry of Culture: Analyzing Meaning through Word Embeddings (Kozlowski et al., 2019)", + "Link":null, + "Seeds ID":"occupations-Kozlowski_et_al_2019" + }, + { + "Category":"clothing", + "Seeds":"['blouse', 'briefcase', 'dress', 'necklace', 'pants', 'shirt', 'shorts', 'socks', 'suit', 'tuxedo']", + "Source \/ Justification":"In the survey, respondents were asked to rate a number of items on scales representing association along gender, race, and class lines. All questions followed the format, \u201cOn a scale from 0 to 100, with 0 representing very feminine and 100 representing very masculine, how would you rate a steak?\u201d For measuring race and class associations, the survey posed similarly worded questions, replacing \u201cfeminine\u201d and \u201cmasculine\u201d with \u201cWhite\u201d and \u201cAfrican American,\u201d or \u201cworking class\u201d and \u201cupper class\u201d respectively. Respondents were asked to place 59 different items on each of the three dimensions of race, class, and gender. A full list of items asked on the survey is available in the appendix (Table A1). 
Words were selected in seven topical domains: occupations, foods, clothing, vehicles, music genres, sports, and first names. A diverse array of topical domains was chosen to test the capacity of word embedding models to detect cultural associations across very different subjects. Specific terms were selected within each topical domain to ensure high variance across dimensions. We calculate the weighted mean of 7 responses for each item and use these means as our estimates of a general cultural association. The end product is thus a rating between 0 and 100 on a gender dimension, a class dimension, and a race dimension for each of the 59 words listed in Table A1.", + "Source Categories":"crowd-sourced, curated", + "Used in Paper":"The Geometry of Culture: Analyzing Meaning through Word Embeddings (Kozlowski et al., 2019)", + "Link":null, + "Seeds ID":"clothing-Kozlowski_et_al_2019" + }, + { + "Category":"sports", + "Seeds":"['baseball', 'basketball', 'boxing', 'golf', 'hockey', 'soccer', 'softball', 'tennis', 'volleyball']", + "Source \/ Justification":"In the survey, respondents were asked to rate a number of items on scales representing association along gender, race, and class lines. All questions followed the format, \u201cOn a scale from 0 to 100, with 0 representing very feminine and 100 representing very masculine, how would you rate a steak?\u201d For measuring race and class associations, the survey posed similarly worded questions, replacing \u201cfeminine\u201d and \u201cmasculine\u201d with \u201cWhite\u201d and \u201cAfrican American,\u201d or \u201cworking class\u201d and \u201cupper class\u201d respectively. Respondents were asked to place 59 different items on each of the three dimensions of race, class, and gender. A full list of items asked on the survey is available in the appendix (Table A1). Words were selected in seven topical domains: occupations, foods, clothing, vehicles, music genres, sports, and first names. A diverse array of topical domains was chosen to test the capacity of word embedding models to detect cultural associations across very different subjects. Specific terms were selected within each topical domain to ensure high variance across dimensions. We calculate the weighted mean of 7 responses for each item and use these means as our estimates of a general cultural association. The end product is thus a rating between 0 and 100 on a gender dimension, a class dimension, and a race dimension for each of the 59 words listed in Table A1.", + "Source Categories":"crowd-sourced, curated", + "Used in Paper":"The Geometry of Culture: Analyzing Meaning through Word Embeddings (Kozlowski et al., 2019)", + "Link":null, + "Seeds ID":"sports-Kozlowski_et_al_2019" + }, + { + "Category":"music genres", + "Seeds":"['bluegrass', 'hiphop', 'jazz', 'opera', 'punk', 'rap', 'techno']", + "Source \/ Justification":"In the survey, respondents were asked to rate a number of items on scales representing association along gender, race, and class lines. All questions followed the format, \u201cOn a scale from 0 to 100, with 0 representing very feminine and 100 representing very masculine, how would you rate a steak?\u201d For measuring race and class associations, the survey posed similarly worded questions, replacing \u201cfeminine\u201d and \u201cmasculine\u201d with \u201cWhite\u201d and \u201cAfrican American,\u201d or \u201cworking class\u201d and \u201cupper class\u201d respectively. 
Respondents were asked to place 59 different items on each of the three dimensions of race, class, and gender. A full list of items asked on the survey is available in the appendix (Table A1). Words were selected in seven topical domains: occupations, foods, clothing, vehicles, music genres, sports, and first names. A diverse array of topical domains was chosen to test the capacity of word embedding models to detect cultural associations across very different subjects. Specific terms were selected within each topical domain to ensure high variance across dimensions. We calculate the weighted mean of 7 responses for each item and use these means as our estimates of a general cultural association. The end product is thus a rating between 0 and 100 on a gender dimension, a class dimension, and a race dimension for each of the 59 words listed in Table A1.", + "Source Categories":"crowd-sourced, curated", + "Used in Paper":"The Geometry of Culture: Analyzing Meaning through Word Embeddings (Kozlowski et al., 2019)", + "Link":null, + "Seeds ID":"music_genres-Kozlowski_et_al_2019" + }, + { + "Category":"vehicles", + "Seeds":"['bicycle', 'limousine', 'minivan', 'motorcycle', 'skateboard', 'suv', 'truck']", + "Source \/ Justification":"In the survey, respondents were asked to rate a number of items on scales representing association along gender, race, and class lines. All questions followed the format, \u201cOn a scale from 0 to 100, with 0 representing very feminine and 100 representing very masculine, how would you rate a steak?\u201d For measuring race and class associations, the survey posed similarly worded questions, replacing \u201cfeminine\u201d and \u201cmasculine\u201d with \u201cWhite\u201d and \u201cAfrican American,\u201d or \u201cworking class\u201d and \u201cupper class\u201d respectively. Respondents were asked to place 59 different items on each of the three dimensions of race, class, and gender. A full list of items asked on the survey is available in the appendix (Table A1). Words were selected in seven topical domains: occupations, foods, clothing, vehicles, music genres, sports, and first names. A diverse array of topical domains was chosen to test the capacity of word embedding models to detect cultural associations across very different subjects. Specific terms were selected within each topical domain to ensure high variance across dimensions. We calculate the weighted mean of 7 responses for each item and use these means as our estimates of a general cultural association. The end product is thus a rating between 0 and 100 on a gender dimension, a class dimension, and a race dimension for each of the 59 words listed in Table A1.", + "Source Categories":"crowd-sourced, curated", + "Used in Paper":"The Geometry of Culture: Analyzing Meaning through Word Embeddings (Kozlowski et al., 2019)", + "Link":null, + "Seeds ID":"vehicles-Kozlowski_et_al_2019" + }, + { + "Category":"food", + "Seeds":"['beer', 'cheesecake', 'hamburger', 'pastry', 'salad', 'steak']", + "Source \/ Justification":"In the survey, respondents were asked to rate a number of items on scales representing association along gender, race, and class lines. 
All questions followed the format, \u201cOn a scale from 0 to 100, with 0 representing very feminine and 100 representing very masculine, how would you rate a steak?\u201d For measuring race and class associations, the survey posed similarly worded questions, replacing \u201cfeminine\u201d and \u201cmasculine\u201d with \u201cWhite\u201d and \u201cAfrican American,\u201d or \u201cworking class\u201d and \u201cupper class\u201d respectively. Respondents were asked to place 59 different items on each of the three dimensions of race, class, and gender. A full list of items asked on the survey is available in the appendix (Table A1). Words were selected in seven topical domains: occupations, foods, clothing, vehicles, music genres, sports, and first names. A diverse array of topical domains was chosen to test the capacity of word embedding models to detect cultural associations across very different subjects. Specific terms were selected within each topical domain to ensure high variance across dimensions. We calculate the weighted mean of 7 responses for each item and use these means as our estimates of a general cultural association. The end product is thus a rating between 0 and 100 on a gender dimension, a class dimension, and a race dimension for each of the 59 words listed in Table A1.", + "Source Categories":"crowd-sourced, curated", + "Used in Paper":"The Geometry of Culture: Analyzing Meaning through Word Embeddings (Kozlowski et al., 2019)", + "Link":null, + "Seeds ID":"food-Kozlowski_et_al_2019" + }, + { + "Category":"first names", + "Seeds":"['Aaliyah', 'Amy', 'Connor', 'Jake', 'Jamal', 'Molly', 'Shanice']", + "Source \/ Justification":"In the survey, respondents were asked to rate a number of items on scales representing association along gender, race, and class lines. All questions followed the format, \u201cOn a scale from 0 to 100, with 0 representing very feminine and 100 representing very masculine, how would you rate a steak?\u201d For measuring race and class associations, the survey posed similarly worded questions, replacing \u201cfeminine\u201d and \u201cmasculine\u201d with \u201cWhite\u201d and \u201cAfrican American,\u201d or \u201cworking class\u201d and \u201cupper class\u201d respectively. Respondents were asked to place 59 different items on each of the three dimensions of race, class, and gender. A full list of items asked on the survey is available in the appendix (Table A1). Words were selected in seven topical domains: occupations, foods, clothing, vehicles, music genres, sports, and first names. A diverse array of topical domains was chosen to test the capacity of word embedding models to detect cultural associations across very different subjects. Specific terms were selected within each topical domain to ensure high variance across dimensions. We calculate the weighted mean of 7 responses for each item and use these means as our estimates of a general cultural association. 
The end product is thus a rating between 0 and 100 on a gender dimension, a class dimension, and a race dimension for each of the 59 words listed in Table A1.", + "Source Categories":"crowd-sourced, curated", + "Used in Paper":"The Geometry of Culture: Analyzing Meaning through Word Embeddings (Kozlowski et al., 2019)", + "Link":null, + "Seeds ID":"first_names-Kozlowski_et_al_2019" + }, + { + "Category":"male", + "Seeds":"['man', 'men', 'he', 'him', 'his', 'his', 'boy', 'boys', 'male', 'masculine']", + "Source \/ Justification":"We select word pairs such that the difference between each word in a pair is a \u201cstep\u201d along the dimension of interest. For example, the difference between richest and poorest is a step along the class dimension, and should be similar in direction to the difference between affluence and poverty. The lists presented in Table 1 are not exhaustive; other pairs of words could be added or substituted for the pairs we have included. Averaging between six and ten pairs of words for each dimension, as we have done here, produces a closer approximation to the cultural dimension of interest than any one word-pair, as they post higher correlations with the survey.", + "Source Categories":"curated", + "Used in Paper":"The Geometry of Culture: Analyzing Meaning through Word Embeddings (Kozlowski et al., 2019)", + "Link":null, + "Seeds ID":"male-Kozlowski_et_al_2019" + }, + { + "Category":"female", + "Seeds":"['woman', 'women', 'she', 'her', 'her', 'hers', 'girl', 'girls', 'female', 'feminine']", + "Source \/ Justification":"We select word pairs such that the difference between each word in a pair is a \u201cstep\u201d along the dimension of interest. For example, the difference between richest and poorest is a step along the class dimension, and should be similar in direction to the difference between affluence and poverty. The lists presented in Table 1 are not exhaustive; other pairs of words could be added or substituted for the pairs we have included. Averaging between six and ten pairs of words for each dimension, as we have done here, produces a closer approximation to the cultural dimension of interest than any one word-pair, as they post higher correlations with the survey.", + "Source Categories":"curated", + "Used in Paper":"The Geometry of Culture: Analyzing Meaning through Word Embeddings (Kozlowski et al., 2019)", + "Link":null, + "Seeds ID":"female-Kozlowski_et_al_2019" + }, + { + "Category":"upperclass", + "Seeds":"['rich', 'richer', 'richest', 'affluence', 'affluent', 'expensive', 'luxury', 'opulent']", + "Source \/ Justification":"We select word pairs such that the difference between each word in a pair is a \u201cstep\u201d along the dimension of interest. For example, the difference between richest and poorest is a step along the class dimension, and should be similar in direction to the difference between affluence and poverty. The lists presented in Table 1 are not exhaustive; other pairs of words could be added or substituted for the pairs we have included. 
Averaging between six and ten pairs of words for each dimension, as we have done here, produces a closer approximation to the cultural dimension of interest than any one word-pair, as they post higher correlations with the survey.", + "Source Categories":"curated", + "Used in Paper":"The Geometry of Culture: Analyzing Meaning through Word Embeddings (Kozlowski et al., 2019)", + "Link":null, + "Seeds ID":"upperclass-Kozlowski_et_al_2019" + }, + { + "Category":"lowerclass", + "Seeds":"['poor', 'poorer', 'poorest', 'poverty', 'impoverished', 'inexpensive', 'cheap', 'needy']", + "Source \/ Justification":"We select word pairs such that the difference between each word in a pair is a \u201cstep\u201d along the dimension of interest. For example, the difference between richest and poorest is a step along the class dimension, and should be similar in direction to the difference between affluence and poverty. The lists presented in Table 1 are not exhaustive; other pairs of words could be added or substituted for the pairs we have included. Averaging between six and ten pairs of words for each dimension, as we have done here, produces a closer approximation to the cultural dimension of interest than any one word-pair, as they post higher correlations with the survey.", + "Source Categories":"curated", + "Used in Paper":"The Geometry of Culture: Analyzing Meaning through Word Embeddings (Kozlowski et al., 2019)", + "Link":null, + "Seeds ID":"lowerclass-Kozlowski_et_al_2019" + }, + { + "Category":"black", + "Seeds":"['black', 'blacks', 'Blacks', 'Black', 'African', 'African']", + "Source \/ Justification":"We select word pairs such that the difference between each word in a pair is a \u201cstep\u201d along the dimension of interest. For example, the difference between richest and poorest is a step along the class dimension, and should be similar in direction to the difference between affluence and poverty. The lists presented in Table 1 are not exhaustive; other pairs of words could be added or substituted for the pairs we have included. Averaging between six and ten pairs of words for each dimension, as we have done here, produces a closer approximation to the cultural dimension of interest than any one word-pair, as they post higher correlations with the survey.", + "Source Categories":"curated", + "Used in Paper":"The Geometry of Culture: Analyzing Meaning through Word Embeddings (Kozlowski et al., 2019)", + "Link":null, + "Seeds ID":"black-Kozlowski_et_al_2019" + }, + { + "Category":"white", + "Seeds":"['white', 'whites', 'Whites', 'White', 'European', 'Caucasian']", + "Source \/ Justification":"We select word pairs such that the difference between each word in a pair is a \u201cstep\u201d along the dimension of interest. For example, the difference between richest and poorest is a step along the class dimension, and should be similar in direction to the difference between affluence and poverty. The lists presented in Table 1 are not exhaustive; other pairs of words could be added or substituted for the pairs we have included. 
Averaging between six and ten pairs of words for each dimension, as we have done here, produces a closer approximation to the cultural dimension of interest than any one word-pair, as they post higher correlations with the survey.", + "Source Categories":"curated", + "Used in Paper":"The Geometry of Culture: Analyzing Meaning through Word Embeddings (Kozlowski et al., 2019)", + "Link":null, + "Seeds ID":"white-Kozlowski_et_al_2019" + }, + { + "Category":"female names", + "Seeds":"['Amy', 'Joan', 'Lisa', 'Sarah', 'Diana', 'Kate', 'Ann', 'Donna']", + "Source \/ Justification":"All word lists are taken from Caliskan et al. (2017)", + "Source Categories":"prior-work", + "Used in Paper":"Lipstick on a Pig: Debiasing Methods Cover up Systematic Gender Biases in Word Embeddings But do not Remove Them (Gonen & Goldberg, 2019)", + "Link":null, + "Seeds ID":"female_names-Gonen_&_Goldberg_2019" + }, + { + "Category":"male names", + "Seeds":"['John', 'Paul', 'Mike', 'Kevin', 'Steve', 'Greg', 'Jeff', 'Bill']", + "Source \/ Justification":"All word lists are taken from Caliskan et al. (2017)", + "Source Categories":"prior-work", + "Used in Paper":"Lipstick on a Pig: Debiasing Methods Cover up Systematic Gender Biases in Word Embeddings But do not Remove Them (Gonen & Goldberg, 2019)", + "Link":null, + "Seeds ID":"male_names-Gonen_&_Goldberg_2019" + }, + { + "Category":"family words", + "Seeds":"['home', 'parents', 'children', 'family', 'cousins', 'marriage', 'wedding', 'relatives']", + "Source \/ Justification":"All word lists are taken from Caliskan et al. (2017)", + "Source Categories":"prior-work", + "Used in Paper":"Lipstick on a Pig: Debiasing Methods Cover up Systematic Gender Biases in Word Embeddings But do not Remove Them (Gonen & Goldberg, 2019)", + "Link":null, + "Seeds ID":"family_words-Gonen_&_Goldberg_2019" + }, + { + "Category":"career words", + "Seeds":"['executive', 'management', 'professional', 'corpo-', 'ration', 'salary', 'office', 'business', 'career']", + "Source \/ Justification":"All word lists are taken from Caliskan et al. (2017)", + "Source Categories":"prior-work", + "Used in Paper":"Lipstick on a Pig: Debiasing Methods Cover up Systematic Gender Biases in Word Embeddings But do not Remove Them (Gonen & Goldberg, 2019)", + "Link":null, + "Seeds ID":"career_words-Gonen_&_Goldberg_2019" + }, + { + "Category":"arts words", + "Seeds":"['poetry', 'art', 'dance', 'literature', 'novel', 'symphony', 'drama', 'sculpture']", + "Source \/ Justification":"All word lists are taken from Caliskan et al. (2017)", + "Source Categories":"prior-work", + "Used in Paper":"Lipstick on a Pig: Debiasing Methods Cover up Systematic Gender Biases in Word Embeddings But do not Remove Them (Gonen & Goldberg, 2019)", + "Link":null, + "Seeds ID":"arts_words-Gonen_&_Goldberg_2019" + }, + { + "Category":"math words", + "Seeds":"['math', 'algebra', 'geometry', 'calculus', 'equations', 'computation', 'numbers', 'addition']", + "Source \/ Justification":"All word lists are taken from Caliskan et al. (2017)", + "Source Categories":"prior-work", + "Used in Paper":"Lipstick on a Pig: Debiasing Methods Cover up Systematic Gender Biases in Word Embeddings But do not Remove Them (Gonen & Goldberg, 2019)", + "Link":null, + "Seeds ID":"math_words-Gonen_&_Goldberg_2019" + }, + { + "Category":"arts words 2", + "Seeds":"['poetry', 'art', 'Shakespeare', 'dance', 'literature', 'novel', 'symphony', 'drama']", + "Source \/ Justification":"All word lists are taken from Caliskan et al. 
(2017)", + "Source Categories":"prior-work", + "Used in Paper":"Lipstick on a Pig: Debiasing Methods Cover up Systematic Gender Biases in Word Embeddings But do not Remove Them (Gonen & Goldberg, 2019)", + "Link":null, + "Seeds ID":"arts_words_2-Gonen_&_Goldberg_2019" + }, + { + "Category":"science words", + "Seeds":"['science', 'technology', 'physics', 'chemistry', 'Einstein', 'NASA', 'experiment', 'astronomy']", + "Source \/ Justification":"All word lists are taken from Caliskan et al. (2017)", + "Source Categories":"prior-work", + "Used in Paper":"Lipstick on a Pig: Debiasing Methods Cover up Systematic Gender Biases in Word Embeddings But do not Remove Them (Gonen & Goldberg, 2019)", + "Link":null, + "Seeds ID":"science_words-Gonen_&_Goldberg_2019" + }, + { + "Category":"sentiment lexicon", + "Seeds":"[]", + "Source \/ Justification":"We use a labeled positive\/negative sentiment training set (Hu and Liu, 2004). This dataset has been shown to be a trustworthy lexicon for negative and positive sentiment words (Panget al., 2008; Liu, 2012; Wilson et al., 2005). We trust these labels to be unbiased so that we may isolate the unintended biases entering our system to the word embeddings. ", + "Source Categories":"lexical-resources", + "Used in Paper":"A Transparent Framework for Evaluating Unintended Demographic Bias in Word Embeddings (Sweeney & Najafian, 2019)", + "Link":null, + "Seeds ID":"sentiment_lexicon-Sweeney_&_Najafian_2019" + }, + { + "Category":"neutral identity terms", + "Seeds":"[]", + "Source \/ Justification":"Demographic identity word vectors from a particular protected group (i.e. national origin, religion, etc.). We choose our set of neutral identity terms based on the most populous demographics for each protected group.\n\nWe evaluate our framework and metric on two cases studies: National Origin Discrimination and Religious Discrimination. 
For each case study, we create a set of the most frequent identity terms from the protected groups in the Wikipedia word corpus and analyze bias with respect to these terms via our framework.\n\nswedish, irish, mexican, chinese, filipino, german, english, french, norwegian, american, indian, dutch, russian, scottish, italian", + "Source Categories":"corpus-derived", + "Used in Paper":"A Transparent Framework for Evaluating Unintended Demographic Bias in Word Embeddings (Sweeney & Najafian, 2019)", + "Link":null, + "Seeds ID":"neutral_identity_terms-Sweeney_&_Najafian_2019" + }, + { + "Category":"female definition words 1", + "Seeds":"['countrywoman', 'sororal', 'witches', 'maidservant', 'mothers', 'diva', 'actress', 'spinster', 'mama', 'duchesses', 'barwoman', 'countrywomen', 'dowry', 'hostesses', 'airwomen', 'menopause', 'clitoris', 'princess', 'governesses', 'abbess', 'women', 'widow', 'ladies', 'sorceresses', 'madam', 'brides', 'baroness', 'housewives', 'godesses', 'niece', 'widows', 'lady', 'sister', 'brides', 'nun', 'adultresses', 'obstetrics', 'bellgirls', 'her', 'marchioness', 'princesses', 'empresses', 'mare', 'chairwoman', 'convent', 'priestesses', 'girlhood', 'ladies', 'queen', 'gals', 'mommies', 'maid', 'female_ejaculation', 'spokeswoman', 'seamstress', 'cowgirls', 'chick', 'spinsters', 'hair_salon', 'empress', 'mommy', 'feminism', 'gals', 'enchantress', 'gal', 'motherhood', 'estrogen', 'camerawomen', 'godmother', 'strongwoman', 'goddess', 'matriarch', 'aunt', 'chairwomen', 'ma\\'am', 'sisterhood', 'hostess', 'estradiol', 'wife', 'mom', 'stewardess', 'females', 'viagra', 'spokeswomen', 'ma', 'belle', 'minx', 'maiden', 'witch', 'miss', 'nieces', 'mothered', 'cow', 'belles', 'councilwomen', 'landladies', 'granddaughter', 'fiancees', 'stepmothers', 'horsewomen', 'grandmothers', 'adultress', 'schoolgirl', 'hen', 'granddaughters', 'bachelorette', 'camerawoman', 'moms', 'her', 'mistress', 'lass', 'policewoman', 'nun', 'actresses', 'saleswomen', 'girlfriend', 'councilwoman', 'lady', 'stateswoman', 'maternal', 'lass', 'landlady', 'sistren', 'ladies', 'wenches', 'sorority', 'bellgirl', 'duchess', 'ballerina', 'chicks', 'fiancee', 'fillies', 'wives', 'suitress', 'maternity', 'she', 'businesswoman', 'masseuses', 'heroine', 'doe', 'busgirls', 'girlfriends', 'queens', 'sisters', 'mistresses', 'stepmother', 'brides', 'daughter', 'minxes', 'cowgirl', 'lady', 'daughters', 'mezzo', 'saleswoman', 'mistress', 'hostess', 'nuns', 'maids', 'mrs.', 'headmistresses', 'lasses', 'congresswoman', 'airwoman', 'housewife', 'priestess', 'barwomen', 'barnoesses', 'abbesses', 'handywoman', 'toque', 'sororities', 'stewardesses', 'filly', 'czarina', 'stepdaughters', 'herself', 'girls', 'lionesses', 'lady', 'vagina', 'hers', 'masseuse', 'cows', 'aunts', 'wench', 'toques', 'wife', 'lioness', 'sorceress', 'effeminate', 'mother', 'lesbians', 'female', 'waitresses', 'ovum', 'skene_gland', 'stepdaughter', 'womb', 'businesswomen', 'heiress', 'waitress', 'headmistress', 'woman', 'governess', 'godess', 'bride', 'grandma', 'bride', 'gal', 'lesbian', 'ladies', 'girl', 'grandmother', 'mare', 'maternity', 'hens', 'uterus', 'nuns', 'maidservants', 'seamstress', 'busgirl', 'heroines']", + "Source \/ Justification":"\"We categorize all the vocabulary words into three sub-sets: male-definition\u2126M, female-definition\u2126F, and gender-neutral\u2126N, based on their definition in WordNet (Miller and Fellbaum, 1998).", + "Source Categories":"corpus-derived", + "Used in Paper":"Learning gender-neutral word embeddings (Zhao et 
al., 2018)", + "Link":"https:\/\/github.com\/uclanlp\/gn_glove", + "Seeds ID":"female_definition_words_1-Zhao_et_al_2018" + }, + { + "Category":"male definition words 1", + "Seeds":"['countryman', 'fraternal', 'wizards', 'manservant', 'fathers', 'divo', 'actor', 'bachelor', 'papa', 'dukes', 'barman', 'countrymen', 'brideprice', 'hosts', 'airmen', 'andropause', 'penis', 'prince', 'governors', 'abbot', 'men', 'widower', 'gentlemen', 'sorcerers', 'sir', 'bridegrooms', 'baron', 'househusbands', 'gods', 'nephew', 'widowers', 'lord', 'brother', 'grooms', 'priest', 'adultors', 'andrology', 'bellboys', 'his', 'marquis', 'princes', 'emperors', 'stallion', 'chairman', 'monastery', 'priests', 'boyhood', 'fellas', 'king', 'dudes', 'daddies', 'manservant', 'semen', 'spokesman', 'tailor', 'cowboys', 'dude', 'bachelors', 'barbershop', 'emperor', 'daddy', 'masculism', 'guys', 'enchanter', 'guy', 'fatherhood', 'androgen', 'cameramen', 'godfather', 'strongman', 'god', 'patriarch', 'uncle', 'chairmen', 'sir', 'brotherhood', 'host', 'testosterone', 'husband', 'dad', 'steward', 'males', 'cialis', 'spokesmen', 'pa', 'beau', 'stud', 'bachelor', 'wizard', 'sir', 'nephews', 'fathered', 'bull', 'beaus', 'councilmen', 'landlords', 'grandson', 'fiances', 'stepfathers', 'horsemen', 'grandfathers', 'adultor', 'schoolboy', 'rooster', 'grandsons', 'bachelor', 'cameraman', 'dads', 'him', 'master', 'lad', 'policeman', 'monk', 'actors', 'salesmen', 'boyfriend', 'councilman', 'fella', 'statesman', 'paternal', 'chap', 'landlord', 'brethren', 'lords', 'blokes', 'fraternity', 'bellboy', 'duke', 'ballet_dancer', 'dudes', 'fiance', 'colts', 'husbands', 'suitor', 'paternity', 'he', 'businessman', 'masseurs', 'hero', 'deer', 'busboys', 'boyfriends', 'kings', 'brothers', 'masters', 'stepfather', 'grooms', 'son', 'studs', 'cowboy', 'mentleman', 'sons', 'baritone', 'salesman', 'paramour', 'male_host', 'monks', 'menservants', 'mr.', 'headmasters', 'lads', 'congressman', 'airman', 'househusband', 'priest', 'barmen', 'barons', 'abbots', 'handyman', 'beard', 'fraternities', 'stewards', 'colt', 'czar', 'stepsons', 'himself', 'boys', 'lions', 'gentleman', 'penis', 'his', 'masseur', 'bulls', 'uncles', 'bloke', 'beards', 'hubby', 'lion', 'sorcerer', 'macho', 'father', 'gays', 'male', 'waiters', 'sperm', 'prostate', 'stepson', 'prostatic_utricle', 'businessmen', 'heir', 'waiter', 'headmaster', 'man', 'governor', 'god', 'bridegroom', 'grandpa', 'groom', 'dude', 'gay', 'gents', 'boy', 'grandfather', 'gelding', 'paternity', 'roosters', 'prostatic_utricle', 'priests', 'manservants', 'stailor', 'busboy', 'heros']", + "Source \/ Justification":"\"We categorize all the vocabulary words into three sub-sets: male-definition\u2126M, female-definition\u2126F, and gender-neutral\u2126N, based on their definition in WordNet (Miller and Fellbaum, 1998).", + "Source Categories":"corpus-derived", + "Used in Paper":"Learning gender-neutral word embeddings (Zhao et al., 2018)", + "Link":"https:\/\/github.com\/uclanlp\/gn_glove", + "Seeds ID":"male_definition_words_1-Zhao_et_al_2018" + }, + { + "Category":"professions", + "Seeds":"[]", + "Source \/ Justification":"Bolukbasi et al. 
(2016)", + "Source Categories":"prior-work", + "Used in Paper":"Learning gender-neutral word embeddings (Zhao et al., 2018)", + "Link":"https:\/\/github.com\/uclanlp\/gn_glove", + "Seeds ID":"professions-Zhao_et_al_2018" + }, + { + "Category":"male definition words 2", + "Seeds":"['rake', 'wizard', 'policeman', 'host', 'councilman', 'actor', 'waiter', 'businessman', 'fiance', 'spokesman', 'salesman', 'widower', 'horseman', 'governor', 'statesman', 'hero', 'chairman', 'headmaster', 'priest', 'gentleman', 'countrymen', 'nobleman']", + "Source \/ Justification":"To study the quality of the gender information present in each model, we follow SemEval 2012 Task2 (Jurgens et al., 2012) to create an analogy dataset, SemBias, with the goal to identify the correct analogy of \u201che-she\u201d from four pairs of words. Each instance in the dataset consists of four word pairs: a gender-definition word pair (Definition; e.g., \u201cwaiter-waitress\u201d), a gender-stereotype word pair (Stereotype; e.g., \u201cdoctor-nurse\u201d) and two other pairs of words that have similar meanings (None; e.g.,\u201cdog-cat\u201d, \u201ccup-lid\u201d) (The pair is sampled from the list of word pairs with \u201cSIMILAR: Coordinates\u201d relation annotated in (Jurgens et al., 2012). The original list has 38 pairs. After removing gender-definition word pairs, 29 are left.). We consider 20 gender-stereotype word pairs and 22 gender-definition word pairs and use their Cartesian product to generate 440 instances. Among the 22 gender-definition word pairs, there are 2 word pairs that are not used as a seed word during the training. To test the generalization ability of the model, we generate a subset of data (SemBias (subset)) of 40 instances associated with these 2 pairs.", + "Source Categories":"lexical-resources", + "Used in Paper":"Learning gender-neutral word embeddings (Zhao et al., 2018)", + "Link":"https:\/\/github.com\/uclanlp\/gn_glove", + "Seeds ID":"male_definition_words_2-Zhao_et_al_2018" + }, + { + "Category":"female definition words 2", + "Seeds":"['lady', 'saleswoman', 'noblewoman', 'hostess', 'coquette', 'nun', 'heroine', 'actress', 'chairwoman', 'businesswoman', 'spokeswoman', 'waitress', 'councilwoman', 'stateswoman', 'policewoman', 'countrywomen', 'horsewoman', 'headmistress', 'governess', 'widow', 'witch', 'fiancee']", + "Source \/ Justification":"To study the quality of the gender information present in each model, we follow SemEval 2012 Task2 (Jurgens et al., 2012) to create an analogy dataset, SemBias, with the goal to identify the correct analogy of \u201che-she\u201d from four pairs of words. Each instance in the dataset consists of four word pairs: a gender-definition word pair (Definition; e.g., \u201cwaiter-waitress\u201d), a gender-stereotype word pair (Stereotype; e.g., \u201cdoctor-nurse\u201d) and two other pairs of words that have similar meanings (None; e.g.,\u201cdog-cat\u201d, \u201ccup-lid\u201d) (The pair is sampled from the list of word pairs with \u201cSIMILAR: Coordinates\u201d relation annotated in (Jurgens et al., 2012). The original list has 38 pairs. After removing gender-definition word pairs, 29 are left.). We consider 20 gender-stereotype word pairs and 22 gender-definition word pairs and use their Cartesian product to generate 440 instances. Among the 22 gender-definition word pairs, there are 2 word pairs that are not used as a seed word during the training. 
To test the generalization ability of the model, we generate a subset of data (SemBias (subset)) of 40 instances associated with these 2 pairs.", + "Source Categories":"lexical-resources", + "Used in Paper":"Learning gender-neutral word embeddings (Zhao et al., 2018)", + "Link":"https:\/\/github.com\/uclanlp\/gn_glove", + "Seeds ID":"female_definition_words_2-Zhao_et_al_2018" + }, + { + "Category":"male stereotype words", + "Seeds":"['researcher', 'lawyer', 'developer', 'architect', 'dentist', 'doctor', 'boss', 'chef', 'programmer', 'president', 'pilot', 'guard', 'warrior', 'judge', 'janitor', 'captain', 'engineer', 'dispatcher', 'leader', 'manager']", + "Source \/ Justification":"To study the quality of the gender information present in each model, we follow SemEval 2012 Task2 (Jurgens et al., 2012) to create an analogy dataset, SemBias, with the goal to identify the correct analogy of \u201che-she\u201d from four pairs of words. Each instance in the dataset consists of four word pairs: a gender-definition word pair (Definition; e.g., \u201cwaiter-waitress\u201d), a gender-stereotype word pair (Stereotype; e.g., \u201cdoctor-nurse\u201d) and two other pairs of words that have similar meanings (None; e.g.,\u201cdog-cat\u201d, \u201ccup-lid\u201d) (The pair is sampled from the list of word pairs with \u201cSIMILAR: Coordinates\u201d relation annotated in (Jurgens et al., 2012). The original list has 38 pairs. After removing gender-definition word pairs, 29 are left.). We consider 20 gender-stereotype word pairs and 22 gender-definition word pairs and use their Cartesian product to generate 440 instances. Among the 22 gender-definition word pairs, there are 2 word pairs that are not used as a seed word during the training. To test the generalization ability of the model, we generate a subset of data (SemBias (subset)) of 40 instances associated with these 2 pairs.", + "Source Categories":"lexical-resources", + "Used in Paper":"Learning gender-neutral word embeddings (Zhao et al., 2018)", + "Link":"https:\/\/github.com\/uclanlp\/gn_glove", + "Seeds ID":"male_stereotype_words-Zhao_et_al_2018" + }, + { + "Category":"female stereotype words", + "Seeds":"['baker', 'counselor', 'nanny', 'librarians', 'socialite', 'assistant', 'tailor', 'dancer', 'hairdresser', 'cashier', 'secretary', 'clerk', 'stenographer', 'optometrist', 'housekeeper', 'bookkeeper', 'homemaker', 'nurse', 'stylist', 'receptionist']", + "Source \/ Justification":"To study the quality of the gender information present in each model, we follow SemEval 2012 Task2 (Jurgens et al., 2012) to create an analogy dataset, SemBias, with the goal to identify the correct analogy of \u201che-she\u201d from four pairs of words. Each instance in the dataset consists of four word pairs: a gender-definition word pair (Definition; e.g., \u201cwaiter-waitress\u201d), a gender-stereotype word pair (Stereotype; e.g., \u201cdoctor-nurse\u201d) and two other pairs of words that have similar meanings (None; e.g.,\u201cdog-cat\u201d, \u201ccup-lid\u201d) (The pair is sampled from the list of word pairs with \u201cSIMILAR: Coordinates\u201d relation annotated in (Jurgens et al., 2012). The original list has 38 pairs. After removing gender-definition word pairs, 29 are left.). We consider 20 gender-stereotype word pairs and 22 gender-definition word pairs and use their Cartesian product to generate 440 instances. Among the 22 gender-definition word pairs, there are 2 word pairs that are not used as a seed word during the training. 
To test the generalization ability of the model, we generate a subset of data (SemBias (subset)) of 40 instances associated with these 2 pairs.", + "Source Categories":"lexical-resources", + "Used in Paper":"Learning gender-neutral word embeddings (Zhao et al., 2018)", + "Link":"https:\/\/github.com\/uclanlp\/gn_glove", + "Seeds ID":"female_stereotype_words-Zhao_et_al_2018" + }, + { + "Category":"female", + "Seeds":"['woman', 'women', 'her', 'her', 'she']", + "Source \/ Justification":"The U.S. Equal Employment Opportunity Commission (EEOC) characterizes discrimination by type, where types of discrimination include age, disability, national origin, pregnancy, race\/color, religion, and sex. To test for the existence of harmful stereotypes in the SNLI dataset we pick words and bigrams used to describe people labeled as belonging to each of these categories, such as Asian or woman ... We include both broad and specific query words; for example, we include adjectives describing nationalities as well as those describing regions and races. We also include query bigrams describing people labeled as belonging to more than one category, such as Asian woman.", + "Source Categories":"corpus-derived", + "Used in Paper":"Social Bias in Elicited Natural Language Inferences (Rudinger et al., 2017)", + "Link":"https:\/\/github.com\/cjmay\/snli-ethics", + "Seeds ID":"female-Rudinger_et_al_2017" + }, + { + "Category":"male", + "Seeds":"['man', 'men', 'him', 'his', 'he']", + "Source \/ Justification":"The U.S. Equal Employment Opportunity Commission (EEOC) characterizes discrimination bytype, where types of discrimination include age, disability, national origin, pregnancy, race\/color,religion, and sex. To test for the existence of harmful stereotypes in the SNLI dataset we pick words and bigrams used to describe people labeled as belonging to each of these categories, such as Asian or woman ... We include both broad and specific query words; for example, we include adjectives describing nationalities as well as those describing regions and races. We also include query bigrams describing people labeled as belonging to more than one category, such as Asian woman.", + "Source Categories":"corpus-derived", + "Used in Paper":"Social Bias in Elicited Natural Language Inferences (Rudinger et al., 2017)", + "Link":"https:\/\/github.com\/cjmay\/snli-ethics", + "Seeds ID":"male-Rudinger_et_al_2017" + }, + { + "Category":"black", + "Seeds":"['black', 'black_person', 'black_man', 'black_woman']", + "Source \/ Justification":"The U.S. Equal Employment Opportunity Commission (EEOC) characterizes discrimination bytype, where types of discrimination include age, disability, national origin, pregnancy, race\/color,religion, and sex. To test for the existence of harmful stereotypes in the SNLI dataset we pick words and bigrams used to describe people labeled as belonging to each of these categories, such as Asian or woman ... We include both broad and specific query words; for example, we include adjectives describing nationalities as well as those describing regions and races. 
We also include query bigrams describing people labeled as belonging to more than one category, such as Asian woman.", + "Source Categories":"corpus-derived", + "Used in Paper":"Social Bias in Elicited Natural Language Inferences (Rudinger et al., 2017)", + "Link":"https:\/\/github.com\/cjmay\/snli-ethics", + "Seeds ID":"black-Rudinger_et_al_2017" + }, + { + "Category":"white", + "Seeds":"['white', 'white_person', 'white_man', 'white_woman']", + "Source \/ Justification":"The U.S. Equal Employment Opportunity Commission (EEOC) characterizes discrimination bytype, where types of discrimination include age, disability, national origin, pregnancy, race\/color,religion, and sex. To test for the existence of harmful stereotypes in the SNLI dataset we pick words and bigrams used to describe people labeled as belonging to each of these categories, such as Asian or woman ... We include both broad and specific query words; for example, we include adjectives describing nationalities as well as those describing regions and races. We also include query bigrams describing people labeled as belonging to more than one category, such as Asian woman.", + "Source Categories":"corpus-derived", + "Used in Paper":"Social Bias in Elicited Natural Language Inferences (Rudinger et al., 2017)", + "Link":"https:\/\/github.com\/cjmay\/snli-ethics", + "Seeds ID":"white-Rudinger_et_al_2017" + }, + { + "Category":"hispanic", + "Seeds":"['hispanic', 'hispanic_person', 'hispanic_man', 'hispanic_woman']", + "Source \/ Justification":"The U.S. Equal Employment Opportunity Commission (EEOC) characterizes discrimination bytype, where types of discrimination include age, disability, national origin, pregnancy, race\/color,religion, and sex. To test for the existence of harmful stereotypes in the SNLI dataset we pick words and bigrams used to describe people labeled as belonging to each of these categories, such as Asian or woman ... We include both broad and specific query words; for example, we include adjectives describing nationalities as well as those describing regions and races. We also include query bigrams describing people labeled as belonging to more than one category, such as Asian woman.", + "Source Categories":"corpus-derived", + "Used in Paper":"Social Bias in Elicited Natural Language Inferences (Rudinger et al., 2017)", + "Link":"https:\/\/github.com\/cjmay\/snli-ethics", + "Seeds ID":"hispanic-Rudinger_et_al_2017" + }, + { + "Category":"asian", + "Seeds":"['asian', 'asian_person', 'asian_man', 'asian_woman']", + "Source \/ Justification":"The U.S. Equal Employment Opportunity Commission (EEOC) characterizes discrimination bytype, where types of discrimination include age, disability, national origin, pregnancy, race\/color,religion, and sex. To test for the existence of harmful stereotypes in the SNLI dataset we pick words and bigrams used to describe people labeled as belonging to each of these categories, such as Asian or woman ... We include both broad and specific query words; for example, we include adjectives describing nationalities as well as those describing regions and races. 
We also include query bigrams describing people labeled as belonging to more than one category, such as Asian woman.", + "Source Categories":"corpus-derived", + "Used in Paper":"Social Bias in Elicited Natural Language Inferences (Rudinger et al., 2017)", + "Link":"https:\/\/github.com\/cjmay\/snli-ethics", + "Seeds ID":"asian-Rudinger_et_al_2017" + }, + { + "Category":"career", + "Seeds":"['salary', 'career', 'business', 'office', 'professional', 'management', 'corporation', 'executive']", + "Source \/ Justification":"The U.S. Equal Employment Opportunity Commission (EEOC) characterizes discrimination bytype, where types of discrimination include age, disability, national origin, pregnancy, race\/color,religion, and sex. To test for the existence of harmful stereotypes in the SNLI dataset we pick words and bigrams used to describe people labeled as belonging to each of these categories, such as Asian or woman ... We include both broad and specific query words; for example, we include adjectives describing nationalities as well as those describing regions and races. We also include query bigrams describing people labeled as belonging to more than one category, such as Asian woman.", + "Source Categories":"corpus-derived", + "Used in Paper":"Social Bias in Elicited Natural Language Inferences (Rudinger et al., 2017)", + "Link":"https:\/\/github.com\/cjmay\/snli-ethics", + "Seeds ID":"career-Rudinger_et_al_2017" + }, + { + "Category":"violence", + "Seeds":"['fight', 'fights', 'gun', 'guns', 'shoots', 'attacks', 'dangerous']", + "Source \/ Justification":"The U.S. Equal Employment Opportunity Commission (EEOC) characterizes discrimination bytype, where types of discrimination include age, disability, national origin, pregnancy, race\/color,religion, and sex. To test for the existence of harmful stereotypes in the SNLI dataset we pick words and bigrams used to describe people labeled as belonging to each of these categories, such as Asian or woman ... We include both broad and specific query words; for example, we include adjectives describing nationalities as well as those describing regions and races. We also include query bigrams describing people labeled as belonging to more than one category, such as Asian woman.", + "Source Categories":"corpus-derived", + "Used in Paper":"Social Bias in Elicited Natural Language Inferences (Rudinger et al., 2017)", + "Link":"https:\/\/github.com\/cjmay\/snli-ethics", + "Seeds ID":"violence-Rudinger_et_al_2017" + }, + { + "Category":"female 2", + "Seeds":"['woman', 'women', 'girl', 'girls', 'mother']", + "Source \/ Justification":"The U.S. Equal Employment Opportunity Commission (EEOC) characterizes discrimination bytype, where types of discrimination include age, disability, national origin, pregnancy, race\/color,religion, and sex. To test for the existence of harmful stereotypes in the SNLI dataset we pick words and bigrams used to describe people labeled as belonging to each of these categories, such as Asian or woman ... We include both broad and specific query words; for example, we include adjectives describing nationalities as well as those describing regions and races. 
We also include query bigrams describing people labeled as belonging to more than one category, such as Asian woman.", + "Source Categories":"corpus-derived", + "Used in Paper":"Social Bias in Elicited Natural Language Inferences (Rudinger et al., 2017)", + "Link":"https:\/\/github.com\/cjmay\/snli-ethics", + "Seeds ID":"female_2-Rudinger_et_al_2017" + }, + { + "Category":"male 2", + "Seeds":"['man', 'men', 'boy', 'boys', 'father']", + "Source \/ Justification":"The U.S. Equal Employment Opportunity Commission (EEOC) characterizes discrimination bytype, where types of discrimination include age, disability, national origin, pregnancy, race\/color,religion, and sex. To test for the existence of harmful stereotypes in the SNLI dataset we pick words and bigrams used to describe people labeled as belonging to each of these categories, such as Asian or woman ... We include both broad and specific query words; for example, we include adjectives describing nationalities as well as those describing regions and races. We also include query bigrams describing people labeled as belonging to more than one category, such as Asian woman.", + "Source Categories":"corpus-derived", + "Used in Paper":"Social Bias in Elicited Natural Language Inferences (Rudinger et al., 2017)", + "Link":"https:\/\/github.com\/cjmay\/snli-ethics", + "Seeds ID":"male_2-Rudinger_et_al_2017" + }, + { + "Category":"old", + "Seeds":"['old', 'old_woman', 'old_man']", + "Source \/ Justification":"The U.S. Equal Employment Opportunity Commission (EEOC) characterizes discrimination bytype, where types of discrimination include age, disability, national origin, pregnancy, race\/color,religion, and sex. To test for the existence of harmful stereotypes in the SNLI dataset we pick words and bigrams used to describe people labeled as belonging to each of these categories, such as Asian or woman ... We include both broad and specific query words; for example, we include adjectives describing nationalities as well as those describing regions and races. We also include query bigrams describing people labeled as belonging to more than one category, such as Asian woman.", + "Source Categories":"corpus-derived", + "Used in Paper":"Social Bias in Elicited Natural Language Inferences (Rudinger et al., 2017)", + "Link":"https:\/\/github.com\/cjmay\/snli-ethics", + "Seeds ID":"old-Rudinger_et_al_2017" + }, + { + "Category":"young", + "Seeds":"['young', 'young_woman', 'young_man']", + "Source \/ Justification":"The U.S. Equal Employment Opportunity Commission (EEOC) characterizes discrimination bytype, where types of discrimination include age, disability, national origin, pregnancy, race\/color,religion, and sex. To test for the existence of harmful stereotypes in the SNLI dataset we pick words and bigrams used to describe people labeled as belonging to each of these categories, such as Asian or woman ... We include both broad and specific query words; for example, we include adjectives describing nationalities as well as those describing regions and races. 
We also include query bigrams describing people labeled as belonging to more than one category, such as Asian woman.", + "Source Categories":"corpus-derived", + "Used in Paper":"Social Bias in Elicited Natural Language Inferences (Rudinger et al., 2017)", + "Link":"https:\/\/github.com\/cjmay\/snli-ethics", + "Seeds ID":"young-Rudinger_et_al_2017" + }, + { + "Category":"race\/ethnicity\/nationality", + "Seeds":"['indian', 'indian_woman', 'indian_man', 'asian', 'asians', 'asian_woman', 'asian_man', 'whie_woman', 'white_man', 'caucasian', 'american', 'american_woman', 'american_man', 'black_woman', 'black_man', 'native_american', 'african_american', 'african']", + "Source \/ Justification":"The U.S. Equal Employment Opportunity Commission (EEOC) characterizes discrimination bytype, where types of discrimination include age, disability, national origin, pregnancy, race\/color,religion, and sex. To test for the existence of harmful stereotypes in the SNLI dataset we pick words and bigrams used to describe people labeled as belonging to each of these categories, such as Asian or woman ... We include both broad and specific query words; for example, we include adjectives describing nationalities as well as those describing regions and races. We also include query bigrams describing people labeled as belonging to more than one category, such as Asian woman.", + "Source Categories":"corpus-derived", + "Used in Paper":"Social Bias in Elicited Natural Language Inferences (Rudinger et al., 2017)", + "Link":"https:\/\/github.com\/cjmay\/snli-ethics", + "Seeds ID":"race\/ethnicity\/nationality-Rudinger_et_al_2017" + }, + { + "Category":"white_collar_job", + "Seeds":"['detective', 'executive', 'scientist', 'biologist', 'surgeon', 'vet', 'office', 'photographer', 'employer', 'colleague', 'psychiatrist', 'psychologist', 'qualified', 'wealthy', 'businesswoman', 'manager', 'therapist', 'attorney', 'forensics', 'lawyer', 'employment', 'workaholic', 'coroner', 'nurse', 'specialist', 'internship', 'job', 'neurologist', 'senator', 'promotion', 'retired', 'researcher', 'profession', 'engineer', 'accountant', 'entrepreneur', 'paperwork', 'counselling', 'successful', 'dentist', 'analyst', 'physician', 'hire', 'politician', 'consultant', 'retire', 'veterinarian', 'supervisor', 'examiner', 'inspector', 'doctor', 'actor', 'pharmacist', 'chemist', 'pediatrician', 'pediatric', 'director', 'professional', 'law', 'salary', 'chief', 'gynecologist']", + "Source \/ Justification":"\"...to generate Empath\u2019s category names and seed terms, we selected 200 common dependency relationships in ConceptNet, conditioned on 10,000 common words in our corpus. We then manually refined this list, eliminating redundant or sparse categories. For some categories we added additional seed terms to better represent the concept, resulting in a final set of two to five seed terms for each category.\"\n\n\"For emotional analyses, Empath likewise draws upon the hierarchy of emotions introduced by Parrott [36], in which emotions are defined by other emotions.\"\n\n\"...each of Empath\u2019s categories is defined by seed words (e.g., lust: desire, passion; clothing: shirt, hat; social media: facebook, twitter). Empath\u2019s model uses these seed words to generate a candidate set of member terms for its categories, which we validate through paid crowdsourcing. 
Empath generates these category terms by querying a vector space model (VSM) trained by a neural network on a large corpus of text.\"\n\n\"If two of three workers believe a word is at least weakly related to the category, then Empath will keep the word, otherwise we remove it from the category.\"", + "Source Categories":"crowd-sourced, curated, lexical-resources", + "Used in Paper":"Empath: Understanding Topic Signals in Large-Scale Text (Fast et al., 2016)", + "Link":"https:\/\/github.com\/Ejhfast\/empath-client", + "Seeds ID":"white_collar_job-Fast_et_al_2016" + }, + { + "Category":"blue_collar_job", + "Seeds":"['serving', 'maid', 'employer', 'employee', 'salesperson', 'payday', 'pizzeria', 'clerk', 'supermarket', 'job', 'attendant', 'restaurant', 'waiter', 'waitress', 'worker', 'bartender', 'hostess', 'receptionist', 'cashier', 'paycheck', 'barista']", + "Source \/ Justification":"\"...to generate Empath\u2019s category names and seed terms, we selected 200 common dependency relationships in ConceptNet, conditioned on 10,000 common words in our corpus. We then manually refined this list, eliminating redundant or sparse categories. For some categories we added additional seed terms to better represent the concept, resulting in a final set of two to five seed terms for each category.\"\n\n\"For emotional analyses, Empath likewise draws upon the hierarchy of emotions introduced by Parrott [36], in which emotions are defined by other emotions.\"\n\n\"...each of Empath\u2019s categories is defined by seed words (e.g., lust: desire, passion; clothing: shirt, hat; social media: facebook, twitter). Empath\u2019s model uses these seed words to generate a candidate set of member terms for its categories, which we validate through paid crowdsourcing. Empath generates these category terms by querying a vector space model (VSM) trained by a neural network on a large corpus of text.\"\n\n\"If two of three workers believe a word is at least weakly related to the category, then Empath will keep the word, otherwise we remove it from the category.\"", + "Source Categories":"crowd-sourced, curated, lexical-resources", + "Used in Paper":"Empath: Understanding Topic Signals in Large-Scale Text (Fast et al., 2016)", + "Link":"https:\/\/github.com\/Ejhfast\/empath-client", + "Seeds ID":"blue_collar_job-Fast_et_al_2016" + }, + { + "Category":"domestic_work", + "Seeds":"['chore', 'mom', 'vacuum', 'scrubbing', 'cook', 'washing', 'baking', 'wash', 'morning', 'meal', 'house', 'chef', 'laundry', 'bake', 'organizing', 'cooking', 'spotless', 'mum', 'washer', 'remodeling', 'parent', 'job', 'nanny', 'kitchen', 'dishwasher', 'cleaning', 'family', 'cleaner', 'bathroom', 'errand', 'sitter', 'housekeeper', 'serve', 'housekeeping', 'tidy', 'cleaned', 'housework', 'scrub', 'organize', 'home', 'clean']", + "Source \/ Justification":"\"...to generate Empath\u2019s category names and seed terms, we selected 200 common dependency relationships in ConceptNet, conditioned on 10,000 common words in our corpus. We then manually refined this list, eliminating redundant or sparse categories. 
For some categories we added additional seed terms to better represent the concept, resulting in a final set of two to five seed terms for each category.\"\n\n\"For emotional analyses, Empath likewise draws upon the hierarchy of emotions introduced by Parrott [36], in which emotions are defined by other emotions.\"\n\n\"...each of Empath\u2019s categories is defined by seed words (e.g., lust: desire, passion; clothing: shirt, hat; social media: facebook, twitter). Empath\u2019s model uses these seed words to generate a candidate set of member terms for its categories, which we validate through paid crowdsourcing. Empath generates these category terms by querying a vector space model (VSM) trained by a neural network on a large corpus of text.\"\n\n\"If two of three workers believe a word is at least weakly related to the category, then Empath will keep the word, otherwise we remove it from the category.\"", + "Source Categories":"crowd-sourced, curated, lexical-resources", + "Used in Paper":"Empath: Understanding Topic Signals in Large-Scale Text (Fast et al., 2016)", + "Link":"https:\/\/github.com\/Ejhfast\/empath-client", + "Seeds ID":"domestic_work-Fast_et_al_2016" + }, + { + "Category":"occupation", + "Seeds":"['detective', 'producer', 'executive', 'manager', 'therapist', 'actor', 'electrician', 'occupation', 'retirement', 'office', 'photographer', 'maid', 'cashier', 'colleague', 'psychiatrist', 'bodyguard', 'psychologist', 'qualified', 'supervise', 'politician', 'surgeon', 'policeman', 'businesswoman', 'server', 'journalist', 'housekeeper', 'secretary', 'attorney', 'choreographer', 'chef', 'intern', 'lawyer', 'interpreter', 'employment', 'retire', 'nurse', 'officer', 'specialist', 'working', 'hairdresser', 'internship', 'clerk', 'job', 'nanny', 'waiter', 'pediatrician', 'pediatric', 'neurologist', 'senator', 'waitress', 'retired', 'profession', 'entrepreneur', 'florist', 'workplace', 'service', 'accountant', 'worker', 'singer', 'catering', 'dentist', 'technician', 'analyst', 'physician', 'hire', 'bartender', 'hostess', 'consultant', 'employ', 'veterinarian', 'caterer', 'entertainer', 'supervisor', 'publicist', 'agent', 'concierge', 'coordinator', 'receptionist', 'accounting', 'inspector', 'doctor', 'owner', 'assistant', 'interview', 'pharmacist', 'chemist', 'foreman', 'employee', 'qualification', 'workaholic', 'businessman', 'salary', 'baker', 'banker', 'gynecologist', 'professional', 'policewoman']", + "Source \/ Justification":"\"...to generate Empath\u2019s category names and seed terms, we selected 200 common dependency relationships in ConceptNet, conditioned on 10,000 common words in our corpus. We then manually refined this list, eliminating redundant or sparse categories. For some categories we added additional seed terms to better represent the concept, resulting in a final set of two to five seed terms for each category.\"\n\n\"For emotional analyses, Empath likewise draws upon the hierarchy of emotions introduced by Parrott [36], in which emotions are defined by other emotions.\"\n\n\"...each of Empath\u2019s categories is defined by seed words (e.g., lust: desire, passion; clothing: shirt, hat; social media: facebook, twitter). Empath\u2019s model uses these seed words to generate a candidate set of member terms for its categories, which we validate through paid crowdsourcing. 
Empath generates these category terms by querying a vector space model (VSM) trained by a neural network on a large corpus of text.\"\n\n\"If two of three workers believe a word is at least weakly related to the category, then Empath will keep the word, otherwise we remove it from the category.\"", + "Source Categories":"crowd-sourced, curated, lexical-resources", + "Used in Paper":"Empath: Understanding Topic Signals in Large-Scale Text (Fast et al., 2016)", + "Link":"https:\/\/github.com\/Ejhfast\/empath-client", + "Seeds ID":"occupation-Fast_et_al_2016" + }, + { + "Category":"attractive", + "Seeds":"['alluring', 'cute', 'attractively', 'athletic', 'desirable', 'breathtaking', 'perfect', 'swoon', 'sexiest', 'sassy', 'attractive', 'masculine', 'pleasing', 'captivating', 'fantastic', 'dreamy', 'charmingly', 'glamorous', 'seductive', 'mesmerizing', 'inviting', 'hunk', 'popular', 'fascinating', 'flatter', 'supermodel', 'fabulous', 'irresistible', 'enticing', 'appealing', 'dimpled', 'looking', 'attracted', 'adore', 'appeal', 'adorable', 'compliment', 'revealing', 'dashing', 'fantasize', 'stylish', 'sexy', 'flawless', 'tempting', 'envious', 'angelic', 'lovable', 'marvelous', 'hotter', 'blonde', 'charismatic', 'classically', 'hunky', 'dazzling', 'gorgeous', 'lovely', 'chiseled', 'pretty', 'impress', 'charming', 'feminine', 'handsome', 'toned', 'photogenic', 'admire', 'stunning', 'charmer', 'coolest', 'beautiful', 'provocative', 'beautifully', 'attract', 'dazzle', 'breathtakingly', 'physique', 'strikingly', 'hot', 'luscious', 'buff', 'beauty', 'attractiveness', 'fashionable', 'enchanting', 'curvy', 'built', 'tanned']", + "Source \/ Justification":"\"...to generate Empath\u2019s category names and seed terms, we selected 200 common dependency relationships in ConceptNet, conditioned on 10,000 common words in our corpus. We then manually refined this list, eliminating redundant or sparse categories. For some categories we added additional seed terms to better represent the concept, resulting in a final set of two to five seed terms for each category.\"\n\n\"For emotional analyses, Empath likewise draws upon the hierarchy of emotions introduced by Parrott [36], in which emotions are defined by other emotions.\"\n\n\"...each of Empath\u2019s categories is defined by seed words (e.g., lust: desire, passion; clothing: shirt, hat; social media: facebook, twitter). Empath\u2019s model uses these seed words to generate a candidate set of member terms for its categories, which we validate through paid crowdsourcing. 
Empath generates these category terms by querying a vector space model (VSM) trained by a neural network on a large corpus of text.\"\n\n\"If two of three workers believe a word is at least weakly related to the category, then Empath will keep the word, otherwise we remove it from the category.\"", + "Source Categories":"crowd-sourced, curated, lexical-resources", + "Used in Paper":"Empath: Understanding Topic Signals in Large-Scale Text (Fast et al., 2016)", + "Link":"https:\/\/github.com\/Ejhfast\/empath-client", + "Seeds ID":"attractive-Fast_et_al_2016" + }, + { + "Category":"ugliness", + "Seeds":"['despise', 'balding', 'slimy', 'acne', 'grotesque', 'degrading', 'horrible', 'fat', 'diseased', 'repulsive', 'awful', 'nasty', 'brutish', 'grotesquely', 'distasteful', 'unworthy', 'scruffy', 'chubby', 'gross', 'insulting', 'crooked', 'revolting', 'unappealing', 'hairy', 'pathetic', 'cockroach', 'abnormally', 'unsightly', 'crippled', 'lousy', 'wrinkled', 'freakish', 'disfigured', 'disgusting', 'pudgy', 'tacky', 'obese', 'disgust', 'degrade', 'horrid', 'deformed', 'hideous', 'bloated', 'ugly', 'scum', 'demeaning', 'pig', 'obnoxious', 'blob', 'wart', 'disgraceful', 'fatty', 'bald', 'overweight', 'disgusted', 'unattractive', 'wrinkle', 'filthy', 'loathsome']", + "Source \/ Justification":"\"...to generate Empath\u2019s category names and seed terms, we selected 200 common dependency relationships in ConceptNet, conditioned on 10,000 common words in our corpus. We then manually refined this list, eliminating redundant or sparse categories. For some categories we added additional seed terms to better represent the concept, resulting in a final set of two to five seed terms for each category.\"\n\n\"For emotional analyses, Empath likewise draws upon the hierarchy of emotions introduced by Parrott [36], in which emotions are defined by other emotions.\"\n\n\"...each of Empath\u2019s categories is defined by seed words (e.g., lust: desire, passion; clothing: shirt, hat; social media: facebook, twitter). Empath\u2019s model uses these seed words to generate a candidate set of member terms for its categories, which we validate through paid crowdsourcing. 
Empath generates these category terms by querying a vector space model (VSM) trained by a neural network on a large corpus of text.\"\n\n\"If two of three workers believe a word is at least weakly related to the category, then Empath will keep the word, otherwise we remove it from the category.\"", + "Source Categories":"crowd-sourced, curated, lexical-resources", + "Used in Paper":"Empath: Understanding Topic Signals in Large-Scale Text (Fast et al., 2016)", + "Link":"https:\/\/github.com\/Ejhfast\/empath-client", + "Seeds ID":"ugliness-Fast_et_al_2016" + }, + { + "Category":"masculine", + "Seeds":"['alluring', 'cockiness', 'attractively', 'athletic', 'cocky', 'aggressive', 'tattooed', 'jock', 'arrogance', 'masculine', 'hormone', 'dominate', 'males', 'overpowering', 'dreamy', 'brutish', 'stereotypical', 'guy', 'bulky', 'scruffy', 'authority', 'manly', 'baritone', 'hunky', 'lad', 'masculinity', 'hunk', 'appeal', 'surfer', 'strong', 'boy', 'testosterone', 'domineering', 'male', 'youthful', 'dude', 'fella', 'distinct', 'charisma', 'man', 'chiseled', 'puberty', 'mentality', 'boys', 'shouldered', 'handsome', 'rugged', 'intimidate', 'stature', 'figure', 'intimidating', 'muscular', 'brawny', 'beefy', 'attract', 'physique', 'athletically', 'biceps', 'attractiveness', 'stocky', 'hormonal', 'burly', 'egotistical', 'bodied', 'rowdy']", + "Source \/ Justification":"\"...to generate Empath\u2019s category names and seed terms, we selected 200 common dependency relationships in ConceptNet, conditioned on 10,000 common words in our corpus. We then manually refined this list, eliminating redundant or sparse categories. For some categories we added additional seed terms to better represent the concept, resulting in a final set of two to five seed terms for each category.\"\n\n\"For emotional analyses, Empath likewise draws upon the hierarchy of emotions introduced by Parrott [36], in which emotions are defined by other emotions.\"\n\n\"...each of Empath\u2019s categories is defined by seed words (e.g., lust: desire, passion; clothing: shirt, hat; social media: facebook, twitter). Empath\u2019s model uses these seed words to generate a candidate set of member terms for its categories, which we validate through paid crowdsourcing. 
Empath generates these category terms by querying a vector space model (VSM) trained by a neural network on a large corpus of text.\"\n\n\"If two of three workers believe a word is at least weakly related to the category, then Empath will keep the word, otherwise we remove it from the category.\"", + "Source Categories":"crowd-sourced, curated, lexical-resources", + "Used in Paper":"Empath: Understanding Topic Signals in Large-Scale Text (Fast et al., 2016)", + "Link":"https:\/\/github.com\/Ejhfast\/empath-client", + "Seeds ID":"masculine-Fast_et_al_2016" + }, + { + "Category":"feminine", + "Seeds":"['cute', 'gown', 'haircut', 'slimming', 'lacy', 'stunningly', 'curled', 'pretty', 'redhead', 'appealing', 'wearing', 'woman', 'wavy', 'silk', 'stylist', 'nicely', 'tights', 'finery', 'cleavage', 'stylish', 'brunette', 'lilac', 'elegant', 'supermodel', 'fabulous', 'girl', 'perfume', 'matching', 'blouse', 'silky', 'ruffled', 'purple', 'bikini', 'revealing', 'voluptuous', 'hairdresser', 'complement', 'makeup', 'sexy', 'dress', 'headband', 'blazer', 'layered', 'perky', 'clothes', 'pair', 'blonde', 'pantyhose', 'comb', 'jewelry', 'fuchsia', 'styling', 'accentuate', 'gorgeous', 'girls', 'impress', 'sophisticated', 'flowery', 'slinky', 'glam', 'wardrobe', 'glamorous', 'girlish', 'voluminous', 'stunning', 'beautiful', 'hairstyle', 'fashion', 'provocative', 'chic', 'skirt', 'curl', 'ballerina', 'fashionable', 'dressed', 'kimono', 'skater', 'frilly', 'halter', 'accessory', 'floral', 'jewelry', 'feminine', 'curve', 'curvy', 'lipstick', 'skinny']", + "Source \/ Justification":"\"...to generate Empath\u2019s category names and seed terms, we selected 200 common dependency relationships in ConceptNet, conditioned on 10,000 common words in our corpus. We then manually refined this list, eliminating redundant or sparse categories. For some categories we added additional seed terms to better represent the concept, resulting in a final set of two to five seed terms for each category.\"\n\n\"For emotional analyses, Empath likewise draws upon the hierarchy of emotions introduced by Parrott [36], in which emotions are defined by other emotions.\"\n\n\"...each of Empath\u2019s categories is defined by seed words (e.g., lust: desire, passion; clothing: shirt, hat; social media: facebook, twitter). Empath\u2019s model uses these seed words to generate a candidate set of member terms for its categories, which we validate through paid crowdsourcing. 
Empath generates these category terms by querying a vector space model (VSM) trained by a neural network on a large corpus of text.\"\n\n\"If two of three workers believe a word is at least weakly related to the category, then Empath will keep the word, otherwise we remove it from the category.\"", + "Source Categories":"crowd-sourced, curated, lexical-resources", + "Used in Paper":"Empath: Understanding Topic Signals in Large-Scale Text (Fast et al., 2016)", + "Link":"https:\/\/github.com\/Ejhfast\/empath-client", + "Seeds ID":"feminine-Fast_et_al_2016" + }, + { + "Category":"positive_emotion", + "Seeds":"['happiness', 'enlighten', 'better', 'enthusiasm', 'pride', 'joyful', 'compassion', 'dearly', 'forgiving', 'kindness', 'bravery', 'closure', 'thrill', 'honestly', 'triumph', 'bond', 'honesty', 'alive', 'concern', 'reunite', 'joy', 'surprise', 'forgiveness', 'assurance', 'sympathize', 'understanding', 'reason', 'rejoice', 'care', 'faith', 'great', 'empathy', 'certainty', 'keep', 'trustworthy', 'affection', 'cherish', 'emotion', 'love', 'family', 'trusting', 'respect', 'trust', 'gratitude', 'confidence', 'adoration', 'friend', 'happy', 'overjoyed', 'determination', 'reassurance', 'glad', 'loved', 'admiration', 'wish', 'accomplishment', 'optimism', 'excitement', 'convince', 'hope', 'freedom', 'feeling', 'eagerness', 'willingness', 'sincere', 'sincerity', 'honest', 'genuine', 'comfort', 'elation', 'thrilled', 'loyalty', 'curiosity', 'unconditionally', 'proud']", + "Source \/ Justification":"\"...to generate Empath\u2019s category names and seed terms, we selected 200 common dependency relationships in ConceptNet, conditioned on 10,000 common words in our corpus. We then manually refined this list, eliminating redundant or sparse categories. For some categories we added additional seed terms to better represent the concept, resulting in a final set of two to five seed terms for each category.\"\n\n\"For emotional analyses, Empath likewise draws upon the hierarchy of emotions introduced by Parrott [36], in which emotions are defined by other emotions.\"\n\n\"...each of Empath\u2019s categories is defined by seed words (e.g., lust: desire, passion; clothing: shirt, hat; social media: facebook, twitter). Empath\u2019s model uses these seed words to generate a candidate set of member terms for its categories, which we validate through paid crowdsourcing. 
Empath generates these category terms by querying a vector space model (VSM) trained by a neural network on a large corpus of text.\"\n\n\"If two of three workers believe a word is at least weakly related to the category, then Empath will keep the word, otherwise we remove it from the category.\"", + "Source Categories":"crowd-sourced, curated, lexical-resources", + "Used in Paper":"Empath: Understanding Topic Signals in Large-Scale Text (Fast et al., 2016)", + "Link":"https:\/\/github.com\/Ejhfast\/empath-client", + "Seeds ID":"positive_emotion-Fast_et_al_2016" + }, + { + "Category":"negative_emotion", + "Seeds":"['violent', 'kill', 'hell', 'hate', 'dieing', 'death', 'thinking', 'hated', 'crying', 'surprised', 'hurting', 'worse', 'beat', 'stop', 'crushed', 'break', 'worst', 'trouble', 'disappointed', 'killed', 'lost', 'cry', 'worried', 'worst_part', 'bad', 'stupid', 'either', 'die', 'mean', 'insane', 'fucking', 'scared', 'hard', 'dead', 'beaten', 'horrible', 'monster', 'weak', 'loose', 'threatened', 'punch', 'killing', 'blame', 'reason', 'so_much_pain', 'hurts', 'losing', 'wanted', 'pissed', 'care', 'scary', 'accident', 'fault', 'guilty', 'terrible', 'swear', 'last_straw', 'heartbroken', 'scare', 'seeing', 'drunk', 'terrified', 'freaked', 'raped', 'frightened', 'poor_girl', 'lose', 'angry', 'fight', 'poor_guy', 'hurt', 'ashamed', 'depressed', 'unthinkable', 'tortured', 'crazy', 'confused', 'sad', 'hit', 'alone', 'lie', 'afraid', 'dying', 'shocked', 'angered', 'sick', 'badly', 'pain', 'react', 'wrong', 'mad', 'upset', 'fighting', 'furious']", + "Source \/ Justification":"\"...to generate Empath\u2019s category names and seed terms, we selected 200 common dependency relationships in ConceptNet, conditioned on 10,000 common words in our corpus. We then manually refined this list, eliminating redundant or sparse categories. For some categories we added additional seed terms to better represent the concept, resulting in a final set of two to five seed terms for each category.\"\n\n\"For emotional analyses, Empath likewise draws upon the hierarchy of emotions introduced by Parrott [36], in which emotions are defined by other emotions.\"\n\n\"...each of Empath\u2019s categories is defined by seed words (e.g., lust: desire, passion; clothing: shirt, hat; social media: facebook, twitter). Empath\u2019s model uses these seed words to generate a candidate set of member terms for its categories, which we validate through paid crowdsourcing. 
Empath generates these category terms by querying a vector space model (VSM) trained by a neural network on a large corpus of text.\"\n\n\"If two of three workers believe a word is at least weakly related to the category, then Empath will keep the word, otherwise we remove it from the category.\"", + "Source Categories":"crowd-sourced, curated, lexical-resources", + "Used in Paper":"Empath: Understanding Topic Signals in Large-Scale Text (Fast et al., 2016)", + "Link":"https:\/\/github.com\/Ejhfast\/empath-client", + "Seeds ID":"negative_emotion-Fast_et_al_2016" + }, + { + "Category":"final identities", + "Seeds":"['detective', 'ambassador', 'coach', 'liar', 'sister', 'chinese', 'enemy', 'radical', 'stripper', 'bum', 'actress', 'russian', 'gf', 'manager', 'scientist', 'gunman', 'asian', 'victim', 'little', 'brother', 'mexican', 'prisoner', 'economist', 'mayor', 'principal', 'vet', 'instructor', 'police', 'buddy', 'candidate', 'feminist', 'photographer', 'father', 'police', 'officer', 'maid', 'indian', 'black', 'teenager', 'innocent', 'citizen', 'employee', 'speaker', 'supervisor', 'hater', 'colleague', 'woman', 'executive', 'teacher', 'jerk', 'civilian', 'ceo', 'assistant', 'advocate', 'coworker', 'patriot', 'vegan', 'arab', 'roommate', 'boxer', 'fan', 'mom', 'judge', 'politician', 'african', 'american', 'republican', 'lady', 'nurse', 'socialist', 'bro', 'toddler', 'scholar', 'pimp', 'artist', 'celebrity', 'sophomore', 'rapper', 'brother', 'clown', 'grandfather', 'spy', 'surgeon', 'priest', 'journalist', 'pilot', 'chick', 'guy', 'rich', 'grad', 'athlete', 'husband', 'secretary', 'user', 'prosecutor', 'attorney', 'nigga', 'female', 'japanese', 'democrat', 'chef', 'gangster', 'intern', 'individual', 'preacher', 'governor', 'lawyer', 'nephew', 'hero', 'girl', 'homeless', 'hacker', 'rapist', 'israeli', 'expert', 'announcer', 'rep', 'gentleman', 'conservative', 'grandmother', 'genius', 'activist', 'hipster', 'hooker', 'leader', 'juror', 'musician', 'bf', 'muslim', 'christian', 'bully', 'parent', 'reporter', 'lawmaker', 'white', 'man', 'middle', 'class', 'slut', 'boss', 'volunteer', 'bachelor', 'minor', 'rebel', 'grandma', 'worker', 'cousin', 'marine', 'loser', 'jew', 'cop', 'passenger', 'daughter', 'sheriff', 'survivor', 'canadian', 'thot', 'supporter', 'punk', 'host', 'millionaire', 'officer', 'soldier', 'american', 'taxpayer', 'murderer', 'mentor', 'neighbor', 'senator', 'virgin', 'protestor', 'voter', 'college', 'student', 'winner', 'family', 'deputy', 'blogger', 'singer', 'firefighter', 'intellectual', 'son', 'black', 'man', 'entrepreneur', 'follower', 'dancer', 'cheerleader', 'fool', 'liberal', 'blonde', 'employer', 'white', 'woman', 'engineer', 'killer', 'teammate', 'guest', 'sucker', 'dude', 'tourist', 'princess', 'inmate', 'runner', 'racist', 'editor', 'saint', 'mama', 'qb', 'man', 'pope', 'teen', 'commissioner', 'homosexual', 'champion', 'gay', 'boy', 'relative', 'farmer', 'pitcher', 'pastor', 'thief', 'poet', 'dentist', 'grandpa', 'adult', 'child', 'baby', 'niece', 'immigrant', 'junior', 'witness', 'boyfriend', 'customer', 'extremist', 'baptist', 'criminal', 'bartender', 'sibling', 'consultant', 'coward', 'believer', 'bride', 'hispanic', 'friend', 'lover', 'protester', 'daddy', 'dad', 'shooter', 'freak', 'angel', 'alcoholic', 'resident', 'pro', 'owner', 'player', 'critic', 'comedian', 'uncle', 'girlfriend', 'partner', 'nerd', 'hoe', 'native', 'catholic', 'bastard', 'author', 'idiot', 'veteran', 'producer', 'actor', 'hostage', 'patient', 'chairman', 'goon', 'moron', 'donor', 'asshole', 
'latino', 'pal', 'rider', 'poor', 'thug', 'designer', 'minority', 'best', 'friend', 'stranger', 'professor', 'black', 'woman', 'vegetarian', 'terrorist', 'director', 'addict', 'student', 'writer', 'freshman', 'president', 'momma', 'spouse', 'lesbian', 'kid', 'geek', 'hypocrite', 'fighter', 'wife', 'atheist', 'doctor', 'white', 'academic', 'chief', 'client', 'aunt', 'mother', 'professional', 'guard', 'consumer', 'minister']", + "Source \/ Justification":"\"we focus on stereotypes of a manually constructed list of 310 identities of interest. These identities were selected by us based on their frequency of use within our dataset as well as their importance to prior research on identity\"", + "Source Categories":"corpus-derived, curated", + "Used in Paper":"Girls rule, boys drool: Extracting semantic and affective stereotypes on Twitter (Joseph et al, 2017)", + "Link":"https:\/\/github.com\/kennyjoseph\/twitter_stereotype_extraction", + "Seeds ID":"final_identities-Joseph_et_al_2017" + }, + { + "Category":"verb senses", + "Seeds":"[]", + "Source \/ Justification":null, + "Source Categories":null, + "Used in Paper":"Unsupervised Discovery of Gendered Language through Latent-Variable Modeling (Hoyle et al., 2019)", + "Link":null, + "Seeds ID":"verb_senses-Hoyle_et_al_2019" + }, + { + "Category":"male singular", + "Seeds":"['man', 'boy', 'father', 'son', 'brother', 'husband', 'uncle', 'nephew', 'emperor', 'king', 'prince', 'duke', 'lord', 'knight', 'waiter', 'actor', 'god', 'policeman', 'postman', 'hero', 'wizard', 'steward', 'he']", + "Source \/ Justification":null, + "Source Categories":"corpus-derived", + "Used in Paper":"Unsupervised Discovery of Gendered Language through Latent-Variable Modeling (Hoyle et al., 2019)", + "Link":null, + "Seeds ID":"male_singular-Hoyle_et_al_2019" + }, + { + "Category":"male plural", + "Seeds":"['men', 'boys', 'fathers', 'sons', 'brothers', 'husbands', 'uncles', 'nephews', 'emperors', 'kings', 'princes', 'dukes', 'lords', 'knights', 'waiters', 'actors', 'gods', 'policemen', 'postmen', 'heros', 'wizards', 'stewards']", + "Source \/ Justification":null, + "Source Categories":"corpus-derived", + "Used in Paper":"Unsupervised Discovery of Gendered Language through Latent-Variable Modeling (Hoyle et al., 2019)", + "Link":null, + "Seeds ID":"male_plural-Hoyle_et_al_2019" + }, + { + "Category":"female singular", + "Seeds":"['woman', 'girl', 'mother', 'daughter', 'sister', 'wife', 'aunt', 'niece', 'empress', 'queen', 'princess', 'duchess', 'lady', 'dame', 'waitress', 'actress', 'goddess', 'policewoman', 'postwoman', 'heroine', 'witch', 'stewardess', 'she']", + "Source \/ Justification":null, + "Source Categories":"corpus-derived", + "Used in Paper":"Unsupervised Discovery of Gendered Language through Latent-Variable Modeling (Hoyle et al., 2019)", + "Link":null, + "Seeds ID":"female_singular-Hoyle_et_al_2019" + }, + { + "Category":"female plural", + "Seeds":"['women', 'girls', 'mothers', 'daughters', 'sisters', 'wives', 'aunts', 'nieces', 'empresses', 'queens', 'princesses', 'duchesses', 'ladies', 'dames', 'waitresses', 'actresses', 'goddesses', 'policewomen', 'postwomen', 'heroines', 'witches', 'stewardesses']", + "Source \/ Justification":null, + "Source Categories":"corpus-derived", + "Used in Paper":"Unsupervised Discovery of Gendered Language through Latent-Variable Modeling (Hoyle et al., 2019)", + "Link":null, + "Seeds ID":"female_plural-Hoyle_et_al_2019" + }, + { + "Category":"personality traits", + "Seeds":"[]", + "Source \/ Justification":"William and Best 
(1975, 1977, 1990)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Unsupervised Discovery of Gendered Language through Latent-Variable Modeling (Hoyle et al., 2019)", + "Link":null, + "Seeds ID":"personality_traits-Hoyle_et_al_2019" + }, + { + "Category":"feminine", + "Seeds":"[]", + "Source \/ Justification":"Learning Gender-Neutral Word Embeddings (Zhao et al., 2018)", + "Source Categories":"prior-work", + "Used in Paper":"Gender-preserving Debiasing for Pre-trained Word Embeddings (Kaneko and Bollegala, 2019)", + "Link":"https:\/\/github.com\/kanekomasahiro\/gp_debias", + "Seeds ID":"feminine-Kaneko_and_Bollegala_2019" + }, + { + "Category":"masculine", + "Seeds":"[]", + "Source \/ Justification":"Learning Gender-Neutral Word Embeddings (Zhao et al., 2018)", + "Source Categories":"prior-work", + "Used in Paper":"Gender-preserving Debiasing for Pre-trained Word Embeddings (Kaneko and Bollegala, 2019)", + "Link":"https:\/\/github.com\/kanekomasahiro\/gp_debias", + "Seeds ID":"masculine-Kaneko_and_Bollegala_2019" + }, + { + "Category":"gender-neutral", + "Seeds":"['abandonment', 'abate', 'aberrant', 'abiding', 'able', 'abolition', 'abomination', 'abrupt', 'absorbing', 'absorption', 'abstention', 'abstraction', 'absurd', 'absurdity', 'abundance', 'abundantly', 'accept', 'acceptable', 'access', 'accident', 'accidentally', 'accompany', 'accomplish', 'according', 'accordingly', 'account', 'accumulation', 'accurate', 'accuse', 'achieve', 'achievement', 'acid', 'acknowledge', 'acquire', 'actuality', 'adaptable', 'adaptation', 'addictive', 'adherence', 'adjacent', 'adjustable', 'adjustment', 'adjustments', 'admittedly', 'ado', 'adorable', 'adore', 'adorn', 'advancement', 'advent', 'adverse', 'adversity', 'advertisement', 'aerial', 'afar', 'affected', 'afternoon', 'ago', 'agonies', 'agree', 'agreement', 'agricultural', 'air', 'aircraft', 'airliner', 'airport', 'alarms', 'alongside', 'aloof', 'alternately', 'amazing', 'amazingly', 'amount', 'amusement', 'analogous', 'analyses', 'answer', 'apartments', 'apparatus', 'apparent', 'apparently', 'appetizer', 'apple', 'appoint', 'appointment', 'appreciate', 'approach', 'appropriate', 'approval', 'approve', 'approximately', 'argument', 'arms', 'arrests', 'art', 'ask', 'astray', 'attack', 'attempt', 'attention', 'attraction', 'attributes', 'authority', 'aware', 'back', 'backdrops', 'bad', 'balance', 'ballots', 'banana', 'bar', 'barely', 'barrel', 'base', 'battery', 'be', 'become', 'bed', 'before', 'beforehand', 'begin', 'beginning', 'behavior', 'behind', 'being', 'belief', 'believe', 'bell', 'belong', 'below', 'benches', 'benefits', 'best', 'better', 'big', 'bigger', 'biggest', 'billion', 'bit', 'bite', 'blackboard', 'blast', 'blizzard', 'blood', 'blouse', 'blow', 'blue', 'blues', 'board', 'boat', 'body', 'bomb', 'bombing', 'bond', 'bone', 'boon', 'bother', 'bottle', 'bottles', 'boxes', 'branch', 'brass', 'bread', 'breath', 'breeze', 'bright', 'brighter', 'building', 'bulletin', 'burn', 'burst', 'bus', 'buses', 'butter', 'bygone', 'calamity', 'calm', 'calmly', 'camera', 'camp', 'campaign', 'campus', 'can', 'cancer', 'candidate', 'canoe', 'canvas', 'cap', 'capability', 'capable', 'capacity', 'capital', 'capsule', 'captain', 'capture', 'car', 'carbon', 'card', 'cardboard', 'care', 'career', 'careful', 'carefully', 'carpet', 'carrier', 'carry', 'case', 'cash', 'cassette', 'cast', 'cataract', 'categorical', 'cathedral', 'cause', 'cell', 'center', 'centigrade', 'central', 'cerebellum', 'ceremonial', 'certain', 'certify', 'chair', 
'chalk', 'chance', 'change', 'changing', 'channel', 'chapter', 'character', 'characteristic', 'characterize', 'charge', 'cheap', 'cheaper', 'cheapest', 'cheerful', 'cheerfully', 'chestnut', 'chin', 'china', 'choices', 'cinema', 'circuit', 'circulate', 'cities', 'city', 'clarify', 'clearance', 'clock', 'clumsy', 'coal', 'cocoon', 'coffee', 'coincide', 'cold', 'colder', 'color', 'come', 'comfort', 'comfortable', 'common', 'communicate', 'communication', 'community', 'compacts', 'company', 'comparison', 'competition', 'completion', 'compounds', 'condition', 'conference', 'confidence', 'confident', 'connection', 'consistent', 'control', 'conventional', 'conversation', 'coolest', 'copy', 'cork', 'corns', 'couch', 'cough', 'could', 'country', 'cover', 'crack', 'credit', 'creep', 'crime', 'crush', 'current', 'curve', 'daily', 'damage', 'damper', 'danger', 'dark', 'darkest', 'day', 'death', 'debate', 'debt', 'decade', 'decide', 'decided', 'decision', 'decrease', 'decreased', 'decreasing', 'deep', 'deeper', 'degree', 'denials', 'describe', 'described', 'describing', 'design', 'desire', 'destruction', 'detail', 'development', 'develops', 'different', 'digestion', 'direction', 'discover', 'discovering', 'discussion', 'disease', 'disgrace', 'disgust', 'disorder', 'display', 'dispute', 'distance', 'distant', 'distaste', 'distasteful', 'distinct', 'distinction', 'distinguish', 'distribute', 'distribution', 'district', 'diverse', 'diversity', 'divide', 'division', 'divorce', 'do', 'dodge', 'does', 'dollar', 'done', 'door', 'double', 'doubt', 'down', 'dream', 'dreams', 'dust', 'eagerly', 'ear', 'early', 'earn', 'earnings', 'earth', 'ease', 'easier', 'easily', 'east', 'eastern', 'easy', 'eat', 'eats', 'economic', 'edge', 'education', 'educations', 'effect', 'egg', 'end', 'enforcement', 'engage', 'english', 'enhance', 'enhanced', 'enhances', 'enhancing', 'equator', 'error', 'essentially', 'establish', 'establishment', 'estate', 'estimate', 'ethical', 'europe', 'evaporate', 'event', 'evidence', 'example', 'exchange', 'existence', 'expansion', 'experience', 'eye', 'eyelids', 'eyes', 'fact', 'fall', 'fast', 'faster', 'fear', 'fed', 'federal', 'fee', 'feed', 'feel', 'feeling', 'fellow', 'fiction', 'field', 'find', 'finds', 'finger', 'fire', 'flame', 'flashlight', 'flight', 'fly', 'flying', 'fold', 'food', 'force', 'form', 'free', 'freely', 'french', 'front', 'fruit', 'full', 'furniture', 'garbage', 'garlic', 'generate', 'generating', 'get', 'gets', 'give', 'glass', 'globe', 'go', 'goals', 'goes', 'going', 'gold', 'good', 'got', 'government', 'grain', 'grammar', 'grapefruit', 'grass', 'great', 'greater', 'greatest', 'green', 'grip', 'group', 'growth', 'half', 'hall', 'hand', 'handful', 'handle', 'hands', 'hang', 'hanger', 'happen', 'happily', 'happy', 'harbor', 'hard', 'harder', 'hardly', 'harmony', 'hate', 'have', 'head', 'heap', 'hear', 'hearing', 'heat', 'heavy', 'help', 'helps', 'hid', 'hidden', 'hill', 'history', 'hole', 'hope', 'hotel', 'hottest', 'hour', 'ice', 'idea', 'ideas', 'idiom', 'implement', 'importance', 'important', 'impose', 'impossible', 'impress', 'impression', 'impressions', 'impressive', 'improve', 'improvement', 'impulse', 'inconsistent', 'increase', 'increases', 'increasing', 'increasingly', 'incredible', 'indeed', 'industry', 'informative', 'ink', 'insect', 'insight', 'instinct', 'instrument', 'insurance', 'interest', 'international', 'invention', 'iron', 'is', 'issues', 'its', 'jar', 'join', 'jump', 'junction', 'keenly', 'keep', 'kept', 'killing', 'kit', 'knew', 'know', 'knowledge', 
'known', 'label', 'ladders', 'land', 'language', 'largest', 'late', 'latent', 'laugh', 'lead', 'learning', 'leather', 'leave', 'leaves', 'leg', 'lemon', 'length', 'let', 'letter', 'level', 'lift', 'light', 'like', 'likes', 'limit', 'liquid', 'list', 'listen', 'listened', 'listening', 'listens', 'lists', 'live', 'load', 'local', 'london', 'long', 'longer', 'longest', 'look', 'looked', 'looking', 'loss', 'lounge', 'low', 'lower', 'lows', 'luck', 'luckiest', 'lucky', 'main', 'mainly', 'mains', 'mainstream', 'maintain', 'maintenance', 'major', 'majority', 'make', 'maker', 'many', 'margin', 'mark', 'may', 'meal', 'mean', 'measure', 'meat', 'meeting', 'memory', 'metal', 'middle', 'might', 'millionth', 'mind', 'minority', 'minute', 'miracle', 'mist', 'money', 'month', 'morning', 'motion', 'mountain', 'mouth', 'move', 'moved', 'mucus', 'multiple', 'museum', 'music', 'myriad', 'name', 'narrower', 'nation', 'national', 'necessarily', 'necessary', 'neck', 'need', 'net', 'new', 'newer', 'newly', 'news', 'nice', 'night', 'nightly', 'noise', 'noisier', 'noisiest', 'none', 'nonetheless', 'nor', 'normal', 'normally', 'nose', 'note', 'number', 'oases', 'observation', 'offer', 'oil', 'old', 'older', 'oldest', 'onion', 'onions', 'only', 'operation', 'opinion', 'order', 'organization', 'other', 'overflow', 'page', 'pain', 'paint', 'paper', 'part', 'particular', 'paste', 'payment', 'pen', 'pencil', 'pending', 'pepper', 'perfect', 'perfectly', 'perform', 'phenomena', 'phone', 'photo', 'pitches', 'place', 'play', 'pleasure', 'plenty', 'point', 'poison', 'pole', 'polish', 'pool', 'porter', 'position', 'possible', 'powder', 'power', 'predicts', 'price', 'print', 'problems', 'process', 'produce', 'productive', 'profit', 'property', 'prose', 'protest', 'pull', 'punishment', 'pupils', 'purpose', 'push', 'put', 'quality', 'question', 'quick', 'quicker', 'quickly', 'quieting', 'rain', 'rainfalls', 'range', 'rank', 'rapid', 'rapidly', 'rare', 'rarely', 'rate', 'ratios', 'raw', 'ray', 'reach', 'react', 'reaction', 'reading', 'real', 'reason', 'recent', 'recliner', 'recognize', 'recommend', 'recommendation', 'record', 'reflect', 'reflection', 'regret', 'relation', 'religion', 'remind', 'rent', 'reportedly', 'representative', 'request', 'resemblance', 'respect', 'rest', 'result', 'retain', 'reward', 'rhythm', 'rhythms', 'rice', 'right', 'river', 'road', 'roll', 'roof', 'room', 'rub', 'rule', 'run', 'running', 'safe', 'safely', 'safer', 'salt', 'sand', 'saw', 'say', 'says', 'scale', 'schedule', 'scheme', 'screamed', 'screaming', 'sea', 'seat', 'second', 'secretary', 'see', 'seeing', 'seem', 'seemingly', 'sees', 'selection', 'self', 'sense', 'sentence', 'serious', 'seriously', 'servant', 'seventh', 'several', 'shade', 'shake', 'shelf', 'shirt', 'shock', 'shoes', 'should', 'show', 'shuffle', 'shuffles', 'sick', 'side', 'siege', 'sign', 'silk', 'silver', 'simplest', 'size', 'skill', 'sky', 'sleep', 'sleeping', 'slept', 'slip', 'slope', 'slow', 'slower', 'slowing', 'slowly', 'smarter', 'smartest', 'smash', 'smell', 'smile', 'smoke', 'smoothing', 'sneeze', 'snow', 'society', 'some', 'song', 'sort', 'sound', 'space', 'speak', 'speaks', 'special', 'spending', 'stacks', 'stage', 'stanford', 'start', 'statement', 'steam', 'steel', 'step', 'stitch', 'stone', 'stones', 'stop', 'story', 'stress', 'stretch', 'stronger', 'structure', 'substance', 'sufficiently', 'sugar', 'suggestion', 'summer', 'support', 'surprise', 'swift', 'swiftly', 'swim', 'system', 'systems', 'take', 'talk', 'taste', 'tasteful', 'tax', 'teeth', 'tell', 'ten', 
'tendency', 'test', 'text', 'then', 'theories', 'theory', 'thing', 'things', 'think', 'thinks', 'thought', 'thousand', 'thousandth', 'thump', 'thunder', 'time', 'tin', 'tissue', 'title', 'tokyo', 'tomatoes', 'tongue', 'took', 'top', 'touch', 'tougher', 'town', 'toy', 'trade', 'transfer', 'transport', 'tree', 'trick', 'trouble', 'try', 'turn', 'twist', 'unacceptable', 'unaware', 'uncomfortable', 'undecided', 'underside', 'unexpectedly', 'unfortunately', 'unimpressive', 'uninformative', 'unit', 'university', 'unknown', 'unproductive', 'use', 'useful', 'utterly', 'value', 'vanishes', 'variously', 'verse', 'very', 'vessel', 'view', 'visibility', 'visit', 'voice', 'walk', 'want', 'ware', 'warmer', 'was', 'wash', 'waste', 'water', 'watery', 'wave', 'way', 'weakest', 'weather', 'week', 'weekend', 'weeks', 'welfare', 'went', 'westward', 'whichever', 'whole', 'widest', 'will', 'wind', 'wine', 'winter', 'wire', 'wishes', 'wood', 'wool', 'word', 'words', 'world', 'worse', 'worst', 'would', 'wound', 'writing', 'yacht', 'year', 'yearly', 'yen', 'young', 'younger', 'youngest', 'zero', 'zigzag', 'zone']", + "Source \/ Justification":"To create a gender-neutral word list, we select gender-neutral words from a list of 3000 most frequent words in English. Two annotators independently selected words and subsequently verified for gender neutrality. https:\/\/www.ef.com\/wwen\/english-resources\/english-vocabulary\/top-3000-words\/", + "Source Categories":"curated", + "Used in Paper":"Gender-preserving Debiasing for Pre-trained Word Embeddings (Kaneko and Bollegala, 2019)", + "Link":"https:\/\/github.com\/kanekomasahiro\/gp_debias", + "Seeds ID":"gender-neutral-Kaneko_and_Bollegala_2019" + }, + { + "Category":"stereotypeical", + "Seeds":"[]", + "Source \/ Justification":"Bolukbasi et al., 2016", + "Source Categories":"prior-work", + "Used in Paper":"Gender-preserving Debiasing for Pre-trained Word Embeddings (Kaneko and Bollegala, 2019)", + "Link":"https:\/\/github.com\/kanekomasahiro\/gp_debias", + "Seeds ID":"stereotypeical-Kaneko_and_Bollegala_2019" + }, + { + "Category":"male names", + "Seeds":"['john', 'paul', 'mike', 'kevin', 'steve', 'greg', 'jeff', 'bill']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. ", + "Source Categories":"prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"male_names-Knoche_et_al_2019" + }, + { + "Category":"female names", + "Seeds":"['amy', 'joan', 'lisa', 'sarah', 'diana', 'kate', 'ann', 'donna']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. ", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"female_names-Knoche_et_al_2019" + }, + { + "Category":"male terms", + "Seeds":"['male', 'man', 'boy', 'brother', 'he', 'him', 'his', 'son', 'father', 'uncle', 'grandfather']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. 
", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"male_terms-Knoche_et_al_2019" + }, + { + "Category":"female terms", + "Seeds":"['female', 'woman', 'girl', 'sister', 'she', 'her', 'hers', 'daughter', 'mother', 'aunt', 'grandmother']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. ", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"female_terms-Knoche_et_al_2019" + }, + { + "Category":"male", + "Seeds":"['john', 'paul', 'mike', 'kevin', 'steve', 'greg', 'jeff', 'bill', 'male', 'man', 'boy', 'brother', 'he', 'him', 'his', 'son', 'father', 'uncle', 'grandfather']", + "Source \/ Justification":"combination of male names and male terms", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"male-Knoche_et_al_2019" + }, + { + "Category":"female", + "Seeds":"['amy', 'joan', 'lisa', 'sarah', 'diana', 'kate', 'ann', 'donna', 'female', 'woman', 'girl', 'sister', 'she', 'her', 'hers', 'daughter', 'mother', 'aunt', 'grandmother']", + "Source \/ Justification":"combination of female names and female terms", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"female-Knoche_et_al_2019" + }, + { + "Category":"white names", + "Seeds":"['adam', 'chip', 'harry', 'josh', 'roger', 'alan', 'frank', 'ian', 'justin', 'ryan', 'andrew', 'fred', 'jack', 'matthew', 'stephen', 'brad', 'greg', 'jed', 'paul', 'todd', 'brandon', 'hank', 'jonathan', 'peter', 'wilbur', 'amanda', 'courtney', 'heather', 'melanie', 'sara', 'amber', 'crystal', 'katie', 'meredith', 'shannon', 'betsy', 'donna', 'kristin', 'nancy', 'stephanie', 'bobbie-sue', 'ellen', 'lauren', 'peggy', 'sue-ellen', 'colleen', 'emily', 'megan', 'rachel', 'wendy', 'brendan', 'geoffrey', 'brett', 'jay', 'neil', 'anne', 'carrie', 'jill', 'laurie', 'kristen', 'sarah']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. 
", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"white_names-Knoche_et_al_2019" + }, + { + "Category":"black names", + "Seeds":"['alonzo', 'jamel', 'lerone', 'percell', 'theo', 'alphonse', 'jerome', 'leroy', 'rasaan', 'torrance', 'darnell', 'lamar', 'lionel', 'rashaun', 'tyree', 'deion', 'lamont', 'malik', 'terrence', 'tyrone', 'everol', 'lavon', 'marcellus', 'terryl', 'wardell', 'aiesha', 'lashelle', 'nichelle', 'shereen', 'temeka', 'ebony', 'latisha', 'shaniqua', 'tameisha', 'teretha', 'jasmine', 'latonya', 'shanise', 'tanisha', 'tia', 'lakisha', 'latoya', 'sharise', 'tashika', 'yolanda', 'lashandra', 'malika', 'shavonn', 'tawanda', 'yvette', 'hakim', 'jermaine', 'kareem', 'jamal', 'rasheed', 'aisha', 'keisha', 'kenya', 'tamika']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. ", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"black_names-Knoche_et_al_2019" + }, + { + "Category":"christianity words", + "Seeds":"['baptism', 'messiah', 'catholicism', 'resurrection', 'christianity', 'salvation', 'protestant', 'gospel', 'trinity', 'jesus', 'christ', 'christian', 'cross', 'catholic', 'church']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. ", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"christianity_words-Knoche_et_al_2019" + }, + { + "Category":"islam words", + "Seeds":"['allah', 'ramadan', 'turban', 'emir', 'salaam', 'sunni', 'koran', 'imam', 'sultan', 'prophet', 'veil', 'ayatollah', 'shiite', 'mosque', 'islam', 'sheik', 'muslim', 'muhammad']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. ", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"islam_words-Knoche_et_al_2019" + }, + { + "Category":"atheism words", + "Seeds":"['atheism', 'atheist', 'atheistic', 'heliocentric', 'evolution', 'darwin', 'galilei', 'agnostic', 'agnosticism', 'pagan', 'science', 'disbelief', 'scepticism', 'philosophy', 'university', 'kopernikus']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. 
", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"atheism_words-Knoche_et_al_2019" + }, + { + "Category":"pleasant", + "Seeds":"['caress', 'freedom', 'health', 'love', 'peace', 'cheer', 'friend', 'heaven', 'loyal', 'pleasure', 'diamond', 'gentle', 'honest', 'lucky', 'rainbow', 'diploma', 'gift', 'honor', 'miracle', 'sunrise', 'family', 'happy', 'laughter', 'paradise', 'vacation', 'joy', 'wonderful']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. ", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"pleasant-Knoche_et_al_2019" + }, + { + "Category":"unpleasant", + "Seeds":"['abuse', 'crash', 'filth', 'murder', 'sickness', 'accident', 'death', 'grief', 'poison', 'stink', 'assault', 'disaster', 'hatred', 'pollute', 'tragedy', 'divorce', 'jail', 'poverty', 'ugly', 'cancer', 'kill', 'rotten', 'vomit', 'agony', 'prison', 'terrible', 'horrible', 'nasty', 'evil', 'war', 'awful', 'failure']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. ", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"unpleasant-Knoche_et_al_2019" + }, + { + "Category":"science", + "Seeds":"['math', 'algebra', 'geometry', 'calculus', 'equations', 'computation', 'numbers', 'addition', 'science', 'technology', 'physics', 'chemistry', 'einstein', 'nasa', 'experiment', 'astronomy']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. ", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"science-Knoche_et_al_2019" + }, + { + "Category":"art", + "Seeds":"['poetry', 'art', 'dance', 'literature', 'novel', 'symphony', 'drama', 'sculpture', 'shakespeare']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. 
", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"art-Knoche_et_al_2019" + }, + { + "Category":"intellectual words", + "Seeds":"['precocious', 'resourceful', 'inquisitive', 'sagacious', 'inventive', 'astute', 'adaptable', 'reflective', 'discerning', 'intuitive', 'inquiring', 'judicious', 'analytical', 'luminous', 'venerable', 'imaginative', 'shrewd', 'thoughtful', 'sage', 'smart', 'ingenious', 'clever', 'brilliant', 'logical', 'intelligent', 'apt', 'genius', 'wise', 'stupid', 'dumb', 'dull', 'clumsy', 'foolish', 'naive', 'unintelligent', 'trivial', 'unwise', 'idiotic']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. ", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"intellectual_words-Knoche_et_al_2019" + }, + { + "Category":"appearance words", + "Seeds":"['alluring', 'voluptuous', 'blushing', 'homely', 'plump', 'sensual', 'gorgeous', 'slim', 'bald', 'athletic', 'fashionable', 'stout', 'ugly', 'muscular', 'slender', 'feeble', 'handsome', 'healthy', 'attractive', 'fat', 'weak', 'thin', 'pretty', 'beautiful', 'strong']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. ", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"appearance_words-Knoche_et_al_2019" + }, + { + "Category":"career", + "Seeds":"['executive', 'management', 'professional', 'corporation', 'salary', 'office', 'business', 'career']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. ", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"career-Knoche_et_al_2019" + }, + { + "Category":"family", + "Seeds":"['home', 'parents', 'children', 'family', 'cousins', 'marriage', 'wedding', 'relatives']", + "Source \/ Justification":"We combined and extended sets which were previously used to identify biases by Greenwald et al. (IAT), Caliskan et al., and Garg et al. 
", + "Source Categories":"borrowed-from-social-sciences, prior-work", + "Used in Paper":"Identifying Biases in Politically Biased Wikis through Word Embeddings (Knoche et al., 2019)", + "Link":"https:\/\/github.com\/MKnoche\/wiki_bias_embedding", + "Seeds ID":"family-Knoche_et_al_2019" + }, + { + "Category":"high morality and low\/neutral warmth", + "Seeds":"['courageous', 'fair', 'principled', 'responsible', 'just', 'honest', 'trustworthy', 'loyal']", + "Source \/ Justification":"subset of 170 personality traits from Goodwin et al., (2014)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Trait associations for Hillary Clinton and Donald Trump in news media: A computational analysis (Bhatia et al., 2018)", + "Link":null, + "Seeds ID":"high_morality_and_low\/neutral_warmth-Bhatia_et_al_2018" + }, + { + "Category":"low\/neutral and morality high warmth", + "Seeds":"['warm', 'sociable', 'happy', 'agreeable', 'enthusiastic', 'easygoing', 'funny', 'playful']", + "Source \/ Justification":"subset of 170 personality traits from Goodwin et al., (2014)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Trait associations for Hillary Clinton and Donald Trump in news media: A computational analysis (Bhatia et al., 2018)", + "Link":null, + "Seeds ID":"low\/neutral_and_morality_high_warmth-Bhatia_et_al_2018" + }, + { + "Category":"high competence", + "Seeds":"['athletic', 'musical', 'creative', 'innovative', 'intelligent', 'organized', 'logical', 'clever']", + "Source \/ Justification":"subset of 170 personality traits from Goodwin et al., (2014)", + "Source Categories":"borrowed-from-social-sciences", + "Used in Paper":"Trait associations for Hillary Clinton and Donald Trump in news media: A computational analysis (Bhatia et al., 2018)", + "Link":null, + "Seeds ID":"high_competence-Bhatia_et_al_2018" + }, + { + "Category":"male words (Penn Treebank)", + "Seeds":"['actor',\n'boy',\n'father',\n'he',\n'him',\n'his',\n'male',\n'man',\n'men',\n'son',\n'sons',\n'spokesman',\n'wife',\n'king',\n'brother']", + "Source \/ Justification":"curated for the target dataset", + "Source Categories":"curated", + "Used in Paper":"Identifying and Reducing Gender Bias in Word-Level Language Models (Bordia and Bowman, 2019)", + "Link":null, + "Seeds ID":"male_words_Penn_Treebank-Bordia_and_Bowman_2019" + }, + { + "Category":"female words (Penn Treebank)", + "Seeds":"['actress',\n'girl',\n'mother',\n'she',\n'her',\n'her',\n'female',\n'woman',\n'women',\n'daughter',\n'daughters',\n'spokeswoman',\n'husband',\n'queen',\n'sister']", + "Source \/ Justification":"curated for the target dataset", + "Source Categories":"curated", + "Used in Paper":"Identifying and Reducing Gender Bias in Word-Level Language Models (Bordia and Bowman, 2019)", + "Link":null, + "Seeds ID":"female_words_Penn_Treebank-Bordia_and_Bowman_2019" + }, + { + "Category":"male words (WikiText-2)", + "Seeds":"['actor',\n'Actor',\n'boy',\n'Boy',\n'boyfriend',\n'Boys',\n'boys',\n'father',\n'Father',\n'Fathers',\n'fathers',\n'Gentleman',\n'gentleman',\n'gentlemen',\n'Gentlemen',\n'grandson',\n'he',\n'He',\n'hero',\n'him',\n'Him',\n'his',\n'His',\n'Husband',\n'husbands',\n'King',\n'kings',\n'Kings',\n'male',\n'Male',\n'males',\n'Males',\n'man',\n'Man',\n'men',\n'Men',\n'Mr.',\n'Prince',\n'prince',\n'son',\n'sons',\n'spokesman',\n'stepfather',\n'uncle',\n'wife',\n'king']", + "Source \/ Justification":"curated for the target dataset", + "Source Categories":"curated", + "Used in Paper":"Identifying and Reducing Gender 
Bias in Word-Level Language Models (Bordia and Bowman, 2019)", + "Link":null, + "Seeds ID":"male_words_WikiText_2-Bordia_and_Bowman_2019" + }, + { + "Category":"female words WikiText-3", + "Seeds":"['actress',\n'Actress',\n'girl',\n'Girl',\n'girlfriend',\n'Girls',\n'girls',\n'mother',\n'Mother',\n'Mothers',\n'mothers',\n'Lady',\n'lady',\n'ladies',\n'Ladies',\n'granddaughter',\n'she',\n'She',\n'heroine',\n'her',\n'Her',\n'her',\n'Her',\n'Wife',\n'wives',\n'Queen',\n'queens',\n'Queens',\n'female',\n'Female',\n'females',\n'Females',\n'woman',\n'Woman',\n'women',\n'Women',\n'Mrs.',\n'Princess',\n'princess',\n'daughter',\n'daughters',\n'spokeswoman',\n'stepmother',\n'aunt',\n'husband',\n'queen']", + "Source \/ Justification":"curated for the target dataset", + "Source Categories":"curated", + "Used in Paper":"Identifying and Reducing Gender Bias in Word-Level Language Models (Bordia and Bowman, 2019)", + "Link":null, + "Seeds ID":"female_words_WikiText_2-Bordia_and_Bowman_2019-Bordia_and_Bowman_2019" + }, + { + "Category":"male words (CNN\/Daily Mail)", + "Seeds":"['actor',\n'boy',\n'boyfriend',\n'boys',\n'father',\n'fathers',\n'gentleman',\n'gentlemen',\n'grandson',\n'he',\n'him',\n'his',\n'husbands',\n'kings',\n'male',\n'males',\n'man',\n'men',\n'prince',\n'son',\n'sons',\n'spokesman',\n'stepfather',\n'uncle',\n'wife',\n'king',\n'brother',\n'brothers']", + "Source \/ Justification":"curated for the target dataset", + "Source Categories":"curated", + "Used in Paper":"Identifying and Reducing Gender Bias in Word-Level Language Models (Bordia and Bowman, 2019)", + "Link":null, + "Seeds ID":"male_words_CNN_DailyMail-Bordia_and_Bowman_2019" + }, + { + "Category":"female words (CNN\/Daily Mail)", + "Seeds":"['actress',\n'girl',\n'girlfriend',\n'girls',\n'mother',\n'mothers',\n'lady',\n'ladies',\n'granddaughter',\n'she',\n'her',\n'her',\n'wives',\n'queens',\n'female',\n'females',\n'woman',\n'women',\n'princess',\n'daughter',\n'daughters',\n'spokeswoman',\n'stepmother',\n'aunt',\n'husband',\n'queen',\n'sister',\n'sisters']", + "Source \/ Justification":"curated for the target dataset", + "Source Categories":"curated", + "Used in Paper":"Identifying and Reducing Gender Bias in Word-Level Language Models (Bordia and Bowman, 2019)", + "Link":null, + "Seeds ID":"female_words_CNN_DailyMail-Bordia_and_Bowman_2019" + }, + { + "Category":"words to debias", + "Seeds":"[]", + "Source \/ Justification":"\"We create a balanced labeled test set consisting of a total of 704 words, with 352 words for each category\u2014gender-specific and non gender-specific. For the non gender-specific category, we select all the 87 neutral and biased words from the SemBias dataset (Zhao et al., 2018b). Further, we select all 320, 40 and 60 gender-biased occupation words released by Bolukbasi et al. (2016); Zhao et al. (2018a) and Rudinger et al. (2018), respectively. After combining and removing duplicate words, we obtain 352 non gender-specific words. For the gender-specific category, we use a list of 222 male and 222 female words provided by Zhao et al. (2018b). We use stratified sampling to under-sample 444 words into 352 words for balancing the classes. The purpose of creating this diversely sourced dataset is to provide a robust ground-truth for evaluating the efficacy of different word classification algorithms.\"", + "Source Categories":"prior-work", + "Used in Paper":"Nurse is Closer to Woman than Surgeon? 
Mitigating Gender-Biased Proximities in Word Embeddings (Kumar et al., 2020)", + "Link":"https:\/\/github.com\/TimeTraveller-San\/RAN-Debias", + "Seeds ID":"words_to_debias-Kumar_et_al_2020" + }, + { + "Category":"common gender identity pairs (male)", + "Seeds":"[]", + "Source \/ Justification":null, + "Source Categories":"unknown", + "Used in Paper":"Reducing Gender Bias in Abusive Language Detection (Park et al., 2018)", + "Link":null, + "Seeds ID":"male_identity-Park_et_al_2018" + }, + { + "Category":"common gender identity pairs (female)", + "Seeds":"[]", + "Source \/ Justification":null, + "Source Categories":"unknown", + "Used in Paper":"Reducing Gender Bias in Abusive Language Detection (Park et al., 2018)", + "Link":null, + "Seeds ID":"female_identity-Park_et_al_2018" + }, + { + "Category":"neutral nouns and adjectives", + "Seeds":"[]", + "Source \/ Justification":null, + "Source Categories":"unknown", + "Used in Paper":"Reducing Gender Bias in Abusive Language Detection (Park et al., 2018)", + "Link":null, + "Seeds ID":"neutral-Park_et_al_2018" + }, + { + "Category":"offensive nouns and adjectives", + "Seeds":"[]", + "Source \/ Justification":null, + "Source Categories":"unknown", + "Used in Paper":"Reducing Gender Bias in Abusive Language Detection (Park et al., 2018)", + "Link":null, + "Seeds ID":"offensive-Park_et_al_2018" + } +]
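
A note for downstream consumers of this file: every "Seeds" value above is a JSON string holding a Python-style list literal (e.g. "['john', 'paul', ...]"), not a JSON array, and placeholder entries carry the empty literal "[]". Iterating the raw string yields single characters (quotes, brackets, stray letters) rather than words, so the literal must be parsed first. Below is a minimal sketch of that parsing step, assuming the file sits at third_party/bad_seeds/gathered_seeds.json as in this commit:

```python
#!/usr/bin/env python3
import ast
import json
import pathlib

IN = pathlib.Path("third_party/bad_seeds/gathered_seeds.json")

def parse_seeds(raw):
    # Each "Seeds" field is a Python list literal inside a JSON string,
    # e.g. "['john', 'paul', ...]" or "[]"; evaluate it safely.
    if not isinstance(raw, str) or not raw.strip():
        return []
    try:
        words = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        return []  # malformed entry: skip rather than crash
    return [w.strip() for w in words if isinstance(w, str) and w.strip()]

data = json.loads(IN.read_text())
seeds_by_id = {
    entry["Seeds ID"]: parse_seeds(entry.get("Seeds"))
    for entry in data
    if entry.get("Seeds ID")
}

# e.g. the combined Knoche et al. (2019) set above: 8 names + 11 terms
print(len(seeds_by_id["male-Knoche_et_al_2019"]))  # -> 19
```

Without this step, any word list built from the raw strings silently degrades into a bag of single characters.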

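Several of the sets above pair off as WEAT-style targets and attributes (the Knoche et al. male/female name lists against career/family, pleasant against unpleasant, science against art), so the differential-association effect size of Caliskan et al. (2017) is the natural statistic to compute over them. A hedged sketch follows; `embeddings` is assumed to be any word-to-vector mapping (for instance, loaded GloVe vectors) and is not part of this repo:

```python
import numpy as np

def _cos(u, v):
    return float(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))

def _assoc(w, A, B, emb):
    # s(w, A, B) = mean cos(w, a) over A  -  mean cos(w, b) over B
    return (np.mean([_cos(emb[w], emb[a]) for a in A])
            - np.mean([_cos(emb[w], emb[b]) for b in B]))

def weat_effect_size(X, Y, A, B, emb):
    # Effect size d from Caliskan et al. (2017): difference of the mean
    # target-set associations, normalized by the pooled std-dev of all
    # per-word associations. Out-of-vocabulary words are dropped first.
    X, Y, A, B = [[w for w in S if w in emb] for S in (X, Y, A, B)]
    sx = [_assoc(x, A, B, emb) for x in X]
    sy = [_assoc(y, A, B, emb) for y in Y]
    return (np.mean(sx) - np.mean(sy)) / np.std(sx + sy, ddof=1)

# Example wiring with the sets parsed above (embeddings is assumed):
# d = weat_effect_size(seeds_by_id["male_names-Knoche_et_al_2019"],
#                      seeds_by_id["female_names-Knoche_et_al_2019"],
#                      seeds_by_id["career-Knoche_et_al_2019"],
#                      seeds_by_id["family-Knoche_et_al_2019"],
#                      embeddings)
```

Implementations differ on whether the pooled standard deviation uses ddof=0 or ddof=1; for sets of this size the choice shifts d by only a few percent, but it is worth pinning down in one place for reproducibility.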