1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
#!/usr/bin/env python3
import json, sys, pathlib
# Input: the gathered seed-set JSON file that load_all() reads.
IN = pathlib.Path("third_party/bad_seeds/gathered_seeds.json")
# Output directory that main() writes the exported word-list files into.
OUT = pathlib.Path("assets/groups")
# Runs at import time: create OUT and any missing parents; an existing
# directory is not an error.
OUT.mkdir(parents=True, exist_ok=True)
def load_all():
    """Load and return the parsed contents of the seed-set JSON file ``IN``.

    The file is handed to ``json.loads`` as raw bytes so the JSON decoder
    picks the encoding (UTF-8/16/32, with or without BOM) itself instead of
    relying on the platform's locale encoding.

    Returns:
        Whatever ``json.loads`` produces for the file; ``main`` iterates it
        as a sequence of entry dicts.

    Exits:
        Status 1, after writing an ``[ERR]`` line to stderr, when the file
        is missing or does not contain decodable, valid JSON.
    """
    try:
        raw = IN.read_bytes()
    except FileNotFoundError:
        sys.stderr.write(f"[ERR] Missing file: {IN}\n")
        sys.exit(1)

    try:
        return json.loads(raw)
    except (json.JSONDecodeError, UnicodeDecodeError) as err:
        # Report malformed input the same way as a missing file rather than
        # letting a raw traceback escape.
        sys.stderr.write(f"[ERR] Invalid JSON in {IN}: {err}\n")
        sys.exit(1)
def pick(data, id_exact=None, contains=None):
    """Return the ``"Seeds"`` value of the first entry in *data* matching an ID query.

    An entry's ID is the first truthy value among its ``"Seeds ID"``,
    ``"Seeds_ID"`` and ``"SeedsID"`` keys. Matching runs in two passes over
    the whole collection: every entry is first compared against *id_exact*,
    and only if that finds nothing is the *contains* filter tried.

    Args:
        data: Iterable of entries. Entries that are not dicts are ignored.
            The iterable is materialised once, so a generator is accepted.
        id_exact: ID that must equal the entry's ID exactly (case-sensitive).
            ``None`` skips the exact pass.
        contains: A substring, or an iterable of substrings, that must ALL
            occur in the entry's ID, compared case-insensitively. This is a
            plain substring test, so ``"male"`` also matches an ID that
            contains ``"female"``. ``None`` or empty skips this pass.

    Returns:
        ``entry.get("Seeds")`` for the first matching entry (``None`` if that
        entry has no ``"Seeds"`` key), or ``None`` when nothing matches.
    """
    def _sid(entry):
        return entry.get("Seeds ID") or entry.get("Seeds_ID") or entry.get("SeedsID")

    # Materialise once: the collection may be walked twice, and stray
    # non-dict items cannot carry an ID.
    entries = [entry for entry in data if isinstance(entry, dict)]

    if id_exact is not None:
        for entry in entries:
            sid = _sid(entry)
            if sid and sid == id_exact:
                return entry.get("Seeds")

    if not contains:
        return None

    # A bare string means one substring, not a sequence of characters.
    if isinstance(contains, str):
        contains = [contains]
    # Lower-case the needles once instead of once per entry; building a list
    # also keeps a one-shot iterable usable for every entry.
    needles = [sub.lower() for sub in contains]

    for entry in entries:
        sid = _sid(entry)
        haystack = sid.lower() if isinstance(sid, str) else ""
        if all(needle in haystack for needle in needles):
            return entry.get("Seeds")
    return None
def dump(words, path):
    """Write the distinct, stripped, non-empty strings in *words* to *path*.

    Non-string items and strings that are empty after stripping are dropped;
    the remaining tokens are de-duplicated, sorted, and written one per line,
    each terminated by a newline. The file is always written as UTF-8 so the
    output does not depend on the platform's locale encoding. When no tokens
    remain the file is written empty rather than as a single blank line.

    Args:
        words: Iterable of candidate tokens (items of any type).
        path: ``pathlib.Path``-like object providing ``write_text``.

    Returns:
        The number of tokens written.
    """
    stripped = (w.strip() for w in words if isinstance(w, str))
    tokens = sorted({tok for tok in stripped if tok})
    path.write_text("".join(f"{tok}\n" for tok in tokens), encoding="utf-8")
    return len(tokens)
def main():
    """Export the Caliskan et al. (2017) WEAT word sets as text files under ``OUT``.

    Loads the seed collection with ``load_all``, looks up the male and female
    name sets (exact ID first, loose substring match as a fallback) and writes
    them with ``dump`` to ``weat_male_names.txt`` and ``weat_female_names.txt``.
    The career and family word sets are optional and are written to
    ``weat_career_words.txt`` / ``weat_family_words.txt`` only when found.
    Finally prints the number of tokens written for each set.

    Exits:
        Status 2, after listing on stderr every entry ID found in the data,
        when either name set is missing or empty.
    """
    data = load_all()

    def _seed_id(entry):
        # Same key spellings pick() looks at; non-dict entries have no ID.
        if not isinstance(entry, dict):
            return None
        return entry.get("Seeds ID") or entry.get("Seeds_ID") or entry.get("SeedsID")

    # "male" is a substring of "female", so the loose fallback for the male
    # set must not see female entries or it can return the female names.
    not_female = [
        entry for entry in data
        if "female" not in str(_seed_id(entry) or "").lower()
    ]

    # Canonical WEAT name sets (Caliskan et al. 2017)
    male = (
        pick(data, id_exact="male_names_1-Caliskan_et_al_2017")
        or pick(not_female, contains=["male", "name"])
    )
    female = (
        pick(data, id_exact="female_names_1-Caliskan_et_al_2017")
        or pick(data, contains=["female", "name"])
    )
    if not male or not female:
        # Help debug if schema changed
        sys.stderr.write("[ERR] Could not locate WEAT male/female name sets. Available IDs:\n")
        for entry in data:
            sid = _seed_id(entry)
            if sid:
                sys.stderr.write(f" - {sid}\n")
        sys.exit(2)

    n_m = dump(male, OUT / "weat_male_names.txt")
    n_f = dump(female, OUT / "weat_female_names.txt")

    # Optional: career/family word sets (also from Caliskan et al. 2017)
    career = (
        pick(data, id_exact="career_words_1-Caliskan_et_al_2017")
        or pick(data, contains=["career"])
    )
    family = (
        pick(data, id_exact="family_words_1-Caliskan_et_al_2017")
        or pick(data, contains=["family"])
    )
    n_c = dump(career, OUT / "weat_career_words.txt") if career else 0
    n_fam = dump(family, OUT / "weat_family_words.txt") if family else 0

    print(f"Exported: male_names={n_m}, female_names={n_f}, career={n_c}, family={n_fam}")


if __name__ == "__main__":
    main()
|