# scripts/smoke_llms.py

from __future__ import annotations

from pathlib import Path
import sys
import json
import os

# Directory one level above the folder containing this script.
ROOT = Path(__file__).resolve().parents[1]
# Put ROOT/src at the front of sys.path so the `personalization` imports below
# can resolve when this file is run directly (presumably the package lives
# under src/ -- confirm against the repo layout).
sys.path.insert(0, str(ROOT / "src"))

from personalization.config.settings import load_local_models_config
from personalization.models.llm.qwen_instruct import QwenInstruct
from personalization.models.preference_extractor.rule_extractor import QwenRuleExtractor


def ensure_models_present() -> bool:
    """Check that every configured local model directory is populated.

    Reads the ``llm`` and ``preference_extractor`` entries of the local models
    config, resolves each entry's ``local_path`` against ``ROOT``, and prints a
    ``[missing]`` hint for every path that is not a directory containing at
    least one entry. All entries are checked, so every missing model is
    reported in a single run.

    Returns:
        ``True`` when all checked model directories are non-empty,
        ``False`` otherwise.
    """
    cfg = load_local_models_config()
    ok = True
    for spec in (cfg.llm, cfg.preference_extractor):
        path = ROOT / spec.local_path
        # Use is_dir() rather than exists(): iterdir() raises
        # NotADirectoryError on a regular file, which would crash the script
        # instead of reporting the model as missing.
        if not path.is_dir() or not any(path.iterdir()):
            print(f"[missing] {path} is empty. Run: python scripts/pull_models.py --target all")
            ok = False
    return ok


def main() -> None:
    """Smoke-test the local LLM and the preference extractor.

    Returns immediately when ``ensure_models_present()`` reports a missing
    model. Otherwise it loads the local models config, sets ``PREF_DEBUG=1``
    in the environment, builds both models from the config, prints one short
    generation, and then prints the extracted preferences for each sample
    query as indented JSON.
    """
    if not ensure_models_present():
        return

    config = load_local_models_config()
    # Set before the models are constructed; presumably read by the
    # extractor to enable debug output -- confirm against its implementation.
    os.environ["PREF_DEBUG"] = "1"
    model = QwenInstruct.from_config(config)
    pref_extractor = QwenRuleExtractor.from_config(config)

    print("[llm] generating...")
    print(model.generate("Say hello in one short sentence.", max_new_tokens=32, temperature=0.2))

    print("[extractor] extracting...")
    # Scenario label -> user query; each query ends with an explicit
    # formatting/style request. Dict insertion order fixes the run order.
    queries_by_label = {
        "math_latex": (
            "Consider the sequence defined by a_1 = 1 and a_{n+1} = a_n + 1/n for n >= 1. "
            "(1) Prove that a_n diverges. (2) Derive an asymptotic expression for a_n in terms of the harmonic numbers H_n. "
            "(3) Compute the limit of (a_n - ln n) as n -> infinity. Please use LaTeX for the output."
        ),
        "code_python311": (
            "I have a performance bottleneck in my Python code that processes large CSV files. It reads rows, aggregates stats, and writes summaries. "
            "Explain how to optimize I/O and memory, discuss multiprocessing vs async. When you show code, please use Python 3.11 syntax and include type hints in the snippets."
        ),
        "data_json_only": (
            "Given a dataset of user events with timestamps, device types, and regions, outline steps to compute DAU, WAU, and retention. "
            "List pitfalls and how to handle missing data. Return your final answer as JSON only."
        ),
        "writing_concise_no_emoji": (
            "Explain the difference between supervised and reinforcement learning with practical examples and cautions. "
            "Keep answers concise and avoid emojis."
        ),
    }
    for label, user_query in queries_by_label.items():
        print(f"\n[scenario] {label}")
        extracted = pref_extractor.extract_preferences(user_query)
        print(json.dumps(extracted, indent=2, ensure_ascii=False))


# Run the smoke test only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()