#!/usr/bin/env bash
#
# Run collaborative-agent evaluation experiments against locally served models.
# The commented commands below document how to launch the required model
# servers (user/judge on port 8004, collaborator on port 8003) before running.
# vllm serve meta-llama/Llama-3.3-70B-Instruct --port 8004 --tensor-parallel-size 4 --max-model-len 16384 --gpu-memory-utilization 0.9
# python -m sglang.launch_server --model-path meta-llama/Llama-3.3-70B-Instruct --port 8004 --tp-size 4 --context-length 16384
# python -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --port 8003 --tp-size 4 --context-length 16384
# python -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --port 8003 --tp-size 4 --context-length 16384
# SFT Models
# python -m sglang.launch_server --model-path /shared/storage-01/users/mehri2/LLaMA-Factory/saves/llama-3.1-8b-instruct/full/sft_session_level_reflection/checkpoint-628 --served-model-name meta-llama/Llama-3.1-8B-Instruct --port 8003 --tp-size 4 --context-length 16384
# python -m sglang.launch_server --model-path /shared/storage-01/users/mehri2/LLaMA-Factory/saves/qwen2.5-7b/full/sft_session_level_reflection/checkpoint-628 --served-model-name Qwen/Qwen2.5-7B-Instruct --port 8003 --tp-size 4 --context-length 16384
# GRPO Models
# python -m verl.model_merger merge \
# --backend fsdp \
# --local_dir /shared/storage-01/users/mehri2/mem/collaborativeagents/training/grpo_verl/results/v3/global_step_200/actor \
# --target_dir /shared/storage-01/users/mehri2/mem/collaborativeagents/training/grpo_verl/results/v3/global_step_200_merged_hf
# python -m sglang.launch_server --model-path /shared/storage-01/users/mehri2/mem/collaborativeagents/training/grpo_verl/results/v3/global_step_200_merged_hf --served-model-name meta-llama/Llama-3.1-8B-Instruct --port 8003 --tp-size 4 --context-length 16384
# Number of problems run.py processes per batch.
# NOTE: the previous value of 100 was dead code (immediately overwritten); 50
# is the effective setting and is kept here as the single assignment.
BATCH_SIZE=50

# Root directory for this run's output .jsonl results and .out logs.
# Hoisted to one constant so the four repeated absolute paths cannot drift.
RUN_DIR=/shared/storage-01/users/mehri2/mem/collaborativeagents/scripts/runs/llama70b_temp_1_llama8b_grpo_v3_ckpt200

# Loop over eval sizes and datasets
for EVAL_SIZE in 20; do
  for DATASET in math-hard math-500 logiqa mmlu medqa; do # humaneval bigcodebench
    # Convert dataset name for file paths (replace - with _); parameter
    # expansion avoids the echo|tr subshell pipeline.
    DATASET_FILE=${DATASET//-/_}
    echo "Running experiments for dataset: ${DATASET} with eval_size ${EVAL_SIZE}"

    # # no_user experiment
    # python3 run.py --experiment_type no_user --dataset ${DATASET} --eval_size ${EVAL_SIZE} --batch_size ${BATCH_SIZE} \
    # --collaborator_model_name hosted_vllm/meta-llama/Llama-3.1-8B-Instruct --collaborator_api_base http://localhost:8003/v1 --collaborator_api_key EMPTY \
    # --judge_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --judge_api_base http://localhost:8004/v1 --judge_api_key EMPTY \
    # --output_file /shared/storage-01/users/mehri2/mem/collaborativeagents/scripts/runs/llama70b_temp_1_llama8b/no_user/${DATASET_FILE}_llama70b_user_llama8b_agent_no_user_eval_size_${EVAL_SIZE}.jsonl \
    # >> ./runs/llama70b_temp_1_llama8b/no_user/${DATASET_FILE}_llama70b_user_llama8b_agent_no_user_eval_size_${EVAL_SIZE}.out 2>&1

    # # user_no_profile experiment
    # python3 run.py --experiment_type user_no_profile --dataset ${DATASET} --eval_size ${EVAL_SIZE} --max_turns 10 --batch_size ${BATCH_SIZE} \
    # --user_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --user_api_base http://localhost:8004/v1 --user_api_key EMPTY \
    # --collaborator_model_name hosted_vllm/meta-llama/Llama-3.1-8B-Instruct --collaborator_api_base http://localhost:8003/v1 --collaborator_api_key EMPTY \
    # --judge_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --judge_api_base http://localhost:8004/v1 --judge_api_key EMPTY \
    # --output_file /shared/storage-01/users/mehri2/mem/collaborativeagents/scripts/runs/llama70b_temp_1_llama8b/user_no_profile/${DATASET_FILE}_llama70b_user_llama8b_agent_user_no_profile_eval_size_${EVAL_SIZE}.jsonl \
    # >> ./runs/llama70b_temp_1_llama8b/user_no_profile/${DATASET_FILE}_llama70b_user_llama8b_agent_user_no_profile_eval_size_${EVAL_SIZE}.out 2>&1

    # # user_profiles_without_preferences experiment
    # python3 run.py --experiment_type user_profiles_without_preferences --dataset ${DATASET} --eval_size ${EVAL_SIZE} --max_turns 10 --batch_size ${BATCH_SIZE} \
    # --user_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --user_api_base http://localhost:8004/v1 --user_api_key EMPTY \
    # --collaborator_model_name hosted_vllm/meta-llama/Llama-3.1-8B-Instruct --collaborator_api_base http://localhost:8003/v1 --collaborator_api_key EMPTY \
    # --judge_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --judge_api_base http://localhost:8004/v1 --judge_api_key EMPTY \
    # --output_file /shared/storage-01/users/mehri2/mem/collaborativeagents/scripts/runs/llama70b_temp_1_llama8b/user_profiles_without_preferences/${DATASET_FILE}_llama70b_user_llama8b_agent_user_profiles_without_preferences_eval_size_${EVAL_SIZE}.jsonl \
    # >> ./runs/llama70b_temp_1_llama8b/user_profiles_without_preferences/${DATASET_FILE}_llama70b_user_llama8b_agent_user_profiles_without_preferences_eval_size_${EVAL_SIZE}.out 2>&1

    # user_profiles_with_preferences experiment
    # Ensure the output directory exists: '>>' fails (and the run is lost
    # silently) if the log directory is missing.
    mkdir -p "${RUN_DIR}/user_profiles_with_preferences"
    python3 run.py --experiment_type user_profiles_with_preferences --dataset "${DATASET}" --eval_size "${EVAL_SIZE}" --max_turns 10 --batch_size "${BATCH_SIZE}" \
      --user_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --user_api_base http://localhost:8004/v1 --user_api_key EMPTY \
      --collaborator_model_name hosted_vllm/meta-llama/Llama-3.1-8B-Instruct --collaborator_api_base http://localhost:8003/v1 --collaborator_api_key EMPTY \
      --judge_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --judge_api_base http://localhost:8004/v1 --judge_api_key EMPTY \
      --output_file "${RUN_DIR}/user_profiles_with_preferences/${DATASET_FILE}_llama70b_user_llama8b_agent_user_profiles_with_preferences_eval_size_${EVAL_SIZE}.jsonl" \
      >> "${RUN_DIR}/user_profiles_with_preferences/${DATASET_FILE}_llama70b_user_llama8b_agent_user_profiles_with_preferences_eval_size_${EVAL_SIZE}.out" 2>&1

    # # agent_with_user_preferences experiment
    # python3 run.py --experiment_type agent_with_user_preferences --dataset ${DATASET} --eval_size ${EVAL_SIZE} --max_turns 10 --batch_size ${BATCH_SIZE} \
    # --user_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --user_api_base http://localhost:8004/v1 --user_api_key EMPTY \
    # --collaborator_model_name hosted_vllm/meta-llama/Llama-3.1-8B-Instruct --collaborator_api_base http://localhost:8003/v1 --collaborator_api_key EMPTY \
    # --judge_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --judge_api_base http://localhost:8004/v1 --judge_api_key EMPTY \
    # --output_file /shared/storage-01/users/mehri2/mem/collaborativeagents/scripts/runs/llama70b_temp_1_llama8b/agent_with_user_preferences/${DATASET_FILE}_llama70b_user_llama8b_agent_agent_with_user_preferences_eval_size_${EVAL_SIZE}_v2.jsonl \
    # >> ./runs/llama70b_temp_1_llama8b/agent_with_user_preferences/${DATASET_FILE}_llama70b_user_llama8b_agent_agent_with_user_preferences_eval_size_${EVAL_SIZE}_v2.out 2>&1

    # # agent_with_reflection experiment
    # python3 run.py --experiment_type agent_with_reflection --dataset ${DATASET} --eval_size ${EVAL_SIZE} --max_turns 10 --batch_size ${BATCH_SIZE} \
    # --user_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --user_api_base http://localhost:8004/v1 --user_api_key EMPTY \
    # --collaborator_model_name hosted_vllm/meta-llama/Llama-3.1-8B-Instruct --collaborator_api_base http://localhost:8003/v1 --collaborator_api_key EMPTY \
    # --judge_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --judge_api_base http://localhost:8004/v1 --judge_api_key EMPTY \
    # --output_file /shared/storage-01/users/mehri2/mem/collaborativeagents/scripts/runs/llama70b_temp_1_llama8b/agent_with_reflection_v3/${DATASET_FILE}_llama70b_user_llama8b_agent_agent_with_reflection_eval_size_${EVAL_SIZE}.jsonl \
    # >> ./runs/llama70b_temp_1_llama8b/agent_with_reflection_v3/${DATASET_FILE}_llama70b_user_llama8b_agent_agent_with_reflection_eval_size_${EVAL_SIZE}.out 2>&1

    # # agent_with_reflection_and_scaffolding
    # python3 run.py --experiment_type agent_with_reflection_and_scaffolding --dataset ${DATASET} --eval_size ${EVAL_SIZE} --max_turns 10 --batch_size ${BATCH_SIZE} \
    # --user_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --user_api_base http://localhost:8004/v1 --user_api_key EMPTY \
    # --collaborator_model_name hosted_vllm/meta-llama/Llama-3.1-8B-Instruct --collaborator_api_base http://localhost:8003/v1 --collaborator_api_key EMPTY \
    # --judge_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --judge_api_base http://localhost:8004/v1 --judge_api_key EMPTY \
    # --output_file /shared/storage-01/users/mehri2/mem/collaborativeagents/scripts/runs/llama70b_temp_1_llama8b/agent_with_reflection_and_scaffolding/${DATASET_FILE}_llama70b_user_llama8b_agent_agent_with_reflection_and_scaffolding_eval_size_${EVAL_SIZE}.jsonl \
    # >> ./runs/llama70b_temp_1_llama8b/agent_with_reflection_and_scaffolding/${DATASET_FILE}_llama70b_user_llama8b_agent_agent_with_reflection_and_scaffolding_eval_size_${EVAL_SIZE}.out 2>&1

    # agent_with_reflection_and_proper_scaffolding experiment
    mkdir -p "${RUN_DIR}/agent_with_reflection_and_proper_scaffolding"
    python3 run.py --experiment_type agent_with_reflection_and_proper_scaffolding --dataset "${DATASET}" --eval_size "${EVAL_SIZE}" --max_turns 10 --batch_size "${BATCH_SIZE}" \
      --user_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --user_api_base http://localhost:8004/v1 --user_api_key EMPTY \
      --collaborator_model_name hosted_vllm/meta-llama/Llama-3.1-8B-Instruct --collaborator_api_base http://localhost:8003/v1 --collaborator_api_key EMPTY \
      --judge_model_name hosted_vllm/meta-llama/Llama-3.3-70B-Instruct --judge_api_base http://localhost:8004/v1 --judge_api_key EMPTY \
      --output_file "${RUN_DIR}/agent_with_reflection_and_proper_scaffolding/${DATASET_FILE}_llama70b_user_llama8b_agent_agent_with_reflection_and_proper_scaffolding_eval_size_${EVAL_SIZE}.jsonl" \
      >> "${RUN_DIR}/agent_with_reflection_and_proper_scaffolding/${DATASET_FILE}_llama70b_user_llama8b_agent_agent_with_reflection_and_proper_scaffolding_eval_size_${EVAL_SIZE}.out" 2>&1
  done
done