# Configuration file for cot-eval pipelines
# secrets
HUGGINGFACEHUB_API_TOKEN=your-hf-token
# model to evaluate
# - adapt to pre-select a specific model for cot evaluation
# - comment out the following three lines to dynamically fetch the next model from cot-leaderboard-requests
NEXT_MODEL_PATH=microsoft/phi-2
NEXT_MODEL_REVISION=main
NEXT_MODEL_PRECISION=float16
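# example (illustrative values, not part of the default config): any HF model can be pre-selected here, e.g.
# NEXT_MODEL_PATH=mistralai/Mistral-7B-Instruct-v0.2
# NEXT_MODEL_REVISION=main
# NEXT_MODEL_PRECISION=float16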
# setting MAX_LENGTH is required to handle models with a large context window (>8k)
# see also https://github.com/vllm-project/vllm/issues/1559#issuecomment-1797100930
# models with a context window < 2048 are not suited for cot-eval
MAX_LENGTH=2048
GPU_MEMORY_UTILIZATION=0.8
# if the model is dynamically fetched: max number of parameters (in billions) of the evaluated model
MAX_MODEL_PARAMS=10
# number of GPUs available on the machine
NUM_GPUS=1
# path to local cache directory
COTEVAL_CACHE_DIR=./cot-eval-cache
# Dataset repos
# cot reasoning traces generated by the pipeline
TRACES_REPO=cot-leaderboard/cot-eval-traces-2.0
# raw lm-eval harness results
RESULTS_REPO=cot-leaderboard/cot-eval-results
# pending evaluation requests (used to dynamically fetch the next model, see above)
REQUESTS_REPO=cot-leaderboard/cot-leaderboard-requests
# cot effectiveness data displayed in the leaderboard
LEADERBOARD_RESULTS_REPO=cot-leaderboard/cot-leaderboard-results
# whether to create pull requests at the HF repos when uploading or updating, rather than pushing directly to the datasets
CREATE_PULLREQUESTS=true
# configs for CoT reasoning generation
CHAINS=HandsOn,ReflectBeforeRun
MODELKWARGS=[{temperature: .3, top_k: 100, top_p: .95},{temperature: 0},{temperature: 0, use_beam_search: true, best_of: 2, n: 1}]
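# e.g., a single greedy-decoding run only (illustrative, same format as above):
# MODELKWARGS=[{temperature: 0}]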
TASKS=logiqa,logiqa2,lsat-ar,lsat-rc,lsat-lr
TRUST_REMOTE_CODE=true
DO_BASEEVAL=true
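# usage (sketch; an assumption about how this file is consumed, the actual pipeline may load it differently):
# the variables can be passed to a container via docker's --env-file flag, e.g.
#   docker run --gpus all --env-file config.env <cot-eval-image>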