forked from ai-forever/ru-gpts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
deepspeed_gpt3_small.sh
46 lines (41 loc) · 1.24 KB
/
deepspeed_gpt3_small.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/bin/bash
# Launch DeepSpeed pretraining of a GPT-3-small model (12 layers, hidden 768,
# 12 heads, seq 2048) via pretrain_gpt3.py. Any extra arguments passed to this
# script are forwarded verbatim to the python entry point.
set -euo pipefail

# Model parallel size
MP_SIZE=1
# Change for multinode config
NUM_GPUS_PER_WORKER=1

# batch per gpu 4, grad acc 4, whole batch 256 samples == 512k tokens
# 1 epoch 160000 steps, train 3 epochs
#
# NOTE: the original script passed --save-interval twice (1000, then 100);
# the later value wins with argparse, so only the effective 100 is kept here.
gpt_options=(
  --train-data-path /path/to/train.list
  --test-data-path /path/to/test.list
  --logging-dir=log/
  --save model
  --model-parallel-size "${MP_SIZE}"
  --save-interval 100
  --log-interval 100
  --eval-interval 100
  --eval-iters 100
  --num-layers 12
  --hidden-size 768
  --num-attention-heads 12
  --batch-size 1
  --seq-length 2048
  --max-position-embeddings 2048
  --train-iters 500000
  --resume-dataloader
  --distributed-backend nccl
  --lr 0.0002
  --lr-decay-style cosine
  --lr-decay-iters 320000
  --warmup .004
  --fp16
  --checkpoint-activations
  --deepspeed-activation-checkpointing
  --deepspeed
  --deepspeed_config src/deepspeed_config/gpt3_small_2048.json
)

# Build the command as an array so arguments (including "$@") survive spaces
# and globs intact — no eval of a flat string.
run_cmd=(mpirun --np "${NUM_GPUS_PER_WORKER}" python ../pretrain_gpt3.py "$@" "${gpt_options[@]}")

# Echo a faithfully shell-quoted copy of what will run, then run it with
# USE_DEEPSPEED=1 in the environment (matching the original env prefix).
printf '%q ' USE_DEEPSPEED=1 "${run_cmd[@]}"
printf '\n'
USE_DEEPSPEED=1 "${run_cmd[@]}"